From fafbd7e7c0b72338bb7db601d34ef27e4a2626ce Mon Sep 17 00:00:00 2001 From: Houjun Liu Date: Mon, 23 Oct 2023 23:34:22 -0700 Subject: [PATCH] duplicate samples each time they are used --- stanza/models/pos/data.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/stanza/models/pos/data.py b/stanza/models/pos/data.py index c8f963cb49..7289c46a77 100644 --- a/stanza/models/pos/data.py +++ b/stanza/models/pos/data.py @@ -1,6 +1,7 @@ from os import set_inheritable import random import logging +import copy import torch from collections import namedtuple @@ -184,8 +185,11 @@ def __getitem__(self, key): pretrained = sample.pretrain # and deal with char - char = [i.copy() for i in sample.char] - + char = copy.deepcopy(sample.char) + words = copy.deepcopy(words) + xpos = copy.deepcopy(xpos) + ufeats = copy.deepcopy(ufeats) + pretrained = copy.deepcopy(pretrained) raw_text = sample[6] # mask out the elements that we need to mask out