diff --git a/instructor/real_data/instructor.py b/instructor/real_data/instructor.py
index 622c382a..96739ecd 100644
--- a/instructor/real_data/instructor.py
+++ b/instructor/real_data/instructor.py
@@ -40,8 +40,8 @@ def __init__(self, opt):
         try:
             self.train_data = GenDataIter(cfg.train_data)
             self.test_data = GenDataIter(cfg.test_data, if_test_data=True)
-        except:
-            pass
+        except Exception as e:
+            print(f'Could not load train and test data: {e}')
 
         try:
             self.train_data_list = [GenDataIter(cfg.cat_train_data.format(i)) for i in range(cfg.k_label)]
@@ -52,8 +52,8 @@ def __init__(self, opt):
 
             self.train_samples_list = [self.train_data_list[i].target for i in range(cfg.k_label)]
             self.clas_samples_list = [self.clas_data_list[i].target for i in range(cfg.k_label)]
-        except:
-            pass
+        except Exception as e:
+            print(f'Could not create train_samples_list and class_samples_list: {e}')
 
         # Criterion
         self.mle_criterion = nn.NLLLoss()
diff --git a/utils/data_loader.py b/utils/data_loader.py
index 3d7ed791..9dc0a92e 100644
--- a/utils/data_loader.py
+++ b/utils/data_loader.py
@@ -82,6 +82,7 @@ def prepare(samples, gpu=False):
 
     def load_data(self, filename):
         """Load real data from local file"""
         self.tokens = get_tokenlized(filename)
+        self.tokens = [tokens for tokens in self.tokens if len(tokens) > 0]
         samples_index = tokens_to_tensor(self.tokens, self.word2idx_dict)
         return self.prepare(samples_index)