From 56e4e627ba4275f23efdb0dcb840fa8e380cdfb3 Mon Sep 17 00:00:00 2001 From: Ning Dong Date: Wed, 19 Jun 2019 11:08:51 -0700 Subject: [PATCH] Fix dimension off by not adding things to dictionary in intermediate eval (#573) Summary: Pull Request resolved: https://github.com/pytorch/translate/pull/573 Same as the title. The long-term solution could be to add an option to distinguish intermediate eval. But it was confirmed with Juan that setting it to False shouldn't affect existing behavior. Reviewed By: jmp84 Differential Revision: D15793470 fbshipit-source-id: 670de05741607e6765cb8f839cab1301999ba1f0 --- pytorch_translate/generate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_translate/generate.py b/pytorch_translate/generate.py index f11fa71f..ed3c4ad2 100644 --- a/pytorch_translate/generate.py +++ b/pytorch_translate/generate.py @@ -205,7 +205,9 @@ def _generate_score(models, args, task, dataset): maxlen_b=args.max_len_b, cuda=use_cuda, timer=gen_timer, - prefix_size=1 if pytorch_translate_data.is_multilingual_many_to_one(args) else 0, + prefix_size=1 + if pytorch_translate_data.is_multilingual_many_to_one(args) + else 0, ) for trans_info in _iter_translations( @@ -432,7 +434,7 @@ def _iter_translations(args, task, dataset, translations, align_dict, rescorer): # Convert back to tokens for evaluation with unk replacement # and/or without BPE target_tokens = task.target_dictionary.encode_line( - target_str, add_if_not_exist=True + target_str, add_if_not_exist=False ) # The probs score for the hypo_str; whether it's normalized by # sequence length or not depends on normalize_scores, which is