From 56e4e627ba4275f23efdb0dcb840fa8e380cdfb3 Mon Sep 17 00:00:00 2001 From: Ning Dong Date: Wed, 19 Jun 2019 11:08:51 -0700 Subject: [PATCH] Fix dimension off by not adding things to dictionary in intermediate eval (#573) Summary: Pull Request resolved: https://github.com/pytorch/translate/pull/573 Same as the title. The long-term solution could be to add an option to distinguish intermediate eval. But it was confirmed with Juan that setting it to False shouldn't affect existing behavior. Reviewed By: jmp84 Differential Revision: D15793470 fbshipit-source-id: 670de05741607e6765cb8f839cab1301999ba1f0 --- pytorch_translate/generate.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pytorch_translate/generate.py b/pytorch_translate/generate.py index f11fa71f..ed3c4ad2 100644 --- a/pytorch_translate/generate.py +++ b/pytorch_translate/generate.py @@ -205,7 +205,9 @@ def _generate_score(models, args, task, dataset): maxlen_b=args.max_len_b, cuda=use_cuda, timer=gen_timer, - prefix_size=1 if pytorch_translate_data.is_multilingual_many_to_one(args) else 0, + prefix_size=1 + if pytorch_translate_data.is_multilingual_many_to_one(args) + else 0, ) for trans_info in _iter_translations( @@ -432,7 +434,7 @@ def _iter_translations(args, task, dataset, translations, align_dict, rescorer): # Convert back to tokens for evaluation with unk replacement # and/or without BPE target_tokens = task.target_dictionary.encode_line( - target_str, add_if_not_exist=True + target_str, add_if_not_exist=False ) # The probs score for the hypo_str; whether it's normalized by # sequence length or not depends on normalize_scores, which is