From af6b8e0825936b6cba90315144b0782d06aa787d Mon Sep 17 00:00:00 2001
From: Zae Myung Kim
Date: Thu, 10 Jan 2019 15:01:35 +0900
Subject: [PATCH] Resolves [BUG] tot_len_ratio being inf when src.size() is zero

Resolves ["[BUG] tot_len_ratio being inf when src.size() is zero"](https://github.com/clab/fast_align/issues/39) by skipping lines with zero length.
---
 src/fast_align.cc | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/fast_align.cc b/src/fast_align.cc
index 637af26..6af918e 100644
--- a/src/fast_align.cc
+++ b/src/fast_align.cc
@@ -247,6 +247,7 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
   string line;
   bool flag = false;
   int lc = 0;
+  int skipped_lines = 0;
   cerr << "INITIAL PASS " << endl;
   while (true) {
     getline(in, line);
@@ -259,7 +260,9 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
     if (is_reverse)
       swap(src, trg);
     if (src.size() == 0 || trg.size() == 0) {
-      cerr << "Error in line " << lc << "\n" << line << endl;
+      cerr << "Error in line " << lc << ". Skipped.\n" << line << endl;
+      skipped_lines++;
+      continue;
     }
     *tot_len_ratio += static_cast<double>(trg.size()) / static_cast<double>(src.size());
     *n_target_tokens += trg.size();
@@ -288,10 +291,11 @@ void InitialPass(const unsigned kNULL, const bool use_null, TTable* s2t,
   }
   AddTranslationOptions(insert_buffer, s2t);
 
-  mean_srclen_multiplier = (*tot_len_ratio) / lc;
+  mean_srclen_multiplier = (*tot_len_ratio) / (lc - skipped_lines);
   if (flag) {
     cerr << endl;
   }
+  cerr << "number of skipped lines = " << skipped_lines << endl;
   cerr << "expected target length = source length * " << mean_srclen_multiplier << endl;
 }
 