diff --git a/nmt/train.py b/nmt/train.py index 75978ec4..a8077249 100644 --- a/nmt/train.py +++ b/nmt/train.py @@ -199,7 +199,8 @@ def run_full_eval(model_dir, infer_model, infer_sess, eval_model, eval_sess, def init_stats(): """Initialize statistics that we want to accumulate.""" return {"step_time": 0.0, "loss": 0.0, "predict_count": 0.0, - "total_count": 0.0, "grad_norm": 0.0} + "total_count": 0.0, "grad_norm": 0.0, + "checkpoint_total_samples": 0.0} def update_stats(stats, start_time, step_result): @@ -213,6 +214,7 @@ def update_stats(stats, start_time, step_result): stats["predict_count"] += step_predict_count stats["total_count"] += float(step_word_count) stats["grad_norm"] += grad_norm + stats["checkpoint_total_samples"] += float(batch_size) return global_step, learning_rate, step_summary @@ -220,9 +222,9 @@ def update_stats(stats, start_time, step_result): def print_step_info(prefix, global_step, info, result_summary, log_f): """Print all info at the current global step.""" utils.print_out( - "%sstep %d lr %g step-time %.2fs wps %.2fK ppl %.2f gN %.2f %s, %s" % + "%sstep %d lr %g step-time %.2fs wps %.2fK sps %5.2f ppl %.2f gN %.2f %s, %s" % (prefix, global_step, info["learning_rate"], info["avg_step_time"], - info["speed"], info["train_ppl"], info["avg_grad_norm"], result_summary, + info["speed"], info["speed_samples"], info["train_ppl"], info["avg_grad_norm"], result_summary, time.ctime()), log_f) @@ -234,6 +236,7 @@ def process_stats(stats, info, global_step, steps_per_stats, log_f): info["avg_grad_norm"] = stats["grad_norm"] / steps_per_stats info["train_ppl"] = utils.safe_exp(stats["loss"] / stats["predict_count"]) info["speed"] = stats["total_count"] / (1000 * stats["step_time"]) + info["speed_samples"] = stats["checkpoint_total_samples"] / stats["step_time"] # Check for overflow is_overflow = False @@ -253,7 +256,7 @@ def before_train(loaded_train_model, train_model, train_sess, global_step, info = {"train_ppl": 0.0, "speed": 0.0, "avg_step_time": 0.0, "avg_grad_norm": 0.0, "learning_rate": loaded_train_model.learning_rate.eval( - session=train_sess)} + session=train_sess), "speed_samples": 0.0} start_train_time = time.time() utils.print_out("# Start step %d, lr %g, %s" % (global_step, info["learning_rate"], time.ctime()), log_f)