From 25ae9da04607329387aeb68ad0b8424e6d624f82 Mon Sep 17 00:00:00 2001 From: 0x000011b <0x000011b@waifu.club> Date: Sun, 18 Dec 2022 22:24:26 -0300 Subject: [PATCH] fix: tensorboard getting messed up after epoch end --- training/colossalai/run_clm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/training/colossalai/run_clm.py b/training/colossalai/run_clm.py index 4c76c4b..b2e1bfe 100644 --- a/training/colossalai/run_clm.py +++ b/training/colossalai/run_clm.py @@ -577,8 +577,8 @@ def main(): train_perplexity = math.exp(loss) except OverflowError: train_perplexity = float("inf") - writer.add_scalar("Train/Perplexity (Step)", train_perplexity, step) - writer.add_scalar("Train/Loss (Step)", loss, step) + writer.add_scalar("Train/Perplexity (Step)", train_perplexity, global_step) + writer.add_scalar("Train/Loss (Step)", loss, global_step) if args.output_dir is not None and args.checkpointing_steps is not None: if args.checkpointing_steps != "epoch" and completed_steps % int(args.checkpointing_steps) == 0: