From 2f9a60438798f6139464cbdfad910917e2375026 Mon Sep 17 00:00:00 2001
From: Frank Ruis
Date: Fri, 3 May 2024 23:00:18 +0200
Subject: [PATCH] fix double zero_grad call messing up gradient accumulation (#11217)

Co-authored-by: Glenn Jocher
---
 ultralytics/engine/trainer.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 233ce40b..05c56dea 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -329,6 +329,7 @@ class BaseTrainer:
             base_idx = (self.epochs - self.args.close_mosaic) * nb
             self.plot_idx.extend([base_idx, base_idx + 1, base_idx + 2])
         epoch = self.start_epoch
+        self.optimizer.zero_grad()  # zero any resumed gradients to ensure stability on train start
         while True:
             self.epoch = epoch
             self.run_callbacks("on_train_epoch_start")
@@ -349,7 +350,6 @@ class BaseTrainer:
                 LOGGER.info(self.progress_string())
                 pbar = TQDM(enumerate(self.train_loader), total=nb)
             self.tloss = None
-            self.optimizer.zero_grad()
             for i, batch in pbar:
                 self.run_callbacks("on_train_batch_start")
                 # Warmup
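
Note (not part of the patch): the trainer accumulates gradients over several batches and only steps the optimizer every `accumulate` batches, so an accumulation window can span an epoch boundary. Calling zero_grad() at the top of every epoch wiped those partially accumulated gradients; the fix zeroes only once before the epoch loop, which also clears any stale gradients when training is resumed. The sketch below is a minimal toy illustration of that effect; the model, data, and accumulate value are made up and are not the Ultralytics code.

# Illustrative sketch only: shows how a per-epoch zero_grad changes the result
# when an accumulation window crosses an epoch boundary.
import torch

torch.manual_seed(0)
data = [torch.randn(2, 4) for _ in range(3)]  # 3 toy batches per epoch
accumulate = 2  # take an optimizer step only every 2 batches

def train(zero_every_epoch: bool) -> torch.Tensor:
    """Run two epochs and return the final weight tensor."""
    torch.manual_seed(0)
    model = torch.nn.Linear(4, 1)
    opt = torch.optim.SGD(model.parameters(), lr=0.1)
    opt.zero_grad()  # one-time zeroing before the epoch loop, as the patch does
    seen = 0
    for _epoch in range(2):
        if zero_every_epoch:
            opt.zero_grad()  # old behaviour: wipes gradients carried over the epoch boundary
        for batch in data:
            model(batch).mean().backward()  # gradients accumulate in .grad
            seen += 1
            if seen % accumulate == 0:
                opt.step()
                opt.zero_grad()
    return model.weight.detach().clone()

# With 3 batches per epoch and accumulate=2, the last batch of epoch 0 and the
# first batch of epoch 1 share one accumulation window, so the two runs diverge.
print(torch.allclose(train(True), train(False)))  # prints False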