From f4e7756bff5c1b0c246afb491f42e9f9de84dc84 Mon Sep 17 00:00:00 2001
From: Laughing <61612323+Laughing-q@users.noreply.github.com>
Date: Thu, 31 Oct 2024 20:36:27 +0800
Subject: [PATCH] `ultralytics 8.3.26` EdgeTPU Pose models fix (#17281)

Co-authored-by: Glenn Jocher
Co-authored-by: UltralyticsAssistant
---
 ultralytics/__init__.py        |  2 +-
 ultralytics/nn/autobackend.py  |  3 +++
 ultralytics/nn/modules/head.py | 18 +++++++++++++++---
 3 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index c847dd4d..fedf8629 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.3.25"
+__version__ = "8.3.26"
 
 import os
 
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index 245e42c4..cef05a35 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -663,6 +663,9 @@ class AutoBackend(nn.Module):
                         else:
                             x[:, [0, 2]] *= w
                             x[:, [1, 3]] *= h
+                            if self.task == "pose":
+                                x[:, 5::3] *= w
+                                x[:, 6::3] *= h
                     y.append(x)
             # TF segment fixes: export is reversed vs ONNX export and protos are transposed
             if len(y) == 2:  # segment with (det, proto) output order reversed
diff --git a/ultralytics/nn/modules/head.py b/ultralytics/nn/modules/head.py
index 4bc1fa25..84c31709 100644
--- a/ultralytics/nn/modules/head.py
+++ b/ultralytics/nn/modules/head.py
@@ -246,9 +246,21 @@ class Pose(Detect):
     def kpts_decode(self, bs, kpts):
         """Decodes keypoints."""
         ndim = self.kpt_shape[1]
-        if self.export:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-            y = kpts.view(bs, *self.kpt_shape, -1)
-            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
+        if self.export:
+            if self.format in {
+                "tflite",
+                "edgetpu",
+            }:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
+                # Precompute normalization factor to increase numerical stability
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                grid_h, grid_w = self.shape[2], self.shape[3]
+                grid_size = torch.tensor([grid_w, grid_h], device=y.device).reshape(1, 2, 1)
+                norm = self.strides / (self.stride[0] * grid_size)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * norm
+            else:
+                # NCNN fix
+                y = kpts.view(bs, *self.kpt_shape, -1)
+                a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
             if ndim == 3:
                 a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
             return a.view(bs, self.nk, -1)
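
Note (reviewer sketch, not part of the patch): the head.py change makes the TFLite/Edge TPU branch of Pose.kpts_decode emit keypoint x/y normalized to the input image size, which is why the autobackend.py hunk multiplies them back by w and h at inference time. Below is a minimal stand-alone illustration of why norm = strides / (stride[0] * grid_size) equals strides / image_size; the 640x640 input and the P3-P5 strides of 8/16/32 are assumed values for the example, not taken from the patch.

    # Stand-alone sketch (assumed square 640x640 input, strides 8/16/32); not Ultralytics code.
    import torch

    img_w = img_h = 640          # assumed square input size
    level_strides = [8, 16, 32]  # typical YOLO P3-P5 strides (assumption)

    # Grid of the first (highest-resolution) feature map, mirroring self.shape[2], self.shape[3]
    grid_w, grid_h = img_w // level_strides[0], img_h // level_strides[0]  # 80 x 80
    grid_size = torch.tensor([grid_w, grid_h]).reshape(1, 2, 1)

    # One stride value per anchor cell across all levels, mirroring self.strides
    strides = torch.cat(
        [torch.full((1, 1, (img_w // s) * (img_h // s)), float(s)) for s in level_strides], dim=2
    )

    # strides / (stride[0] * grid_size) == strides / image_size, so the decoded keypoints
    # come out in [0, 1] image coordinates instead of pixels, keeping values small for
    # integer quantization; multiplying by the image size recovers the pixel-space scale.
    norm = strides / (level_strides[0] * grid_size)
    assert torch.allclose(norm * img_w, strides.expand(-1, 2, -1))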