From 533774290c06f7d5750ccf4c9d8fc2778b4df85a Mon Sep 17 00:00:00 2001 From: franklinOliveira Date: Tue, 15 Oct 2024 21:54:34 -0300 Subject: [PATCH] Optimize Example YOLO post-processing speed (#16821) Co-authored-by: UltralyticsAssistant Co-authored-by: Ultralytics Assistant <135830346+UltralyticsAssistant@users.noreply.github.com> Co-authored-by: Glenn Jocher --- .../YOLOv8-OpenCV-int8-tflite-Python/main.py | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py b/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py index 70bccfa1..46d7fb42 100644 --- a/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py +++ b/examples/YOLOv8-OpenCV-int8-tflite-Python/main.py @@ -188,38 +188,48 @@ class Yolov8TFLite: Returns: numpy.ndarray: The input image with detections drawn on it. """ + # Transpose predictions outside the loop + output = [np.transpose(pred) for pred in output] + boxes = [] scores = [] class_ids = [] - for pred in output: - pred = np.transpose(pred) - for box in pred: - x, y, w, h = box[:4] - x1 = x - w / 2 - y1 = y - h / 2 - boxes.append([x1, y1, w, h]) - idx = np.argmax(box[4:]) - scores.append(box[idx + 4]) - class_ids.append(idx) + # Vectorize extraction of bounding boxes, scores, and class IDs + for pred in output: + x, y, w, h = pred[:, 0], pred[:, 1], pred[:, 2], pred[:, 3] + x1 = x - w / 2 + y1 = y - h / 2 + boxes.extend(np.column_stack([x1, y1, w, h])) + + # Argmax and score extraction for all predictions at once + idx = np.argmax(pred[:, 4:], axis=1) + scores.extend(pred[np.arange(pred.shape[0]), idx + 4]) + class_ids.extend(idx) + + # Precompute gain and pad once + img_height, img_width = input_image.shape[:2] + gain = min(img_width / self.img_width, img_height / self.img_height) + pad = ( + round((img_width - self.img_width * gain) / 2 - 0.1), + round((img_height - self.img_height * gain) / 2 - 0.1), + ) + + # Non-Maximum Suppression (NMS) in one go indices = cv2.dnn.NMSBoxes(boxes, scores, self.confidence_thres, self.iou_thres) - for i in indices: - # Get the box, score, and class ID corresponding to the index + # Process selected indices + for i in indices.flatten(): box = boxes[i] - gain = min(img_width / self.img_width, img_height / self.img_height) - pad = ( - round((img_width - self.img_width * gain) / 2 - 0.1), - round((img_height - self.img_height * gain) / 2 - 0.1), - ) box[0] = (box[0] - pad[0]) / gain box[1] = (box[1] - pad[1]) / gain box[2] = box[2] / gain box[3] = box[3] / gain + score = scores[i] class_id = class_ids[i] + if score > 0.25: - print(box, score, class_id) # Draw the detection on the input image self.draw_detections(input_image, box, score, class_id)