Introduced BaseSolution class for Ultralytics solutions (#16671)

Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2024-10-05 03:19:36 +05:00 · 2024-10-05 03:19:36 +05:00 · 70ba988c68
commit 70ba988c68
parent e5d3427a52
6 changed files with 270 additions and 298 deletions
--- a/ultralytics/solutions/object_counter.py
+++ b/ultralytics/solutions/object_counter.py
@ -1,243 +1,129 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-from collections import defaultdict
+from shapely.geometry import LineString, Point

-import cv2
-
-from ultralytics.utils.checks import check_imshow, check_requirements
+from ultralytics.solutions.solutions import BaseSolution  # Import a parent class
 from ultralytics.utils.plotting import Annotator, colors

-check_requirements("shapely>=2.0.0")

-from shapely.geometry import LineString, Point, Polygon
-
-
-class ObjectCounter:
+class ObjectCounter(BaseSolution):
    """A class to manage the counting of objects in a real-time video stream based on their tracks."""

-    def __init__(
-        self,
-        names,
-        reg_pts=None,
-        line_thickness=2,
-        view_img=False,
-        view_in_counts=True,
-        view_out_counts=True,
-        draw_tracks=False,
-    ):
+    def __init__(self, **kwargs):
+        """Initializes the ObjectCounter class, a child class of BaseSolution, which can be used for counting
+        objects.
        """
-        Initializes the ObjectCounter with various tracking and counting parameters.
+        super().__init__(**kwargs)
+
+        self.in_count = 0  # Counter for objects moving inward
+        self.out_count = 0  # Counter for objects moving outward
+        self.counted_ids = []  # List of IDs of objects that have been counted
+        self.classwise_counts = {}  # Dictionary for counts, categorized by object class
+
+        self.initialize_region()  # Setup region and counting areas
+
+        self.show_in = self.CFG["show_in"]
+        self.show_out = self.CFG["show_out"]
+
+    def count_objects(self, track_line, box, track_id, prev_position, cls):
+        """
+        Helper function to count objects that cross a counting line or are inside a polygonal region.

        Args:
-            names (dict): Dictionary of class names.
-            reg_pts (list): List of points defining the counting region.
-            line_thickness (int): Line thickness for bounding boxes.
-            view_img (bool): Flag to control whether to display the video stream.
-            view_in_counts (bool): Flag to control whether to display the in counts on the video stream.
-            view_out_counts (bool): Flag to control whether to display the out counts on the video stream.
-            draw_tracks (bool): Flag to control whether to draw the object tracks.
+            track_line (list): Track record of the object over the last 30 frames
+            box (list): Bounding box data for the specific track in the current frame
+            track_id (int): Track ID of the object
+            prev_position (tuple): Position coordinates of the track in the previous frame
+            cls (int): Class index for classwise count updates
        """
-        # Mouse events
-        self.is_drawing = False
-        self.selected_point = None
+        if prev_position is None or track_id in self.counted_ids:
+            return

-        # Region & Line Information
-        self.reg_pts = [(20, 400), (1260, 400)] if reg_pts is None else reg_pts
-        self.counting_region = None
+        centroid = self.r_s.centroid
+        dx = (box[0] - prev_position[0]) * (centroid.x - prev_position[0])
+        dy = (box[1] - prev_position[1]) * (centroid.y - prev_position[1])

-        # Image and annotation Information
-        self.im0 = None
-        self.tf = line_thickness
-        self.view_img = view_img
-        self.view_in_counts = view_in_counts
-        self.view_out_counts = view_out_counts
+        if len(self.region) >= 3 and self.r_s.contains(Point(track_line[-1])):
+            self.counted_ids.append(track_id)
+            # For polygon region
+            if dx > 0:
+                self.in_count += 1
+                self.classwise_counts[self.names[cls]]["IN"] += 1
+            else:
+                self.out_count += 1
+                self.classwise_counts[self.names[cls]]["OUT"] += 1

-        self.names = names  # Classes names
-        self.window_name = "Ultralytics YOLOv8 Object Counter"
+        elif len(self.region) < 3 and LineString([prev_position, box[:2]]).intersects(self.l_s):
+            self.counted_ids.append(track_id)
+            # For linear region
+            if dx > 0 and dy > 0:
+                self.in_count += 1
+                self.classwise_counts[self.names[cls]]["IN"] += 1
+            else:
+                self.out_count += 1
+                self.classwise_counts[self.names[cls]]["OUT"] += 1

-        # Object counting Information
-        self.in_counts = 0
-        self.out_counts = 0
-        self.count_ids = []
-        self.class_wise_count = {}
-
-        # Tracks info
-        self.track_history = defaultdict(list)
-        self.draw_tracks = draw_tracks
-
-        # Check if environment supports imshow
-        self.env_check = check_imshow(warn=True)
-
-        # Initialize counting region
-        if len(self.reg_pts) == 2:
-            print("Line Counter Initiated.")
-            self.counting_region = LineString(self.reg_pts)
-        elif len(self.reg_pts) >= 3:
-            print("Polygon Counter Initiated.")
-            self.counting_region = Polygon(self.reg_pts)
-        else:
-            print("Invalid Region points provided, region_points must be 2 for lines or >= 3 for polygons.")
-            print("Using Line Counter Now")
-            self.counting_region = LineString(self.reg_pts)
-
-        # Define the counting line segment
-        self.counting_line_segment = LineString(
-            [
-                (self.reg_pts[0][0], self.reg_pts[0][1]),
-                (self.reg_pts[1][0], self.reg_pts[1][1]),
-            ]
-        )
-
-    def mouse_event_for_region(self, event, x, y, flags, params):
+    def store_classwise_counts(self, cls):
        """
-        Handles mouse events for defining and moving the counting region in a real-time video stream.
+        Initialize class-wise counts if not already present.

        Args:
-            event (int): The type of mouse event (e.g., cv2.EVENT_MOUSEMOVE, cv2.EVENT_LBUTTONDOWN, etc.).
-            x (int): The x-coordinate of the mouse pointer.
-            y (int): The y-coordinate of the mouse pointer.
-            flags (int): Any associated event flags (e.g., cv2.EVENT_FLAG_CTRLKEY,  cv2.EVENT_FLAG_SHIFTKEY, etc.).
-            params (dict): Additional parameters for the function.
+            cls (int): Class index for classwise count updates
        """
-        if event == cv2.EVENT_LBUTTONDOWN:
-            for i, point in enumerate(self.reg_pts):
-                if (
-                    isinstance(point, (tuple, list))
-                    and len(point) >= 2
-                    and (abs(x - point[0]) < 10 and abs(y - point[1]) < 10)
-                ):
-                    self.selected_point = i
-                    self.is_drawing = True
-                    break
+        if self.names[cls] not in self.classwise_counts:
+            self.classwise_counts[self.names[cls]] = {"IN": 0, "OUT": 0}

-        elif event == cv2.EVENT_MOUSEMOVE:
-            if self.is_drawing and self.selected_point is not None:
-                self.reg_pts[self.selected_point] = (x, y)
-                self.counting_region = Polygon(self.reg_pts)
+    def display_counts(self, im0):
+        """
+        Helper function to display object counts on the frame.

-        elif event == cv2.EVENT_LBUTTONUP:
-            self.is_drawing = False
-            self.selected_point = None
-
-    def extract_and_process_tracks(self, tracks):
-        """Extracts and processes tracks for object counting in a video stream."""
-        # Annotator Init and region drawing
-        annotator = Annotator(self.im0, self.tf, self.names)
-
-        # Draw region or line
-        annotator.draw_region(reg_pts=self.reg_pts, color=(104, 0, 123), thickness=self.tf * 2)
-
-        # Extract tracks for OBB or object detection
-        track_data = tracks[0].obb or tracks[0].boxes
-
-        if track_data and track_data.id is not None:
-            boxes = track_data.xyxy.cpu()
-            clss = track_data.cls.cpu().tolist()
-            track_ids = track_data.id.int().cpu().tolist()
-
-            # Extract tracks
-            for box, track_id, cls in zip(boxes, track_ids, clss):
-                # Draw bounding box
-                annotator.box_label(box, label=self.names[cls], color=colors(int(track_id), True))
-
-                # Store class info
-                if self.names[cls] not in self.class_wise_count:
-                    self.class_wise_count[self.names[cls]] = {"IN": 0, "OUT": 0}
-
-                # Draw Tracks
-                track_line = self.track_history[track_id]
-                track_line.append((float((box[0] + box[2]) / 2), float((box[1] + box[3]) / 2)))
-                if len(track_line) > 30:
-                    track_line.pop(0)
-
-                # Draw track trails
-                if self.draw_tracks:
-                    annotator.draw_centroid_and_tracks(
-                        track_line,
-                        color=colors(int(track_id), True),
-                        track_thickness=self.tf,
-                    )
-
-                prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
-
-                # Count objects in any polygon
-                if len(self.reg_pts) >= 3:
-                    is_inside = self.counting_region.contains(Point(track_line[-1]))
-
-                    if prev_position is not None and is_inside and track_id not in self.count_ids:
-                        self.count_ids.append(track_id)
-
-                        if (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0]) > 0:
-                            self.in_counts += 1
-                            self.class_wise_count[self.names[cls]]["IN"] += 1
-                        else:
-                            self.out_counts += 1
-                            self.class_wise_count[self.names[cls]]["OUT"] += 1
-
-                # Count objects using line
-                elif len(self.reg_pts) == 2:
-                    if (
-                        prev_position is not None
-                        and track_id not in self.count_ids
-                        and LineString([(prev_position[0], prev_position[1]), (box[0], box[1])]).intersects(
-                            self.counting_line_segment
-                        )
-                    ):
-                        self.count_ids.append(track_id)
-
-                        # Determine the direction of movement (IN or OUT)
-                        dx = (box[0] - prev_position[0]) * (self.counting_region.centroid.x - prev_position[0])
-                        dy = (box[1] - prev_position[1]) * (self.counting_region.centroid.y - prev_position[1])
-                        if dx > 0 and dy > 0:
-                            self.in_counts += 1
-                            self.class_wise_count[self.names[cls]]["IN"] += 1
-                        else:
-                            self.out_counts += 1
-                            self.class_wise_count[self.names[cls]]["OUT"] += 1
-
-        labels_dict = {}
-
-        for key, value in self.class_wise_count.items():
-            if value["IN"] != 0 or value["OUT"] != 0:
-                if not self.view_in_counts and not self.view_out_counts:
-                    continue
-                elif not self.view_in_counts:
-                    labels_dict[str.capitalize(key)] = f"OUT {value['OUT']}"
-                elif not self.view_out_counts:
-                    labels_dict[str.capitalize(key)] = f"IN {value['IN']}"
-                else:
-                    labels_dict[str.capitalize(key)] = f"IN {value['IN']} OUT {value['OUT']}"
+        Args:
+            im0 (ndarray): The input image or frame
+        """
+        labels_dict = {
+            str.capitalize(key): f"{'IN ' + str(value['IN']) if self.show_in else ''} "
+            f"{'OUT ' + str(value['OUT']) if self.show_out else ''}".strip()
+            for key, value in self.classwise_counts.items()
+            if value["IN"] != 0 or value["OUT"] != 0
+        }

        if labels_dict:
-            annotator.display_analytics(self.im0, labels_dict, (104, 31, 17), (255, 255, 255), 10)
+            self.annotator.display_analytics(im0, labels_dict, (104, 31, 17), (255, 255, 255), 10)

-    def display_frames(self):
-        """Displays the current frame with annotations and regions in a window."""
-        if self.env_check:
-            cv2.namedWindow(self.window_name)
-            if len(self.reg_pts) == 4:  # only add mouse event If user drawn region
-                cv2.setMouseCallback(self.window_name, self.mouse_event_for_region, {"region_points": self.reg_pts})
-            cv2.imshow(self.window_name, self.im0)
-            # Break Window
-            if cv2.waitKey(1) & 0xFF == ord("q"):
-                return
-
-    def start_counting(self, im0, tracks):
+    def count(self, im0):
        """
-        Main function to start the object counting process.
+        Processes input data (frames or object tracks) and updates counts.

        Args:
-            im0 (ndarray): Current frame from the video stream.
-            tracks (list): List of tracks obtained from the object tracking process.
+            im0 (ndarray): The input image that will be used for processing
+        Returns:
+            im0 (ndarray): The processed image for further use
        """
-        self.im0 = im0  # store image
-        self.extract_and_process_tracks(tracks)  # draw region even if no objects
+        self.annotator = Annotator(im0, line_width=self.line_width)  # Initialize annotator
+        self.extract_tracks(im0)  # Extract tracks

-        if self.view_img:
-            self.display_frames()
-        return self.im0
+        self.annotator.draw_region(
+            reg_pts=self.region, color=(104, 0, 123), thickness=self.line_width * 2
+        )  # Draw region

+        # Iterate over bounding boxes, track ids and classes index
+        if self.track_data is not None and self.track_data.id is not None:
+            for box, track_id, cls in zip(self.boxes, self.track_ids, self.clss):
+                # Draw bounding box with its class label
+                self.annotator.box_label(box, label=self.names[cls], color=colors(track_id, True))
+                self.store_tracking_history(track_id, box)  # Store track history
+                self.store_classwise_counts(cls)  # store classwise counts in dict

-if __name__ == "__main__":
-    classes_names = {0: "person", 1: "car"}  # example class names
-    ObjectCounter(classes_names)
+                # Draw centroid and track trail of the object
+                self.annotator.draw_centroid_and_tracks(
+                    self.track_line, color=colors(int(track_id), True), track_thickness=self.line_width
+                )
+
+                # store previous position of track for object counting
+                prev_position = self.track_history[track_id][-2] if len(self.track_history[track_id]) > 1 else None
+                self.count_objects(self.track_line, box, track_id, prev_position, cls)  # Perform object counting
+
+        self.display_counts(im0)  # Display the counts on the frame
+        self.display_output(im0)  # display output with base class function
+
+        return im0  # return output image for more usage