Add Segment masks to YOLO-Seg labels converter (#14557)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com> Co-authored-by: Glenn Jocher <glenn.jocher@ultralytics.com>
2024-08-01 20:28:53 +05:00 · 2024-08-01 20:28:53 +05:00 · 16fc325308
commit 16fc325308
parent 9ff33d67b6
4 changed files with 102 additions and 0 deletions
--- a/docs/en/reference/data/converter.md
+++ b/docs/en/reference/data/converter.md
@ -23,6 +23,10 @@ keywords: Ultralytics, data conversion, YOLO models, COCO, DOTA, YOLO bbox2segme

 <br><br><hr><br>

+## ::: ultralytics.data.converter.convert_segment_masks_to_yolo_seg
+
+<br><br><hr><br>
+
 ## ::: ultralytics.data.converter.convert_dota_to_yolo_obb

 <br><br><hr><br>
--- a/docs/en/usage/simple-utilities.md
+++ b/docs/en/usage/simple-utilities.md
@ -51,6 +51,22 @@ auto_annotate(  # (1)!

 - Use in combination with the [function `segments2boxes`](#convert-segments-to-bounding-boxes) to generate object detection bounding boxes as well

+### Convert Segmentation Masks into YOLO Format
+
+![Segmentation Masks to YOLO Format](https://github.com/user-attachments/assets/1a823fc1-f3a1-4dd5-83e7-0b209df06fc3)
+
+Use to convert a dataset of segmentation mask images to the `YOLO` segmentation format.
+This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format.
+
+The converted masks will be saved in the specified output directory.
+
+```python
+from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
+
+# For COCO dataset we have 80 classes
+convert_segment_masks_to_yolo_seg(masks_dir="path/to/masks_dir", output_dir="path/to/output_dir", classes=80)
+```
+
 ### Convert COCO into YOLO Format

 Use to convert COCO JSON annotations into proper YOLO format. For object detection (bounding box) datasets, `use_segments` and `use_keypoints` should both be `False`
--- a/docs/mkdocs_github_authors.yaml
+++ b/docs/mkdocs_github_authors.yaml
@ -1,3 +1,4 @@
+116908874+jk4e@users.noreply.github.com: jk4e
 1185102784@qq.com: Laughing-q
 130829914+IvorZhu331@users.noreply.github.com: IvorZhu331
 135830346+UltralyticsAssistant@users.noreply.github.com: UltralyticsAssistant
--- a/ultralytics/data/converter.py
+++ b/ultralytics/data/converter.py
@ -334,6 +334,87 @@ def convert_coco(
    LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")


+def convert_segment_masks_to_yolo_seg(masks_dir, output_dir, classes):
+    """
+    Converts a dataset of segmentation mask images to the YOLO segmentation format.
+
+    This function takes the directory containing the binary format mask images and converts them into YOLO segmentation format.
+    The converted masks are saved in the specified output directory.
+
+    Args:
+        masks_dir (str): The path to the directory where all mask images (png, jpg) are stored.
+        output_dir (str): The path to the directory where the converted YOLO segmentation masks will be stored.
+        classes (int): Total classes in the dataset i.e for COCO classes=80
+
+    Example:
+        ```python
+        from ultralytics.data.converter import convert_segment_masks_to_yolo_seg
+
+        # for coco dataset, we have 80 classes
+        convert_segment_masks_to_yolo_seg('path/to/masks_directory', 'path/to/output/directory', classes=80)
+        ```
+
+    Notes:
+        The expected directory structure for the masks is:
+
+            - masks
+                ├─ mask_image_01.png or mask_image_01.jpg
+                ├─ mask_image_02.png or mask_image_02.jpg
+                ├─ mask_image_03.png or mask_image_03.jpg
+                └─ mask_image_04.png or mask_image_04.jpg
+
+        After execution, the labels will be organized in the following structure:
+
+            - output_dir
+                ├─ mask_yolo_01.txt
+                ├─ mask_yolo_02.txt
+                ├─ mask_yolo_03.txt
+                └─ mask_yolo_04.txt
+    """
+    import os
+
+    pixel_to_class_mapping = {i + 1: i for i in range(80)}
+    for mask_filename in os.listdir(masks_dir):
+        if mask_filename.endswith(".png"):
+            mask_path = os.path.join(masks_dir, mask_filename)
+            mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Read the mask image in grayscale
+            img_height, img_width = mask.shape  # Get image dimensions
+            LOGGER.info(f"Processing {mask_path} imgsz = {img_height} x {img_width}")
+
+            unique_values = np.unique(mask)  # Get unique pixel values representing different classes
+            yolo_format_data = []
+
+            for value in unique_values:
+                if value == 0:
+                    continue  # Skip background
+                class_index = pixel_to_class_mapping.get(value, -1)
+                if class_index == -1:
+                    LOGGER.warning(f"Unknown class for pixel value {value} in file {mask_filename}, skipping.")
+                    continue
+
+                # Create a binary mask for the current class and find contours
+                contours, _ = cv2.findContours(
+                    (mask == value).astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+                )  # Find contours
+
+                for contour in contours:
+                    if len(contour) >= 3:  # YOLO requires at least 3 points for a valid segmentation
+                        contour = contour.squeeze()  # Remove single-dimensional entries
+                        yolo_format = [class_index]
+                        for point in contour:
+                            # Normalize the coordinates
+                            yolo_format.append(round(point[0] / img_width, 6))  # Rounding to 6 decimal places
+                            yolo_format.append(round(point[1] / img_height, 6))
+                        yolo_format_data.append(yolo_format)
+            # Save Ultralytics YOLO format data to file
+            output_path = os.path.join(output_dir, os.path.splitext(mask_filename)[0] + ".txt")
+            with open(output_path, "w") as file:
+                for item in yolo_format_data:
+                    line = " ".join(map(str, item))
+                    file.write(line + "\n")
+            LOGGER.info(f"Processed and stored at {output_path} imgsz = {img_height} x {img_width}")
+
+
 def convert_dota_to_yolo_obb(dota_root_path: str):
    """
    Converts DOTA dataset annotations to YOLO OBB (Oriented Bounding Box) format.