Reformat Markdown code blocks (#12795)

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
Glenn Jocher 2024-05-18 18:58:06 +02:00 committed by GitHub
parent 2af71d15a6
commit fceea033ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
128 changed files with 1067 additions and 1018 deletions

View file

@@ -62,36 +62,36 @@ Without further ado, let's dive in!
```python
import datetime
import shutil
from pathlib import Path
from collections import Counter
from pathlib import Path
import yaml
import numpy as np
import pandas as pd
from ultralytics import YOLO
import yaml
from sklearn.model_selection import KFold
from ultralytics import YOLO
```
2. Proceed to retrieve all label files for your dataset.
```python
dataset_path = Path('./Fruit-detection') # replace with 'path/to/dataset' for your custom data
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'
dataset_path = Path("./Fruit-detection") # replace with 'path/to/dataset' for your custom data
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'
```
3. Now, read the contents of the dataset YAML file and extract the indices of the class labels.
```python
yaml_file = 'path/to/data.yaml' # your data YAML with data directories and names dictionary
with open(yaml_file, 'r', encoding="utf8") as y:
classes = yaml.safe_load(y)['names']
yaml_file = "path/to/data.yaml" # your data YAML with data directories and names dictionary
with open(yaml_file, "r", encoding="utf8") as y:
classes = yaml.safe_load(y)["names"]
cls_idx = sorted(classes.keys())
```
4. Initialize an empty `pandas` DataFrame.
```python
indx = [l.stem for l in labels] # uses base filename as ID (no extension)
indx = [l.stem for l in labels] # uses base filename as ID (no extension)
labels_df = pd.DataFrame([], columns=cls_idx, index=indx)
```
@@ -101,16 +101,16 @@ Without further ado, let's dive in!
for label in labels:
lbl_counter = Counter()
with open(label,'r') as lf:
with open(label, "r") as lf:
lines = lf.readlines()
for l in lines:
# classes for YOLO label uses integer at first position of each line
lbl_counter[int(l.split(' ')[0])] += 1
lbl_counter[int(l.split(" ")[0])] += 1
labels_df.loc[label.stem] = lbl_counter
labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0`
labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0`
```
6. The following is a sample view of the populated DataFrame:
@@ -142,7 +142,7 @@ The rows index the label files, each corresponding to an image in your dataset,
```python
ksplit = 5
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20) # setting random_state for repeatable results
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20) # setting random_state for repeatable results
kfolds = list(kf.split(labels_df))
```
@@ -150,12 +150,12 @@ The rows index the label files, each corresponding to an image in your dataset,
2. The dataset has now been split into `k` folds, each having a list of `train` and `val` indices. We will construct a DataFrame to display these results more clearly.
```python
folds = [f'split_{n}' for n in range(1, ksplit + 1)]
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
folds_df = pd.DataFrame(index=indx, columns=folds)
for idx, (train, val) in enumerate(kfolds, start=1):
folds_df[f'split_{idx}'].loc[labels_df.iloc[train].index] = 'train'
folds_df[f'split_{idx}'].loc[labels_df.iloc[val].index] = 'val'
folds_df[f"split_{idx}"].loc[labels_df.iloc[train].index] = "train"
folds_df[f"split_{idx}"].loc[labels_df.iloc[val].index] = "val"
```
3. Now we will calculate the distribution of class labels for each fold as a ratio of the classes present in `val` to those present in `train`.
@@ -168,8 +168,8 @@ The rows index the label files, each corresponding to an image in your dataset,
val_totals = labels_df.iloc[val_indices].sum()
# To avoid division by zero, we add a small value (1E-7) to the denominator
ratio = val_totals / (train_totals + 1E-7)
fold_lbl_distrb.loc[f'split_{n}'] = ratio
ratio = val_totals / (train_totals + 1e-7)
fold_lbl_distrb.loc[f"split_{n}"] = ratio
```
The ideal scenario is for all class ratios to be reasonably similar for each split and across classes. This, however, will be subject to the specifics of your dataset.
@@ -177,17 +177,17 @@ The rows index the label files, each corresponding to an image in your dataset,
4. Next, we create the directories and dataset YAML files for each split.
```python
supported_extensions = ['.jpg', '.jpeg', '.png']
supported_extensions = [".jpg", ".jpeg", ".png"]
# Initialize an empty list to store image file paths
images = []
# Loop through supported extensions and gather image files
for ext in supported_extensions:
images.extend(sorted((dataset_path / 'images').rglob(f"*{ext}")))
images.extend(sorted((dataset_path / "images").rglob(f"*{ext}")))
# Create the necessary directories and dataset YAML files (unchanged)
save_path = Path(dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val')
save_path = Path(dataset_path / f"{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val")
save_path.mkdir(parents=True, exist_ok=True)
ds_yamls = []
@@ -195,22 +195,25 @@ The rows index the label files, each corresponding to an image in your dataset,
# Create directories
split_dir = save_path / split
split_dir.mkdir(parents=True, exist_ok=True)
(split_dir / 'train' / 'images').mkdir(parents=True, exist_ok=True)
(split_dir / 'train' / 'labels').mkdir(parents=True, exist_ok=True)
(split_dir / 'val' / 'images').mkdir(parents=True, exist_ok=True)
(split_dir / 'val' / 'labels').mkdir(parents=True, exist_ok=True)
(split_dir / "train" / "images").mkdir(parents=True, exist_ok=True)
(split_dir / "train" / "labels").mkdir(parents=True, exist_ok=True)
(split_dir / "val" / "images").mkdir(parents=True, exist_ok=True)
(split_dir / "val" / "labels").mkdir(parents=True, exist_ok=True)
# Create dataset YAML files
dataset_yaml = split_dir / f'{split}_dataset.yaml'
dataset_yaml = split_dir / f"{split}_dataset.yaml"
ds_yamls.append(dataset_yaml)
with open(dataset_yaml, 'w') as ds_y:
yaml.safe_dump({
'path': split_dir.as_posix(),
'train': 'train',
'val': 'val',
'names': classes
}, ds_y)
with open(dataset_yaml, "w") as ds_y:
yaml.safe_dump(
{
"path": split_dir.as_posix(),
"train": "train",
"val": "val",
"names": classes,
},
ds_y,
)
```
5. Lastly, copy images and labels into the respective directory ('train' or 'val') for each split.
@@ -221,8 +224,8 @@ The rows index the label files, each corresponding to an image in your dataset,
for image, label in zip(images, labels):
for split, k_split in folds_df.loc[image.stem].items():
# Destination directory
img_to_path = save_path / split / k_split / 'images'
lbl_to_path = save_path / split / k_split / 'labels'
img_to_path = save_path / split / k_split / "images"
lbl_to_path = save_path / split / k_split / "labels"
# Copy image and label files to new directory (SamefileError if file already exists)
shutil.copy(image, img_to_path / image.name)
@@ -243,8 +246,8 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
1. First, load the YOLO model.
```python
weights_path = 'path/to/weights.pt'
model = YOLO(weights_path, task='detect')
weights_path = "path/to/weights.pt"
model = YOLO(weights_path, task="detect")
```
2. Next, iterate over the dataset YAML files to run training. The results will be saved to a directory specified by the `project` and `name` arguments. By default, this directory is 'exp/runs#' where # is an integer index.
@@ -254,12 +257,12 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
# Define your additional arguments here
batch = 16
project = 'kfold_demo'
project = "kfold_demo"
epochs = 100
for k in range(ksplit):
dataset_yaml = ds_yamls[k]
model.train(data=dataset_yaml,epochs=epochs, batch=batch, project=project) # include any train arguments
model.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project) # include any train arguments
results[k] = model.metrics # save output metrics for further analysis
```