Reformat Markdown code blocks (#12795)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com> Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
This commit is contained in:
parent
2af71d15a6
commit
fceea033ad
128 changed files with 1067 additions and 1018 deletions
|
|
@ -62,36 +62,36 @@ Without further ado, let's dive in!
|
|||
```python
|
||||
import datetime
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
import yaml
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from ultralytics import YOLO
|
||||
import yaml
|
||||
from sklearn.model_selection import KFold
|
||||
from ultralytics import YOLO
|
||||
```
|
||||
|
||||
2. Proceed to retrieve all label files for your dataset.
|
||||
|
||||
```python
|
||||
dataset_path = Path('./Fruit-detection') # replace with 'path/to/dataset' for your custom data
|
||||
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'
|
||||
dataset_path = Path("./Fruit-detection") # replace with 'path/to/dataset' for your custom data
|
||||
labels = sorted(dataset_path.rglob("*labels/*.txt")) # all data in 'labels'
|
||||
```
|
||||
|
||||
3. Now, read the contents of the dataset YAML file and extract the indices of the class labels.
|
||||
|
||||
```python
|
||||
yaml_file = 'path/to/data.yaml' # your data YAML with data directories and names dictionary
|
||||
with open(yaml_file, 'r', encoding="utf8") as y:
|
||||
classes = yaml.safe_load(y)['names']
|
||||
yaml_file = "path/to/data.yaml" # your data YAML with data directories and names dictionary
|
||||
with open(yaml_file, "r", encoding="utf8") as y:
|
||||
classes = yaml.safe_load(y)["names"]
|
||||
cls_idx = sorted(classes.keys())
|
||||
```
|
||||
|
||||
4. Initialize an empty `pandas` DataFrame.
|
||||
|
||||
```python
|
||||
indx = [l.stem for l in labels] # uses base filename as ID (no extension)
|
||||
indx = [l.stem for l in labels] # uses base filename as ID (no extension)
|
||||
labels_df = pd.DataFrame([], columns=cls_idx, index=indx)
|
||||
```
|
||||
|
||||
|
|
@ -101,16 +101,16 @@ Without further ado, let's dive in!
|
|||
for label in labels:
|
||||
lbl_counter = Counter()
|
||||
|
||||
with open(label,'r') as lf:
|
||||
with open(label, "r") as lf:
|
||||
lines = lf.readlines()
|
||||
|
||||
for l in lines:
|
||||
# the class of each YOLO label is the integer at the first position of its line
|
||||
lbl_counter[int(l.split(' ')[0])] += 1
|
||||
lbl_counter[int(l.split(" ")[0])] += 1
|
||||
|
||||
labels_df.loc[label.stem] = lbl_counter
|
||||
|
||||
labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0`
|
||||
labels_df = labels_df.fillna(0.0) # replace `nan` values with `0.0`
|
||||
```
|
||||
|
||||
6. The following is a sample view of the populated DataFrame:
|
||||
|
|
@ -142,7 +142,7 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
|
||||
```python
|
||||
ksplit = 5
|
||||
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20) # setting random_state for repeatable results
|
||||
kf = KFold(n_splits=ksplit, shuffle=True, random_state=20) # setting random_state for repeatable results
|
||||
|
||||
kfolds = list(kf.split(labels_df))
|
||||
```
|
||||
|
|
@ -150,12 +150,12 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
2. The dataset has now been split into `k` folds, each having a list of `train` and `val` indices. We will construct a DataFrame to display these results more clearly.
|
||||
|
||||
```python
|
||||
folds = [f'split_{n}' for n in range(1, ksplit + 1)]
|
||||
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
|
||||
folds_df = pd.DataFrame(index=indx, columns=folds)
|
||||
|
||||
for idx, (train, val) in enumerate(kfolds, start=1):
|
||||
folds_df[f'split_{idx}'].loc[labels_df.iloc[train].index] = 'train'
|
||||
folds_df[f'split_{idx}'].loc[labels_df.iloc[val].index] = 'val'
|
||||
folds_df[f"split_{idx}"].loc[labels_df.iloc[train].index] = "train"
|
||||
folds_df[f"split_{idx}"].loc[labels_df.iloc[val].index] = "val"
|
||||
```
|
||||
|
||||
3. Now we will calculate the distribution of class labels for each fold as a ratio of the classes present in `val` to those present in `train`.
|
||||
|
|
@ -168,8 +168,8 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
val_totals = labels_df.iloc[val_indices].sum()
|
||||
|
||||
# To avoid division by zero, we add a small value (1E-7) to the denominator
|
||||
ratio = val_totals / (train_totals + 1E-7)
|
||||
fold_lbl_distrb.loc[f'split_{n}'] = ratio
|
||||
ratio = val_totals / (train_totals + 1e-7)
|
||||
fold_lbl_distrb.loc[f"split_{n}"] = ratio
|
||||
```
|
||||
|
||||
The ideal scenario is for all class ratios to be reasonably similar for each split and across classes. This, however, will be subject to the specifics of your dataset.
|
||||
|
|
@ -177,17 +177,17 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
4. Next, we create the directories and dataset YAML files for each split.
|
||||
|
||||
```python
|
||||
supported_extensions = ['.jpg', '.jpeg', '.png']
|
||||
supported_extensions = [".jpg", ".jpeg", ".png"]
|
||||
|
||||
# Initialize an empty list to store image file paths
|
||||
images = []
|
||||
|
||||
# Loop through supported extensions and gather image files
|
||||
for ext in supported_extensions:
|
||||
images.extend(sorted((dataset_path / 'images').rglob(f"*{ext}")))
|
||||
images.extend(sorted((dataset_path / "images").rglob(f"*{ext}")))
|
||||
|
||||
# Create the necessary directories and dataset YAML files (unchanged)
|
||||
save_path = Path(dataset_path / f'{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val')
|
||||
save_path = Path(dataset_path / f"{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val")
|
||||
save_path.mkdir(parents=True, exist_ok=True)
|
||||
ds_yamls = []
|
||||
|
||||
|
|
@ -195,22 +195,25 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
# Create directories
|
||||
split_dir = save_path / split
|
||||
split_dir.mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / 'train' / 'images').mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / 'train' / 'labels').mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / 'val' / 'images').mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / 'val' / 'labels').mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / "train" / "images").mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / "train" / "labels").mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / "val" / "images").mkdir(parents=True, exist_ok=True)
|
||||
(split_dir / "val" / "labels").mkdir(parents=True, exist_ok=True)
|
||||
|
||||
# Create dataset YAML files
|
||||
dataset_yaml = split_dir / f'{split}_dataset.yaml'
|
||||
dataset_yaml = split_dir / f"{split}_dataset.yaml"
|
||||
ds_yamls.append(dataset_yaml)
|
||||
|
||||
with open(dataset_yaml, 'w') as ds_y:
|
||||
yaml.safe_dump({
|
||||
'path': split_dir.as_posix(),
|
||||
'train': 'train',
|
||||
'val': 'val',
|
||||
'names': classes
|
||||
}, ds_y)
|
||||
with open(dataset_yaml, "w") as ds_y:
|
||||
yaml.safe_dump(
|
||||
{
|
||||
"path": split_dir.as_posix(),
|
||||
"train": "train",
|
||||
"val": "val",
|
||||
"names": classes,
|
||||
},
|
||||
ds_y,
|
||||
)
|
||||
```
|
||||
|
||||
5. Lastly, copy images and labels into the respective directory ('train' or 'val') for each split.
|
||||
|
|
@ -221,8 +224,8 @@ The rows index the label files, each corresponding to an image in your dataset,
|
|||
for image, label in zip(images, labels):
|
||||
for split, k_split in folds_df.loc[image.stem].items():
|
||||
# Destination directory
|
||||
img_to_path = save_path / split / k_split / 'images'
|
||||
lbl_to_path = save_path / split / k_split / 'labels'
|
||||
img_to_path = save_path / split / k_split / "images"
|
||||
lbl_to_path = save_path / split / k_split / "labels"
|
||||
|
||||
# Copy image and label files to new directory (SameFileError if source and destination are the same file)
|
||||
shutil.copy(image, img_to_path / image.name)
|
||||
|
|
@ -243,8 +246,8 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
|
|||
1. First, load the YOLO model.
|
||||
|
||||
```python
|
||||
weights_path = 'path/to/weights.pt'
|
||||
model = YOLO(weights_path, task='detect')
|
||||
weights_path = "path/to/weights.pt"
|
||||
model = YOLO(weights_path, task="detect")
|
||||
```
|
||||
|
||||
2. Next, iterate over the dataset YAML files to run training. The results will be saved to a directory specified by the `project` and `name` arguments. By default, this directory is 'exp/runs#' where # is an integer index.
|
||||
|
|
@ -254,12 +257,12 @@ fold_lbl_distrb.to_csv(save_path / "kfold_label_distribution.csv")
|
|||
|
||||
# Define your additional arguments here
|
||||
batch = 16
|
||||
project = 'kfold_demo'
|
||||
project = "kfold_demo"
|
||||
epochs = 100
|
||||
|
||||
for k in range(ksplit):
|
||||
dataset_yaml = ds_yamls[k]
|
||||
model.train(data=dataset_yaml,epochs=epochs, batch=batch, project=project) # include any train arguments
|
||||
model.train(data=dataset_yaml, epochs=epochs, batch=batch, project=project) # include any train arguments
|
||||
results[k] = model.metrics # save output metrics for further analysis
|
||||
```
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue