ultralytics 8.0.217 HUB YAML path improvements (#6556)
Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
This commit is contained in:
parent
8f1c3f3d1e
commit
40a349bceb
3 changed files with 33 additions and 33 deletions
|
|
@ -18,7 +18,7 @@ from PIL import Image, ImageOps
|
|||
|
||||
from ultralytics.nn.autobackend import check_class_names
|
||||
from ultralytics.utils import (DATASETS_DIR, LOGGER, NUM_THREADS, ROOT, SETTINGS_YAML, TQDM, clean_url, colorstr,
|
||||
emojis, yaml_load)
|
||||
emojis, yaml_load, yaml_save)
|
||||
from ultralytics.utils.checks import check_file, check_font, is_ascii
|
||||
from ultralytics.utils.downloads import download, safe_download, unzip_file
|
||||
from ultralytics.utils.ops import segments2boxes
|
||||
|
|
@ -250,28 +250,26 @@ def check_det_dataset(dataset, autodownload=True):
|
|||
(dict): Parsed dataset information and paths.
|
||||
"""
|
||||
|
||||
data = check_file(dataset)
|
||||
file = check_file(dataset)
|
||||
|
||||
# Download (optional)
|
||||
extract_dir = ''
|
||||
if isinstance(data, (str, Path)) and (zipfile.is_zipfile(data) or is_tarfile(data)):
|
||||
new_dir = safe_download(data, dir=DATASETS_DIR, unzip=True, delete=False)
|
||||
data = find_dataset_yaml(DATASETS_DIR / new_dir)
|
||||
extract_dir, autodownload = data.parent, False
|
||||
if zipfile.is_zipfile(file) or is_tarfile(file):
|
||||
new_dir = safe_download(file, dir=DATASETS_DIR, unzip=True, delete=False)
|
||||
file = find_dataset_yaml(DATASETS_DIR / new_dir)
|
||||
extract_dir, autodownload = file.parent, False
|
||||
|
||||
# Read YAML (optional)
|
||||
if isinstance(data, (str, Path)):
|
||||
data = yaml_load(data, append_filename=True) # dictionary
|
||||
# Read YAML
|
||||
data = yaml_load(file, append_filename=True) # dictionary
|
||||
|
||||
# Checks
|
||||
for k in 'train', 'val':
|
||||
if k not in data:
|
||||
if k == 'val' and 'validation' in data:
|
||||
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
|
||||
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
|
||||
else:
|
||||
if k != 'val' or 'validation' not in data:
|
||||
raise SyntaxError(
|
||||
emojis(f"{dataset} '{k}:' key missing ❌.\n'train' and 'val' are required in all data YAMLs."))
|
||||
LOGGER.info("WARNING ⚠️ renaming data YAML 'validation' key to 'val' to match YOLO format.")
|
||||
data['val'] = data.pop('validation') # replace 'validation' key with 'val' key
|
||||
if 'names' not in data and 'nc' not in data:
|
||||
raise SyntaxError(emojis(f"{dataset} key missing ❌.\n either 'names' or 'nc' are required in all data YAMLs."))
|
||||
if 'names' in data and 'nc' in data and len(data['names']) != data['nc']:
|
||||
|
|
@ -285,9 +283,10 @@ def check_det_dataset(dataset, autodownload=True):
|
|||
|
||||
# Resolve paths
|
||||
path = Path(extract_dir or data.get('path') or Path(data.get('yaml_file', '')).parent) # dataset root
|
||||
|
||||
if not path.is_absolute():
|
||||
path = (DATASETS_DIR / path).resolve()
|
||||
|
||||
# Set paths
|
||||
data['path'] = path # download scripts
|
||||
for k in 'train', 'val', 'test':
|
||||
if data.get(k): # prepend path
|
||||
|
|
@ -404,7 +403,7 @@ class HUBDatasetStats:
|
|||
A class for generating HUB dataset JSON and `-hub` dataset directory.
|
||||
|
||||
Args:
|
||||
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco128.yaml'.
|
||||
path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip). Default is 'coco8.yaml'.
|
||||
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'. Default is 'detect'.
|
||||
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
|
||||
|
||||
|
|
@ -424,7 +423,7 @@ class HUBDatasetStats:
|
|||
```
|
||||
"""
|
||||
|
||||
def __init__(self, path='coco128.yaml', task='detect', autodownload=False):
|
||||
def __init__(self, path='coco8.yaml', task='detect', autodownload=False):
|
||||
"""Initialize class."""
|
||||
path = Path(path).resolve()
|
||||
LOGGER.info(f'Starting HUB dataset checks for {path}....')
|
||||
|
|
@ -437,10 +436,12 @@ class HUBDatasetStats:
|
|||
else: # detect, segment, pose
|
||||
zipped, data_dir, yaml_path = self._unzip(Path(path))
|
||||
try:
|
||||
# data = yaml_load(check_yaml(yaml_path)) # data dict
|
||||
data = check_det_dataset(yaml_path, autodownload) # data dict
|
||||
if zipped:
|
||||
data['path'] = data_dir
|
||||
# Load YAML with checks
|
||||
data = yaml_load(yaml_path)
|
||||
data['path'] = '' # strip path since YAML should be in dataset root for all HUB datasets
|
||||
yaml_save(yaml_path, data)
|
||||
data = check_det_dataset(yaml_path, autodownload) # dict
|
||||
data['path'] = data_dir # YAML path should be set to '' (relative) or parent (absolute)
|
||||
except Exception as e:
|
||||
raise Exception('error/HUB/dataset_stats/init') from e
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue