ultralytics 8.3.5 add cache=disk space and writable checks (#16696)
parent 4073fa0b87
commit fe61f9d54a

2 changed files with 38 additions and 6 deletions
ultralytics/__init__.py

@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

-__version__ = "8.3.4"
+__version__ = "8.3.5"

 import os

ultralytics/data/base.py

@@ -90,13 +90,15 @@ class BaseDataset(Dataset):
         self.ims, self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni, [None] * self.ni
         self.npy_files = [Path(f).with_suffix(".npy") for f in self.im_files]
         self.cache = cache.lower() if isinstance(cache, str) else "ram" if cache is True else None
-        if (self.cache == "ram" and self.check_cache_ram()) or self.cache == "disk":
-            if self.cache == "ram" and hyp.deterministic:
+        if self.cache == "ram" and self.check_cache_ram():
+            if hyp.deterministic:
                 LOGGER.warning(
                     "WARNING ⚠️ cache='ram' may produce non-deterministic training results. "
                     "Consider cache='disk' as a deterministic alternative if your disk space allows."
                 )
             self.cache_images()
+        elif self.cache == "disk" and self.check_cache_disk():
+            self.cache_images()

         # Transforms
         self.transforms = self.build_transforms(hyp=hyp)
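The hunk above restructures the cache gate in BaseDataset.__init__: the RAM estimate now only gates cache='ram', and cache='disk' must pass the new check_cache_disk() before any images are pre-cached. A minimal standalone sketch of that control flow (not the library API; resolve_cache, maybe_cache and the check_*/cache_images callables below are placeholders for the dataset and its methods):

# Standalone sketch of the new dispatch, with stub callables in place of the
# BaseDataset methods; it mirrors the resolution expression and branch order above.
def resolve_cache(cache):
    """Normalize the user-facing cache argument to 'ram', 'disk', or None."""
    return cache.lower() if isinstance(cache, str) else "ram" if cache is True else None

def maybe_cache(cache, check_cache_ram, check_cache_disk, cache_images):
    cache = resolve_cache(cache)
    if cache == "ram" and check_cache_ram():      # RAM check only gates cache='ram'
        cache_images()
    elif cache == "disk" and check_cache_disk():  # new: disk check gates cache='disk'
        cache_images()
    return cache

# e.g. cache=True resolves to 'ram', cache='DISK' to 'disk', cache=False to None
print(resolve_cache(True), resolve_cache("DISK"), resolve_cache(False))

On a failed check either method sets self.cache = None, so the dataset quietly falls back to loading images from disk on the fly rather than erroring out.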
@@ -206,25 +208,55 @@ class BaseDataset(Dataset):
             if not f.exists():
                 np.save(f.as_posix(), cv2.imread(self.im_files[i]), allow_pickle=False)

+    def check_cache_disk(self, safety_margin=0.5):
+        """Check image caching requirements vs available disk space."""
+        import shutil
+
+        b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
+        n = min(self.ni, 30)  # extrapolate from 30 random images
+        for _ in range(n):
+            im_file = random.choice(self.im_files)
+            im = cv2.imread(im_file)
+            if im is None:
+                continue
+            b += im.nbytes
+            if not os.access(Path(im_file).parent, os.W_OK):
+                self.cache = None
+                LOGGER.info(f"{self.prefix}Skipping caching images to disk, directory not writeable ⚠️")
+                return False
+        disk_required = b * self.ni / n * (1 + safety_margin)  # bytes required to cache dataset to disk
+        total, used, free = shutil.disk_usage(Path(self.im_files[0]).parent)
+        if disk_required > free:
+            self.cache = None
+            LOGGER.info(
+                f"{self.prefix}{disk_required / gb:.1f}GB disk space required, "
+                f"with {int(safety_margin * 100)}% safety margin but only "
+                f"{free / gb:.1f}/{total / gb:.1f}GB free, not caching images to disk ⚠️"
+            )
+            return False
+        return True
+
     def check_cache_ram(self, safety_margin=0.5):
         """Check image caching requirements vs available memory."""
         b, gb = 0, 1 << 30  # bytes of cached images, bytes per gigabytes
         n = min(self.ni, 30)  # extrapolate from 30 random images
         for _ in range(n):
             im = cv2.imread(random.choice(self.im_files))  # sample image
+            if im is None:
+                continue
             ratio = self.imgsz / max(im.shape[0], im.shape[1])  # max(h, w)  # ratio
             b += im.nbytes * ratio**2
         mem_required = b * self.ni / n * (1 + safety_margin)  # GB required to cache dataset into RAM
         mem = psutil.virtual_memory()
-        success = mem_required < mem.available  # to cache or not to cache, that is the question
-        if not success:
+        if mem_required > mem.available:
             self.cache = None
             LOGGER.info(
                 f"{self.prefix}{mem_required / gb:.1f}GB RAM required to cache images "
                 f"with {int(safety_margin * 100)}% safety margin but only "
                 f"{mem.available / gb:.1f}/{mem.total / gb:.1f}GB available, not caching images ⚠️"
             )
-        return success
+            return False
+        return True

     def set_rectangle(self):
         """Sets the shape of bounding boxes for YOLO detections as rectangles."""
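The new check_cache_disk() and the updated check_cache_ram() share one estimation pattern: decode up to 30 randomly sampled images, extrapolate their total size to the whole dataset, add a safety margin (50% by default), then compare the result against available RAM, or against free disk space plus a per-directory writability test. A condensed standalone sketch of that pattern, assuming im_files is a plain list of image path strings rather than a dataset attribute (can_cache is an illustrative helper, not part of the library):

import os
import random
import shutil
from pathlib import Path

import cv2      # opencv-python
import psutil

def can_cache(im_files, imgsz=640, mode="ram", safety_margin=0.5):
    """Estimate whether a set of images fits in RAM ('ram') or on disk ('disk')."""
    if not im_files:
        return False
    n = min(len(im_files), 30)              # extrapolate from up to 30 samples
    b = 0                                   # bytes accumulated over the sample
    for f in random.sample(im_files, n):
        im = cv2.imread(f)
        if im is None:                      # unreadable image, skip it
            continue
        if mode == "ram":
            r = imgsz / max(im.shape[:2])   # RAM caching stores resized images
            b += im.nbytes * r**2
        else:
            b += im.nbytes                  # disk caching stores full-size .npy arrays
            if not os.access(Path(f).parent, os.W_OK):
                return False                # .npy files go next to the source images
    required = b * len(im_files) / n * (1 + safety_margin)
    if mode == "ram":
        return required < psutil.virtual_memory().available
    return required < shutil.disk_usage(Path(im_files[0]).parent).free

Unlike this sketch, the real methods also set self.cache = None and log an explanatory message before returning False, so a failed capacity or writability check downgrades to uncached training instead of raising.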