Threadpool fixes and CLI improvements (#550)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
This commit is contained in:
Glenn Jocher 2023-01-22 17:08:08 +01:00 committed by GitHub
parent d9a0fba251
commit 21b701c4ea
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 338 additions and 251 deletions

View file

@ -93,7 +93,7 @@ class BaseDataset(Dataset):
# self.img_files = sorted([x for x in f if x.suffix[1:].lower() in IMG_FORMATS]) # pathlib
assert im_files, f"{self.prefix}No images found"
except Exception as e:
raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}: {e}\n{HELP_URL}") from e
raise FileNotFoundError(f"{self.prefix}Error loading data from {img_path}\n{HELP_URL}") from e
return im_files
def update_labels(self, include_class: Optional[list]):
@ -134,16 +134,17 @@ class BaseDataset(Dataset):
gb = 0 # Gigabytes of cached images
self.im_hw0, self.im_hw = [None] * self.ni, [None] * self.ni
fcn = self.cache_images_to_disk if cache == "disk" else self.load_image
results = ThreadPool(NUM_THREADS).imap(fcn, range(self.ni))
pbar = tqdm(enumerate(results), total=self.ni, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0)
for i, x in pbar:
if cache == "disk":
gb += self.npy_files[i].stat().st_size
else: # 'ram'
self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
gb += self.ims[i].nbytes
pbar.desc = f"{self.prefix}Caching images ({gb / 1E9:.1f}GB {cache})"
pbar.close()
with ThreadPool(NUM_THREADS) as pool:
results = pool.imap(fcn, range(self.ni))
pbar = tqdm(enumerate(results), total=self.ni, bar_format=TQDM_BAR_FORMAT, disable=LOCAL_RANK > 0)
for i, x in pbar:
if cache == "disk":
gb += self.npy_files[i].stat().st_size
else: # 'ram'
self.ims[i], self.im_hw0[i], self.im_hw[i] = x # im, hw_orig, hw_resized = load_image(self, i)
gb += self.ims[i].nbytes
pbar.desc = f"{self.prefix}Caching images ({gb / 1E9:.1f}GB {cache})"
pbar.close()
def cache_images_to_disk(self, i):
# Saves an image as an *.npy file for faster loading