imgsz warning fix, download function consolidation (#681)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: HaeJin Lee <seareale@gmail.com> Co-authored-by: Ayush Chaurasia <ayush.chaurarsia@gmail.com>
2023-01-29 02:31:37 +01:00 · 2023-01-29 02:31:37 +01:00 · 899abe9f82
commit 899abe9f82
parent 0609561549
26 changed files with 171 additions and 147 deletions
--- a/ultralytics/yolo/utils/downloads.py
+++ b/ultralytics/yolo/utils/downloads.py
@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, GPL-3.0 license

-import logging
+import contextlib
 import os
 import subprocess
 import urllib
@ -15,27 +15,6 @@ import torch
 from ultralytics.yolo.utils import LOGGER


-def safe_download(file, url, url2=None, min_bytes=1E0, error_msg='', progress=True):
-    # Attempts to download file from url or url2, checks and removes incomplete downloads < min_bytes
-    file = Path(file)
-    assert_msg = f"Downloaded file '{file}' does not exist or size is < min_bytes={min_bytes}"
-    try:  # url1
-        LOGGER.info(f'Downloading {url} to {file}...')
-        torch.hub.download_url_to_file(url, str(file), progress=progress and LOGGER.level <= logging.INFO)
-        assert file.exists() and file.stat().st_size > min_bytes, assert_msg  # check
-    except Exception as e:  # url2
-        if file.exists():
-            file.unlink()  # remove partial downloads
-        LOGGER.info(f'ERROR: {e}\nRe-attempting {url2 or url} to {file}...')
-        os.system(f"curl -# -L '{url2 or url}' -o '{file}' --retry 3 -C -")  # curl download, retry and resume on fail
-    finally:
-        if not file.exists() or file.stat().st_size < min_bytes:  # check
-            if file.exists():
-                file.unlink()  # remove partial downloads
-            LOGGER.warning(f"ERROR: {assert_msg}\n{error_msg}")
-        LOGGER.info('')
-
-
 def is_url(url, check=True):
    # Check if string is URL and check if URL exists
    try:
@ -47,7 +26,71 @@ def is_url(url, check=True):
        return False


-def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'):
+def safe_download(url,
+                  file=None,
+                  dir=None,
+                  unzip=True,
+                  delete=False,
+                  curl=False,
+                  retry=3,
+                  min_bytes=1E0,
+                  progress=True):
+    """
+    Download a file from a URL with retries, then optionally extract it and delete the archive.
+
+    If url contains no '://' and names an existing local file, nothing is downloaded and that file is used directly.
+
+    Args:
+        url: str: The URL of the file to be downloaded, or the path of an already existing local file.
+        file: str, optional: The path to save the download to. Only used when dir is not given.
+        dir: str, optional: The directory to save the download in, under the last path component of url.
+            Takes precedence over file. One of dir or file is required whenever a download is needed.
+        unzip: bool, optional: Whether to extract a '.zip', '.tar' or '.gz' file into its parent directory.
+            Default: True.
+        delete: bool, optional: Whether to delete the archive after extracting it. Default: False.
+        curl: bool, optional: Whether to use the curl command line tool for the first attempt as well.
+            Re-attempts always use curl. Default: False.
+        retry: int, optional: The number of re-attempts after the first failed attempt. Default: 3.
+        min_bytes: float, optional: The size in bytes that the downloaded file must exceed to be considered
+            a successful download. Default: 1E0.
+        progress: bool, optional: Whether to display a progress bar during the download. Default: True.
+
+    Returns:
+        None.
+
+    Raises:
+        AssertionError: If a download is needed and neither dir nor file is given.
+        ConnectionError: If the last attempt raises an error.
+    """
+    if '://' not in str(url) and Path(url).is_file():  # exists ('://' check required in Windows Python<3.10)
+        f = Path(url)  # filename
+    else:  # does not exist
+        assert dir or file, 'dir or file required for download'
+        # dir takes precedence over file; the saved name is then the last path component of url
+        f = dir / Path(url).name if dir else Path(file)
+        LOGGER.info(f'Downloading {url} to {f}...')
+        f.parent.mkdir(parents=True, exist_ok=True)  # make directory if missing
+        # one initial attempt plus up to `retry` re-attempts
+        for i in range(retry + 1):
+            try:
+                if curl or i > 0:  # curl download with retry, continue
+                    s = 'sS' * (not progress)  # silent
+                    r = os.system(f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -')
+                else:  # torch download
+                    r = torch.hub.download_url_to_file(url, f, progress=progress)
+                # a result of 0 (os.system success status) or None counts as a successful attempt
+                assert r in {0, None}
+            except Exception as e:
+                if i >= retry:
+                    raise ConnectionError(f'❌  Download failure for {url}') from e
+                LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
+                continue
+
+            # NOTE(review): if the final attempt leaves a file of <= min_bytes, it is deleted below and the loop
+            # ends without raising or logging anything
+            if f.exists():
+                if f.stat().st_size > min_bytes:
+                    break  # success
+                f.unlink()  # remove partial downloads
+
+    # archives are extracted next to the file itself (into f.parent)
+    # NOTE(review): f is interpolated unquoted into the tar shell commands; a path containing spaces would
+    # presumably break them - confirm
+    if unzip and f.exists() and f.suffix in {'.zip', '.tar', '.gz'}:
+        LOGGER.info(f'Unzipping {f}...')
+        if f.suffix == '.zip':
+            ZipFile(f).extractall(path=f.parent)  # unzip
+        elif f.suffix == '.tar':
+            os.system(f'tar xf {f} --directory {f.parent}')  # unzip
+        elif f.suffix == '.gz':
+            # any '.gz' suffix is handed to tar; presumably only '.tar.gz' archives are expected - TODO confirm
+            os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
+        if delete:
+            f.unlink()  # remove zip
+
+
+def attempt_download_asset(file, repo='ultralytics/assets', release='v0.0.0'):
    # Attempt file download from GitHub release assets if not found locally. release = 'latest', 'v6.2', etc.
    from ultralytics.yolo.utils import SETTINGS

@ -73,7 +116,7 @@ def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'):
            if Path(file).is_file():
                LOGGER.info(f'Found {url} locally at {file}')  # file already exists
            else:
-                safe_download(file=file, url=url, min_bytes=1E5)
+                safe_download(url=url, file=file, min_bytes=1E5)
            return file

        # GitHub assets
@ -91,61 +134,23 @@ def attempt_download(file, repo='ultralytics/assets', release='v0.0.0'):

        file.parent.mkdir(parents=True, exist_ok=True)  # make parent dir (if required)
        if name in assets:
-            safe_download(file,
-                          url=f'https://github.com/{repo}/releases/download/{tag}/{name}',
-                          min_bytes=1E5,
-                          error_msg=f'{file} missing, try downloading from https://github.com/{repo}/releases/{tag}')
+            safe_download(url=f'https://github.com/{repo}/releases/download/{tag}/{name}', file=file, min_bytes=1E5)

        return str(file)


-def download(url, dir=Path.cwd(), unzip=True, delete=True, curl=False, threads=1, retry=3):
+def download(url, dir=Path.cwd(), unzip=True, delete=False, curl=False, threads=1, retry=3):
     # Multithreaded file download and unzip function, used in data.yaml for autodownload
+    # Every URL is handed to safe_download() together with dir, unzip, delete, curl and retry.
+    # NOTE(review): the default dir=Path.cwd() is evaluated once, when this function is defined.
-    def download_one(url, dir):
-        # Download 1 file
-        success = True
-        if '://' not in str(url) and Path(url).is_file():  # exists ('://' check required in Windows Python<3.10)
-            f = Path(url)  # filename
-        else:  # does not exist
-            f = dir / Path(url).name
-            LOGGER.info(f'Downloading {url} to {f}...')
-            for i in range(retry + 1):
-                if curl:  # curl download with retry, continue
-                    s = 'sS' * (threads > 1)  # silent
-                    r = os.system(f'curl -# -{s}L "{url}" -o "{f}" --retry 9 -C -')
-                    success = r == 0
-                else:  # torch download
-                    torch.hub.download_url_to_file(url, f, progress=threads == 1)
-                    success = f.is_file()
-                if success:
-                    break
-                elif i < retry:
-                    LOGGER.warning(f'⚠️ Download failure, retrying {i + 1}/{retry} {url}...')
-                else:
-                    LOGGER.warning(f'❌ Failed to download {url}...')
-
-        if unzip and success and f.suffix in ('.zip', '.tar', '.gz'):
-            LOGGER.info(f'Unzipping {f}...')
-            if f.suffix == '.zip':
-                ZipFile(f).extractall(path=dir)  # unzip
-            elif f.suffix == '.tar':
-                os.system(f'tar xf {f} --directory {f.parent}')  # unzip
-            elif f.suffix == '.gz':
-                os.system(f'tar xfz {f} --directory {f.parent}')  # unzip
-            if delete:
-                f.unlink()  # remove zip
-
     dir = Path(dir)
     dir.mkdir(parents=True, exist_ok=True)  # make directory
     if threads > 1:
-        # pool = ThreadPool(threads)
-        # pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # multithreaded
-        # pool.close()
-        # pool.join()
         with ThreadPool(threads) as pool:
-            pool.imap(lambda x: download_one(*x), zip(url, repeat(dir)))  # multithreaded
+            # zip(url, repeat(dir)) iterates url itself, so this branch expects an iterable of URLs.
+            # NOTE(review): a single str passed with threads > 1 would be split into characters - confirm callers.
+            # progress=threads <= 1 is always False in this branch, so progress=False is passed for every file.
+            pool.map(
+                lambda x: safe_download(
+                    url=x[0], dir=x[1], unzip=unzip, delete=delete, curl=curl, retry=retry, progress=threads <= 1),
+                zip(url, repeat(dir)))
             pool.close()
             pool.join()
     else:
+        # a single str or Path is wrapped in a list; progress is left at safe_download's default
         for u in [url] if isinstance(url, (str, Path)) else url:
-            download_one(u, dir)
+            safe_download(url=u, dir=dir, unzip=unzip, delete=delete, curl=curl, retry=retry)