Lunderberg commented on a change in pull request #8533:
URL: https://github.com/apache/tvm/pull/8533#discussion_r675557821
##########
File path: python/tvm/contrib/download.py
##########
@@ -62,72 +62,74 @@ def download(url, path, overwrite=False,
size_compare=False, verbose=1, retries=
res_get = urllib2.urlopen(url)
url_file_size = int(res_get.headers["Content-Length"])
if url_file_size != file_size:
- print("exist file got corrupted, downloading %s file
freshly..." % path)
- download(url, path, True, False)
+ if verbose:
+ print(f"Existing file {path} has incorrect size,
downloading fresh copy")
Review comment:
No particular reason for me, other than it being the way the file was
previously written. Looking through the history, there aren't any commits that
suggest a need for `print` statements, so I agree that `logging` is the better
route to go.
With `logging`, I think that we can drop the `verbose` flag altogether. The
user can still specify whether they want the verbose output by setting that
logger's log level, but it doesn't need to be part of the main function call.
For the information in these print statuses, I think they should be at `INFO`
level, except for the download progress bar which can be at `DEBUG` level.
##########
File path: python/tvm/contrib/download.py
##########
@@ -62,72 +62,74 @@ def download(url, path, overwrite=False,
size_compare=False, verbose=1, retries=
res_get = urllib2.urlopen(url)
url_file_size = int(res_get.headers["Content-Length"])
if url_file_size != file_size:
- print("exist file got corrupted, downloading %s file
freshly..." % path)
- download(url, path, True, False)
+ if verbose:
+ print(f"Existing file {path} has incorrect size,
downloading fresh copy")
+ download(
+ url, path, overwrite=True, size_compare=False,
verbose=verbose, retries=retries
+ )
return
- print("File {} exists, skip.".format(path))
+
+ if verbose:
+ print(f"File {path} exists, skipping.")
return
- if verbose >= 1:
- print("Downloading from url {} to {}".format(url, path))
+ if verbose:
+ print(f"Downloading from url {url} to {path}")
# Stateful start time
start_time = time.time()
dirpath = path.parent
dirpath.mkdir(parents=True, exist_ok=True)
- random_uuid = str(uuid.uuid4())
- tempfile = Path(dirpath, random_uuid)
def _download_progress(count, block_size, total_size):
# pylint: disable=unused-argument
"""Show the download progress."""
if count == 0:
return
duration = time.time() - start_time
- progress_size = int(count * block_size)
- speed = int(progress_size / (1024 * duration))
+ progress_bytes = int(count * block_size)
+ progress_megabytes = progress_bytes / (1024.0 * 1024)
+ speed_kBps = int(progress_bytes / (1024 * duration))
percent = min(int(count * block_size * 100 / total_size), 100)
sys.stdout.write(
- "\r...%d%%, %.2f MB, %d KB/s, %d seconds passed"
- % (percent, progress_size / (1024.0 * 1024), speed, duration)
+ f"\r...{percent:d}%, {progress_megabytes:.2f} MB, {speed_kBps}
kB/s, {duration:.1f} seconds passed"
)
sys.stdout.flush()
- while retries >= 0:
- # Disable pyling too broad Exception
- # pylint: disable=W0703
- try:
- if sys.version_info >= (3,):
- urllib2.urlretrieve(url, tempfile,
reporthook=_download_progress)
- print("")
- else:
- f = urllib2.urlopen(url)
- data = f.read()
- with open(tempfile, "wb") as code:
- code.write(data)
- shutil.move(tempfile, path)
- break
- except Exception as err:
- retries -= 1
- if retries == 0:
- if tempfile.exists():
- tempfile.unlink()
- raise err
- print(
- "download failed due to {}, retrying, {} attempt{}
left".format(
- repr(err), retries, "s" if retries > 1 else ""
+ reporthook = _download_progress if verbose else None
+
+ with tempfile.TemporaryDirectory() as tempdir:
+ tempdir = Path(tempdir)
+ download_loc = tempdir.joinpath(path.name)
+
+ for i_retry in range(retries):
+ # pylint: disable=broad-except
+ try:
+ urllib2.urlretrieve(url, download_loc, reporthook=reporthook)
+ if verbose:
+ print("")
+ download_loc.rename(path)
+ return
+
+ except Exception as err:
+ if i_retry == retries - 1:
+ raise err
+
+ print(
+ "\n".join(
+ [repr(err), f"Download attempt {i_retry}/{retries}
failed, retrying."]
+ )
)
- )
-if "TEST_DATA_ROOT_PATH" in environ:
- TEST_DATA_ROOT_PATH = Path(environ.get("TEST_DATA_ROOT_PATH"))
+if "TEST_DATA_ROOT_PATH" in os.environ:
+ TEST_DATA_ROOT_PATH = Path(os.environ.get("TEST_DATA_ROOT_PATH"))
else:
TEST_DATA_ROOT_PATH = Path(Path("~").expanduser(), ".tvm_test_data")
TEST_DATA_ROOT_PATH.mkdir(parents=True, exist_ok=True)
-def download_testdata(url, relpath, module=None, overwrite=False):
+def download_testdata(url, relpath, module=None, overwrite=False,
verbose=False):
Review comment:
Good catch, and can do.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]