[GitHub] [tvm] leandron commented on a change in pull request #8533: [Contrib] Added default non-verbose to download_testdata(), pass to download()

GitBox Fri, 23 Jul 2021 01:53:22 -0700


leandron commented on a change in pull request #8533:
URL: https://github.com/apache/tvm/pull/8533#discussion_r675384181




##########
File path: python/tvm/contrib/download.py
##########
@@ -62,72 +62,74 @@ def download(url, path, overwrite=False, 
size_compare=False, verbose=1, retries=
                 res_get = urllib2.urlopen(url)
             url_file_size = int(res_get.headers["Content-Length"])
             if url_file_size != file_size:
-                print("exist file got corrupted, downloading %s file 
freshly..." % path)
-                download(url, path, True, False)
+                if verbose:
+                    print(f"Existing file {path} has incorrect size, 
downloading fresh copy")

Review comment:
       Is the use of `print()` intentional? as opposed to using the use of 
`logging` module, similar to other parts of our code base?
   
   One advantage of `logging` is that it can set various log levels, removing 
the need for the blocks like `if verbose: print("message")`, and instead, we 
could use `verbose` to set log level into e.g. `DEBUG` in this module.
   
   What do you think?
   
   

##########
File path: python/tvm/contrib/download.py
##########
@@ -62,72 +62,74 @@ def download(url, path, overwrite=False, 
size_compare=False, verbose=1, retries=
                 res_get = urllib2.urlopen(url)
             url_file_size = int(res_get.headers["Content-Length"])
             if url_file_size != file_size:
-                print("exist file got corrupted, downloading %s file 
freshly..." % path)
-                download(url, path, True, False)
+                if verbose:
+                    print(f"Existing file {path} has incorrect size, 
downloading fresh copy")
+                download(
+                    url, path, overwrite=True, size_compare=False, 
verbose=verbose, retries=retries
+                )
                 return
-        print("File {} exists, skip.".format(path))
+
+        if verbose:
+            print(f"File {path} exists, skipping.")
         return
 
-    if verbose >= 1:
-        print("Downloading from url {} to {}".format(url, path))
+    if verbose:
+        print(f"Downloading from url {url} to {path}")
 
     # Stateful start time
     start_time = time.time()
     dirpath = path.parent
     dirpath.mkdir(parents=True, exist_ok=True)
-    random_uuid = str(uuid.uuid4())
-    tempfile = Path(dirpath, random_uuid)
 
     def _download_progress(count, block_size, total_size):
         # pylint: disable=unused-argument
         """Show the download progress."""
         if count == 0:
             return
         duration = time.time() - start_time
-        progress_size = int(count * block_size)
-        speed = int(progress_size / (1024 * duration))
+        progress_bytes = int(count * block_size)
+        progress_megabytes = progress_bytes / (1024.0 * 1024)
+        speed_kBps = int(progress_bytes / (1024 * duration))
         percent = min(int(count * block_size * 100 / total_size), 100)
         sys.stdout.write(
-            "\r...%d%%, %.2f MB, %d KB/s, %d seconds passed"
-            % (percent, progress_size / (1024.0 * 1024), speed, duration)
+            f"\r...{percent:d}%, {progress_megabytes:.2f} MB, {speed_kBps} 
kB/s, {duration:.1f} seconds passed"
         )
         sys.stdout.flush()
 
-    while retries >= 0:
-        # Disable pyling too broad Exception
-        # pylint: disable=W0703
-        try:
-            if sys.version_info >= (3,):
-                urllib2.urlretrieve(url, tempfile, 
reporthook=_download_progress)
-                print("")
-            else:
-                f = urllib2.urlopen(url)
-                data = f.read()
-                with open(tempfile, "wb") as code:
-                    code.write(data)
-            shutil.move(tempfile, path)
-            break
-        except Exception as err:
-            retries -= 1
-            if retries == 0:
-                if tempfile.exists():
-                    tempfile.unlink()
-                raise err
-            print(
-                "download failed due to {}, retrying, {} attempt{} 
left".format(
-                    repr(err), retries, "s" if retries > 1 else ""
+    reporthook = _download_progress if verbose else None
+
+    with tempfile.TemporaryDirectory() as tempdir:
+        tempdir = Path(tempdir)
+        download_loc = tempdir.joinpath(path.name)
+
+        for i_retry in range(retries):
+            # pylint: disable=broad-except
+            try:
+                urllib2.urlretrieve(url, download_loc, reporthook=reporthook)
+                if verbose:
+                    print("")
+                download_loc.rename(path)
+                return
+
+            except Exception as err:
+                if i_retry == retries - 1:
+                    raise err
+
+                print(
+                    "\n".join(
+                        [repr(err), f"Download attempt {i_retry}/{retries} 
failed, retrying."]
+                    )
                 )
-            )
 
 
-if "TEST_DATA_ROOT_PATH" in environ:
-    TEST_DATA_ROOT_PATH = Path(environ.get("TEST_DATA_ROOT_PATH"))
+if "TEST_DATA_ROOT_PATH" in os.environ:
+    TEST_DATA_ROOT_PATH = Path(os.environ.get("TEST_DATA_ROOT_PATH"))
 else:
     TEST_DATA_ROOT_PATH = Path(Path("~").expanduser(), ".tvm_test_data")
 TEST_DATA_ROOT_PATH.mkdir(parents=True, exist_ok=True)
 
 
-def download_testdata(url, relpath, module=None, overwrite=False):
+def download_testdata(url, relpath, module=None, overwrite=False, 
verbose=False):

Review comment:
       Can you also add `overwrite` to the docstring?

##########
File path: python/tvm/contrib/download.py
##########
@@ -41,8 +41,8 @@ def download(url, path, overwrite=False, size_compare=False, 
verbose=1, retries=
     size_compare : bool, optional
         Whether to do size compare to check downloaded file.
 
-    verbose: int, optional
-        Verbose level
+    verbose: bool, optional
+        If status messages should be printed.

Review comment:
       ```suggestion
           If status messages should be printed. Defaults to false.
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] leandron commented on a change in pull request #8533: [Contrib] Added default non-verbose to download_testdata(), pass to download()

Reply via email to