maropu commented on a change in pull request #32505:
URL: https://github.com/apache/spark/pull/32505#discussion_r635661782



##########
File path: build/mvn
##########
@@ -26,36 +26,67 @@ _COMPILE_JVM_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=1g"
 
 # Installs any application tarball given a URL, the expected tarball name,
 # and, optionally, a checkable binary path to determine if the binary has
-# already been installed
-## Arg1 - URL
-## Arg2 - Tarball Name
-## Arg3 - Checkable Binary
+# already been installed. Arguments:
+# 1 - Mirror host
+# 2 - URL path on host
+# 3 - URL query string
+# 4 - checksum suffix
+# 5 - Tarball Name
+# 6 - Checkable Binary
 install_app() {
-  local remote_tarball="$1"
-  local local_tarball="${_DIR}/$2"
-  local binary="${_DIR}/$3"
+  local mirror_host="$1"
+  local url_path="$2"
+  local url_query="$3"
+  local checksum_suffix="$4"
+  local local_tarball="${_DIR}/$5"
+  local binary="${_DIR}/$6"
+  local remote_tarball="${mirror_host}/${url_path}${url_query}"
+  local local_checksum="${local_tarball}.${checksum_suffix}"
+  local 
remote_checksum="https://archive.apache.org/dist/${url_path}.${checksum_suffix}";
 
   local curl_opts="--silent --show-error -L"
   local wget_opts="--no-verbose"
 
-  if [ -z "$3" -o ! -f "$binary" ]; then
+  if [ ! -f "$binary" ]; then
     # check if we already have the tarball
     # check if we have curl installed
     # download application
-    [ ! -f "${local_tarball}" ] && [ $(command -v curl) ] && \
-      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v curl) ]; then
+      echo "exec: curl ${curl_opts} ${remote_tarball}" 1>&2
       curl ${curl_opts} "${remote_tarball}" > "${local_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: curl ${curl_opts} ${remote_checksum}" 1>&2
+        curl ${curl_opts} "${remote_checksum}" > "${local_checksum}"
+      fi
+    fi
     # if the file still doesn't exist, lets try `wget` and cross our fingers
-    [ ! -f "${local_tarball}" ] && [ $(command -v wget) ] && \
-      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2 && \
+    if [ ! -f "${local_tarball}" -a $(command -v wget) ]; then
+      echo "exec: wget ${wget_opts} ${remote_tarball}" 1>&2
       wget ${wget_opts} -O "${local_tarball}" "${remote_tarball}"
+      if [ ! -z "${checksum_suffix}" ]; then
+        echo "exec: wget ${wget_opts} ${remote_checksum}" 1>&2
+        wget ${wget_opts} -O "${local_checksum}" "${remote_checksum}"
+      fi
+    fi
     # if both were unsuccessful, exit
-    [ ! -f "${local_tarball}" ] && \
-      echo -n "ERROR: Cannot download $2 with cURL or wget; " && \
-      echo "please install manually and try again." && \
+    if [ ! -f "${local_tarball}" ]; then
+      echo -n "ERROR: Cannot download ${remote_tarball} with cURL or wget; 
please install manually and try again."
       exit 2
-    cd "${_DIR}" && tar -xzf "$2"
-    rm -rf "$local_tarball"
+    fi
+    # Checksum may not have been specified; don't check if doesn't exist
+    if [ -f "${local_checksum}" ]; then

Review comment:
       > Oh really? shoot. That seemed to be the one command that was on Linux 
and OS X.
   
   Yea, for Amazon Linux2, we need to run `sudo yum install perl-Digest-SHA` 
before running `build/mvn`.
   
   > Is that better or worse than just silently letting it proceed without 
verification?
   
   IMO, since this check exists to prevent cyber attacks, it is safer to 
make`build/mvn` fail if  `shasum` does not exists (actually, it seems the other 
OSs have `shasum` by default, so most developers don't care, I think).
   
   > Would developers generally build on this distro?
   
   Not sure, but that's the default option in aws ec2. I noticed this issue 
because my daily build broke after this commit merged.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to