nealrichardson commented on code in PR #38115:
URL: https://github.com/apache/arrow/pull/38115#discussion_r1354919868


##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
     }
     libfile <- NULL
   }
+  # Explicitly setting the env var to "false" will skip checksum validation
+  # e.g. in case the included checksums are stale.
+  skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+  enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+  # validate binary checksum for CRAN release only
+  if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+    enforce_checksum) {
+    checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
+    checksum_file <- file.path(checksum_path, checksum_file)
+    checksum_cmd <- "shasum"
+    checksum_args <- c("--status", "-a", "512", "-c", checksum_file)
+
+    # shasum is not available on all linux versions

Review Comment:
   Use `sys.which()` to see if it's present?



##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
     }
     libfile <- NULL
   }
+  # Explicitly setting the env var to "false" will skip checksum validation
+  # e.g. in case the included checksums are stale.
+  skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+  enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+  # validate binary checksum for CRAN release only
+  if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+    enforce_checksum) {
+    checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)

Review Comment:
   I'd factor out the checksum logic to its own function



##########
r/tools/update-checksums.R:
##########
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run this script AFTER the release was voted and the artifacts
+# are moved into the final dir. This script will download the checksum
+# files and save them to the tools/checksums directory mirroring the
+# artifactory layout. *libs.R uses these files to validated the downloaded
+# binaries when installing the package.
+#
+# Run this script from the r/ directory of the arrow repo with the version
+# as the first argument$ Rscript tools/update-checksum.R 14.0.0
+
+args <- commandArgs(TRUE)
+VERSION <- args[1]
+tools_root <- ""
+
+if (length(args) != 1) {
+  stop("Usage: Rscript tools/update-checksums.R <version>")
+}
+
+tasks_yml <- "../dev/tasks/tasks.yml"
+
+if (!file.exists(tasks_yml)) {
+  stop("Run this script from the r/ directory of the arrow repo")
+}
+
+# Get the libarrow binary paths from the tasks.yml file
+binary_paths <- readLines(tasks_yml) |>
+  grep("r-lib__libarrow", x = _, value = TRUE) |>
+  sub(".+r-lib__libarrow__bin__(.+\\.zip)", "\\1", x = _) |>
+  sub("{no_rc_r_version}", VERSION, fixed = TRUE, x = _) |>
+  sub("__", "/", x = _) |>
+  sub("\\.zip", ".zip", fixed = TRUE, x = _)
+
+artifactory_root <- 
"https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s";
+
+# Get the checksuym file from the artifactory

Review Comment:
   "checksuym" spelling



##########
r/tools/winlibs.R:
##########
@@ -17,55 +17,84 @@
 
 args <- commandArgs(TRUE)
 VERSION <- args[1]
-if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) {
-  if (length(args) > 1) {
-    # Arg 2 would be the path/to/lib.zip
-    localfile <- args[2]
-    cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile))
-    if (!file.exists(localfile)) {
-      cat(sprintf("*** %s does not exist; build will fail\n", localfile))
-    }
-    file.copy(localfile, "lib.zip")
-  } else {
-    # Download static arrow from the apache artifactory
-    quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
-    get_file <- function(template, version) {
-      try(
-        suppressWarnings(
-          download.file(sprintf(template, version), "lib.zip", quiet = quietly)
-        ),
-        silent = quietly
-      )
-    }
+dev_version <- package_version(VERSION)[1, 4]
+# Small dev versions are added for R-only changes during CRAN submission
+is_release <- is.na(dev_version) || dev_version < "100"
+env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
+# We want to log the message in the style of the configure script
+# not as an R error. Use `return` to exit the script after logging.
+lg <- function(...) {
+  cat("*** ", sprintf(...), "\n")
+}
 
-    # URL templates
-    nightly <- paste0(
-      getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r";),
-      "/libarrow/bin/windows/arrow-%s.zip"
-    )
-    # %1$s uses the first variable for both substitutions
-    artifactory <- paste0(
-      getOption("arrow.repo", 
"https://apache.jfrog.io/artifactory/arrow/r/%1$s";),
-      "/libarrow/bin/windows/arrow-%1$s.zip"
-    )
-    rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip";
+if (is_release) {
+  # This is a release version, so we need to use the major.minor.patch version 
without
+  # the CRAN suffix/dev_version
+  VERSION <- package_version(VERSION)[1, 1:3]
+  # %1$s uses the first variable for both substitutions
+  url_template <- paste0(
+    getOption("arrow.repo", 
"https://apache.jfrog.io/artifactory/arrow/r/%1$s";),
+    "/libarrow/bin/windows/arrow-%1$s.zip"
+  )
+} else {
+  url_template <- paste0(

Review Comment:
   Confirming here that we're no longer going to try downloading from 
https://github.com/rwinlib/arrow, i.e. we'll be using the ASF libarrow binaries 
for both macOS and Windows. Correct? cc @thisisnic 



##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
     }
     libfile <- NULL
   }
+  # Explicitly setting the env var to "false" will skip checksum validation
+  # e.g. in case the included checksums are stale.
+  skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+  enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+  # validate binary checksum for CRAN release only
+  if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+    enforce_checksum) {
+    checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
+    checksum_file <- file.path(checksum_path, checksum_file)
+    checksum_cmd <- "shasum"
+    checksum_args <- c("--status", "-a", "512", "-c", checksum_file)
+
+    # shasum is not available on all linux versions
+    status_shasum <- try(
+      suppressWarnings(
+        system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE)
+      ),
+      silent = TRUE
+    )
+
+    if (inherits(status_shasum, "try-error") || is.integer(status_shasum) && 
status_shasum != 0) {
+      checksum_cmd <- "sha512sum"
+      checksum_args <- c("--status", "-c", checksum_file)
+    }
+
+    checksum_ok <- system2(checksum_cmd, args = checksum_args)

Review Comment:
   So to checksum, we require a shell command? Is that ok? Should it be 
declared in SystemRequirements? 
   
   Alternatively, the `openssl` package can compute hashes, so we could depend 
on that.
   
   Or, perhaps: since the binaries require openssl on the system, if we're here 
downloading, we've confirmed that it is present, so could we shell out to it 
like `openssl dgst -sha512`?



##########
r/PACKAGING.md:
##########
@@ -68,6 +68,7 @@ Wait for the release candidate to be cut:
 - [ ] Create a PR entitled `WIP: [R] Verify CRAN release-10.0.1-rc0`. Add
   a comment `@github-actions crossbow submit --group r` to run all R crossbow
   jobs against the CRAN-specific release branch.
+- [ ] Run `Rscript tools/update-checksums.R <libarrow version>` to download 
the checksums for the pre-compiled binaries from the ASF artifactory into the 
tools directory.

Review Comment:
   Should this get folded into one of the makefile commands (maybe rename 
`sync-cpp` to cover this, it already syncs things like the NOTICE file) so that 
it's not overlooked?
   
   Presumably if this step is not done, win-builder will fail, but macbuilder 
would probably proceed trying to build from source and may succeed. Is that 
enough "testing" to validate that we haven't forgotten this step at release 
time?
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to