nealrichardson commented on code in PR #38115:
URL: https://github.com/apache/arrow/pull/38115#discussion_r1354919868
##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
}
libfile <- NULL
}
+ # Explicitly setting the env var to "false" will skip checksum validation
+ # e.g. in case the included checksums are stale.
+ skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+ enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+ # validate binary checksum for CRAN release only
+ if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+ enforce_checksum) {
+ checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
+ checksum_file <- file.path(checksum_path, checksum_file)
+ checksum_cmd <- "shasum"
+ checksum_args <- c("--status", "-a", "512", "-c", checksum_file)
+
+ # shasum is not available on all linux versions
Review Comment:
Use `sys.which()` to see if it's present?
##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
}
libfile <- NULL
}
+ # Explicitly setting the env var to "false" will skip checksum validation
+ # e.g. in case the included checksums are stale.
+ skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+ enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+ # validate binary checksum for CRAN release only
+ if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+ enforce_checksum) {
+ checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
Review Comment:
I'd factor out the checksum logic to its own function
##########
r/tools/update-checksums.R:
##########
@@ -0,0 +1,67 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# Run this script AFTER the release was voted and the artifacts
+# are moved into the final dir. This script will download the checksum
+# files and save them to the tools/checksums directory mirroring the
+# artifactory layout. *libs.R uses these files to validated the downloaded
+# binaries when installing the package.
+#
+# Run this script from the r/ directory of the arrow repo with the version
+# as the first argument$ Rscript tools/update-checksum.R 14.0.0
+
+args <- commandArgs(TRUE)
+VERSION <- args[1]
+tools_root <- ""
+
+if (length(args) != 1) {
+ stop("Usage: Rscript tools/update-checksums.R <version>")
+}
+
+tasks_yml <- "../dev/tasks/tasks.yml"
+
+if (!file.exists(tasks_yml)) {
+ stop("Run this script from the r/ directory of the arrow repo")
+}
+
+# Get the libarrow binary paths from the tasks.yml file
+binary_paths <- readLines(tasks_yml) |>
+ grep("r-lib__libarrow", x = _, value = TRUE) |>
+ sub(".+r-lib__libarrow__bin__(.+\\.zip)", "\\1", x = _) |>
+ sub("{no_rc_r_version}", VERSION, fixed = TRUE, x = _) |>
+ sub("__", "/", x = _) |>
+ sub("\\.zip", ".zip", fixed = TRUE, x = _)
+
+artifactory_root <-
"https://apache.jfrog.io/artifactory/arrow/r/%s/libarrow/bin/%s"
+
+# Get the checksuym file from the artifactory
Review Comment:
"checksuym" spelling
##########
r/tools/winlibs.R:
##########
@@ -17,55 +17,84 @@
args <- commandArgs(TRUE)
VERSION <- args[1]
-if (!file.exists(sprintf("windows/arrow-%s/include/arrow/api.h", VERSION))) {
- if (length(args) > 1) {
- # Arg 2 would be the path/to/lib.zip
- localfile <- args[2]
- cat(sprintf("*** Using RWINLIB_LOCAL %s\n", localfile))
- if (!file.exists(localfile)) {
- cat(sprintf("*** %s does not exist; build will fail\n", localfile))
- }
- file.copy(localfile, "lib.zip")
- } else {
- # Download static arrow from the apache artifactory
- quietly <- !identical(tolower(Sys.getenv("ARROW_R_DEV")), "true")
- get_file <- function(template, version) {
- try(
- suppressWarnings(
- download.file(sprintf(template, version), "lib.zip", quiet = quietly)
- ),
- silent = quietly
- )
- }
+dev_version <- package_version(VERSION)[1, 4]
+# Small dev versions are added for R-only changes during CRAN submission
+is_release <- is.na(dev_version) || dev_version < "100"
+env_is <- function(var, value) identical(tolower(Sys.getenv(var)), value)
+# We want to log the message in the style of the configure script
+# not as an R error. Use `return` to exit the script after logging.
+lg <- function(...) {
+ cat("*** ", sprintf(...), "\n")
+}
- # URL templates
- nightly <- paste0(
- getOption("arrow.dev_repo", "https://nightlies.apache.org/arrow/r"),
- "/libarrow/bin/windows/arrow-%s.zip"
- )
- # %1$s uses the first variable for both substitutions
- artifactory <- paste0(
- getOption("arrow.repo",
"https://apache.jfrog.io/artifactory/arrow/r/%1$s"),
- "/libarrow/bin/windows/arrow-%1$s.zip"
- )
- rwinlib <- "https://github.com/rwinlib/arrow/archive/v%s.zip"
+if (is_release) {
+ # This is a release version, so we need to use the major.minor.patch version
without
+ # the CRAN suffix/dev_version
+ VERSION <- package_version(VERSION)[1, 1:3]
+ # %1$s uses the first variable for both substitutions
+ url_template <- paste0(
+ getOption("arrow.repo",
"https://apache.jfrog.io/artifactory/arrow/r/%1$s"),
+ "/libarrow/bin/windows/arrow-%1$s.zip"
+ )
+} else {
+ url_template <- paste0(
Review Comment:
Confirming here that we're no longer going to try downloading from
https://github.com/rwinlib/arrow, i.e. we'll be using the ASF libarrow binaries
for both macOS and Windows. Correct? cc @thisisnic
##########
r/tools/nixlibs.R:
##########
@@ -103,6 +104,42 @@ download_binary <- function(lib) {
}
libfile <- NULL
}
+ # Explicitly setting the env var to "false" will skip checksum validation
+ # e.g. in case the included checksums are stale.
+ skip_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "false")
+ enforce_checksum <- env_is("ARROW_R_ENFORCE_CHECKSUM", "true")
+ # validate binary checksum for CRAN release only
+ if (!skip_checksum && dir.exists(checksum_path) && is_release ||
+ enforce_checksum) {
+ checksum_file <- sub(".+/bin/(.+\\.zip)", "\\1\\.sha512", binary_url)
+ checksum_file <- file.path(checksum_path, checksum_file)
+ checksum_cmd <- "shasum"
+ checksum_args <- c("--status", "-a", "512", "-c", checksum_file)
+
+ # shasum is not available on all linux versions
+ status_shasum <- try(
+ suppressWarnings(
+ system2("shasum", args = c("--help"), stdout = FALSE, stderr = FALSE)
+ ),
+ silent = TRUE
+ )
+
+ if (inherits(status_shasum, "try-error") || is.integer(status_shasum) &&
status_shasum != 0) {
+ checksum_cmd <- "sha512sum"
+ checksum_args <- c("--status", "-c", checksum_file)
+ }
+
+ checksum_ok <- system2(checksum_cmd, args = checksum_args)
Review Comment:
So to checksum, we require a shell command? Is that ok? Should it be
declared in SystemRequirements?
Alternatively, the `openssl` package can compute hashes, so we could depend
on that.
Or, perhaps: since the binaries require openssl on the system, if we're here
downloading, we've confirmed that it is present, so could we shell out to it
like `openssl dgst -sha512`?
##########
r/PACKAGING.md:
##########
@@ -68,6 +68,7 @@ Wait for the release candidate to be cut:
- [ ] Create a PR entitled `WIP: [R] Verify CRAN release-10.0.1-rc0`. Add
a comment `@github-actions crossbow submit --group r` to run all R crossbow
jobs against the CRAN-specific release branch.
+- [ ] Run `Rscript tools/update-checksums.R <libarrow version>` to download
the checksums for the pre-compiled binaries from the ASF artifactory into the
tools directory.
Review Comment:
Should this get folded into one of the makefile commands (maybe rename
`sync-cpp` to cover this, it already syncs things like the NOTICE file) so that
it's not overlooked?
Presumably if this step is not done, win-builder will fail, but macbuilder
would probably proceed trying to build from source and may succeed. Is that
enough "testing" to validate that we haven't forgotten this step at release
time?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]