nealrichardson commented on a change in pull request #11001:
URL: https://github.com/apache/arrow/pull/11001#discussion_r696042759
##########
File path: r/tools/nixlibs.R
##########
@@ -271,13 +281,18 @@ apache_download <- function(version, destfile, n_mirrors
= 3) {
}
find_local_source <- function(arrow_home = Sys.getenv("ARROW_SOURCE_HOME",
"..")) {
+ cpp_dir <- NULL
if (file.exists(paste0(arrow_home, "/cpp/src/arrow/api.h"))) {
# We're in a git checkout of arrow, so we can build it
- cat("*** Found local C++ source\n")
- return(paste0(arrow_home, "/cpp"))
- } else {
- return(NULL)
+ cpp_dir <- paste0(arrow_home, "/cpp")
+ } else if (file.exists("tools/cpp/src/arrow/api.h")) {
Review comment:
Instead of the if/else if pattern, you could call find_local_source()
twice, first with no args (to get the default), then
`find_local_source("tools")`. Or you could assume `arrow_home` is a vector of
paths to try and iterate over it.
##########
File path: r/tools/nixlibs.R
##########
@@ -435,22 +558,177 @@ with_s3_support <- function(env_vars) {
cat("**** S3 support requires version >= 1.0.2 of openssl-devel (rpm),
libssl-dev (deb), or openssl (brew); building with ARROW_S3=OFF\n")
arrow_s3 <- FALSE
}
+ download_unavailable <- remote_download_unavailable(c(
+ "ARROW_AWSSDK_URL",
+ "ARROW_AWS_C_COMMON_URL",
+ "ARROW_AWS_CHECKSUMS_URL",
+ "ARROW_AWS_C_EVENT_STREAM_URL"
+ ))
+ if (download_unavailable) {
+ cat(paste(
+ "**** S3 dependencies need to be downloaded, but can't be.",
+ "See ?arrow::download_optional_dependencies.",
+ "Building with ARROW_S3=OFF\n"
+ ))
+ arrow_s3 <- FALSE
+ }
}
paste(env_vars, ifelse(arrow_s3, "ARROW_S3=ON", "ARROW_S3=OFF"))
}
-with_mimalloc <- function(env_vars) {
- arrow_mimalloc <- toupper(Sys.getenv("ARROW_MIMALLOC")) == "ON" ||
tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
- if (arrow_mimalloc) {
- # User wants mimalloc. If they're using gcc, let's make sure the version
is >= 4.9
- if (isTRUE(cmake_gcc_version(env_vars) < "4.9")) {
- cat("**** mimalloc support not available for gcc < 4.9; building with
ARROW_MIMALLOC=OFF\n")
- arrow_mimalloc <- FALSE
+# Compression features: brotli, bz2, lz4, snappy, zlib, zstd
+with_brotli <- function(env_vars) {
+ arrow_brotli <- is_feature_requested("ARROW_WITH_BROTLI")
+ if (arrow_brotli) {
+ download_unavailable <- remote_download_unavailable("ARROW_BROTLI_URL")
+ if (download_unavailable) {
+ cat("**** brotli requested but cannot be downloaded. Setting
ARROW_WITH_BROTLI=OFF\n")
+ arrow_brotli <- FALSE
}
}
- paste(env_vars, ifelse(arrow_mimalloc, "ARROW_MIMALLOC=ON",
"ARROW_MIMALLOC=OFF"))
+ paste(env_vars, ifelse(arrow_brotli, "ARROW_WITH_BROTLI=ON",
"ARROW_WITH_BROTLI=OFF"))
+}
+
+with_bz2 <- function(env_vars) {
+ arrow_brotli <- is_feature_requested("ARROW_WITH_BZ2")
Review comment:
This function says brotli but should be bz2
##########
File path: r/tools/nixlibs.R
##########
@@ -52,6 +43,21 @@ try_download <- function(from_url, to_file) {
!inherits(status, "try-error") && status == 0
}
+quietly <- !env_is("ARROW_R_DEV", "true") # try_download uses quietly global
+# * download_ok, build_ok: Use prebuilt binary, if found, otherwise try to
build
+# * no download, build_ok: Build with local git checkout, if available, or
+# sources included in r/tools/cpp/. Optional dependencies are not included,
+# and will not be automatically downloaded.
+# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+# * download_ok, no build: Only use prebuilt binary, if found
+# * neither: Get the arrow-without-arrow package
+# Download and build are OK unless you say not to (or can't access github)
+download_ok <- (!env_is("LIBARROW_DOWNLOAD", "false")) &&
try_download("https://github.com", tempfile())
+build_ok <- !env_is("LIBARROW_BUILD", "false")
+# But binary defaults to not OK
+binary_ok <- !identical(tolower(Sys.getenv("LIBARROW_BINARY", "false")),
"false")
+# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
Review comment:
This comment goes above L46
##########
File path: r/tools/nixlibs.R
##########
@@ -52,6 +43,21 @@ try_download <- function(from_url, to_file) {
!inherits(status, "try-error") && status == 0
}
+quietly <- !env_is("ARROW_R_DEV", "true") # try_download uses quietly global
+# * download_ok, build_ok: Use prebuilt binary, if found, otherwise try to
build
+# * no download, build_ok: Build with local git checkout, if available, or
+# sources included in r/tools/cpp/. Optional dependencies are not included,
+# and will not be automatically downloaded.
+# https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+# * download_ok, no build: Only use prebuilt binary, if found
+# * neither: Get the arrow-without-arrow package
+# Download and build are OK unless you say not to (or can't access github)
+download_ok <- (!env_is("LIBARROW_DOWNLOAD", "false")) &&
try_download("https://github.com", tempfile())
Review comment:
I think we want to remove the `LIBARROW_DOWNLOAD` env var altogether,
and in fact not ever download Arrow C++ source (which is what this variable
governs currently). Third party dependencies should always download if you're
not offline, I think, as they do now.
(This is why the github actions failed: the linux jobs set
LIBARROW_DOWNLOAD: false in docker-compose.yml so that they're forced to use
the local C++ checkout, but now they fail to download `cmake` because you
extended the offline checks there too.)
We probably should replace this with a new `TEST_OFFLINE_BUILD` variable
that allows us to turn off downloading in order to simulate the offline build,
and have CI jobs that test both with and without the
`download_optional_dependencies()` call.
##########
File path: r/tools/nixlibs.R
##########
@@ -503,12 +781,10 @@ if (!file.exists(paste0(dst_dir,
"/include/arrow/api.h"))) {
unlink(bin_file)
} else if (build_ok) {
# (2) Find source and build it
- if (download_ok) {
+ src_dir <- find_local_source()
+ if (is.null(src_dir) && download_ok) {
Review comment:
I would delete `download_source()`--if we're bundling the C++ source in
the R package, then we should never need to download it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]