nealrichardson commented on a change in pull request #11001:
URL: https://github.com/apache/arrow/pull/11001#discussion_r701097959



##########
File path: r/tools/nixlibs.R
##########
@@ -52,6 +42,24 @@ try_download <- function(from_url, to_file) {
   !inherits(status, "try-error") && status == 0
 }
 
+# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
+quietly <- !env_is("ARROW_R_DEV", "true")
+
+# Default is build from source, not download a binary
+build_ok <- !env_is("LIBARROW_BUILD", "false")
+binary_ok <- !(env_is("LIBARROW_BINARY", "false") || env_is("LIBARROW_BINARY", 
""))
+
+# Check if we're doing an offline build.
+# (Note that cmake will still be downloaded if necessary
+#  https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds)
+download_ok <- !env_is("TEST_OFFLINE_BUILD", "true") && 
try_download("https://github.com", tempfile())
+
+# This path, within the tar file, might exist if
+# create_package_with_all_dependencies() was run. Otherwise, it won't, but
+# tools/cpp/thirdparty/ still will.

Review comment:
       `tools/cpp/thirdparty/` isn't guaranteed to exist in a git checkout

##########
File path: r/tools/nixlibs.R
##########
@@ -413,66 +421,137 @@ cmake_version <- function(cmd = "cmake") {
   )
 }
 
-with_s3_support <- function(env_vars) {
-  arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || 
tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
+turn_off_thirdparty_features <- function(env_var_list) {
+  # Because these are done as environment variables (as opposed to build 
flags),
+  # setting these to "OFF" overrides any previous setting. We don't need to
+  # check the existing value.
+  turn_off <- c(
+    "ARROW_MIMALLOC" = "OFF",
+    "ARROW_JEMALLOC" = "OFF",
+    "ARROW_PARQUET" = "OFF", # depends on thrift
+    "ARROW_DATASET" = "OFF", # depends on parquet
+    "ARROW_S3" = "OFF",
+    "ARROW_WITH_BROTLI" = "OFF",
+    "ARROW_WITH_BZ2" = "OFF",
+    "ARROW_WITH_LZ4" = "OFF",
+    "ARROW_WITH_SNAPPY" = "OFF",
+    "ARROW_WITH_ZLIB" = "OFF",
+    "ARROW_WITH_ZSTD" = "OFF",
+    "ARROW_WITH_RE2" = "OFF",
+    "ARROW_WITH_UTF8PROC" = "OFF",
+    # NOTE: this code sets the environment variable ARROW_JSON to "OFF", but
+    # that setting is will *not* be honored by build_arrow_static.sh until
+    # ARROW-13768 is resolved.
+    "ARROW_JSON" = "OFF",
+    # The syntax to turn off XSIMD is different.
+    # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
+    "EXTRA_CMAKE_FLAGS" = paste(
+      env_var_list[["EXTRA_CMAKE_FLAGS"]],
+      "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
+    )
+  )
+  # Create a new env_var_list, with the values of turn_off set.
+  # replace() also adds new values if they didn't exist before
+  replace(env_var_list, names(turn_off), turn_off)
+}
+
+set_thirdparty_urls <- function(env_var_list) {
+  # This function does *not* check if existing *_SOURCE_URL variables are set.
+  # The directory tools/cpp/thirdparty/download is created by
+  # create_package_with_all_dependencies() and saved in the tar file.
+  # In all other cases, where we're not installing from that offline tar file,
+  # that directory won't exist, but tools/cpp/thirdparty/ still should.
+  # Test tools/cpp/thirdparty to avoid false negatives.
+  deps_dir <- thirdparty_dependency_dir # defined at the top
+  stopifnot(dir.exists(dirname(thirdparty_dependency_dir)))
+  if (!dir.exists(deps_dir)) {
+    return(env_var_list)
+  }
+  files <- list.files(deps_dir, full.names = FALSE)
+  url_env_varname <- toupper(sub("(.*?)-.*", "ARROW_\\1_URL", files))
+  # Special handling for the aws dependencies, which have extra `-`
+  aws <- grepl("^aws", files)
+  url_env_varname[aws] <- sub(
+    "AWS_SDK_CPP", "AWSSDK",
+    gsub(
+      "-", "_",
+      sub(
+        "(AWS.*)-.*", "ARROW_\\1_URL",
+        toupper(files[aws])
+      )
+    )
+  )
+  full_filenames <- file.path(normalizePath(deps_dir), files)

Review comment:
       ```suggestion
     full_filenames <- file.path(normalizePath(thirdparty_dependency_dir), 
files)
   ```

##########
File path: r/tools/nixlibs.R
##########
@@ -320,33 +299,54 @@ build_libarrow <- function(src_dir, dst_dir) {
     BUILD_DIR = build_dir,
     DEST_DIR = dst_dir,
     CMAKE = cmake,
+    # EXTRA_CMAKE_FLAGS will often be "", but it's convenient later to have it 
defined
+    EXTRA_CMAKE_FLAGS = Sys.getenv("EXTRA_CMAKE_FLAGS"),
     # Make sure we build with the same compiler settings that R is using
     CC = R_CMD_config("CC"),
     CXX = paste(R_CMD_config("CXX11"), R_CMD_config("CXX11STD")),
     # CXXFLAGS = R_CMD_config("CXX11FLAGS"), # We don't want the same debug 
symbols
     LDFLAGS = R_CMD_config("LDFLAGS")
   )
-  env_vars <- paste0(names(env_var_list), '="', env_var_list, '"', collapse = 
" ")
-  env_vars <- with_s3_support(env_vars)
-  env_vars <- with_mimalloc(env_vars)
-  if (tolower(Sys.info()[["sysname"]]) %in% "sunos") {
-    # jemalloc doesn't seem to build on Solaris
-    # nor does thrift, so turn off parquet,
-    # and arrowExports.cpp requires parquet for dataset (ARROW-11994), so turn 
that off
-    # xsimd doesn't compile, so set SIMD level to NONE to skip it
-    # re2 and utf8proc do compile,
-    # but `ar` fails to build libarrow_bundled_dependencies, so turn them off
-    # so that there are no bundled deps
-    env_vars <- paste(env_vars, "ARROW_JEMALLOC=OFF ARROW_PARQUET=OFF 
ARROW_DATASET=OFF ARROW_WITH_RE2=OFF ARROW_WITH_UTF8PROC=OFF 
EXTRA_CMAKE_FLAGS=-DARROW_SIMD_LEVEL=NONE")
+  env_var_list <- with_s3_support(env_var_list)
+  env_var_list <- with_mimalloc(env_var_list)
+  # turn_off_thirdparty_features() needs to happen after with_mimalloc() and
+  # with_s3_support(), since those might turn features ON.
+  thirdparty_deps_unavailable <- !download_ok &&
+    !dir.exists(thirdparty_dependency_dir) &&
+    !env_is("ARROW_DEPENDENCY_SOURCE", "system")
+  if (is_solaris()) {
+    # Note that JSON support does work on Solaris, but will be turned off with
+    # the rest of the thirdparty dependencies (when ARROW-13768 is resolved and
+    # JSON can be turned off at all). All other dependencies don't compile
+    # (e.g thrift, jemalloc, and xsimd) or do compile but `ar` fails to build
+    # libarrow_bundled_dependencies (e.g. re2 and utf8proc).
+    env_var_list <- turn_off_thirdparty_features(env_var_list)
+  } else if (thirdparty_deps_unavailable) {
+    cat(paste0(
+      "*** Building C++ library from source, but downloading thirdparty 
dependencies\n",
+      "    is not possible, so this build will turn off all thirdparty 
features.\n",
+      "    See install vignette for details:\n",
+      "    
https://cran.r-project.org/web/packages/arrow/vignettes/install.html\n"
+    ))
+    env_var_list <- turn_off_thirdparty_features(env_var_list)
+  } else {
+    # If thirdparty_dependency_dir exists, the *_SOURCE_URL env vars

Review comment:
       How about this?
   
   ```suggestion
     } else if (dir.exists(thirdparty_dependency_dir)) {
       # Add the *_SOURCE_URL env vars
   ```

##########
File path: r/tools/nixlibs.R
##########
@@ -413,66 +421,137 @@ cmake_version <- function(cmd = "cmake") {
   )
 }
 
-with_s3_support <- function(env_vars) {
-  arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || 
tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
+turn_off_thirdparty_features <- function(env_var_list) {
+  # Because these are done as environment variables (as opposed to build 
flags),
+  # setting these to "OFF" overrides any previous setting. We don't need to
+  # check the existing value.
+  turn_off <- c(
+    "ARROW_MIMALLOC" = "OFF",
+    "ARROW_JEMALLOC" = "OFF",
+    "ARROW_PARQUET" = "OFF", # depends on thrift
+    "ARROW_DATASET" = "OFF", # depends on parquet
+    "ARROW_S3" = "OFF",
+    "ARROW_WITH_BROTLI" = "OFF",
+    "ARROW_WITH_BZ2" = "OFF",
+    "ARROW_WITH_LZ4" = "OFF",
+    "ARROW_WITH_SNAPPY" = "OFF",
+    "ARROW_WITH_ZLIB" = "OFF",
+    "ARROW_WITH_ZSTD" = "OFF",
+    "ARROW_WITH_RE2" = "OFF",
+    "ARROW_WITH_UTF8PROC" = "OFF",
+    # NOTE: this code sets the environment variable ARROW_JSON to "OFF", but
+    # that setting is will *not* be honored by build_arrow_static.sh until
+    # ARROW-13768 is resolved.
+    "ARROW_JSON" = "OFF",
+    # The syntax to turn off XSIMD is different.
+    # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
+    "EXTRA_CMAKE_FLAGS" = paste(
+      env_var_list[["EXTRA_CMAKE_FLAGS"]],
+      "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
+    )
+  )
+  # Create a new env_var_list, with the values of turn_off set.
+  # replace() also adds new values if they didn't exist before
+  replace(env_var_list, names(turn_off), turn_off)
+}
+
+set_thirdparty_urls <- function(env_var_list) {
+  # This function does *not* check if existing *_SOURCE_URL variables are set.
+  # The directory tools/cpp/thirdparty/download is created by
+  # create_package_with_all_dependencies() and saved in the tar file.
+  # In all other cases, where we're not installing from that offline tar file,
+  # that directory won't exist, but tools/cpp/thirdparty/ still should.
+  # Test tools/cpp/thirdparty to avoid false negatives.
+  deps_dir <- thirdparty_dependency_dir # defined at the top
+  stopifnot(dir.exists(dirname(thirdparty_dependency_dir)))
+  if (!dir.exists(deps_dir)) {
+    return(env_var_list)
+  }
+  files <- list.files(deps_dir, full.names = FALSE)

Review comment:
       ```suggestion
     files <- list.files(thirdparty_dependency_dir, full.names = FALSE)
   ```

##########
File path: r/tools/nixlibs.R
##########
@@ -413,66 +421,137 @@ cmake_version <- function(cmd = "cmake") {
   )
 }
 
-with_s3_support <- function(env_vars) {
-  arrow_s3 <- toupper(Sys.getenv("ARROW_S3")) == "ON" || 
tolower(Sys.getenv("LIBARROW_MINIMAL")) == "false"
+turn_off_thirdparty_features <- function(env_var_list) {
+  # Because these are done as environment variables (as opposed to build 
flags),
+  # setting these to "OFF" overrides any previous setting. We don't need to
+  # check the existing value.
+  turn_off <- c(
+    "ARROW_MIMALLOC" = "OFF",
+    "ARROW_JEMALLOC" = "OFF",
+    "ARROW_PARQUET" = "OFF", # depends on thrift
+    "ARROW_DATASET" = "OFF", # depends on parquet
+    "ARROW_S3" = "OFF",
+    "ARROW_WITH_BROTLI" = "OFF",
+    "ARROW_WITH_BZ2" = "OFF",
+    "ARROW_WITH_LZ4" = "OFF",
+    "ARROW_WITH_SNAPPY" = "OFF",
+    "ARROW_WITH_ZLIB" = "OFF",
+    "ARROW_WITH_ZSTD" = "OFF",
+    "ARROW_WITH_RE2" = "OFF",
+    "ARROW_WITH_UTF8PROC" = "OFF",
+    # NOTE: this code sets the environment variable ARROW_JSON to "OFF", but
+    # that setting is will *not* be honored by build_arrow_static.sh until
+    # ARROW-13768 is resolved.
+    "ARROW_JSON" = "OFF",
+    # The syntax to turn off XSIMD is different.
+    # Pull existing value of EXTRA_CMAKE_FLAGS first (must be defined)
+    "EXTRA_CMAKE_FLAGS" = paste(
+      env_var_list[["EXTRA_CMAKE_FLAGS"]],
+      "-DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE"
+    )
+  )
+  # Create a new env_var_list, with the values of turn_off set.
+  # replace() also adds new values if they didn't exist before
+  replace(env_var_list, names(turn_off), turn_off)
+}
+
+set_thirdparty_urls <- function(env_var_list) {
+  # This function does *not* check if existing *_SOURCE_URL variables are set.
+  # The directory tools/cpp/thirdparty/download is created by
+  # create_package_with_all_dependencies() and saved in the tar file.
+  # In all other cases, where we're not installing from that offline tar file,
+  # that directory won't exist, but tools/cpp/thirdparty/ still should.
+  # Test tools/cpp/thirdparty to avoid false negatives.
+  deps_dir <- thirdparty_dependency_dir # defined at the top
+  stopifnot(dir.exists(dirname(thirdparty_dependency_dir)))
+  if (!dir.exists(deps_dir)) {
+    return(env_var_list)
+  }
+  files <- list.files(deps_dir, full.names = FALSE)
+  url_env_varname <- toupper(sub("(.*?)-.*", "ARROW_\\1_URL", files))
+  # Special handling for the aws dependencies, which have extra `-`
+  aws <- grepl("^aws", files)
+  url_env_varname[aws] <- sub(
+    "AWS_SDK_CPP", "AWSSDK",
+    gsub(
+      "-", "_",
+      sub(
+        "(AWS.*)-.*", "ARROW_\\1_URL",
+        toupper(files[aws])
+      )
+    )
+  )
+  full_filenames <- file.path(normalizePath(deps_dir), files)
+
+  env_var_list <- replace(env_var_list, url_env_varname, full_filenames)
+  if (env_is("ARROW_R_DEV", "true")) {

Review comment:
       ```suggestion
     if (!quietly) {
   ```

##########
File path: r/vignettes/developing.Rmd
##########
@@ -107,6 +107,7 @@ You can choose to build and then install the Arrow library 
into a user-defined d
 
 It is recommended that you install the arrow library to a user-level directory 
to be used in development. This is so that the development version you are 
using doesn't overwrite a released version of Arrow you may have installed. You 
are also able to have more than one version of the Arrow library to link to 
with this approach (by using different `ARROW_HOME` directories for the 
different versions). This approach also matches the recommendations for other 
Arrow bindings like 
[Python](http://arrow.apache.org/docs/developers/python.html).
 
+

Review comment:
       ```suggestion
   ```

##########
File path: r/vignettes/install.Rmd
##########
@@ -102,6 +102,42 @@ satisfy C++ dependencies.
 
 > Note that, unlike packages like `tensorflow`, `blogdown`, and others that 
 > require external dependencies, you do not need to run `install_arrow()` 
 > after a successful `arrow` installation.
 
+The `install-arrow.R` file also includes the 
`create_package_with_all_dependencies()`
+function. Normally, when installing on a computer with internet access, the
+build process will download third-party dependencies as needed.
+This function provides a way to download them in advance.
+Doing so may be useful when installing Arrow on a computer without internet 
access.
+Note that Arrow _can_ be installed on a computer without internet access, but
+many useful features will be disabled, as they depend on third-party 
components.
+More precisely, `arrow::arrow_info()$capabilities()` will be `FALSE` for every
+capability.
+One approach to add more capabilities in an offline install is to prepare a
+package with pre-downloaded dependencies. The
+`create_package_with_all_dependencies()` function does this preparation.
+
+### Using a computer with internet access, pre-download the dependencies:
+* Install the `arrow` package
+* Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
+* Copy the newly created `my_arrow_pkg.tar.gz` to the computer without 
internet access
+
+### On the computer without internet access, install the prepared package:
+* Install the `arrow` package from the copied file 
(`install.packages("my_arrow_pkg.tar.gz")`)
+  * This installation will build from source, so `cmake` must be available
+* Run `arrow_info()` to check installed capabilities
+
+
+### Using a computer with internet access, pre-download the dependencies:
+* Install the `arrow` package
+* Run `download_optional_dependencies(my_dependencies)`
+* Copy the directory `my-arrow-dependencies` to the computer without internet 
access
+
+### On the computer without internet access, use the pre-downloaded 
dependencies:
+* Create a environment variable called `ARROW_THIRDPARTY_DEPENDENCY_DIR` that
+  points to the newly copied `my_dependencies`.
+* Install the `arrow` package
+  * This installation will build from source, so `cmake` must be available
+* Run `arrow_info()` to check installed capabilities
+

Review comment:
       This is stale, right?
   
   ```suggestion
   ```

##########
File path: r/R/install-arrow.R
##########
@@ -137,3 +136,91 @@ reload_arrow <- function() {
     message("Please restart R to use the 'arrow' package.")
   }
 }
+
+
+#' Create an install package with all thirdparty dependencies
+#'
+#' @param outfile File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the 
version)
+#' @param package_source File path for the input tar.gz package. Defaults to
+#' downloading from CRAN.

Review comment:
       Technically it will download from wherever `options(repos)` says, which 
might not be CRAN (like, you could do this with our nightly package repository 
too).

##########
File path: r/R/install-arrow.R
##########
@@ -137,3 +136,91 @@ reload_arrow <- function() {
     message("Please restart R to use the 'arrow' package.")
   }
 }
+
+
+#' Create an install package with all thirdparty dependencies
+#'
+#' @param outfile File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the 
version)
+#' @param package_source File path for the input tar.gz package. Defaults to
+#' downloading from CRAN.
+#' @param quietly boolean, default `TRUE`. If `FALSE`, narrate progress.
+#' @return The full path to `outfile`, invisibly
+#'
+#' This function is used for setting up an offline build. If it's possible to
+#' download at build time, don't use this function. Instead, let `cmake`
+#' download the required dependencies for you.
+#' These downloaded dependencies are only used in the build if
+#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
+#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+#'
+#' ## Steps for an offline install with optional dependencies:
+#'
+#' ### Using a computer with internet access, pre-download the dependencies:
+#' * Install the `arrow` package
+#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
+#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without 
internet access
+#'
+#' ### On the computer without internet access, install the prepared package:
+#' * Install the `arrow` package from the copied file 
(`install.packages("my_arrow_pkg.tar.gz")`)
+#'   * This installation will build from source, so `cmake` must be available
+#' * Run [arrow_info()] to check installed capabilities
+#'
+#'
+#' @examples
+#' \dontrun{
+#' new_pkg <- create_package_with_all_dependencies()
+#' # Note: this works when run in the same R session, but it's meant to be
+#' # copied to a different computer.
+#' install.packages(new_pkg, dependencies = c("Depends", "Imports", 
"LinkingTo"))
+#' }
+#' @export
+create_package_with_all_dependencies <- function(outfile = NULL, 
package_source = NULL, quietly = TRUE) {

Review comment:
       Any reason we need `quietly` as an argument here (other than to make it 
quiet by default)? Seems like you could achieve the same with 
`suppressMessages()`.

##########
File path: r/R/install-arrow.R
##########
@@ -137,3 +136,91 @@ reload_arrow <- function() {
     message("Please restart R to use the 'arrow' package.")
   }
 }
+
+
+#' Create an install package with all thirdparty dependencies
+#'
+#' @param outfile File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the 
version)
+#' @param package_source File path for the input tar.gz package. Defaults to
+#' downloading from CRAN.
+#' @param quietly boolean, default `TRUE`. If `FALSE`, narrate progress.
+#' @return The full path to `outfile`, invisibly
+#'
+#' This function is used for setting up an offline build. If it's possible to
+#' download at build time, don't use this function. Instead, let `cmake`
+#' download the required dependencies for you.
+#' These downloaded dependencies are only used in the build if
+#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
+#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+#'
+#' ## Steps for an offline install with optional dependencies:
+#'
+#' ### Using a computer with internet access, pre-download the dependencies:
+#' * Install the `arrow` package
+#' * Run `create_package_with_all_dependencies("my_arrow_pkg.tar.gz")`
+#' * Copy the newly created `my_arrow_pkg.tar.gz` to the computer without 
internet access
+#'
+#' ### On the computer without internet access, install the prepared package:
+#' * Install the `arrow` package from the copied file 
(`install.packages("my_arrow_pkg.tar.gz")`)
+#'   * This installation will build from source, so `cmake` must be available
+#' * Run [arrow_info()] to check installed capabilities
+#'
+#'
+#' @examples
+#' \dontrun{
+#' new_pkg <- create_package_with_all_dependencies()
+#' # Note: this works when run in the same R session, but it's meant to be
+#' # copied to a different computer.
+#' install.packages(new_pkg, dependencies = c("Depends", "Imports", 
"LinkingTo"))
+#' }
+#' @export
+create_package_with_all_dependencies <- function(outfile = NULL, 
package_source = NULL, quietly = TRUE) {

Review comment:
       Also, what do you think about a signature like this? Inputs before 
outputs, and make clear that both arguments are the same kind of thing (a 
string file path).
   
   ```
   create_package_with_all_dependencies <- function(source_file = NULL, 
dest_file = NULL) {
   ```

##########
File path: r/tools/nixlibs.R
##########
@@ -52,6 +42,24 @@ try_download <- function(from_url, to_file) {
   !inherits(status, "try-error") && status == 0
 }
 
+# For local debugging, set ARROW_R_DEV=TRUE to make this script print more
+quietly <- !env_is("ARROW_R_DEV", "true")
+
+# Default is build from source, not download a binary
+build_ok <- !env_is("LIBARROW_BUILD", "false")
+binary_ok <- !(env_is("LIBARROW_BINARY", "false") || env_is("LIBARROW_BINARY", 
""))
+
+# Check if we're doing an offline build.
+# (Note that cmake will still be downloaded if necessary
+#  https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds)
+download_ok <- !env_is("TEST_OFFLINE_BUILD", "true") && 
try_download("https://github.com", tempfile())
+
+# This path, within the tar file, might exist if
+# create_package_with_all_dependencies() was run. Otherwise, it won't, but
+# tools/cpp/thirdparty/ still will.
+thirdparty_dependency_dir <- "tools/cpp/thirdparty/download"

Review comment:
       Is there any value in allowing this to be outside of the tarball still, 
like `Sys.getenv("ARROW_THIRDPARTY_DEPENDENCY_DIR", 
"tools/cpp/thirdparty/download")`?

##########
File path: r/R/install-arrow.R
##########
@@ -137,3 +136,91 @@ reload_arrow <- function() {
     message("Please restart R to use the 'arrow' package.")
   }
 }
+
+
+#' Create an install package with all thirdparty dependencies
+#'
+#' @param outfile File path for the new tar.gz package. Defaults to
+#' `arrow_V.V.V_with_deps.tar.gz` in the current directory (`V.V.V` is the 
version)
+#' @param package_source File path for the input tar.gz package. Defaults to
+#' downloading from CRAN.
+#' @param quietly boolean, default `TRUE`. If `FALSE`, narrate progress.
+#' @return The full path to `outfile`, invisibly
+#'
+#' This function is used for setting up an offline build. If it's possible to
+#' download at build time, don't use this function. Instead, let `cmake`
+#' download the required dependencies for you.
+#' These downloaded dependencies are only used in the build if
+#' `ARROW_DEPENDENCY_SOURCE` is unset, `BUNDLED`, or `AUTO`.
+#' https://arrow.apache.org/docs/developers/cpp/building.html#offline-builds
+#'
+#' ## Steps for an offline install with optional dependencies:
+#'
+#' ### Using a computer with internet access, pre-download the dependencies:
+#' * Install the `arrow` package

Review comment:
       Since we're not expecting things inside `inst/` anymore, you could also 
source(github_url/install-arrow.R) now, right?

##########
File path: dev/tasks/r/github.linux.offline.build.yml
##########
@@ -0,0 +1,111 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# NOTE: must set "Crossbow" as name to have the badge links working in the
+# github comment reports!
+name: Crossbow
+
+on:
+  push
+
+jobs:
+  grab-dependencies:
+    name: "Download thirdparty dependencies"
+    runs-on: ubuntu-20.04
+    strategy:
+      fail-fast: false
+    env:
+      ARROW_R_DEV: "TRUE"
+      RSPM: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"
+    steps:
+      - name: Checkout Arrow
+        run: |
+          git clone --no-checkout {{ arrow.remote }} arrow
+          git -C arrow fetch -t {{ arrow.remote }} {{ arrow.branch }}
+          git -C arrow checkout FETCH_HEAD
+          git -C arrow submodule update --init --recursive
+      - name: Free Up Disk Space
+        shell: bash
+        run: arrow/ci/scripts/util_cleanup.sh
+      - name: Fetch Submodules and Tags
+        shell: bash
+        run: cd arrow && ci/scripts/util_checkout.sh
+      - uses: r-lib/actions/setup-r@v1
+      - name: Pull Arrow dependencies
+        run: |
+          cd arrow/r
+          # copy the two files we will need
+          # TODO: allow manually specifying `download_dependencies.sh` in 
`download_optional_dependencies()` then we won't need to install
+          mkdir -p inst/thirdparty/
+          cp -p ../cpp/thirdparty/download_dependencies.sh inst/thirdparty/
+          cp -p ../cpp/thirdparty/versions.txt inst/thirdparty/
+          mkdir thirdparty_deps
+          R -e 'source("R/install-arrow.R"); 
download_optional_dependencies("thirdparty_deps", download_dependencies_sh = 
"./inst/thirdparty/download_dependencies.sh")'

Review comment:
       Need to update these CI jobs still




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to