This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new dd8734dc8f GH-37923: [R] Move macOS build system to nixlibs.R (#37684)
dd8734dc8f is described below
commit dd8734dc8ff21e7f8463b239e800aeac0ddcb764
Author: Jacob Wujciak-Jens <[email protected]>
AuthorDate: Thu Oct 5 14:48:44 2023 +0200
GH-37923: [R] Move macOS build system to nixlibs.R (#37684)
This PR modifies the build system of the R package to no longer rely on
auto/homebrew. Instead this PR adds the infrastructure and code paths to use
the same type of pre-compiled libarrow binaries as we use for Linux. The main
difference is the use of the binaries even on CRAN (as we previously also used
binaries in the form of brew bottles).
The addition of the new artifacts to tasks.yml should ensure that they get
uploaded to the nightly repo as well as to the artifactory during the release
(@ kou please confirm).
A summary of the changes in this PR:
- update `r/configure` and `r/tools/nixlibs.R` to enable the source build
on macOS and usage of precompiled binaries using the existing mechanism to test
compile a program to detect the existing openssl version
- added tests for the changes in nixlibs.R
- update the binary allow-list
- Add the build jobs for libarrow binaries for arm64 and x86_64 macos with
openssl 1.1 and 3.0 to the `r-binary-packages` job
- Use the binaries to build the nightly packages
- bump snappy version to 1.1.10 (and patch it on macOS 10.13) due to build issues
with the current version. This also touches on a number of issues related to
a sanitizer issue we have had for a long time: #32562 #31766
- Disable the centos binary test step: #37922
Follow up issues:
- #37921
- #37941
- #37945
* Closes: #37923
Lead-authored-by: Jacob Wujciak-Jens <[email protected]>
Co-authored-by: Jonathan Keane <[email protected]>
Co-authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Dewey Dunnington <[email protected]>
---
cpp/Brewfile | 6 +++
cpp/cmake_modules/SetupCxxFlags.cmake | 15 ++++--
cpp/cmake_modules/ThirdpartyToolchain.cmake | 21 ++++++++
cpp/cmake_modules/snappy.diff | 12 +++++
cpp/thirdparty/versions.txt | 5 +-
dev/release/rat_exclude_files.txt | 1 +
dev/tasks/macros.jinja | 14 ++++-
dev/tasks/r/github.packages.yml | 83 +++++++++++++++++++++++-----
dev/tasks/tasks.yml | 4 ++
r/configure | 55 +++----------------
r/tools/nixlibs-allowlist.txt | 1 +
r/tools/nixlibs.R | 84 ++++++++++++++++++++++-------
r/tools/test-nixlibs.R | 64 ++++++++++++++++++----
13 files changed, 268 insertions(+), 97 deletions(-)
diff --git a/cpp/Brewfile b/cpp/Brewfile
index 58015d2121..0f55279853 100644
--- a/cpp/Brewfile
+++ b/cpp/Brewfile
@@ -19,7 +19,9 @@ brew "aws-sdk-cpp"
brew "bash"
brew "boost"
brew "brotli"
+brew "bzip2"
brew "c-ares"
+brew "curl"
brew "ccache"
brew "cmake"
brew "flatbuffers"
@@ -29,14 +31,18 @@ brew "googletest"
brew "grpc"
brew "llvm@14"
brew "lz4"
+brew "mimalloc"
brew "ninja"
brew "node"
brew "openssl@3"
+brew "pkg-config"
brew "protobuf"
brew "python"
brew "rapidjson"
+brew "re2"
brew "snappy"
brew "thrift"
+brew "utf8proc"
brew "wget"
brew "xsimd"
brew "zstd"
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 5531415ac2..e90c00612c 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR
CMAKE_CXX_COMPILER_ID STRE
# Don't complain about optimization passes that were not possible
set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed")
- # Avoid clang / libc++ error about C++17 aligned allocation on macOS.
- # See
https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
- # for details.
if(APPLE)
- set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new")
+ # Avoid clang / libc++ error about C++17 aligned allocation on macOS.
+ # See
https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
+ # for details.
+ string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new")
+
+ if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
+ # Avoid C++17 std::get 'not available' issue on macOS 10.13
+ # This will be required until atleast R 4.4 is released and
+ # CRAN (hopefully) stops checking on 10.13
+ string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY")
+ endif()
endif()
endif()
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 85c0337d10..6d6a2bf775 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1308,6 +1308,26 @@ macro(build_snappy)
set(SNAPPY_CMAKE_ARGS
${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF
-DSNAPPY_BUILD_BENCHMARKS=OFF
"-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
+ # Snappy unconditionaly enables Werror when building with clang this can lead
+ # to build failues by way of new compiler warnings. This adds a flag to
disable
+ # Werror to the very end of the invocation to override the snappy internal
setting.
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
+ list(APPEND
+ SNAPPY_CMAKE_ARGS
+
"-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error"
+ )
+ endforeach()
+ endif()
+
+ if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
+ # On macOS 10.13 we need to explicitly add <functional> to avoid a missing
include error
+ # This can be removed once CRAN no longer checks on macOS 10.13
+ find_program(PATCH patch REQUIRED)
+ set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i
${CMAKE_CURRENT_LIST_DIR}/snappy.diff)
+ else()
+ set(SNAPPY_PATCH_COMMAND)
+ endif()
externalproject_add(snappy_ep
${EP_COMMON_OPTIONS}
@@ -1315,6 +1335,7 @@ macro(build_snappy)
INSTALL_DIR ${SNAPPY_PREFIX}
URL ${SNAPPY_SOURCE_URL}
URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}"
+ PATCH_COMMAND ${SNAPPY_PATCH_COMMAND}
CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
diff --git a/cpp/cmake_modules/snappy.diff b/cpp/cmake_modules/snappy.diff
new file mode 100644
index 0000000000..f86e2bb197
--- /dev/null
+++ b/cpp/cmake_modules/snappy.diff
@@ -0,0 +1,12 @@
+diff --git a/snappy.cc b/snappy.cc
+index d414718..5b0d0d6 100644
+--- a/snappy.cc
++++ b/snappy.cc
+@@ -83,6 +83,7 @@
+ #include <string>
+ #include <utility>
+ #include <vector>
++#include <functional>
+
+ namespace snappy {
+
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 52d302592b..56163f35d2 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -101,9 +101,8 @@
ARROW_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709
ARROW_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806
ARROW_RE2_BUILD_VERSION=2022-06-01
ARROW_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f
-# 1.1.9 is patched to implement https://github.com/google/snappy/pull/148 if
this is bumped, remove the patch
-ARROW_SNAPPY_BUILD_VERSION=1.1.9
-ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8b4f1c962b478b6e06e7
+ARROW_SNAPPY_BUILD_VERSION=1.1.10
+ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90
ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0
ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628
ARROW_S2N_TLS_BUILD_VERSION=v1.3.35
diff --git a/dev/release/rat_exclude_files.txt
b/dev/release/rat_exclude_files.txt
index aebe321d61..af084ea215 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -24,6 +24,7 @@ cpp/build-support/iwyu/*
cpp/cmake_modules/FindPythonLibsNew.cmake
cpp/cmake_modules/SnappyCMakeLists.txt
cpp/cmake_modules/SnappyConfig.h
+cpp/cmake_modules/snappy.diff
cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake
cpp/src/parquet/.parquetcppversion
cpp/src/generated/parquet_constants.cpp
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index faf77a1168..54f676fd49 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -307,7 +307,8 @@ on:
stopifnot(packageVersion("arrow") == {{
'"${{needs.source.outputs.pkg_version}}"' }})
{% endmacro %}
-{%- macro github_setup_local_r_repo(get_nix, get_win) -%}
+{%- macro github_setup_local_r_repo(get_nix, get_win, get_mac=False) -%}
+# TODO: improve arg handling
- name: Setup local repo
shell: bash
run: mkdir repo
@@ -327,6 +328,17 @@ on:
path: repo/libarrow/bin/linux-openssl-{{ openssl_version }}
{% endfor %}
{% endif %}
+ {% if get_mac %}
+ {% for openssl_version in ["1.1", "3.0"] %}
+ {% for arch in ["x86_64", "arm64"] %}
+ - name: Get macOS {{ arch }} OpenSSL {{ openssl_version }} binary
+ uses: actions/download-artifact@v3
+ with:
+ name: r-lib__libarrow__bin__darwin-{{arch}}-openssl-{{ openssl_version }}
+ path: repo/libarrow/bin/darwin-{{ arch }}-openssl-{{ openssl_version }}
+ {% endfor %}
+ {% endfor %}
+ {% endif %}
- name: Get src pkg
uses: actions/download-artifact@v3
with:
diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml
index dbe21ffb6b..760e3b6da4 100644
--- a/dev/tasks/r/github.packages.yml
+++ b/dev/tasks/r/github.packages.yml
@@ -56,6 +56,59 @@ jobs:
name: r-pkg__src__contrib
path: arrow/r/arrow_*.tar.gz
+ macos-cpp:
+ name: C++ Binary macOS OpenSSL {{ '${{ matrix.openssl }}' }} {{ '${{
matrix.platform.arch }}' }}
+
+ runs-on: {{ '${{ matrix.platform.runs_on }}' }}
+
+ needs: source
+ strategy:
+ fail-fast: false
+ matrix:
+ platform:
+ - { runs_on: ["self-hosted", "macos-10.13"], arch: "x86_64" }
+
+ - { runs_on: ["self-hosted", "macOS", "arm64", "devops-managed"],
arch: "arm64" }
+ openssl: ['3.0', '1.1']
+
+ steps:
+ {{ macros.github_checkout_arrow(action_v="3")|indent }}
+ {{ macros.github_change_r_pkg_version(is_fork, '${{
needs.source.outputs.pkg_version }}')|indent }}
+ - name: Install Deps
+ if: {{ "${{ !contains(matrix.platform.runs_on, 'macos-10.13') }}" }}
+ run: |
+ brew install sccache ninja
+ brew install openssl@{{ '${{ matrix.openssl }}' }}
+ - name: Build libarrow
+ shell: bash
+ env:
+ {{ macros.github_set_sccache_envvars()|indent(8) }}
+ MACOSX_DEPLOYMENT_TARGET: "10.13"
+ ARROW_S3: ON
+ ARROW_GCS: ON
+ ARROW_DEPENDENCY_SOURCE: BUNDLED
+ CMAKE_GENERATOR: Ninja
+ LIBARROW_MINIMAL: false
+ run: |
+ sccache --start-server
+ export EXTRA_CMAKE_FLAGS="-DOPENSSL_ROOT_DIR=$(brew --prefix
openssl@{{ '${{ matrix.openssl }}' }})"
+ cd arrow
+ r/inst/build_arrow_static.sh
+ - name: Bundle libarrow
+ shell: bash
+ env:
+ PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip
+ VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }}
+ run: |
+ cd arrow/r/libarrow/dist
+ zip -r $PKG_FILE lib/ include/
+
+ - name: Upload binary artifact
+ uses: actions/upload-artifact@v3
+ with:
+ name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}'
}}-openssl-{{ '${{ matrix.openssl }}' }}
+ path: arrow/r/libarrow/dist/arrow-*.zip
+
linux-cpp:
name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }}
runs-on: ubuntu-latest
@@ -135,7 +188,7 @@ jobs:
path: build/arrow-*.zip
r-packages:
- needs: [source, windows-cpp]
+ needs: [source, windows-cpp, macos-cpp]
name: {{ '${{ matrix.platform.name }} ${{ matrix.r_version.r }}' }}
runs-on: {{ '${{ matrix.platform.runs_on }}' }}
strategy:
@@ -167,7 +220,7 @@ jobs:
rig system setup-user-lib
rig system add-pak
- {{ macros.github_setup_local_r_repo(false, true)|indent }}
+ {{ macros.github_setup_local_r_repo(false, true, true)|indent }}
- name: Prepare Dependency Installation
shell: bash
@@ -178,18 +231,19 @@ jobs:
with:
working-directory: 'arrow'
extra-packages: cpp11
- - name: Install sccache
- if: startsWith(matrix.platform, 'macos')
- run: brew install sccache
+ - name: Set CRAN like openssl
+ if: contains(matrix.platform.runs_on, 'arm64')
+ run: |
+ # The arm64 runners contain openssl 1.1.1t in this path that is
always included first so we need to override the
+ # default setting of the brew --prefix as root dir to avoid version
conflicts.
+ echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV
- name: Build Binary
id: build
shell: Rscript {0}
env:
- NOT_CRAN: "true" # actions/setup-r sets this implicitly
+ NOT_CRAN: "false" # actions/setup-r sets this implicitly
ARROW_R_DEV: "true"
- FORCE_AUTOBREW: "true" # this is ignored on windows
- # sccache for macos
- {{ macros.github_set_sccache_envvars()|indent(8) }}
+ LIBARROW_BINARY: "true" # has to be set as long as allowlist not
updated
run: |
on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
@@ -213,8 +267,10 @@ jobs:
INSTALL_opts = INSTALL_opts
)
+
# Test
library(arrow)
+ arrow_info()
read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
# encode contrib.url for artifact name
@@ -233,7 +289,6 @@ jobs:
with:
name: r-pkg{{ '${{ steps.build.outputs.path }}' }}
path: arrow_*
-
test-linux-binary:
needs: [source, linux-cpp]
name: Test binary {{ '${{ matrix.config.image }}' }}
@@ -291,7 +346,10 @@ jobs:
with:
name: r-pkg_centos7
path: arrow_*
+
test-centos-binary:
+ # arrow binary package not on ppm currently see #37922
+ if: false
needs: test-linux-binary
runs-on: ubuntu-latest
container: "rstudio/r-base:4.2-centos7"
@@ -317,7 +375,8 @@ jobs:
read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
print(arrow_info())
- test-source:
+ #TODO test macos source build?
+ test-linux-source:
needs: source
name: Test linux source build
runs-on: ubuntu-latest
@@ -367,7 +426,7 @@ jobs:
upload-binaries:
# Only upload binaries if all tests pass.
- needs: [r-packages, test-source, test-linux-binary, test-centos-binary]
+ needs: [r-packages, test-linux-source, test-linux-binary]
name: Upload artifacts
runs-on: ubuntu-latest
steps:
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 859ff8ddb5..798932db23 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -994,6 +994,10 @@ tasks:
- r-lib__libarrow__bin__linux-openssl-1.0__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__linux-openssl-1.1__arrow-{no_rc_r_version}\.zip
- r-lib__libarrow__bin__linux-openssl-3.0__arrow-{no_rc_r_version}\.zip
+ -
r-lib__libarrow__bin__darwin-arm64-openssl-1.1__arrow-{no_rc_r_version}\.zip
+ -
r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip
+ -
r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip
+ -
r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip
- r-pkg__bin__windows__contrib__4.1__arrow_{no_rc_r_version}\.zip
- r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip
- r-pkg__bin__macosx__contrib__4.1__arrow_{no_rc_r_version}\.tgz
diff --git a/r/configure b/r/configure
index 593f60bbdd..d244b1a7c1 100755
--- a/r/configure
+++ b/r/configure
@@ -39,8 +39,8 @@
#
# * Installing a released version from source, as from CRAN, with
# no other prior setup
-# * On macOS, autobrew is used to retrieve libarrow and dependencies
-# * On Linux, the nixlibs.R build script will download or build
+# * On macOS and Linux, the nixlibs.R build script will download
+# or build libarrow and dependencies
# * Installing a released version but first installing libarrow.
# It will use pkg-config and brew to search for libraries.
# * Installing a development version from source as a user.
@@ -65,8 +65,6 @@ PKG_TEST_HEADER="<arrow/api.h>"
# Some env vars that control the build (all logical, case insensitive)
# Development mode, also increases verbosity in the bundled build
ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
-# autobrew is how mac binaries are built on CRAN; FORCE ensures we use it here
-FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'`
# The bundled build compiles arrow C++ from source; FORCE ensures we don't
pick up
# any other packages that may be found on the system
FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'`
@@ -141,19 +139,11 @@ fi
find_or_build_libarrow () {
if [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
do_bundled_build
- elif [ "$FORCE_AUTOBREW" = "true" ]; then
- do_autobrew
else
find_arrow
if [ "$_LIBARROW_FOUND" = "false" ]; then
# If we haven't found a suitable version of libarrow, build it
- if [ "$UNAME" = "Darwin" ] && ! echo $VERSION | grep -q "000"; then
- # Only autobrew on release version (for testing, use FORCE_AUTOBREW
above)
- # (dev versions end in .9000, and nightly gets something like
.10000xxx)
- do_autobrew
- else
- do_bundled_build
- fi
+ do_bundled_build
fi
fi
}
@@ -175,12 +165,6 @@ find_arrow () {
# 2. Use pkg-config to find arrow on the system
_LIBARROW_FOUND="`${PKG_CONFIG} --variable=prefix --silence-errors
${PKG_CONFIG_NAME}`"
echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND"
- elif brew --prefix ${PKG_BREW_NAME} > /dev/null 2>&1; then
- # 3. On macOS, look for Homebrew apache-arrow
- # (note that if you have pkg-config, homebrew arrow may have already
been found)
- _LIBARROW_FOUND=`brew --prefix ${PKG_BREW_NAME}`
- echo "*** Trying Arrow C++ found by Homebrew: ${_LIBARROW_FOUND}"
- export
PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
else
_LIBARROW_FOUND="false"
fi
@@ -247,34 +231,6 @@ do_bundled_build () {
fi
}
-do_autobrew () {
- echo "*** Downloading ${PKG_BREW_NAME}"
-
- # Setup for local autobrew testing
- if [ -f "tools/apache-arrow.rb" ]; then
- # If you want to use a local apache-arrow.rb formula, do
- # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools
- # before R CMD build or INSTALL (assuming a local checkout of the
apache/arrow repository).
- # If you have this, you should use the local autobrew script so they match.
- cp tools/autobrew .
- fi
-
- if [ -f "autobrew" ]; then
- echo "**** Using local manifest for ${PKG_BREW_NAME}"
- else
- if ! curl -sfL "https://autobrew.github.io/scripts/$PKG_BREW_NAME" >
autobrew; then
- echo "Failed to download manifest for ${PKG_BREW_NAME}"
- # Fall back to the local copy
- cp tools/autobrew .
- fi
- fi
- if ! . autobrew; then
- echo "Failed to retrieve binary for ${PKG_BREW_NAME}"
- fi
- # autobrew sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS`
- # TODO: move PKG_LIBS and PKG_CFLAGS out of autobrew and use set_pkg_vars
-}
-
# Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and
`PKG_CFLAGS`
# either from pkg-config or by inferring things about the directory in $1
set_pkg_vars () {
@@ -298,6 +254,11 @@ set_pkg_vars () {
if [ "$ARROW_R_CXXFLAGS" ]; then
PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS"
fi
+
+ if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13';
then
+ # avoid C++17 availability warnings on macOS < 11
+ PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY"
+ fi
}
# If we have pkg-config, it will tell us what libarrow needs
diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt
index bd9f0c1b2c..9c368e6ed1 100644
--- a/r/tools/nixlibs-allowlist.txt
+++ b/r/tools/nixlibs-allowlist.txt
@@ -2,3 +2,4 @@ ubuntu
centos
redhat
rhel
+darwin
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 3d908c05ca..60deca05cd 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -28,6 +28,8 @@ if (test_mode && is.na(VERSION)) {
}
dev_version <- package_version(VERSION)[1, 4]
+on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin"
+
# Small dev versions are added for R-only changes during CRAN submission.
if (is.na(dev_version) || dev_version < "100") {
@@ -94,8 +96,10 @@ download_binary <- function(lib) {
}
} else {
if (!quietly) {
- cat(sprintf("*** Downloading libarrow binary failed for version %s
(%s)\n at %s\n",
- VERSION, lib, binary_url))
+ cat(sprintf(
+ "*** Downloading libarrow binary failed for version %s (%s)\n at
%s\n",
+ VERSION, lib, binary_url
+ ))
}
libfile <- NULL
}
@@ -114,6 +118,10 @@ download_binary <- function(lib) {
# * "linux-openssl-1.0" (OpenSSL 1.0)
# * "linux-openssl-1.1" (OpenSSL 1.1)
# * "linux-openssl-3.0" (OpenSSL 3.0)
+# * "macos-amd64-openssl-1.1" (OpenSSL 1.1)
+# * "macos-amd64-openssl-3.0" (OpenSSL 3.0)
+# * "macos-arm64-openssl-1.1" (OpenSSL 1.1)
+# * "macos-arm64-openssl-3.0" (OpenSSL 3.0)
# These string values, along with `NULL`, are the potential return values of
# this function.
identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info =
distro()) {
@@ -142,7 +150,7 @@ check_allowlist <- function(os, allowed =
"https://raw.githubusercontent.com/apa
# Try a remote allowlist so that we can add/remove without a release
suppressWarnings(readLines(allowed)),
# Fallback to default: allowed only on Ubuntu and CentOS/RHEL
- error = function(e) c("ubuntu", "centos", "redhat", "rhel")
+ error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin")
)
# allowlist should contain valid regular expressions (plain strings ok too)
any(grepl(paste(allowlist, collapse = "|"), os))
@@ -151,14 +159,16 @@ check_allowlist <- function(os, allowed =
"https://raw.githubusercontent.com/apa
select_binary <- function(os = tolower(Sys.info()[["sysname"]]),
arch = tolower(Sys.info()[["machine"]]),
test_program = test_for_curl_and_openssl) {
- if (identical(os, "linux") && identical(arch, "x86_64")) {
- # We only host x86 linux binaries today
- tryCatch(
+ if (identical(os, "darwin") || (identical(os, "linux") && identical(arch,
"x86_64"))) {
+ # We only host x86 linux binaries and x86 & arm64 macos today
+ binary <- tryCatch(
# Somehow the test program system2 call errors on the sanitizer builds
# so globally handle the possibility that this could fail
{
errs <- compile_test_program(test_program)
- determine_binary_from_stderr(errs)
+ openssl_version <- determine_binary_from_stderr(errs)
+ arch <- ifelse(identical(os, "darwin"), paste0("-", arch, "-"), "-")
+ ifelse(is.null(openssl_version), NULL, paste0(os, arch,
openssl_version))
},
error = function(e) {
cat("*** Unable to find libcurl and openssl\n")
@@ -168,17 +178,20 @@ select_binary <- function(os =
tolower(Sys.info()[["sysname"]]),
} else {
# No binary available for arch
cat(sprintf("*** Building on %s %s\n", os, arch))
- NULL
+ binary <- NULL
}
+ return(binary)
}
# This tests that curl and OpenSSL are present (bc we can include their
headers)
# and it checks for other versions/features and raises errors that we grep for
test_for_curl_and_openssl <- "
+#ifndef __APPLE__
#include <ciso646>
#ifdef _LIBCPP_VERSION
#error Using libc++
#endif
+#endif
#include <curl/curl.h>
#include <openssl/opensslv.h>
@@ -194,11 +207,14 @@ test_for_curl_and_openssl <- "
"
compile_test_program <- function(code) {
- # Note: if we wanted to check for openssl on macOS, we'd have to set the brew
- # path as a -I directory. But since we (currently) only run this code to
- # determine whether we can download a Linux binary, it's not relevant.
+ openssl_dir <- ""
+ if (on_macos) {
+ openssl_root_dir <- get_macos_openssl_dir()
+ openssl_dir <- paste0("-I", openssl_root_dir, "/include")
+ }
runner <- paste(
R_CMD_config("CXX17"),
+ openssl_dir,
R_CMD_config("CPPFLAGS"),
R_CMD_config("CXX17FLAGS"),
R_CMD_config("CXX17STD"),
@@ -208,17 +224,34 @@ compile_test_program <- function(code) {
suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner),
stdout = FALSE, stderr = TRUE))
}
+get_macos_openssl_dir <- function() {
+ openssl_root_dir <- Sys.getenv("OPENSSL_ROOT_DIR", NA)
+ header <- "openssl/opensslv.h"
+ if (is.na(openssl_root_dir) || !file.exists(file.path(openssl_root_dir,
"include", header))) {
+ # try to guess default openssl include dir based on CRAN's build script
+ # https://github.com/R-macos/recipes/blob/master/build.sh#L35
+ if (identical(Sys.info()["machine"], "arm64") &&
file.exists(file.path("/opt/R/arm64/include", header))) {
+ openssl_root_dir <- "/opt/R/arm64"
+ } else if (identical(Sys.info()["machine"], "x86_64") &&
file.exists(file.path("/opt/R/x86_64/include", header))) {
+ openssl_root_dir <- "/opt/R/x86_64"
+ } else {
+ openssl_root_dir <- "/usr/local"
+ }
+ }
+ return(openssl_root_dir)
+}
+
# (built with newer devtoolset but older glibc (2.17) for broader
compatibility,# like manylinux2014)
determine_binary_from_stderr <- function(errs) {
if (is.null(attr(errs, "status"))) {
# There was no error in compiling: so we found libcurl and OpenSSL >= 1.1,
# openssl is < 3.0
cat("*** Found libcurl and OpenSSL >= 1.1\n")
- return("linux-openssl-1.1")
+ return("openssl-1.1")
# Else, check for dealbreakers:
- } else if (any(grepl("Using libc++", errs, fixed = TRUE))) {
- # Our binaries are all built with GNU stdlib so they fail with libc++
- cat("*** Found libc++\n")
+ } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) {
+ # Our linux binaries are all built with GNU stdlib so they fail with libc++
+ cat("*** Linux binaries incompatible with libc++\n")
return(NULL)
} else if (header_not_found("curl/curl", errs)) {
cat("*** libcurl not found\n")
@@ -231,11 +264,15 @@ determine_binary_from_stderr <- function(errs) {
return(NULL)
# Else, determine which other binary will work
} else if (any(grepl("Using OpenSSL version 1.0", errs))) {
+ if (on_macos) {
+ cat("*** OpenSSL 1.0 is not supported on macOS\n")
+ return(NULL)
+ }
cat("*** Found libcurl and OpenSSL < 1.1\n")
- return("linux-openssl-1.0")
+ return("openssl-1.0")
} else if (any(grepl("Using OpenSSL version 3", errs))) {
cat("*** Found libcurl and OpenSSL >= 3.0.0\n")
- return("linux-openssl-3.0")
+ return("openssl-3.0")
}
NULL
}
@@ -248,6 +285,11 @@ header_not_found <- function(header, errs) {
#### start distro ####
distro <- function() {
+ # This is not part of distro but needed to enable prebuilt binaries on macos
+ if (on_macos) {
+ return(list(id = "darwin", arch = tolower(Sys.info()[["machine"]])))
+ }
+
# The code in this script is a (potentially stale) copy of the distro package
if (requireNamespace("distro", quietly = TRUE)) {
# Use the version from the package, which may be updated from this
@@ -487,8 +529,10 @@ build_libarrow <- function(src_dir, dst_dir) {
# It failed :(
cat("**** Error building Arrow C++.", "\n")
if (quietly) {
- cat("**** Printing contents of build log because the build failed",
- "while ARROW_R_DEV was set to FALSE\n")
+ cat(
+ "**** Printing contents of build log because the build failed",
+ "while ARROW_R_DEV was set to FALSE\n"
+ )
cat(readLines(build_log_path), sep = "\n")
cat("**** Complete build log may still be present at", build_log_path,
"\n")
}
@@ -503,7 +547,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") {
# If not found, download it
cat("**** cmake\n")
CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4")
- if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
+ if (on_macos) {
postfix <- "-macos-universal.tar.gz"
} else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) {
postfix <- "-linux-aarch64.tar.gz"
diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R
index e099dcdad4..c9571b58b7 100644
--- a/r/tools/test-nixlibs.R
+++ b/r/tools/test-nixlibs.R
@@ -21,8 +21,10 @@
# Flag so that we just load the functions and don't evaluate them like we do
# when called from configure.R
TESTING <- TRUE
-
-source("nixlibs.R", local = TRUE)
+# The functions use `on_macos` from the env they were sourced in, so we need
tool
+# explicitly set it in that environment.
+nixlibs_env <- environment()
+source("nixlibs.R", local = nixlibs_env)
test_that("identify_binary() based on LIBARROW_BINARY", {
expect_null(identify_binary("FALSE"))
@@ -31,10 +33,6 @@ test_that("identify_binary() based on LIBARROW_BINARY", {
})
test_that("select_binary() based on system", {
- expect_output(
- expect_null(select_binary("darwin", "x86_64")), # Not built today
- "Building on darwin x86_64"
- )
expect_output(
expect_null(select_binary("linux", arch = "aarch64")), # Not built today
"Building on linux aarch64"
@@ -52,21 +50,30 @@ test_that("determine_binary_from_stderr", {
expect_output(
expect_identical(
determine_binary_from_stderr(compile_test_program("int a;")),
- "linux-openssl-1.1"
+ "openssl-1.1"
),
"Found libcurl and OpenSSL >= 1.1"
)
+
+ nixlibs_env$on_macos <- FALSE
expect_output(
expect_identical(
determine_binary_from_stderr(compile_test_program("#error Using OpenSSL
version 1.0")),
- "linux-openssl-1.0"
+ "openssl-1.0"
),
"Found libcurl and OpenSSL < 1.1"
)
+ nixlibs_env$on_macos <- TRUE
+ expect_output(
+ expect_null(
+ determine_binary_from_stderr(compile_test_program("#error Using OpenSSL
version 1.0"))
+ ),
+ "OpenSSL 1.0 is not supported on macOS"
+ )
expect_output(
expect_identical(
determine_binary_from_stderr(compile_test_program("#error Using OpenSSL
version 3")),
- "linux-openssl-3.0"
+ "openssl-3.0"
),
"Found libcurl and OpenSSL >= 3.0.0"
)
@@ -79,6 +86,7 @@ test_that("determine_binary_from_stderr", {
})
test_that("select_binary() with test program", {
+ nixlibs_env$on_macos <- FALSE
expect_output(
expect_identical(
select_binary("linux", "x86_64", "int a;"),
@@ -100,13 +108,49 @@ test_that("select_binary() with test program", {
),
"Found libcurl and OpenSSL >= 3.0.0"
)
+ nixlibs_env$on_macos <- TRUE
+ expect_output(
+ expect_identical(
+ select_binary("darwin", "x86_64", "int a;"),
+ "darwin-x86_64-openssl-1.1"
+ ),
+ "Found libcurl and OpenSSL >= 1.1"
+ )
+ expect_output(
+ expect_identical(
+ select_binary("darwin", "x86_64", "#error Using OpenSSL version 3"),
+ "darwin-x86_64-openssl-3.0"
+ ),
+ "Found libcurl and OpenSSL >= 3.0.0"
+ )
+ expect_output(
+ expect_identical(
+ select_binary("darwin", "arm64", "int a;"),
+ "darwin-arm64-openssl-1.1"
+ ),
+ "Found libcurl and OpenSSL >= 1.1"
+ )
+ expect_output(
+ expect_identical(
+ select_binary("darwin", "arm64", "#error Using OpenSSL version 3"),
+ "darwin-arm64-openssl-3.0"
+ ),
+ "Found libcurl and OpenSSL >= 3.0.0"
+ )
+ expect_output(
+ expect_null(
+ select_binary("darwin", "x86_64", "#error Using OpenSSL version 1.0")
+ ),
+ "OpenSSL 1.0 is not supported on macOS"
+ )
})
test_that("check_allowlist", {
tf <- tempfile()
- cat("tu$\n^cent\n", file = tf)
+ cat("tu$\n^cent\n^dar\n", file = tf)
expect_true(check_allowlist("ubuntu", tf))
expect_true(check_allowlist("centos", tf))
+ expect_true(check_allowlist("darwin", tf))
expect_false(check_allowlist("redhat", tf)) # remote allowlist doesn't have
this
expect_true(check_allowlist("redhat", tempfile())) # remote allowlist
doesn't exist, so we fall back to the default list, which contains redhat
expect_false(check_allowlist("debian", tempfile()))