This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new dd8734dc8f GH-37923: [R] Move macOS build system to nixlibs.R (#37684)
dd8734dc8f is described below

commit dd8734dc8ff21e7f8463b239e800aeac0ddcb764
Author: Jacob Wujciak-Jens <[email protected]>
AuthorDate: Thu Oct 5 14:48:44 2023 +0200

    GH-37923: [R] Move macOS build system to nixlibs.R (#37684)
    
    This PR modifies the build system of the R package to no longer rely on 
auto/homebrew. Instead this PR adds the infrastructure and code paths to use 
the same type of pre-compiled libarrow binaries as we use for Linux. The main 
difference is the use of the binaries even on CRAN (as we previously also used 
binaries in form of brew bottles).
    
    The addition of the new artifacts to tasks.yml should ensure that they get 
uploaded to the nightly repo as well as to the artifactory during the release 
(@ kou please confirm).
    
    A summary of the changes in this PR:
    - update `r/configure` and `r/tools/nixlibs.R` to enable the source build 
on macOS and usage of precompiled binaries using the existing mechanism to test 
compile a program to detect the existing openssl version
    - added tests for the changes in nixlibs.R
    - update the binary allow-list
    - Add the build jobs for libarrow binaries for arm64 and x86_64 macos with 
openssl 1.1 and 3.0 to the `r-binary-packages` job
    - Use the binaries to build the nightly packages
    - bump snappy version to 1.1.10 (and patch it on 10.13) due to build issues 
with the current version. This also touches on a number of issues in regards to 
a sanitizer issue we have had for a long time: #32562 #31766
    - Disable the centos binary test step: #37922
    
    Follow up issues:
    - #37921
    - #37941
    - #37945
    * Closes: #37923
    
    Lead-authored-by: Jacob Wujciak-Jens <[email protected]>
    Co-authored-by: Jonathan Keane <[email protected]>
    Co-authored-by: Sutou Kouhei <[email protected]>
    Signed-off-by: Dewey Dunnington <[email protected]>
---
 cpp/Brewfile                                |  6 +++
 cpp/cmake_modules/SetupCxxFlags.cmake       | 15 ++++--
 cpp/cmake_modules/ThirdpartyToolchain.cmake | 21 ++++++++
 cpp/cmake_modules/snappy.diff               | 12 +++++
 cpp/thirdparty/versions.txt                 |  5 +-
 dev/release/rat_exclude_files.txt           |  1 +
 dev/tasks/macros.jinja                      | 14 ++++-
 dev/tasks/r/github.packages.yml             | 83 +++++++++++++++++++++++-----
 dev/tasks/tasks.yml                         |  4 ++
 r/configure                                 | 55 +++----------------
 r/tools/nixlibs-allowlist.txt               |  1 +
 r/tools/nixlibs.R                           | 84 ++++++++++++++++++++++-------
 r/tools/test-nixlibs.R                      | 64 ++++++++++++++++++----
 13 files changed, 268 insertions(+), 97 deletions(-)

diff --git a/cpp/Brewfile b/cpp/Brewfile
index 58015d2121..0f55279853 100644
--- a/cpp/Brewfile
+++ b/cpp/Brewfile
@@ -19,7 +19,9 @@ brew "aws-sdk-cpp"
 brew "bash"
 brew "boost"
 brew "brotli"
+brew "bzip2"
 brew "c-ares"
+brew "curl"
 brew "ccache"
 brew "cmake"
 brew "flatbuffers"
@@ -29,14 +31,18 @@ brew "googletest"
 brew "grpc"
 brew "llvm@14"
 brew "lz4"
+brew "mimalloc"
 brew "ninja"
 brew "node"
 brew "openssl@3"
+brew "pkg-config"
 brew "protobuf"
 brew "python"
 brew "rapidjson"
+brew "re2"
 brew "snappy"
 brew "thrift"
+brew "utf8proc"
 brew "wget"
 brew "xsimd"
 brew "zstd"
diff --git a/cpp/cmake_modules/SetupCxxFlags.cmake 
b/cpp/cmake_modules/SetupCxxFlags.cmake
index 5531415ac2..e90c00612c 100644
--- a/cpp/cmake_modules/SetupCxxFlags.cmake
+++ b/cpp/cmake_modules/SetupCxxFlags.cmake
@@ -456,11 +456,18 @@ elseif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang" OR 
CMAKE_CXX_COMPILER_ID STRE
   # Don't complain about optimization passes that were not possible
   set(CXX_COMMON_FLAGS "${CXX_COMMON_FLAGS} -Wno-pass-failed")
 
-  # Avoid clang / libc++ error about C++17 aligned allocation on macOS.
-  # See 
https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
-  # for details.
   if(APPLE)
-    set(CXX_ONLY_FLAGS "${CXX_ONLY_FLAGS} -fno-aligned-new")
+    # Avoid clang / libc++ error about C++17 aligned allocation on macOS.
+    # See 
https://chromium.googlesource.com/chromium/src/+/eee44569858fc650b635779c4e34be5cb0c73186%5E%21/#F0
+    # for details.
+    string(APPEND CXX_ONLY_FLAGS " -fno-aligned-new")
+
+    if(CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
+      # Avoid C++17 std::get 'not available' issue on macOS 10.13
+      # This will be required until at least R 4.4 is released and
+      # CRAN (hopefully) stops checking on 10.13
+      string(APPEND CXX_ONLY_FLAGS " -D_LIBCPP_DISABLE_AVAILABILITY")
+    endif()
   endif()
 endif()
 
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake 
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index 85c0337d10..6d6a2bf775 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -1308,6 +1308,26 @@ macro(build_snappy)
   set(SNAPPY_CMAKE_ARGS
       ${EP_COMMON_CMAKE_ARGS} -DSNAPPY_BUILD_TESTS=OFF 
-DSNAPPY_BUILD_BENCHMARKS=OFF
       "-DCMAKE_INSTALL_PREFIX=${SNAPPY_PREFIX}")
+  # Snappy unconditionally enables Werror when building with clang; this can lead
+  # to build failures by way of new compiler warnings. This adds a flag to 
disable
+  # Werror to the very end of the invocation to override the snappy internal 
setting.
+  if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+    foreach(CONFIG DEBUG MINSIZEREL RELEASE RELWITHDEBINFO)
+      list(APPEND
+           SNAPPY_CMAKE_ARGS
+           
"-DCMAKE_CXX_FLAGS_${UPPERCASE_BUILD_TYPE}=${EP_CXX_FLAGS_${CONFIG}} -Wno-error"
+      )
+    endforeach()
+  endif()
+
+  if(APPLE AND CMAKE_HOST_SYSTEM_VERSION VERSION_LESS 20)
+    # On macOS 10.13 we need to explicitly add <functional> to avoid a missing 
include error
+    # This can be removed once CRAN no longer checks on macOS 10.13
+    find_program(PATCH patch REQUIRED)
+    set(SNAPPY_PATCH_COMMAND ${PATCH} -p1 -i 
${CMAKE_CURRENT_LIST_DIR}/snappy.diff)
+  else()
+    set(SNAPPY_PATCH_COMMAND)
+  endif()
 
   externalproject_add(snappy_ep
                       ${EP_COMMON_OPTIONS}
@@ -1315,6 +1335,7 @@ macro(build_snappy)
                       INSTALL_DIR ${SNAPPY_PREFIX}
                       URL ${SNAPPY_SOURCE_URL}
                       URL_HASH "SHA256=${ARROW_SNAPPY_BUILD_SHA256_CHECKSUM}"
+                      PATCH_COMMAND ${SNAPPY_PATCH_COMMAND}
                       CMAKE_ARGS ${SNAPPY_CMAKE_ARGS}
                       BUILD_BYPRODUCTS "${SNAPPY_STATIC_LIB}")
 
diff --git a/cpp/cmake_modules/snappy.diff b/cpp/cmake_modules/snappy.diff
new file mode 100644
index 0000000000..f86e2bb197
--- /dev/null
+++ b/cpp/cmake_modules/snappy.diff
@@ -0,0 +1,12 @@
+diff --git a/snappy.cc b/snappy.cc
+index d414718..5b0d0d6 100644
+--- a/snappy.cc
++++ b/snappy.cc
+@@ -83,6 +83,7 @@
+ #include <string>
+ #include <utility>
+ #include <vector>
++#include <functional>
+ 
+ namespace snappy {
+ 
diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt
index 52d302592b..56163f35d2 100644
--- a/cpp/thirdparty/versions.txt
+++ b/cpp/thirdparty/versions.txt
@@ -101,9 +101,8 @@ 
ARROW_RAPIDJSON_BUILD_VERSION=232389d4f1012dddec4ef84861face2d2ba85709
 
ARROW_RAPIDJSON_BUILD_SHA256_CHECKSUM=b9290a9a6d444c8e049bd589ab804e0ccf2b05dc5984a19ed5ae75d090064806
 ARROW_RE2_BUILD_VERSION=2022-06-01
 
ARROW_RE2_BUILD_SHA256_CHECKSUM=f89c61410a072e5cbcf8c27e3a778da7d6fd2f2b5b1445cd4f4508bee946ab0f
-# 1.1.9 is patched to implement https://github.com/google/snappy/pull/148 if 
this is bumped, remove the patch
-ARROW_SNAPPY_BUILD_VERSION=1.1.9
-ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=75c1fbb3d618dd3a0483bff0e26d0a92b495bbe5059c8b4f1c962b478b6e06e7
+ARROW_SNAPPY_BUILD_VERSION=1.1.10
+ARROW_SNAPPY_BUILD_SHA256_CHECKSUM=49d831bffcc5f3d01482340fe5af59852ca2fe76c3e05df0e67203ebbe0f1d90
 ARROW_SUBSTRAIT_BUILD_VERSION=v0.27.0
 
ARROW_SUBSTRAIT_BUILD_SHA256_CHECKSUM=4ed375f69d972a57fdc5ec406c17003a111831d8640d3f1733eccd4b3ff45628
 ARROW_S2N_TLS_BUILD_VERSION=v1.3.35
diff --git a/dev/release/rat_exclude_files.txt 
b/dev/release/rat_exclude_files.txt
index aebe321d61..af084ea215 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -24,6 +24,7 @@ cpp/build-support/iwyu/*
 cpp/cmake_modules/FindPythonLibsNew.cmake
 cpp/cmake_modules/SnappyCMakeLists.txt
 cpp/cmake_modules/SnappyConfig.h
+cpp/cmake_modules/snappy.diff
 cpp/examples/parquet/parquet-arrow/cmake_modules/FindArrow.cmake
 cpp/src/parquet/.parquetcppversion
 cpp/src/generated/parquet_constants.cpp
diff --git a/dev/tasks/macros.jinja b/dev/tasks/macros.jinja
index faf77a1168..54f676fd49 100644
--- a/dev/tasks/macros.jinja
+++ b/dev/tasks/macros.jinja
@@ -307,7 +307,8 @@ on:
   stopifnot(packageVersion("arrow") == {{ 
'"${{needs.source.outputs.pkg_version}}"' }})
 {% endmacro %}
 
-{%- macro github_setup_local_r_repo(get_nix, get_win) -%}
+{%- macro github_setup_local_r_repo(get_nix, get_win, get_mac=False) -%}
+# TODO: improve arg handling
   - name: Setup local repo
     shell: bash
     run: mkdir repo
@@ -327,6 +328,17 @@ on:
       path: repo/libarrow/bin/linux-openssl-{{ openssl_version }}
     {% endfor %}
   {% endif %}
+  {% if get_mac %}
+    {% for openssl_version in ["1.1", "3.0"] %}
+      {% for arch in ["x86_64", "arm64"] %}
+  - name: Get macOS {{ arch }} OpenSSL {{ openssl_version }} binary
+    uses: actions/download-artifact@v3
+    with:
+      name: r-lib__libarrow__bin__darwin-{{arch}}-openssl-{{ openssl_version }}
+      path: repo/libarrow/bin/darwin-{{ arch }}-openssl-{{ openssl_version }}
+      {% endfor %}
+    {% endfor %}
+  {% endif %}
   - name: Get src pkg
     uses: actions/download-artifact@v3
     with:
diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml
index dbe21ffb6b..760e3b6da4 100644
--- a/dev/tasks/r/github.packages.yml
+++ b/dev/tasks/r/github.packages.yml
@@ -56,6 +56,59 @@ jobs:
           name: r-pkg__src__contrib
           path: arrow/r/arrow_*.tar.gz
 
+  macos-cpp:
+    name: C++ Binary macOS OpenSSL {{ '${{ matrix.openssl }}' }} {{ '${{ 
matrix.platform.arch }}' }}
+
+    runs-on: {{ '${{ matrix.platform.runs_on }}' }}
+
+    needs: source
+    strategy:
+      fail-fast: false
+      matrix:
+        platform:
+          - { runs_on: ["self-hosted", "macos-10.13"], arch: "x86_64" }
+
+          - { runs_on: ["self-hosted", "macOS", "arm64", "devops-managed"], 
arch: "arm64" }
+        openssl: ['3.0', '1.1']
+
+    steps:
+      {{ macros.github_checkout_arrow(action_v="3")|indent }}
+      {{ macros.github_change_r_pkg_version(is_fork, '${{ 
needs.source.outputs.pkg_version }}')|indent }}
+      - name: Install Deps
+        if: {{ "${{ !contains(matrix.platform.runs_on, 'macos-10.13') }}" }}
+        run: |
+          brew install sccache ninja
+          brew install openssl@{{ '${{ matrix.openssl }}' }}
+      - name: Build libarrow
+        shell: bash
+        env:
+        {{ macros.github_set_sccache_envvars()|indent(8) }}
+          MACOSX_DEPLOYMENT_TARGET: "10.13"
+          ARROW_S3: ON
+          ARROW_GCS: ON
+          ARROW_DEPENDENCY_SOURCE: BUNDLED
+          CMAKE_GENERATOR: Ninja
+          LIBARROW_MINIMAL: false
+        run: |
+          sccache --start-server
+          export EXTRA_CMAKE_FLAGS="-DOPENSSL_ROOT_DIR=$(brew --prefix 
openssl@{{ '${{ matrix.openssl }}' }})"
+          cd arrow
+          r/inst/build_arrow_static.sh
+      - name: Bundle libarrow
+        shell: bash
+        env:
+          PKG_FILE: arrow-{{ '${{ needs.source.outputs.pkg_version }}' }}.zip
+          VERSION: {{ '${{ needs.source.outputs.pkg_version }}' }}
+        run: |
+          cd arrow/r/libarrow/dist
+          zip -r $PKG_FILE lib/ include/
+
+      - name: Upload binary artifact
+        uses: actions/upload-artifact@v3
+        with:
+          name: r-lib__libarrow__bin__darwin-{{ '${{ matrix.platform.arch }}' 
}}-openssl-{{ '${{ matrix.openssl }}' }}
+          path: arrow/r/libarrow/dist/arrow-*.zip
+
   linux-cpp:
     name: C++ Binary Linux OpenSSL {{ '${{ matrix.openssl }}' }}
     runs-on: ubuntu-latest
@@ -135,7 +188,7 @@ jobs:
           path: build/arrow-*.zip
 
   r-packages:
-    needs: [source, windows-cpp]
+    needs: [source, windows-cpp, macos-cpp]
     name: {{ '${{ matrix.platform.name }} ${{ matrix.r_version.r }}' }}
     runs-on: {{ '${{ matrix.platform.runs_on }}' }}
     strategy:
@@ -167,7 +220,7 @@ jobs:
 
           rig system setup-user-lib
           rig system add-pak
-      {{ macros.github_setup_local_r_repo(false, true)|indent }}
+      {{ macros.github_setup_local_r_repo(false, true, true)|indent }}
       - name: Prepare Dependency Installation
 
         shell: bash
@@ -178,18 +231,19 @@ jobs:
         with:
            working-directory: 'arrow'
            extra-packages: cpp11
-      - name: Install sccache
-        if: startsWith(matrix.platform, 'macos')
-        run: brew install sccache
+      - name: Set CRAN like openssl
+        if: contains(matrix.platform.runs_on, 'arm64')
+        run: |
+          # The arm64 runners contain openssl 1.1.1t in this path that is 
always included first so we need to override the
+          # default setting of the brew --prefix as root dir to avoid version 
conflicts.
+          echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV
       - name: Build Binary
         id: build
         shell: Rscript {0}
         env:
-          NOT_CRAN: "true" # actions/setup-r sets this implicitly
+          NOT_CRAN: "false" # actions/setup-r sets this implicitly
           ARROW_R_DEV: "true"
-          FORCE_AUTOBREW: "true" # this is ignored on windows
-          # sccache for macos
-        {{ macros.github_set_sccache_envvars()|indent(8) }}
+          LIBARROW_BINARY: "true" # has to be set as long as allowlist not 
updated
         run: |
           on_windows <- tolower(Sys.info()[["sysname"]]) == "windows"
 
@@ -213,8 +267,10 @@ jobs:
             INSTALL_opts = INSTALL_opts
           )
 
+
           # Test
           library(arrow)
+          arrow_info()
           read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
 
           # encode contrib.url for artifact name
@@ -233,7 +289,6 @@ jobs:
         with:
           name: r-pkg{{ '${{ steps.build.outputs.path }}' }}
           path: arrow_*
-
   test-linux-binary:
     needs: [source, linux-cpp]
     name: Test binary {{ '${{ matrix.config.image }}' }}
@@ -291,7 +346,10 @@ jobs:
         with:
           name: r-pkg_centos7
           path: arrow_*
+
   test-centos-binary:
+    # arrow binary package not on ppm currently see #37922
+    if: false
     needs: test-linux-binary
     runs-on: ubuntu-latest
     container: "rstudio/r-base:4.2-centos7"
@@ -317,7 +375,8 @@ jobs:
           read_parquet(system.file("v0.7.1.parquet", package = "arrow"))
           print(arrow_info())
 
-  test-source:
+  #TODO test macos source build?
+  test-linux-source:
     needs: source
     name: Test linux source build
     runs-on: ubuntu-latest
@@ -367,7 +426,7 @@ jobs:
 
   upload-binaries:
     # Only upload binaries if all tests pass.
-    needs: [r-packages, test-source, test-linux-binary, test-centos-binary]
+    needs: [r-packages, test-linux-source, test-linux-binary]
     name: Upload artifacts
     runs-on: ubuntu-latest
     steps:
diff --git a/dev/tasks/tasks.yml b/dev/tasks/tasks.yml
index 859ff8ddb5..798932db23 100644
--- a/dev/tasks/tasks.yml
+++ b/dev/tasks/tasks.yml
@@ -994,6 +994,10 @@ tasks:
       - r-lib__libarrow__bin__linux-openssl-1.0__arrow-{no_rc_r_version}\.zip
       - r-lib__libarrow__bin__linux-openssl-1.1__arrow-{no_rc_r_version}\.zip
       - r-lib__libarrow__bin__linux-openssl-3.0__arrow-{no_rc_r_version}\.zip
+      - 
r-lib__libarrow__bin__darwin-arm64-openssl-1.1__arrow-{no_rc_r_version}\.zip
+      - 
r-lib__libarrow__bin__darwin-arm64-openssl-3.0__arrow-{no_rc_r_version}\.zip
+      - 
r-lib__libarrow__bin__darwin-x86_64-openssl-1.1__arrow-{no_rc_r_version}\.zip
+      - 
r-lib__libarrow__bin__darwin-x86_64-openssl-3.0__arrow-{no_rc_r_version}\.zip
       - r-pkg__bin__windows__contrib__4.1__arrow_{no_rc_r_version}\.zip
       - r-pkg__bin__windows__contrib__4.2__arrow_{no_rc_r_version}\.zip
       - r-pkg__bin__macosx__contrib__4.1__arrow_{no_rc_r_version}\.tgz
diff --git a/r/configure b/r/configure
index 593f60bbdd..d244b1a7c1 100755
--- a/r/configure
+++ b/r/configure
@@ -39,8 +39,8 @@
 #
 # * Installing a released version from source, as from CRAN, with
 #   no other prior setup
-#   * On macOS, autobrew is used to retrieve libarrow and dependencies
-#   * On Linux, the nixlibs.R build script will download or build
+#   * On macOS and Linux, the nixlibs.R build script will download
+#     or build libarrow and dependencies
 # * Installing a released version but first installing libarrow.
 #   It will use pkg-config and brew to search for libraries.
 # * Installing a development version from source as a user.
@@ -65,8 +65,6 @@ PKG_TEST_HEADER="<arrow/api.h>"
 # Some env vars that control the build (all logical, case insensitive)
 # Development mode, also increases verbosity in the bundled build
 ARROW_R_DEV=`echo $ARROW_R_DEV | tr '[:upper:]' '[:lower:]'`
-# autobrew is how mac binaries are built on CRAN; FORCE ensures we use it here
-FORCE_AUTOBREW=`echo $FORCE_AUTOBREW | tr '[:upper:]' '[:lower:]'`
 # The bundled build compiles arrow C++ from source; FORCE ensures we don't 
pick up
 # any other packages that may be found on the system
 FORCE_BUNDLED_BUILD=`echo $FORCE_BUNDLED_BUILD | tr '[:upper:]' '[:lower:]'`
@@ -141,19 +139,11 @@ fi
 find_or_build_libarrow () {
   if [ "$FORCE_BUNDLED_BUILD" = "true" ]; then
     do_bundled_build
-  elif [ "$FORCE_AUTOBREW" = "true" ]; then
-    do_autobrew
   else
     find_arrow
     if [ "$_LIBARROW_FOUND" = "false" ]; then
       # If we haven't found a suitable version of libarrow, build it
-      if [ "$UNAME" = "Darwin" ] && ! echo $VERSION | grep -q "000"; then
-        # Only autobrew on release version (for testing, use FORCE_AUTOBREW 
above)
-        # (dev versions end in .9000, and nightly gets something like 
.10000xxx)
-        do_autobrew
-      else
-        do_bundled_build
-      fi
+      do_bundled_build
     fi
   fi
 }
@@ -175,12 +165,6 @@ find_arrow () {
     # 2. Use pkg-config to find arrow on the system
     _LIBARROW_FOUND="`${PKG_CONFIG} --variable=prefix --silence-errors 
${PKG_CONFIG_NAME}`"
     echo "*** Trying Arrow C++ found by pkg-config: $_LIBARROW_FOUND"
-  elif brew --prefix ${PKG_BREW_NAME} > /dev/null 2>&1; then
-    # 3. On macOS, look for Homebrew apache-arrow
-    #    (note that if you have pkg-config, homebrew arrow may have already 
been found)
-    _LIBARROW_FOUND=`brew --prefix ${PKG_BREW_NAME}`
-    echo "*** Trying Arrow C++ found by Homebrew: ${_LIBARROW_FOUND}"
-    export 
PKG_CONFIG_PATH="${_LIBARROW_FOUND}/lib/pkgconfig${PKG_CONFIG_PATH:+:${PKG_CONFIG_PATH}}"
   else
     _LIBARROW_FOUND="false"
   fi
@@ -247,34 +231,6 @@ do_bundled_build () {
   fi
 }
 
-do_autobrew () {
-  echo "*** Downloading ${PKG_BREW_NAME}"
-
-  # Setup for local autobrew testing
-  if [ -f "tools/apache-arrow.rb" ]; then
-    # If you want to use a local apache-arrow.rb formula, do
-    # $ cp ../dev/tasks/homebrew-formulae/autobrew/apache-arrow*.rb tools
-    # before R CMD build or INSTALL (assuming a local checkout of the 
apache/arrow repository).
-    # If you have this, you should use the local autobrew script so they match.
-    cp tools/autobrew .
-  fi
-
-  if [ -f "autobrew" ]; then
-    echo "**** Using local manifest for ${PKG_BREW_NAME}"
-  else
-    if ! curl -sfL "https://autobrew.github.io/scripts/$PKG_BREW_NAME"; > 
autobrew; then
-      echo "Failed to download manifest for ${PKG_BREW_NAME}"
-      # Fall back to the local copy
-      cp tools/autobrew .
-    fi
-  fi
-  if ! . autobrew; then
-    echo "Failed to retrieve binary for ${PKG_BREW_NAME}"
-  fi
-  # autobrew sets `PKG_LIBS`, `PKG_DIRS`, and `PKG_CFLAGS`
-  # TODO: move PKG_LIBS and PKG_CFLAGS out of autobrew and use set_pkg_vars
-}
-
 # Once libarrow is obtained, this function sets `PKG_LIBS`, `PKG_DIRS`, and 
`PKG_CFLAGS`
 # either from pkg-config or by inferring things about the directory in $1
 set_pkg_vars () {
@@ -298,6 +254,11 @@ set_pkg_vars () {
   if [ "$ARROW_R_CXXFLAGS" ]; then
     PKG_CFLAGS="$PKG_CFLAGS $ARROW_R_CXXFLAGS"
   fi
+
+  if [ "$UNAME" = "Darwin" ] && expr $(sw_vers -productVersion) : '10\.13'; 
then
+    # avoid C++17 availability warnings on macOS < 11
+    PKG_CFLAGS="$PKG_CFLAGS -D_LIBCPP_DISABLE_AVAILABILITY"
+  fi
 }
 
 # If we have pkg-config, it will tell us what libarrow needs
diff --git a/r/tools/nixlibs-allowlist.txt b/r/tools/nixlibs-allowlist.txt
index bd9f0c1b2c..9c368e6ed1 100644
--- a/r/tools/nixlibs-allowlist.txt
+++ b/r/tools/nixlibs-allowlist.txt
@@ -2,3 +2,4 @@ ubuntu
 centos
 redhat
 rhel
+darwin
diff --git a/r/tools/nixlibs.R b/r/tools/nixlibs.R
index 3d908c05ca..60deca05cd 100644
--- a/r/tools/nixlibs.R
+++ b/r/tools/nixlibs.R
@@ -28,6 +28,8 @@ if (test_mode && is.na(VERSION)) {
 }
 
 dev_version <- package_version(VERSION)[1, 4]
+on_macos <- tolower(Sys.info()[["sysname"]]) == "darwin"
+
 
 # Small dev versions are added for R-only changes during CRAN submission.
 if (is.na(dev_version) || dev_version < "100") {
@@ -94,8 +96,10 @@ download_binary <- function(lib) {
     }
   } else {
     if (!quietly) {
-      cat(sprintf("*** Downloading libarrow binary failed for version %s 
(%s)\n    at %s\n",
-                  VERSION, lib, binary_url))
+      cat(sprintf(
+        "*** Downloading libarrow binary failed for version %s (%s)\n    at 
%s\n",
+        VERSION, lib, binary_url
+      ))
     }
     libfile <- NULL
   }
@@ -114,6 +118,10 @@ download_binary <- function(lib) {
 #    * "linux-openssl-1.0" (OpenSSL 1.0)
 #    * "linux-openssl-1.1" (OpenSSL 1.1)
 #    * "linux-openssl-3.0" (OpenSSL 3.0)
+#    * "darwin-x86_64-openssl-1.1" (OpenSSL 1.1)
+#    * "darwin-x86_64-openssl-3.0" (OpenSSL 3.0)
+#    * "darwin-arm64-openssl-1.1" (OpenSSL 1.1)
+#    * "darwin-arm64-openssl-3.0" (OpenSSL 3.0)
 #   These string values, along with `NULL`, are the potential return values of
 #   this function.
 identify_binary <- function(lib = Sys.getenv("LIBARROW_BINARY"), info = 
distro()) {
@@ -142,7 +150,7 @@ check_allowlist <- function(os, allowed = 
"https://raw.githubusercontent.com/apa
     # Try a remote allowlist so that we can add/remove without a release
     suppressWarnings(readLines(allowed)),
     # Fallback to default: allowed only on Ubuntu and CentOS/RHEL
-    error = function(e) c("ubuntu", "centos", "redhat", "rhel")
+    error = function(e) c("ubuntu", "centos", "redhat", "rhel", "darwin")
   )
   # allowlist should contain valid regular expressions (plain strings ok too)
   any(grepl(paste(allowlist, collapse = "|"), os))
@@ -151,14 +159,16 @@ check_allowlist <- function(os, allowed = 
"https://raw.githubusercontent.com/apa
 select_binary <- function(os = tolower(Sys.info()[["sysname"]]),
                           arch = tolower(Sys.info()[["machine"]]),
                           test_program = test_for_curl_and_openssl) {
-  if (identical(os, "linux") && identical(arch, "x86_64")) {
-    # We only host x86 linux binaries today
-    tryCatch(
+  if (identical(os, "darwin") || (identical(os, "linux") && identical(arch, 
"x86_64"))) {
+    # We only host x86 linux binaries and x86 & arm64 macos today
+    binary <- tryCatch(
       # Somehow the test program system2 call errors on the sanitizer builds
       # so globally handle the possibility that this could fail
       {
         errs <- compile_test_program(test_program)
-        determine_binary_from_stderr(errs)
+        openssl_version <- determine_binary_from_stderr(errs)
+        arch <- ifelse(identical(os, "darwin"), paste0("-", arch, "-"), "-")
+        ifelse(is.null(openssl_version), NULL, paste0(os, arch, 
openssl_version))
       },
       error = function(e) {
         cat("*** Unable to find libcurl and openssl\n")
@@ -168,17 +178,20 @@ select_binary <- function(os = 
tolower(Sys.info()[["sysname"]]),
   } else {
     # No binary available for arch
     cat(sprintf("*** Building on %s %s\n", os, arch))
-    NULL
+    binary <- NULL
   }
+  return(binary)
 }
 
 # This tests that curl and OpenSSL are present (bc we can include their 
headers)
 # and it checks for other versions/features and raises errors that we grep for
 test_for_curl_and_openssl <- "
+#ifndef __APPLE__
 #include <ciso646>
 #ifdef _LIBCPP_VERSION
 #error Using libc++
 #endif
+#endif
 
 #include <curl/curl.h>
 #include <openssl/opensslv.h>
@@ -194,11 +207,14 @@ test_for_curl_and_openssl <- "
 "
 
 compile_test_program <- function(code) {
-  # Note: if we wanted to check for openssl on macOS, we'd have to set the brew
-  # path as a -I directory. But since we (currently) only run this code to
-  # determine whether we can download a Linux binary, it's not relevant.
+  openssl_dir <- ""
+  if (on_macos) {
+    openssl_root_dir <- get_macos_openssl_dir()
+    openssl_dir <- paste0("-I", openssl_root_dir, "/include")
+  }
   runner <- paste(
     R_CMD_config("CXX17"),
+    openssl_dir,
     R_CMD_config("CPPFLAGS"),
     R_CMD_config("CXX17FLAGS"),
     R_CMD_config("CXX17STD"),
@@ -208,17 +224,34 @@ compile_test_program <- function(code) {
   suppressWarnings(system2("echo", sprintf('"%s" | %s -', code, runner), 
stdout = FALSE, stderr = TRUE))
 }
 
+get_macos_openssl_dir <- function() {
+  openssl_root_dir <- Sys.getenv("OPENSSL_ROOT_DIR", NA)
+  header <- "openssl/opensslv.h"
+  if (is.na(openssl_root_dir) || !file.exists(file.path(openssl_root_dir, 
"include", header))) {
+    # try to guess default openssl include dir based on CRAN's build script
+    # https://github.com/R-macos/recipes/blob/master/build.sh#L35
+    if (identical(Sys.info()["machine"], "arm64") && 
file.exists(file.path("/opt/R/arm64/include", header))) {
+      openssl_root_dir <- "/opt/R/arm64"
+    } else if (identical(Sys.info()["machine"], "x86_64") && 
file.exists(file.path("/opt/R/x86_64/include", header))) {
+      openssl_root_dir <- "/opt/R/x86_64"
+    } else {
+      openssl_root_dir <- "/usr/local"
+    }
+  }
+  return(openssl_root_dir)
+}
+
 # (built with newer devtoolset but older glibc (2.17) for broader 
compatibility,# like manylinux2014)
 determine_binary_from_stderr <- function(errs) {
   if (is.null(attr(errs, "status"))) {
     # There was no error in compiling: so we found libcurl and OpenSSL >= 1.1,
     # openssl is < 3.0
     cat("*** Found libcurl and OpenSSL >= 1.1\n")
-    return("linux-openssl-1.1")
+    return("openssl-1.1")
     # Else, check for dealbreakers:
-  } else if (any(grepl("Using libc++", errs, fixed = TRUE))) {
-    # Our binaries are all built with GNU stdlib so they fail with libc++
-    cat("*** Found libc++\n")
+  } else if (!on_macos && any(grepl("Using libc++", errs, fixed = TRUE))) {
+    # Our linux binaries are all built with GNU stdlib so they fail with libc++
+    cat("*** Linux binaries incompatible with libc++\n")
     return(NULL)
   } else if (header_not_found("curl/curl", errs)) {
     cat("*** libcurl not found\n")
@@ -231,11 +264,15 @@ determine_binary_from_stderr <- function(errs) {
     return(NULL)
     # Else, determine which other binary will work
   } else if (any(grepl("Using OpenSSL version 1.0", errs))) {
+    if (on_macos) {
+      cat("*** OpenSSL 1.0 is not supported on macOS\n")
+      return(NULL)
+    }
     cat("*** Found libcurl and OpenSSL < 1.1\n")
-    return("linux-openssl-1.0")
+    return("openssl-1.0")
   } else if (any(grepl("Using OpenSSL version 3", errs))) {
     cat("*** Found libcurl and OpenSSL >= 3.0.0\n")
-    return("linux-openssl-3.0")
+    return("openssl-3.0")
   }
   NULL
 }
@@ -248,6 +285,11 @@ header_not_found <- function(header, errs) {
 #### start distro ####
 
 distro <- function() {
+  # This is not part of distro but needed to enable prebuilt binaries on macos
+  if (on_macos) {
+    return(list(id = "darwin", arch = tolower(Sys.info()[["machine"]])))
+  }
+
   # The code in this script is a (potentially stale) copy of the distro package
   if (requireNamespace("distro", quietly = TRUE)) {
     # Use the version from the package, which may be updated from this
@@ -487,8 +529,10 @@ build_libarrow <- function(src_dir, dst_dir) {
     # It failed :(
     cat("**** Error building Arrow C++.", "\n")
     if (quietly) {
-      cat("**** Printing contents of build log because the build failed", 
-          "while ARROW_R_DEV was set to FALSE\n")
+      cat(
+        "**** Printing contents of build log because the build failed",
+        "while ARROW_R_DEV was set to FALSE\n"
+      )
       cat(readLines(build_log_path), sep = "\n")
       cat("**** Complete build log may still be present at", build_log_path, 
"\n")
     }
@@ -503,7 +547,7 @@ ensure_cmake <- function(cmake_minimum_required = "3.16") {
     # If not found, download it
     cat("**** cmake\n")
     CMAKE_VERSION <- Sys.getenv("CMAKE_VERSION", "3.26.4")
-    if (tolower(Sys.info()[["sysname"]]) %in% "darwin") {
+    if (on_macos) {
       postfix <- "-macos-universal.tar.gz"
     } else if (tolower(Sys.info()[["machine"]]) %in% c("arm64", "aarch64")) {
       postfix <- "-linux-aarch64.tar.gz"
diff --git a/r/tools/test-nixlibs.R b/r/tools/test-nixlibs.R
index e099dcdad4..c9571b58b7 100644
--- a/r/tools/test-nixlibs.R
+++ b/r/tools/test-nixlibs.R
@@ -21,8 +21,10 @@
 # Flag so that we just load the functions and don't evaluate them like we do
 # when called from configure.R
 TESTING <- TRUE
-
-source("nixlibs.R", local = TRUE)
+# The functions use `on_macos` from the env they were sourced in, so we need 
to
+# explicitly set it in that environment.
+nixlibs_env <- environment()
+source("nixlibs.R", local = nixlibs_env)
 
 test_that("identify_binary() based on LIBARROW_BINARY", {
   expect_null(identify_binary("FALSE"))
@@ -31,10 +33,6 @@ test_that("identify_binary() based on LIBARROW_BINARY", {
 })
 
 test_that("select_binary() based on system", {
-  expect_output(
-    expect_null(select_binary("darwin", "x86_64")), # Not built today
-    "Building on darwin x86_64"
-  )
   expect_output(
     expect_null(select_binary("linux", arch = "aarch64")), # Not built today
     "Building on linux aarch64"
@@ -52,21 +50,30 @@ test_that("determine_binary_from_stderr", {
   expect_output(
     expect_identical(
       determine_binary_from_stderr(compile_test_program("int a;")),
-      "linux-openssl-1.1"
+      "openssl-1.1"
     ),
     "Found libcurl and OpenSSL >= 1.1"
   )
+
+  nixlibs_env$on_macos <- FALSE
   expect_output(
     expect_identical(
       determine_binary_from_stderr(compile_test_program("#error Using OpenSSL 
version 1.0")),
-      "linux-openssl-1.0"
+      "openssl-1.0"
     ),
     "Found libcurl and OpenSSL < 1.1"
   )
+  nixlibs_env$on_macos <- TRUE
+  expect_output(
+    expect_null(
+      determine_binary_from_stderr(compile_test_program("#error Using OpenSSL 
version 1.0"))
+    ),
+    "OpenSSL 1.0 is not supported on macOS"
+  )
   expect_output(
     expect_identical(
       determine_binary_from_stderr(compile_test_program("#error Using OpenSSL 
version 3")),
-      "linux-openssl-3.0"
+      "openssl-3.0"
     ),
     "Found libcurl and OpenSSL >= 3.0.0"
   )
@@ -79,6 +86,7 @@ test_that("determine_binary_from_stderr", {
 })
 
 test_that("select_binary() with test program", {
+  nixlibs_env$on_macos <- FALSE
   expect_output(
     expect_identical(
       select_binary("linux", "x86_64", "int a;"),
@@ -100,13 +108,49 @@ test_that("select_binary() with test program", {
     ),
     "Found libcurl and OpenSSL >= 3.0.0"
   )
+  nixlibs_env$on_macos <- TRUE
+  expect_output(
+    expect_identical(
+      select_binary("darwin", "x86_64", "int a;"),
+      "darwin-x86_64-openssl-1.1"
+    ),
+    "Found libcurl and OpenSSL >= 1.1"
+  )
+  expect_output(
+    expect_identical(
+      select_binary("darwin", "x86_64", "#error Using OpenSSL version 3"),
+      "darwin-x86_64-openssl-3.0"
+    ),
+    "Found libcurl and OpenSSL >= 3.0.0"
+  )
+  expect_output(
+    expect_identical(
+      select_binary("darwin", "arm64", "int a;"),
+      "darwin-arm64-openssl-1.1"
+    ),
+    "Found libcurl and OpenSSL >= 1.1"
+  )
+  expect_output(
+    expect_identical(
+      select_binary("darwin", "arm64", "#error Using OpenSSL version 3"),
+      "darwin-arm64-openssl-3.0"
+    ),
+    "Found libcurl and OpenSSL >= 3.0.0"
+  )
+  expect_output(
+    expect_null(
+      select_binary("darwin", "x86_64", "#error Using OpenSSL version 1.0")
+    ),
+    "OpenSSL 1.0 is not supported on macOS"
+  )
 })
 
 test_that("check_allowlist", {
   tf <- tempfile()
-  cat("tu$\n^cent\n", file = tf)
+  cat("tu$\n^cent\n^dar\n", file = tf)
   expect_true(check_allowlist("ubuntu", tf))
   expect_true(check_allowlist("centos", tf))
+  expect_true(check_allowlist("darwin", tf))
   expect_false(check_allowlist("redhat", tf)) # remote allowlist doesn't have 
this
   expect_true(check_allowlist("redhat", tempfile())) # remote allowlist 
doesn't exist, so we fall back to the default list, which contains redhat
   expect_false(check_allowlist("debian", tempfile()))

Reply via email to