This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 5a5f521461 ARROW-16803: [R][CI] Fix caching for R mingw build (#13379)
5a5f521461 is described below

commit 5a5f521461d8a7eeaac20058970e358c258b537b
Author: Jacob Wujciak-Jens <[email protected]>
AuthorDate: Thu Jun 16 15:45:01 2022 +0200

    ARROW-16803: [R][CI] Fix caching for R mingw build (#13379)
    
    The primary issue here was that a matrix build used the same cache keys on 
all sub-builds which caused all but one cache-step to fail.
    
    During my work on this, I noticed several `actions/cache` related issues:
    - `actions/cache` is not updating the cache on primary key hit 
https://github.com/actions/cache/issues/342
      - I think this is one of the reasons for bad ccache hit-rate, I have 
implemented a workaround that seems to work but has the downside of creating a 
new cache.
    - actions/cache also uses the path of the cached files when matching 
existing caches https://github.com/actions/cache/issues/815
      - This prevents re-use of R package dependencies (on windows only?). I 
switched to `r-libs/actions/setup-r-dependecies` which is more concise and 
***much*** faster (~ 2min vs. ~15+min)  even without caching.
    
    I have also increased the ccache maxsize to prevent/reduce the amount of 
(slow) cleanup operations and increase cache hits and performance. This is a 
change that will affect other workflows through `setup_ccache.sh`, which is why 
I have marked this as a draft for now.
    
    I also removed ccache from the windows R builds as we do not use it when 
building the R package (we could enable that  in the future)
    
    Authored-by: Jacob Wujciak-Jens <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 .github/workflows/r.yml      | 121 ++++++++++---------------------------------
 ci/scripts/ccache_setup.sh   |   2 +-
 ci/scripts/r_test.sh         |   2 +-
 docker-compose.yml           |   2 +-
 r/R/arrow-tabular.R          |   2 +-
 r/R/dataset-format.R         |   4 +-
 r/R/dplyr-datetime-helpers.R |   2 +-
 r/R/dplyr-mutate.R           |   2 +-
 8 files changed, 34 insertions(+), 103 deletions(-)

diff --git a/.github/workflows/r.yml b/.github/workflows/r.yml
index 8de703b71a..48d9672c74 100644
--- a/.github/workflows/r.yml
+++ b/.github/workflows/r.yml
@@ -72,8 +72,12 @@ jobs:
         uses: actions/cache@v2
         with:
           path: .docker
-          key: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ 
hashFiles('cpp/**') }}
-          restore-keys: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
+          # As this key is identical on both matrix builds only one will be 
able to successfully cache,
+          # this is fine as there are no differences in the build
+          key: ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ 
hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ github.run_id }}
+          restore-keys: |
+            ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-${{ 
hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
+            ubuntu-${{ matrix.ubuntu }}-r-${{ matrix.r }}-
       - name: Check pkgdown reference sections
         run: ci/scripts/r_pkgdown_check.sh
       - name: Setup Python
@@ -126,12 +130,6 @@ jobs:
         uses: actions/checkout@v3
         with:
           fetch-depth: 0
-      - name: Cache Docker Volumes
-        uses: actions/cache@v2
-        with:
-          path: .docker
-          key: ${{ matrix.config.image }}-r-${{ hashFiles('cpp/**') }}
-          restore-keys: ${{ matrix.config.image }}-r-
       - name: Setup Python
         uses: actions/setup-python@v1
         with:
@@ -197,8 +195,10 @@ jobs:
         uses: actions/cache@v2
         with:
           path: ccache
-          key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ 
hashFiles('cpp/**') }}
-          restore-keys: r-${{ matrix.config.rtools }}-ccache-mingw-
+          key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ 
matrix.config.arch }}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-${{ 
github.run_id }}
+          restore-keys: |
+            r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch 
}}-${{ hashFiles('cpp/src/**/*.cc','cpp/src/**/*.h)') }}-
+            r-${{ matrix.config.rtools }}-ccache-mingw-${{ matrix.config.arch 
}}-
       # We use the makepkg-mingw setup that is included in rtools40 even when
       # we use the rtools35 compilers, so we always install R 4.0/Rtools40
       - uses: r-lib/actions/setup-r@v2
@@ -257,28 +257,16 @@ jobs:
           fetch-depth: 0
       - run: mkdir r/windows
       - name: Download artifacts
-        if: ${{ matrix.config.rtools == 35 }}
-        uses: actions/download-artifact@v2
-        with:
-          name: libarrow-rtools35-mingw32.zip
-          path: r/windows
-      - name: Download artifacts
-        if: ${{ matrix.config.rtools == 35 }}
+        if: ${{ matrix.config.rtools != 42 }}
         uses: actions/download-artifact@v2
         with:
-          name: libarrow-rtools35-mingw64.zip
+          name: libarrow-rtools${{ matrix.config.rtools }}-mingw32.zip
           path: r/windows
       - name: Download artifacts
-        if: ${{ matrix.config.rtools == 40 }}
+        if: ${{ matrix.config.rtools !=42 }}
         uses: actions/download-artifact@v2
         with:
-          name: libarrow-rtools40-mingw32.zip
-          path: r/windows
-      - name: Download artifacts
-        if: ${{ matrix.config.rtools == 40 }}
-        uses: actions/download-artifact@v2
-        with:
-          name: libarrow-rtools40-mingw64.zip
+          name: libarrow-rtools${{ matrix.config.rtools }}-mingw64.zip
           path: r/windows
       - name: Download artifacts
         if: ${{ matrix.config.rtools == 42 }}
@@ -292,75 +280,23 @@ jobs:
           cd r/windows
           ls *.zip | xargs -n 1 unzip -uo
           rm -rf *.zip
-      - name: Setup ccache
-        shell: bash
-        run: |
-          ci/scripts/ccache_setup.sh
-          echo "CCACHE_DIR=$(cygpath --absolute --windows ccache)" >> 
$GITHUB_ENV
-      # We must enable actions/cache before r-lib/actions/setup-r to ensure
-      # using system tar instead of tar provided by Rtools.
-      # We can use tar provided by Rtools when we drop support for Rtools 3.5.
-      # Because Rtools 4.0 or later has zstd. actions/cache requires zstd
-      # when tar is GNU tar.
-      - name: Cache ccache
-        uses: actions/cache@v2
-        with:
-          path: ccache
-          key: r-${{ matrix.config.rtools }}-ccache-mingw-${{ 
hashFiles('cpp/**') }}
-          restore-keys: r-${{ matrix.config.rtools }}-ccache-mingw-
       - uses: r-lib/actions/setup-r@v2
         with:
           r-version: ${{ matrix.config.rversion }}
           rtools-version: ${{ matrix.config.rtools }}
+          # RSPM keeps install times short for 3.6
+          use-public-rspm: true
           Ncpus: 2
-      - name: Make R tests verbose
-        # If you get a segfault/mysterious test Execution halted,
-        # make this `true` to see where it dies.
-        if: false
-        shell: cmd
-        run: |
-          cd r/tests
-          sed -i.bak -E -e 's/"arrow"/"arrow", reporter = "location"/' 
testthat.R
-          rm -f testthat.R.bak
-      - name: Install cpp11 (on R 3.6)
-      # Since we force installation of binary packages below, dependency 
versions
-      # are frozen for old versions of R. We need newer cpp11 than is 
available as
-      # "binary" (though it doesn't matter because the cpp11 R package is just 
a
-      # vehicle for the header-only C++ code.
-        if: ${{ matrix.config.rtools == 35 }}
-        shell: Rscript {0}
-        run: install.packages("cpp11", type = "source")
-      - name: Prune dependencies (on R 3.6)
-        if: ${{ matrix.config.rtools == 35 }}
-        shell: Rscript {0}
-        run: |
-            # To prevent the build from timing out, let's prune some optional 
deps (and their possible version requirements)
-            setwd("r")
-            # create a backup to use later
-            file.copy("DESCRIPTION", "DESCRIPTION.bak")
-            d <- read.dcf("DESCRIPTION")
-            to_prune <- c("duckdb", "DBI", "dbplyr", "decor", "knitr", 
"rmarkdown", "pkgload", "reticulate")
-            pattern <- paste0("\\n?", to_prune, "( \\([^,]*\\))?,?", collapse 
= "|")
-            d[,"Suggests"] <- gsub(pattern, "", d[,"Suggests"])
-            write.dcf(d, "DESCRIPTION")
-      - name: R package cache
-        uses: actions/cache@v2
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        env:
+          GITHUB_PAT: "${{ github.token }}"
         with:
-          path: |
-            ${{ env.R_LIBS_USER }}/*
-          key: r-${{ matrix.config.rtools }}-R-LIBS-${{ 
hashFiles('r/DESCRIPTION') }}
-          restore-keys: r-${{ matrix.config.rtools }}-R-LIBS-
-      - name: Install R package dependencies
-        shell: Rscript {0}
-        run: |
-          # options(pkgType="win.binary") # processx doesn't have a binary for 
UCRT yet
-          install.packages(c("remotes", "rcmdcheck"))
-          remotes::install_deps("r", dependencies = TRUE)
-      - name: Restore DESCRIPTION for 3.6
-        if: ${{ matrix.config.rtools == 35 }}
-        run: |
-          rm r/DESCRIPTION
-          mv r/DESCRIPTION.bak r/DESCRIPTION
+          # For some arcane reason caching does not work on the windows runners
+          # most likely due to https://github.com/actions/cache/issues/815
+          cache: false
+          working-directory: 'r'
+          extra-packages: |
+            any::rcmdcheck
       - name: Check
         shell: Rscript {0}
         run: |
@@ -391,7 +327,7 @@ jobs:
         shell: Rscript {0}
         working-directory: r
         run: |
-          remotes::install_github("jonkeane/lintr@arrow-branch")
+          pak::pak("lintr")
           lintr::expect_lint_free()
       - name: Dump install logs
         shell: cmd
@@ -401,8 +337,3 @@ jobs:
         shell: bash
         run: find r/check -name 'testthat.Rout*' -exec cat '{}' \; || true
         if: always()
-      # We can remove this when we drop support for Rtools 3.5.
-      - name: Ensure using system tar in actions/cache
-        run: |
-          Write-Output "${Env:windir}\System32" | `
-            Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
diff --git a/ci/scripts/ccache_setup.sh b/ci/scripts/ccache_setup.sh
index f77fbb3736..6afcdda7d0 100755
--- a/ci/scripts/ccache_setup.sh
+++ b/ci/scripts/ccache_setup.sh
@@ -23,4 +23,4 @@ echo "ARROW_USE_CCACHE=ON" >> $GITHUB_ENV
 echo "CCACHE_COMPILERCHECK=content" >> $GITHUB_ENV
 echo "CCACHE_COMPRESS=1" >> $GITHUB_ENV
 echo "CCACHE_COMPRESSLEVEL=6" >> $GITHUB_ENV
-echo "CCACHE_MAXSIZE=500M" >> $GITHUB_ENV
+echo "CCACHE_MAXSIZE=1G" >> $GITHUB_ENV
diff --git a/ci/scripts/r_test.sh b/ci/scripts/r_test.sh
index 354655e1ad..8429187d88 100755
--- a/ci/scripts/r_test.sh
+++ b/ci/scripts/r_test.sh
@@ -33,7 +33,7 @@ printenv
 # In the other CI checks the files are synced but ignored.
 make sync-cpp
 
-if [ "$ARROW_R_FORCE_TESTS" = "true"]; then
+if [ "$ARROW_R_FORCE_TESTS" = "true" ]; then
   export ARROW_R_DEV=TRUE
   export NOT_CRAN=true
   export ARROW_LARGE_MEMORY_TESTS=TRUE
diff --git a/docker-compose.yml b/docker-compose.yml
index 7a8ca192f9..6ae62a5ae4 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -59,7 +59,7 @@ x-ccache: &ccache
   CCACHE_COMPILERCHECK: content
   CCACHE_COMPRESS: 1
   CCACHE_COMPRESSLEVEL: 6
-  CCACHE_MAXSIZE: 500M
+  CCACHE_MAXSIZE: 1G
   CCACHE_DIR: /ccache
 
 # CPU/memory limit presets to pass to Docker.
diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R
index 58a604ba61..ae68cc2118 100644
--- a/r/R/arrow-tabular.R
+++ b/r/R/arrow-tabular.R
@@ -161,7 +161,7 @@ as.data.frame.ArrowTabular <- function(x, row.names = NULL, 
optional = FALSE, ..
 
 #' @export
 `[[<-.ArrowTabular` <- function(x, i, value) {
-  if (!is.character(i) & !is.numeric(i)) {
+  if (!is.character(i) && !is.numeric(i)) {
     stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
   }
   assert_that(length(i) == 1, !is.na(i))
diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R
index acc1a41b02..948abf2829 100644
--- a/r/R/dataset-format.R
+++ b/r/R/dataset-format.R
@@ -132,7 +132,7 @@ CsvFileFormat$create <- function(...,
   column_names <- read_options$column_names
   schema_names <- names(schema)
 
-  if (!is.null(schema) & !identical(schema_names, column_names)) {
+  if (!is.null(schema) && !identical(schema_names, column_names)) {
     missing_from_schema <- setdiff(column_names, schema_names)
     missing_from_colnames <- setdiff(schema_names, column_names)
     message_colnames <- NULL
@@ -153,7 +153,7 @@ CsvFileFormat$create <- function(...,
       )
     }
 
-    if (length(missing_from_schema) == 0 & length(missing_from_colnames) == 0) 
{
+    if (length(missing_from_schema) == 0 && length(missing_from_colnames) == 
0) {
       message_order <- "`column_names` and `schema` field names match but are 
not in the same order"
     }
 
diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 607104d7ce..bc1a13075d 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -16,7 +16,7 @@
 # under the License.
 
 check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
-  if (tolower(Sys.info()[["sysname"]]) == "windows" & locale != "C") {
+  if (tolower(Sys.info()[["sysname"]]) == "windows" && locale != "C") {
     # MingW C++ std::locale only supports "C" and "POSIX"
     stop(paste0(
       "On Windows, time locales other than 'C' are not supported in Arrow. ",
diff --git a/r/R/dplyr-mutate.R b/r/R/dplyr-mutate.R
index 07802f8c83..653c1e6f25 100644
--- a/r/R/dplyr-mutate.R
+++ b/r/R/dplyr-mutate.R
@@ -117,7 +117,7 @@ transmute.arrow_dplyr_query <- function(.data, ...) {
   dots <- check_transmute_args(...)
   has_null <- map_lgl(dots, quo_is_null)
   .data <- dplyr::mutate(.data, !!!dots, .keep = "none")
-  if (is_empty(dots) | any(has_null)) {
+  if (is_empty(dots) || any(has_null)) {
     return(.data)
   }
 

Reply via email to