[arrow] branch master updated: MINOR: [R] Cleanup skips and TODOs (#13576)

npr Tue, 12 Jul 2022 06:02:56 -0700

This is an automated email from the ASF dual-hosted git repository.

npr pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git



The following commit(s) were added to refs/heads/master by this push:
     new a01b0c20c7 MINOR: [R] Cleanup skips and TODOs (#13576)
a01b0c20c7 is described below

commit a01b0c20c7e2c3283cf195de38372b998dbf17d5
Author: Neal Richardson <[email protected]>
AuthorDate: Tue Jul 12 09:02:40 2022 -0400

    MINOR: [R] Cleanup skips and TODOs (#13576)
    
    Authored-by: Neal Richardson <[email protected]>
    Signed-off-by: Neal Richardson <[email protected]>
---
 r/R/array.R                                  |  6 ---
 r/R/arrow-datum.R                            | 16 +++++--
 r/R/chunked-array.R                          | 12 ------
 r/R/compute.R                                |  1 -
 r/R/dplyr-datetime-helpers.R                 | 20 ++++-----
 r/R/dplyr-distinct.R                         |  3 +-
 r/R/dplyr-funcs-datetime.R                   |  5 +--
 r/R/dplyr-summarize.R                        |  1 -
 r/src/altrep.cpp                             |  1 -
 r/tests/testthat/test-compute-arith.R        |  6 +--
 r/tests/testthat/test-compute-sort.R         |  4 +-
 r/tests/testthat/test-dplyr-collapse.R       | 12 ++++--
 r/tests/testthat/test-dplyr-distinct.R       |  2 +-
 r/tests/testthat/test-dplyr-filter.R         | 10 -----
 r/tests/testthat/test-dplyr-funcs-datetime.R | 63 +++++++++++++++-------------
 r/tests/testthat/test-dplyr-funcs-type.R     |  3 +-
 r/tests/testthat/test-dplyr-mutate.R         |  2 +-
 r/tests/testthat/test-dplyr-summarize.R      |  2 +-
 r/tools/autobrew                             |  3 +-
 19 files changed, 76 insertions(+), 96 deletions(-)

diff --git a/r/R/array.R b/r/R/array.R
index 89e9fbfa33..9ae7631e7d 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -155,12 +155,6 @@ Array <- R6Class("Array",
       assert_is(i, "Array")
       call_function("filter", self, i, options = list(keep_na = keep_na))
     },
-    SortIndices = function(descending = FALSE) {
-      assert_that(is.logical(descending))
-      assert_that(length(descending) == 1L)
-      assert_that(!is.na(descending))
-      call_function("array_sort_indices", self, options = list(order = descending))
-    },
     RangeEquals = function(other, start_idx, end_idx, other_start_idx = 0L) {
       assert_is(other, "Array")
       Array__RangeEquals(self, other, start_idx, end_idx, other_start_idx)
diff --git a/r/R/arrow-datum.R b/r/R/arrow-datum.R
index 39362628bb..8632ca3053 100644
--- a/r/R/arrow-datum.R
+++ b/r/R/arrow-datum.R
@@ -26,6 +26,16 @@ ArrowDatum <- R6Class("ArrowDatum",
       opts <- cast_options(safe, ...)
       opts$to_type <- as_type(target_type)
       call_function("cast", self, options = opts)
+    },
+    SortIndices = function(descending = FALSE) {
+      assert_that(is.logical(descending))
+      assert_that(length(descending) == 1L)
+      assert_that(!is.na(descending))
+      call_function(
+        "sort_indices",
+        self,
+        options = list(names = "", orders = as.integer(descending))
+      )
     }
   )
 )
@@ -55,8 +65,8 @@ is.na.ArrowDatum <- function(x) {
 #' @export
 is.nan.ArrowDatum <- function(x) {
   if (x$type_id() %in% TYPES_WITH_NAN) {
-    # TODO: if an option is added to the is_nan kernel to treat NA as NaN,
-    # use that to simplify the code here (ARROW-13366)
+    # TODO(ARROW-13366): if an option is added to the is_nan kernel to treat NA
+    # as NaN, use that to simplify the code here
     call_function("is_nan", x) & call_function("is_valid", x)
   } else {
     Scalar$create(FALSE)$as_array(length(x))
@@ -336,7 +346,7 @@ sort.ArrowDatum <- function(x, decreasing = FALSE, na.last = NA, ...) {
   # Arrow always sorts nulls at the end of the array. This corresponds to
   # sort(na.last = TRUE). For the other two cases (na.last = NA and
   # na.last = FALSE) we need to use workarounds.
-  # TODO: Implement this more cleanly after ARROW-12063
+  # TODO(ARROW-14085): use NullPlacement ArraySortOptions instead of this workaround
   if (is.na(na.last)) {
     # Filter out NAs before sorting
     x <- x$Filter(!is.na(x))
diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R
index 24ca7e6e58..c16f562017 100644
--- a/r/R/chunked-array.R
+++ b/r/R/chunked-array.R
@@ -113,18 +113,6 @@ ChunkedArray <- R6Class("ChunkedArray",
       }
       call_function("filter", self, i, options = list(keep_na = keep_na))
     },
-    SortIndices = function(descending = FALSE) {
-      assert_that(is.logical(descending))
-      assert_that(length(descending) == 1L)
-      assert_that(!is.na(descending))
-      # TODO: after ARROW-12042 is closed, review whether this and the
-      # Array$SortIndices definition can be consolidated
-      call_function(
-        "sort_indices",
-        self,
-        options = list(names = "", orders = as.integer(descending))
-      )
-    },
     View = function(type) {
       ChunkedArray__View(self, as_type(type))
     },
diff --git a/r/R/compute.R b/r/R/compute.R
index 3ce598c22c..1cd12f2e29 100644
--- a/r/R/compute.R
+++ b/r/R/compute.R
@@ -97,7 +97,6 @@ list_compute_functions <- function(pattern = NULL, ...) {
   if (!is.null(pattern)) {
     funcs <- grep(pattern, funcs, value = TRUE, ...)
   }
-  # TODO: Filtering of hash funcs will already happen in C++ with ARROW-13943
   funcs <- grep(
     "^hash_",
     funcs,
diff --git a/r/R/dplyr-datetime-helpers.R b/r/R/dplyr-datetime-helpers.R
index 7099d79c78..af2f1deef8 100644
--- a/r/R/dplyr-datetime-helpers.R
+++ b/r/R/dplyr-datetime-helpers.R
@@ -39,6 +39,7 @@ check_time_locale <- function(locale = Sys.getlocale("LC_TIME")) {
   "dnanoseconds" = list(1, "ns")
 )
 make_duration <- function(x, unit) {
+  # TODO(ARROW-15862): remove first cast to int64
   x <- build_expr("cast", x, options = cast_options(to_type = int64()))
   x$cast(duration(unit))
 }
@@ -49,9 +50,7 @@ binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
   }
 
   if (call_binding("is.POSIXct", x)) {
-    # the casting part might not be required once
-    # https://issues.apache.org/jira/browse/ARROW-14442 is solved
-    # TODO revisit the steps below once the PR for that issue is merged
+    # Make sure the timezone is reflected
     if (tz == "" && x$type()$timezone() != "") {
       tz <- x$type()$timezone()
     } else if (tz == "") {
@@ -59,8 +58,8 @@ binding_format_datetime <- function(x, format = "", tz = "", usetz = FALSE) {
     }
     x <- build_expr("cast", x, options = cast_options(to_type = timestamp(x$type()$unit(), tz)))
   }
-
-  build_expr("strftime", x, options = list(format = format, locale = Sys.getlocale("LC_TIME")))
+  opts <- list(format = format, locale = Sys.getlocale("LC_TIME"))
+  build_expr("strftime", x, options = opts)
 }
 
 # this is a helper function used for creating a difftime / duration objects from
@@ -104,7 +103,6 @@ binding_as_date <- function(x,
                             format = NULL,
                             tryFormats = "%Y-%m-%d",
                             origin = "1970-01-01") {
-
   if (call_binding("is.Date", x)) {
     return(x)
 
@@ -132,16 +130,15 @@ binding_as_date_numeric <- function(x, origin = "1970-01-01") {
 
   # Arrow does not support direct casting from double to date32(), but for
   # integer-like values we can go via int32()
-  # https://issues.apache.org/jira/browse/ARROW-15798
-  # TODO revisit if arrow decides to support double -> date casting
+  # TODO: revisit after ARROW-15798
   if (!call_binding("is.integer", x)) {
     x <- build_expr("cast", x, options = cast_options(to_type = int32()))
   }
 
   if (origin != "1970-01-01") {
     delta_in_sec <- call_binding("difftime", origin, "1970-01-01")
-    # TODO revisit once https://issues.apache.org/jira/browse/ARROW-15862
-    # (casting from int32 -> duration or double -> duration) is addressed
+    # TODO: revisit after ARROW-15862
+    # (casting from int32 -> duration or double -> duration)
     delta_in_days <- (delta_in_sec$cast(int64()) / 86400L)$cast(int32())
     x <- build_expr("+", x, delta_in_days)
   }
@@ -292,7 +289,6 @@ build_format_from_order <- function(order) {
 #'  * `augmented_x_qy`
 #' @noRd
 process_data_for_parsing <- function(x, orders) {
-
   processed_x <- x$cast(string())
 
   # make all separators (non-letters and non-numbers) into "-"
@@ -302,7 +298,7 @@ process_data_for_parsing <- function(x, orders) {
 
   # we need to transform `x` when orders are `ym`, `my`, and `yq`
   # for `ym` and `my` orders we add a day ("01")
-  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16627
+  # TODO: revisit after ARROW-16627
   augmented_x_ym <- NULL
   if (any(orders %in% c("ym", "my", "Ym", "mY"))) {
     # add day as "-01" if there is a "-" separator and as "01" if not
diff --git a/r/R/dplyr-distinct.R b/r/R/dplyr-distinct.R
index d5a8c81e6b..c663d84e65 100644
--- a/r/R/dplyr-distinct.R
+++ b/r/R/dplyr-distinct.R
@@ -19,7 +19,8 @@
 
 distinct.arrow_dplyr_query <- function(.data, ..., .keep_all = FALSE) {
   if (.keep_all == TRUE) {
-    # After ARROW-13993 is merged, we can implement this (ARROW-14045)
+    # TODO(ARROW-14045): the function is called "hash_one" (from ARROW-13993)
+    # May need to call it: `summarize(x = one(x), ...)` for x in non-group cols
     arrow_not_supported("`distinct()` with `.keep_all = TRUE`")
   }
 
diff --git a/r/R/dplyr-funcs-datetime.R b/r/R/dplyr-funcs-datetime.R
index f7d948fdf2..df830a6b66 100644
--- a/r/R/dplyr-funcs-datetime.R
+++ b/r/R/dplyr-funcs-datetime.R
@@ -297,7 +297,6 @@ register_bindings_datetime_conversion <- function() {
                                        tryFormats = "%Y-%m-%d",
                                        origin = "1970-01-01",
                                        tz = "UTC") {
-
     if (is.null(format) && length(tryFormats) > 1) {
       abort(
         paste(
@@ -461,8 +460,7 @@ register_bindings_duration <- function() {
 
     # numeric -> duration not supported in Arrow yet so we use int64() as an
     # intermediate step
-    # TODO revisit if https://issues.apache.org/jira/browse/ARROW-15862 results
-    # in numeric -> duration support
+    # TODO: revisit after ARROW-15862
 
     if (call_binding("is.numeric", x)) {
       # coerce x to be int64(). it should work for integer-like doubles and fail
@@ -567,7 +565,6 @@ register_bindings_datetime_parsers <- function() {
     } else {
       coalesce_output
     }
-
   })
 
   ymd_parser_vec <- c("ymd", "ydm", "mdy", "myd", "dmy", "dym", "ym", "my", "yq")
diff --git a/r/R/dplyr-summarize.R b/r/R/dplyr-summarize.R
index 59b6533742..9226c476cb 100644
--- a/r/R/dplyr-summarize.R
+++ b/r/R/dplyr-summarize.R
@@ -280,7 +280,6 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
     } else {
       stop(paste("Invalid .groups argument:", .groups))
     }
-    # TODO: shouldn't we be doing something with `drop_empty_groups` in summarize? (ARROW-14044)
     out$drop_empty_groups <- .data$drop_empty_groups
   }
   out
diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp
index 33fe09d398..f0cd33fa95 100644
--- a/r/src/altrep.cpp
+++ b/r/src/altrep.cpp
@@ -85,7 +85,6 @@ const std::shared_ptr<ChunkedArray>& GetChunkedArray(SEXP alt) {
 }
 
 struct ArrayResolve {
-  // TODO: ARROW-11989
   ArrayResolve(const std::shared_ptr<ChunkedArray>& chunked_array, int64_t i) {
     for (int idx_chunk = 0; idx_chunk < chunked_array->num_chunks(); idx_chunk++) {
       std::shared_ptr<Array> chunk = chunked_array->chunk(idx_chunk);
diff --git a/r/tests/testthat/test-compute-arith.R b/r/tests/testthat/test-compute-arith.R
index 02681c2585..1f3432363f 100644
--- a/r/tests/testthat/test-compute-arith.R
+++ b/r/tests/testthat/test-compute-arith.R
@@ -129,9 +129,9 @@ test_that("Power", {
 test_that("Dates casting", {
   a <- Array$create(c(Sys.Date() + 1:4, NA_integer_))
 
-  skip("ARROW-11090 (date/datetime arithmetic)")
-  # Error: NotImplemented: Function add_checked has no kernel matching input types (array[date32[day]], scalar[double])
-  expect_equal(a + 2, Array$create(c((Sys.Date() + 1:4) + 2), NA_integer_))
+  skip("ARROW-17043 (date/datetime arithmetic with integers)")
+  # Error: NotImplemented: Function 'add_checked' has no kernel matching input types (timestamp[s], int32)
+  expect_equal(a + 2L, Array$create(c((Sys.Date() + 1:4) + 2), NA_integer_))
 })
 
 test_that("Unary Ops group generics work on Array objects", {
diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R
index e3574d86f7..f521efeddc 100644
--- a/r/tests/testthat/test-compute-sort.R
+++ b/r/tests/testthat/test-compute-sort.R
@@ -39,7 +39,7 @@ test_that("Array$SortIndices()", {
     Array$create(int)$SortIndices(),
     Array$create(order(int) - 1L, type = uint64())
   )
-  # Need to remove NAs because ARROW-12063
+  # TODO(ARROW-14085): remove workaround once NA behavior is supported
   int <- na.omit(int)
   expect_equal(
     Array$create(int)$SortIndices(descending = TRUE),
@@ -57,7 +57,7 @@ test_that("ChunkedArray$SortIndices()", {
     ChunkedArray$create(int[1:4], int[5:length(int)])$SortIndices(),
     Array$create(order(int) - 1L, type = uint64())
   )
-  # Need to remove NAs because ARROW-12063
+  # TODO(ARROW-14085): remove workaround once NA behavior is supported
   int <- na.omit(int)
   expect_equal(
     ChunkedArray$create(int[1:4], int[5:length(int)])$SortIndices(descending = TRUE),
diff --git a/r/tests/testthat/test-dplyr-collapse.R b/r/tests/testthat/test-dplyr-collapse.R
index 746c4aa074..3c121780da 100644
--- a/r/tests/testthat/test-dplyr-collapse.R
+++ b/r/tests/testthat/test-dplyr-collapse.R
@@ -195,12 +195,18 @@ See $.data for the source Arrow object",
   # Component "total": Mean relative difference: 0.9230769
   # Component "extra": Mean relative difference: 0.9230769
   expect_equal(
-    q %>% head(1) %>% collect(),
+    q %>%
+      arrange(lgl) %>%
+      head(1) %>%
+      collect(),
     tibble::tibble(lgl = FALSE, total = 8L, extra = 40)
   )
-  skip("TODO (ARROW-1XXXX): implement sorting option about where NAs go")
+  skip("TODO (ARROW-16630): make sure BottomK can handle NA ordering")
   expect_equal(
-    q %>% tail(1) %>% collect(),
+    q %>%
+      arrange(lgl) %>%
+      tail(1) %>%
+      collect(),
     tibble::tibble(lgl = NA, total = 25L, extra = 125)
   )
 })
diff --git a/r/tests/testthat/test-dplyr-distinct.R b/r/tests/testthat/test-dplyr-distinct.R
index 876c192ec3..8b42614084 100644
--- a/r/tests/testthat/test-dplyr-distinct.R
+++ b/r/tests/testthat/test-dplyr-distinct.R
@@ -93,7 +93,7 @@ test_that("distinct() can contain expressions", {
 })
 
 test_that("distinct() can return all columns", {
-  skip("ARROW-13993 - need this to return correct rows from other cols")
+  skip("ARROW-14045")
   compare_dplyr_binding(
     .input %>%
       distinct(lgl, .keep_all = TRUE) %>%
diff --git a/r/tests/testthat/test-dplyr-filter.R b/r/tests/testthat/test-dplyr-filter.R
index 12b2a47f4a..60c740a5c1 100644
--- a/r/tests/testthat/test-dplyr-filter.R
+++ b/r/tests/testthat/test-dplyr-filter.R
@@ -399,14 +399,4 @@ test_that("filter() with .data pronoun", {
       collect(),
     tbl
   )
-
-  skip("test now faulty - code no longer gives error & outputs a empty tibble")
-  # but there is an error if we don't override the masking with `.env`
-  compare_dplyr_error(
-    .input %>%
-      filter(.data$dbl > chr) %>%
-      select(.data$chr, .data$int, .data$lgl) %>%
-      collect(),
-    tbl
-  )
 })
diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R
index 94855fd7d6..ca70de41d0 100644
--- a/r/tests/testthat/test-dplyr-funcs-datetime.R
+++ b/r/tests/testthat/test-dplyr-funcs-datetime.R
@@ -25,7 +25,7 @@ library(dplyr, warn.conflicts = FALSE)
 # base::strptime() defaults to local timezone
 # but arrow's strptime defaults to UTC.
 # So that tests are consistent, set the local timezone to UTC
-# TODO: consider reevaluating this workaround after ARROW-12980
+# TODO: consider reevaluating now that ARROW-12980 has merged
 withr::local_timezone("UTC")
 
 if (tolower(Sys.info()[["sysname"]]) == "windows") {
@@ -37,8 +37,7 @@ test_date <- as.POSIXct("2017-01-01 00:00:11.3456789", tz = "Pacific/Marquesas")
 
 test_df <- tibble::tibble(
   # test_date + 1 turns the tzone = "" to NULL, which is functionally equivalent
-  # so we can run some tests on Windows, but this skirts around
-  # https://issues.apache.org/jira/browse/ARROW-13588
+  # so we can run some tests on Windows, but this skirts around ARROW-13588.
   # That issue is tough because in C++, "" is the "no timezone" value
   # due to static typing, so we can't distinguish a literal "" from NULL
   datetime = c(test_date, NA) + 1,
@@ -743,7 +742,7 @@ test_that("leap_year mirror lubridate", {
         "1998-01-01", # not leap year
         "1996-01-01", # leap year (divide by 4 rule)
         "1900-01-01", # not leap year (divide by 100 rule)
-        "2000-01-01"  # leap year (divide by 400 rule)
+        "2000-01-01" # leap year (divide by 400 rule)
       ))
     )
   )
@@ -941,7 +940,7 @@ test_that("date works in arrow", {
   # we can't (for now) use namespacing, so we need to make sure lubridate::date()
   # and not base::date() is being used. This is due to the way testthat runs and
   # normal use of arrow would not have to do this explicitly.
-  # TODO remove once https://issues.apache.org/jira/browse/ARROW-14575 is done
+  # TODO: remove after ARROW-14575
   date <- lubridate::date
 
   compare_dplyr_binding(
@@ -1584,7 +1583,7 @@ test_that("`as.Date()` and `as_date()`", {
 
   # strptime does not support a partial format - Arrow returns NA, while
   # lubridate parses correctly
-  # TODO revisit once - https://issues.apache.org/jira/browse/ARROW-15813
+  # TODO: revisit after ARROW-15813
   expect_error(
     expect_equal(
       test_df %>%
@@ -1611,16 +1610,13 @@ test_that("`as.Date()` and `as_date()`", {
   )
 
   # we do not support as.Date() with double/ float (error surfaced from C++)
-  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-15798
+  # TODO: revisit after ARROW-15798
   expect_error(
     test_df %>%
       arrow_table() %>%
       mutate(date_double = as.Date(double_var, origin = "1970-01-01")) %>%
       collect()
   )
-
-  # we do not support as_date with double/ float (error surfaced from C++)
-  # TODO: revisit after https://issues.apache.org/jira/browse/ARROW-15798
   expect_error(
     test_df %>%
       arrow_table() %>%
@@ -1742,8 +1738,7 @@ test_that("parse_date_time() works with year, month, and date components", {
     )
   )
 
-  # locale (affecting "%b% and "%B" formats) does not work properly on Windows
-  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
+  # TODO(ARROW-16443): locale (affecting "%b% and "%B") does not work on Windows
   skip_on_os("windows")
   compare_dplyr_binding(
     .input %>%
@@ -1893,7 +1888,6 @@ test_that("ym, my & yq parsers", {
 })
 
 test_that("lubridate's fast_strptime", {
-
   compare_dplyr_binding(
     .input %>%
       mutate(
@@ -2032,7 +2026,7 @@ test_that("parse_date_time with hours, minutes and seconds components", {
     dmy_hms_string =
       c("09-01-67 12:34:56", "22-05-1970 20:13:59", "220887201359", NA),
     dmy_hm_string =
-      c("09-01-67 12:34", "22-05-1970 20:13",  "2208872013", NA),
+      c("09-01-67 12:34", "22-05-1970 20:13", "2208872013", NA),
     dmy_h_string =
       c("09-01-67 12", "22-05-1970 20", "22088720", NA),
     mdy_hms_string =
@@ -2110,15 +2104,14 @@ test_that("parse_date_time with hours, minutes and seconds components", {
       ) %>%
       collect(),
     tibble(
-     ymd_ims_string =
-       c("67-01-09 9:34:56", "1970-05-22 10:13:59", "19870822171359", NA)
-   )
+      ymd_ims_string =
+        c("67-01-09 9:34:56", "1970-05-22 10:13:59", "19870822171359", NA)
+    )
   )
 })
 
 test_that("parse_date_time with month names and HMS", {
-  # locale (affecting "%b% and "%B" formats) does not work properly on Windows
-  # TODO revisit once https://issues.apache.org/jira/browse/ARROW-16443 is done
+  # TODO(ARROW-16443): locale (affecting "%b% and "%B") does not work on Windows
   skip_on_os("windows")
 
   # these functions' internals use some string processing which requires the
@@ -2197,7 +2190,7 @@ test_that("parse_date_time with truncated formats", {
   # RE2 library (not available on Windows with R 3.6)
   skip_if_not_available("re2")
 
-  test_truncation_df <-  tibble(
+  test_truncation_df <- tibble(
     truncated_ymd_string =
       c(
         "2022-05-19 13:46:51",
@@ -2290,22 +2283,26 @@ test_that("build_formats() and build_format_from_order()", {
       "%m%y%d", "%B%y%d", "%b%y%d", "%m%Y%d", "%B%Y%d", "%b%Y%d",
       # formats from "%Y-%d-%m" format
       "%y-%d-%m", "%Y-%d-%m", "%y-%d-%B", "%Y-%d-%B", "%y-%d-%b", "%Y-%d-%b",
-      "%y%d%m", "%Y%d%m", "%y%d%B", "%Y%d%B", "%y%d%b", "%Y%d%b")
+      "%y%d%m", "%Y%d%m", "%y%d%B", "%Y%d%B", "%y%d%b", "%Y%d%b"
+    )
   )
 
   expect_equal(
     build_formats("ymd_HMS"),
-    c("%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
+    c(
+      "%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
       "%Y-%B-%d-%H-%M-%S", "%y-%b-%d-%H-%M-%S", "%Y-%b-%d-%H-%M-%S",
       "%y%m%d%H%M%S", "%Y%m%d%H%M%S", "%y%B%d%H%M%S", "%Y%B%d%H%M%S",
-      "%y%b%d%H%M%S", "%Y%b%d%H%M%S")
+      "%y%b%d%H%M%S", "%Y%b%d%H%M%S"
+    )
   )
 
   # when order is one of "yq", "qy", "ym" or"my" the data is augmented to "ymd"
   # or "ydm" and the formats are built accordingly
   ymd_formats <- c(
     "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
-    "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d")
+    "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d"
+  )
   expect_equal(
     build_formats("yq"),
     ymd_formats
@@ -2329,8 +2326,10 @@ test_that("build_formats() and build_format_from_order()", {
 
   expect_equal(
     build_formats("my"),
-    c("%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d",
-      "%m%y%d", "%B%y%d", "%b%y%d", "%m%Y%d", "%B%Y%d", "%b%Y%d")
+    c(
+      "%m-%y-%d", "%B-%y-%d", "%b-%y-%d", "%m-%Y-%d", "%B-%Y-%d", "%b-%Y-%d",
+      "%m%y%d", "%B%y%d", "%b%y%d", "%m%Y%d", "%B%Y%d", "%b%Y%d"
+    )
   )
 
   # ab not supported yet
@@ -2346,15 +2345,19 @@ test_that("build_formats() and build_format_from_order()", {
 
   expect_equal(
     build_format_from_order("ymd"),
-    c("%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
-      "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d")
+    c(
+      "%y-%m-%d", "%Y-%m-%d", "%y-%B-%d", "%Y-%B-%d", "%y-%b-%d", "%Y-%b-%d",
+      "%y%m%d", "%Y%m%d", "%y%B%d", "%Y%B%d", "%y%b%d", "%Y%b%d"
+    )
   )
 
   expect_equal(
     build_format_from_order("ymdHMS"),
-    c("%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
+    c(
+      "%y-%m-%d-%H-%M-%S", "%Y-%m-%d-%H-%M-%S", "%y-%B-%d-%H-%M-%S",
       "%Y-%B-%d-%H-%M-%S", "%y-%b-%d-%H-%M-%S", "%Y-%b-%d-%H-%M-%S",
       "%y%m%d%H%M%S", "%Y%m%d%H%M%S", "%y%B%d%H%M%S", "%Y%B%d%H%M%S",
-      "%y%b%d%H%M%S", "%Y%b%d%H%M%S")
+      "%y%b%d%H%M%S", "%Y%b%d%H%M%S"
+    )
   )
 })
diff --git a/r/tests/testthat/test-dplyr-funcs-type.R b/r/tests/testthat/test-dplyr-funcs-type.R
index 57561fb08b..7ee0ec4d0f 100644
--- a/r/tests/testthat/test-dplyr-funcs-type.R
+++ b/r/tests/testthat/test-dplyr-funcs-type.R
@@ -806,8 +806,7 @@ test_that("nested structs can be created from scalars and existing data frames",
 })
 
 test_that("format date/time", {
-  # locale issues
-  # TODO revisit after https://issues.apache.org/jira/browse/ARROW-16399 is done
+  # TODO(ARROW-16399): remove this workaround
   if (tolower(Sys.info()[["sysname"]]) == "windows") {
     withr::local_locale(LC_TIME = "C")
   }
diff --git a/r/tests/testthat/test-dplyr-mutate.R b/r/tests/testthat/test-dplyr-mutate.R
index a746335940..beb893afec 100644
--- a/r/tests/testthat/test-dplyr-mutate.R
+++ b/r/tests/testthat/test-dplyr-mutate.R
@@ -365,7 +365,7 @@ test_that("dplyr::mutate's examples", {
   # The mutate operation may yield different results on grouped
   # tibbles because the expressions are computed within groups.
   # The following normalises `mass` by the global average:
-  # TODO: ARROW-13926
+  # TODO(ARROW-13926): support window functions
   compare_dplyr_binding(
     .input %>%
       select(name, mass, species) %>%
diff --git a/r/tests/testthat/test-dplyr-summarize.R b/r/tests/testthat/test-dplyr-summarize.R
index 73e3312ee0..5ad7425ee8 100644
--- a/r/tests/testthat/test-dplyr-summarize.R
+++ b/r/tests/testthat/test-dplyr-summarize.R
@@ -548,7 +548,6 @@ test_that("min() and max() on character strings", {
       collect(),
     tbl,
   )
-  skip("Strings not supported by hash_min_max (ARROW-13988)")
   compare_dplyr_binding(
     .input %>%
       group_by(fct) %>%
@@ -556,6 +555,7 @@ test_that("min() and max() on character strings", {
         min_chr = min(chr, na.rm = TRUE),
         max_chr = max(chr, na.rm = TRUE)
       ) %>%
+      arrange(min_chr) %>%
       collect(),
     tbl,
   )
diff --git a/r/tools/autobrew b/r/tools/autobrew
index 37a94472bd..ea46be2c0d 100644
--- a/r/tools/autobrew
+++ b/r/tools/autobrew
@@ -60,8 +60,7 @@ for FILE in $BREWDIR/Cellar/*/*/lib/*.a; do
   PKG_LIBS=`echo $PKG_LIBS | sed "s/-l$LIBNAME/-lbrew$LIBNAME/g"`
 done
 
-# TODO: add -DARROW_R_WITH_GCS
-PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON -DARROW_R_WITH_S3"
+PKG_CFLAGS="-I$BREWDIR/opt/$PKG_BREW_NAME/include -DARROW_R_WITH_PARQUET -DARROW_R_WITH_DATASET -DARROW_R_WITH_JSON -DARROW_R_WITH_S3 -DARROW_R_WITH_GCS"
 
 unset HOMEBREW_NO_ANALYTICS
 unset HOMEBREW_NO_AUTO_UPDATE

[arrow] branch master updated: MINOR: [R] Cleanup skips and TODOs (#13576)

Reply via email to