This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new b02191f  fix(r): Ensure simple `list()`s can be converted without 
arrow installed (#344)
b02191f is described below

commit b02191f9746bf16e3022cd083cd7ece13ea341b4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Jan 2 09:08:03 2024 -0400

    fix(r): Ensure simple `list()`s can be converted without arrow installed 
(#344)
    
    Closes #343.
---
 r/NAMESPACE                      |  4 +++
 r/R/as-array.R                   | 53 +++++++++++++++++++++++++++++-
 r/R/schema.R                     | 28 ++++++++++++++++
 r/man/as_nanoarrow_schema.Rd     |  4 +--
 r/src/as_array.c                 |  4 +--
 r/tests/testthat/test-as-array.R | 69 ++++++++++++++++++++++++++++++++++++++++
 r/tests/testthat/test-schema.R   | 19 +++++++++++
 7 files changed, 176 insertions(+), 5 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index 8152a6a..05ce598 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -26,10 +26,12 @@ S3method(as_nanoarrow_array,POSIXlt)
 S3method(as_nanoarrow_array,RecordBatch)
 S3method(as_nanoarrow_array,Table)
 S3method(as_nanoarrow_array,blob)
+S3method(as_nanoarrow_array,data.frame)
 S3method(as_nanoarrow_array,default)
 S3method(as_nanoarrow_array,difftime)
 S3method(as_nanoarrow_array,factor)
 S3method(as_nanoarrow_array,integer64)
+S3method(as_nanoarrow_array,list)
 S3method(as_nanoarrow_array,nanoarrow_array)
 S3method(as_nanoarrow_array,nanoarrow_buffer)
 S3method(as_nanoarrow_array,vctrs_unspecified)
@@ -59,6 +61,7 @@ S3method(infer_nanoarrow_ptype_extension,default)
 S3method(infer_nanoarrow_ptype_extension,nanoarrow_extension_spec_vctrs)
 S3method(infer_nanoarrow_schema,Array)
 S3method(infer_nanoarrow_schema,ArrowTabular)
+S3method(infer_nanoarrow_schema,AsIs)
 S3method(infer_nanoarrow_schema,ChunkedArray)
 S3method(infer_nanoarrow_schema,Dataset)
 S3method(infer_nanoarrow_schema,Date)
@@ -78,6 +81,7 @@ S3method(infer_nanoarrow_schema,factor)
 S3method(infer_nanoarrow_schema,hms)
 S3method(infer_nanoarrow_schema,integer)
 S3method(infer_nanoarrow_schema,integer64)
+S3method(infer_nanoarrow_schema,list)
 S3method(infer_nanoarrow_schema,logical)
 S3method(infer_nanoarrow_schema,nanoarrow_array)
 S3method(infer_nanoarrow_schema,nanoarrow_array_stream)
diff --git a/r/R/as-array.R b/r/R/as-array.R
index 8a12147..e7285d4 100644
--- a/r/R/as-array.R
+++ b/r/R/as-array.R
@@ -112,7 +112,6 @@ as_nanoarrow_array.integer64 <- function(x, ..., schema = 
NULL) {
     },
     as_nanoarrow_array(as.double(x), schema = schema)
   )
-
 }
 
 #' @export
@@ -195,6 +194,58 @@ as_nanoarrow_array.blob <- function(x, ..., schema = NULL) 
{
   as_nanoarrow_array(unclass(x), schema = schema)
 }
 
+#' @export
+as_nanoarrow_array.data.frame <- function(x, ..., schema = NULL) {
+  # We need to override this to prevent the list implementation from handling 
it
+  as_nanoarrow_array.default(x, ..., schema = schema)
+}
+
+#' @export
+as_nanoarrow_array.list <- function(x, ..., schema = NULL) {
+  if (is.null(schema)) {
+    schema <- infer_nanoarrow_schema(x)
+  }
+
+  schema <- as_nanoarrow_schema(schema)
+  parsed <- nanoarrow_schema_parse(schema)
+  if (!is.null(parsed$extension_name) || parsed$type != "list") {
+    return(NextMethod())
+  }
+
+  # This R implementation can't handle complex nesting
+  if (startsWith(schema$children[[1]]$format, "+")) {
+    return(NextMethod())
+  }
+
+  array <- nanoarrow_array_init(schema)
+
+  child <- unlist(x, recursive = FALSE, use.names = FALSE)
+  if (is.null(child)) {
+    child_array <- as_nanoarrow_array.vctrs_unspecified(logical(), schema = 
na_na())
+  } else {
+    child_array <- as_nanoarrow_array(child, schema = schema$children[[1]])
+  }
+
+  offsets <- c(0L, cumsum(lengths(x)))
+  is_na <- vapply(x, is.null, logical(1))
+  validity <- as_nanoarrow_array(!is_na)$buffers[[2]]
+
+  nanoarrow_array_modify(
+    array,
+    list(
+      length = length(x),
+      null_count = sum(is_na),
+      buffers = list(
+        validity,
+        offsets
+      ),
+      children = list(
+        child_array
+      )
+    )
+  )
+}
+
 #' @export
 as_nanoarrow_array.Date <- function(x, ..., schema = NULL) {
   if (is.null(schema)) {
diff --git a/r/R/schema.R b/r/R/schema.R
index 4088bd4..e8a4b8d 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -163,6 +163,34 @@ infer_nanoarrow_schema.vctrs_list_of <- function(x, ...) {
   }
 }
 
+#' @export
+infer_nanoarrow_schema.AsIs <- function(x, ...) {
+  # NextMethod() goes directly to `default`
+  class(x) <- class(x)[-1]
+  infer_nanoarrow_schema(x)
+}
+
+#' @export
+infer_nanoarrow_schema.list <- function(x, ...) {
+  # TODO: Move this to C
+  is_null <- vapply(x, is.null, logical(1))
+
+  if (all(is_null)) {
+    return(na_list(na_na()))
+  }
+
+  is_raw <- vapply(x, is.raw, logical(1))
+  if (!all(is_raw | is_null)) {
+    return(NextMethod())
+  }
+
+  if (length(x) > 0 && sum(lengths(x)) > .Machine$integer.max) {
+    na_large_binary()
+  } else {
+    na_binary()
+  }
+}
+
 #' @rdname as_nanoarrow_schema
 #' @export
 nanoarrow_schema_parse <- function(x, recursive = FALSE) {
diff --git a/r/man/as_nanoarrow_schema.Rd b/r/man/as_nanoarrow_schema.Rd
index 20f97f6..db32ff3 100644
--- a/r/man/as_nanoarrow_schema.Rd
+++ b/r/man/as_nanoarrow_schema.Rd
@@ -33,8 +33,8 @@ An object of class 'nanoarrow_schema'
 \description{
 In nanoarrow a 'schema' refers to a \verb{struct ArrowSchema} as defined in the
 Arrow C Data interface. This data structure can be used to represent an
-\code{\link[arrow:Schema]{arrow::schema()}}, an 
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note 
that
-in nanoarrow, an \code{\link[arrow:Schema]{arrow::schema()}} and a 
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
+\code{\link[arrow:schema]{arrow::schema()}}, an 
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note 
that
+in nanoarrow, an \code{\link[arrow:schema]{arrow::schema()}} and a 
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
 are represented identically.
 }
 \examples{
diff --git a/r/src/as_array.c b/r/src/as_array.c
index afba1f2..a69aa26 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -422,8 +422,8 @@ static void as_array_data_frame(SEXP x_sexp, struct 
ArrowArray* array, SEXP sche
 
 static void as_array_list(SEXP x_sexp, struct ArrowArray* array, SEXP 
schema_xptr,
                           struct ArrowSchemaView* schema_view, struct 
ArrowError* error) {
-  // We handle list(raw()) for now but fall back to arrow for vctrs::list_of()
-  // Arbitrary nested list support is complicated without some concept of a
+  // We handle list(raw()) in C but fall back to S3 for other types of list 
output.
+  // Arbitrary nested list support is complicated in C without some concept of 
a
   // "builder", which we don't use.
   if (schema_view->type != NANOARROW_TYPE_BINARY) {
     call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index 52b144c..fc9417a 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -535,6 +535,75 @@ test_that("as_nanoarrow_array() works for blob::blob() -> 
na_large_binary()", {
   )
 })
 
+
+test_that("as_nanoarrow_array() works for list(raw()) -> na_binary()", {
+  # Without nulls
+  array <- as_nanoarrow_array(lapply(letters, charToRaw))
+  expect_identical(infer_nanoarrow_schema(array)$format, "z")
+  expect_identical(as.raw(array$buffers[[1]]), raw())
+  expect_identical(array$offset, 0L)
+  expect_identical(array$null_count, 0L)
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    as.raw(as_nanoarrow_buffer(0:26))
+  )
+  expect_identical(
+    as.raw(array$buffers[[3]]),
+    as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+  )
+
+  # With nulls
+  array <- as_nanoarrow_array(c(lapply(letters, charToRaw), list(NULL)))
+  expect_identical(infer_nanoarrow_schema(array)$format, "z")
+  expect_identical(array$null_count, 1L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+  )
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    as.raw(as_nanoarrow_buffer(c(0:26, 26L)))
+  )
+  expect_identical(
+    as.raw(array$buffers[[3]]),
+    as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+  )
+})
+
+test_that("as_nanoarrow_array() works for list(NULL) -> na_list(na_na())", {
+  array <- as_nanoarrow_array(list(NULL))
+  expect_identical(infer_nanoarrow_schema(array)$format, "+l")
+  expect_identical(array$length, 1L)
+  expect_identical(array$null_count, 1L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    as.raw(as_nanoarrow_array(FALSE)$buffers[[2]])
+  )
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    as.raw(as_nanoarrow_buffer(c(0L, 0L)))
+  )
+  expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "n")
+  expect_identical(array$children[[1]]$length, 0L)
+})
+
+test_that("as_nanoarrow_array() works for list(integer()) -> 
na_list(na_int32())", {
+  array <- as_nanoarrow_array(list(1:5, 6:10), schema = na_list(na_int32()))
+  expect_identical(infer_nanoarrow_schema(array)$format, "+l")
+  expect_identical(array$length, 2L)
+  expect_identical(array$null_count, 0L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    as.raw(as_nanoarrow_array(c(TRUE, TRUE))$buffers[[2]])
+  )
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    as.raw(as_nanoarrow_buffer(c(0L, 5L, 10L)))
+  )
+  expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "i")
+  expect_identical(array$children[[1]]$length, 10L)
+})
+
 test_that("as_nanoarrow_array() works for unspecified() -> na_na()", {
   skip_if_not_installed("vctrs")
 
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index ee2cbd9..6dd86d8 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -100,6 +100,25 @@ test_that("infer_nanoarrow_schema() method works for 
integer64()", {
   expect_identical(infer_nanoarrow_schema(bit64::integer64())$format, "l")
 })
 
+test_that("infer_nanoarrow_schema() method works for AsIs", {
+  expect_identical(
+    infer_nanoarrow_schema(I(integer()))$format,
+    infer_nanoarrow_schema(integer())$format
+  )
+})
+
+test_that("infer_nanoarrow_schema() returns list of null for empty or all null 
list", {
+  expect_identical(infer_nanoarrow_schema(list())$format, "+l")
+  expect_identical(infer_nanoarrow_schema(list())$children[[1]]$format, "n")
+  expect_identical(infer_nanoarrow_schema(list(NULL))$format, "+l")
+  expect_identical(infer_nanoarrow_schema(list())$children[[1]]$format, "n")
+})
+
+test_that("infer_nanoarrow_schema() returns binary for list of raw", {
+  expect_identical(infer_nanoarrow_schema(list(raw()))$format, "z")
+  expect_identical(infer_nanoarrow_schema(list(raw(), NULL))$format, "z")
+})
+
 test_that("nanoarrow_schema_parse() works", {
   simple_info <- nanoarrow_schema_parse(na_int32())
   expect_identical(simple_info$type, "int32")

Reply via email to