This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new b02191f fix(r): Ensure simple `list()`s can be converted without
arrow installed (#344)
b02191f is described below
commit b02191f9746bf16e3022cd083cd7ece13ea341b4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Jan 2 09:08:03 2024 -0400
fix(r): Ensure simple `list()`s can be converted without arrow installed
(#344)
Closes #343.
---
r/NAMESPACE | 4 +++
r/R/as-array.R | 53 +++++++++++++++++++++++++++++-
r/R/schema.R | 28 ++++++++++++++++
r/man/as_nanoarrow_schema.Rd | 4 +--
r/src/as_array.c | 4 +--
r/tests/testthat/test-as-array.R | 69 ++++++++++++++++++++++++++++++++++++++++
r/tests/testthat/test-schema.R | 19 +++++++++++
7 files changed, 176 insertions(+), 5 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 8152a6a..05ce598 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -26,10 +26,12 @@ S3method(as_nanoarrow_array,POSIXlt)
S3method(as_nanoarrow_array,RecordBatch)
S3method(as_nanoarrow_array,Table)
S3method(as_nanoarrow_array,blob)
+S3method(as_nanoarrow_array,data.frame)
S3method(as_nanoarrow_array,default)
S3method(as_nanoarrow_array,difftime)
S3method(as_nanoarrow_array,factor)
S3method(as_nanoarrow_array,integer64)
+S3method(as_nanoarrow_array,list)
S3method(as_nanoarrow_array,nanoarrow_array)
S3method(as_nanoarrow_array,nanoarrow_buffer)
S3method(as_nanoarrow_array,vctrs_unspecified)
@@ -59,6 +61,7 @@ S3method(infer_nanoarrow_ptype_extension,default)
S3method(infer_nanoarrow_ptype_extension,nanoarrow_extension_spec_vctrs)
S3method(infer_nanoarrow_schema,Array)
S3method(infer_nanoarrow_schema,ArrowTabular)
+S3method(infer_nanoarrow_schema,AsIs)
S3method(infer_nanoarrow_schema,ChunkedArray)
S3method(infer_nanoarrow_schema,Dataset)
S3method(infer_nanoarrow_schema,Date)
@@ -78,6 +81,7 @@ S3method(infer_nanoarrow_schema,factor)
S3method(infer_nanoarrow_schema,hms)
S3method(infer_nanoarrow_schema,integer)
S3method(infer_nanoarrow_schema,integer64)
+S3method(infer_nanoarrow_schema,list)
S3method(infer_nanoarrow_schema,logical)
S3method(infer_nanoarrow_schema,nanoarrow_array)
S3method(infer_nanoarrow_schema,nanoarrow_array_stream)
diff --git a/r/R/as-array.R b/r/R/as-array.R
index 8a12147..e7285d4 100644
--- a/r/R/as-array.R
+++ b/r/R/as-array.R
@@ -112,7 +112,6 @@ as_nanoarrow_array.integer64 <- function(x, ..., schema =
NULL) {
},
as_nanoarrow_array(as.double(x), schema = schema)
)
-
}
#' @export
@@ -195,6 +194,58 @@ as_nanoarrow_array.blob <- function(x, ..., schema = NULL)
{
as_nanoarrow_array(unclass(x), schema = schema)
}
+#' @export
+as_nanoarrow_array.data.frame <- function(x, ..., schema = NULL) {
+ # We need to override this to prevent the list implementation from handling
it
+ as_nanoarrow_array.default(x, ..., schema = schema)
+}
+
+#' @export
+as_nanoarrow_array.list <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ schema <- as_nanoarrow_schema(schema)
+ parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name) || parsed$type != "list") {
+ return(NextMethod())
+ }
+
+ # This R implementation can't handle complex nesting
+ if (startsWith(schema$children[[1]]$format, "+")) {
+ return(NextMethod())
+ }
+
+ array <- nanoarrow_array_init(schema)
+
+ child <- unlist(x, recursive = FALSE, use.names = FALSE)
+ if (is.null(child)) {
+ child_array <- as_nanoarrow_array.vctrs_unspecified(logical(), schema =
na_na())
+ } else {
+ child_array <- as_nanoarrow_array(child, schema = schema$children[[1]])
+ }
+
+ offsets <- c(0L, cumsum(lengths(x)))
+ is_na <- vapply(x, is.null, logical(1))
+ validity <- as_nanoarrow_array(!is_na)$buffers[[2]]
+
+ nanoarrow_array_modify(
+ array,
+ list(
+ length = length(x),
+ null_count = sum(is_na),
+ buffers = list(
+ validity,
+ offsets
+ ),
+ children = list(
+ child_array
+ )
+ )
+ )
+}
+
#' @export
as_nanoarrow_array.Date <- function(x, ..., schema = NULL) {
if (is.null(schema)) {
diff --git a/r/R/schema.R b/r/R/schema.R
index 4088bd4..e8a4b8d 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -163,6 +163,34 @@ infer_nanoarrow_schema.vctrs_list_of <- function(x, ...) {
}
}
+#' @export
+infer_nanoarrow_schema.AsIs <- function(x, ...) {
+ # NextMethod() goes directly to `default`
+ class(x) <- class(x)[-1]
+ infer_nanoarrow_schema(x)
+}
+
+#' @export
+infer_nanoarrow_schema.list <- function(x, ...) {
+ # TODO: Move this to C
+ is_null <- vapply(x, is.null, logical(1))
+
+ if (all(is_null)) {
+ return(na_list(na_na()))
+ }
+
+ is_raw <- vapply(x, is.raw, logical(1))
+ if (!all(is_raw | is_null)) {
+ return(NextMethod())
+ }
+
+ if (length(x) > 0 && sum(lengths(x)) > .Machine$integer.max) {
+ na_large_binary()
+ } else {
+ na_binary()
+ }
+}
+
#' @rdname as_nanoarrow_schema
#' @export
nanoarrow_schema_parse <- function(x, recursive = FALSE) {
diff --git a/r/man/as_nanoarrow_schema.Rd b/r/man/as_nanoarrow_schema.Rd
index 20f97f6..db32ff3 100644
--- a/r/man/as_nanoarrow_schema.Rd
+++ b/r/man/as_nanoarrow_schema.Rd
@@ -33,8 +33,8 @@ An object of class 'nanoarrow_schema'
\description{
In nanoarrow a 'schema' refers to a \verb{struct ArrowSchema} as defined in the
Arrow C Data interface. This data structure can be used to represent an
-\code{\link[arrow:Schema]{arrow::schema()}}, an
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note
that
-in nanoarrow, an \code{\link[arrow:Schema]{arrow::schema()}} and a
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
+\code{\link[arrow:schema]{arrow::schema()}}, an
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note
that
+in nanoarrow, an \code{\link[arrow:schema]{arrow::schema()}} and a
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
are represented identically.
}
\examples{
diff --git a/r/src/as_array.c b/r/src/as_array.c
index afba1f2..a69aa26 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -422,8 +422,8 @@ static void as_array_data_frame(SEXP x_sexp, struct
ArrowArray* array, SEXP sche
static void as_array_list(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
- // We handle list(raw()) for now but fall back to arrow for vctrs::list_of()
- // Arbitrary nested list support is complicated without some concept of a
+ // We handle list(raw()) in C but fall back to S3 for other types of list
output.
+ // Arbitrary nested list support is complicated in C without some concept of
a
// "builder", which we don't use.
if (schema_view->type != NANOARROW_TYPE_BINARY) {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index 52b144c..fc9417a 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -535,6 +535,75 @@ test_that("as_nanoarrow_array() works for blob::blob() ->
na_large_binary()", {
)
})
+
+test_that("as_nanoarrow_array() works for list(raw()) -> na_binary()", {
+ # Without nulls
+ array <- as_nanoarrow_array(lapply(letters, charToRaw))
+ expect_identical(infer_nanoarrow_schema(array)$format, "z")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(0:26))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(lapply(letters, charToRaw), list(NULL)))
+ expect_identical(infer_nanoarrow_schema(array)$format, "z")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(0:26, 26L)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+})
+
+test_that("as_nanoarrow_array() works for list(NULL) -> na_list(na_na())", {
+ array <- as_nanoarrow_array(list(NULL))
+ expect_identical(infer_nanoarrow_schema(array)$format, "+l")
+ expect_identical(array$length, 1L)
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ as.raw(as_nanoarrow_array(FALSE)$buffers[[2]])
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(0L, 0L)))
+ )
+ expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "n")
+ expect_identical(array$children[[1]]$length, 0L)
+})
+
+test_that("as_nanoarrow_array() works for list(integer()) ->
na_list(na_int32())", {
+ array <- as_nanoarrow_array(list(1:5, 6:10), schema = na_list(na_int32()))
+ expect_identical(infer_nanoarrow_schema(array)$format, "+l")
+ expect_identical(array$length, 2L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ as.raw(as_nanoarrow_array(c(TRUE, TRUE))$buffers[[2]])
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(0L, 5L, 10L)))
+ )
+ expect_identical(infer_nanoarrow_schema(array$children[[1]])$format, "i")
+ expect_identical(array$children[[1]]$length, 10L)
+})
+
test_that("as_nanoarrow_array() works for unspecified() -> na_na()", {
skip_if_not_installed("vctrs")
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index ee2cbd9..6dd86d8 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -100,6 +100,25 @@ test_that("infer_nanoarrow_schema() method works for
integer64()", {
expect_identical(infer_nanoarrow_schema(bit64::integer64())$format, "l")
})
+test_that("infer_nanoarrow_schema() method works for AsIs", {
+ expect_identical(
+ infer_nanoarrow_schema(I(integer()))$format,
+ infer_nanoarrow_schema(integer())$format
+ )
+})
+
+test_that("infer_nanoarrow_schema() returns list of null for empty or all null
list", {
+ expect_identical(infer_nanoarrow_schema(list())$format, "+l")
+ expect_identical(infer_nanoarrow_schema(list())$children[[1]]$format, "n")
+ expect_identical(infer_nanoarrow_schema(list(NULL))$format, "+l")
+ expect_identical(infer_nanoarrow_schema(list())$children[[1]]$format, "n")
+})
+
+test_that("infer_nanoarrow_schema() returns binary for list of raw", {
+ expect_identical(infer_nanoarrow_schema(list(raw()))$format, "z")
+ expect_identical(infer_nanoarrow_schema(list(raw(), NULL))$format, "z")
+})
+
test_that("nanoarrow_schema_parse() works", {
simple_info <- nanoarrow_schema_parse(na_int32())
expect_identical(simple_info$type, "int32")