This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 65bb552 feat(r): Implement extension type registration/conversion
(#288)
65bb552 is described below
commit 65bb5520e0bd998032e7b62cf302c62f74067c88
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Sep 1 12:39:56 2023 -0300
feat(r): Implement extension type registration/conversion (#288)
This PR implements extension type registration, enabling nanoarrow or
other R packages to register conversions to and from R vectors. It also
implements the "vctrs" extension type, which is also implemented in the
Arrow R package: this extension type allows any R vector whose storage
type is supported by nanoarrow to be roundtripped through an Arrow
array. It's also a good test for the extension type mechanism since it
has diverse storage and R vector type requirements.
This required some changes to the conversion system: when calling
`as_nanoarrow_array(x, type = some_extension_type())`, the extension
needs to have the first chance at handling the conversion. When calling
`convert_array(some_extension_array)`, similarly, it needs to have a
chance to perform the conversion. The conversion process in both
directions has become sufficiently complicated that I think it may need
some refactoring; however, I think the intended logic and test coverage
is there in this PR.
``` r
library(nanoarrow)
# Works from Arrow
vctr <- wk::wkt("POINT (0 1)", crs = "OGC:CRS84")
arrow_ext_array <- arrow::vctrs_extension_array(vctr)
(array <- as_nanoarrow_array(arrow_ext_array))
#> <nanoarrow_array arrow.r.vctrs{string}[1]>
#> $ length : int 1
#> $ null_count: int 0
#> $ offset : int 0
#> $ buffers :List of 3
#> ..$ :<nanoarrow_buffer validity<bool>[0][0 b]> ``
#> ..$ :<nanoarrow_buffer data_offset<int32>[2][8 b]> `0 11`
#> ..$ :<nanoarrow_buffer data<string>[11 b]> `POINT (0 1)`
#> $ dictionary: NULL
#> $ children : list()
infer_nanoarrow_ptype(array)
#> <wk_wkt[0] with CRS=OGC:CRS84>
convert_array(array)
#> <wk_wkt[1] with CRS=OGC:CRS84>
#> [1] POINT (0 1)
# Can also create vctrs extension arrays
array <- as_nanoarrow_array(
vctr,
schema = na_vctrs(vctr)
)
array
#> <nanoarrow_array arrow.r.vctrs{string}[1]>
#> $ length : int 1
#> $ null_count: int 0
#> $ offset : int 0
#> $ buffers :List of 3
#> ..$ :<nanoarrow_buffer validity<bool>[0][0 b]> ``
#> ..$ :<nanoarrow_buffer data_offset<int32>[2][8 b]> `0 11`
#> ..$ :<nanoarrow_buffer data<string>[11 b]> `POINT (0 1)`
#> $ dictionary: NULL
#> $ children : list()
infer_nanoarrow_ptype(array)
#> <wk_wkt[0] with CRS=OGC:CRS84>
convert_array(array)
#> <wk_wkt[1] with CRS=OGC:CRS84>
#> [1] POINT (0 1)
```
<sup>Created on 2023-08-29 with [reprex
v2.0.2](https://reprex.tidyverse.org)</sup>
---------
Co-authored-by: Kirill Müller <[email protected]>
---
r/NAMESPACE | 15 +++
r/R/as-array.R | 43 ++++++-
r/R/convert-array.R | 11 +-
r/R/extension-vctrs.R | 104 +++++++++++++++++
r/R/extension.R | 187 +++++++++++++++++++++++++++++++
r/R/infer-ptype.R | 8 +-
r/R/util.R | 15 ++-
r/R/zzz.R | 2 +
r/man/as_nanoarrow_schema.Rd | 4 +-
r/man/infer_nanoarrow_ptype_extension.Rd | 48 ++++++++
r/man/na_vctrs.Rd | 32 ++++++
r/man/nanoarrow_extension_array.Rd | 32 ++++++
r/man/nanoarrow_extension_spec.Rd | 45 ++++++++
r/src/as_array.c | 116 ++++++++-----------
r/src/convert.c | 19 ----
r/src/convert_array.c | 48 ++++++--
r/src/infer_ptype.c | 6 +-
r/src/materialize.c | 127 ++++++++++++++-------
r/tests/testthat/test-convert-array.R | 95 +++++++++++-----
r/tests/testthat/test-extension-vctrs.R | 85 ++++++++++++++
r/tests/testthat/test-extension.R | 116 +++++++++++++++++++
r/tests/testthat/test-util.R | 8 ++
22 files changed, 993 insertions(+), 173 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index aa19def..e563e4e 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -32,6 +32,8 @@ S3method(as_nanoarrow_array,factor)
S3method(as_nanoarrow_array,nanoarrow_array)
S3method(as_nanoarrow_array,nanoarrow_buffer)
S3method(as_nanoarrow_array,vctrs_unspecified)
+S3method(as_nanoarrow_array_extension,default)
+S3method(as_nanoarrow_array_extension,nanoarrow_extension_spec_vctrs)
S3method(as_nanoarrow_array_stream,RecordBatchReader)
S3method(as_nanoarrow_array_stream,data.frame)
S3method(as_nanoarrow_array_stream,default)
@@ -46,10 +48,14 @@ S3method(convert_array,default)
S3method(convert_array,double)
S3method(convert_array,factor)
S3method(convert_array,vctrs_partial_frame)
+S3method(convert_array_extension,default)
+S3method(convert_array_extension,nanoarrow_extension_spec_vctrs)
S3method(format,nanoarrow_array)
S3method(format,nanoarrow_array_stream)
S3method(format,nanoarrow_buffer)
S3method(format,nanoarrow_schema)
+S3method(infer_nanoarrow_ptype_extension,default)
+S3method(infer_nanoarrow_ptype_extension,nanoarrow_extension_spec_vctrs)
S3method(infer_nanoarrow_schema,Array)
S3method(infer_nanoarrow_schema,ArrowTabular)
S3method(infer_nanoarrow_schema,ChunkedArray)
@@ -94,15 +100,18 @@ S3method(str,nanoarrow_buffer)
S3method(str,nanoarrow_schema)
export(array_stream_set_finalizer)
export(as_nanoarrow_array)
+export(as_nanoarrow_array_extension)
export(as_nanoarrow_array_stream)
export(as_nanoarrow_buffer)
export(as_nanoarrow_schema)
export(basic_array_stream)
export(collect_array_stream)
export(convert_array)
+export(convert_array_extension)
export(convert_array_stream)
export(convert_buffer)
export(infer_nanoarrow_ptype)
+export(infer_nanoarrow_ptype_extension)
export(infer_nanoarrow_schema)
export(na_binary)
export(na_bool)
@@ -143,6 +152,7 @@ export(na_uint16)
export(na_uint32)
export(na_uint64)
export(na_uint8)
+export(na_vctrs)
export(nanoarrow_allocate_array)
export(nanoarrow_allocate_array_stream)
export(nanoarrow_allocate_schema)
@@ -151,6 +161,8 @@ export(nanoarrow_array_modify)
export(nanoarrow_array_set_schema)
export(nanoarrow_buffer_append)
export(nanoarrow_buffer_init)
+export(nanoarrow_extension_array)
+export(nanoarrow_extension_spec)
export(nanoarrow_pointer_addr_chr)
export(nanoarrow_pointer_addr_dbl)
export(nanoarrow_pointer_addr_pretty)
@@ -162,6 +174,9 @@ export(nanoarrow_pointer_set_protected)
export(nanoarrow_schema_modify)
export(nanoarrow_schema_parse)
export(nanoarrow_version)
+export(register_nanoarrow_extension)
+export(resolve_nanoarrow_extension)
+export(unregister_nanoarrow_extension)
importFrom(utils,getFromNamespace)
importFrom(utils,str)
useDynLib(nanoarrow, .registration = TRUE)
diff --git a/r/R/as-array.R b/r/R/as-array.R
index 68ed939..b26cad8 100644
--- a/r/R/as-array.R
+++ b/r/R/as-array.R
@@ -21,6 +21,14 @@ as_nanoarrow_array.default <- function(x, ..., schema =
NULL, .from_c = FALSE) {
# and no suitable S3 method was found or the x--schema combination is not
# implemented in nanoarrow. Try arrow::as_arrow_array().
if (.from_c) {
+ # Give extension types a chance to handle conversion
+ parsed <- .Call(nanoarrow_c_schema_parse, schema)
+
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
assert_arrow_installed(
sprintf(
"create %s array from object of type %s",
@@ -73,6 +81,11 @@ as_nanoarrow_array.POSIXct <- function(x, ..., schema =
NULL) {
}
parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
switch(
parsed$type,
timestamp = ,
@@ -97,6 +110,11 @@ as_nanoarrow_array.difftime <- function(x, ..., schema =
NULL) {
}
parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
src_unit <- attr(x, "units")
switch(
parsed$type,
@@ -141,6 +159,11 @@ as_nanoarrow_array.Date <- function(x, ..., schema = NULL)
{
}
parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
switch(
parsed$type,
date32 = {
@@ -169,6 +192,12 @@ as_nanoarrow_array.POSIXlt <- function(x, ..., schema =
NULL) {
schema <- infer_nanoarrow_schema(x)
}
+ parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
as_nanoarrow_array(new_data_frame(x, length(x)), schema = schema)
}
@@ -178,6 +207,12 @@ as_nanoarrow_array.factor <- function(x, ..., schema =
NULL) {
schema <- infer_nanoarrow_schema(x)
}
+ parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
if (is.null(schema$dictionary)) {
return(as_nanoarrow_array(as.character(x), schema = schema))
}
@@ -198,8 +233,14 @@ as_nanoarrow_array.vctrs_unspecified <- function(x, ...,
schema = NULL) {
schema <- as_nanoarrow_schema(schema)
}
+ parsed <- nanoarrow_schema_parse(schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(as_nanoarrow_array_extension(spec, x, ..., schema = schema))
+ }
+
switch(
- nanoarrow_schema_parse(schema)$storage_type,
+ parsed$storage_type,
na = {
array <- nanoarrow_array_init(schema)
array$length <- length(x)
diff --git a/r/R/convert-array.R b/r/R/convert-array.R
index dcf5715..7077c73 100644
--- a/r/R/convert-array.R
+++ b/r/R/convert-array.R
@@ -85,6 +85,15 @@ convert_array <- function(array, to = NULL, ...) {
#' @export
convert_array.default <- function(array, to = NULL, ..., .from_c = FALSE) {
if (.from_c) {
+ # Handle extension conversion
+ # We don't need the user-friendly versions and this is
performance-sensitive
+ schema <- .Call(nanoarrow_c_infer_schema_array, array)
+ parsed <- .Call(nanoarrow_c_schema_parse, schema)
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(convert_array_extension(spec, array, to, ...))
+ }
+
# Handle default dictionary conversion since it's the same for all types
dictionary <- array$dictionary
@@ -92,7 +101,7 @@ convert_array.default <- function(array, to = NULL, ...,
.from_c = FALSE) {
values <- .Call(nanoarrow_c_convert_array, dictionary, to)
array$dictionary <- NULL
indices <- .Call(nanoarrow_c_convert_array, array, integer())
- return(values[indices + 1L])
+ return(vec_slice2(values, indices + 1L))
}
stop_cant_convert_array(array, to)
diff --git a/r/R/extension-vctrs.R b/r/R/extension-vctrs.R
new file mode 100644
index 0000000..bf910f3
--- /dev/null
+++ b/r/R/extension-vctrs.R
@@ -0,0 +1,104 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Vctrs extension type
+#'
+#' The Arrow format provides a rich type system that can handle most R
+#' vector types; however, many R vector types do not roundtrip perfectly
+#' through Arrow memory. The vctrs extension type uses [vctrs::vec_data()],
+#' [vctrs::vec_restore()], and [vctrs::vec_ptype()] in calls to
+#' [as_nanoarrow_array()] and [convert_array()] to ensure roundtrip fidelity.
+#'
+#' @param ptype A vctrs prototype as returned by [vctrs::vec_ptype()].
+#' The prototype can be of arbitrary size, but a zero-size vector
+#' is sufficient here.
+#' @inheritParams na_type
+#'
+#' @return A [nanoarrow_schema][as_nanoarrow_schema].
+#' @export
+#'
+#' @examples
+#' vctr <- as.POSIXlt("2000-01-02 03:45", tz = "UTC")
+#' array <- as_nanoarrow_array(vctr, schema = na_vctrs(vctr))
+#' infer_nanoarrow_ptype(array)
+#' convert_array(array)
+#'
+na_vctrs <- function(ptype, storage_type = NULL) {
+ ptype <- vctrs::vec_ptype(ptype)
+
+ if (is.null(storage_type)) {
+ storage_type <- infer_nanoarrow_schema(vctrs::vec_data(ptype))
+ }
+
+ # Note: a potential replacement for this is the JSON generated by the cereal
+ # package; however, as of this writing that JSON doesn't handle arbitrary
nesting.
+ # The arrow package currently uses the non-ASCII version; however, it
generally
+ # makes life easier if the metadata is valid UTF-8. The deserializer works
with
+ # either.
+ na_extension(storage_type, "arrow.r.vctrs", serialize(ptype, NULL, ascii =
TRUE))
+}
+
+register_vctrs_extension <- function() {
+ register_nanoarrow_extension(
+ "arrow.r.vctrs",
+ nanoarrow_extension_spec(subclass = "nanoarrow_extension_spec_vctrs")
+ )
+}
+
+#' @export
+infer_nanoarrow_ptype_extension.nanoarrow_extension_spec_vctrs <-
function(extension_spec, x, ...) {
+ parsed <- .Call(nanoarrow_c_schema_parse, x)
+ unserialize(parsed$extension_metadata)
+}
+
+#' @export
+convert_array_extension.nanoarrow_extension_spec_vctrs <-
function(extension_spec,
+ array, to,
+ ...) {
+ # Restore the vector data to the ptype that is serialized in the type
metadata
+ to_r_data <- infer_nanoarrow_ptype(array)
+ to_data <- vctrs::vec_data(to_r_data)
+ data <- convert_array_extension(NULL, array, to_data, warn_unregistered =
FALSE)
+ vctr <- vctrs::vec_restore(data, to_r_data)
+
+ # Cast to `to` if a different ptype was requested
+ if (!is.null(to)) {
+ vctrs::vec_cast(vctr, to)
+ } else {
+ vctr
+ }
+}
+
+#' @export
+as_nanoarrow_array_extension.nanoarrow_extension_spec_vctrs <- function(
+ extension_spec, x, ...,
+ schema = NULL) {
+ storage_schema <- schema
+ storage_schema$metadata[["ARROW:extension:name"]] <- NULL
+ storage_schema$metadata[["ARROW:extension:metadata"]] <- NULL
+
+ storage_array <- as_nanoarrow_array(
+ vctrs::vec_data(x),
+ schema = storage_schema
+ )
+
+ nanoarrow_extension_array(
+ storage_array,
+ "arrow.r.vctrs",
+ schema$metadata[["ARROW:extension:metadata"]]
+ )
+}
diff --git a/r/R/extension.R b/r/R/extension.R
new file mode 100644
index 0000000..46fa03b
--- /dev/null
+++ b/r/R/extension.R
@@ -0,0 +1,187 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' Register Arrow extension types
+#'
+#' @param extension_name An Arrow extension type name (e.g., arrow.r.vctrs)
+#' @param extension_spec An extension specification inheriting from
+#' 'nanoarrow_extension_spec'.
+#' @param data Optional data to include in the extension type specification
+#' @param subclass A subclass for the extension type specification. Extension
+#' methods will dispatch on this object.
+#'
+#' @return
+#' - `nanoarrow_extension_spec()` returns an object of class
+#' 'nanoarrow_extension_spec'.
+#' - `register_nanoarrow_extension()` returns `extension_spec`, invisibly.
+#' - `unregister_nanoarrow_extension()` returns `extension_name`, invisibly.
+#' - `resolve_nanoarrow_extension()` returns an object of class
+#' 'nanoarrow_extension_spec' or NULL if the extension type was not
+#' registered.
+#' @export
+#'
+#' @examples
+#' nanoarrow_extension_spec("mynamespace.mytype", subclass =
"mypackage_mytype_spec")
+nanoarrow_extension_spec <- function(data = list(), subclass = character()) {
+ structure(
+ data,
+ class = union(subclass, "nanoarrow_extension_spec")
+ )
+}
+
+#' @rdname nanoarrow_extension_spec
+#' @export
+register_nanoarrow_extension <- function(extension_name, extension_spec) {
+ extension_registry[[extension_name]] <- extension_spec
+ invisible(extension_name)
+}
+
+#' @rdname nanoarrow_extension_spec
+#' @export
+unregister_nanoarrow_extension <- function(extension_name) {
+ extension_registry[[extension_name]] <- NULL
+ invisible(extension_name)
+}
+
+#' @rdname nanoarrow_extension_spec
+#' @export
+resolve_nanoarrow_extension <- function(extension_name) {
+ extension_registry[[extension_name]]
+}
+
+
+#' Implement Arrow extension types
+#'
+#' @inheritParams nanoarrow_extension_spec
+#' @param warn_unregistered Use `FALSE` to infer/convert based on the storage
+#' type without a warning.
+#' @param x,array,to,schema,... Passed from [infer_nanoarrow_ptype()],
+#' [convert_array()], [as_nanoarrow_array()], and/or
+#' [as_nanoarrow_array_stream()].
+#'
+#' @return
+#' - `infer_nanoarrow_ptype_extension()`: The R vector prototype to be used
+#' as the default conversion target.
+#' - `convert_array_extension()`: An R vector of type `to`.
+#' - `as_nanoarrow_array_extension()`: A
[nanoarrow_array][as_nanoarrow_array]
+#' of type `schema`.
+#' @export
+#'
+infer_nanoarrow_ptype_extension <- function(extension_spec, x, ...,
+ warn_unregistered = TRUE) {
+ UseMethod("infer_nanoarrow_ptype_extension")
+}
+
+#' @rdname infer_nanoarrow_ptype_extension
+#' @export
+convert_array_extension <- function(extension_spec, array, to, ...,
+ warn_unregistered = TRUE) {
+ UseMethod("convert_array_extension")
+}
+
+#' @rdname infer_nanoarrow_ptype_extension
+#' @export
+as_nanoarrow_array_extension <- function(extension_spec, x, ..., schema =
NULL) {
+ UseMethod("as_nanoarrow_array_extension")
+}
+
+#' @export
+infer_nanoarrow_ptype_extension.default <- function(extension_spec, x, ...,
+ warn_unregistered = TRUE) {
+ if (warn_unregistered) {
+ warn_unregistered_extension_type(x)
+ }
+
+ x$metadata[["ARROW:extension:name"]] <- NULL
+ infer_nanoarrow_ptype(x)
+}
+
+#' @export
+convert_array_extension.default <- function(extension_spec, array, to,
+ ...,
+ warn_unregistered = TRUE) {
+ storage <- .Call(nanoarrow_c_infer_schema_array, array)
+
+ if (warn_unregistered) {
+ warn_unregistered_extension_type(storage)
+ }
+
+ storage$metadata[["ARROW:extension:name"]] <- NULL
+
+ array <- array_shallow_copy(array, validate = FALSE)
+ nanoarrow_array_set_schema(array, storage)
+ convert_array(array, to, ...)
+}
+
+#' @export
+as_nanoarrow_array_extension.default <- function(extension_spec, x, ...,
+ schema = NULL) {
+ stop(
+ sprintf(
+ "as_nanoarrow_array_extension() not implemented for extension %s",
+ nanoarrow_schema_formatted(schema)
+ )
+ )
+}
+
+#' Create Arrow extension arrays
+#'
+#' @param storage_array A [nanoarrow_array][as_nanoarrow_array].
+#' @inheritParams na_type
+#'
+#' @return A [nanoarrow_array][as_nanoarrow_array] with attached extension
+#' schema.
+#' @export
+#'
+#' @examples
+#' nanoarrow_extension_array(1:10, "some_ext", '{"key": "value"}')
+#'
+nanoarrow_extension_array <- function(storage_array, extension_name,
+ extension_metadata = NULL) {
+ storage_array <- as_nanoarrow_array(storage_array)
+
+ schema <- .Call(nanoarrow_c_infer_schema_array, storage_array)
+ schema$metadata[["ARROW:extension:name"]] <- extension_name
+ schema$metadata[["ARROW:extension:metadata"]] <- extension_metadata
+
+ shallow_copy <- array_shallow_copy(storage_array)
+ nanoarrow_array_set_schema(shallow_copy, schema)
+ shallow_copy
+}
+
+warn_unregistered_extension_type <- function(x) {
+ # Warn that we're about to ignore an extension type
+ if (!is.null(x$name) && !identical(x$name, "")) {
+ warning(
+ sprintf(
+ "%s: Converting unknown extension %s as storage type",
+ x$name,
+ nanoarrow_schema_formatted(x)
+ )
+ )
+ } else {
+ warning(
+ sprintf(
+ "Converting unknown extension %s as storage type",
+ nanoarrow_schema_formatted(x)
+ )
+ )
+ }
+}
+
+# Mutable registry to look up extension specifications
+extension_registry <- new.env(parent = emptyenv())
diff --git a/r/R/infer-ptype.R b/r/R/infer-ptype.R
index 3c38e2f..ce3c716 100644
--- a/r/R/infer-ptype.R
+++ b/r/R/infer-ptype.R
@@ -63,9 +63,15 @@ infer_nanoarrow_ptype <- function(x) {
# have been tried. Some of these inferences could be moved to C to be faster
# (but are much less verbose to create here)
infer_ptype_other <- function(schema) {
- # we don't need the user-friendly versions and this is performance-sensitive
+ # We don't need the user-friendly versions and this is performance-sensitive
parsed <- .Call(nanoarrow_c_schema_parse, schema)
+ # Give registered extension types a chance to resolve the ptype
+ if (!is.null(parsed$extension_name)) {
+ spec <- resolve_nanoarrow_extension(parsed$extension_name)
+ return(infer_nanoarrow_ptype_extension(spec, schema))
+ }
+
switch(
parsed$type,
"na" = vctrs::unspecified(),
diff --git a/r/R/util.R b/r/R/util.R
index d09a2c9..260f34a 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -72,6 +72,15 @@ current_stack_trace_chr <- function() {
paste0(utils::capture.output(print(tb)), collapse = "\n")
}
+# Consolidate places we should call vctrs::vec_slice()
+# if/when a vctrs dependency is added
+vec_slice2 <- function(x, i) {
+ if (is.data.frame(x)) {
+ x[i, , drop = FALSE]
+ } else {
+ x[i]
+ }
+}
`%||%` <- function(rhs, lhs) {
if (is.null(rhs)) lhs else rhs
@@ -113,8 +122,10 @@ vec_gen <- function(ptype, n = 1e3, prop_true = 0.5,
prop_na = 0,
vec_shuffle <- function(x) {
if (is.data.frame(x)) {
- x[sample(seq_len(nrow(x)), replace = FALSE), , drop = FALSE]
+ i <- sample(seq_len(nrow(x)), replace = FALSE)
} else {
- x[sample(seq_along(x), replace = FALSE)]
+ i <- sample(seq_along(x), replace = FALSE)
}
+
+ vec_slice2(x, i)
}
diff --git a/r/R/zzz.R b/r/R/zzz.R
index e2dd492..5999215 100644
--- a/r/R/zzz.R
+++ b/r/R/zzz.R
@@ -17,6 +17,8 @@
# nocov start
.onLoad <- function(...) {
+ register_vctrs_extension()
+
s3_register("arrow::infer_type", "nanoarrow_array")
s3_register("arrow::as_data_type", "nanoarrow_schema")
s3_register("arrow::as_schema", "nanoarrow_schema")
diff --git a/r/man/as_nanoarrow_schema.Rd b/r/man/as_nanoarrow_schema.Rd
index 20f97f6..db32ff3 100644
--- a/r/man/as_nanoarrow_schema.Rd
+++ b/r/man/as_nanoarrow_schema.Rd
@@ -33,8 +33,8 @@ An object of class 'nanoarrow_schema'
\description{
In nanoarrow a 'schema' refers to a \verb{struct ArrowSchema} as defined in the
Arrow C Data interface. This data structure can be used to represent an
-\code{\link[arrow:Schema]{arrow::schema()}}, an
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note
that
-in nanoarrow, an \code{\link[arrow:Schema]{arrow::schema()}} and a
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
+\code{\link[arrow:schema]{arrow::schema()}}, an
\code{\link[arrow:Field]{arrow::field()}}, or an \code{arrow::DataType}. Note
that
+in nanoarrow, an \code{\link[arrow:schema]{arrow::schema()}} and a
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
are represented identically.
}
\examples{
diff --git a/r/man/infer_nanoarrow_ptype_extension.Rd
b/r/man/infer_nanoarrow_ptype_extension.Rd
new file mode 100644
index 0000000..83bbc16
--- /dev/null
+++ b/r/man/infer_nanoarrow_ptype_extension.Rd
@@ -0,0 +1,48 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/extension.R
+\name{infer_nanoarrow_ptype_extension}
+\alias{infer_nanoarrow_ptype_extension}
+\alias{convert_array_extension}
+\alias{as_nanoarrow_array_extension}
+\title{Implement Arrow extension types}
+\usage{
+infer_nanoarrow_ptype_extension(
+ extension_spec,
+ x,
+ ...,
+ warn_unregistered = TRUE
+)
+
+convert_array_extension(
+ extension_spec,
+ array,
+ to,
+ ...,
+ warn_unregistered = TRUE
+)
+
+as_nanoarrow_array_extension(extension_spec, x, ..., schema = NULL)
+}
+\arguments{
+\item{extension_spec}{An extension specification inheriting from
+'nanoarrow_extension_spec'.}
+
+\item{x, array, to, schema, ...}{Passed from
\code{\link[=infer_nanoarrow_ptype]{infer_nanoarrow_ptype()}},
+\code{\link[=convert_array]{convert_array()}},
\code{\link[=as_nanoarrow_array]{as_nanoarrow_array()}}, and/or
+\code{\link[=as_nanoarrow_array_stream]{as_nanoarrow_array_stream()}}.}
+
+\item{warn_unregistered}{Use \code{FALSE} to infer/convert based on the storage
+type without a warning.}
+}
+\value{
+\itemize{
+\item \code{infer_nanoarrow_ptype_extension()}: The R vector prototype to be
used
+as the default conversion target.
+\item \code{convert_array_extension()}: An R vector of type \code{to}.
+\item \code{as_nanoarrow_array_extension()}: A
\link[=as_nanoarrow_array]{nanoarrow_array}
+of type \code{schema}.
+}
+}
+\description{
+Implement Arrow extension types
+}
diff --git a/r/man/na_vctrs.Rd b/r/man/na_vctrs.Rd
new file mode 100644
index 0000000..f1fbd24
--- /dev/null
+++ b/r/man/na_vctrs.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/extension-vctrs.R
+\name{na_vctrs}
+\alias{na_vctrs}
+\title{Vctrs extension type}
+\usage{
+na_vctrs(ptype, storage_type = NULL)
+}
+\arguments{
+\item{ptype}{A vctrs prototype as returned by
\code{\link[vctrs:vec_ptype]{vctrs::vec_ptype()}}.
+The prototype can be of arbitrary size, but a zero-size vector
+is sufficient here.}
+
+\item{storage_type}{For \code{\link[=na_extension]{na_extension()}}, the
underlying value type.}
+}
+\value{
+A \link[=as_nanoarrow_schema]{nanoarrow_schema}.
+}
+\description{
+The Arrow format provides a rich type system that can handle most R
+vector types; however, many R vector types do not roundtrip perfectly
+through Arrow memory. The vctrs extension type uses
\code{\link[vctrs:vec_data]{vctrs::vec_data()}},
+\code{\link[vctrs:vec_proxy]{vctrs::vec_restore()}}, and
\code{\link[vctrs:vec_ptype]{vctrs::vec_ptype()}} in calls to
+\code{\link[=as_nanoarrow_array]{as_nanoarrow_array()}} and
\code{\link[=convert_array]{convert_array()}} to ensure roundtrip fidelity.
+}
+\examples{
+vctr <- as.POSIXlt("2000-01-02 03:45", tz = "UTC")
+array <- as_nanoarrow_array(vctr, schema = na_vctrs(vctr))
+infer_nanoarrow_ptype(array)
+convert_array(array)
+
+}
diff --git a/r/man/nanoarrow_extension_array.Rd
b/r/man/nanoarrow_extension_array.Rd
new file mode 100644
index 0000000..18a742d
--- /dev/null
+++ b/r/man/nanoarrow_extension_array.Rd
@@ -0,0 +1,32 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/extension.R
+\name{nanoarrow_extension_array}
+\alias{nanoarrow_extension_array}
+\title{Create Arrow extension arrays}
+\usage{
+nanoarrow_extension_array(
+ storage_array,
+ extension_name,
+ extension_metadata = NULL
+)
+}
+\arguments{
+\item{storage_array}{A \link[=as_nanoarrow_array]{nanoarrow_array}.}
+
+\item{extension_name}{For \code{\link[=na_extension]{na_extension()}}, the
extension name. This is
+typically namespaced separated by dots (e.g., arrow.r.vctrs).}
+
+\item{extension_metadata}{A string or raw vector defining extension metadata.
+Most Arrow extension types define extension metadata as a JSON object.}
+}
+\value{
+A \link[=as_nanoarrow_array]{nanoarrow_array} with attached extension
+schema.
+}
+\description{
+Create Arrow extension arrays
+}
+\examples{
+nanoarrow_extension_array(1:10, "some_ext", '{"key": "value"}')
+
+}
diff --git a/r/man/nanoarrow_extension_spec.Rd
b/r/man/nanoarrow_extension_spec.Rd
new file mode 100644
index 0000000..cd69fa0
--- /dev/null
+++ b/r/man/nanoarrow_extension_spec.Rd
@@ -0,0 +1,45 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/extension.R
+\name{nanoarrow_extension_spec}
+\alias{nanoarrow_extension_spec}
+\alias{register_nanoarrow_extension}
+\alias{unregister_nanoarrow_extension}
+\alias{resolve_nanoarrow_extension}
+\title{Register Arrow extension types}
+\usage{
+nanoarrow_extension_spec(data = list(), subclass = character())
+
+register_nanoarrow_extension(extension_name, extension_spec)
+
+unregister_nanoarrow_extension(extension_name)
+
+resolve_nanoarrow_extension(extension_name)
+}
+\arguments{
+\item{data}{Optional data to include in the extension type specification}
+
+\item{subclass}{A subclass for the extension type specification. Extension
+methods will dispatch on this object.}
+
+\item{extension_name}{An Arrow extension type name (e.g., arrow.r.vctrs)}
+
+\item{extension_spec}{An extension specification inheriting from
+'nanoarrow_extension_spec'.}
+}
+\value{
+\itemize{
+\item \code{nanoarrow_extension_spec()} returns an object of class
+'nanoarrow_extension_spec'.
+\item \code{register_nanoarrow_extension()} returns \code{extension_spec},
invisibly.
+\item \code{unregister_nanoarrow_extension()} returns \code{extension_name},
invisibly.
+\item \code{resolve_nanoarrow_extension()} returns an object of class
+'nanoarrow_extension_spec' or NULL if the extension type was not
+registered.
+}
+}
+\description{
+Register Arrow extension types
+}
+\examples{
+nanoarrow_extension_spec("mynamespace.mytype", subclass =
"mypackage_mytype_spec")
+}
diff --git a/r/src/as_array.c b/r/src/as_array.c
index adc3ef7..5098e23 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -47,16 +47,9 @@ static void call_as_nanoarrow_array(SEXP x_sexp, struct
ArrowArray* array,
}
static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
- struct ArrowError* error) {
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
-
+ struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
// Only consider the default create for now
- if (schema_view.type != NANOARROW_TYPE_INT32) {
+ if (schema_view->type != NANOARROW_TYPE_INT32) {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
return;
}
@@ -67,7 +60,7 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
int* x_data = INTEGER(x_sexp);
int64_t len = Rf_xlength(x_sexp);
- result = ArrowArrayInitFromType(array, NANOARROW_TYPE_INT32);
+ int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_INT32);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
@@ -116,22 +109,15 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
}
static void as_array_lgl(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
- struct ArrowError* error) {
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
-
+ struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
// We can zero-copy convert to int32
- if (schema_view.type == NANOARROW_TYPE_INT32) {
- as_array_int(x_sexp, array, schema_xptr, error);
+ if (schema_view->type == NANOARROW_TYPE_INT32) {
+ as_array_int(x_sexp, array, schema_xptr, schema_view, error);
return;
}
// Only consider bool for now
- if (schema_view.type != NANOARROW_TYPE_BOOL) {
+ if (schema_view->type != NANOARROW_TYPE_BOOL) {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
return;
}
@@ -139,7 +125,7 @@ static void as_array_lgl(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
int* x_data = INTEGER(x_sexp);
int64_t len = Rf_xlength(x_sexp);
- result = ArrowArrayInitFromType(array, NANOARROW_TYPE_BOOL);
+ int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_BOOL);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
@@ -194,17 +180,10 @@ static void as_array_lgl(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
}
static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
- struct ArrowError* error) {
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
-
+ struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
// Consider double -> na_double() and double -> na_int64()/na_int32()
// (mostly so that we can support date/time types with various units)
- switch (schema_view.type) {
+ switch (schema_view->type) {
case NANOARROW_TYPE_DOUBLE:
case NANOARROW_TYPE_INT64:
case NANOARROW_TYPE_INT32:
@@ -217,16 +196,16 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
double* x_data = REAL(x_sexp);
int64_t len = Rf_xlength(x_sexp);
- result = ArrowArrayInitFromType(array, schema_view.type);
+ int result = ArrowArrayInitFromType(array, schema_view->type);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
- if (schema_view.type == NANOARROW_TYPE_DOUBLE) {
+ if (schema_view->type == NANOARROW_TYPE_DOUBLE) {
// Just borrow the data buffer (zero-copy)
buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(double),
x_sexp);
- } else if (schema_view.type == NANOARROW_TYPE_INT64) {
+ } else if (schema_view->type == NANOARROW_TYPE_INT64) {
// double -> int64_t
struct ArrowBuffer* buffer = ArrowArrayBuffer(array, 1);
result = ArrowBufferReserve(buffer, len * sizeof(int64_t));
@@ -319,23 +298,16 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
}
static void as_array_chr(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
- struct ArrowError* error) {
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
-
+ struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
// Only consider the default create for now
- if (schema_view.type != NANOARROW_TYPE_STRING) {
+ if (schema_view->type != NANOARROW_TYPE_STRING) {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
return;
}
int64_t len = Rf_xlength(x_sexp);
- result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRING);
+ int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRING);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
@@ -407,15 +379,11 @@ static void as_array_default(SEXP x_sexp, struct
ArrowArray* array, SEXP schema_
struct ArrowError* error);
static void as_array_data_frame(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowSchemaView* schema_view,
struct ArrowError* error) {
struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
- switch (schema_view.type) {
+ switch (schema_view->type) {
case NANOARROW_TYPE_SPARSE_UNION:
case NANOARROW_TYPE_DENSE_UNION:
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"union_array_from_data_frame");
@@ -432,7 +400,7 @@ static void as_array_data_frame(SEXP x_sexp, struct
ArrowArray* array, SEXP sche
(long)schema->n_children);
}
- result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRUCT);
+ int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRUCT);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
@@ -454,23 +422,16 @@ static void as_array_data_frame(SEXP x_sexp, struct
ArrowArray* array, SEXP sche
}
static void as_array_list(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
- struct ArrowError* error) {
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowSchemaView schema_view;
- int result = ArrowSchemaViewInit(&schema_view, schema, error);
- if (result != NANOARROW_OK) {
- Rf_error("ArrowSchemaViewInit(): %s", error->message);
- }
-
+ struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
// We handle list(raw()) for now but fall back to arrow for vctrs::list_of()
// Arbitrary nested list support is complicated without some concept of a
// "builder", which we don't use.
- if (schema_view.type != NANOARROW_TYPE_BINARY) {
+ if (schema_view->type != NANOARROW_TYPE_BINARY) {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
return;
}
- result = ArrowArrayInitFromType(array, schema_view.type);
+ int result = ArrowArrayInitFromType(array, schema_view->type);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
@@ -544,9 +505,23 @@ static void as_array_list(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xpt
static void as_array_default(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // Ensure that extension types dispatch from R regardless of source
+ if (schema_view.extension_name.size_bytes > 0) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
+ return;
+ }
+
if (Rf_isObject(x_sexp)) {
if (Rf_inherits(x_sexp, "data.frame")) {
- as_array_data_frame(x_sexp, array, schema_xptr, error);
+ as_array_data_frame(x_sexp, array, schema_xptr, &schema_view, error);
return;
} else {
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
@@ -556,19 +531,19 @@ static void as_array_default(SEXP x_sexp, struct
ArrowArray* array, SEXP schema_
switch (TYPEOF(x_sexp)) {
case LGLSXP:
- as_array_lgl(x_sexp, array, schema_xptr, error);
+ as_array_lgl(x_sexp, array, schema_xptr, &schema_view, error);
return;
case INTSXP:
- as_array_int(x_sexp, array, schema_xptr, error);
+ as_array_int(x_sexp, array, schema_xptr, &schema_view, error);
return;
case REALSXP:
- as_array_dbl(x_sexp, array, schema_xptr, error);
+ as_array_dbl(x_sexp, array, schema_xptr, &schema_view, error);
return;
case STRSXP:
- as_array_chr(x_sexp, array, schema_xptr, error);
+ as_array_chr(x_sexp, array, schema_xptr, &schema_view, error);
return;
case VECSXP:
- as_array_list(x_sexp, array, schema_xptr, error);
+ as_array_list(x_sexp, array, schema_xptr, &schema_view, error);
return;
default:
call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
@@ -576,12 +551,13 @@ static void as_array_default(SEXP x_sexp, struct
ArrowArray* array, SEXP schema_
}
}
-SEXP nanoarrow_c_as_array_default(SEXP x_sexp, SEXP schema_sexp) {
+SEXP nanoarrow_c_as_array_default(SEXP x_sexp, SEXP schema_xptr) {
SEXP array_xptr = PROTECT(array_owning_xptr());
struct ArrowArray* array = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
struct ArrowError error;
- as_array_default(x_sexp, array, schema_sexp, &error);
- array_xptr_set_schema(array_xptr, schema_sexp);
+
+ as_array_default(x_sexp, array, schema_xptr, &error);
+ array_xptr_set_schema(array_xptr, schema_xptr);
UNPROTECT(1);
return array_xptr;
}
diff --git a/r/src/convert.c b/r/src/convert.c
index 22bbfaa..3b62b80 100644
--- a/r/src/convert.c
+++ b/r/src/convert.c
@@ -290,25 +290,6 @@ int nanoarrow_converter_set_schema(SEXP converter_xptr,
SEXP schema_xptr) {
// TODO: Currently we error at the materialize stage if a conversion is not
possible;
// however, at this stage we have all the information we need to calculate
that.
-
- // For extension types, warn that we are about to strip the extension type,
as we don't
- // have a mechanism for dealing with them yet
- if (converter->schema_view.extension_name.size_bytes > 0) {
- int64_t schema_chars = ArrowSchemaToString(schema, NULL, 0, 1);
- SEXP fmt_shelter = PROTECT(Rf_allocVector(RAWSXP, schema_chars + 1));
- ArrowSchemaToString(schema, (char*)RAW(fmt_shelter), schema_chars + 1, 1);
- const char* schema_name = schema->name;
- if (schema_name == NULL || schema_name[0] == '\0') {
- Rf_warning("Converting unknown extension %s as storage type",
- (const char*)RAW(fmt_shelter));
- } else {
- Rf_warning("%s: Converting unknown extension %s as storage type",
schema_name,
- (const char*)RAW(fmt_shelter));
- }
-
- UNPROTECT(1);
- }
-
SET_VECTOR_ELT(converter_shelter, 1, schema_xptr);
ArrowArrayViewReset(&converter->array_view);
diff --git a/r/src/convert_array.c b/r/src/convert_array.c
index 8bb698a..82e2942 100644
--- a/r/src/convert_array.c
+++ b/r/src/convert_array.c
@@ -100,7 +100,27 @@ static SEXP convert_array_default(SEXP array_xptr, enum
VectorType vector_type,
return result;
}
-static SEXP convert_array_chr(SEXP array_xptr) {
+static SEXP convert_array_chr(SEXP array_xptr, SEXP ptype_sexp) {
+ struct ArrowSchema* schema = schema_from_array_xptr(array_xptr);
+ struct ArrowSchemaView schema_view;
+ if (ArrowSchemaViewInit(&schema_view, schema, NULL) != NANOARROW_OK) {
+ Rf_error("Invalid schema");
+ }
+
+ // If array_xptr is an extension, use default conversion
+ if (schema_view.extension_name.size_bytes > 0) {
+ // Default conversion requires a ptype: resolve it if not already specified
+ if (ptype_sexp == R_NilValue) {
+ ptype_sexp =
PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
+ SEXP default_result =
+ PROTECT(convert_array_default(array_xptr, VECTOR_TYPE_OTHER,
ptype_sexp));
+ UNPROTECT(2);
+ return default_result;
+ } else {
+ return convert_array_default(array_xptr, VECTOR_TYPE_OTHER, ptype_sexp);
+ }
+ }
+
struct ArrowArray* array = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
if (array->dictionary == NULL) {
SEXP result = PROTECT(nanoarrow_c_make_altrep_chr(array_xptr));
@@ -117,19 +137,27 @@ static SEXP convert_array_chr(SEXP array_xptr) {
SEXP nanoarrow_c_convert_array(SEXP array_xptr, SEXP ptype_sexp);
static SEXP convert_array_data_frame(SEXP array_xptr, SEXP ptype_sexp) {
- // If array_xptr is a union, use default convert behaviour
struct ArrowSchema* schema = schema_from_array_xptr(array_xptr);
struct ArrowSchemaView schema_view;
if (ArrowSchemaViewInit(&schema_view, schema, NULL) != NANOARROW_OK) {
Rf_error("Invalid schema");
}
- if (schema_view.storage_type != NANOARROW_TYPE_STRUCT) {
- ptype_sexp =
PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
- SEXP default_result =
- convert_array_default(array_xptr, VECTOR_TYPE_DATA_FRAME, ptype_sexp);
- UNPROTECT(1);
- return default_result;
+ // If array_xptr is an extension, union, or the ptype isn't a data.frame
+ // use convert/materialize convert behaviour.
+ // Default conversion requires a ptype: resolve it if not already specified
+ if (schema_view.storage_type != NANOARROW_TYPE_STRUCT ||
+ schema_view.extension_name.size_bytes > 0 ||
+ (ptype_sexp != R_NilValue && !Rf_inherits(ptype_sexp, "data.frame"))) {
+ if (ptype_sexp == R_NilValue) {
+ ptype_sexp =
PROTECT(nanoarrow_c_infer_ptype(array_xptr_get_schema(array_xptr)));
+ SEXP default_result =
+ PROTECT(convert_array_default(array_xptr, VECTOR_TYPE_OTHER,
ptype_sexp));
+ UNPROTECT(2);
+ return default_result;
+ } else {
+ return convert_array_default(array_xptr, VECTOR_TYPE_DATA_FRAME,
ptype_sexp);
+ }
}
struct ArrowArray* array = array_from_xptr(array_xptr);
@@ -190,7 +218,7 @@ SEXP nanoarrow_c_convert_array(SEXP array_xptr, SEXP
ptype_sexp) {
case VECTOR_TYPE_DBL:
return convert_array_default(array_xptr, vector_type, R_NilValue);
case VECTOR_TYPE_CHR:
- return convert_array_chr(array_xptr);
+ return convert_array_chr(array_xptr, ptype_sexp);
case VECTOR_TYPE_DATA_FRAME:
return convert_array_data_frame(array_xptr, R_NilValue);
default:
@@ -231,7 +259,7 @@ SEXP nanoarrow_c_convert_array(SEXP array_xptr, SEXP
ptype_sexp) {
case REALSXP:
return convert_array_default(array_xptr, VECTOR_TYPE_DBL, ptype_sexp);
case STRSXP:
- return convert_array_chr(array_xptr);
+ return convert_array_chr(array_xptr, ptype_sexp);
default:
return call_convert_array(array_xptr, ptype_sexp);
}
diff --git a/r/src/infer_ptype.c b/r/src/infer_ptype.c
index 1e0879f..2e8007b 100644
--- a/r/src/infer_ptype.c
+++ b/r/src/infer_ptype.c
@@ -78,7 +78,11 @@ enum VectorType nanoarrow_infer_vector_type_schema(SEXP
schema_xptr) {
Rf_error("nanoarrow_infer_vector_type_schema(): %s",
ArrowErrorMessage(&error));
}
- return nanoarrow_infer_vector_type(schema_view.type);
+ if (schema_view.extension_name.size_bytes > 0) {
+ return VECTOR_TYPE_OTHER;
+ } else {
+ return nanoarrow_infer_vector_type(schema_view.type);
+ }
}
// The same as the above, but from a nanoarrow_array()
diff --git a/r/src/materialize.c b/r/src/materialize.c
index e4c8db7..7804813 100644
--- a/r/src/materialize.c
+++ b/r/src/materialize.c
@@ -160,6 +160,10 @@ static void fill_vec_with_nulls(SEXP x, R_xlen_t offset,
R_xlen_t len) {
}
switch (TYPEOF(x)) {
+ case RAWSXP:
+ // Not perfect: raw() doesn't really support NA in R
+ memset(RAW(x), 0, len * sizeof(char));
+ break;
case LGLSXP:
case INTSXP: {
int* values = INTEGER(x);
@@ -175,6 +179,17 @@ static void fill_vec_with_nulls(SEXP x, R_xlen_t offset,
R_xlen_t len) {
}
return;
}
+ case CPLXSXP: {
+ Rcomplex* values = COMPLEX(x);
+ Rcomplex na_value;
+ na_value.r = NA_REAL;
+ na_value.i = NA_REAL;
+
+ for (R_xlen_t i = 0; i < len; i++) {
+ values[offset + i] = na_value;
+ }
+ return;
+ }
case STRSXP:
for (R_xlen_t i = 0; i < len; i++) {
SET_STRING_ELT(x, offset + i, NA_STRING);
@@ -190,6 +205,70 @@ static void fill_vec_with_nulls(SEXP x, R_xlen_t offset,
R_xlen_t len) {
}
}
+static void copy_vec_into(SEXP x, SEXP dst, R_xlen_t offset, R_xlen_t len) {
+ if (nanoarrow_ptype_is_data_frame(dst)) {
+ if (!nanoarrow_ptype_is_data_frame(x)) {
+ Rf_error("Expected record-style vctr result but got non-record-style
result");
+ }
+
+ R_xlen_t x_len = nanoarrow_data_frame_size(x);
+ if (len != x_len) {
+ Rf_error("Unexpected data.frame row count in copy_vec_into()");
+ }
+
+ // This does not currently consider column names (i.e., it blindly copies
+ // by index).
+ if (Rf_xlength(x) != Rf_xlength(dst)) {
+ Rf_error("Unexpected data.frame column count in copy_vec_into()");
+ }
+
+ for (R_xlen_t i = 0; i < Rf_xlength(x); i++) {
+ copy_vec_into(VECTOR_ELT(x, i), VECTOR_ELT(dst, i), offset, len);
+ }
+
+ return;
+ } else if (nanoarrow_ptype_is_data_frame(x)) {
+ Rf_error("Expected non-record-style vctr result but got record-style
result");
+ }
+
+ if (TYPEOF(dst) != TYPEOF(x)) {
+ Rf_error("Unexpected SEXP type in result copy_vec_into()");
+ }
+
+ if (Rf_length(x) != len) {
+ Rf_error("Unexpected length of result in copy_vec_into()");
+ }
+
+ switch (TYPEOF(dst)) {
+ case RAWSXP:
+ memcpy(RAW(dst) + offset, RAW(x), len * sizeof(uint8_t));
+ break;
+ case REALSXP:
+ memcpy(REAL(dst) + offset, REAL(x), len * sizeof(double));
+ break;
+ case INTSXP:
+ case LGLSXP:
+ memcpy(INTEGER(dst) + offset, INTEGER(x), len * sizeof(int));
+ break;
+ case CPLXSXP:
+ memcpy(COMPLEX(dst) + offset, COMPLEX(x), len * sizeof(Rcomplex));
+ break;
+ case STRSXP:
+ for (R_xlen_t i = 0; i < len; i++) {
+ SET_STRING_ELT(dst, offset + i, STRING_ELT(x, i));
+ }
+ break;
+ case VECSXP:
+ for (R_xlen_t i = 0; i < len; i++) {
+ SET_VECTOR_ELT(dst, offset + i, VECTOR_ELT(x, i));
+ }
+ break;
+ default:
+ Rf_error("Unhandled SEXP type in copy_vec_into()");
+ break;
+ }
+}
+
static int nanoarrow_materialize_other(struct RConverter* converter,
SEXP converter_xptr) {
// Ensure that we have a ptype SEXP to send in the call back to R
@@ -223,46 +302,9 @@ static int nanoarrow_materialize_other(struct RConverter*
converter,
Rf_lang5(fun, array_xptr, offset_sexp, length_sexp,
converter->ptype_view.ptype));
SEXP result_src = PROTECT(Rf_eval(call, nanoarrow_ns_pkg));
- // Currently this method can only handle the case where result_src and dst
have the same
- // SEXP type and length. This won't work for a data frame/record array-like
result.
- if (Rf_xlength(result_src) != converter->dst.length) {
- Rf_error("Unexpected length in result of
nanoarrow:::convert_fallback_other()");
- }
-
- if (TYPEOF(result_src) != TYPEOF(converter->dst.vec_sexp)) {
- Rf_error("Unexpected SEXP type in result of
nanoarrow:::convert_fallback_other()");
- }
-
- switch (TYPEOF(result_src)) {
- case REALSXP:
- memcpy(REAL(converter->dst.vec_sexp) + converter->dst.offset,
REAL(result_src),
- converter->dst.length * sizeof(double));
- break;
- case INTSXP:
- case LGLSXP:
- memcpy(INTEGER(converter->dst.vec_sexp) + converter->dst.offset,
- INTEGER(result_src), converter->dst.length * sizeof(int));
- break;
- case STRSXP:
- for (R_xlen_t i = 0; i < converter->dst.length; i++) {
- SET_STRING_ELT(converter->dst.vec_sexp, converter->dst.offset + i,
- STRING_ELT(result_src, i));
- }
- break;
- case VECSXP:
- for (R_xlen_t i = 0; i < converter->dst.length; i++) {
- SET_VECTOR_ELT(converter->dst.vec_sexp, converter->dst.offset + i,
- VECTOR_ELT(result_src, i));
- }
- break;
- case NILSXP:
- // Do nothing if the function returned NULL
- break;
- default:
- Rf_error(
- "Unhandled SEXP type in conversion of
nanoarrow:::convert_fallback_other()");
- break;
- }
+ // Copy the result into a slice of dst
+ copy_vec_into(result_src, converter->dst.vec_sexp, converter->dst.offset,
+ converter->dst.length);
UNPROTECT(7);
return NANOARROW_OK;
@@ -414,6 +456,11 @@ static int nanoarrow_materialize_base(struct RConverter*
converter, SEXP convert
struct VectorSlice* dst = &converter->dst;
struct MaterializeOptions* options = converter->options;
+ // Make sure extension conversion calls into R
+ if (converter->schema_view.extension_name.size_bytes > 0) {
+ return nanoarrow_materialize_other(converter, converter_xptr);
+ }
+
switch (converter->ptype_view.vector_type) {
case VECTOR_TYPE_UNSPECIFIED:
return nanoarrow_materialize_unspecified(src, dst, options);
diff --git a/r/tests/testthat/test-convert-array.R
b/r/tests/testthat/test-convert-array.R
index 4481758..592e58b 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -89,6 +89,31 @@ test_that("convert to vector works for partial_frame", {
)
})
+test_that("convert to vector works for extension<struct> -> data.frame()", {
+ array <- nanoarrow_extension_array(
+ data.frame(x = c(TRUE, FALSE, NA, FALSE, TRUE)),
+ "some_ext"
+ )
+
+ expect_warning(
+ expect_identical(
+ convert_array(array, data.frame(x = logical())),
+ data.frame(x = c(TRUE, FALSE, NA, FALSE, TRUE))
+ ),
+ "Converting unknown extension"
+ )
+})
+
+test_that("convert to vector works for dictionary<struct> -> data.frame()", {
+ array <- as_nanoarrow_array(c(0L, 1L, 2L, 1L, 0L))
+ array$dictionary <- as_nanoarrow_array(data.frame(x = c(TRUE, FALSE, NA)))
+
+ expect_identical(
+ convert_array(array, data.frame(x = logical())),
+ data.frame(x = c(TRUE, FALSE, NA, FALSE, TRUE))
+ )
+})
+
test_that("convert to vector works for function()", {
tibble_or_bust <- function(array, ptype) {
if (is.data.frame(ptype)) {
@@ -254,6 +279,18 @@ test_that("convert to vector works for null -> logical()",
{
)
})
+test_that("convert to vector works for extension<boolean> -> logical()", {
+ array <- nanoarrow_extension_array(c(TRUE, FALSE, NA), "some_ext")
+
+ expect_warning(
+ expect_identical(
+ convert_array(array, logical()),
+ c(TRUE, FALSE, NA)
+ ),
+ "Converting unknown extension"
+ )
+})
+
test_that("convert to vector works for dictionary<boolean> -> logical()", {
array <- as_nanoarrow_array(c(0L, 1L, 2L, 1L, 0L))
array$dictionary <- as_nanoarrow_array(c(TRUE, FALSE, NA))
@@ -338,13 +375,15 @@ test_that("convert to vector works for null ->
logical()", {
)
})
-test_that("convert to vector works for dictionary<integer> -> integer()", {
- array <- as_nanoarrow_array(c(0L, 1L, 2L, 1L, 0L))
- array$dictionary <- as_nanoarrow_array(c(123L, 0L, NA_integer_))
+test_that("convert to vector works for extension<integer> -> integer()", {
+ array <- nanoarrow_extension_array(c(0L, 1L, NA_integer_), "some_ext")
- expect_identical(
- convert_array(array, integer()),
- c(123L, 0L, NA_integer_, 0L, 123L)
+ expect_warning(
+ expect_identical(
+ convert_array(array, integer()),
+ c(0L, 1L, NA_integer_)
+ ),
+ "Converting unknown extension"
)
})
@@ -454,6 +493,18 @@ test_that("convert to vector works for null -> double()", {
)
})
+test_that("convert to vector works for extension<double> -> double()", {
+ array <- nanoarrow_extension_array(c(0, 1, NA_real_), "some_ext")
+
+ expect_warning(
+ expect_identical(
+ convert_array(array, double()),
+ c(0, 1, NA_real_)
+ ),
+ "Converting unknown extension"
+ )
+})
+
test_that("convert to vector works for dictionary<double> -> double()", {
array <- as_nanoarrow_array(c(0L, 1L, 2L, 1L, 0L))
array$dictionary <- as_nanoarrow_array(c(123, 0, NA_real_))
@@ -501,6 +552,18 @@ test_that("convert to vector works for null ->
character()", {
)
})
+test_that("convert to vector works for extension<string> -> character()", {
+ array <- nanoarrow_extension_array(c("a", "b", NA_character_), "some_ext")
+
+ expect_warning(
+ expect_identical(
+ convert_array(array, character()),
+ c("a", "b", NA_character_)
+ ),
+ "Converting unknown extension"
+ )
+})
+
test_that("convert to vector works for dictionary<string> -> character()", {
array <- as_nanoarrow_array(factor(letters[5:1]))
@@ -887,23 +950,3 @@ test_that("convert to vector works for lists nested in
data frames", {
df_in_list_in_df
)
})
-
-test_that("convert to vector warns for stripped extension type", {
- ext_arr <- as_nanoarrow_array(1:5)
- nanoarrow_array_set_schema(ext_arr, na_extension(na_int32(), "some_ext"))
- expect_warning(
- expect_identical(convert_array(ext_arr), 1:5),
- "Converting unknown extension some_ext"
- )
-
- nested_ext_array <- as_nanoarrow_array(data.frame(x = 1:5))
- nanoarrow_array_set_schema(
- nested_ext_array,
- na_struct(list(x = na_extension(na_int32(), "some_ext")))
- )
-
- expect_warning(
- expect_identical(convert_array(nested_ext_array), data.frame(x = 1:5)),
- "x: Converting unknown extension some_ext"
- )
-})
diff --git a/r/tests/testthat/test-extension-vctrs.R
b/r/tests/testthat/test-extension-vctrs.R
new file mode 100644
index 0000000..cb6d840
--- /dev/null
+++ b/r/tests/testthat/test-extension-vctrs.R
@@ -0,0 +1,85 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("vctrs extension type can roundtrip built-in vector types", {
+ skip_if_not_installed("tibble")
+
+ # Arrow tibbleifies everything, so we do here too
+ # Lists aren't automatically handled in nanoarrow conversion, so they
+ # aren't listed here yet.
+ vectors <- list(
+ lgl = c(FALSE, TRUE, NA),
+ int = c(0L, 1L, NA_integer_),
+ dbl = c(0, 1, NA_real_),
+ chr = c("a", NA_character_),
+ posixct = as.POSIXct("2000-01-01 12:23", tz = "UTC"),
+ posixlt = as.POSIXlt("2000-01-01 12:23", tz = "UTC"),
+ date = as.Date("2000-01-01"),
+ difftime = as.difftime(123, units = "secs"),
+ data_frame_simple = tibble::tibble(x = 1:5),
+ data_frame_nested = tibble::tibble(x = 1:5, y = tibble::tibble(z =
letters[1:5]))
+ )
+
+ for (nm in names(vectors)) {
+ vctr <- vectors[[nm]]
+ ptype <- vctrs::vec_ptype(vctr)
+ schema <- na_vctrs(vctr)
+
+ array <- as_nanoarrow_array(vctr, schema = schema)
+ array_schema <- infer_nanoarrow_schema(array)
+
+ # Roundtrip through convert_array()
+ expect_true(nanoarrow_schema_identical(array_schema, schema))
+ expect_identical(infer_nanoarrow_ptype(array), ptype)
+ expect_identical(convert_array(array), vctr)
+
+ # Roundtrip with an empty array stream
+ stream <- basic_array_stream(list(), schema = schema)
+ expect_identical(convert_array_stream(stream), ptype)
+
+ # Roundtrip with multiple chunks
+ stream <- basic_array_stream(list(array, array))
+ expect_identical(convert_array_stream(stream), vctrs::vec_rep(vctr, 2))
+
+ if (requireNamespace("arrow", quietly = TRUE)) {
+ # Roundtrip from nanoarrow -> arrow -> R
+ arrow_array <- arrow::as_arrow_array(array)
+ expect_s3_class(arrow_array, "ExtensionArray")
+ expect_identical(arrow_array$type$ptype(), ptype)
+ expect_identical(arrow_array$as_vector(), vctr)
+
+ # Roundtrip from arrow -> nanoarrow -> R
+ arrow_array <- arrow::vctrs_extension_array(vctr)
+ array <- as_nanoarrow_array(vctr, schema = schema)
+ expect_identical(infer_nanoarrow_ptype(array), ptype)
+ expect_identical(convert_array(array), vctr)
+ }
+ }
+})
+
+test_that("vctrs extension type respects `to` in convert_array()", {
+ skip_if_not_installed("vctrs")
+
+ vctr <- as.Date("2000-01-01")
+ array <- as_nanoarrow_array(vctr, schema = na_vctrs(vctr))
+
+ expect_identical(convert_array(array), vctr)
+ expect_identical(
+ convert_array(array, to = as.POSIXct(character())),
+ vctrs::vec_cast(vctr, as.POSIXct(character()))
+ )
+})
diff --git a/r/tests/testthat/test-extension.R
b/r/tests/testthat/test-extension.R
new file mode 100644
index 0000000..ceb5717
--- /dev/null
+++ b/r/tests/testthat/test-extension.R
@@ -0,0 +1,116 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("extension types can be registered and unregistered", {
+ spec <- nanoarrow_extension_spec()
+ register_nanoarrow_extension("some_ext", spec)
+ expect_identical(resolve_nanoarrow_extension("some_ext"), spec)
+ unregister_nanoarrow_extension("some_ext")
+ expect_identical(resolve_nanoarrow_extension("some_ext"), NULL)
+})
+
+test_that("infer_nanoarrow_ptype() dispatches on registered extension spec", {
+ register_nanoarrow_extension(
+ "some_ext",
+ nanoarrow_extension_spec(subclass = "some_spec_class")
+ )
+ on.exit(unregister_nanoarrow_extension("some_ext"))
+
+ infer_nanoarrow_ptype_extension.some_spec_class <- function(spec, x, ...) {
+ infer_nanoarrow_ptype_extension(NULL, x, ..., warn_unregistered = FALSE)
+ }
+
+ s3_register(
+ "nanoarrow::infer_nanoarrow_ptype_extension",
+ "some_spec_class",
+ infer_nanoarrow_ptype_extension.some_spec_class
+ )
+
+ expect_identical(
+ infer_nanoarrow_ptype(
+ na_extension(na_struct(list(some_name = na_int32())), "some_ext")
+ ),
+ data.frame(some_name = integer())
+ )
+})
+
+test_that("convert_array() dispatches on registered extension spec", {
+ register_nanoarrow_extension(
+ "some_ext",
+ nanoarrow_extension_spec(subclass = "some_spec_class")
+ )
+ on.exit(unregister_nanoarrow_extension("some_ext"))
+
+ convert_array_extension.some_spec_class <- function(spec, array, to, ...) {
+ convert_array_extension(NULL, array, to, ..., warn_unregistered = FALSE)
+ }
+
+ s3_register(
+ "nanoarrow::convert_array_extension",
+ "some_spec_class",
+ convert_array_extension.some_spec_class
+ )
+
+ expect_identical(
+ convert_array(
+ nanoarrow_extension_array(data.frame(some_name = 1:5), "some_ext")
+ ),
+ data.frame(some_name = 1:5)
+ )
+})
+
+test_that("as_nanoarrow_array() dispatches on registered extension spec", {
+ register_nanoarrow_extension(
+ "some_ext",
+ nanoarrow_extension_spec(subclass = "some_spec_class")
+ )
+ on.exit(unregister_nanoarrow_extension("some_ext"))
+
+ expect_error(
+ as_nanoarrow_array(
+ data.frame(some_name = 1:5),
+ schema = na_extension(
+ na_struct(list(some_name = na_int32())),
+ "some_ext"
+ )
+ ),
+ "not implemented for extension"
+ )
+
+ as_nanoarrow_array_extension.some_spec_class <- function(spec, x, ...,
schema = NULL) {
+ nanoarrow_extension_array(x, "some_ext")
+ }
+
+ s3_register(
+ "nanoarrow::as_nanoarrow_array_extension",
+ "some_spec_class",
+ as_nanoarrow_array_extension.some_spec_class
+ )
+
+ ext_array <- as_nanoarrow_array(
+ data.frame(some_name = 1:5),
+ schema = na_extension(
+ na_struct(list(some_name = na_int32())),
+ "some_ext"
+ )
+ )
+
+ expect_identical(
+ infer_nanoarrow_schema(ext_array)$metadata[["ARROW:extension:name"]],
+ "some_ext"
+ )
+})
diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R
index c1bdc8e..0626f4d 100644
--- a/r/tests/testthat/test-util.R
+++ b/r/tests/testthat/test-util.R
@@ -48,6 +48,14 @@ test_that("preserve/release works when release happens on
another thread", {
expect_identical(preserved_empty(), 0)
})
+test_that("vector slicer works", {
+ expect_identical(vec_slice2(letters, 1), "a")
+ expect_identical(
+ vec_slice2(data.frame(letters = letters, stringsAsFactors = FALSE), 1),
+ data.frame(letters = "a", stringsAsFactors = FALSE)
+ )
+})
+
test_that("new_data_frame() works", {
expect_identical(
new_data_frame(list(x = 1, y = 2), nrow = 1),