This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new c1522ad [R] Add accessors and print methods to S3 objects (#47)
c1522ad is described below
commit c1522adb4cda2cf734c394242147845f6434544f
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Sep 15 22:28:59 2022 -0300
[R] Add accessors and print methods to S3 objects (#47)
* with list interface for nanoarrow_schema
* s3 print/str/format methods
* slightly prettier output
* first pass at array info
* schemaless array list interface
* with list interface working
* fix buffer types and test them
* str, format, print for array
* simplify str() for schema
* with array stream accessors and printing
* better buffer printing + as.raw()
* add examples
* add release() as method for the stream
* print release callback
* _proxy instead of _info for list representations
* better readme
* add infer_nanoarrow_schema() for array stream
---
r/NAMESPACE | 28 +++
r/R/array-stream.R | 97 +++++++
r/R/array.R | 120 ++++++++-
.../testthat/test-array-stream.R => R/buffer.R} | 35 ++-
r/R/pointers.R | 6 +
r/R/schema.R | 90 +++++++
r/README.Rmd | 66 ++++-
r/README.md | 185 +++++++++++++-
r/man/as_nanoarrow_array.Rd | 8 +
r/man/as_nanoarrow_array_stream.Rd | 12 +
r/man/as_nanoarrow_schema.Rd | 5 +
r/man/nanoarrow_pointer_is_valid.Rd | 3 +
r/src/array.c | 280 +++++++++++++++++++--
r/src/array_stream.c | 40 +++
r/src/{array_stream.c => buffer.c} | 31 ++-
r/src/init.c | 20 +-
r/src/pointers.c | 7 +
r/src/schema.c | 91 +++++++
r/src/schema.h | 6 +-
r/tests/testthat/test-array-stream.R | 66 +++++
r/tests/testthat/test-array.R | 170 +++++++++++++
r/tests/testthat/test-pointers.R | 1 +
r/tests/testthat/test-schema.R | 69 +++++
23 files changed, 1395 insertions(+), 41 deletions(-)
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 61ec3ef..14da83f 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -1,6 +1,13 @@
# Generated by roxygen2: do not edit by hand
+S3method("$",nanoarrow_array)
+S3method("$",nanoarrow_array_stream)
+S3method("$",nanoarrow_schema)
+S3method("[[",nanoarrow_array)
+S3method("[[",nanoarrow_array_stream)
+S3method("[[",nanoarrow_schema)
S3method(as.data.frame,nanoarrow_array)
+S3method(as.raw,nanoarrow_buffer)
S3method(as.vector,nanoarrow_array)
S3method(as_nanoarrow_array,Array)
S3method(as_nanoarrow_array,ChunkedArray)
@@ -14,9 +21,28 @@ S3method(as_nanoarrow_schema,DataType)
S3method(as_nanoarrow_schema,Field)
S3method(as_nanoarrow_schema,Schema)
S3method(as_nanoarrow_schema,nanoarrow_schema)
+S3method(format,nanoarrow_array)
+S3method(format,nanoarrow_array_stream)
+S3method(format,nanoarrow_buffer)
+S3method(format,nanoarrow_schema)
S3method(from_nanoarrow_array,default)
S3method(infer_nanoarrow_schema,default)
S3method(infer_nanoarrow_schema,nanoarrow_array)
+S3method(infer_nanoarrow_schema,nanoarrow_array_stream)
+S3method(length,nanoarrow_array)
+S3method(length,nanoarrow_array_stream)
+S3method(length,nanoarrow_schema)
+S3method(names,nanoarrow_array)
+S3method(names,nanoarrow_array_stream)
+S3method(names,nanoarrow_schema)
+S3method(print,nanoarrow_array)
+S3method(print,nanoarrow_array_stream)
+S3method(print,nanoarrow_buffer)
+S3method(print,nanoarrow_schema)
+S3method(str,nanoarrow_array)
+S3method(str,nanoarrow_array_stream)
+S3method(str,nanoarrow_buffer)
+S3method(str,nanoarrow_schema)
export(as_nanoarrow_array)
export(as_nanoarrow_array_stream)
export(as_nanoarrow_schema)
@@ -28,9 +54,11 @@ export(nanoarrow_allocate_schema)
export(nanoarrow_build_id)
export(nanoarrow_pointer_addr_chr)
export(nanoarrow_pointer_addr_dbl)
+export(nanoarrow_pointer_addr_pretty)
export(nanoarrow_pointer_export)
export(nanoarrow_pointer_is_valid)
export(nanoarrow_pointer_move)
export(nanoarrow_pointer_release)
importFrom(utils,getFromNamespace)
+importFrom(utils,str)
useDynLib(nanoarrow, .registration = TRUE)
diff --git a/r/R/array-stream.R b/r/R/array-stream.R
index 6b5c049..3d37ff7 100644
--- a/r/R/array-stream.R
+++ b/r/R/array-stream.R
@@ -33,6 +33,18 @@
#'
#' @return An object of class 'nanoarrow_array_stream'
#' @export
+#'
+#' @examples
+#' (stream <- as_nanoarrow_array_stream(data.frame(x = 1:5)))
+#' stream$get_schema()
+#' stream$get_next()
+#'
+#' # The last batch is returned as NULL
+#' stream$get_next()
+#'
+#' # Release the stream
+#' stream$release()
+#'
as_nanoarrow_array_stream <- function(x, ..., schema = NULL) {
UseMethod("as_nanoarrow_array_stream")
}
@@ -53,3 +65,88 @@ as_nanoarrow_array_stream.default <- function(x, ..., schema
= NULL) {
schema = schema
)
}
+
+#' @export
+infer_nanoarrow_schema.nanoarrow_array_stream <- function(x, ...) {
+ x$get_schema()
+}
+
+#' @importFrom utils str
+#' @export
+str.nanoarrow_array_stream <- function(object, ...) {
+ cat(sprintf("%s\n", format(object)))
+
+ if (nanoarrow_pointer_is_valid(object)) {
+ # Use the str() of the list version but remove the first
+ # line of the output ("List of 2")
+ info <- list(
+ get_schema = object$get_schema,
+ get_next = object$get_next,
+ release = object$release
+ )
+ raw_str_output <- utils::capture.output(str(info, ..., give.attr = FALSE))
+ cat(paste0(raw_str_output[-1], collapse = "\n"))
+ cat("\n")
+ }
+
+ invisible(object)
+}
+
+#' @export
+print.nanoarrow_array_stream <- function(x, ...) {
+ str(x, ...)
+ invisible(x)
+}
+
+#' @export
+format.nanoarrow_array_stream <- function(x, ...) {
+ if (nanoarrow_pointer_is_valid(x)) {
+ tryCatch(
+ sprintf("<nanoarrow_array_stream[%s]>", x$get_schema()$format),
+ error = function(...) "<nanoarrow_array_stream[<error calling
get_schema()]>"
+ )
+
+ } else {
+ "<nanoarrow_array_stream[invalid pointer]>"
+ }
+}
+
+# This is the list()-like interface to nanoarrow_array_stream that allows $
and [[
+# to make nice auto-complete when interacting in an IDE
+
+#' @export
+length.nanoarrow_array_stream <- function(x, ...) {
+ 3L
+}
+
+#' @export
+names.nanoarrow_array_stream <- function(x, ...) {
+ c("get_schema", "get_next", "release")
+}
+
+#' @export
+`[[.nanoarrow_array_stream` <- function(x, i, ...) {
+ force(x)
+ if (identical(i, "get_schema") || isTRUE(i == 1L)) {
+ function() .Call(nanoarrow_c_array_stream_get_schema, x)
+ } else if (identical(i, "get_next") || isTRUE(i == 2L)) {
+ function(schema = x$get_schema(), validate = TRUE) {
+ array <- .Call(nanoarrow_c_array_stream_get_next, x)
+ if (!nanoarrow_pointer_is_valid(array)) {
+ return(NULL)
+ }
+
+ nanoarrow_array_set_schema(array, schema, validate = validate)
+ array
+ }
+ } else if (identical(i, "release") || isTRUE(i == 3L)) {
+ function() nanoarrow_pointer_release(x)
+ } else {
+ NULL
+ }
+}
+
+#' @export
+`$.nanoarrow_array_stream` <- function(x, i, ...) {
+ x[[i]]
+}
diff --git a/r/R/array.R b/r/R/array.R
index 645af83..f8e5612 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -34,6 +34,14 @@
#'
#' @return An object of class 'nanoarrow_array'
#' @export
+#'
+#' @examples
+#' (array <- as_nanoarrow_array(1:5))
+#' as.vector(array)
+#'
+#' (array <- as_nanoarrow_array(data.frame(x = 1:5)))
+#' as.data.frame(array)
+#'
as_nanoarrow_array <- function(x, ..., schema = NULL) {
UseMethod("as_nanoarrow_array")
}
@@ -87,7 +95,115 @@ infer_nanoarrow_schema.nanoarrow_array <- function(x, ...) {
stop("nanoarrow_array() has no associated schema")
}
-nanoarrow_array_set_schema <- function(array, schema) {
- .Call(nanoarrow_c_array_set_schema, array, schema)
+nanoarrow_array_set_schema <- function(array, schema, validate = TRUE) {
+ .Call(nanoarrow_c_array_set_schema, array, schema, as.logical(validate)[1])
invisible(array)
}
+
+#' @importFrom utils str
+#' @export
+str.nanoarrow_array <- function(object, ...) {
+ cat(sprintf("%s\n", format(object)))
+
+ if (nanoarrow_pointer_is_valid(object)) {
+ # Use the str() of the list version but remove the first
+ # line of the output ("List of 6")
+ info <- nanoarrow_array_proxy_safe(object)
+ raw_str_output <- utils::capture.output(str(info, ...))
+ cat(paste0(raw_str_output[-1], collapse = "\n"))
+ cat("\n")
+ }
+
+ invisible(object)
+}
+
+#' @export
+print.nanoarrow_array <- function(x, ...) {
+ str(x, ...)
+ invisible(x)
+}
+
+#' @export
+format.nanoarrow_array <- function(x, ...) {
+ if (nanoarrow_pointer_is_valid(x)) {
+ schema <- .Call(nanoarrow_c_infer_schema_array, x)
+ if (is.null(schema)) {
+ sprintf("<nanoarrow_array <unknown schema>[%s]>", x$length)
+ } else {
+ sprintf("<nanoarrow_array %s[%s]>", schema$format, x$length)
+ }
+ } else {
+ "<nanoarrow_array[invalid pointer]>"
+ }
+}
+
+
+# This is the list()-like interface to nanoarrow_array that allows $ and [[
+# to make nice auto-complete for the array fields
+
+
+#' @export
+length.nanoarrow_array <- function(x, ...) {
+ 6L
+}
+
+#' @export
+names.nanoarrow_array <- function(x, ...) {
+ c("length", "null_count", "offset", "buffers", "children", "dictionary")
+}
+
+#' @export
+`[[.nanoarrow_array` <- function(x, i, ...) {
+ nanoarrow_array_proxy_safe(x)[[i]]
+}
+
+#' @export
+`$.nanoarrow_array` <- function(x, i, ...) {
+ nanoarrow_array_proxy_safe(x)[[i]]
+}
+
+# A version of nanoarrow_array_proxy() that is less likely to error for invalid
+# arrays and/or schemas
+nanoarrow_array_proxy_safe <- function(array, recursive = FALSE) {
+ schema <- .Call(nanoarrow_c_infer_schema_array, array)
+ tryCatch(
+ nanoarrow_array_proxy(array, schema = schema, recursive = recursive),
+ error = function(...) nanoarrow_array_proxy(array, recursive = recursive)
+ )
+}
+
+nanoarrow_array_proxy <- function(array, schema = NULL, recursive = FALSE) {
+ if (!is.null(schema)) {
+ array_view <- .Call(nanoarrow_c_array_view, array, schema)
+ result <- .Call(nanoarrow_c_array_proxy, array, array_view, recursive)
+
+ # Pass on some information from the schema if we have it
+ if (!is.null(result$dictionary)) {
+ nanoarrow_array_set_schema(result$dictionary, schema$dictionary)
+ }
+
+ names(result$children) <- names(schema$children)
+
+ if (!recursive) {
+ result$children <- Map(
+ nanoarrow_array_set_schema,
+ result$children,
+ schema$children
+ )
+ }
+ } else {
+ result <- .Call(nanoarrow_c_array_proxy, array, NULL, recursive)
+ }
+
+ # Recursive-ness of the dictionary is handled here because it's not
+ # part of the array view
+ if (recursive && !is.null(result$dictionary)) {
+ result$dictionary <- nanoarrow_array_proxy(
+ result$dictionary,
+ schema = schema$dictionary,
+ recursive = TRUE
+ )
+ }
+
+ result
+}
diff --git a/r/tests/testthat/test-array-stream.R b/r/R/buffer.R
similarity index 57%
copy from r/tests/testthat/test-array-stream.R
copy to r/R/buffer.R
index a24080c..ea9b6dc 100644
--- a/r/tests/testthat/test-array-stream.R
+++ b/r/R/buffer.R
@@ -15,13 +15,32 @@
# specific language governing permissions and limitations
# under the License.
-test_that("as_nanoarrow_array_stream() works for nanoarow_array_stream", {
- stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
- expect_identical(as_nanoarrow_array_stream(stream), stream)
+#' @importFrom utils str
+#' @export
+str.nanoarrow_buffer <- function(object, ...) {
+ cat(sprintf("%s\n", format(object)))
+ invisible(object)
+}
- # Not supported yet
- expect_error(
- as_nanoarrow_array_stream(stream, schema = data.frame(x = double())),
- "is.null\\(schema\\) is not TRUE"
+#' @export
+print.nanoarrow_buffer <- function(x, ...) {
+ str(x, ...)
+ invisible(x)
+}
+
+#' @export
+format.nanoarrow_buffer <- function(x, ...) {
+ info <- .Call(nanoarrow_c_buffer_info, x)
+ size_bytes <- info$size_bytes %||% NA_integer_
+ sprintf(
+ "<%s[%s b] at %s>",
+ class(x)[1],
+ size_bytes,
+ nanoarrow_pointer_addr_pretty(x)
)
-})
+}
+
+#' @export
+as.raw.nanoarrow_buffer <- function(x, ...) {
+ .Call(nanoarrow_c_buffer_as_raw, x)
+}
diff --git a/r/R/pointers.R b/r/R/pointers.R
index 6a0200d..6694380 100644
--- a/r/R/pointers.R
+++ b/r/R/pointers.R
@@ -79,6 +79,12 @@ nanoarrow_pointer_addr_chr <- function(ptr) {
.Call(nanoarrow_c_pointer_addr_chr, ptr)
}
+#' @rdname nanoarrow_pointer_is_valid
+#' @export
+nanoarrow_pointer_addr_pretty <- function(ptr) {
+ .Call(nanoarrow_c_pointer_addr_pretty, ptr)
+}
+
#' @rdname nanoarrow_pointer_is_valid
#' @export
nanoarrow_pointer_release <- function(ptr) {
diff --git a/r/R/schema.R b/r/R/schema.R
index 97f960b..57aa528 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -28,6 +28,11 @@
#'
#' @return An object of class 'nanoarrow_schema'
#' @export
+#'
+#' @examples
+#' infer_nanoarrow_schema(integer())
+#' infer_nanoarrow_schema(data.frame(x = integer()))
+#'
as_nanoarrow_schema <- function(x, ...) {
UseMethod("as_nanoarrow_schema")
}
@@ -47,3 +52,88 @@ infer_nanoarrow_schema <- function(x, ...) {
infer_nanoarrow_schema.default <- function(x, ...) {
as_nanoarrow_schema(arrow::infer_type(x, ...))
}
+
+#' @importFrom utils str
+#' @export
+str.nanoarrow_schema <- function(object, ...) {
+ cat(sprintf("%s\n", format(object)))
+
+ if (nanoarrow_pointer_is_valid(object)) {
+ # Use the str() of the list version but remove the first
+ # line of the output ("List of 6")
+ info <- nanoarrow_schema_proxy(object)
+ raw_str_output <- utils::capture.output(str(info, ...))
+ cat(paste0(raw_str_output[-1], collapse = "\n"))
+ cat("\n")
+ }
+
+ invisible(object)
+}
+
+#' @export
+print.nanoarrow_schema <- function(x, ...) {
+ str(x, ...)
+ invisible(x)
+}
+
+#' @export
+format.nanoarrow_schema <- function(x, ...) {
+ if (nanoarrow_pointer_is_valid(x)) {
+ sprintf("<nanoarrow_schema[%s]>", x$format)
+ } else {
+ "<nanoarrow_schema[invalid pointer]>"
+ }
+}
+
+# This is the list()-like interface to nanoarrow_schema that allows $ and [[
+# to make nice auto-complete for the schema fields
+
+#' @export
+length.nanoarrow_schema <- function(x, ...) {
+ 6L
+}
+
+#' @export
+names.nanoarrow_schema <- function(x, ...) {
+ c("format", "name", "metadata", "flags", "children", "dictionary")
+}
+
+#' @export
+`[[.nanoarrow_schema` <- function(x, i, ...) {
+ nanoarrow_schema_proxy(x)[[i]]
+}
+
+#' @export
+`$.nanoarrow_schema` <- function(x, i, ...) {
+ nanoarrow_schema_proxy(x)[[i]]
+}
+
+nanoarrow_schema_proxy <- function(schema, recursive = FALSE) {
+ result <- .Call(nanoarrow_c_schema_to_list, schema)
+ if (recursive && !is.null(schema$children)) {
+ result$children <- lapply(
+ schema$children,
+ nanoarrow_schema_proxy,
+ recursive = TRUE
+ )
+ }
+
+ if (recursive && !is.null(schema$dictionary)) {
+ result$dictionary <- nanoarrow_schema_proxy(schema$dictionary, recursive =
TRUE)
+ }
+
+ result$metadata <- list_of_raw_to_metadata(result$metadata)
+
+ result
+}
+
+list_of_raw_to_metadata <- function(metadata) {
+ lapply(metadata, function(x) {
+ if (is.character(x) || any(x == 0)) {
+ x
+ } else {
+ x_str <- iconv(list(x), from = "UTF-8", to = "UTF-8", mark = TRUE)[[1]]
+ if (is.na(x_str)) x else x_str
+ }
+ })
+}
diff --git a/r/README.Rmd b/r/README.Rmd
index 92d6532..88b763c 100644
--- a/r/README.Rmd
+++ b/r/README.Rmd
@@ -45,5 +45,69 @@ You can install the development version of nanoarrow from
[GitHub](https://githu
``` r
# install.packages("remotes")
-remotes::install_github("apache/arrow-nanoarrow/r")
+remotes::install_github("apache/arrow-nanoarrow/r", build = FALSE)
```
+
+If you can load the package, you're good to go!
+
+```{r}
+library(nanoarrow)
+```
+
+## Example
+
+The Arrow C Data and Arrow C Stream interfaces are comprised of three
structures: the `ArrowSchema` which represents a data type of an array, the
`ArrowArray` which represents the values of an array, and an
`ArrowArrayStream`, which represents zero or more `ArrowArray`s with a common
`ArrowSchema`. All three can be wrapped by R objects using the nanoarrow R
package.
+
+### Schemas
+
+Use `infer_nanoarrow_schema()` to get the ArrowSchema object that corresponds
to a given R vector type; use `as_nanoarrow_schema()` to convert an object from
some other data type representation (e.g., an arrow R package `DataType` like
`arrow::int32()`).
+
+```{r}
+infer_nanoarrow_schema(1:5)
+as_nanoarrow_schema(arrow::schema(col1 = arrow::float64()))
+```
+
+### Arrays
+
+Use `as_nanoarrow_array()` to convert an object to an ArrowArray object:
+
+```{r}
+as_nanoarrow_array(1:5)
+as_nanoarrow_array(arrow::record_batch(col1 = c(1.1, 2.2)))
+```
+
+You can use `as.vector()` or `as.data.frame()` to get the R representation of
the object back:
+
+```{r}
+array <- as_nanoarrow_array(arrow::record_batch(col1 = c(1.1, 2.2)))
+as.data.frame(array)
+```
+
+Even though at the C level the ArrowArray is distinct from the ArrowSchema, at
the R level we attach a schema wherever possible. You can access the attached
schema using `infer_nanoarrow_schema()`:
+
+```{r}
+infer_nanoarrow_schema(array)
+```
+
+### Array Streams
+
+The easiest way to create an ArrowArrayStream is from an
`arrow::RecordBatchReader`:
+
+```{r}
+reader <- arrow::RecordBatchReader$create(
+ arrow::record_batch(col1 = c(1.1, 2.2)),
+ arrow::record_batch(col1 = c(3.3, 4.4))
+)
+
+(stream <- as_nanoarrow_array_stream(reader))
+```
+
+You can pull batches from the stream using the `$get_next()` method. The last
batch will return `NULL`.
+
+```{r}
+stream$get_next()
+stream$get_next()
+stream$get_next()
+```
+
+After consuming a stream, you should call the release method as soon as you
can. This lets the implementation of the stream release any resources (like
open files) it may be holding in a more predictable way than waiting for the
garabge collector to clean up the object.
diff --git a/r/README.md b/r/README.md
index e14e01b..9cfac16 100644
--- a/r/README.md
+++ b/r/README.md
@@ -38,5 +38,188 @@ You can install the development version of nanoarrow from
``` r
# install.packages("remotes")
-remotes::install_github("apache/arrow-nanoarrow/r")
+remotes::install_github("apache/arrow-nanoarrow/r", build = FALSE)
```
+
+If you can load the package, you’re good to go!
+
+``` r
+library(nanoarrow)
+```
+
+## Example
+
+The Arrow C Data and Arrow C Stream interfaces are comprised of three
+structures: the `ArrowSchema` which represents a data type of an array,
+the `ArrowArray` which represents the values of an array, and an
+`ArrowArrayStream`, which represents zero or more `ArrowArray`s with a
+common `ArrowSchema`. All three can be wrapped by R objects using the
+nanoarrow R package.
+
+### Schemas
+
+Use `infer_nanoarrow_schema()` to get the ArrowSchema object that
+corresponds to a given R vector type; use `as_nanoarrow_schema()` to
+convert an object from some other data type representation (e.g., an
+arrow R package `DataType` like `arrow::int32()`).
+
+``` r
+infer_nanoarrow_schema(1:5)
+#> <nanoarrow_schema[i]>
+#> $ format : chr "i"
+#> $ name : chr ""
+#> $ metadata : list()
+#> $ flags : int 2
+#> $ children : NULL
+#> $ dictionary: NULL
+as_nanoarrow_schema(arrow::schema(col1 = arrow::float64()))
+#> <nanoarrow_schema[+s]>
+#> $ format : chr "+s"
+#> $ name : chr ""
+#> $ metadata : list()
+#> $ flags : int 0
+#> $ children :List of 1
+#> ..$ col1:<nanoarrow_schema[g]>
+#> .. ..$ format : chr "g"
+#> .. ..$ name : chr "col1"
+#> .. ..$ metadata : list()
+#> .. ..$ flags : int 2
+#> .. ..$ children : NULL
+#> .. ..$ dictionary: NULL
+#> $ dictionary: NULL
+```
+
+### Arrays
+
+Use `as_nanoarrow_array()` to convert an object to an ArrowArray object:
+
+``` r
+as_nanoarrow_array(1:5)
+#> <nanoarrow_array i[5]>
+#> $ length : int 5
+#> $ null_count: int 0
+#> $ offset : int 0
+#> $ buffers :List of 2
+#> ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> ..$ :<nanoarrow_buffer_data_int32[20 b] at 0x1397d7758>
+#> $ dictionary: NULL
+#> $ children : list()
+as_nanoarrow_array(arrow::record_batch(col1 = c(1.1, 2.2)))
+#> <nanoarrow_array +s[2]>
+#> $ length : int 2
+#> $ null_count: int 0
+#> $ offset : int 0
+#> $ buffers :List of 1
+#> ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> $ children :List of 1
+#> ..$ col1:<nanoarrow_array g[2]>
+#> .. ..$ length : int 2
+#> .. ..$ null_count: int 0
+#> .. ..$ offset : int 0
+#> .. ..$ buffers :List of 2
+#> .. .. ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> .. .. ..$ :<nanoarrow_buffer_data_double[16 b] at 0x118a4b2b8>
+#> .. ..$ dictionary: NULL
+#> .. ..$ children : list()
+#> $ dictionary: NULL
+```
+
+You can use `as.vector()` or `as.data.frame()` to get the R
+representation of the object back:
+
+``` r
+array <- as_nanoarrow_array(arrow::record_batch(col1 = c(1.1, 2.2)))
+as.data.frame(array)
+#> col1
+#> 1 1.1
+#> 2 2.2
+```
+
+Even though at the C level the ArrowArray is distinct from the
+ArrowSchema, at the R level we attach a schema wherever possible. You
+can access the attached schema using `infer_nanoarrow_schema()`:
+
+``` r
+infer_nanoarrow_schema(array)
+#> <nanoarrow_schema[+s]>
+#> $ format : chr "+s"
+#> $ name : chr ""
+#> $ metadata : list()
+#> $ flags : int 0
+#> $ children :List of 1
+#> ..$ col1:<nanoarrow_schema[g]>
+#> .. ..$ format : chr "g"
+#> .. ..$ name : chr "col1"
+#> .. ..$ metadata : list()
+#> .. ..$ flags : int 2
+#> .. ..$ children : NULL
+#> .. ..$ dictionary: NULL
+#> $ dictionary: NULL
+```
+
+### Array Streams
+
+The easiest way to create an ArrowArrayStream is from an
+`arrow::RecordBatchReader`:
+
+``` r
+reader <- arrow::RecordBatchReader$create(
+ arrow::record_batch(col1 = c(1.1, 2.2)),
+ arrow::record_batch(col1 = c(3.3, 4.4))
+)
+
+(stream <- as_nanoarrow_array_stream(reader))
+#> <nanoarrow_array_stream[+s]>
+#> $ get_schema:function ()
+#> $ get_next :function (schema = x$get_schema(), validate = TRUE)
+#> $ release :function ()
+```
+
+You can pull batches from the stream using the `$get_next()` method. The
+last batch will return `NULL`.
+
+``` r
+stream$get_next()
+#> <nanoarrow_array +s[2]>
+#> $ length : int 2
+#> $ null_count: int 0
+#> $ offset : int 0
+#> $ buffers :List of 1
+#> ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> $ children :List of 1
+#> ..$ col1:<nanoarrow_array g[2]>
+#> .. ..$ length : int 2
+#> .. ..$ null_count: int 0
+#> .. ..$ offset : int 0
+#> .. ..$ buffers :List of 2
+#> .. .. ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> .. .. ..$ :<nanoarrow_buffer_data_double[16 b] at 0x1195924b8>
+#> .. ..$ dictionary: NULL
+#> .. ..$ children : list()
+#> $ dictionary: NULL
+stream$get_next()
+#> <nanoarrow_array +s[2]>
+#> $ length : int 2
+#> $ null_count: int 0
+#> $ offset : int 0
+#> $ buffers :List of 1
+#> ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> $ children :List of 1
+#> ..$ col1:<nanoarrow_array g[2]>
+#> .. ..$ length : int 2
+#> .. ..$ null_count: int 0
+#> .. ..$ offset : int 0
+#> .. ..$ buffers :List of 2
+#> .. .. ..$ :<nanoarrow_buffer_validity[0 b] at 0x0>
+#> .. .. ..$ :<nanoarrow_buffer_data_double[16 b] at 0x1195920f8>
+#> .. ..$ dictionary: NULL
+#> .. ..$ children : list()
+#> $ dictionary: NULL
+stream$get_next()
+#> NULL
+```
+
+After consuming a stream, you should call the release method as soon as
+you can. This lets the implementation of the stream release any
+resources (like open files) it may be holding in a more predictable way
+than waiting for the garabge collector to clean up the object.
diff --git a/r/man/as_nanoarrow_array.Rd b/r/man/as_nanoarrow_array.Rd
index e749931..353dfa0 100644
--- a/r/man/as_nanoarrow_array.Rd
+++ b/r/man/as_nanoarrow_array.Rd
@@ -33,3 +33,11 @@ class can be used in a similar way as an
\link[arrow:array]{arrow::Array}. Note
nanoarrow an \link[arrow:RecordBatch]{arrow::RecordBatch} and a non-nullable
\link[arrow:array]{arrow::StructArray}
are represented identically.
}
+\examples{
+(array <- as_nanoarrow_array(1:5))
+as.vector(array)
+
+(array <- as_nanoarrow_array(data.frame(x = 1:5)))
+as.data.frame(array)
+
+}
diff --git a/r/man/as_nanoarrow_array_stream.Rd
b/r/man/as_nanoarrow_array_stream.Rd
index b49648d..0470d08 100644
--- a/r/man/as_nanoarrow_array_stream.Rd
+++ b/r/man/as_nanoarrow_array_stream.Rd
@@ -28,3 +28,15 @@ and a stream of non-nullable struct arrays are represented
identically.
Also note that array streams are mutable objects and are passed by
reference and not by value.
}
+\examples{
+(stream <- as_nanoarrow_array_stream(data.frame(x = 1:5)))
+stream$get_schema()
+stream$get_next()
+
+# The last batch is returned as NULL
+stream$get_next()
+
+# Release the stream
+stream$release()
+
+}
diff --git a/r/man/as_nanoarrow_schema.Rd b/r/man/as_nanoarrow_schema.Rd
index 192c5e8..9cc25b0 100644
--- a/r/man/as_nanoarrow_schema.Rd
+++ b/r/man/as_nanoarrow_schema.Rd
@@ -24,3 +24,8 @@ Arrow C Data interface. This data structure can be used to
represent an
in nanoarrow, an \code{\link[arrow:Schema]{arrow::schema()}} and a
non-nullable \code{\link[arrow:data-type]{arrow::struct()}}
are represented identically.
}
+\examples{
+infer_nanoarrow_schema(integer())
+infer_nanoarrow_schema(data.frame(x = integer()))
+
+}
diff --git a/r/man/nanoarrow_pointer_is_valid.Rd
b/r/man/nanoarrow_pointer_is_valid.Rd
index a77bdd3..f90ff7f 100644
--- a/r/man/nanoarrow_pointer_is_valid.Rd
+++ b/r/man/nanoarrow_pointer_is_valid.Rd
@@ -4,6 +4,7 @@
\alias{nanoarrow_pointer_is_valid}
\alias{nanoarrow_pointer_addr_dbl}
\alias{nanoarrow_pointer_addr_chr}
+\alias{nanoarrow_pointer_addr_pretty}
\alias{nanoarrow_pointer_release}
\alias{nanoarrow_pointer_move}
\alias{nanoarrow_pointer_export}
@@ -18,6 +19,8 @@ nanoarrow_pointer_addr_dbl(ptr)
nanoarrow_pointer_addr_chr(ptr)
+nanoarrow_pointer_addr_pretty(ptr)
+
nanoarrow_pointer_release(ptr)
nanoarrow_pointer_move(ptr_src, ptr_dst)
diff --git a/r/src/array.c b/r/src/array.c
index fe3c8e2..b4d5777 100644
--- a/r/src/array.c
+++ b/r/src/array.c
@@ -34,29 +34,32 @@ void finalize_array_xptr(SEXP array_xptr) {
}
}
-SEXP nanoarrow_c_array_set_schema(SEXP array_xptr, SEXP schema_xptr) {
+SEXP nanoarrow_c_array_set_schema(SEXP array_xptr, SEXP schema_xptr, SEXP
validate_sexp) {
// Fair game to remove a schema from a pointer
if (schema_xptr == R_NilValue) {
array_xptr_set_schema(array_xptr, R_NilValue);
return R_NilValue;
}
- // If adding a schema, validate the pair
- struct ArrowArray* array = array_from_xptr(array_xptr);
- struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ int validate = LOGICAL(validate_sexp)[0];
+ if (validate) {
+ // If adding a schema, validate the schema and the pair
+ struct ArrowArray* array = array_from_xptr(array_xptr);
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
- struct ArrowArrayView array_view;
- struct ArrowError error;
- int result = ArrowArrayViewInitFromSchema(&array_view, schema, &error);
- if (result != NANOARROW_OK) {
- ArrowArrayViewReset(&array_view);
- Rf_error("%s", ArrowErrorMessage(&error));
- }
+ struct ArrowArrayView array_view;
+ struct ArrowError error;
+ int result = ArrowArrayViewInitFromSchema(&array_view, schema, &error);
+ if (result != NANOARROW_OK) {
+ ArrowArrayViewReset(&array_view);
+ Rf_error("%s", ArrowErrorMessage(&error));
+ }
- result = ArrowArrayViewSetArray(&array_view, array, &error);
- ArrowArrayViewReset(&array_view);
- if (result != NANOARROW_OK) {
- Rf_error("%s", ArrowErrorMessage(&error));
+ result = ArrowArrayViewSetArray(&array_view, array, &error);
+ ArrowArrayViewReset(&array_view);
+ if (result != NANOARROW_OK) {
+ Rf_error("%s", ArrowErrorMessage(&error));
+ }
}
array_xptr_set_schema(array_xptr, schema_xptr);
@@ -72,9 +75,252 @@ SEXP nanoarrow_c_infer_schema_array(SEXP array_xptr) {
}
}
-// for ArrowArray* that are exported references to an R schema_xptr
+static SEXP borrow_array_xptr(struct ArrowArray* array, SEXP shelter) {
+ SEXP array_xptr = PROTECT(R_MakeExternalPtr(array, R_NilValue, shelter));
+ Rf_setAttrib(array_xptr, R_ClassSymbol, Rf_mkString("nanoarrow_array"));
+ UNPROTECT(1);
+ return array_xptr;
+}
+
+static SEXP borrow_array_view_child(struct ArrowArrayView* array_view, int64_t
i,
+ SEXP shelter) {
+ if (array_view != NULL) {
+ return R_MakeExternalPtr(array_view->children[i], R_NilValue, shelter);
+ } else {
+ return R_NilValue;
+ }
+}
+
+static SEXP borrow_unknown_buffer(struct ArrowArray* array, int64_t i, SEXP
shelter) {
+ SEXP buffer_class = PROTECT(Rf_allocVector(STRSXP, 2));
+ SET_STRING_ELT(buffer_class, 0, Rf_mkChar("nanoarrow_buffer_unknown"));
+ SET_STRING_ELT(buffer_class, 1, Rf_mkChar("nanoarrow_buffer"));
+
+ SEXP buffer = PROTECT(R_MakeExternalPtr((void*)array->buffers[i],
R_NilValue, shelter));
+ Rf_setAttrib(buffer, R_ClassSymbol, buffer_class);
+ UNPROTECT(2);
+ return buffer;
+}
+
+static SEXP length_from_int64(int64_t value) {
+ if (value < 2147483647) {
+ return Rf_ScalarInteger(value);
+ } else {
+ return Rf_ScalarReal(value);
+ }
+}
+
+static SEXP borrow_buffer(struct ArrowArrayView* array_view, int64_t i, SEXP
shelter) {
+ SEXP buffer_class = PROTECT(Rf_allocVector(STRSXP, 2));
+ SET_STRING_ELT(buffer_class, 1, Rf_mkChar("nanoarrow_buffer"));
+
+ const char* class0 = "nanoarrow_buffer_unknown";
+
+ switch (array_view->layout.buffer_type[i]) {
+ case NANOARROW_BUFFER_TYPE_VALIDITY:
+ class0 = "nanoarrow_buffer_validity";
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ switch (array_view->layout.element_size_bits[i]) {
+ case 32:
+ class0 = "nanoarrow_buffer_data_offset32";
+ break;
+ case 64:
+ class0 = "nanoarrow_buffer_data_offset64";
+ break;
+ default:
+ break;
+ }
+ break;
+ case NANOARROW_BUFFER_TYPE_DATA:
+ switch (array_view->storage_type) {
+ case NANOARROW_TYPE_BOOL:
+ class0 = "nanoarrow_buffer_data_bool";
+ break;
+ case NANOARROW_TYPE_UINT8:
+ class0 = "nanoarrow_buffer_data_uint8";
+ break;
+ case NANOARROW_TYPE_INT8:
+ class0 = "nanoarrow_buffer_data_int8";
+ break;
+ case NANOARROW_TYPE_UINT16:
+ class0 = "nanoarrow_buffer_data_uint16";
+ break;
+ case NANOARROW_TYPE_INT16:
+ class0 = "nanoarrow_buffer_data_int16";
+ break;
+ case NANOARROW_TYPE_UINT32:
+ class0 = "nanoarrow_buffer_data_uint32";
+ break;
+ case NANOARROW_TYPE_INT32:
+ class0 = "nanoarrow_buffer_data_int32";
+ break;
+ case NANOARROW_TYPE_UINT64:
+ class0 = "nanoarrow_buffer_data_uint64";
+ break;
+ case NANOARROW_TYPE_INT64:
+ class0 = "nanoarrow_buffer_data_int64";
+ break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ class0 = "nanoarrow_buffer_data_half_float";
+ break;
+ case NANOARROW_TYPE_FLOAT:
+ class0 = "nanoarrow_buffer_data_float";
+ break;
+ case NANOARROW_TYPE_DOUBLE:
+ class0 = "nanoarrow_buffer_data_double";
+ break;
+ case NANOARROW_TYPE_DECIMAL128:
+ class0 = "nanoarrow_buffer_data_decimal128";
+ break;
+ case NANOARROW_TYPE_DECIMAL256:
+ class0 = "nanoarrow_buffer_data_decimal256";
+ break;
+ case NANOARROW_TYPE_INTERVAL_MONTHS:
+ class0 = "nanoarrow_buffer_data_int32";
+ break;
+ case NANOARROW_TYPE_INTERVAL_DAY_TIME:
+ class0 = "nanoarrow_buffer_data_int64";
+ break;
+ case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
+ class0 = "nanoarrow_buffer_data_interval_month_day_nano";
+ break;
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_LARGE_STRING:
+ class0 = "nanoarrow_buffer_data_utf8";
+ break;
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ case NANOARROW_TYPE_BINARY:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ class0 = "nanoarrow_buffer_data_uint8";
+ break;
+ default:
+ break;
+ }
+ break;
+ case NANOARROW_BUFFER_TYPE_TYPE_ID:
+ class0 = "nanoarrow_buffer_type_id";
+ break;
+ case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+ class0 = "nanoarrow_buffer_union_offset";
+ break;
+ default:
+ break;
+ }
+
+ SET_STRING_ELT(buffer_class, 0, Rf_mkChar(class0));
+
+ const char* names[] = {"size_bytes", "element_size_bits", ""};
+ SEXP buffer_info = PROTECT(Rf_mkNamed(VECSXP, names));
+ SET_VECTOR_ELT(buffer_info, 0,
length_from_int64(array_view->buffer_views[i].n_bytes));
+ SET_VECTOR_ELT(buffer_info, 1,
length_from_int64(array_view->layout.element_size_bits[i]));
+
+ SEXP buffer =
PROTECT(R_MakeExternalPtr((void*)array_view->buffer_views[i].data.data,
+ buffer_info, shelter));
+ Rf_setAttrib(buffer, R_ClassSymbol, buffer_class);
+ UNPROTECT(3);
+ return buffer;
+}
+
+static void finalize_array_view_xptr(SEXP array_view_xptr) {
+ struct ArrowArrayView* array_view =
+ (struct ArrowArrayView*)R_ExternalPtrAddr(array_view_xptr);
+ if (array_view != NULL) {
+ ArrowArrayViewReset(array_view);
+ ArrowFree(array_view);
+ }
+}
+
+SEXP nanoarrow_c_array_view(SEXP array_xptr, SEXP schema_xptr) {
+ struct ArrowArray* array = array_from_xptr(array_xptr);
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+
+ struct ArrowError error;
+ ArrowErrorSet(&error, "");
+
+ struct ArrowArrayView* array_view =
+ (struct ArrowArrayView*)ArrowMalloc(sizeof(struct ArrowArrayView));
+ ArrowArrayViewInit(array_view, NANOARROW_TYPE_UNINITIALIZED);
+ SEXP xptr = PROTECT(R_MakeExternalPtr(array_view, R_NilValue, array_xptr));
+ R_RegisterCFinalizer(xptr, &finalize_array_view_xptr);
+
+ int result = ArrowArrayViewInitFromSchema(array_view, schema, &error);
+ if (result != NANOARROW_OK) {
+ Rf_error("<ArrowArrayViewInitFromSchema> %s", error.message);
+ }
+
+ result = ArrowArrayViewSetArray(array_view, array, &error);
+ if (result != NANOARROW_OK) {
+ Rf_error("<ArrowArrayViewSetArray> %s", error.message);
+ }
+
+ UNPROTECT(1);
+ return xptr;
+}
+
+SEXP nanoarrow_c_array_proxy(SEXP array_xptr, SEXP array_view_xptr, SEXP
recursive_sexp) {
+ struct ArrowArray* array = array_from_xptr(array_xptr);
+ int recursive = LOGICAL(recursive_sexp)[0];
+ struct ArrowArrayView* array_view = NULL;
+ if (array_view_xptr != R_NilValue) {
+ array_view = (struct ArrowArrayView*)R_ExternalPtrAddr(array_view_xptr);
+ }
+
+ const char* names[] = {"length", "null_count", "offset", "buffers",
+ "children", "dictionary", ""};
+ SEXP array_proxy = PROTECT(Rf_mkNamed(VECSXP, names));
+
+ SET_VECTOR_ELT(array_proxy, 0, length_from_int64(array->length));
+ SET_VECTOR_ELT(array_proxy, 1, length_from_int64(array->null_count));
+ SET_VECTOR_ELT(array_proxy, 2, length_from_int64(array->offset));
+
+ if (array->n_buffers > 0) {
+ SEXP buffers = PROTECT(Rf_allocVector(VECSXP, array->n_buffers));
+ for (int64_t i = 0; i < array->n_buffers; i++) {
+ if (array_view != NULL) {
+ SET_VECTOR_ELT(buffers, i, borrow_buffer(array_view, i, array_xptr));
+ } else {
+ SET_VECTOR_ELT(buffers, i, borrow_unknown_buffer(array, i,
array_xptr));
+ }
+ }
+
+ SET_VECTOR_ELT(array_proxy, 3, buffers);
+ UNPROTECT(1);
+ }
+
+ if (array->n_children > 0) {
+ SEXP children = PROTECT(Rf_allocVector(VECSXP, array->n_children));
+ for (int64_t i = 0; i < array->n_children; i++) {
+ SEXP child = PROTECT(borrow_array_xptr(array->children[i], array_xptr));
+ if (recursive) {
+ SEXP array_view_child =
+ PROTECT(borrow_array_view_child(array_view, i, array_view_xptr));
+ SET_VECTOR_ELT(children, i,
+ nanoarrow_c_array_proxy(child, array_view_child,
recursive_sexp));
+ UNPROTECT(1);
+ } else {
+ SET_VECTOR_ELT(children, i, child);
+ }
+ UNPROTECT(1);
+ }
+
+ SET_VECTOR_ELT(array_proxy, 4, children);
+ UNPROTECT(1);
+ }
+
+ // The recursive-ness of the dictionary is handled in R because this is not
part
+ // of the struct ArrowArrayView.
+ if (array->dictionary != NULL) {
+ SET_VECTOR_ELT(array_proxy, 5, borrow_array_xptr(array->dictionary,
array_xptr));
+ }
+
+ UNPROTECT(1);
+ return array_proxy;
+}
+
+// for ArrowArray* that are exported references to an R array_xptr
void finalize_exported_array(struct ArrowArray* array) {
- SEXP array_xptr = (SEXP) array->private_data;
+ SEXP array_xptr = (SEXP)array->private_data;
R_ReleaseObject(array_xptr);
// TODO: properly relocate child arrays
diff --git a/r/src/array_stream.c b/r/src/array_stream.c
index 177ff36..7182fe2 100644
--- a/r/src/array_stream.c
+++ b/r/src/array_stream.c
@@ -20,6 +20,8 @@
#include <Rinternals.h>
#include "array_stream.h"
+#include "schema.h"
+#include "array.h"
#include "nanoarrow.h"
void finalize_array_stream_xptr(SEXP array_stream_xptr) {
@@ -33,3 +35,41 @@ void finalize_array_stream_xptr(SEXP array_stream_xptr) {
ArrowFree(array_stream);
}
}
+
+SEXP nanoarrow_c_array_stream_get_schema(SEXP array_stream_xptr) {
+ struct ArrowArrayStream* array_stream =
array_stream_from_xptr(array_stream_xptr);
+
+ SEXP schema_xptr = PROTECT(schema_owning_xptr());
+ struct ArrowSchema* schema = (struct
ArrowSchema*)R_ExternalPtrAddr(schema_xptr);
+ int result = array_stream->get_schema(array_stream, schema);
+
+ if (result != 0) {
+ const char* last_error = array_stream->get_last_error(array_stream);
+ if (last_error == NULL) {
+ last_error = "";
+ }
+ Rf_error("array_stream->get_schema(): [%d] %s", result, last_error);
+ }
+
+ UNPROTECT(1);
+ return schema_xptr;
+}
+
+SEXP nanoarrow_c_array_stream_get_next(SEXP array_stream_xptr) {
+ struct ArrowArrayStream* array_stream =
array_stream_from_xptr(array_stream_xptr);
+
+ SEXP array_xptr = PROTECT(array_owning_xptr());
+ struct ArrowArray* array = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
+ int result = array_stream->get_next(array_stream, array);
+
+ if (result != 0) {
+ const char* last_error = array_stream->get_last_error(array_stream);
+ if (last_error == NULL) {
+ last_error = "";
+ }
+ Rf_error("array_stream->get_next(): [%d] %s", result, last_error);
+ }
+
+ UNPROTECT(1);
+ return array_xptr;
+}
diff --git a/r/src/array_stream.c b/r/src/buffer.c
similarity index 52%
copy from r/src/array_stream.c
copy to r/src/buffer.c
index 177ff36..12f83b0 100644
--- a/r/src/array_stream.c
+++ b/r/src/buffer.c
@@ -19,17 +19,30 @@
#include <R.h>
#include <Rinternals.h>
-#include "array_stream.h"
-#include "nanoarrow.h"
+#include <string.h>
-void finalize_array_stream_xptr(SEXP array_stream_xptr) {
- struct ArrowArrayStream* array_stream =
- (struct ArrowArrayStream*)R_ExternalPtrAddr(array_stream_xptr);
- if (array_stream != NULL && array_stream->release != NULL) {
- array_stream->release(array_stream);
+SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr) {
+ return R_ExternalPtrTag(buffer_xptr);
+}
+
+SEXP nanoarrow_c_buffer_as_raw(SEXP buffer_xptr) {
+ SEXP info = R_ExternalPtrTag(buffer_xptr);
+ if (info == R_NilValue) {
+ Rf_error("Can't as.raw() a nanoarrow_buffer with unknown size");
}
- if (array_stream != NULL) {
- ArrowFree(array_stream);
+ SEXP size_bytes_sexp = VECTOR_ELT(info, 0);
+ R_xlen_t size_bytes = 0;
+ if (TYPEOF(size_bytes_sexp) == INTSXP) {
+ size_bytes = INTEGER(size_bytes_sexp)[0];
+ } else if (TYPEOF(size_bytes_sexp) == REALSXP) {
+ size_bytes = REAL(size_bytes_sexp)[0];
+ } else {
+ Rf_error("Unknown object type for nanoarrow_buffer size_bytes");
}
+
+ SEXP result = PROTECT(Rf_allocVector(RAWSXP, size_bytes));
+ memcpy(RAW(result), R_ExternalPtrAddr(buffer_xptr), size_bytes);
+ UNPROTECT(1);
+ return result;
}
diff --git a/r/src/init.c b/r/src/init.c
index 117e600..107853d 100644
--- a/r/src/init.c
+++ b/r/src/init.c
@@ -20,8 +20,14 @@
#include <Rinternals.h>
/* generated by tools/make-callentries.R */
-extern SEXP nanoarrow_c_array_set_schema(SEXP array_xptr, SEXP schema_xptr);
+extern SEXP nanoarrow_c_array_stream_get_schema(SEXP array_stream_xptr);
+extern SEXP nanoarrow_c_array_stream_get_next(SEXP array_stream_xptr);
+extern SEXP nanoarrow_c_array_set_schema(SEXP array_xptr, SEXP schema_xptr,
SEXP validate_sexp);
extern SEXP nanoarrow_c_infer_schema_array(SEXP array_xptr);
+extern SEXP nanoarrow_c_array_view(SEXP array_xptr, SEXP schema_xptr);
+extern SEXP nanoarrow_c_array_proxy(SEXP array_xptr, SEXP array_view_xptr,
SEXP recursive_sexp);
+extern SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr);
+extern SEXP nanoarrow_c_buffer_as_raw(SEXP buffer_xptr);
extern SEXP nanoarrow_c_build_id();
extern SEXP nanoarrow_c_build_id_runtime();
extern SEXP nanoarrow_c_allocate_schema();
@@ -30,15 +36,23 @@ extern SEXP nanoarrow_c_allocate_array_stream();
extern SEXP nanoarrow_c_pointer(SEXP obj_sexp);
extern SEXP nanoarrow_c_pointer_addr_dbl(SEXP ptr);
extern SEXP nanoarrow_c_pointer_addr_chr(SEXP ptr);
+extern SEXP nanoarrow_c_pointer_addr_pretty(SEXP ptr);
extern SEXP nanoarrow_c_pointer_is_valid(SEXP ptr);
extern SEXP nanoarrow_c_pointer_release(SEXP ptr);
extern SEXP nanoarrow_c_pointer_move(SEXP ptr_src, SEXP ptr_dst);
extern SEXP nanoarrow_c_export_schema(SEXP schema_xptr, SEXP ptr_dst);
extern SEXP nanoarrow_c_export_array(SEXP array_xptr, SEXP ptr_dst);
+extern SEXP nanoarrow_c_schema_to_list(SEXP schema_xptr);
static const R_CallMethodDef CallEntries[] = {
- {"nanoarrow_c_array_set_schema", (DL_FUNC)&nanoarrow_c_array_set_schema,
2},
+ {"nanoarrow_c_array_stream_get_schema",
(DL_FUNC)&nanoarrow_c_array_stream_get_schema, 1},
+ {"nanoarrow_c_array_stream_get_next",
(DL_FUNC)&nanoarrow_c_array_stream_get_next, 1},
+ {"nanoarrow_c_array_set_schema", (DL_FUNC)&nanoarrow_c_array_set_schema,
3},
{"nanoarrow_c_infer_schema_array",
(DL_FUNC)&nanoarrow_c_infer_schema_array, 1},
+ {"nanoarrow_c_array_view", (DL_FUNC)&nanoarrow_c_array_view, 2},
+ {"nanoarrow_c_array_proxy", (DL_FUNC)&nanoarrow_c_array_proxy, 3},
+ {"nanoarrow_c_buffer_info", (DL_FUNC)&nanoarrow_c_buffer_info, 1},
+ {"nanoarrow_c_buffer_as_raw", (DL_FUNC)&nanoarrow_c_buffer_as_raw, 1},
{"nanoarrow_c_build_id", (DL_FUNC)&nanoarrow_c_build_id, 0},
{"nanoarrow_c_build_id_runtime", (DL_FUNC)&nanoarrow_c_build_id_runtime,
0},
{"nanoarrow_c_allocate_schema", (DL_FUNC)&nanoarrow_c_allocate_schema, 0},
@@ -47,11 +61,13 @@ static const R_CallMethodDef CallEntries[] = {
{"nanoarrow_c_pointer", (DL_FUNC)&nanoarrow_c_pointer, 1},
{"nanoarrow_c_pointer_addr_dbl", (DL_FUNC)&nanoarrow_c_pointer_addr_dbl,
1},
{"nanoarrow_c_pointer_addr_chr", (DL_FUNC)&nanoarrow_c_pointer_addr_chr,
1},
+ {"nanoarrow_c_pointer_addr_pretty",
(DL_FUNC)&nanoarrow_c_pointer_addr_pretty, 1},
{"nanoarrow_c_pointer_is_valid", (DL_FUNC)&nanoarrow_c_pointer_is_valid,
1},
{"nanoarrow_c_pointer_release", (DL_FUNC)&nanoarrow_c_pointer_release, 1},
{"nanoarrow_c_pointer_move", (DL_FUNC)&nanoarrow_c_pointer_move, 2},
{"nanoarrow_c_export_schema", (DL_FUNC)&nanoarrow_c_export_schema, 2},
{"nanoarrow_c_export_array", (DL_FUNC)&nanoarrow_c_export_array, 2},
+ {"nanoarrow_c_schema_to_list", (DL_FUNC)&nanoarrow_c_schema_to_list, 1},
{NULL, NULL, 0}};
/* end generated by tools/make-callentries.R */
diff --git a/r/src/pointers.c b/r/src/pointers.c
index e351ff5..7441322 100644
--- a/r/src/pointers.c
+++ b/r/src/pointers.c
@@ -66,6 +66,13 @@ SEXP nanoarrow_c_pointer_addr_chr(SEXP ptr) {
return Rf_mkString(addr_chars);
}
+SEXP nanoarrow_c_pointer_addr_pretty(SEXP ptr) {
+ char addr_chars[100];
+ memset(addr_chars, 0, 100);
+ snprintf(addr_chars, sizeof(addr_chars), "%p",
R_ExternalPtrAddr(nanoarrow_c_pointer(ptr)));
+ return Rf_mkString(addr_chars);
+}
+
SEXP nanoarrow_c_pointer_is_valid(SEXP ptr) {
if (Rf_inherits(ptr, "nanoarrow_schema")) {
struct ArrowSchema* obj = (struct ArrowSchema*)R_ExternalPtrAddr(ptr);
diff --git a/r/src/schema.c b/r/src/schema.c
index 90c6269..58cf995 100644
--- a/r/src/schema.c
+++ b/r/src/schema.c
@@ -32,3 +32,94 @@ void finalize_schema_xptr(SEXP schema_xptr) {
ArrowFree(schema);
}
}
+
+static SEXP schema_metadata_to_list(const char* metadata) {
+ if (metadata == NULL) {
+ return R_NilValue;
+ }
+
+ struct ArrowMetadataReader reader;
+ ArrowMetadataReaderInit(&reader, metadata);
+ SEXP names = PROTECT(Rf_allocVector(STRSXP, reader.remaining_keys));
+ SEXP values = PROTECT(Rf_allocVector(VECSXP, reader.remaining_keys));
+
+ struct ArrowStringView key;
+ struct ArrowStringView value;
+ R_xlen_t i = 0;
+ while (reader.remaining_keys > 0) {
+ ArrowMetadataReaderRead(&reader, &key, &value);
+ SET_STRING_ELT(names, i, Rf_mkCharLenCE(key.data, key.n_bytes, CE_UTF8));
+ SEXP value_raw = PROTECT(Rf_allocVector(RAWSXP, value.n_bytes));
+ memcpy(RAW(value_raw), value.data, value.n_bytes);
+ SET_VECTOR_ELT(values, i, value_raw);
+ UNPROTECT(1);
+ i++;
+ }
+
+ Rf_setAttrib(values, R_NamesSymbol, names);
+ UNPROTECT(2);
+ return values;
+}
+
+static SEXP borrow_schema_xptr(struct ArrowSchema* schema, SEXP shelter) {
+ SEXP schema_xptr = PROTECT(R_MakeExternalPtr(schema, R_NilValue, shelter));
+ Rf_setAttrib(schema_xptr, R_ClassSymbol, Rf_mkString("nanoarrow_schema"));
+ UNPROTECT(1);
+ return schema_xptr;
+}
+
+SEXP nanoarrow_c_schema_to_list(SEXP schema_xptr) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+
+ const char* names[] = {"format", "name", "metadata", "flags",
+ "children", "dictionary", ""};
+ SEXP result = PROTECT(Rf_mkNamed(VECSXP, names));
+
+ SEXP format_sexp = PROTECT(Rf_allocVector(STRSXP, 1));
+ SET_STRING_ELT(format_sexp, 0, Rf_mkCharCE(schema->format, CE_UTF8));
+ SET_VECTOR_ELT(result, 0, format_sexp);
+ UNPROTECT(1);
+
+ if (schema->name != NULL) {
+ SEXP name_sexp = PROTECT(Rf_allocVector(STRSXP, 1));
+ SET_STRING_ELT(name_sexp, 0, Rf_mkCharCE(schema->name, CE_UTF8));
+ SET_VECTOR_ELT(result, 1, name_sexp);
+ UNPROTECT(1);
+ } else {
+ SET_VECTOR_ELT(result, 1, R_NilValue);
+ }
+
+ SET_VECTOR_ELT(result, 2, schema_metadata_to_list(schema->metadata));
+ SET_VECTOR_ELT(result, 3, Rf_ScalarInteger(schema->flags));
+
+ if (schema->n_children > 0) {
+ SEXP children_sexp = PROTECT(Rf_allocVector(VECSXP, schema->n_children));
+ SEXP children_names_sexp = PROTECT(Rf_allocVector(STRSXP,
schema->n_children));
+ for (R_xlen_t i = 0; i < schema->n_children; i++) {
+ SEXP child_xptr = PROTECT(borrow_schema_xptr(schema->children[i],
schema_xptr));
+ SET_VECTOR_ELT(children_sexp, i, child_xptr);
+ if (schema->children[i]->name != NULL) {
+ SET_STRING_ELT(children_names_sexp, i,
Rf_mkCharCE(schema->children[i]->name, CE_UTF8));
+ } else {
+ SET_STRING_ELT(children_names_sexp, i, Rf_mkCharCE("", CE_UTF8));
+ }
+ UNPROTECT(1);
+ }
+ Rf_setAttrib(children_sexp, R_NamesSymbol, children_names_sexp);
+ SET_VECTOR_ELT(result, 4, children_sexp);
+ UNPROTECT(2);
+ } else {
+ SET_VECTOR_ELT(result, 4, R_NilValue);
+ }
+
+ if (schema->dictionary != NULL) {
+ SEXP dictionary_xptr = PROTECT(borrow_schema_xptr(schema->dictionary,
schema_xptr));
+ SET_VECTOR_ELT(result, 5, dictionary_xptr);
+ UNPROTECT(1);
+ } else {
+ SET_VECTOR_ELT(result, 5, R_NilValue);
+ }
+
+ UNPROTECT(1);
+ return result;
+}
diff --git a/r/src/schema.h b/r/src/schema.h
index 62d49f6..f0fc406 100644
--- a/r/src/schema.h
+++ b/r/src/schema.h
@@ -27,7 +27,7 @@ void finalize_schema_xptr(SEXP schema_xptr);
static inline struct ArrowSchema* schema_from_xptr(SEXP schema_xptr) {
if (!Rf_inherits(schema_xptr, "nanoarrow_schema")) {
- Rf_error("`schema` argument that is not");
+ Rf_error("`schema` argument that does not inherit from
'nanoarrow_schema'");
}
struct ArrowSchema* schema = (struct
ArrowSchema*)R_ExternalPtrAddr(schema_xptr);
@@ -53,6 +53,10 @@ static inline struct ArrowSchema*
nullable_schema_from_xptr(SEXP schema_xptr) {
static inline SEXP schema_owning_xptr() {
struct ArrowSchema* schema =
(struct ArrowSchema*)ArrowMalloc(sizeof(struct ArrowSchema));
+ if (schema == NULL) {
+ Rf_error("Failed to allocate ArrowSchema");
+ }
+
schema->release = NULL;
SEXP schema_xptr = PROTECT(R_MakeExternalPtr(schema, R_NilValue,
R_NilValue));
diff --git a/r/tests/testthat/test-array-stream.R
b/r/tests/testthat/test-array-stream.R
index a24080c..f106d35 100644
--- a/r/tests/testthat/test-array-stream.R
+++ b/r/tests/testthat/test-array-stream.R
@@ -15,6 +15,20 @@
# specific language governing permissions and limitations
# under the License.
+test_that("nanoarrow_array_stream format, print, and str methods work", {
+ array_stream <- as_nanoarrow_array_stream(data.frame(x = 1:10))
+ expect_identical(format(array_stream), "<nanoarrow_array_stream[+s]>")
+ expect_output(expect_identical(str(array_stream), array_stream),
"nanoarrow_array_stream")
+ expect_output(expect_identical(print(array_stream), array_stream),
"nanoarrow_array_stream")
+})
+
+test_that("released nanoarrow_array_stream format, print, and str methods
work", {
+ array_stream <- nanoarrow_allocate_array_stream()
+ expect_identical(format(array_stream), "<nanoarrow_array_stream[invalid
pointer]>")
+ expect_output(expect_identical(str(array_stream), array_stream),
"nanoarrow_array_stream")
+ expect_output(expect_identical(print(array_stream), array_stream),
"nanoarrow_array_stream")
+})
+
test_that("as_nanoarrow_array_stream() works for nanoarow_array_stream", {
stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
expect_identical(as_nanoarrow_array_stream(stream), stream)
@@ -25,3 +39,55 @@ test_that("as_nanoarrow_array_stream() works for
nanoarow_array_stream", {
"is.null\\(schema\\) is not TRUE"
)
})
+
+test_that("infer_nanoarrow_schema() is implemented for streams", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ schema <- infer_nanoarrow_schema(stream)
+ expect_identical(schema$children$x$format, "i")
+})
+
+test_that("nanoarrow_array_stream list interface works", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_identical(length(stream), 3L)
+ expect_identical(names(stream), c("get_schema", "get_next", "release"))
+ expect_identical(formals(stream[["get_schema"]]), formals(stream$get_schema))
+ expect_identical(formals(stream[["get_next"]]), formals(stream$get_next))
+ expect_identical(formals(stream[["release"]]), formals(stream$release))
+ expect_null(stream[["this key does not exist"]])
+})
+
+test_that("nanoarrow_array_stream can get_schema() and get_next()", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_identical(stream$get_schema()$format, "+s")
+ expect_identical(as.data.frame(stream$get_next()), data.frame(x = 1:5))
+ expect_null(stream$get_next())
+})
+
+test_that("nanoarrow_array_stream can release()", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_true(nanoarrow_pointer_is_valid(stream))
+ stream$release()
+ expect_false(nanoarrow_pointer_is_valid(stream))
+})
+
+test_that("nanoarrow_array_stream can validate or not on get_next()", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_error(
+ stream$get_next(schema = infer_nanoarrow_schema(integer())),
+ "Expected array with 2 buffer"
+ )
+
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_silent(
+ stream$get_next(
+ schema = infer_nanoarrow_schema(integer()),
+ validate = FALSE
+ )
+ )
+})
+
+test_that("nanoarrow_array_stream get_next() with schema = NULL", {
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ array <- stream$get_next(schema = NULL)
+ expect_error(infer_nanoarrow_schema(array), "has no associated schema")
+})
diff --git a/r/tests/testthat/test-array.R b/r/tests/testthat/test-array.R
index 69d83aa..48aaef0 100644
--- a/r/tests/testthat/test-array.R
+++ b/r/tests/testthat/test-array.R
@@ -15,6 +15,28 @@
# specific language governing permissions and limitations
# under the License.
+test_that("nanoarrow_array format, print, and str methods work", {
+ array <- as_nanoarrow_array(1:10)
+ expect_identical(format(array), "<nanoarrow_array i[10]>")
+ expect_output(expect_identical(str(array), array), "nanoarrow_array")
+ expect_output(expect_identical(print(array), array), "nanoarrow_array")
+})
+
+test_that("released nanoarrow_array format, print, and str methods work", {
+ array <- nanoarrow_allocate_array()
+ expect_identical(format(array), "<nanoarrow_array[invalid pointer]>")
+ expect_output(expect_identical(str(array), array), "nanoarrow_array")
+ expect_output(expect_identical(print(array), array), "nanoarrow_array")
+})
+
+test_that("schemaless nanoarrow_array format, print, and str methods work", {
+ array <- as_nanoarrow_array(1:10)
+ nanoarrow_array_set_schema(array, NULL)
+ expect_identical(format(array), "<nanoarrow_array <unknown schema>[10]>")
+ expect_output(expect_identical(str(array), array), "nanoarrow_array")
+ expect_output(expect_identical(print(array), array), "nanoarrow_array")
+})
+
test_that("as_nanoarrow_array() / from_nanoarrow_array() default method
works", {
array <- as_nanoarrow_array(1:10)
expect_identical(from_nanoarrow_array(array), 1:10)
@@ -48,3 +70,151 @@ test_that("as.vector() and as.data.frame() work for array",
{
struct_array <- as_nanoarrow_array(data.frame(a = 1:10))
expect_identical(as.data.frame(struct_array), data.frame(a = 1:10))
})
+
+test_that("schemaless array list interface works for non-nested types", {
+ array <- as_nanoarrow_array(1:10)
+ nanoarrow_array_set_schema(array, NULL)
+
+ expect_identical(length(array), 6L)
+ expect_identical(
+ names(array),
+ c("length", "null_count", "offset", "buffers", "children", "dictionary")
+ )
+ expect_identical(array$length, 10L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(array$offset, 0L)
+ expect_length(array$buffers, 2L)
+ expect_s3_class(array$buffers[[1]], "nanoarrow_buffer")
+ expect_s3_class(array$buffers[[2]], "nanoarrow_buffer")
+ expect_null(array$children)
+ expect_null(array$dictionary)
+})
+
+test_that("schemaless array list interface works for nested types", {
+ array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+ nanoarrow_array_set_schema(array, NULL)
+
+ expect_length(array$children, 2L)
+ expect_length(array$children[[1]]$buffers, 2L)
+ expect_length(array$children[[2]]$buffers, 3L)
+ expect_s3_class(array$children[[1]], "nanoarrow_array")
+ expect_s3_class(array$children[[2]], "nanoarrow_array")
+
+ info_recursive <- nanoarrow_array_proxy(array, recursive = TRUE)
+ expect_type(info_recursive$children[[1]], "list")
+ expect_length(info_recursive$children[[1]]$buffers, 2L)
+})
+
+test_that("schemaless array list interface works for dictionary types", {
+ array <- as_nanoarrow_array(factor(letters[1:5]))
+ nanoarrow_array_set_schema(array, NULL)
+
+ expect_length(array$buffers, 2L)
+ expect_length(array$dictionary$buffers, 3L)
+ expect_s3_class(array$dictionary, "nanoarrow_array")
+
+ info_recursive <- nanoarrow_array_proxy_safe(array, recursive = TRUE)
+ expect_type(info_recursive$dictionary, "list")
+ expect_length(info_recursive$dictionary$buffers, 3L)
+})
+
+test_that("array list interface classes data buffers for relevant types", {
+ types <- list(
+ int8 = arrow::int8(),
+ uint8 = arrow::uint8(),
+ int16 = arrow::int16(),
+ uint16 = arrow::uint16(),
+ int32 = arrow::int32(),
+ uint32 = arrow::uint32(),
+ int64 = arrow::int64(),
+ uint64 = arrow::uint64(),
+ half_float = arrow::float16(),
+ float = arrow::float32(),
+ double = arrow::float64(),
+ decimal128 = arrow::decimal128(2, 3),
+ decimal256 = arrow::decimal256(2, 3)
+ )
+
+ arrays <- lapply(types, function(x) arrow::concat_arrays(type = x))
+
+ for (nm in names(arrays)) {
+ expect_s3_class(
+ as_nanoarrow_array(arrays[[!!nm]])$buffers[[1]],
+ "nanoarrow_buffer_validity"
+ )
+ expect_s3_class(
+ as_nanoarrow_array(arrays[[!!nm]])$buffers[[2]],
+ paste0("nanoarrow_buffer_data_", nm)
+ )
+ }
+})
+
+test_that("array list interface classes offset buffers for relevant types", {
+ arr_string <- arrow::concat_arrays(type = arrow::utf8())
+ expect_s3_class(
+ as_nanoarrow_array(arr_string)$buffers[[2]],
+ "nanoarrow_buffer_data_offset32"
+ )
+ expect_s3_class(
+ as_nanoarrow_array(arr_string)$buffers[[3]],
+ "nanoarrow_buffer_data_utf8"
+ )
+
+ arr_large_string <- arrow::concat_arrays(type = arrow::large_utf8())
+ expect_s3_class(
+ as_nanoarrow_array(arr_large_string)$buffers[[2]],
+ "nanoarrow_buffer_data_offset64"
+ )
+ expect_s3_class(
+ as_nanoarrow_array(arr_large_string)$buffers[[3]],
+ "nanoarrow_buffer_data_utf8"
+ )
+
+ arr_binary <- arrow::concat_arrays(type = arrow::binary())
+ expect_s3_class(
+ as_nanoarrow_array(arr_binary)$buffers[[2]],
+ "nanoarrow_buffer_data_offset32"
+ )
+ expect_s3_class(
+ as_nanoarrow_array(arr_binary)$buffers[[3]],
+ "nanoarrow_buffer_data_uint8"
+ )
+
+ arr_large_binary <- arrow::concat_arrays(type = arrow::large_binary())
+ expect_s3_class(
+ as_nanoarrow_array(arr_large_binary)$buffers[[2]],
+ "nanoarrow_buffer_data_offset64"
+ )
+ expect_s3_class(
+ as_nanoarrow_array(arr_large_binary)$buffers[[3]],
+ "nanoarrow_buffer_data_uint8"
+ )
+})
+
+test_that("array list interface works for nested types", {
+ array <- as_nanoarrow_array(data.frame(a = 1L, b = "two"))
+
+ expect_named(array$children, c("a", "b"))
+ expect_s3_class(array$children[[1]], "nanoarrow_array")
+ expect_s3_class(infer_nanoarrow_schema(array$children[[1]]),
"nanoarrow_schema")
+
+ expect_s3_class(array$buffers[[1]], "nanoarrow_buffer_validity")
+ expect_s3_class(array$children$a$buffers[[2]], "nanoarrow_buffer_data_int32")
+ expect_s3_class(array$children$b$buffers[[2]],
"nanoarrow_buffer_data_offset32")
+
+ info_recursive <- nanoarrow_array_proxy_safe(array, recursive = TRUE)
+ expect_type(info_recursive$children, "list")
+ expect_s3_class(info_recursive$children$a$buffers[[2]],
"nanoarrow_buffer_data_int32")
+ expect_s3_class(info_recursive$children$b$buffers[[2]],
"nanoarrow_buffer_data_offset32")
+})
+
+test_that("array list interface works for dictionary types", {
+ array <- as_nanoarrow_array(factor(letters[1:5]))
+
+ expect_s3_class(array$buffers[[2]], "nanoarrow_buffer_data_int8")
+ expect_s3_class(array$dictionary$buffers[[2]],
"nanoarrow_buffer_data_offset32")
+
+ info_recursive <- nanoarrow_array_proxy_safe(array, recursive = TRUE)
+ expect_type(info_recursive$dictionary, "list")
+ expect_s3_class(info_recursive$dictionary$buffers[[2]],
"nanoarrow_buffer_data_offset32")
+})
diff --git a/r/tests/testthat/test-pointers.R b/r/tests/testthat/test-pointers.R
index a66b97b..f7a9032 100644
--- a/r/tests/testthat/test-pointers.R
+++ b/r/tests/testthat/test-pointers.R
@@ -197,4 +197,5 @@ test_that("nanoarrow_pointer_export() errors for unknown
object", {
test_that("pointer address getters work", {
schema <- infer_nanoarrow_schema(integer())
expect_match(nanoarrow_pointer_addr_chr(schema), "^[0-9]+$")
+ expect_match(nanoarrow_pointer_addr_pretty(schema), "^(0x)?[0-9a-fA-F]+$")
})
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index 00f4087..19db7df 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -15,6 +15,20 @@
# specific language governing permissions and limitations
# under the License.
+test_that("nanoarrow_schema format, print, and str methods work", {
+ schema <- infer_nanoarrow_schema(1:10)
+ expect_identical(format(schema), "<nanoarrow_schema[i]>")
+ expect_output(expect_identical(str(schema), schema), "nanoarrow_schema")
+ expect_output(expect_identical(print(schema), schema), "nanoarrow_schema")
+})
+
+test_that("nanoarrow_schema format, print, and str methods work for invalid
pointers", {
+ schema <- nanoarrow_allocate_schema()
+ expect_identical(format(schema), "<nanoarrow_schema[invalid pointer]>")
+ expect_output(expect_identical(str(schema), schema), "nanoarrow_schema")
+ expect_output(expect_identical(print(schema), schema), "nanoarrow_schema")
+})
+
test_that("as_nanoarrow_schema() works for nanoarrow_schema", {
schema <- infer_nanoarrow_schema(1:10)
expect_identical(as_nanoarrow_schema(schema), schema)
@@ -24,3 +38,58 @@ test_that("infer_nanoarrow_schema() default method works", {
schema <- infer_nanoarrow_schema(1:10)
expect_true(arrow::as_data_type(schema)$Equals(arrow::int32()))
})
+
+test_that("schema list interface works for non-nested types", {
+ schema <- infer_nanoarrow_schema(1:10)
+ expect_identical(length(schema), 6L)
+ expect_identical(
+ names(schema),
+ c("format", "name", "metadata", "flags", "children", "dictionary")
+ )
+ expect_identical(schema$format, "i")
+ expect_identical(schema$name, "")
+ expect_identical(schema$metadata, list())
+ expect_identical(schema$flags, 2L)
+ expect_identical(schema$children, NULL)
+ expect_identical(schema$dictionary, NULL)
+})
+
+test_that("schema list interface works for nested types", {
+ schema <- infer_nanoarrow_schema(data.frame(a = 1L, b = "two"))
+
+ expect_identical(schema$format, "+s")
+ expect_named(schema$children, c("a", "b"))
+ expect_identical(schema$children$a, schema$children[[1]])
+ expect_identical(schema$children$a$format, "i")
+ expect_identical(schema$children$b$format, "u")
+ expect_s3_class(schema$children$a, "nanoarrow_schema")
+ expect_s3_class(schema$children$b, "nanoarrow_schema")
+
+ info_recursive <- nanoarrow_schema_proxy(schema, recursive = TRUE)
+ expect_type(info_recursive$children$a, "list")
+ expect_identical(info_recursive$children$a$format, "i")
+})
+
+test_that("schema list interface works for dictionary types", {
+ schema <- infer_nanoarrow_schema(factor(letters[1:5]))
+
+ expect_identical(schema$format, "c")
+ expect_identical(schema$dictionary$format, "u")
+ expect_s3_class(schema$dictionary, "nanoarrow_schema")
+
+ info_recursive <- nanoarrow_schema_proxy(schema, recursive = TRUE)
+ expect_type(info_recursive$dictionary, "list")
+ expect_identical(info_recursive$dictionary$format, "u")
+})
+
+test_that("schema list interface works with metadata", {
+ schema <- infer_nanoarrow_schema(as.POSIXlt("2020-01-01", tz = "UTC"))
+ expect_identical(
+ schema$metadata[["ARROW:extension:name"]],
+ "arrow.r.vctrs"
+ )
+ expect_s3_class(
+ unserialize(schema$metadata[["ARROW:extension:metadata"]]),
+ "POSIXlt"
+ )
+})