This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new a035a1e feat(r): Add `as_nanoarrow_array()` implementation that does
not fall back on `arrow::as_arrow_array()` everywhere (#108)
a035a1e is described below
commit a035a1e136a338387c9f55d3969a2305ccc26b87
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Feb 15 17:05:07 2023 -0400
feat(r): Add `as_nanoarrow_array()` implementation that does not fall back
on `arrow::as_arrow_array()` everywhere (#108)
---
dev/release/rat_exclude_files.txt | 1 +
r/DESCRIPTION | 3 +-
r/NAMESPACE | 9 +
r/R/array-stream.R | 27 +-
r/R/array.R | 22 +-
r/R/as-array.R | 227 +++++++++++
r/R/convert-array.R | 7 +
r/R/schema.R | 21 +-
r/R/type.R | 6 +-
r/R/util.R | 24 ++
r/man/na_type.Rd | 6 +-
r/src/array.c | 4 +-
r/src/as_array.c | 571 +++++++++++++++++++++++++++
r/src/buffer.h | 15 +-
r/src/convert.c | 13 +-
r/src/convert_array_stream.c | 1 +
r/src/init.c | 2 +
r/src/materialize.c | 15 +
r/src/materialize.h | 4 +
r/tests/testthat/_snaps/array-stream.md | 4 +
r/tests/testthat/_snaps/as-array.md | 12 +
r/tests/testthat/test-altrep.R | 11 +-
r/tests/testthat/test-array-stream.R | 13 +-
r/tests/testthat/test-array.R | 42 +-
r/tests/testthat/test-as-array.R | 414 +++++++++++++++++++
r/tests/testthat/test-convert-array-stream.R | 68 ++--
r/tests/testthat/test-convert-array.R | 122 ++++--
r/tests/testthat/test-infer-ptype.R | 10 +-
r/tests/testthat/test-pointers.R | 10 +-
r/tests/testthat/test-schema.R | 4 +-
r/tests/testthat/test-type.R | 6 +-
r/tests/testthat/test-util.R | 20 +
32 files changed, 1541 insertions(+), 173 deletions(-)
diff --git a/dev/release/rat_exclude_files.txt
b/dev/release/rat_exclude_files.txt
index 6adceb9..c42d956 100644
--- a/dev/release/rat_exclude_files.txt
+++ b/dev/release/rat_exclude_files.txt
@@ -8,3 +8,4 @@ r/NAMESPACE
r/.Rbuildignore
r/nanoarrow.Rproj
*.Rd
+r/tests/testthat/_snaps/*.md
diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index ab34f81..69b809a 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -28,5 +28,6 @@ Suggests:
rlang,
testthat (>= 3.0.0),
tibble,
- vctrs
+ vctrs,
+ withr
Config/testthat/edition: 3
diff --git a/r/NAMESPACE b/r/NAMESPACE
index 1cc531b..71e5324 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -17,11 +17,19 @@ S3method(as.vector,nanoarrow_array)
S3method(as.vector,nanoarrow_array_stream)
S3method(as_nanoarrow_array,Array)
S3method(as_nanoarrow_array,ChunkedArray)
+S3method(as_nanoarrow_array,Date)
+S3method(as_nanoarrow_array,POSIXct)
+S3method(as_nanoarrow_array,POSIXlt)
S3method(as_nanoarrow_array,RecordBatch)
S3method(as_nanoarrow_array,Table)
+S3method(as_nanoarrow_array,blob)
S3method(as_nanoarrow_array,default)
+S3method(as_nanoarrow_array,difftime)
+S3method(as_nanoarrow_array,factor)
S3method(as_nanoarrow_array,nanoarrow_array)
+S3method(as_nanoarrow_array,vctrs_unspecified)
S3method(as_nanoarrow_array_stream,RecordBatchReader)
+S3method(as_nanoarrow_array_stream,data.frame)
S3method(as_nanoarrow_array_stream,default)
S3method(as_nanoarrow_array_stream,nanoarrow_array_stream)
S3method(as_nanoarrow_buffer,default)
@@ -43,6 +51,7 @@ S3method(infer_nanoarrow_schema,Dataset)
S3method(infer_nanoarrow_schema,Date)
S3method(infer_nanoarrow_schema,Expression)
S3method(infer_nanoarrow_schema,POSIXct)
+S3method(infer_nanoarrow_schema,POSIXlt)
S3method(infer_nanoarrow_schema,RecordBatchReader)
S3method(infer_nanoarrow_schema,Scalar)
S3method(infer_nanoarrow_schema,arrow_dplyr_query)
diff --git a/r/R/array-stream.R b/r/R/array-stream.R
index 12a9dd0..d6ac3d8 100644
--- a/r/R/array-stream.R
+++ b/r/R/array-stream.R
@@ -87,20 +87,39 @@ as_nanoarrow_array_stream <- function(x, ..., schema =
NULL) {
#' @export
as_nanoarrow_array_stream.nanoarrow_array_stream <- function(x, ..., schema =
NULL) {
if (is.null(schema)) {
- x
- } else {
- NextMethod()
+ return(x)
+ }
+
+ inferred_schema <- infer_nanoarrow_schema(x)
+ if (nanoarrow_schema_identical(schema, inferred_schema)) {
+ return(x)
}
+
+ NextMethod()
}
#' @export
as_nanoarrow_array_stream.default <- function(x, ..., schema = NULL) {
+ assert_arrow_installed("default coerce to nanoarrow_array_stream")
+
as_nanoarrow_array_stream(
- arrow::as_record_batch_reader(x, ...),
+ arrow::as_record_batch_reader(x, ..., schema = arrow::as_schema(schema)),
schema = schema
)
}
+#' @export
+as_nanoarrow_array_stream.data.frame <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ } else {
+ schema <- as_nanoarrow_schema(schema)
+ }
+
+ x <- as_nanoarrow_array(x, schema = schema)
+ basic_array_stream(list(x), schema = schema)
+}
+
#' @export
infer_nanoarrow_schema.nanoarrow_array_stream <- function(x, ...) {
x$get_schema()
diff --git a/r/R/array.R b/r/R/array.R
index ae2e05f..2e36c57 100644
--- a/r/R/array.R
+++ b/r/R/array.R
@@ -43,6 +43,8 @@ as_nanoarrow_array <- function(x, ..., schema = NULL) {
UseMethod("as_nanoarrow_array")
}
+# See as-array.R for S3 method implementations
+
#' @export
as.vector.nanoarrow_array <- function(x, mode = "any") {
stopifnot(identical(mode, "any"))
@@ -69,26 +71,6 @@ as_tibble.nanoarrow_array <- function(x, ...) {
tibble::as_tibble(as.data.frame.nanoarrow_array(x), ...)
}
-#' @export
-as_nanoarrow_array.default <- function(x, ..., schema = NULL) {
- # For now, use arrow's conversion for everything
- if (is.null(schema)) {
- as_nanoarrow_array(arrow::as_arrow_array(x))
- } else {
- schema <- as_nanoarrow_schema(schema)
- as_nanoarrow_array(arrow::as_arrow_array(x, type =
arrow::as_data_type(schema)))
- }
-}
-
-#' @export
-as_nanoarrow_array.nanoarrow_array <- function(x, ..., schema = NULL) {
- if (is.null(schema)) {
- return(x)
- }
-
- NextMethod()
-}
-
#' @export
infer_nanoarrow_schema.nanoarrow_array <- function(x, ...) {
.Call(nanoarrow_c_infer_schema_array, x) %||%
diff --git a/r/R/as-array.R b/r/R/as-array.R
new file mode 100644
index 0000000..c381dfe
--- /dev/null
+++ b/r/R/as-array.R
@@ -0,0 +1,227 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+#' @export
+as_nanoarrow_array.default <- function(x, ..., schema = NULL, .from_c = FALSE)
{
+ # If we're coming from C it's because we've tried all the internal
conversions
+ # and no suitable S3 method was found or the x--schema combination is not
+ # implemented in nanoarrow. Try arrow::as_arrow_array().
+ if (.from_c) {
+ assert_arrow_installed(
+ sprintf(
+ "create %s array from object of type %s",
+ nanoarrow_schema_formatted(schema),
+ paste0(class(x), collapse = "/")
+ )
+ )
+
+ result <- as_nanoarrow_array(
+ arrow::as_arrow_array(
+ x,
+ type = arrow::as_data_type(schema)
+ )
+ )
+
+ # Skip nanoarrow_pointer_export() for these arrays since we know there
+ # are no external references to them
+ class(result) <- c("nanoarrow_array_dont_export", class(result))
+
+ return(result)
+ }
+
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ } else {
+ schema <- as_nanoarrow_schema(schema)
+ }
+
+ .Call(nanoarrow_c_as_array_default, x, schema)
+}
+
+#' @export
+as_nanoarrow_array.nanoarrow_array <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ return(x)
+ }
+
+ inferred_schema <- infer_nanoarrow_schema(x)
+ if (nanoarrow_schema_identical(schema, inferred_schema)) {
+ return(x)
+ }
+
+ NextMethod()
+}
+
+#' @export
+as_nanoarrow_array.POSIXct <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ parsed <- nanoarrow_schema_parse(schema)
+ switch(
+ parsed$type,
+ timestamp = ,
+ duration = {
+ multipliers <- c(s = 1.0, ms = 1e3, us = 1e6, ns = 1e9)
+ multiplier <- unname(multipliers[parsed$time_unit])
+ array <- as_nanoarrow_array(
+ as.numeric(x) * multiplier,
+ schema = na_type(parsed$storage_type)
+ )
+ nanoarrow_array_set_schema(array, schema)
+ array
+ },
+ NextMethod()
+ )
+}
+
+#' @export
+as_nanoarrow_array.difftime <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ parsed <- nanoarrow_schema_parse(schema)
+ src_unit <- attr(x, "units")
+ switch(
+ parsed$type,
+ time32 = ,
+ time64 = ,
+ duration = {
+ multipliers <- c(s = 1.0, ms = 1e3, us = 1e6, ns = 1e9)
+ src_multipliers <- c(
+ secs = 1.0,
+ mins = 60.0,
+ hours = 3600.0,
+ days = 86400.0,
+ weeks = 604800.0
+ )
+
+ multiplier <- unname(multipliers[parsed$time_unit]) *
+ unname(src_multipliers[src_unit])
+ array <- as_nanoarrow_array(
+ as.numeric(x) * multiplier,
+ schema = na_type(parsed$storage_type)
+ )
+ nanoarrow_array_set_schema(array, schema)
+ array
+ },
+ NextMethod()
+ )
+}
+
+#' @export
+as_nanoarrow_array.blob <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ as_nanoarrow_array(unclass(x), schema = schema)
+}
+
+#' @export
+as_nanoarrow_array.Date <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ parsed <- nanoarrow_schema_parse(schema)
+ switch(
+ parsed$type,
+ date32 = {
+ storage <- as_nanoarrow_array(
+ as.integer(x),
+ schema = na_type(parsed$storage_type)
+ )
+ nanoarrow_array_set_schema(storage, schema)
+ storage
+ },
+ date64 = {
+ storage <- as_nanoarrow_array(
+ as.numeric(x) * 86400000,
+ schema = na_type(parsed$storage_type)
+ )
+ nanoarrow_array_set_schema(storage, schema)
+ storage
+ },
+ NextMethod()
+ )
+}
+
+#' @export
+as_nanoarrow_array.POSIXlt <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ as_nanoarrow_array(new_data_frame(x, length(x)), schema = schema)
+}
+
+#' @export
+as_nanoarrow_array.factor <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ }
+
+ if (is.null(schema$dictionary)) {
+ return(as_nanoarrow_array(as.character(x), schema = schema))
+ }
+
+ storage <- schema
+ storage$dictionary <- NULL
+
+ array <- as_nanoarrow_array(unclass(x) - 1L, schema = storage)
+ array$dictionary <- as_nanoarrow_array(levels(x), schema = schema$dictionary)
+ array
+}
+
+#' @export
+as_nanoarrow_array.vctrs_unspecified <- function(x, ..., schema = NULL) {
+ if (is.null(schema)) {
+ schema <- infer_nanoarrow_schema(x)
+ } else {
+ schema <- as_nanoarrow_schema(schema)
+ }
+
+ switch(
+ nanoarrow_schema_parse(schema)$storage_type,
+ na = {
+ array <- nanoarrow_array_init(schema)
+ array$length <- length(x)
+ array$null_count <- length(x)
+ array
+ },
+ NextMethod()
+ )
+}
+
+# This is defined because it's verbose to pass named arguments from C.
+# When converting data frame columns, we try the internal C conversions
+# first to save R evaluation overhead. When the internal conversions fail,
+# we call as_nanoarrow_array() to dispatch to conversions defined via S3
+# dispatch, making sure to let the default method know that we've already
+# tried the internal C conversions.
+as_nanoarrow_array_from_c <- function(x, schema) {
+ result <- as_nanoarrow_array(x, schema = schema, .from_c = TRUE)
+
+ # Anything we get from an S3 method we need to validate (even from the
+ # arrow package, which occasionally does not honour the schema argument)
+ nanoarrow_array_set_schema(result, schema, validate = TRUE)
+
+ result
+}
diff --git a/r/R/convert-array.R b/r/R/convert-array.R
index 1160ab6..97ecc14 100644
--- a/r/R/convert-array.R
+++ b/r/R/convert-array.R
@@ -144,6 +144,13 @@ stop_cant_convert_schema <- function(schema, to, n = 0) {
# Called from C for decimal types
convert_decimal_to_double <- function(array, schema, offset, length) {
+ assert_arrow_installed(
+ sprintf(
+ "convert %s array to object of type double",
+ nanoarrow_schema_formatted(schema)
+ )
+ )
+
array2 <- nanoarrow_allocate_array()
schema2 <- nanoarrow_allocate_schema()
nanoarrow_pointer_export(array, array2)
diff --git a/r/R/schema.R b/r/R/schema.R
index 564dc38..0b70741 100644
--- a/r/R/schema.R
+++ b/r/R/schema.R
@@ -80,7 +80,7 @@ infer_nanoarrow_schema.double <- function(x, ...) {
#' @export
infer_nanoarrow_schema.character <- function(x, ...) {
- if (length(x) > 0 && sum(nchar(x, type = "bytes")) > .Machine$integer.max) {
+ if (length(x) > 0 && sum(nchar(x, type = "bytes"), na.rm = TRUE) >
.Machine$integer.max) {
na_large_string()
} else {
na_string()
@@ -89,7 +89,11 @@ infer_nanoarrow_schema.character <- function(x, ...) {
#' @export
infer_nanoarrow_schema.factor <- function(x, ...) {
- na_dictionary(infer_nanoarrow_schema.character(levels(x)), na_int32())
+ na_dictionary(
+ infer_nanoarrow_schema(levels(x)),
+ na_int32(),
+ ordered = is.ordered(x)
+ )
}
#' @export
@@ -102,6 +106,11 @@ infer_nanoarrow_schema.POSIXct <- function(x, ...) {
na_timestamp(timezone = tz)
}
+#' @export
+infer_nanoarrow_schema.POSIXlt <- function(x, ...) {
+ infer_nanoarrow_schema(new_data_frame(x, length(x)))
+}
+
#' @export
infer_nanoarrow_schema.Date <- function(x, ...) {
na_date32()
@@ -237,6 +246,14 @@ nanoarrow_schema_modify <- function(x, new_values,
validate = TRUE) {
schema_deep_copy
}
+nanoarrow_schema_identical <- function(x, y) {
+ identical(x, y) ||
+ identical(
+ nanoarrow_schema_proxy(x, recursive = TRUE),
+ nanoarrow_schema_proxy(y, recursive = TRUE)
+ )
+}
+
#' @importFrom utils str
#' @export
str.nanoarrow_schema <- function(object, ...) {
diff --git a/r/R/type.R b/r/R/type.R
index 81ae322..f0b5a49 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -236,7 +236,7 @@ na_time32 <- function(unit = c("ms", "s"), nullable = TRUE)
{
#' @rdname na_type
#' @export
-na_time64 <- function(unit = c("ns", "us"), nullable = TRUE) {
+na_time64 <- function(unit = c("us", "ns"), nullable = TRUE) {
unit <- match.arg(unit)
.Call(
nanoarrow_c_schema_init_date_time,
@@ -280,7 +280,7 @@ na_interval_month_day_nano <- function(nullable = TRUE) {
#' @rdname na_type
#' @export
-na_timestamp <- function(unit = c("ms", "s", "us", "ns"), timezone = "",
nullable = TRUE) {
+na_timestamp <- function(unit = c("us", "ns", "s", "ms"), timezone = "",
nullable = TRUE) {
unit <- match.arg(unit)
if (!is.character(timezone) || length(timezone) != 1 || is.na(timezone)) {
stop("`timezone` must be character(1)")
@@ -321,7 +321,7 @@ na_decimal256 <- function(precision, scale, nullable =
TRUE) {
#' @rdname na_type
#' @export
-na_struct <- function(column_types = list(), nullable = TRUE) {
+na_struct <- function(column_types = list(), nullable = FALSE) {
schema <- .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$STRUCT,
isTRUE(nullable))
schema$children <- column_types
schema
diff --git a/r/R/util.R b/r/R/util.R
index db7ece1..d09a2c9 100644
--- a/r/R/util.R
+++ b/r/R/util.R
@@ -15,6 +15,30 @@
# specific language governing permissions and limitations
# under the License.
+
+arrow_installed <- function() {
+ opt <- Sys.getenv(
+ "R_NANOARROW_WITHOUT_ARROW",
+ getOption("nanoarrow.without_arrow", FALSE)
+ )
+
+ if (identical(tolower(opt), "true")) {
+ FALSE
+ } else {
+ requireNamespace("arrow", quietly = TRUE)
+ }
+}
+
+assert_arrow_installed <- function(reason) {
+ if (!arrow_installed()) {
+ stop(
+ sprintf("Package 'arrow' required for %s", reason),
+ call. = FALSE
+ )
+ }
+}
+
+
# Internally we use R_PreserveObject() and R_ReleaseObject() to manage R
objects
# that must be kept alive for ArrowArray buffers to stay valid. This count
# should be zero after tests have run in a fresh session and both gc() and
diff --git a/r/man/na_type.Rd b/r/man/na_type.Rd
index 5966464..1f668bf 100644
--- a/r/man/na_type.Rd
+++ b/r/man/na_type.Rd
@@ -103,7 +103,7 @@ na_date64(nullable = TRUE)
na_time32(unit = c("ms", "s"), nullable = TRUE)
-na_time64(unit = c("ns", "us"), nullable = TRUE)
+na_time64(unit = c("us", "ns"), nullable = TRUE)
na_duration(unit = c("ms", "s", "us", "ns"), nullable = TRUE)
@@ -113,13 +113,13 @@ na_interval_day_time(nullable = TRUE)
na_interval_month_day_nano(nullable = TRUE)
-na_timestamp(unit = c("ms", "s", "us", "ns"), timezone = "", nullable = TRUE)
+na_timestamp(unit = c("us", "ns", "s", "ms"), timezone = "", nullable = TRUE)
na_decimal128(precision, scale, nullable = TRUE)
na_decimal256(precision, scale, nullable = TRUE)
-na_struct(column_types = list(), nullable = TRUE)
+na_struct(column_types = list(), nullable = FALSE)
na_sparse_union(column_types = list())
diff --git a/r/src/array.c b/r/src/array.c
index 441764f..4e38c14 100644
--- a/r/src/array.c
+++ b/r/src/array.c
@@ -19,6 +19,8 @@
#include <R.h>
#include <Rinternals.h>
+#include <limits.h>
+
#include "array.h"
#include "buffer.h"
#include "nanoarrow.h"
@@ -375,7 +377,7 @@ static SEXP borrow_unknown_buffer(struct ArrowArray* array,
int64_t i, SEXP shel
}
static SEXP length_from_int64(int64_t value) {
- if (value < 2147483647) {
+ if (value < INT_MAX) {
return Rf_ScalarInteger(value);
} else {
return Rf_ScalarReal(value);
diff --git a/r/src/as_array.c b/r/src/as_array.c
new file mode 100644
index 0000000..b227612
--- /dev/null
+++ b/r/src/as_array.c
@@ -0,0 +1,571 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#define R_NO_REMAP
+#include <R.h>
+#include <Rinternals.h>
+
+#include <limits.h>
+
+#include "array.h"
+#include "buffer.h"
+#include "materialize.h"
+#include "nanoarrow.h"
+#include "schema.h"
+#include "util.h"
+
+static void call_as_nanoarrow_array(SEXP x_sexp, struct ArrowArray* array,
+ SEXP schema_xptr) {
+ SEXP fun = PROTECT(Rf_install("as_nanoarrow_array_from_c"));
+ SEXP call = PROTECT(Rf_lang3(fun, x_sexp, schema_xptr));
+ SEXP result = PROTECT(Rf_eval(call, nanoarrow_ns_pkg));
+
+ // In many cases we can skip the array_export() step (which adds some
complexity
+ // and an additional R object to the mix)
+ if (Rf_inherits(result, "nanoarrow_array_dont_export")) {
+ struct ArrowArray* array_result = array_from_xptr(result);
+ ArrowArrayMove(array_result, array);
+ } else {
+ array_export(result, array);
+ }
+
+ UNPROTECT(3);
+}
+
+static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // Only consider the default create for now
+ if (schema_view.type != NANOARROW_TYPE_INT32) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ // We don't consider altrep for now: we need an array of int32_t, and while
we
+ // *could* avoid materializing, there's no point because the source altrep
+ // object almost certainly knows how to do this faster than we do.
+ int* x_data = INTEGER(x_sexp);
+ int64_t len = Rf_xlength(x_sexp);
+
+ result = ArrowArrayInitFromType(array, NANOARROW_TYPE_INT32);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ // Borrow the data buffer
+ buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(int32_t),
x_sexp);
+
+ // Set the array fields
+ array->length = len;
+ array->offset = 0;
+ int64_t null_count = 0;
+
+ // Look for the first null (will be the last index if there are none)
+ int64_t first_null = -1;
+ for (int64_t i = 0; i < len; i++) {
+ if (x_data[i] == NA_INTEGER) {
+ first_null = i;
+ break;
+ }
+ }
+
+ // If there are nulls, pack the validity buffer
+ if (first_null != -1) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+ result = ArrowBitmapReserve(&bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ ArrowBitmapAppendUnsafe(&bitmap, 1, first_null);
+ for (int64_t i = first_null; i < len; i++) {
+ uint8_t is_valid = x_data[i] != NA_INTEGER;
+ null_count += !is_valid;
+ ArrowBitmapAppend(&bitmap, is_valid, 1);
+ }
+
+ ArrowArraySetValidityBitmap(array, &bitmap);
+ }
+
+ array->null_count = null_count;
+ result = ArrowArrayFinishBuilding(array, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayFinishBuilding(): %s", error->message);
+ }
+}
+
+static void as_array_lgl(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // We can zero-copy convert to int32
+ if (schema_view.type == NANOARROW_TYPE_INT32) {
+ as_array_int(x_sexp, array, schema_xptr, error);
+ return;
+ }
+
+ // Only consider bool for now
+ if (schema_view.type != NANOARROW_TYPE_BOOL) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ int* x_data = INTEGER(x_sexp);
+ int64_t len = Rf_xlength(x_sexp);
+
+ result = ArrowArrayInitFromType(array, NANOARROW_TYPE_BOOL);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ struct ArrowBitmap value_bitmap;
+ ArrowBitmapInit(&value_bitmap);
+ result = ArrowBitmapReserve(&value_bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ int has_nulls = 0;
+ for (int64_t i = 0; i < len; i++) {
+ if (x_data[i] == NA_INTEGER) {
+ has_nulls = 1;
+ ArrowBitmapAppend(&value_bitmap, 0, 1);
+ } else {
+ ArrowBitmapAppend(&value_bitmap, x_data[i] != 0, 1);
+ }
+ }
+
+ ArrowArraySetBuffer(array, 1, &value_bitmap.buffer);
+
+ // Set the array fields
+ array->length = len;
+ array->offset = 0;
+ int64_t null_count = 0;
+
+ // If there are nulls, pack the validity buffer
+ if (has_nulls) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+ result = ArrowBitmapReserve(&bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ for (int64_t i = 0; i < len; i++) {
+ uint8_t is_valid = x_data[i] != NA_INTEGER;
+ null_count += !is_valid;
+ ArrowBitmapAppend(&bitmap, is_valid, 1);
+ }
+
+ ArrowArraySetValidityBitmap(array, &bitmap);
+ }
+
+ array->null_count = null_count;
+ result = ArrowArrayFinishBuilding(array, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayFinishBuilding(): %s", error->message);
+ }
+}
+
+static void as_array_dbl(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // Consider double -> na_double() and double -> na_int64()/na_int32()
+ // (mostly so that we can support date/time types with various units)
+ switch (schema_view.type) {
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_INT32:
+ break;
+ default:
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ double* x_data = REAL(x_sexp);
+ int64_t len = Rf_xlength(x_sexp);
+
+ result = ArrowArrayInitFromType(array, schema_view.type);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ if (schema_view.type == NANOARROW_TYPE_DOUBLE) {
+ // Just borrow the data buffer (zero-copy)
+ buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(double),
x_sexp);
+
+ } else if (schema_view.type == NANOARROW_TYPE_INT64) {
+ // double -> int64_t
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, 1);
+ result = ArrowBufferReserve(buffer, len * sizeof(int64_t));
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBufferReserve() failed");
+ }
+
+ int64_t* buffer_data = (int64_t*)buffer->data;
+ for (int64_t i = 0; i < len; i++) {
+ buffer_data[i] = x_data[i];
+ }
+
+ buffer->size_bytes = len * sizeof(int64_t);
+
+ } else {
+ // double -> int32_t
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, 1);
+ result = ArrowBufferReserve(buffer, len * sizeof(int32_t));
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBufferReserve() failed");
+ }
+
+ int32_t* buffer_data = (int32_t*)buffer->data;
+
+ // It's easy to accidentally overflow here, so make sure to warn
+ int64_t n_overflow = 0;
+ for (int64_t i = 0; i < len; i++) {
+ if (x_data[i] > INT_MAX || x_data[i] < INT_MIN) {
+ n_overflow++;
+ buffer_data[i] = 0;
+ } else {
+ buffer_data[i] = x_data[i];
+ }
+ }
+
+ if (n_overflow > 0) {
+ Rf_warning("%ld value(s) overflowed in double -> na_int32() creation",
+ (long)n_overflow);
+ }
+
+ buffer->size_bytes = len * sizeof(int32_t);
+ }
+
+ // Set the array fields
+ array->length = len;
+ array->offset = 0;
+ int64_t null_count = 0;
+
+ // Look for the first null (will be the last index if there are none)
+ int64_t first_null = -1;
+ for (int64_t i = 0; i < len; i++) {
+ if (R_IsNA(x_data[i]) || R_IsNaN(x_data[i])) {
+ first_null = i;
+ break;
+ }
+ }
+
+ // If there are nulls, pack the validity buffer
+ if (first_null != -1) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+ result = ArrowBitmapReserve(&bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ ArrowBitmapAppendUnsafe(&bitmap, 1, first_null);
+ for (int64_t i = first_null; i < len; i++) {
+ uint8_t is_valid = !R_IsNA(x_data[i]) && !R_IsNaN(x_data[i]);
+ null_count += !is_valid;
+ ArrowBitmapAppend(&bitmap, is_valid, 1);
+ }
+
+ ArrowArraySetValidityBitmap(array, &bitmap);
+ }
+
+ array->null_count = null_count;
+ result = ArrowArrayFinishBuilding(array, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayFinishBuilding(): %s", error->message);
+ }
+}
+
+static void as_array_chr(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // Only consider the default create for now
+ if (schema_view.type != NANOARROW_TYPE_STRING) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ int64_t len = Rf_xlength(x_sexp);
+
+ result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRING);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ // Keep these buffers under the umbrella of the array so that we don't have
+ // to worry about cleaning them up if STRING_ELT jumps
+ struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 2);
+
+ ArrowBufferReserve(offset_buffer, (len + 1) * sizeof(int32_t));
+
+ int64_t null_count = 0;
+ int32_t cumulative_len = 0;
+ ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+
+ for (int64_t i = 0; i < len; i++) {
+ SEXP item = STRING_ELT(x_sexp, i);
+ if (item == NA_STRING) {
+ null_count++;
+ } else {
+ const void* vmax = vmaxget();
+ const char* item_utf8 = Rf_translateCharUTF8(item);
+ int64_t item_size = strlen(item_utf8);
+ if ((item_size + cumulative_len) > INT_MAX) {
+ Rf_error("Use na_large_string() to convert character() with total size
> 2GB");
+ }
+
+ int result = ArrowBufferAppend(data_buffer, item_utf8, item_size);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBufferAppend() failed");
+ }
+ cumulative_len += item_size;
+
+ vmaxset(vmax);
+ }
+
+ ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+ }
+
+ // Set the array fields
+ array->length = len;
+ array->offset = 0;
+
+ // If there are nulls, pack the validity buffer
+ if (null_count > 0) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+ result = ArrowBitmapReserve(&bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ for (int64_t i = 0; i < len; i++) {
+ uint8_t is_valid = STRING_ELT(x_sexp, i) != NA_STRING;
+ ArrowBitmapAppend(&bitmap, is_valid, 1);
+ }
+
+ ArrowArraySetValidityBitmap(array, &bitmap);
+ }
+
+ array->null_count = null_count;
+ result = ArrowArrayFinishBuilding(array, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayFinishBuilding(): %s", error->message);
+ }
+}
+
+static void as_array_default(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error);
+
+static void as_array_data_frame(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ if (schema_view.type != NANOARROW_TYPE_STRUCT) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ if (Rf_xlength(x_sexp) != schema->n_children) {
+ Rf_error("Expected %ld schema children but found %ld",
(long)Rf_xlength(x_sexp),
+ (long)schema->n_children);
+ }
+
+ result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRUCT);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ result = ArrowArrayAllocateChildren(array, schema->n_children);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayAllocateChildren() failed");
+ }
+
+ for (int64_t i = 0; i < schema->n_children; i++) {
+ as_array_default(VECTOR_ELT(x_sexp, i), array->children[i],
+ borrow_schema_child_xptr(schema_xptr, i), error);
+ }
+
+ array->length = nanoarrow_data_frame_size(x_sexp);
+ array->null_count = 0;
+ array->offset = 0;
+}
+
+static void as_array_list(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ struct ArrowSchema* schema = schema_from_xptr(schema_xptr);
+ struct ArrowSchemaView schema_view;
+ int result = ArrowSchemaViewInit(&schema_view, schema, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowSchemaViewInit(): %s", error->message);
+ }
+
+ // We handle list(raw()) for now but fall back to arrow for vctrs::list_of()
+ // Arbitrary nested list support is complicated without some concept of a
+ // "builder", which we don't use.
+ if (schema_view.type != NANOARROW_TYPE_BINARY) {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+
+ result = ArrowArrayInitFromType(array, schema_view.type);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayInitFromType() failed");
+ }
+
+ int64_t len = Rf_xlength(x_sexp);
+ struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 2);
+
+ result = ArrowBufferReserve(offset_buffer, (len + 1) * sizeof(int32_t));
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBufferReserve() failed");
+ }
+
+ int64_t null_count = 0;
+ int32_t cumulative_len = 0;
+ ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+
+ for (int64_t i = 0; i < len; i++) {
+ SEXP item = VECTOR_ELT(x_sexp, i);
+ if (item == R_NilValue) {
+ ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+ null_count++;
+ continue;
+ }
+
+ if (Rf_isObject(item) || TYPEOF(item) != RAWSXP) {
+ Rf_error("All list items must be raw() or NULL in conversion to
na_binary()");
+ }
+
+ int64_t item_size = Rf_xlength(item);
+ if ((item_size + cumulative_len) > INT_MAX) {
+ Rf_error("Use na_large_binary() to convert list(raw()) with total size >
2GB");
+ }
+
+ result = ArrowBufferAppend(data_buffer, RAW(item), item_size);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBufferAppend() failed");
+ }
+
+ cumulative_len += item_size;
+ ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+ }
+
+ // Set the array fields
+ array->length = len;
+ array->offset = 0;
+
+ // If there are nulls, pack the validity buffer
+ if (null_count > 0) {
+ struct ArrowBitmap bitmap;
+ ArrowBitmapInit(&bitmap);
+ result = ArrowBitmapReserve(&bitmap, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowBitmapReserve() failed");
+ }
+
+ for (int64_t i = 0; i < len; i++) {
+ uint8_t is_valid = VECTOR_ELT(x_sexp, i) != R_NilValue;
+ ArrowBitmapAppend(&bitmap, is_valid, 1);
+ }
+
+ ArrowArraySetValidityBitmap(array, &bitmap);
+ }
+
+ array->null_count = null_count;
+ result = ArrowArrayFinishBuilding(array, error);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayFinishBuilding(): %s", error->message);
+ }
+}
+
+static void as_array_default(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
+ struct ArrowError* error) {
+ if (Rf_isObject(x_sexp)) {
+ if (Rf_inherits(x_sexp, "data.frame")) {
+ as_array_data_frame(x_sexp, array, schema_xptr, error);
+ return;
+ } else {
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+ }
+
+ switch (TYPEOF(x_sexp)) {
+ case LGLSXP:
+ as_array_lgl(x_sexp, array, schema_xptr, error);
+ return;
+ case INTSXP:
+ as_array_int(x_sexp, array, schema_xptr, error);
+ return;
+ case REALSXP:
+ as_array_dbl(x_sexp, array, schema_xptr, error);
+ return;
+ case STRSXP:
+ as_array_chr(x_sexp, array, schema_xptr, error);
+ return;
+ case VECSXP:
+ as_array_list(x_sexp, array, schema_xptr, error);
+ return;
+ default:
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr);
+ return;
+ }
+}
+
+SEXP nanoarrow_c_as_array_default(SEXP x_sexp, SEXP schema_sexp) {
+ SEXP array_xptr = PROTECT(array_owning_xptr());
+ struct ArrowArray* array = (struct ArrowArray*)R_ExternalPtrAddr(array_xptr);
+ struct ArrowError error;
+ as_array_default(x_sexp, array, schema_sexp, &error);
+ array_xptr_set_schema(array_xptr, schema_sexp);
+ UNPROTECT(1);
+ return array_xptr;
+}
diff --git a/r/src/buffer.h b/r/src/buffer.h
index 00065cb..cf4bacd 100644
--- a/r/src/buffer.h
+++ b/r/src/buffer.h
@@ -44,6 +44,15 @@ static inline SEXP buffer_owning_xptr(void) {
// Create an arrow_buffer with a deallocator that will release shelter when
// the buffer is no longer needed.
+static inline void buffer_borrowed(struct ArrowBuffer* buffer, const void*
addr,
+ int64_t size_bytes, SEXP shelter) {
+ buffer->allocator = ArrowBufferDeallocator(&nanoarrow_sexp_deallocator,
shelter);
+ buffer->data = (uint8_t*)addr;
+ buffer->size_bytes = size_bytes;
+ buffer->capacity_bytes = size_bytes;
+ nanoarrow_preserve_sexp(shelter);
+}
+
static inline SEXP buffer_borrowed_xptr(const void* addr, int64_t size_bytes,
SEXP shelter) {
SEXP buffer_xptr = PROTECT(buffer_owning_xptr());
@@ -55,11 +64,7 @@ static inline SEXP buffer_borrowed_xptr(const void* addr,
int64_t size_bytes,
}
struct ArrowBuffer* buffer = (struct
ArrowBuffer*)R_ExternalPtrAddr(buffer_xptr);
- buffer->allocator = ArrowBufferDeallocator(&nanoarrow_sexp_deallocator,
shelter);
- buffer->data = (uint8_t*)addr;
- buffer->size_bytes = size_bytes;
- buffer->capacity_bytes = size_bytes;
- nanoarrow_preserve_sexp(shelter);
+ buffer_borrowed(buffer, addr, size_bytes, shelter);
UNPROTECT(1);
return buffer_xptr;
}
diff --git a/r/src/convert.c b/r/src/convert.c
index f87698d..6590806 100644
--- a/r/src/convert.c
+++ b/r/src/convert.c
@@ -28,18 +28,7 @@
static R_xlen_t nanoarrow_vec_size(SEXP vec_sexp, struct PTypeView*
ptype_view) {
if (ptype_view->vector_type == VECTOR_TYPE_DATA_FRAME) {
- if (Rf_length(vec_sexp) > 0) {
- // This both avoids materializing the row.names attribute and
- // makes this work with struct-style vctrs that don't have a
- // row.names attribute but that always have one or more element
- return Rf_xlength(VECTOR_ELT(vec_sexp, 0));
- } else {
- // Since ALTREP was introduced, materializing the row.names attribute is
- // usually deferred such that values in the form c(NA, -nrow), 1:nrow, or
- // as.character(1:nrow) are never actually computed when the length is
- // taken.
- return Rf_xlength(Rf_getAttrib(vec_sexp, R_RowNamesSymbol));
- }
+ return nanoarrow_data_frame_size(vec_sexp);
} else {
return Rf_xlength(vec_sexp);
}
diff --git a/r/src/convert_array_stream.c b/r/src/convert_array_stream.c
index 7bb4dc9..8cdf4b0 100644
--- a/r/src/convert_array_stream.c
+++ b/r/src/convert_array_stream.c
@@ -77,6 +77,7 @@ SEXP nanoarrow_c_convert_array_stream(SEXP array_stream_xptr,
SEXP ptype_sexp,
break;
}
+ array->release(array);
result = array_stream->get_next(array_stream, array);
n_batches++;
if (result != NANOARROW_OK) {
diff --git a/r/src/init.c b/r/src/init.c
index 965029e..73e4f4f 100644
--- a/r/src/init.c
+++ b/r/src/init.c
@@ -42,6 +42,7 @@ extern SEXP nanoarrow_c_array_validate_after_modify(SEXP
array_xptr, SEXP schema
extern SEXP nanoarrow_c_array_set_schema(SEXP array_xptr, SEXP schema_xptr,
SEXP validate_sexp);
extern SEXP nanoarrow_c_infer_schema_array(SEXP array_xptr);
extern SEXP nanoarrow_c_array_proxy(SEXP array_xptr, SEXP array_view_xptr,
SEXP recursive_sexp);
+extern SEXP nanoarrow_c_as_array_default(SEXP x_sexp, SEXP schema_sexp);
extern SEXP nanoarrow_c_as_buffer_default(SEXP x_sexp);
extern SEXP nanoarrow_c_buffer_append(SEXP buffer_xptr, SEXP new_buffer_xptr);
extern SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr);
@@ -100,6 +101,7 @@ static const R_CallMethodDef CallEntries[] = {
{"nanoarrow_c_array_set_schema", (DL_FUNC)&nanoarrow_c_array_set_schema,
3},
{"nanoarrow_c_infer_schema_array",
(DL_FUNC)&nanoarrow_c_infer_schema_array, 1},
{"nanoarrow_c_array_proxy", (DL_FUNC)&nanoarrow_c_array_proxy, 3},
+ {"nanoarrow_c_as_array_default", (DL_FUNC)&nanoarrow_c_as_array_default,
2},
{"nanoarrow_c_as_buffer_default", (DL_FUNC)&nanoarrow_c_as_buffer_default,
1},
{"nanoarrow_c_buffer_append", (DL_FUNC)&nanoarrow_c_buffer_append, 2},
{"nanoarrow_c_buffer_info", (DL_FUNC)&nanoarrow_c_buffer_info, 1},
diff --git a/r/src/materialize.c b/r/src/materialize.c
index 46c1eb2..ad88f83 100644
--- a/r/src/materialize.c
+++ b/r/src/materialize.c
@@ -59,6 +59,21 @@ static int has_attrib_safe(SEXP x, SEXP sym) {
return FALSE;
}
+R_xlen_t nanoarrow_data_frame_size(SEXP x) {
+ if (Rf_length(x) > 0) {
+ // This both avoids materializing the row.names attribute and
+ // makes this work with struct-style vctrs that don't have a
+ // row.names attribute but that always have one or more element
+ return Rf_xlength(VECTOR_ELT(x, 0));
+ } else {
+ // Since ALTREP was introduced, materializing the row.names attribute is
+ // usually deferred such that values in the form c(NA, -nrow), 1:nrow, or
+ // as.character(1:nrow) are never actually computed when the length is
+ // taken.
+ return Rf_xlength(Rf_getAttrib(x, R_RowNamesSymbol));
+ }
+}
+
void nanoarrow_set_rownames(SEXP x, R_xlen_t len) {
// If len fits in the integer range, we can use the c(NA, -nrow)
// shortcut for the row.names attribute. R expands this when
diff --git a/r/src/materialize.h b/r/src/materialize.h
index eab3dc3..a8f36cc 100644
--- a/r/src/materialize.h
+++ b/r/src/materialize.h
@@ -28,6 +28,10 @@
// true if ptype is a data.frame or is an S3 list with names.
int nanoarrow_ptype_is_data_frame(SEXP ptype);
+// Returns the number of rows in a data.frame in a way that is least likely to
+// expand the attr(x, "row.names")
+R_xlen_t nanoarrow_data_frame_size(SEXP x);
+
// Set rownames of a data.frame (with special handling if len > INT_MAX)
void nanoarrow_set_rownames(SEXP x, R_xlen_t len);
diff --git a/r/tests/testthat/_snaps/array-stream.md
b/r/tests/testthat/_snaps/array-stream.md
new file mode 100644
index 0000000..745bcf5
--- /dev/null
+++ b/r/tests/testthat/_snaps/array-stream.md
@@ -0,0 +1,4 @@
+# as_nanoarrow_array_stream() works for nanoarow_array_stream
+
+ is.null(schema) is not TRUE
+
diff --git a/r/tests/testthat/_snaps/as-array.md
b/r/tests/testthat/_snaps/as-array.md
new file mode 100644
index 0000000..547dbb7
--- /dev/null
+++ b/r/tests/testthat/_snaps/as-array.md
@@ -0,0 +1,12 @@
+# as_nanoarrow_array() errors for bad logical() creation
+
+ Invalid: Expecting a character vector
+
+# as_nanoarrow_array() errors for bad data.frame() -> na_struct()
+
+ Can't create Array<int32()> from object of type data.frame
+
+# as_nanoarrow_array() works for bad unspecified() create
+
+ NotImplemented: day_time_interval
+
diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R
index f26ccdc..821fc65 100644
--- a/r/tests/testthat/test-altrep.R
+++ b/r/tests/testthat/test-altrep.R
@@ -21,7 +21,7 @@ test_that("nanoarrow_altrep_chr() returns NULL for
unsupported types", {
})
test_that("nanoarrow_altrep_chr() works for string", {
- x <- as_nanoarrow_array(c(NA, letters), schema = arrow::utf8())
+ x <- as_nanoarrow_array(c(NA, letters), schema = na_string())
x_altrep <- nanoarrow_altrep_chr(x)
expect_output(.Internal(inspect(x_altrep)),
"<nanoarrow::altrep_chr\\[27\\]>")
@@ -52,7 +52,8 @@ test_that("nanoarrow_altrep_chr() works for string", {
})
test_that("nanoarrow_altrep_chr() works for large string", {
- x <- as_nanoarrow_array(letters, schema = arrow::large_utf8())
+ skip_if_not_installed("arrow")
+ x <- as_nanoarrow_array(letters, schema = na_large_string())
x_altrep <- nanoarrow_altrep_chr(x)
expect_identical(x_altrep, letters)
})
@@ -64,13 +65,13 @@ test_that("is_nanoarrow_altrep() returns true for nanoarrow
altrep objects", {
})
test_that("nanoarrow_altrep_chr_force_materialize() forces materialization", {
- x <- as_nanoarrow_array(letters, schema = arrow::utf8())
+ x <- as_nanoarrow_array(letters, schema = na_string())
x_altrep <- nanoarrow_altrep_chr(x)
expect_identical(nanoarrow_altrep_force_materialize("not altrep"), 0L)
expect_identical(nanoarrow_altrep_force_materialize(x_altrep), 1L)
- x <- as_nanoarrow_array(letters, schema = arrow::utf8())
+ x <- as_nanoarrow_array(letters, schema = na_string())
x_altrep_df <- data.frame(x = nanoarrow_altrep_chr(x))
expect_identical(
nanoarrow_altrep_force_materialize(x_altrep_df, recursive = FALSE),
@@ -90,7 +91,7 @@ test_that("is_nanoarrow_altrep_materialized() checks for
materialization", {
expect_identical(is_nanoarrow_altrep_materialized("not altrep"), NA)
expect_identical(is_nanoarrow_altrep_materialized(1:10), NA)
- x <- as_nanoarrow_array(letters, schema = arrow::utf8())
+ x <- as_nanoarrow_array(letters, schema = na_string())
x_altrep <- nanoarrow_altrep_chr(x)
expect_false(is_nanoarrow_altrep_materialized(x_altrep))
expect_identical(nanoarrow_altrep_force_materialize(x_altrep), 1L)
diff --git a/r/tests/testthat/test-array-stream.R
b/r/tests/testthat/test-array-stream.R
index 5315039..d0c8d7c 100644
--- a/r/tests/testthat/test-array-stream.R
+++ b/r/tests/testthat/test-array-stream.R
@@ -86,10 +86,15 @@ test_that("as_nanoarrow_array_stream() works for
nanoarow_array_stream", {
stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
expect_identical(as_nanoarrow_array_stream(stream), stream)
- # Not supported yet
- expect_error(
- as_nanoarrow_array_stream(stream, schema = data.frame(x = double())),
- "is.null\\(schema\\) is not TRUE"
+ stream <- as_nanoarrow_array_stream(data.frame(x = 1:5))
+ expect_identical(
+ as_nanoarrow_array_stream(stream, schema = na_struct(list(x =
na_int32()))),
+ stream
+ )
+
+ skip_if_not_installed("arrow")
+ expect_snapshot_error(
+ as_nanoarrow_array_stream(stream, schema = na_struct(list(x =
na_double())))
)
})
diff --git a/r/tests/testthat/test-array.R b/r/tests/testthat/test-array.R
index f6de5b5..8b23ce7 100644
--- a/r/tests/testthat/test-array.R
+++ b/r/tests/testthat/test-array.R
@@ -41,14 +41,14 @@ test_that("as_nanoarrow_array() / convert_array() default
method works", {
array <- as_nanoarrow_array(1:10)
expect_identical(convert_array(array), 1:10)
- array <- as_nanoarrow_array(as.double(1:10), schema = arrow::float64())
+ array <- as_nanoarrow_array(as.double(1:10), schema = na_double())
expect_identical(convert_array(array), as.double(1:10))
})
test_that("infer_nanoarrow_schema() works for nanoarrow_array", {
array <- as_nanoarrow_array(1:10)
schema <- infer_nanoarrow_schema(array)
- expect_true(arrow::as_data_type(schema)$Equals(arrow::int32()))
+ expect_true(nanoarrow_schema_identical(schema, na_int32()))
nanoarrow_array_set_schema(array, NULL)
expect_error(infer_nanoarrow_schema(array), "has no associated schema")
@@ -129,22 +129,22 @@ test_that("schemaless array list interface works for
dictionary types", {
test_that("array list interface classes data buffers for relevant types", {
types <- list(
- int8 = arrow::int8(),
- uint8 = arrow::uint8(),
- int16 = arrow::int16(),
- uint16 = arrow::uint16(),
- int32 = arrow::int32(),
- uint32 = arrow::uint32(),
- int64 = arrow::int64(),
- uint64 = arrow::uint64(),
- half_float = arrow::float16(),
- float = arrow::float32(),
- double = arrow::float64(),
- decimal128 = arrow::decimal128(2, 3),
- decimal256 = arrow::decimal256(2, 3)
- )
-
- arrays <- lapply(types, function(x) arrow::concat_arrays(type = x))
+ int8 = na_int8(),
+ uint8 = na_uint8(),
+ int16 = na_int16(),
+ uint16 = na_uint16(),
+ int32 = na_int32(),
+ uint32 = na_uint32(),
+ int64 = na_int64(),
+ uint64 = na_uint64(),
+ half_float = na_half_float(),
+ float = na_float(),
+ double = na_double(),
+ decimal128 = na_decimal128(2, 3),
+ decimal256 = na_decimal256(2, 3)
+ )
+
+ arrays <- lapply(types, function(x) nanoarrow_array_init(x))
for (nm in names(arrays)) {
expect_s3_class(
@@ -159,7 +159,7 @@ test_that("array list interface classes data buffers for
relevant types", {
})
test_that("array list interface classes offset buffers for relevant types", {
- arr_string <- arrow::concat_arrays(type = arrow::utf8())
+ arr_string <- nanoarrow_array_init(na_string())
expect_s3_class(
as_nanoarrow_array(arr_string)$buffers[[2]],
"nanoarrow_buffer_data_offset32"
@@ -169,6 +169,8 @@ test_that("array list interface classes offset buffers for
relevant types", {
"nanoarrow_buffer_data_utf8"
)
+ skip_if_not_installed("arrow")
+
arr_large_string <- arrow::concat_arrays(type = arrow::large_utf8())
expect_s3_class(
as_nanoarrow_array(arr_large_string)$buffers[[2]],
@@ -220,7 +222,7 @@ test_that("array list interface works for nested types", {
test_that("array list interface works for dictionary types", {
array <- as_nanoarrow_array(factor(letters[1:5]))
- expect_s3_class(array$buffers[[2]], "nanoarrow_buffer_data_int8")
+ expect_s3_class(array$buffers[[2]], "nanoarrow_buffer_data_int32")
expect_s3_class(array$dictionary$buffers[[2]],
"nanoarrow_buffer_data_offset32")
info_recursive <- nanoarrow_array_proxy_safe(array, recursive = TRUE)
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
new file mode 100644
index 0000000..8bb250d
--- /dev/null
+++ b/r/tests/testthat/test-as-array.R
@@ -0,0 +1,414 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+test_that("as_nanoarrow_array() works for nanoarrow_array", {
+ array <- as_nanoarrow_array(1:10)
+ expect_identical(as_nanoarrow_array(array), array)
+
+ array <- as_nanoarrow_array(1:10, schema = na_int32())
+ expect_identical(as_nanoarrow_array(array), array)
+
+ skip_if_not_installed("arrow")
+ casted <- as_nanoarrow_array(array, schema = na_int64())
+ expect_identical(infer_nanoarrow_schema(casted)$format, "l")
+ expect_identical(convert_array(casted), as.double(1:10))
+})
+
+test_that("as_nanoarrow_array() works for logical() -> na_bool()", {
+ # Without nulls
+ array <- as_nanoarrow_array(c(TRUE, FALSE, TRUE, FALSE), schema = na_bool())
+ expect_identical(infer_nanoarrow_schema(array)$format, "b")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(packBits(c(TRUE, FALSE, TRUE, FALSE, rep(FALSE, 4))))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(TRUE, FALSE, NA), schema = na_bool())
+ expect_identical(infer_nanoarrow_schema(array)$format, "b")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 2), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(packBits(c(TRUE, FALSE, FALSE, rep(FALSE, 5))))
+ )
+})
+
+test_that("as_nanoarrow_array() errors for bad logical() creation", {
+ skip_if_not_installed("arrow")
+ expect_snapshot_error(
+ as_nanoarrow_array(TRUE, schema = na_string())
+ )
+})
+
+test_that("as_nanoarrow_array() works for logical() -> na_int32()", {
+ # Without nulls
+ array <- as_nanoarrow_array(c(TRUE, FALSE, TRUE, FALSE), schema = na_int32())
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(TRUE, FALSE, TRUE, FALSE)))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(TRUE, FALSE, NA), schema = na_int32())
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 2), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(TRUE, FALSE, NA)))
+ )
+})
+
+test_that("as_nanoarrow_array() works for integer() -> na_int32()", {
+ # Without nulls
+ array <- as_nanoarrow_array(1:10)
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(as.raw(array$buffers[[2]]),
as.raw(as_nanoarrow_buffer(1:10)))
+
+ # With nulls
+ array <- as_nanoarrow_array(c(1:10, NA))
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(1:10, NA)))
+ )
+})
+
+test_that("as_nanoarrow_array() works for integer -> na_int64()", {
+ skip_if_not_installed("arrow")
+ casted <- as_nanoarrow_array(1:10, schema = na_int64())
+ expect_identical(infer_nanoarrow_schema(casted)$format, "l")
+ expect_identical(convert_array(casted), as.double(1:10))
+})
+
+test_that("as_nanoarrow_array() works for double() -> na_double()", {
+ # Without nulls
+ array <- as_nanoarrow_array(as.double(1:10))
+ expect_identical(infer_nanoarrow_schema(array)$format, "g")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(as.double(1:10)))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(1:10, NA_real_))
+ expect_identical(infer_nanoarrow_schema(array)$format, "g")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(1:10, NA_real_)))
+ )
+})
+
+test_that("as_nanoarrow_array() works for double() -> na_int32()", {
+ # Without nulls
+ array <- as_nanoarrow_array(as.double(1:10), schema = na_int32())
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(1:10))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_int32())
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5)))
+ )
+ # The last element here is (int)NaN not NA_integer_
+ expect_identical(
+ head(as.raw(array$buffers[[2]]), 10 * 4L),
+ as.raw(as_nanoarrow_buffer(1:10))
+ )
+
+ # With overflow
+ expect_warning(
+ as_nanoarrow_array(.Machine$integer.max + as.double(1:5), schema =
na_int32()),
+ "5 value\\(s\\) overflowed"
+ )
+})
+
+test_that("as_nanoarrow_array() works for double() -> na_int64()", {
+ # Without nulls
+ array <- as_nanoarrow_array(as.double(1:10), schema = na_int64())
+ expect_identical(infer_nanoarrow_schema(array)$format, "l")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ # This *is* how we create int64 buffers, so just check the roundtrip
+ expect_identical(convert_array(array), as.double(1:10))
+
+ # With nulls
+ array <- as_nanoarrow_array(c(1:10, NA_real_), schema = na_int64())
+ expect_identical(infer_nanoarrow_schema(array)$format, "l")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 10), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(convert_array(array), as.double(c(1:10, NA_real_)))
+})
+
+test_that("as_nanoarrow_array() works for double -> na_int8()", {
+ skip_if_not_installed("arrow")
+ casted <- as_nanoarrow_array(as.double(1:10), schema = na_int8())
+ expect_identical(infer_nanoarrow_schema(casted)$format, "c")
+ expect_identical(convert_array(casted), 1:10)
+})
+
+test_that("as_nanoarrow_array() works for character() -> na_string()", {
+ # Without nulls
+ array <- as_nanoarrow_array(letters)
+ expect_identical(infer_nanoarrow_schema(array)$format, "u")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(0:26))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(letters, NA))
+ expect_identical(infer_nanoarrow_schema(array)$format, "u")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(0:26, 26L)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+})
+
+test_that("as_nanoarrow_array() works for character() -> na_large_string()", {
+ skip_if_not_installed("arrow")
+
+ # Without nulls
+ array <- as_nanoarrow_array(letters, schema = na_large_string())
+ expect_identical(infer_nanoarrow_schema(array)$format, "U")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(c(letters, NA), schema = na_large_string())
+ expect_identical(infer_nanoarrow_schema(array)$format, "U")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+})
+
+test_that("as_nanoarrow_array() works for factor() -> na_dictionary()", {
+ array <- as_nanoarrow_array(
+ factor(letters),
+ schema = na_dictionary(na_string(), na_int32())
+ )
+
+ expect_identical(infer_nanoarrow_schema(array)$format, "i")
+ expect_identical(infer_nanoarrow_schema(array$dictionary)$format, "u")
+
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(0:25))
+ )
+
+ expect_identical(
+ as.raw(array$dictionary$buffers[[3]]),
+ charToRaw(paste0(letters, collapse = ""))
+ )
+})
+
+test_that("as_nanoarrow_array() works for factor() -> na_string()", {
+ array <- as_nanoarrow_array(
+ factor(letters),
+ schema = na_string()
+ )
+
+ expect_identical(infer_nanoarrow_schema(array)$format, "u")
+ expect_null(array$dictionary)
+
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(0:26))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ charToRaw(paste0(letters, collapse = ""))
+ )
+})
+
+test_that("as_nanoarrow_array() works for data.frame() -> na_struct()", {
+ array <- as_nanoarrow_array(data.frame(x = 1:10))
+ expect_identical(array$length, 10L)
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(infer_nanoarrow_schema(array)$format, "+s")
+ expect_identical(infer_nanoarrow_schema(array$children$x)$format, "i")
+ expect_identical(as.raw(array$children$x$buffers[[2]]),
as.raw(as_nanoarrow_buffer(1:10)))
+})
+
+test_that("as_nanoarrow_array() errors for bad data.frame() -> na_struct()", {
+ expect_error(
+ as_nanoarrow_array(data.frame(x = 1:10), schema = na_struct()),
+ "Expected 1 schema children"
+ )
+
+ skip_if_not_installed("arrow")
+ expect_snapshot_error(
+ as_nanoarrow_array(data.frame(x = 1:10), schema = na_int32())
+ )
+})
+
+test_that("as_nanoarrow_array() works for blob::blob() -> na_binary()", {
+ skip_if_not_installed("blob")
+
+ # Without nulls
+ array <- as_nanoarrow_array(blob::as_blob(letters))
+ expect_identical(infer_nanoarrow_schema(array)$format, "z")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(0:26))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(blob::as_blob(c(letters, NA)))
+ expect_identical(infer_nanoarrow_schema(array)$format, "z")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[2]]),
+ as.raw(as_nanoarrow_buffer(c(0:26, 26L)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+})
+
+test_that("as_nanoarrow_array() works for blob::blob() -> na_large_binary()", {
+ skip_if_not_installed("arrow")
+
+ # Without nulls
+ array <- as_nanoarrow_array(blob::as_blob(letters), schema =
na_large_binary())
+ expect_identical(infer_nanoarrow_schema(array)$format, "Z")
+ expect_identical(as.raw(array$buffers[[1]]), raw())
+ expect_identical(array$offset, 0L)
+ expect_identical(array$null_count, 0L)
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+
+ # With nulls
+ array <- as_nanoarrow_array(
+ blob::as_blob(c(letters, NA)),
+ schema = na_large_binary()
+ )
+ expect_identical(infer_nanoarrow_schema(array)$format, "Z")
+ expect_identical(array$null_count, 1L)
+ expect_identical(
+ as.raw(array$buffers[[1]]),
+ packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+ )
+ expect_identical(
+ as.raw(array$buffers[[3]]),
+ as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+ )
+})
+
+test_that("as_nanoarrow_array() works for unspecified() -> na_na()", {
+ skip_if_not_installed("vctrs")
+
+ array <- as_nanoarrow_array(vctrs::unspecified(5))
+ expect_identical(infer_nanoarrow_schema(array)$format, "n")
+ expect_identical(array$length, 5L)
+ expect_identical(array$null_count, 5L)
+})
+
+test_that("as_nanoarrow_array() works for bad unspecified() create", {
+ skip_if_not_installed("vctrs")
+ skip_if_not_installed("arrow")
+ expect_snapshot_error(
+ as_nanoarrow_array(vctrs::unspecified(5), schema = na_interval_day_time())
+ )
+})
diff --git a/r/tests/testthat/test-convert-array-stream.R
b/r/tests/testthat/test-convert-array-stream.R
index 5f58cba..404a43f 100644
--- a/r/tests/testthat/test-convert-array-stream.R
+++ b/r/tests/testthat/test-convert-array-stream.R
@@ -16,50 +16,40 @@
# under the License.
test_that("convert array stream works", {
- stream0 <- arrow::RecordBatchReader$create(
- schema = arrow::schema(x = arrow::int32())
- )
- stream0 <- as_nanoarrow_array_stream(stream0)
+ stream0 <- basic_array_stream(list(), schema = na_struct(list(x =
na_int32())))
expect_identical(convert_array_stream(stream0), data.frame(x = integer()))
- stream1 <- arrow::RecordBatchReader$create(
- arrow::record_batch(x = 1:5)
- )
- stream1 <- as_nanoarrow_array_stream(stream1)
+ stream1 <- basic_array_stream(list(data.frame(x = 1:5)))
expect_identical(convert_array_stream(stream1), data.frame(x = 1:5))
- stream2 <- arrow::RecordBatchReader$create(
- arrow::record_batch(x = 1:5),
- arrow::record_batch(x = 6:10)
+ stream2 <- basic_array_stream(
+ list(
+ data.frame(x = 1:5),
+ data.frame(x = 6:10)
+ )
)
- stream2 <- as_nanoarrow_array_stream(stream2)
expect_identical(convert_array_stream(stream2), data.frame(x = 1:10))
})
test_that("convert array stream with explicit size works", {
- stream0 <- arrow::RecordBatchReader$create(
- schema = arrow::schema(x = arrow::int32())
- )
- stream0 <- as_nanoarrow_array_stream(stream0)
+ stream0 <- basic_array_stream(list(), schema = na_struct(list(x =
na_int32())))
expect_identical(
convert_array_stream(stream0, size = 0),
data.frame(x = integer())
)
- stream1 <- arrow::RecordBatchReader$create(
- arrow::record_batch(x = 1:5)
- )
- stream1 <- as_nanoarrow_array_stream(stream1)
+ stream1 <- basic_array_stream(list(data.frame(x = 1:5)))
expect_identical(
convert_array_stream(stream1, size = 5),
data.frame(x = 1:5)
)
- stream2 <- arrow::RecordBatchReader$create(
- arrow::record_batch(x = 1:5),
- arrow::record_batch(x = 6:10)
+ stream2 <- basic_array_stream(
+ list(
+ data.frame(x = 1:5),
+ data.frame(x = 6:10)
+ )
)
- stream2 <- as_nanoarrow_array_stream(stream2)
expect_identical(
convert_array_stream(stream2, size = 10),
data.frame(x = 1:10)
@@ -143,27 +133,24 @@ test_that("convert array stream works for struct-style
vectors", {
test_that("convert array stream respects the value of n", {
batches <- list(
- arrow::record_batch(x = 1:5),
- arrow::record_batch(x = 6:10),
- arrow::record_batch(x = 11:15)
+ data.frame(x = 1:5),
+ data.frame(x = 6:10),
+ data.frame(x = 11:15)
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 0),
data.frame(x = integer())
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 1),
data.frame(x = 1:5)
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 2),
data.frame(x = 1:10)
@@ -172,27 +159,24 @@ test_that("convert array stream respects the value of n",
{
test_that("fixed-size convert array stream respects the value of n", {
batches <- list(
- arrow::record_batch(x = 1:5),
- arrow::record_batch(x = 6:10),
- arrow::record_batch(x = 11:15)
+ data.frame(x = 1:5),
+ data.frame(x = 6:10),
+ data.frame(x = 11:15)
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 0, size = 0),
data.frame(x = integer())
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 1, size = 5),
data.frame(x = 1:5)
)
- reader3 <- arrow::RecordBatchReader$create(batches = batches)
- stream3 <- as_nanoarrow_array_stream(reader3)
+ stream3 <- basic_array_stream(batches)
expect_identical(
convert_array_stream(stream3, n = 2, size = 10),
data.frame(x = 1:10)
diff --git a/r/tests/testthat/test-convert-array.R
b/r/tests/testthat/test-convert-array.R
index bd51897..2f21ad1 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -53,10 +53,10 @@ test_that("convert_array() errors for unsupported ptype", {
})
test_that("convert_array() errors for unsupported array", {
- unsupported_array <- arrow::concat_arrays(type = arrow::decimal256(3, 4))
+ unsupported_array <- nanoarrow_array_init(na_interval_day_time())
expect_error(
convert_array(as_nanoarrow_array(unsupported_array)),
- "Can't infer R vector type for array <decimal256\\(3, 4\\)>"
+ "Can't infer R vector type for array <interval_day_time>"
)
})
@@ -146,7 +146,9 @@ test_that("convert to vector works for struct-style
vectors", {
})
test_that("convert to vector works for unspecified()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
# implicit for null type
expect_identical(
@@ -179,6 +181,8 @@ test_that("convert to vector works for unspecified()", {
})
test_that("convert to vector works for valid logical()", {
+ skip_if_not_installed("arrow")
+
arrow_numeric_types <- list(
int8 = arrow::int8(),
uint8 = arrow::uint8(),
@@ -233,7 +237,10 @@ test_that("convert to vector works for valid logical()", {
})
test_that("convert to vector works for null -> logical()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, logical()),
rep(NA, 10)
@@ -248,6 +255,8 @@ test_that("convert to vector errors for bad array to
logical()", {
})
test_that("convert to vector works for valid integer()", {
+ skip_if_not_installed("arrow")
+
arrow_int_types <- list(
int8 = arrow::int8(),
uint8 = arrow::uint8(),
@@ -302,7 +311,10 @@ test_that("convert to vector works for valid integer()", {
})
test_that("convert to vector works for null -> logical()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, integer()),
rep(NA_integer_, 10)
@@ -310,13 +322,13 @@ test_that("convert to vector works for null ->
logical()", {
})
test_that("convert to vector warns for invalid integer()", {
- array <- as_nanoarrow_array(arrow::as_arrow_array(.Machine$double.xmax))
+ array <- as_nanoarrow_array(.Machine$double.xmax)
expect_warning(
expect_identical(convert_array(array, integer()), NA_integer_),
"1 value\\(s\\) outside integer range set to NA"
)
- array <- as_nanoarrow_array(arrow::as_arrow_array(c(NA,
.Machine$double.xmax)))
+ array <- as_nanoarrow_array(c(NA, .Machine$double.xmax))
expect_warning(
expect_identical(convert_array(array, integer()), c(NA_integer_,
NA_integer_)),
"1 value\\(s\\) outside integer range set to NA"
@@ -331,6 +343,8 @@ test_that("convert to vector errors for bad array to
integer()", {
})
test_that("convert to vector works for valid double()", {
+ skip_if_not_installed("arrow")
+
arrow_numeric_types <- list(
int8 = arrow::int8(),
uint8 = arrow::uint8(),
@@ -385,6 +399,8 @@ test_that("convert to vector works for valid double()", {
})
test_that("convert to vector works for decimal128 -> double()", {
+ skip_if_not_installed("arrow")
+
array <-
as_nanoarrow_array(arrow::Array$create(1:10)$cast(arrow::decimal128(20, 10)))
expect_equal(
convert_array(array, double()),
@@ -393,7 +409,10 @@ test_that("convert to vector works for decimal128 ->
double()", {
})
test_that("convert to vector works for null -> double()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, double()),
rep(NA_real_, 10)
@@ -425,7 +444,10 @@ test_that("convert to vector works for character()", {
})
test_that("convert to vector works for null -> character()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
all_nulls <- convert_array(array, character())
nanoarrow_altrep_force_materialize(all_nulls)
expect_identical(
@@ -435,7 +457,10 @@ test_that("convert to vector works for null ->
character()", {
})
test_that("convert to vector works for blob::blob()", {
- array <- as_nanoarrow_array(list(as.raw(1:5)), schema = arrow::binary())
+ skip_if_not_installed("blob")
+
+ array <- as_nanoarrow_array(list(as.raw(1:5)), schema = na_binary())
+
expect_identical(
convert_array(array),
blob::blob(as.raw(1:5))
@@ -448,7 +473,10 @@ test_that("convert to vector works for blob::blob()", {
})
test_that("convert to vector works for null -> blob::blob()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, blob::blob()),
blob::new_blob(rep(list(NULL), 10))
@@ -456,6 +484,8 @@ test_that("convert to vector works for null ->
blob::blob()", {
})
test_that("convert to vector works for list -> vctrs::list_of", {
+ skip_if_not_installed("arrow")
+
array_list <- as_nanoarrow_array(
arrow::Array$create(
list(1:5, 6:10, NULL),
@@ -491,6 +521,8 @@ test_that("convert to vector works for list ->
vctrs::list_of", {
})
test_that("convert to vector works for large_list -> vctrs::list_of", {
+ skip_if_not_installed("arrow")
+
array_list <- as_nanoarrow_array(
arrow::Array$create(
list(1:5, 6:10, NULL),
@@ -518,6 +550,8 @@ test_that("convert to vector works for large_list ->
vctrs::list_of", {
})
test_that("convert to vector works for fixed_size_list -> vctrs::list_of", {
+ skip_if_not_installed("arrow")
+
array_list <- as_nanoarrow_array(
arrow::Array$create(
list(1:5, 6:10, NULL),
@@ -545,7 +579,10 @@ test_that("convert to vector works for fixed_size_list ->
vctrs::list_of", {
})
test_that("convert to vector works for null -> vctrs::list_of()", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, vctrs::list_of(.ptype = integer())),
vctrs::new_list_of(rep(list(NULL), 10), ptype = integer())
@@ -560,7 +597,8 @@ test_that("convert to vector works for Date", {
)
array_date <- as_nanoarrow_array(
- arrow::Array$create(as.Date(c(NA, "2000-01-01")), arrow::date64())
+ as.Date(c(NA, "2000-01-01")),
+ schema = na_date64()
)
expect_identical(
convert_array(array_date),
@@ -569,7 +607,10 @@ test_that("convert to vector works for Date", {
})
test_that("convert to vector works for null -> Date", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, as.Date(character())),
as.Date(rep(NA_character_, 10))
@@ -585,7 +626,10 @@ test_that("convert to vector works for hms", {
})
test_that("convert to vector works for null -> hms", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, hms::hms()),
hms::parse_hms(rep(NA_character_, 10))
@@ -604,7 +648,10 @@ test_that("convert to vector works for POSIXct", {
})
test_that("convert to vector works for null -> POSIXct", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, as.POSIXct(character(), tz = "America/Halifax")),
as.POSIXct(rep(NA_character_, 10), tz = "America/Halifax")
@@ -636,24 +683,16 @@ test_that("convert to vector works for difftime", {
# with all Arrow units
x <- as.difftime(123, units = "secs")
- array_duration <- as_nanoarrow_array(
- arrow::Array$create(x, arrow::duration("s"))
- )
+ array_duration <- as_nanoarrow_array(x, na_duration("s"))
expect_identical(convert_array(array_duration), x)
- array_duration <- as_nanoarrow_array(
- arrow::Array$create(x, arrow::duration("ms"))
- )
+ array_duration <- as_nanoarrow_array(x, na_duration("ms"))
expect_identical(convert_array(array_duration), x)
- array_duration <- as_nanoarrow_array(
- arrow::Array$create(x, arrow::duration("us"))
- )
+ array_duration <- as_nanoarrow_array(x, na_duration("us"))
expect_identical(convert_array(array_duration), x)
- array_duration <- as_nanoarrow_array(
- arrow::Array$create(x, arrow::duration("ns"))
- )
+ array_duration <- as_nanoarrow_array(x, na_duration("ns"))
expect_equal(convert_array(array_duration), x)
# bad ptype values
@@ -689,7 +728,10 @@ test_that("convert to vector works for difftime", {
})
test_that("convert to vector works for null -> difftime", {
- array <- as_nanoarrow_array(arrow::Array$create(rep(NA, 10), arrow::null()))
+ array <- nanoarrow_array_init(na_na())
+ array$length <- 10
+ array$null_count <- 10
+
expect_identical(
convert_array(array, as.difftime(numeric(), units = "secs")),
as.difftime(rep(NA_real_, 10), units = "secs")
@@ -697,6 +739,8 @@ test_that("convert to vector works for null -> difftime", {
})
test_that("convert to vector works for data frames nested inside lists", {
+ skip_if_not_installed("arrow")
+
df_in_list <- vctrs::list_of(
data.frame(x = 1:5),
data.frame(x = 6:10),
@@ -711,6 +755,8 @@ test_that("convert to vector works for data frames nested
inside lists", {
})
test_that("convert to vector works for lists nested in data frames", {
+ skip_if_not_installed("arrow")
+
df_in_list_in_df <- data.frame(
x = vctrs::list_of(
data.frame(x = 1:5),
@@ -727,22 +773,22 @@ test_that("convert to vector works for lists nested in
data frames", {
})
test_that("convert to vector warns for stripped extension type", {
- ext_arr <- as_nanoarrow_array(
- arrow::Array$create(vctrs::new_vctr(1:5, class = "my_vctr"))
- )
+ ext_arr <- as_nanoarrow_array(1:5)
+ nanoarrow_array_set_schema(ext_arr, na_extension(na_int32(), "some_ext"))
expect_warning(
expect_identical(convert_array(ext_arr), 1:5),
- "Converting unknown extension arrow.r.vctrs"
+ "Converting unknown extension some_ext"
)
- nested_ext_array <- as_nanoarrow_array(
- arrow::record_batch(
- x = vctrs::new_vctr(1:5, class = "my_vctr")
- )
+ nested_ext_array <- as_nanoarrow_array(data.frame(x = 1:5))
+ nanoarrow_array_set_schema(
+ nested_ext_array,
+ na_struct(list(x = na_extension(na_int32(), "some_ext")))
)
+
expect_warning(
expect_identical(convert_array(nested_ext_array), data.frame(x = 1:5)),
- "x: Converting unknown extension arrow.r.vctrs"
+ "x: Converting unknown extension some_ext"
)
})
diff --git a/r/tests/testthat/test-infer-ptype.R
b/r/tests/testthat/test-infer-ptype.R
index 76ed25b..2fe3c96 100644
--- a/r/tests/testthat/test-infer-ptype.R
+++ b/r/tests/testthat/test-infer-ptype.R
@@ -53,7 +53,7 @@ test_that("infer_nanoarrow_ptype() works for basic types", {
)
expect_identical(
- infer_nanoarrow_ptype(as_nanoarrow_schema(arrow::decimal128(2, 3))),
+ infer_nanoarrow_ptype(as_nanoarrow_schema(na_decimal128(2, 3))),
double()
)
@@ -97,6 +97,8 @@ test_that("infer_nanoarrow_ptype() infers ptypes for
date/time types", {
})
test_that("infer_nanoarrow_ptype() infers ptypes for nested types", {
+ skip_if_not_installed("arrow")
+
array_list <- as_nanoarrow_array(vctrs::list_of(integer()))
expect_identical(
infer_nanoarrow_ptype(array_list),
@@ -116,14 +118,14 @@ test_that("infer_nanoarrow_ptype() infers ptypes for
nested types", {
})
test_that("infer_nanoarrow_ptype() errors for types it can't infer", {
- unsupported_array <- arrow::concat_arrays(type = arrow::decimal256(3, 4))
+ unsupported_array <- nanoarrow_array_init(na_decimal256(3, 4))
expect_error(
infer_nanoarrow_ptype(as_nanoarrow_array(unsupported_array)),
"Can't infer R vector type for array <decimal256\\(3, 4\\)>"
)
- unsupported_struct <- arrow::concat_arrays(
- type = arrow::struct(col = arrow::decimal256(3, 4))
+ unsupported_struct <- nanoarrow_array_init(
+ na_struct(list(col = na_decimal256(3, 4)))
)
expect_error(
infer_nanoarrow_ptype(as_nanoarrow_array(unsupported_struct)),
diff --git a/r/tests/testthat/test-pointers.R b/r/tests/testthat/test-pointers.R
index 412b2b6..36efa5a 100644
--- a/r/tests/testthat/test-pointers.R
+++ b/r/tests/testthat/test-pointers.R
@@ -53,7 +53,7 @@ test_that("nanoarrow_pointer_move() works for schema", {
dst <- nanoarrow_allocate_schema()
nanoarrow_pointer_move(ptr, dst)
expect_false(nanoarrow_pointer_is_valid(ptr))
- expect_true(arrow::as_data_type(dst)$Equals(arrow::int32()))
+ expect_true(nanoarrow_schema_identical(dst, na_int32()))
expect_error(
nanoarrow_pointer_move(ptr, dst),
@@ -71,7 +71,7 @@ test_that("nanoarrow_pointer_move() works for array", {
dst <- nanoarrow_allocate_array()
nanoarrow_pointer_move(ptr, dst)
expect_false(nanoarrow_pointer_is_valid(ptr))
-
expect_true(arrow::as_arrow_array(dst)$Equals(arrow::Array$create(integer())))
+ expect_identical(convert_array(dst), integer())
expect_error(
nanoarrow_pointer_move(ptr, dst),
@@ -137,9 +137,7 @@ test_that("nanoarrow_pointer_export() works for schema", {
dst <- nanoarrow_allocate_schema()
nanoarrow_pointer_export(ptr, dst)
expect_true(nanoarrow_pointer_is_valid(ptr))
- expect_true(
- arrow::as_data_type(dst)$Equals(arrow::int32())
- )
+ expect_true(nanoarrow_schema_identical(dst, na_int32()))
expect_error(
nanoarrow_pointer_export(ptr, dst),
@@ -159,7 +157,7 @@ test_that("nanoarrow_pointer_export() works for array", {
expect_true(nanoarrow_pointer_is_valid(ptr))
# (when exporting the schema is not included)
nanoarrow_array_set_schema(dst, infer_nanoarrow_schema(ptr))
-
expect_true(arrow::as_arrow_array(dst)$Equals(arrow::Array$create(integer())))
+ expect_identical(convert_array(dst), integer())
expect_error(
nanoarrow_pointer_export(ptr, dst),
diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R
index f625c72..fec1cd1 100644
--- a/r/tests/testthat/test-schema.R
+++ b/r/tests/testthat/test-schema.R
@@ -53,12 +53,12 @@ test_that("infer_nanoarrow_schema() methods work for
built-in types", {
expect_identical(infer_nanoarrow_schema(factor())$dictionary$format, "u")
time <- as.POSIXct("2000-01-01", tz = "UTC")
- expect_identical(infer_nanoarrow_schema(time)$format, "tsm:UTC")
+ expect_identical(infer_nanoarrow_schema(time)$format, "tsu:UTC")
time <- as.POSIXct("2000-01-01", tz = "")
expect_identical(
infer_nanoarrow_schema(time)$format,
- paste0("tsm:", Sys.timezone())
+ paste0("tsu:", Sys.timezone())
)
difftime <- as.difftime(double(), unit = "secs")
diff --git a/r/tests/testthat/test-type.R b/r/tests/testthat/test-type.R
index 689dfc1..041903d 100644
--- a/r/tests/testthat/test-type.R
+++ b/r/tests/testthat/test-type.R
@@ -35,7 +35,11 @@ test_that("type constructors for parameter-free types work",
{
)
# Check that the default schema is nullable
- expect_identical(na_type(!!type_name)$flags, 2L)
+ if (type_name == "struct") {
+ expect_identical(na_type(!!type_name)$flags, 0L)
+ } else {
+ expect_identical(na_type(!!type_name)$flags, 2L)
+ }
# Check that non-nullable schemas are non-nullable
expect_identical(na_type(!!type_name, nullable = FALSE)$flags, 0L)
diff --git a/r/tests/testthat/test-util.R b/r/tests/testthat/test-util.R
index b8ab6bb..5905c05 100644
--- a/r/tests/testthat/test-util.R
+++ b/r/tests/testthat/test-util.R
@@ -15,6 +15,26 @@
# specific language governing permissions and limitations
# under the License.
+test_that("can set option/env var to pretend the arrow package is not
installed", {
+ skip_if_not_installed("arrow")
+
+ expect_true(arrow_installed())
+ expect_silent(assert_arrow_installed("life"))
+
+ withr::with_options(list(nanoarrow.without_arrow = TRUE), {
+ expect_false(arrow_installed())
+
+ expect_error(
+ assert_arrow_installed("life"),
+ "Package 'arrow' required for life"
+ )
+ })
+
+ withr::with_envvar(list(R_NANOARROW_WITHOUT_ARROW = "true"), {
+ expect_false(arrow_installed())
+ })
+})
+
test_that("preserve/release works when release happens on another thread", {
some_non_null_sexp <- 1L
count0 <- preserved_count()