This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 18652fcc feat(r): Support native creation of more numeric Arrow arrays
from integer vectors (#697)
18652fcc is described below
commit 18652fccbace6ac41e67f78447e46718f63cd03e
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Dec 30 16:01:08 2024 -0600
feat(r): Support native creation of more numeric Arrow arrays from integer
vectors (#697)
Previously, we punted to the arrow R package for this conversion, which
wasn't necessary! This was exposed by a test that had been added for matrix
support and that wasn't skipped when arrow wasn't installed.
``` r
library(nanoarrow)
as_nanoarrow_array(1:10, schema = na_double())
#> <nanoarrow_array double[10]>
#> $ length : int 10
#> $ null_count: int 0
#> $ offset : int 0
#> $ buffers :List of 2
#> ..$ :<nanoarrow_buffer validity<bool>[null] ``
#> ..$ :<nanoarrow_buffer data<double>[10][80 b]> `1 2 3 4 5 6 7 8 9 10`
#> $ dictionary: NULL
#> $ children : list()
```
<sup>Created on 2024-12-26 with [reprex
v2.1.1](https://reprex.tidyverse.org)</sup>
---
r/src/as_array.c | 42 +++++++++++++++++++++++++++++++++-------
r/tests/testthat/test-as-array.R | 1 -
2 files changed, 35 insertions(+), 8 deletions(-)
diff --git a/r/src/as_array.c b/r/src/as_array.c
index aeb4930d..bcafb5a3 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -48,10 +48,17 @@ static void call_as_nanoarrow_array(SEXP x_sexp, struct
ArrowArray* array,
static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP
schema_xptr,
struct ArrowSchemaView* schema_view, struct
ArrowError* error) {
- // Only consider the default create for now
- if (schema_view->type != NANOARROW_TYPE_INT32) {
- call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
- return;
+ // Consider integer -> numeric types that are easy to implement
+ switch (schema_view->type) {
+ case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_FLOAT:
+ case NANOARROW_TYPE_HALF_FLOAT:
+ case NANOARROW_TYPE_INT64:
+ case NANOARROW_TYPE_INT32:
+ break;
+ default:
+ call_as_nanoarrow_array(x_sexp, array, schema_xptr,
"as_nanoarrow_array_from_c");
+ return;
}
// We don't consider altrep for now: we need an array of int32_t, and while
we
@@ -60,13 +67,33 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
int* x_data = INTEGER(x_sexp);
int64_t len = Rf_xlength(x_sexp);
- int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_INT32);
+ int result = ArrowArrayInitFromType(array, schema_view->type);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayInitFromType() failed");
}
- // Borrow the data buffer
- buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(int32_t),
x_sexp);
+ if (schema_view->type == NANOARROW_TYPE_INT32) {
+ // Zero-copy create: just borrow the data buffer
+ buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(int32_t),
x_sexp);
+ } else {
+ // Otherwise, use the integer appender
+ result = ArrowArrayStartAppending(array);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayStartAppending() failed");
+ }
+
+ result = ArrowArrayReserve(array, len);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayReserve() failed");
+ }
+
+ for (int64_t i = 0; i < len; i++) {
+ result = ArrowArrayAppendInt(array, x_data[i]);
+ if (result != NANOARROW_OK) {
+ Rf_error("ArrowArrayAppendInt() failed");
+ }
+ }
+ }
// Set the array fields
array->length = len;
@@ -102,6 +129,7 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray*
array, SEXP schema_xptr
}
array->null_count = null_count;
+
result = ArrowArrayFinishBuildingDefault(array, error);
if (result != NANOARROW_OK) {
Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index 28950ba0..00e88a75 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -111,7 +111,6 @@ test_that("as_nanoarrow_array() works for integer() ->
na_int32()", {
})
test_that("as_nanoarrow_array() works for integer -> na_int64()", {
- skip_if_not_installed("arrow")
casted <- as_nanoarrow_array(1:10, schema = na_int64())
expect_identical(infer_nanoarrow_schema(casted)$format, "l")
expect_identical(convert_array(casted), as.double(1:10))