This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 18652fcc feat(r): Support native creation of more numeric Arrow arrays from integer vectors (#697)
18652fcc is described below

commit 18652fccbace6ac41e67f78447e46718f63cd03e
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Dec 30 16:01:08 2024 -0600

    feat(r): Support native creation of more numeric Arrow arrays from integer vectors (#697)
    
    Previously, we punted to the arrow R package for these conversions,
    which wasn't necessary! This was exposed by a test added for matrix
    support that wasn't skipped when arrow wasn't installed.
    
    ``` r
    library(nanoarrow)
    as_nanoarrow_array(1:10, schema = na_double())
    #> <nanoarrow_array double[10]>
    #>  $ length    : int 10
    #>  $ null_count: int 0
    #>  $ offset    : int 0
    #>  $ buffers   :List of 2
    #>   ..$ :<nanoarrow_buffer validity<bool>[null] ``
    #>   ..$ :<nanoarrow_buffer data<double>[10][80 b]> `1 2 3 4 5 6 7 8 9 10`
    #>  $ dictionary: NULL
    #>  $ children  : list()
    ```
    
    <sup>Created on 2024-12-26 with [reprex
    v2.1.1](https://reprex.tidyverse.org)</sup>
---
 r/src/as_array.c                 | 42 +++++++++++++++++++++++++++++++++-------
 r/tests/testthat/test-as-array.R |  1 -
 2 files changed, 35 insertions(+), 8 deletions(-)

diff --git a/r/src/as_array.c b/r/src/as_array.c
index aeb4930d..bcafb5a3 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -48,10 +48,17 @@ static void call_as_nanoarrow_array(SEXP x_sexp, struct 
ArrowArray* array,
 
 static void as_array_int(SEXP x_sexp, struct ArrowArray* array, SEXP 
schema_xptr,
                          struct ArrowSchemaView* schema_view, struct 
ArrowError* error) {
-  // Only consider the default create for now
-  if (schema_view->type != NANOARROW_TYPE_INT32) {
-    call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
-    return;
+  // Consider integer -> numeric types that are easy to implement
+  switch (schema_view->type) {
+    case NANOARROW_TYPE_DOUBLE:
+    case NANOARROW_TYPE_FLOAT:
+    case NANOARROW_TYPE_HALF_FLOAT:
+    case NANOARROW_TYPE_INT64:
+    case NANOARROW_TYPE_INT32:
+      break;
+    default:
+      call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
+      return;
   }
 
   // We don't consider altrep for now: we need an array of int32_t, and while 
we
@@ -60,13 +67,33 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray* 
array, SEXP schema_xptr
   int* x_data = INTEGER(x_sexp);
   int64_t len = Rf_xlength(x_sexp);
 
-  int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_INT32);
+  int result = ArrowArrayInitFromType(array, schema_view->type);
   if (result != NANOARROW_OK) {
     Rf_error("ArrowArrayInitFromType() failed");
   }
 
-  // Borrow the data buffer
-  buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(int32_t), 
x_sexp);
+  if (schema_view->type == NANOARROW_TYPE_INT32) {
+    // Zero-copy create: just borrow the data buffer
+    buffer_borrowed(ArrowArrayBuffer(array, 1), x_data, len * sizeof(int32_t), 
x_sexp);
+  } else {
+    // Otherwise, use the integer appender
+    result = ArrowArrayStartAppending(array);
+    if (result != NANOARROW_OK) {
+      Rf_error("ArrowArrayStartAppending() failed");
+    }
+
+    result = ArrowArrayReserve(array, len);
+    if (result != NANOARROW_OK) {
+      Rf_error("ArrowArrayReserve() failed");
+    }
+
+    for (int64_t i = 0; i < len; i++) {
+      result = ArrowArrayAppendInt(array, x_data[i]);
+      if (result != NANOARROW_OK) {
+        Rf_error("ArrowArrayAppendInt() failed");
+      }
+    }
+  }
 
   // Set the array fields
   array->length = len;
@@ -102,6 +129,7 @@ static void as_array_int(SEXP x_sexp, struct ArrowArray* 
array, SEXP schema_xptr
   }
 
   array->null_count = null_count;
+
   result = ArrowArrayFinishBuildingDefault(array, error);
   if (result != NANOARROW_OK) {
     Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index 28950ba0..00e88a75 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -111,7 +111,6 @@ test_that("as_nanoarrow_array() works for integer() -> 
na_int32()", {
 })
 
 test_that("as_nanoarrow_array() works for integer -> na_int64()", {
-  skip_if_not_installed("arrow")
   casted <- as_nanoarrow_array(1:10, schema = na_int64())
   expect_identical(infer_nanoarrow_schema(casted)$format, "l")
   expect_identical(convert_array(casted), as.double(1:10))

Reply via email to