This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 5b98b3da feat(r): Implement string view support in R bindings (#636)
5b98b3da is described below

commit 5b98b3daf3561cae8167c49f0461e18b07a482ea
Author: Dewey Dunnington <[email protected]>
AuthorDate: Mon Sep 30 11:58:00 2024 -0500

    feat(r): Implement string view support in R bindings (#636)
    
    This PR adds support for string view and binary view types to the R
    bindings. As a side effect of this, conversion of character vectors to
    Arrow types is now simpler (just goes through nanoarrow C's array
    builder) and supports more types (e.g., the arrow package is no longer
    required to create large_string, large_binary, or fixed_size_binary).
    
    ``` r
    library(nanoarrow)
    
    long_strings <- rep(strrep(letters, 100), 100)
    
    (array <- as_nanoarrow_array(long_strings, schema = na_string_view()))
    #> <nanoarrow_array string_view[2600]>
    #>  $ length    : int 2600
    #>  $ null_count: int 0
    #>  $ offset    : int 0
    #>  $ buffers   :List of 11
    #>   ..$ :<nanoarrow_buffer validity<bool>[0][0 b]> ``
    #>   ..$ :<nanoarrow_buffer unknown<string_view>[2600][41600 b]>`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `aaaaaaaaaaaaaaaaaaaaaaaaaaa...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `ppppppppppppppppppppppppppp...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `eeeeeeeeeeeeeeeeeeeeeeeeeee...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `ttttttttttttttttttttttttttt...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `iiiiiiiiiiiiiiiiiiiiiiiiiii...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `xxxxxxxxxxxxxxxxxxxxxxxxxxx...`
    #>   ..$ :<nanoarrow_buffer data<string>[32700 b]> `mmmmmmmmmmmmmmmmmmmmmmmmmmm...`
    #>   ..$ :<nanoarrow_buffer data<string>[31100 b]> `bbbbbbbbbbbbbbbbbbbbbbbbbbb...`
    #>   ..$ :<nanoarrow_buffer data<int64>[8][64 b]> `32700 32700 32700 32700 3270...`
    #>  $ dictionary: NULL
    #>  $ children  : list()
    
    identical(convert_array(array), long_strings)
    #> [1] TRUE
    ```
    
    <sup>Created on 2024-09-27 with [reprex
    v2.1.1](https://reprex.tidyverse.org)</sup>
    
    ---------
    
    Co-authored-by: Bryce Mecum <[email protected]>
---
 r/NAMESPACE                           |   2 +
 r/R/buffer.R                          |  27 ++++--
 r/R/infer-ptype.R                     |   3 +-
 r/R/type.R                            |  17 +++-
 r/man/na_type.Rd                      |   6 ++
 r/src/array.c                         |  40 +++++++-
 r/src/as_array.c                      | 173 ++++++++++++----------------------
 r/src/buffer.c                        |   3 +
 r/src/infer_ptype.c                   |   1 +
 r/src/materialize_blob.h              |   2 +
 r/src/materialize_chr.h               |   1 +
 r/tests/testthat/_snaps/array.md      |  33 +++++++
 r/tests/testthat/test-array.R         |  11 +++
 r/tests/testthat/test-as-array.R      |  97 ++++++++++++++++++-
 r/tests/testthat/test-buffer.R        |   5 +
 r/tests/testthat/test-convert-array.R |  30 ++++++
 16 files changed, 318 insertions(+), 133 deletions(-)

diff --git a/r/NAMESPACE b/r/NAMESPACE
index d9fa3303..76234961 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -147,6 +147,7 @@ export(infer_nanoarrow_ptype)
 export(infer_nanoarrow_ptype_extension)
 export(infer_nanoarrow_schema)
 export(na_binary)
+export(na_binary_view)
 export(na_bool)
 export(na_date32)
 export(na_date64)
@@ -176,6 +177,7 @@ export(na_map)
 export(na_na)
 export(na_sparse_union)
 export(na_string)
+export(na_string_view)
 export(na_struct)
 export(na_time32)
 export(na_time64)
diff --git a/r/R/buffer.R b/r/R/buffer.R
index e8b28d8f..2267d742 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -64,7 +64,7 @@ as_nanoarrow_buffer.default <- function(x, ...) {
 
 #' @importFrom utils str
 #' @export
-str.nanoarrow_buffer <- function(object, ..., db = F, indent.str = "",
+str.nanoarrow_buffer <- function(object, ..., indent.str = "",
                                  width = getOption("width")) {
   formatted <- format(object)
   cat(formatted)
@@ -117,8 +117,10 @@ print.nanoarrow_buffer <- function(x, ...) {
 }
 
 #' @export
+
 format.nanoarrow_buffer <- function(x, ...) {
   info <- nanoarrow_buffer_info(x)
+  is_null <- identical(nanoarrow_pointer_addr_chr(info$data), "0")
   if (info$data_type == "unknown") {
     len <- ""
   } else if (info$element_size_bits == 0 || info$data_type %in% c("binary", 
"string")) {
@@ -128,14 +130,17 @@ format.nanoarrow_buffer <- function(x, ...) {
     len <- sprintf("[%s][%s b]", logical_length, info$size_bytes)
   }
 
-
-  sprintf(
-    "<%s %s<%s>%s>",
-    class(x)[1],
-    info$type,
-    info$data_type,
-    len
-  )
+  if (is_null) {
+    sprintf("<%s %s<%s>[null]", class(x)[1], info$type, info$data_type)
+  } else {
+    sprintf(
+      "<%s %s<%s>%s>",
+      class(x)[1],
+      info$type,
+      info$data_type,
+      len
+    )
+  }
 }
 
 #' Create and modify nanoarrow buffers
@@ -209,7 +214,7 @@ as_nanoarrow_array.nanoarrow_buffer <- function(x, ..., 
schema = NULL) {
         buffers = list(NULL, offsets, x)
       )
     )
-  } else if(data_type %in% c("string", "binary")) {
+  } else if (data_type %in% c("string", "binary")) {
     array <- nanoarrow_array_init(na_type(paste0("large_", data_type)))
     offsets <- as_nanoarrow_array(c(0, logical_length), schema = 
na_int64())$buffers[[2]]
     nanoarrow_array_modify(
@@ -220,6 +225,8 @@ as_nanoarrow_array.nanoarrow_buffer <- function(x, ..., 
schema = NULL) {
         buffers = list(NULL, offsets, x)
       )
     )
+  } else if (data_type %in% c("string_view", "binary_view")) {
+    stop("Can't convert buffer of type string_view or binary_view to array")
   } else {
     array <- nanoarrow_array_init(na_type(data_type))
     nanoarrow_array_modify(
diff --git a/r/R/infer-ptype.R b/r/R/infer-ptype.R
index ce3c7165..bc90cec5 100644
--- a/r/R/infer-ptype.R
+++ b/r/R/infer-ptype.R
@@ -76,7 +76,8 @@ infer_ptype_other <- function(schema) {
     parsed$type,
     "na" = vctrs::unspecified(),
     "binary" = ,
-    "large_binary" = new_blob_internal(),
+    "large_binary" = ,
+    "binary_view" = new_blob_internal(),
     "date32" = structure(numeric(), class = "Date"),
     "time32" = ,
     "time64" = hms::hms(),
diff --git a/r/R/type.R b/r/R/type.R
index 7348c210..77bc0ad8 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -186,6 +186,12 @@ na_large_string <- function(nullable = TRUE) {
   .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$LARGE_STRING, isTRUE(nullable))
 }
 
+#' @rdname na_type
+#' @export
+na_string_view <- function(nullable = TRUE) {
+  .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$STRING_VIEW, isTRUE(nullable))
+}
+
 #' @rdname na_type
 #' @export
 na_binary <- function(nullable = TRUE) {
@@ -209,6 +215,12 @@ na_fixed_size_binary <- function(byte_width, nullable = 
TRUE) {
   )
 }
 
+#' @rdname na_type
+#' @export
+na_binary_view <- function(nullable = TRUE) {
+  .Call(nanoarrow_c_schema_init, NANOARROW_TYPE$BINARY_VIEW, isTRUE(nullable))
+}
+
 #' @rdname na_type
 #' @export
 na_date32 <- function(nullable = TRUE) {
@@ -460,7 +472,10 @@ NANOARROW_TYPE <- list(
   LARGE_STRING = 35L,
   LARGE_BINARY = 36L,
   LARGE_LIST = 37L,
-  INTERVAL_MONTH_DAY_NANO = 38L
+  INTERVAL_MONTH_DAY_NANO = 38L,
+  RUN_END_ENCODED = 39L,
+  BINARY_VIEW = 40L,
+  STRING_VIEW = 41L
 )
 
 ARROW_FLAG <- list(
diff --git a/r/man/na_type.Rd b/r/man/na_type.Rd
index a678cd21..ca58123f 100644
--- a/r/man/na_type.Rd
+++ b/r/man/na_type.Rd
@@ -17,9 +17,11 @@
 \alias{na_double}
 \alias{na_string}
 \alias{na_large_string}
+\alias{na_string_view}
 \alias{na_binary}
 \alias{na_large_binary}
 \alias{na_fixed_size_binary}
+\alias{na_binary_view}
 \alias{na_date32}
 \alias{na_date64}
 \alias{na_time32}
@@ -91,12 +93,16 @@ na_string(nullable = TRUE)
 
 na_large_string(nullable = TRUE)
 
+na_string_view(nullable = TRUE)
+
 na_binary(nullable = TRUE)
 
 na_large_binary(nullable = TRUE)
 
 na_fixed_size_binary(byte_width, nullable = TRUE)
 
+na_binary_view(nullable = TRUE)
+
 na_date32(nullable = TRUE)
 
 na_date64(nullable = TRUE)
diff --git a/r/src/array.c b/r/src/array.c
index 0b84cd5f..5ae53ddd 100644
--- a/r/src/array.c
+++ b/r/src/array.c
@@ -370,13 +370,43 @@ static SEXP borrow_buffer(struct ArrowArrayView* 
array_view, int64_t i, SEXP she
   SEXP buffer_class = PROTECT(Rf_allocVector(STRSXP, 2));
   SET_STRING_ELT(buffer_class, 1, Rf_mkChar("nanoarrow_buffer"));
 
+  struct ArrowBufferView view;
+  enum ArrowBufferType buffer_type;
+  enum ArrowType data_type;
+  int64_t element_size_bits;
+  if ((array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+       array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) &&
+      i >= NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+    view.data.data = array_view->array->buffers[i];
+
+    if (i == (array_view->n_variadic_buffers + 
NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+      view.size_bytes = array_view->n_variadic_buffers * sizeof(int64_t);
+      buffer_type = NANOARROW_BUFFER_TYPE_DATA;
+      data_type = NANOARROW_TYPE_INT64;
+      element_size_bits = 64;
+    } else {
+      view.size_bytes =
+          array_view->variadic_buffer_sizes[i - 
NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
+      buffer_type = NANOARROW_BUFFER_TYPE_DATA;
+
+      if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
+        data_type = NANOARROW_TYPE_STRING;
+      } else {
+        data_type = NANOARROW_TYPE_BINARY;
+      }
+      element_size_bits = 0;
+    }
+  } else {
+    view = array_view->buffer_views[i];
+    buffer_type = array_view->layout.buffer_type[i];
+    data_type = array_view->layout.buffer_data_type[i];
+    element_size_bits = array_view->layout.element_size_bits[i];
+  }
+
   SEXP buffer_xptr =
-      PROTECT(buffer_borrowed_xptr(array_view->buffer_views[i].data.data,
-                                   array_view->buffer_views[i].size_bytes, 
shelter));
+      PROTECT(buffer_borrowed_xptr(view.data.data, view.size_bytes, shelter));
 
-  buffer_borrowed_xptr_set_type(buffer_xptr, array_view->layout.buffer_type[i],
-                                array_view->layout.buffer_data_type[i],
-                                array_view->layout.element_size_bits[i]);
+  buffer_borrowed_xptr_set_type(buffer_xptr, buffer_type, data_type, 
element_size_bits);
   UNPROTECT(2);
   return buffer_xptr;
 }
diff --git a/r/src/as_array.c b/r/src/as_array.c
index f6f3a98f..4825033d 100644
--- a/r/src/as_array.c
+++ b/r/src/as_array.c
@@ -301,82 +301,57 @@ static void as_array_dbl(SEXP x_sexp, struct ArrowArray* 
array, SEXP schema_xptr
 
 static void as_array_chr(SEXP x_sexp, struct ArrowArray* array, SEXP 
schema_xptr,
                          struct ArrowSchemaView* schema_view, struct 
ArrowError* error) {
-  // Only consider the default create for now
-  if (schema_view->type != NANOARROW_TYPE_STRING) {
-    call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
-    return;
+  switch (schema_view->type) {
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_STRING_VIEW:
+    case NANOARROW_TYPE_BINARY_VIEW:
+      break;
+    default:
+      call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
+      return;
   }
 
   int64_t len = Rf_xlength(x_sexp);
 
-  int result = ArrowArrayInitFromType(array, NANOARROW_TYPE_STRING);
+  int result = ArrowArrayInitFromType(array, schema_view->type);
   if (result != NANOARROW_OK) {
     Rf_error("ArrowArrayInitFromType() failed");
   }
 
-  // Keep these buffers under the umbrella of the array so that we don't have
-  // to worry about cleaning them up if STRING_ELT jumps
-  struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
-  struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 2);
-
-  result = ArrowBufferReserve(offset_buffer, (len + 1) * sizeof(int32_t));
+  result = ArrowArrayStartAppending(array);
   if (result != NANOARROW_OK) {
-    Rf_error("ArrowBufferReserve() failed");
+    Rf_error("ArrowArrayStartAppending() failed");
   }
 
-  int64_t null_count = 0;
-  int32_t cumulative_len = 0;
-  ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
-
+  struct ArrowStringView item_view;
   for (int64_t i = 0; i < len; i++) {
     SEXP item = STRING_ELT(x_sexp, i);
+
     if (item == NA_STRING) {
-      null_count++;
+      result = ArrowArrayAppendNull(array, 1);
+      if (result != NANOARROW_OK) {
+        Rf_error("ArrowArrayAppendNull() failed");
+      }
     } else {
       const void* vmax = vmaxget();
-      const char* item_utf8 = Rf_translateCharUTF8(item);
-      int64_t item_size = strlen(item_utf8);
-      if ((item_size + cumulative_len) > INT_MAX) {
-        Rf_error("Use na_large_string() to convert character() with total size 
> 2GB");
-      }
-
-      int result = ArrowBufferAppend(data_buffer, item_utf8, item_size);
+      item_view.data = Rf_translateCharUTF8(item);
+      item_view.size_bytes = strlen(item_view.data);
+      result = ArrowArrayAppendString(array, item_view);
       if (result != NANOARROW_OK) {
-        Rf_error("ArrowBufferAppend() failed");
+        Rf_error("ArrowArrayAppendString() failed");
       }
-      cumulative_len += (int32_t)item_size;
 
       vmaxset(vmax);
     }
-
-    ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
   }
 
-  // Set the array fields
-  array->length = len;
-  array->offset = 0;
-
-  // If there are nulls, pack the validity buffer
-  if (null_count > 0) {
-    struct ArrowBitmap bitmap;
-    ArrowBitmapInit(&bitmap);
-    result = ArrowBitmapReserve(&bitmap, len);
-    if (result != NANOARROW_OK) {
-      Rf_error("ArrowBitmapReserve() failed");
-    }
-
-    for (int64_t i = 0; i < len; i++) {
-      uint8_t is_valid = STRING_ELT(x_sexp, i) != NA_STRING;
-      ArrowBitmapAppendUnsafe(&bitmap, is_valid, 1);
-    }
-
-    ArrowArraySetValidityBitmap(array, &bitmap);
-  }
-
-  array->null_count = null_count;
   result = ArrowArrayFinishBuildingDefault(array, error);
   if (result != NANOARROW_OK) {
-    Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
+    Rf_error("ArrowArrayFinishBuildingDefault() failed with code %d: %s", 
result,
+             error->message);
   }
 }
 
@@ -428,83 +403,57 @@ static void as_array_data_frame(SEXP x_sexp, struct 
ArrowArray* array, SEXP sche
 
 static void as_array_list(SEXP x_sexp, struct ArrowArray* array, SEXP 
schema_xptr,
                           struct ArrowSchemaView* schema_view, struct 
ArrowError* error) {
-  // We handle list(raw()) in C but fall back to S3 for other types of list 
output.
-  // Arbitrary nested list support is complicated in C without some concept of 
a
-  // "builder", which we don't use.
-  if (schema_view->type != NANOARROW_TYPE_BINARY) {
-    call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
-    return;
-  }
-
-  int result = ArrowArrayInitFromType(array, schema_view->type);
-  if (result != NANOARROW_OK) {
-    Rf_error("ArrowArrayInitFromType() failed");
+  switch (schema_view->type) {
+    case NANOARROW_TYPE_BINARY:
+    case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+    case NANOARROW_TYPE_BINARY_VIEW:
+      break;
+    default:
+      call_as_nanoarrow_array(x_sexp, array, schema_xptr, 
"as_nanoarrow_array_from_c");
+      return;
   }
 
   int64_t len = Rf_xlength(x_sexp);
-  struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
-  struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 2);
 
-  result = ArrowBufferReserve(offset_buffer, (len + 1) * sizeof(int32_t));
+  // Use the schema here to ensure that the fixed-size binary byte width works
+  struct ArrowSchema* schema = nanoarrow_schema_from_xptr(schema_xptr);
+  int result = ArrowArrayInitFromSchema(array, schema, error);
   if (result != NANOARROW_OK) {
-    Rf_error("ArrowBufferReserve() failed");
+    Rf_error("ArrowArrayInitFromSchema() failed: %s", error->message);
   }
 
-  int64_t null_count = 0;
-  int32_t cumulative_len = 0;
-  ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
+  result = ArrowArrayStartAppending(array);
+  if (result != NANOARROW_OK) {
+    Rf_error("ArrowArrayStartAppending() failed");
+  }
 
+  struct ArrowBufferView item_view;
   for (int64_t i = 0; i < len; i++) {
     SEXP item = VECTOR_ELT(x_sexp, i);
-    if (item == R_NilValue) {
-      ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
-      null_count++;
-      continue;
-    }
 
-    if (Rf_isObject(item) || TYPEOF(item) != RAWSXP) {
-      Rf_error("All list items must be raw() or NULL in conversion to 
na_binary()");
-    }
-
-    int64_t item_size = Rf_xlength(item);
-    if ((item_size + cumulative_len) > INT_MAX) {
-      Rf_error("Use na_large_binary() to convert list(raw()) with total size > 
2GB");
-    }
-
-    result = ArrowBufferAppend(data_buffer, RAW(item), item_size);
-    if (result != NANOARROW_OK) {
-      Rf_error("ArrowBufferAppend() failed");
-    }
-
-    cumulative_len += (int32_t)item_size;
-    ArrowBufferAppendUnsafe(offset_buffer, &cumulative_len, sizeof(int32_t));
-  }
-
-  // Set the array fields
-  array->length = len;
-  array->offset = 0;
-
-  // If there are nulls, pack the validity buffer
-  if (null_count > 0) {
-    struct ArrowBitmap bitmap;
-    ArrowBitmapInit(&bitmap);
-    result = ArrowBitmapReserve(&bitmap, len);
-    if (result != NANOARROW_OK) {
-      Rf_error("ArrowBitmapReserve() failed");
-    }
-
-    for (int64_t i = 0; i < len; i++) {
-      uint8_t is_valid = VECTOR_ELT(x_sexp, i) != R_NilValue;
-      ArrowBitmapAppendUnsafe(&bitmap, is_valid, 1);
+    if (item == R_NilValue) {
+      result = ArrowArrayAppendNull(array, 1);
+      if (result != NANOARROW_OK) {
+        Rf_error("ArrowArrayAppendNull() failed");
+      }
+    } else if (TYPEOF(item) == RAWSXP) {
+      item_view.data.data = RAW(item);
+      item_view.size_bytes = Rf_xlength(item);
+      result = ArrowArrayAppendBytes(array, item_view);
+      if (result != NANOARROW_OK) {
+        Rf_error("ArrowArrayAppendBytes() failed");
+      }
+    } else {
+      Rf_error("All list items must be raw() or NULL in conversion to %s",
+               ArrowTypeString(schema_view->type));
     }
-
-    ArrowArraySetValidityBitmap(array, &bitmap);
   }
 
-  array->null_count = null_count;
   result = ArrowArrayFinishBuildingDefault(array, error);
   if (result != NANOARROW_OK) {
-    Rf_error("ArrowArrayFinishBuildingDefault(): %s", error->message);
+    Rf_error("ArrowArrayFinishBuildingDefault() failed with code %d: %s", 
result,
+             error->message);
   }
 }
 
diff --git a/r/src/buffer.c b/r/src/buffer.c
index 55e52228..20dd79dc 100644
--- a/r/src/buffer.c
+++ b/r/src/buffer.c
@@ -163,6 +163,9 @@ SEXP nanoarrow_c_buffer_info(SEXP buffer_xptr) {
       case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
         buffer_type_string = "union_offset";
         break;
+      case NANOARROW_BUFFER_TYPE_DATA_VIEW:
+        buffer_type_string = "data_view";
+        break;
       default:
         buffer_type_string = "unknown";
         break;
diff --git a/r/src/infer_ptype.c b/r/src/infer_ptype.c
index 1f5f8e04..2a352758 100644
--- a/r/src/infer_ptype.c
+++ b/r/src/infer_ptype.c
@@ -56,6 +56,7 @@ enum VectorType nanoarrow_infer_vector_type(enum ArrowType 
type) {
 
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_STRING_VIEW:
       return VECTOR_TYPE_CHR;
 
     case NANOARROW_TYPE_DENSE_UNION:
diff --git a/r/src/materialize_blob.h b/r/src/materialize_blob.h
index eb6fabaf..5114649f 100644
--- a/r/src/materialize_blob.h
+++ b/r/src/materialize_blob.h
@@ -33,6 +33,8 @@ static inline int nanoarrow_materialize_blob(struct 
ArrayViewSlice* src,
     case NANOARROW_TYPE_LARGE_STRING:
     case NANOARROW_TYPE_BINARY:
     case NANOARROW_TYPE_LARGE_BINARY:
+    case NANOARROW_TYPE_STRING_VIEW:
+    case NANOARROW_TYPE_BINARY_VIEW:
       break;
     default:
       return ENOTSUP;
diff --git a/r/src/materialize_chr.h b/r/src/materialize_chr.h
index 2db422b5..6fe857bd 100644
--- a/r/src/materialize_chr.h
+++ b/r/src/materialize_chr.h
@@ -66,6 +66,7 @@ static inline int nanoarrow_materialize_chr(struct 
RConverter* converter) {
 
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_STRING_VIEW:
       break;
 
     default:
diff --git a/r/tests/testthat/_snaps/array.md b/r/tests/testthat/_snaps/array.md
new file mode 100644
index 00000000..9af9870b
--- /dev/null
+++ b/r/tests/testthat/_snaps/array.md
@@ -0,0 +1,33 @@
+# string/binary view nanoarrow_array buffers print correctly
+
+    Code
+      print(view_array_all_inlined)
+    Output
+      <nanoarrow_array string_view[26]>
+       $ length    : int 26
+       $ null_count: int 0
+       $ offset    : int 0
+       $ buffers   :List of 3
+        ..$ :<nanoarrow_buffer validity<bool>[null] ``
+        ..$ :<nanoarrow_buffer data_view<string_view>[26][416 b]>`
+        ..$ :<nanoarrow_buffer data<int64>[null] ``
+       $ dictionary: NULL
+       $ children  : list()
+
+---
+
+    Code
+      print(view_array_not_all_inlined)
+    Output
+      <nanoarrow_array string_view[1]>
+       $ length    : int 1
+       $ null_count: int 0
+       $ offset    : int 0
+       $ buffers   :List of 4
+        ..$ :<nanoarrow_buffer validity<bool>[null] ``
+        ..$ :<nanoarrow_buffer data_view<string_view>[1][16 b]>`
+        ..$ :<nanoarrow_buffer data<string>[35 b]> `this string is longer than 
12 ...`
+        ..$ :<nanoarrow_buffer data<int64>[1][8 b]> `35`
+       $ dictionary: NULL
+       $ children  : list()
+
diff --git a/r/tests/testthat/test-array.R b/r/tests/testthat/test-array.R
index 9bbeac31..1e6f06e1 100644
--- a/r/tests/testthat/test-array.R
+++ b/r/tests/testthat/test-array.R
@@ -37,6 +37,17 @@ test_that("schemaless nanoarrow_array format, print, and str 
methods work", {
   expect_output(expect_identical(print(array), array), "nanoarrow_array")
 })
 
+test_that("string/binary view nanoarrow_array buffers print correctly", {
+  view_array_all_inlined <- as_nanoarrow_array(letters, schema = 
na_string_view())
+  expect_snapshot(print(view_array_all_inlined))
+
+  view_array_not_all_inlined <- as_nanoarrow_array(
+    "this string is longer than 12 bytes",
+    schema = na_string_view()
+  )
+  expect_snapshot(print(view_array_not_all_inlined))
+})
+
 test_that("as_nanoarrow_array() / convert_array() default method works", {
   array <- as_nanoarrow_array(1:10)
   expect_identical(convert_array(array), 1:10)
diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R
index fc9417aa..ed65ecd1 100644
--- a/r/tests/testthat/test-as-array.R
+++ b/r/tests/testthat/test-as-array.R
@@ -295,8 +295,6 @@ test_that("as_nanoarrow_array() works for character() -> 
na_string()", {
 })
 
 test_that("as_nanoarrow_array() works for character() -> na_large_string()", {
-  skip_if_not_installed("arrow")
-
   # Without nulls
   array <- as_nanoarrow_array(letters, schema = na_large_string())
   expect_identical(infer_nanoarrow_schema(array)$format, "U")
@@ -322,6 +320,37 @@ test_that("as_nanoarrow_array() works for character() -> 
na_large_string()", {
   )
 })
 
+test_that("as_nanoarrow_array() works for character() -> na_string_view()", {
+  # Without nulls
+  array <- as_nanoarrow_array(letters, schema = na_string_view())
+  expect_identical(infer_nanoarrow_schema(array)$format, "vu")
+  expect_identical(as.raw(array$buffers[[1]]), raw())
+  expect_identical(array$offset, 0L)
+  expect_identical(array$null_count, 0L)
+  # All these strings are at most 12 bytes long and thus are all inlined
+  expect_identical(length(array$buffers), 3L)
+  expect_identical(as.vector(array$buffers[[3]]), double())
+
+  # With nulls
+  array <- as_nanoarrow_array(c(letters, NA), schema = na_string_view())
+  expect_identical(infer_nanoarrow_schema(array)$format, "vu")
+  expect_identical(array$null_count, 1L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+  )
+  # All these strings are at most 12 bytes long and thus are all inlined
+  expect_identical(length(array$buffers), 3L)
+  expect_identical(as.vector(array$buffers[[3]]), double())
+
+  # With non-inlinable strings
+  item <- "this string is longer than 12 bytes"
+  array <- as_nanoarrow_array(item, schema = na_string_view())
+  expect_identical(length(array$buffers), 4L)
+  expect_identical(as.raw(array$buffers[[3]]), charToRaw(item))
+  expect_identical(as.vector(array$buffers[[4]]), as.double(nchar(item)))
+})
+
 test_that("as_nanoarrow_array() works for factor() -> na_dictionary()", {
   array <- as_nanoarrow_array(
     factor(letters),
@@ -504,9 +533,36 @@ test_that("as_nanoarrow_array() works for blob::blob() -> 
na_binary()", {
   )
 })
 
-test_that("as_nanoarrow_array() works for blob::blob() -> na_large_binary()", {
-  skip_if_not_installed("arrow")
+test_that("as_nanoarrow_array() works for blob::blob() -> 
na_fixed_size_binary()", {
+  # Without nulls
+  array <- as_nanoarrow_array(blob::as_blob(letters), schema = 
na_fixed_size_binary(1))
+  expect_identical(infer_nanoarrow_schema(array)$format, "w:1")
+  expect_identical(as.raw(array$buffers[[1]]), raw())
+  expect_identical(array$offset, 0L)
+  expect_identical(array$null_count, 0L)
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    as.raw(as_nanoarrow_buffer(paste(letters, collapse = "")))
+  )
 
+  # With nulls
+  array <- as_nanoarrow_array(
+    blob::as_blob(c(letters, NA)),
+    schema = na_fixed_size_binary(1)
+  )
+  expect_identical(infer_nanoarrow_schema(array)$format, "w:1")
+  expect_identical(array$null_count, 1L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+  )
+  expect_identical(
+    as.raw(array$buffers[[2]]),
+    c(as.raw(as_nanoarrow_buffer(paste(letters, collapse = ""))), as.raw(0x00))
+  )
+})
+
+test_that("as_nanoarrow_array() works for blob::blob() -> na_large_binary()", {
   # Without nulls
   array <- as_nanoarrow_array(blob::as_blob(letters), schema = 
na_large_binary())
   expect_identical(infer_nanoarrow_schema(array)$format, "Z")
@@ -535,6 +591,39 @@ test_that("as_nanoarrow_array() works for blob::blob() -> 
na_large_binary()", {
   )
 })
 
+test_that("as_nanoarrow_array() works for blob::blob() -> na_binary_view()", {
+  # Without nulls
+  array <- as_nanoarrow_array(blob::as_blob(letters), schema = 
na_binary_view())
+  expect_identical(infer_nanoarrow_schema(array)$format, "vz")
+  expect_identical(as.raw(array$buffers[[1]]), raw())
+  expect_identical(array$offset, 0L)
+  expect_identical(array$null_count, 0L)
+  # All these blobs are at most 12 bytes long and thus are all inlined
+  expect_identical(length(array$buffers), 3L)
+  expect_identical(as.vector(array$buffers[[3]]), double())
+
+  # With nulls
+  array <- as_nanoarrow_array(
+    blob::as_blob(c(letters, NA)),
+    schema = na_binary_view()
+  )
+  expect_identical(infer_nanoarrow_schema(array)$format, "vz")
+  expect_identical(array$null_count, 1L)
+  expect_identical(
+    as.raw(array$buffers[[1]]),
+    packBits(c(rep(TRUE, 26), FALSE, rep(FALSE, 5)))
+  )
+  # All these blobs are at most 12 bytes long and thus are all inlined
+  expect_identical(length(array$buffers), 3L)
+  expect_identical(as.vector(array$buffers[[3]]), double())
+
+  # With non-inlinable strings
+  item <- list(charToRaw("this string is longer than 12 bytes"))
+  array <- as_nanoarrow_array(item, schema = na_binary_view())
+  expect_identical(length(array$buffers), 4L)
+  expect_identical(as.raw(array$buffers[[3]]), item[[1]])
+  expect_identical(as.vector(array$buffers[[4]]), as.double(length(item[[1]])))
+})
 
 test_that("as_nanoarrow_array() works for list(raw()) -> na_binary()", {
   # Without nulls
diff --git a/r/tests/testthat/test-buffer.R b/r/tests/testthat/test-buffer.R
index 031218b7..400bce91 100644
--- a/r/tests/testthat/test-buffer.R
+++ b/r/tests/testthat/test-buffer.R
@@ -61,6 +61,11 @@ test_that("buffers can be printed", {
   expect_snapshot(str(array$buffers[[2]]))
 })
 
+test_that("buffers whose pointer is NULL print as such", {
+  empty_buffer <- as_nanoarrow_buffer(logical())
+  expect_match(format(empty_buffer), "data<int32>[null]", fixed = TRUE)
+})
+
 test_that("as_nanoarrow_buffer() errors for unsupported types", {
   expect_error(
     as_nanoarrow_buffer(NA_character_),
diff --git a/r/tests/testthat/test-convert-array.R 
b/r/tests/testthat/test-convert-array.R
index 17d192fd..bf7f45c9 100644
--- a/r/tests/testthat/test-convert-array.R
+++ b/r/tests/testthat/test-convert-array.R
@@ -781,6 +781,20 @@ test_that("convert to vector works for character()", {
   )
 })
 
+test_that("convert to vector works for string_view -> character()", {
+  array <- as_nanoarrow_array(letters, schema = na_string_view())
+  expect_identical(
+    convert_array(array, character()),
+    letters
+  )
+
+  array_with_nulls <- as_nanoarrow_array(c(letters, NA), schema = 
na_string_view())
+  expect_identical(
+    convert_array(array_with_nulls, character()),
+    c(letters, NA)
+  )
+})
+
 test_that("convert to vector works for null -> character()", {
   array <- nanoarrow_array_init(na_na())
   array$length <- 10
@@ -893,6 +907,22 @@ test_that("convert to vector works for blob::blob()", {
   )
 })
 
+test_that("convert to vector works for binary_view -> blob::blob()", {
+  skip_if_not_installed("blob")
+
+  array <- as_nanoarrow_array(letters, schema = na_binary_view())
+  expect_identical(
+    convert_array(array, blob::blob()),
+    blob::as_blob(lapply(letters, charToRaw))
+  )
+
+  array_with_nulls <- as_nanoarrow_array(c(letters, NA), schema = 
na_binary_view())
+  expect_identical(
+    convert_array(array_with_nulls, blob::blob()),
+    blob::as_blob(c(lapply(letters, charToRaw), list(NULL)))
+  )
+})
+
 test_that("convert to vector works for null -> blob::blob()", {
   array <- nanoarrow_array_init(na_na())
   array$length <- 10

Reply via email to