This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1e6ca7e  feat: Add `enum ArrowType buffer_data_type` member to `struct ArrowLayout` (#207)
1e6ca7e is described below

commit 1e6ca7e2080ba0cab54ee6a23fd42df4dfb76e30
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue May 30 08:49:38 2023 -0400

    feat: Add `enum ArrowType buffer_data_type` member to `struct ArrowLayout` (#207)
    
    The existing `enum ArrowBufferType` is useful to switch on for some
    things and the `element_size_bits` is useful for calculating byte
    offsets; however, the combination of those is still not sufficient to do
    endian-swapping as each Arrow type has its own rules. This concept has
    also come up when printing buffers in the R package and when exporting
    in Python via the buffer protocol, both of which had their own
    workarounds. Attaching a reasonable `ArrowType` to each buffer should
    provide a generic route to solving all of those.
---
 src/nanoarrow/nanoarrow_types.h |  3 +++
 src/nanoarrow/schema_test.cc    | 45 +++++++++++++++++++++++++++++++++++++++++
 src/nanoarrow/utils.c           | 43 ++++++++++++++++++++++++++++++---------
 3 files changed, 82 insertions(+), 9 deletions(-)

diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index 2fac404..79954db 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -554,6 +554,9 @@ struct ArrowLayout {
   /// \brief The function of each buffer
   enum ArrowBufferType buffer_type[3];
 
+  /// \brief The data type of each buffer
+  enum ArrowType buffer_data_type[3];
+
   /// \brief The size of an element each buffer or 0 if this size is variable 
or unknown
   int64_t element_size_bits[3];
 
diff --git a/src/nanoarrow/schema_test.cc b/src/nanoarrow/schema_test.cc
index 72ccb18..c7419e9 100644
--- a/src/nanoarrow/schema_test.cc
+++ b/src/nanoarrow/schema_test.cc
@@ -575,6 +575,9 @@ void ExpectSimpleTypeOk(std::shared_ptr<DataType> arrow_t, 
enum ArrowType nanoar
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], nanoarrow_t);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], bitwidth);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -638,6 +641,9 @@ TEST(SchemaViewTest, SchemaViewInitDecimal) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL128);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 128);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -654,6 +660,9 @@ TEST(SchemaViewTest, SchemaViewInitDecimal) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_DECIMAL256);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 256);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -721,6 +730,9 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_DATA);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_BINARY);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 123 * 8);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -735,6 +747,9 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_STRING);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -748,6 +763,9 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_BINARY);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -761,6 +779,9 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT64);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_BINARY);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -774,6 +795,9 @@ TEST(SchemaViewTest, SchemaViewInitBinaryAndString) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_DATA);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT64);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], NANOARROW_TYPE_STRING);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1037,6 +1061,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1050,6 +1077,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT64);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 64);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1063,6 +1093,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedList) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], 
NANOARROW_TYPE_UNINITIALIZED);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1109,6 +1142,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedStruct) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], 
NANOARROW_TYPE_UNINITIALIZED);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1158,6 +1194,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedMap) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_BOOL);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 1);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1238,6 +1277,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedUnion) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_TYPE_ID);
   EXPECT_EQ(schema_view.layout.buffer_type[1], 
NANOARROW_BUFFER_TYPE_UNION_OFFSET);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_INT8);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], NANOARROW_TYPE_INT32);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 8);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 32);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
@@ -1252,6 +1294,9 @@ TEST(SchemaViewTest, SchemaViewInitNestedUnion) {
   EXPECT_EQ(schema_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_TYPE_ID);
   EXPECT_EQ(schema_view.layout.buffer_type[1], NANOARROW_BUFFER_TYPE_NONE);
   EXPECT_EQ(schema_view.layout.buffer_type[2], NANOARROW_BUFFER_TYPE_NONE);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[0], NANOARROW_TYPE_INT8);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[1], 
NANOARROW_TYPE_UNINITIALIZED);
+  EXPECT_EQ(schema_view.layout.buffer_data_type[2], 
NANOARROW_TYPE_UNINITIALIZED);
   EXPECT_EQ(schema_view.layout.element_size_bits[0], 8);
   EXPECT_EQ(schema_view.layout.element_size_bits[1], 0);
   EXPECT_EQ(schema_view.layout.element_size_bits[2], 0);
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index b16fdb9..fb9effa 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -59,8 +59,11 @@ const char* ArrowErrorMessage(struct ArrowError* error) {
 
 void ArrowLayoutInit(struct ArrowLayout* layout, enum ArrowType storage_type) {
   layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_VALIDITY;
-  layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+  layout->buffer_data_type[0] = NANOARROW_TYPE_BOOL;
+  layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+  layout->buffer_data_type[1] = storage_type;
   layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_NONE;
+  layout->buffer_data_type[2] = NANOARROW_TYPE_UNINITIALIZED;
 
   layout->element_size_bits[0] = 1;
   layout->element_size_bits[1] = 0;
@@ -72,43 +75,53 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum 
ArrowType storage_type) {
     case NANOARROW_TYPE_UNINITIALIZED:
     case NANOARROW_TYPE_NA:
       layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
+      layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
       layout->element_size_bits[0] = 0;
       break;
 
     case NANOARROW_TYPE_LIST:
     case NANOARROW_TYPE_MAP:
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
       layout->element_size_bits[1] = 32;
       break;
 
     case NANOARROW_TYPE_LARGE_LIST:
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
       layout->element_size_bits[1] = 64;
       break;
 
+    case NANOARROW_TYPE_STRUCT:
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
+      break;
+
     case NANOARROW_TYPE_BOOL:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 1;
       break;
 
     case NANOARROW_TYPE_UINT8:
     case NANOARROW_TYPE_INT8:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 8;
       break;
 
     case NANOARROW_TYPE_UINT16:
     case NANOARROW_TYPE_INT16:
     case NANOARROW_TYPE_HALF_FLOAT:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 16;
       break;
 
     case NANOARROW_TYPE_UINT32:
     case NANOARROW_TYPE_INT32:
     case NANOARROW_TYPE_FLOAT:
+      layout->element_size_bits[1] = 32;
+      break;
     case NANOARROW_TYPE_INTERVAL_MONTHS:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
       layout->element_size_bits[1] = 32;
       break;
 
@@ -116,49 +129,61 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum 
ArrowType storage_type) {
     case NANOARROW_TYPE_INT64:
     case NANOARROW_TYPE_DOUBLE:
     case NANOARROW_TYPE_INTERVAL_DAY_TIME:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 64;
       break;
 
     case NANOARROW_TYPE_DECIMAL128:
     case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 128;
       break;
 
     case NANOARROW_TYPE_DECIMAL256:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
       layout->element_size_bits[1] = 256;
       break;
 
     case NANOARROW_TYPE_FIXED_SIZE_BINARY:
-      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY;
       break;
 
     case NANOARROW_TYPE_DENSE_UNION:
       layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+      layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
       layout->element_size_bits[0] = 8;
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_UNION_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
       layout->element_size_bits[1] = 32;
       break;
 
     case NANOARROW_TYPE_SPARSE_UNION:
       layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_TYPE_ID;
+      layout->buffer_data_type[0] = NANOARROW_TYPE_INT8;
       layout->element_size_bits[0] = 8;
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_UNINITIALIZED;
       break;
 
     case NANOARROW_TYPE_STRING:
     case NANOARROW_TYPE_BINARY:
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT32;
       layout->element_size_bits[1] = 32;
       layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[2] = storage_type;
       break;
 
     case NANOARROW_TYPE_LARGE_STRING:
+      layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
+      layout->element_size_bits[1] = 64;
+      layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[2] = NANOARROW_TYPE_STRING;
+      break;
     case NANOARROW_TYPE_LARGE_BINARY:
       layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA_OFFSET;
+      layout->buffer_data_type[1] = NANOARROW_TYPE_INT64;
       layout->element_size_bits[1] = 64;
       layout->buffer_type[2] = NANOARROW_BUFFER_TYPE_DATA;
+      layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
       break;
 
     default:

Reply via email to