This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 07880fe Implement array appenders (#16)
07880fe is described below
commit 07880feff2117c05bb1189f605a93f42d91bc744
Author: Dewey Dunnington <[email protected]>
AuthorDate: Thu Aug 18 12:22:57 2022 -0300
Implement array appenders (#16)
* squash!
* maybe fix gtest build
* try setting the linker path
* ldconfig everywhere?
* fix bad merges
* better name/implementation for start appending
* add ArrowArrayInitFromSchema
* actually test appending fixed size binary
* document appenders, make sure logic is going to work for nested types
* fix some things
* with passing tests for nested appends
* fix error in tests
* add generic reserve
* with more integer appenders
* sketch uint appenders
* append to uint arrays
* more signed/unsigned appends
* use NANOARROW_RETURN_NOT_OK() in array_inline.h
* use NANOARROW_RETURN_NOT_OK() in array.c
* test float/double arrays
* sketch initial boolean support
* actually support bool columns
* fix array test
* make shrink to fit its own function
* give an error message and check lengths that depend on offsets
* add error message to setarray
* tiny rabbit hole making sure list lengths get checked
---
.github/workflows/build-and-test.yaml | 23 +-
CMakeLists.txt | 2 +-
src/nanoarrow/array.c | 248 +++++++++++--
src/nanoarrow/array_inline.h | 400 ++++++++++++++++++++-
src/nanoarrow/array_test.cc | 633 +++++++++++++++++++++++++++++++++-
src/nanoarrow/array_view.c | 80 ++++-
src/nanoarrow/array_view_test.cc | 85 ++++-
src/nanoarrow/buffer_inline.h | 12 +
src/nanoarrow/buffer_test.cc | 23 ++
src/nanoarrow/error.c | 4 +
src/nanoarrow/nanoarrow.h | 102 +++++-
src/nanoarrow/typedefs_inline.h | 3 +
src/nanoarrow/utils_inline.h | 16 +
13 files changed, 1545 insertions(+), 86 deletions(-)
diff --git a/.github/workflows/build-and-test.yaml
b/.github/workflows/build-and-test.yaml
index a2ad105..a0a6023 100644
--- a/.github/workflows/build-and-test.yaml
+++ b/.github/workflows/build-and-test.yaml
@@ -41,7 +41,7 @@ jobs:
uses: actions/cache@v3
with:
path: build-deps
- key: ${{ runner.os }}-3
+ key: ${{ runner.os }}-5
- name: Init build dir
if: steps.cache-deps-build.outputs.cache-hit != 'true'
@@ -62,10 +62,15 @@ jobs:
if: steps.cache-deps-build.outputs.cache-hit != 'true'
run: |
cd build-deps/googletest
- cmake .
+ cmake . -DCMAKE_CXX_FLAGS=-fPIC
cmake --build .
cmake --install . --prefix ../../dist
+ - name: Install googletest
+ run: |
+ cd build-deps/googletest
+ cmake --install . --prefix ../../dist
+
- name: Fetch Arrow
if: steps.cache-deps-build.outputs.cache-hit != 'true'
uses: actions/checkout@v3
@@ -80,19 +85,19 @@ jobs:
run: |
mkdir build-deps/arrow-build
cd build-deps/arrow-build
- cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON
-DBoost_SOURCE=BUNDLED
+ cmake ../arrow/cpp -DARROW_JSON=ON -DARROW_TESTING=ON
-DBoost_SOURCE=BUNDLED -DGTest_DIR=`pwd`/../../dist/lib/cmake/GTest
cmake --build .
cmake --install . --prefix ../../dist
- - name: Install Dependencies
+ - name: Install arrow
run: |
cd build-deps/arrow-build
cmake --install . --prefix ../../dist
- cd ../googletest
- cmake --install . --prefix ../../dist
- name: Build nanoarrow
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug
-DGTest_DIR=`pwd`/../dist/lib/cmake/GTest
-DArrow_DIR=`pwd`/../dist/lib/cmake/arrow
-DArrowTesting_DIR=`pwd`/../dist/lib/cmake/arrow -DNANOARROW_CODE_COVERAGE=ON
-DNANOARROW_BUILD_TESTS=ON
@@ -100,13 +105,17 @@ jobs:
- name: Run tests
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
cd build
ctest -T test --output-on-failure .
- name: Run tests with valgrind
run: |
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`pwd`/dist/lib
+ sudo ldconfig
cd build
- ctest -T memcheck .
+ ctest -T memcheck .
- name: Calculate coverage
run: |
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 95383cc..6121b05 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -78,7 +78,7 @@ if (NANOARROW_BUILD_TESTS)
target_link_libraries(allocator_test nanoarrow GTest::gtest_main
arrow_shared arrow_testing_shared)
target_link_libraries(buffer_test nanoarrow GTest::gtest_main)
target_link_libraries(bitmap_test nanoarrow GTest::gtest_main)
- target_link_libraries(array_test nanoarrow GTest::gtest_main)
+ target_link_libraries(array_test nanoarrow GTest::gtest_main arrow_shared
arrow_testing_shared)
target_link_libraries(array_view_test nanoarrow GTest::gtest_main)
target_link_libraries(error_test nanoarrow GTest::gtest_main)
target_link_libraries(metadata_test nanoarrow GTest::gtest_main
arrow_shared arrow_testing_shared)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 460cfd1..e2a5c46 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -23,12 +23,13 @@
static void ArrowArrayRelease(struct ArrowArray* array) {
// Release buffers held by this array
- struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
- if (data != NULL) {
- ArrowBitmapReset(&data->bitmap);
- ArrowBufferReset(&data->buffers[0]);
- ArrowBufferReset(&data->buffers[1]);
- ArrowFree(data);
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ if (private_data != NULL) {
+ ArrowBitmapReset(&private_data->bitmap);
+ ArrowBufferReset(&private_data->buffers[0]);
+ ArrowBufferReset(&private_data->buffers[1]);
+ ArrowFree(private_data);
}
// This object owns the memory for all the children, but those
@@ -71,8 +72,6 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray*
array,
array->n_buffers = 0;
break;
- case NANOARROW_TYPE_LIST:
- case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_STRUCT:
case NANOARROW_TYPE_MAP:
@@ -80,6 +79,8 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray*
array,
array->n_buffers = 1;
break;
+ case NANOARROW_TYPE_LIST:
+ case NANOARROW_TYPE_LARGE_LIST:
case NANOARROW_TYPE_BOOL:
case NANOARROW_TYPE_UINT8:
case NANOARROW_TYPE_INT8:
@@ -92,6 +93,8 @@ ArrowErrorCode ArrowArraySetStorageType(struct ArrowArray*
array,
case NANOARROW_TYPE_HALF_FLOAT:
case NANOARROW_TYPE_FLOAT:
case NANOARROW_TYPE_DOUBLE:
+ case NANOARROW_TYPE_DECIMAL128:
+ case NANOARROW_TYPE_DECIMAL256:
case NANOARROW_TYPE_INTERVAL_MONTHS:
case NANOARROW_TYPE_INTERVAL_DAY_TIME:
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
@@ -109,16 +112,19 @@ ArrowErrorCode ArrowArraySetStorageType(struct
ArrowArray* array,
default:
return EINVAL;
+
+ return NANOARROW_OK;
}
- struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
- data->storage_type = storage_type;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ private_data->storage_type = storage_type;
return NANOARROW_OK;
}
ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType
storage_type) {
array->length = 0;
- array->null_count = -1;
+ array->null_count = 0;
array->offset = 0;
array->n_buffers = 0;
array->n_children = 0;
@@ -128,22 +134,22 @@ ArrowErrorCode ArrowArrayInit(struct ArrowArray* array,
enum ArrowType storage_t
array->release = &ArrowArrayRelease;
array->private_data = NULL;
- struct ArrowArrayPrivateData* data =
+ struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)ArrowMalloc(sizeof(struct
ArrowArrayPrivateData));
- if (data == NULL) {
+ if (private_data == NULL) {
array->release = NULL;
return ENOMEM;
}
- ArrowBitmapInit(&data->bitmap);
- ArrowBufferInit(&data->buffers[0]);
- ArrowBufferInit(&data->buffers[1]);
- data->buffer_data[0] = NULL;
- data->buffer_data[1] = NULL;
- data->buffer_data[2] = NULL;
+ ArrowBitmapInit(&private_data->bitmap);
+ ArrowBufferInit(&private_data->buffers[0]);
+ ArrowBufferInit(&private_data->buffers[1]);
+ private_data->buffer_data[0] = NULL;
+ private_data->buffer_data[1] = NULL;
+ private_data->buffer_data[2] = NULL;
- array->private_data = data;
- array->buffers = (const void**)(&data->buffer_data);
+ array->private_data = private_data;
+ array->buffers = (const void**)(&private_data->buffer_data);
int result = ArrowArraySetStorageType(array, storage_type);
if (result != NANOARROW_OK) {
@@ -151,6 +157,44 @@ ArrowErrorCode ArrowArrayInit(struct ArrowArray* array,
enum ArrowType storage_t
return result;
}
+ ArrowLayoutInit(&private_data->layout, storage_type);
+ return NANOARROW_OK;
+}
+
+// Recursively initialize `array` so that its storage type, buffer layout and
+// child structure mirror those of `array_view`.
+//
+// Returns NANOARROW_OK on success. If ArrowArrayInit() fails its error code is
+// returned directly; if a later step fails, `array` is released before the
+// error code of the failing call is returned. `error` is only forwarded to the
+// recursive calls.
+static ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
+                                                  struct ArrowArrayView* array_view,
+                                                  struct ArrowError* error) {
+  // ArrowArrayInit() can fail (e.g., ENOMEM, which leaves array->private_data
+  // NULL), so check it before private_data is dereferenced below.
+  NANOARROW_RETURN_NOT_OK(ArrowArrayInit(array, array_view->storage_type));
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  int result = ArrowArrayAllocateChildren(array, array_view->n_children);
+  if (result != NANOARROW_OK) {
+    array->release(array);
+    return result;
+  }
+
+  // Copy the full layout (not just what the storage type implies) from the view
+  private_data->layout = array_view->layout;
+
+  for (int64_t i = 0; i < array_view->n_children; i++) {
+    result =
+        ArrowArrayInitFromArrayView(array->children[i], array_view->children[i], error);
+    if (result != NANOARROW_OK) {
+      array->release(array);
+      return result;
+    }
+  }
+
+  return NANOARROW_OK;
+}
+
+// Initialize `array` (including children) from the type described by `schema`.
+//
+// A temporary ArrowArrayView is built from the schema and used as the template
+// for the array; it is reset before returning on both the success and the
+// failure path so that it is never leaked. Returns NANOARROW_OK or the error
+// code of the first failing step.
+ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
+                                        struct ArrowSchema* schema,
+                                        struct ArrowError* error) {
+  struct ArrowArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema, error));
+
+  // Release the temporary view before inspecting the result
+  int result = ArrowArrayInitFromArrayView(array, &array_view, error);
+  ArrowArrayViewReset(&array_view);
+  if (result != NANOARROW_OK) {
+    return result;
+  }
+
return NANOARROW_OK;
}
@@ -200,26 +244,29 @@ ArrowErrorCode ArrowArrayAllocateDictionary(struct
ArrowArray* array) {
}
void ArrowArraySetValidityBitmap(struct ArrowArray* array, struct ArrowBitmap*
bitmap) {
- struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
- ArrowBufferMove(&bitmap->buffer, &data->bitmap.buffer);
- data->bitmap.size_bits = bitmap->size_bits;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+ ArrowBufferMove(&bitmap->buffer, &private_data->bitmap.buffer);
+ private_data->bitmap.size_bits = bitmap->size_bits;
bitmap->size_bits = 0;
- data->buffer_data[0] = data->bitmap.buffer.data;
+ private_data->buffer_data[0] = private_data->bitmap.buffer.data;
+ array->null_count = -1;
}
ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray* array, int64_t i,
struct ArrowBuffer* buffer) {
- struct ArrowArrayPrivateData* data = (struct
ArrowArrayPrivateData*)array->private_data;
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
switch (i) {
case 0:
- ArrowBufferMove(buffer, &data->bitmap.buffer);
- data->buffer_data[i] = data->bitmap.buffer.data;
+ ArrowBufferMove(buffer, &private_data->bitmap.buffer);
+ private_data->buffer_data[i] = private_data->bitmap.buffer.data;
break;
case 1:
case 2:
- ArrowBufferMove(buffer, &data->buffers[i - 1]);
- data->buffer_data[i] = data->buffers[i - 1].data;
+ ArrowBufferMove(buffer, &private_data->buffers[i - 1]);
+ private_data->buffer_data[i] = private_data->buffers[i - 1].data;
break;
default:
return EINVAL;
@@ -227,3 +274,144 @@ ArrowErrorCode ArrowArraySetBuffer(struct ArrowArray*
array, int64_t i,
return NANOARROW_OK;
}
+
+// Recursively initialize `array_view` with the storage type, layout and child
+// structure recorded in the private data of `array`.
+//
+// If allocating or initializing any child fails, `array_view` is reset and the
+// error code of the failing call is returned.
+static ArrowErrorCode ArrowArrayViewInitFromArray(struct ArrowArrayView* array_view,
+                                                  struct ArrowArray* array) {
+  struct ArrowArrayPrivateData* source =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  ArrowArrayViewInit(array_view, source->storage_type);
+  array_view->layout = source->layout;
+
+  // Stop at the first failure; the loop is skipped if allocation already failed
+  int status = ArrowArrayViewAllocateChildren(array_view, array->n_children);
+  for (int64_t child_i = 0; status == NANOARROW_OK && child_i < array->n_children;
+       child_i++) {
+    status = ArrowArrayViewInitFromArray(array_view->children[child_i],
+                                         array->children[child_i]);
+  }
+
+  if (status != NANOARROW_OK) {
+    ArrowArrayViewReset(array_view);
+  }
+
+  return status;
+}
+
+// For each buffer of `array`, reserve whatever is missing to reach the byte
+// count recorded in the matching buffer view of `array_view`, then do the same
+// for every child. Returns the first error reported by ArrowBufferReserve().
+static ArrowErrorCode ArrowArrayReserveInternal(struct ArrowArray* array,
+                                                struct ArrowArrayView* array_view) {
+  for (int64_t buffer_i = 0; buffer_i < array->n_buffers; buffer_i++) {
+    struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+    // A validity buffer that was never allocated stays unallocated
+    int is_validity =
+        array_view->layout.buffer_type[buffer_i] == NANOARROW_BUFFER_TYPE_VALIDITY;
+    if (is_validity && buffer->data == NULL) {
+      continue;
+    }
+
+    int64_t missing_bytes = array_view->buffer_views[buffer_i].n_bytes - buffer->size_bytes;
+    if (missing_bytes > 0) {
+      NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, missing_bytes));
+    }
+  }
+
+  // Children are handled the same way, each against its own child view
+  for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayReserveInternal(array->children[child_i],
+                                                      array_view->children[child_i]));
+  }
+
+  return NANOARROW_OK;
+}
+
+// Reserve space in the buffers of `array` (and, recursively, its children) for
+// `additional_size_elements` elements beyond the current array->length.
+//
+// The target buffer sizes come from a temporary ArrowArrayView whose length is
+// set to the prospective total; the view is reset before returning. Returns
+// NANOARROW_OK or the first error encountered.
+ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
+                                 int64_t additional_size_elements) {
+  struct ArrowArrayView array_view;
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
+
+  // Calculate theoretical buffer sizes (recursively)
+  ArrowArrayViewSetLength(&array_view, array->length + additional_size_elements);
+
+  // Walk the structure (recursively); the view is released whatever the outcome
+  int result = ArrowArrayReserveInternal(array, &array_view);
+  ArrowArrayViewReset(&array_view);
+  return result;
+}
+
+// Refresh the cached buffer_data pointers of `array` (and of all its children)
+// from the current data pointers of the three owned buffers.
+static void ArrowArrayFlushInternalPointers(struct ArrowArray* array) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+
+  // All three slots are refreshed regardless of array->n_buffers
+  priv->buffer_data[0] = ArrowArrayBuffer(array, 0)->data;
+  priv->buffer_data[1] = ArrowArrayBuffer(array, 1)->data;
+  priv->buffer_data[2] = ArrowArrayBuffer(array, 2)->data;
+
+  int64_t child_i = 0;
+  while (child_i < array->n_children) {
+    ArrowArrayFlushInternalPointers(array->children[child_i]);
+    child_i++;
+  }
+}
+
+// Recursively verify that each buffer owned by `array` holds at least as many
+// bytes as the corresponding buffer view in `array_view` expects.
+//
+// Returns NANOARROW_OK if every buffer is large enough. Otherwise a message
+// naming the offending buffer is stored in `error` and EINVAL is returned.
+static ArrowErrorCode ArrowArrayCheckInternalBufferSizes(
+    struct ArrowArray* array, struct ArrowArrayView* array_view,
+    struct ArrowError* error) {
+  for (int64_t i = 0; i < array->n_buffers; i++) {
+    // An unallocated validity buffer is accepted when there are no nulls
+    if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_VALIDITY &&
+        array->null_count == 0 && array->buffers[i] == NULL) {
+      continue;
+    }
+
+    int64_t expected_size = array_view->buffer_views[i].n_bytes;
+    int64_t actual_size = ArrowArrayBuffer(array, i)->size_bytes;
+
+    if (actual_size < expected_size) {
+      // i is an int64_t; cast it so the argument matches the %d conversion
+      ArrowErrorSet(
+          error,
+          "Expected buffer %d to size >= %ld bytes but found buffer with %ld bytes",
+          (int)i, (long)expected_size, (long)actual_size);
+      return EINVAL;
+    }
+  }
+
+  for (int64_t i = 0; i < array->n_children; i++) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayCheckInternalBufferSizes(
+        array->children[i], array_view->children[i], error));
+  }
+
+  return NANOARROW_OK;
+}
+
+// Finalize `array` after building: refresh the exported buffer pointers and
+// validate that every buffer is large enough for the array's length/offsets.
+//
+// Returns NANOARROW_OK on success. On failure the error code of the failing
+// step is returned (EINVAL for an undersized buffer) and, where the failing
+// step provides one, a message is stored in `error`.
+ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+                                        struct ArrowError* error) {
+  // Make sure the value we get with array->buffers[i] is set to the actual
+  // pointer (which may have changed from the original due to reallocation)
+  ArrowArrayFlushInternalPointers(array);
+
+  // Check buffer sizes to make sure we are not sending an ArrowArray
+  // into the wild that is going to segfault
+  struct ArrowArrayView array_view;
+
+  NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromArray(&array_view, array));
+  int result = ArrowArrayViewSetArray(&array_view, array, error);
+  if (result != NANOARROW_OK) {
+    ArrowArrayViewReset(&array_view);
+    return result;
+  }
+
+  result = ArrowArrayCheckInternalBufferSizes(array, &array_view, error);
+  ArrowArrayViewReset(&array_view);
+  return result;
+}
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index f8bb4d7..14335a8 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -19,12 +19,15 @@
#define NANOARROW_ARRAY_INLINE_H_INCLUDED
#include <errno.h>
+#include <float.h>
+#include <limits.h>
#include <stdint.h>
#include <string.h>
#include "bitmap_inline.h"
#include "buffer_inline.h"
#include "typedefs_inline.h"
+#include "utils_inline.h"
#ifdef __cplusplus
extern "C" {
@@ -47,33 +50,398 @@ static inline struct ArrowBuffer* ArrowArrayBuffer(struct
ArrowArray* array, int
}
}
-static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
- char shrink_to_fit) {
+static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray*
array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- // Make sure the value we get with array->buffers[i] is set to the actual
- // pointer (which may have changed from the original due to reallocation)
- int result;
- for (int64_t i = 0; i < 3; i++) {
- struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
- if (shrink_to_fit) {
- result = ArrowBufferResize(buffer, buffer->size_bytes, shrink_to_fit);
- if (result != NANOARROW_OK) {
- return result;
- }
+ if (private_data->storage_type == NANOARROW_TYPE_UNINITIALIZED) {
+ return EINVAL;
+ }
+
+ // Initialize any data offset buffer with a single zero
+ for (int i = 0; i < 3; i++) {
+ if (private_data->layout.buffer_type[i] ==
NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
+ private_data->layout.element_size_bits[i] == 64) {
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array,
i), 0));
+ } else if (private_data->layout.buffer_type[i] ==
NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
+ private_data->layout.element_size_bits[i] == 32) {
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array,
i), 0));
}
+ }
- private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
+ // Start building any child arrays
+ for (int64_t i = 0; i < array->n_children; i++) {
+ NANOARROW_RETURN_NOT_OK(ArrowArrayStartAppending(array->children[i]));
+ }
+
+ return NANOARROW_OK;
+}
+
+static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
+ struct ArrowArrayPrivateData* private_data =
+ (struct ArrowArrayPrivateData*)array->private_data;
+
+ for (int64_t i = 0; i < 3; i++) {
+ struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
+ NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
}
for (int64_t i = 0; i < array->n_children; i++) {
- result = ArrowArrayFinishBuilding(array->children[i], shrink_to_fit);
- if (result != NANOARROW_OK) {
- return result;
+ NANOARROW_RETURN_NOT_OK(ArrowArrayShrinkToFit(array->children[i]));
+ }
+
+ return NANOARROW_OK;
+}
+
+// Set `n` consecutive bit-packed elements of buffer `buffer_i`, starting at
+// element index array->length, to `value` (0 or 1).
+//
+// The buffer is first zero-extended so that it can hold array->length + n
+// elements. array->length itself is not modified; the caller updates it.
+// Returns NANOARROW_OK or the error from ArrowBufferAppendFill().
+static inline ArrowErrorCode _ArrowArrayAppendBits(struct ArrowArray* array,
+                                                   int64_t buffer_i, uint8_t value,
+                                                   int64_t n) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+  struct ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+  // Size for all n new elements, not just one, so ArrowBitsSetTo() below
+  // never writes past the end of the buffer
+  int64_t bytes_required =
+      _ArrowRoundUpToMultipleOf8(private_data->layout.element_size_bits[buffer_i] *
+                                 (array->length + n)) /
+      8;
+  if (bytes_required > buffer->size_bytes) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowBufferAppendFill(buffer, 0, bytes_required - buffer->size_bytes));
+  }
+
+  ArrowBitsSetTo(buffer->data, array->length, n, value);
+  return NANOARROW_OK;
+}
+
+// Append `n` null elements to `array`.
+//
+// For NANOARROW_TYPE_NA only the length and null count are bumped. For other
+// types the validity bitmap is allocated on first use (back-filled with 1s for
+// the existing elements), `n` zero bits are appended to it, and the remaining
+// buffers are padded so that they stay aligned with the new length. Fixed-size
+// list and struct arrays also append nulls to their children.
+//
+// Returns NANOARROW_OK on success, EINVAL for a negative `n` or for layouts
+// with type-id/union-offset buffers, or the error of a failing allocation.
+static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array, int64_t n) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  // A negative count would become a negative reserve/fill size below
+  if (n < 0) {
+    return EINVAL;
+  }
+
+  if (n == 0) {
+    return NANOARROW_OK;
+  }
+
+  if (private_data->storage_type == NANOARROW_TYPE_NA) {
+    array->null_count += n;
+    array->length += n;
+    return NANOARROW_OK;
+  }
+
+  // Append n 0 bits to the validity bitmap. If we haven't allocated a bitmap
+  // yet, do it now
+  if (private_data->bitmap.buffer.data == NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, array->length + n));
+    ArrowBitmapAppendUnsafe(&private_data->bitmap, 1, array->length);
+    ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+  } else {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapReserve(&private_data->bitmap, n));
+    ArrowBitmapAppendUnsafe(&private_data->bitmap, 0, n);
+  }
+
+  // Add appropriate buffer fill
+  struct ArrowBuffer* buffer;
+  int64_t size_bytes;
+
+  for (int i = 0; i < 3; i++) {
+    buffer = ArrowArrayBuffer(array, i);
+    size_bytes = private_data->layout.element_size_bits[i] / 8;
+
+    switch (private_data->layout.buffer_type[i]) {
+      case NANOARROW_BUFFER_TYPE_NONE:
+      case NANOARROW_BUFFER_TYPE_VALIDITY:
+        continue;
+      case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+        // Append the current value at the end of the offset buffer for each element.
+        // Space is reserved up front, so buffer->data is not reallocated while the
+        // loop reads from and appends to the same buffer.
+        NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes * n));
+
+        for (int64_t j = 0; j < n; j++) {
+          ArrowBufferAppendUnsafe(buffer, buffer->data + size_bytes * (array->length + j),
+                                  size_bytes);
+        }
+
+        // Skip the data buffer
+        i++;
+        continue;
+      case NANOARROW_BUFFER_TYPE_DATA:
+        // Zero out the next bit of memory
+        if (private_data->layout.element_size_bits[i] % 8 == 0) {
+          NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFill(buffer, 0, size_bytes * n));
+        } else {
+          NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, i, 0, n));
+        }
+        continue;
+
+      case NANOARROW_BUFFER_TYPE_TYPE_ID:
+      case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+        // Not supported
+        return EINVAL;
}
}
+  // For fixed-size list and struct we need to append some nulls to
+  // children for the lengths to line up properly
+  switch (private_data->storage_type) {
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(
+          array->children[0], n * private_data->layout.child_size_elements));
+      break;
+    case NANOARROW_TYPE_STRUCT:
+      for (int64_t i = 0; i < array->n_children; i++) {
+        NANOARROW_RETURN_NOT_OK(ArrowArrayAppendNull(array->children[i], n));
+      }
+      break;
+    default:
+      break;
+  }
+
+  array->length += n;
+  array->null_count += n;
+  return NANOARROW_OK;
+}
+
+// Append one non-null element given as a signed 64-bit integer.
+//
+// The value is written to buffer 1 in the array's storage type. Narrower
+// signed types are range-checked with _NANOARROW_CHECK_RANGE (defined
+// elsewhere); unsigned storage types are range-checked against
+// [0, INT64_MAX] and then handed to ArrowArrayAppendUInt(). Unsupported
+// storage types yield EINVAL.
+static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
+                                                 int64_t value) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+  struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+  switch (priv->storage_type) {
+    // Unsigned storage: the unsigned appender finishes the job (including the
+    // validity bit and the length increment)
+    case NANOARROW_TYPE_UINT8:
+    case NANOARROW_TYPE_UINT16:
+    case NANOARROW_TYPE_UINT32:
+    case NANOARROW_TYPE_UINT64:
+      _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+      return ArrowArrayAppendUInt(array, value);
+
+    // Signed storage, narrowest first
+    case NANOARROW_TYPE_INT8:
+      _NANOARROW_CHECK_RANGE(value, INT8_MIN, INT8_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt8(values, value));
+      break;
+    case NANOARROW_TYPE_INT16:
+      _NANOARROW_CHECK_RANGE(value, INT16_MIN, INT16_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt16(values, value));
+      break;
+    case NANOARROW_TYPE_INT32:
+      _NANOARROW_CHECK_RANGE(value, INT32_MIN, INT32_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(values, value));
+      break;
+    case NANOARROW_TYPE_INT64:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(value)));
+      break;
+
+    // Floating point storage relies on the implicit integer conversion
+    case NANOARROW_TYPE_FLOAT:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+      break;
+    case NANOARROW_TYPE_DOUBLE:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+      break;
+
+    // Booleans are bit-packed: any non-zero value is stored as 1
+    case NANOARROW_TYPE_BOOL:
+      NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+      break;
+
+    default:
+      return EINVAL;
+  }
+
+  // Only maintain the validity bitmap once it has been allocated
+  if (priv->bitmap.buffer.data != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+  }
+
+  array->length++;
+  return NANOARROW_OK;
+}
+
+// Append one non-null element given as an unsigned 64-bit integer.
+//
+// The value is written to buffer 1 in the array's storage type. Narrower
+// unsigned types are range-checked with _NANOARROW_CHECK_RANGE (defined
+// elsewhere); signed storage types are range-checked against [0, INT64_MAX]
+// and then handed to ArrowArrayAppendInt(). Unsupported storage types yield
+// EINVAL.
+static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
+                                                  uint64_t value) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+  struct ArrowBuffer* values = ArrowArrayBuffer(array, 1);
+
+  switch (priv->storage_type) {
+    // Signed storage: the signed appender finishes the job (including the
+    // validity bit and the length increment)
+    case NANOARROW_TYPE_INT8:
+    case NANOARROW_TYPE_INT16:
+    case NANOARROW_TYPE_INT32:
+    case NANOARROW_TYPE_INT64:
+      _NANOARROW_CHECK_RANGE(value, 0, INT64_MAX);
+      return ArrowArrayAppendInt(array, value);
+
+    // Unsigned storage, narrowest first
+    case NANOARROW_TYPE_UINT8:
+      _NANOARROW_CHECK_RANGE(value, 0, UINT8_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt8(values, value));
+      break;
+    case NANOARROW_TYPE_UINT16:
+      _NANOARROW_CHECK_RANGE(value, 0, UINT16_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt16(values, value));
+      break;
+    case NANOARROW_TYPE_UINT32:
+      _NANOARROW_CHECK_RANGE(value, 0, UINT32_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendUInt32(values, value));
+      break;
+    case NANOARROW_TYPE_UINT64:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(values, &value, sizeof(value)));
+      break;
+
+    // Floating point storage relies on the implicit integer conversion
+    case NANOARROW_TYPE_FLOAT:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(values, value));
+      break;
+    case NANOARROW_TYPE_DOUBLE:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendDouble(values, value));
+      break;
+
+    // Booleans are bit-packed: any non-zero value is stored as 1
+    case NANOARROW_TYPE_BOOL:
+      NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
+      break;
+
+    default:
+      return EINVAL;
+  }
+
+  // Only maintain the validity bitmap once it has been allocated
+  if (priv->bitmap.buffer.data != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+  }
+
+  array->length++;
+  return NANOARROW_OK;
+}
+
+// Append one non-null element given as a double.
+//
+// Supported storage types are NANOARROW_TYPE_DOUBLE (stored as-is) and
+// NANOARROW_TYPE_FLOAT (range-checked against the finite float range, then
+// narrowed). Any other storage type yields EINVAL.
+static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
+                                                    double value) {
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+
+  switch (private_data->storage_type) {
+    case NANOARROW_TYPE_DOUBLE:
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &value, sizeof(double)));
+      break;
+    case NANOARROW_TYPE_FLOAT:
+      // FLT_MIN is the smallest *positive* normalized float, so using it as the
+      // lower bound would reject zero and every negative value; the most
+      // negative finite float is -FLT_MAX.
+      _NANOARROW_CHECK_RANGE(value, -FLT_MAX, FLT_MAX);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer, value));
+      break;
+    default:
+      return EINVAL;
+  }
+
+  // Only maintain the validity bitmap once it has been allocated
+  if (private_data->bitmap.buffer.data != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+  }
+
+  array->length++;
+  return NANOARROW_OK;
+}
+
+// Append one non-null element whose content is the bytes described by `value`.
+//
+// String/binary types append a new 32-bit end offset to buffer 1 and the bytes
+// to buffer 2 (EINVAL if the new offset would exceed INT32_MAX); the large
+// variants do the same with 64-bit offsets. Fixed-size binary appends the
+// bytes to buffer 1 and requires value.n_bytes to equal the element width.
+// Any other storage type yields EINVAL.
+static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
+                                                   struct ArrowBufferView value) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+
+  // Buffer 1 holds the offsets for variable-size types; for fixed-size binary
+  // it is the data buffer itself
+  struct ArrowBuffer* offsets = ArrowArrayBuffer(array, 1);
+  struct ArrowBuffer* payload = priv->storage_type == NANOARROW_TYPE_FIXED_SIZE_BINARY
+                                    ? offsets
+                                    : ArrowArrayBuffer(array, 2);
+
+  switch (priv->storage_type) {
+    case NANOARROW_TYPE_STRING:
+    case NANOARROW_TYPE_BINARY: {
+      int32_t last_end = ((int32_t*)offsets->data)[array->length];
+      if ((last_end + value.n_bytes) > INT32_MAX) {
+        return EINVAL;
+      }
+
+      int32_t new_end = last_end + value.n_bytes;
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &new_end, sizeof(int32_t)));
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(payload, value.data.data, value.n_bytes));
+      break;
+    }
+
+    case NANOARROW_TYPE_LARGE_STRING:
+    case NANOARROW_TYPE_LARGE_BINARY: {
+      int64_t new_end = ((int64_t*)offsets->data)[array->length] + value.n_bytes;
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offsets, &new_end, sizeof(int64_t)));
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(payload, value.data.data, value.n_bytes));
+      break;
+    }
+
+    case NANOARROW_TYPE_FIXED_SIZE_BINARY: {
+      int64_t element_bytes = priv->layout.element_size_bits[1] / 8;
+      if (value.n_bytes != element_bytes) {
+        return EINVAL;
+      }
+
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(payload, value.data.data, value.n_bytes));
+      break;
+    }
+
+    default:
+      return EINVAL;
+  }
+
+  // Only maintain the validity bitmap once it has been allocated
+  if (priv->bitmap.buffer.data != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+  }
+
+  array->length++;
+  return NANOARROW_OK;
+}
+
+// Append one non-null string element. Only string and large string storage
+// types are accepted (EINVAL otherwise); the bytes of `value` are forwarded
+// to ArrowArrayAppendBytes().
+static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
+                                                    struct ArrowStringView value) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+
+  if (priv->storage_type != NANOARROW_TYPE_STRING &&
+      priv->storage_type != NANOARROW_TYPE_LARGE_STRING) {
+    return EINVAL;
+  }
+
+  // Re-describe the string as a plain byte range
+  struct ArrowBufferView bytes;
+  bytes.data.data = value.data;
+  bytes.n_bytes = value.n_bytes;
+
+  return ArrowArrayAppendBytes(array, bytes);
+}
+
+// Finish one element of a nested array after its child values were appended.
+//
+// List and large list record the current length of the first child as the new
+// end offset in buffer 1 (EINVAL if a list child exceeds INT32_MAX elements).
+// Fixed-size list and struct only verify that the children have exactly the
+// length implied by one more parent element. Any other storage type yields
+// EINVAL.
+static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array) {
+  struct ArrowArrayPrivateData* priv = (struct ArrowArrayPrivateData*)array->private_data;
+
+  // Length of this array once the element has been accepted
+  const int64_t next_length = array->length + 1;
+
+  switch (priv->storage_type) {
+    case NANOARROW_TYPE_LIST: {
+      int64_t n_items = array->children[0]->length;
+      if (n_items > INT32_MAX) {
+        return EINVAL;
+      }
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt32(ArrowArrayBuffer(array, 1), n_items));
+      break;
+    }
+    case NANOARROW_TYPE_LARGE_LIST: {
+      int64_t n_items = array->children[0]->length;
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array, 1), n_items));
+      break;
+    }
+    case NANOARROW_TYPE_FIXED_SIZE_LIST:
+      if (array->children[0]->length != (next_length * priv->layout.child_size_elements)) {
+        return EINVAL;
+      }
+      break;
+    case NANOARROW_TYPE_STRUCT:
+      for (int64_t child_i = 0; child_i < array->n_children; child_i++) {
+        if (array->children[child_i]->length != next_length) {
+          return EINVAL;
+        }
+      }
+      break;
+    default:
+      return EINVAL;
+  }
+
+  // Only maintain the validity bitmap once it has been allocated
+  if (priv->bitmap.buffer.data != NULL) {
+    NANOARROW_RETURN_NOT_OK(ArrowBitmapAppend(ArrowArrayValidityBitmap(array), 1, 1));
+  }
+
+  array->length = next_length;
return NANOARROW_OK;
}
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index 4d32837..e422074 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -17,9 +17,15 @@
#include <gtest/gtest.h>
+#include <arrow/array.h>
+#include <arrow/c/bridge.h>
+#include <arrow/testing/gtest_util.h>
+
#include "nanoarrow/nanoarrow.h"
-TEST(ArrayTest, ArrayTestBasic) {
+using namespace arrow;
+
+TEST(ArrayTest, ArrayTestInit) {
struct ArrowArray array;
EXPECT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_UNINITIALIZED),
NANOARROW_OK);
@@ -83,6 +89,25 @@ TEST(ArrayTest, ArrayTestAllocateDictionary) {
array.release(&array);
}
+// ArrowArrayInitFromSchema() on a struct<int32, string> schema should produce
+// an array with two children whose buffer counts match their types.
+TEST(ArrayTest, ArrayTestInitFromSchema) {
+  // Build the schema: a struct with an int32 child and a string child
+  struct ArrowSchema schema;
+  ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 2), NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT32), NANOARROW_OK);
+  ASSERT_EQ(ArrowSchemaInit(schema.children[1], NANOARROW_TYPE_STRING), NANOARROW_OK);
+
+  // Initialize the array from it and check the resulting structure
+  struct ArrowArray array;
+  struct ArrowError error;
+  EXPECT_EQ(ArrowArrayInitFromSchema(&array, &schema, &error), NANOARROW_OK);
+  EXPECT_EQ(array.n_children, 2);
+  EXPECT_EQ(array.children[0]->n_buffers, 2);
+  EXPECT_EQ(array.children[1]->n_buffers, 3);
+
+  array.release(&array);
+  schema.release(&schema);
+}
+
+
TEST(ArrayTest, ArrayTestSetBitmap) {
struct ArrowBitmap bitmap;
ArrowBitmapInit(&bitmap);
@@ -142,6 +167,7 @@ TEST(ArrayTest, ArrayTestBuildByBuffer) {
const char* data = "abcdefghij";
struct ArrowArray array;
+ struct ArrowError error;
ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
ASSERT_EQ(ArrowBitmapReserve(ArrowArrayValidityBitmap(&array), 100),
NANOARROW_OK);
@@ -153,7 +179,9 @@ TEST(ArrayTest, ArrayTestBuildByBuffer) {
ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 100),
NANOARROW_OK);
ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), data, 10);
- EXPECT_EQ(ArrowArrayFinishBuilding(&array, true), NANOARROW_OK);
+ array.length = 7;
+ EXPECT_EQ(ArrowArrayShrinkToFit(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), NANOARROW_OK);
EXPECT_EQ(memcmp(array.buffers[0], validity_bitmap, 1), 0);
EXPECT_EQ(memcmp(array.buffers[1], offsets, 8 * sizeof(int32_t)), 0);
@@ -167,5 +195,606 @@ TEST(ArrayTest, ArrayTestBuildByBuffer) {
EXPECT_EQ(ArrowArrayBuffer(&array, 1)->size_bytes, 8 * sizeof(int32_t));
EXPECT_EQ(ArrowArrayBuffer(&array, 2)->size_bytes, 10);
+ array.length = 8;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected buffer 1 to size >= 36 bytes but found buffer with 32 bytes");
+
+ array.length = 7;
+ int32_t* offsets_buffer =
reinterpret_cast<int32_t*>(ArrowArrayBuffer(&array, 1)->data);
+ offsets_buffer[7] = offsets_buffer[7] + 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected buffer 2 to size >= 11 bytes but found buffer with 10 bytes");
+
array.release(&array);
}
+
+TEST(ArrayTest, ArrayTestAppendToNullArray) {
+ struct ArrowArray array;
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_NA), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 0), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 2);
+ EXPECT_EQ(array.null_count, 2);
+
+ auto arrow_array = ImportArray(&array, null());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(null(), "[null, null]")));
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_NA), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 0), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 0), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, 0), EINVAL);
+ struct ArrowBufferView buffer_view;
+ buffer_view.data.data = nullptr;
+ buffer_view.n_bytes = 0;
+ EXPECT_EQ(ArrowArrayAppendBytes(&array, buffer_view), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendString(&array, ArrowCharView("")), EINVAL);
+ array.release(&array);
+}
+
+TEST(ArrayTest, ArrayTestAppendToInt64Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_INT64), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const int64_t*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 0);
+ EXPECT_EQ(data_buffer[2], 0);
+ EXPECT_EQ(data_buffer[3], 3);
+
+ auto arrow_array = ImportArray(&array, int64());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(
+ arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(int64(), "[1, null, null, 3]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToInt32Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_INT32), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, std::numeric_limits<int64_t>::max()),
EINVAL);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 1);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const int32_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 123);
+
+ auto arrow_array = ImportArray(&array, int32());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(int32(),
"[123]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToInt16Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_INT16), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, std::numeric_limits<int64_t>::max()),
EINVAL);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 1);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const int16_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 123);
+
+ auto arrow_array = ImportArray(&array, int16());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(int16(),
"[123]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToInt8Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_INT8), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, std::numeric_limits<int64_t>::max()),
EINVAL);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 1);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const int8_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 1);
+
+ auto arrow_array = ImportArray(&array, int8());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(int8(), "[1]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToStringArray) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes, (5 + 1) *
sizeof(int32_t));
+
+ EXPECT_EQ(ArrowArrayAppendString(&array, ArrowCharView("1234")),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendString(&array, ArrowCharView("56789")),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto offset_buffer = reinterpret_cast<const int32_t*>(array.buffers[1]);
+ auto data_buffer = reinterpret_cast<const char*>(array.buffers[2]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ EXPECT_EQ(offset_buffer[0], 0);
+ EXPECT_EQ(offset_buffer[1], 4);
+ EXPECT_EQ(offset_buffer[2], 4);
+ EXPECT_EQ(offset_buffer[3], 4);
+ EXPECT_EQ(offset_buffer[4], 9);
+ EXPECT_EQ(memcmp(data_buffer, "123456789", 9), 0);
+
+ auto arrow_array = ImportArray(&array, utf8());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(utf8(), "[\"1234\", null, null, \"56789\"]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToUInt64Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_UINT64), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 3), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const uint64_t*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 0);
+ EXPECT_EQ(data_buffer[2], 0);
+ EXPECT_EQ(data_buffer[3], 3);
+
+ auto arrow_array = ImportArray(&array, uint64());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(
+ arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(uint64(), "[1, null, null, 3]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToUInt32Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_UINT32), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 3), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, -1), EINVAL);
+
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 2);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const uint32_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 3);
+
+ auto arrow_array = ImportArray(&array, uint32());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(uint32(), "[1, 3]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToUInt16Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_UINT16), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 3), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, -1), EINVAL);
+
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 2);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const uint16_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 3);
+
+ auto arrow_array = ImportArray(&array, uint16());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(uint16(), "[1, 3]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToUInt8Array) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_UINT8), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 3), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayAppendUInt(&array,
std::numeric_limits<uint64_t>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, -1), EINVAL);
+
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 2);
+ EXPECT_EQ(array.null_count, 0);
+ auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
+ EXPECT_EQ(array.buffers[0], nullptr);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 3);
+
+ auto arrow_array = ImportArray(&array, uint8());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(uint8(), "[1, 3]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToDoubleArray) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_DOUBLE), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, 3.14), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 5);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const double*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08 | 0x10);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 0);
+ EXPECT_EQ(data_buffer[2], 0);
+ EXPECT_EQ(data_buffer[3], 3);
+ EXPECT_DOUBLE_EQ(data_buffer[4], 3.14);
+
+ auto arrow_array = ImportArray(&array, float64());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(float64(), "[1.0, null, null, 3.0, 3.14]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToFloatArray) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_FLOAT), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 3), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array, 3.14), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendDouble(&array,
std::numeric_limits<double>::max()), EINVAL);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 5);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const float*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08 | 0x10);
+ EXPECT_EQ(data_buffer[0], 1);
+ EXPECT_EQ(data_buffer[1], 0);
+ EXPECT_EQ(data_buffer[2], 0);
+ EXPECT_EQ(data_buffer[3], 3);
+ EXPECT_FLOAT_EQ(data_buffer[4], 3.14);
+
+ auto arrow_array = ImportArray(&array, float32());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(float32(), "[1.0, null, null, 3.0, 3.14]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToBoolArray) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_BOOL), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendInt(&array, 1), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendUInt(&array, 0), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const uint8_t*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ EXPECT_EQ(ArrowBitGet(data_buffer, 0), 0x01);
+ EXPECT_EQ(ArrowBitGet(data_buffer, 1), 0x00);
+ EXPECT_EQ(ArrowBitGet(data_buffer, 2), 0x00);
+ EXPECT_EQ(ArrowBitGet(data_buffer, 3), 0x00);
+
+ auto arrow_array = ImportArray(&array, boolean());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(boolean(), "[true, null, null, false]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToLargeStringArray) {
+ struct ArrowArray array;
+
+ ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_LARGE_STRING), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes, (5 + 1) *
sizeof(int64_t));
+
+ EXPECT_EQ(ArrowArrayAppendString(&array, ArrowCharView("1234")),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendString(&array, ArrowCharView("56789")),
NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto offset_buffer = reinterpret_cast<const int64_t*>(array.buffers[1]);
+ auto data_buffer = reinterpret_cast<const char*>(array.buffers[2]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ EXPECT_EQ(offset_buffer[0], 0);
+ EXPECT_EQ(offset_buffer[1], 4);
+ EXPECT_EQ(offset_buffer[2], 4);
+ EXPECT_EQ(offset_buffer[3], 4);
+ EXPECT_EQ(offset_buffer[4], 9);
+ EXPECT_EQ(memcmp(data_buffer, "123456789", 9), 0);
+
+ auto arrow_array = ImportArray(&array, large_utf8());
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(large_utf8(), "[\"1234\", null, null, \"56789\"]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToFixedSizeBinaryArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+
+ ASSERT_EQ(ArrowSchemaInitFixedSize(&schema,
NANOARROW_TYPE_FIXED_SIZE_BINARY, 5),
+ NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(&array, 1)->capacity_bytes, 5 * 5);
+
+ EXPECT_EQ(ArrowArrayAppendBytes(&array, {"12345", 5}), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendNull(&array, 2), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayAppendBytes(&array, {"67890", 5}), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ EXPECT_EQ(array.length, 4);
+ EXPECT_EQ(array.null_count, 2);
+ auto validity_buffer = reinterpret_cast<const uint8_t*>(array.buffers[0]);
+ auto data_buffer = reinterpret_cast<const char*>(array.buffers[1]);
+ EXPECT_EQ(validity_buffer[0], 0x01 | 0x08);
+ char expected_data[] = {'1', '2', '3', '4', '5', 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, '6', '7', '8', '9',
'0'};
+ EXPECT_EQ(memcmp(data_buffer, expected_data, 20), 0);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(fixed_size_binary(5), "[\"12345\", null, null, \"67890\"]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToListArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_LIST), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetName(schema.children[0], "item"), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively without erroring
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 1 child of list array but found 0 child arrays");
+ array.n_children = 1;
+
+ // Make sure final child size is checked at finish
+ array.children[0]->length = array.children[0]->length - 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Expected child of list array with length >= 3 but found array with length 2");
+
+ array.children[0]->length = array.children[0]->length + 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), NANOARROW_OK);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(list(int64()), "[[123], null, [456, 789]]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToLargeListArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_LARGE_LIST), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetName(schema.children[0], "item"), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively without erroring
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 0);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 1 child of large list array but found 0 child arrays");
+ array.n_children = 1;
+
+ // Make sure final child size is checked at finish
+ array.children[0]->length = array.children[0]->length - 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected child of large list array with length >= 3 but found array with "
+ "length 2");
+
+ array.children[0]->length = array.children[0]->length + 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), NANOARROW_OK);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(large_list(int64()), "[[123], null, [456, 789]]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToFixedSizeListArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ ASSERT_EQ(ArrowSchemaInitFixedSize(&schema, NANOARROW_TYPE_FIXED_SIZE_LIST,
2),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetName(schema.children[0], "item"), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes,
+ 2 * 5 * sizeof(int64_t));
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 789), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 12), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 1 child of fixed-size array but found 0 child arrays");
+ array.n_children = 1;
+
+ // Make sure final child size is checked at finish
+ array.children[0]->length = array.children[0]->length - 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected child of fixed-size list array with length >= 6 but found array "
+ "with length 5");
+
+ array.children[0]->length = array.children[0]->length + 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(
+ ArrayFromJSON(fixed_size_list(int64(), 2), "[[123, 456], null, [789, 12]]")));
+}
+
+TEST(ArrayTest, ArrayTestAppendToStructArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+
+ ASSERT_EQ(ArrowSchemaInit(&schema, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaAllocateChildren(&schema, 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaInit(schema.children[0], NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetName(schema.children[0], "col1"), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+
+ // Check that we can reserve recursively
+ ASSERT_EQ(ArrowArrayReserve(&array, 5), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayBuffer(array.children[0], 1)->capacity_bytes, 5 *
sizeof(int64_t));
+
+ // Wrong child length
+ EXPECT_EQ(ArrowArrayFinishElement(&array), EINVAL);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 123), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendNull(&array, 1), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 456), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayFinishElement(&array), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+ EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(ArrayFromJSON(
+ struct_({field("col1", int64())}), "[{\"col1\": 123}, null, {\"col1\": 456}]")));
+}
diff --git a/src/nanoarrow/array_view.c b/src/nanoarrow/array_view.c
index a5427ac..1914e48 100644
--- a/src/nanoarrow/array_view.c
+++ b/src/nanoarrow/array_view.c
@@ -141,15 +141,18 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length)
}
break;
case NANOARROW_TYPE_FIXED_SIZE_LIST:
- ArrowArrayViewSetLength(array_view->children[0],
- length * array_view->layout.child_size_elements);
+ if (array_view->n_children >= 1) {
+ ArrowArrayViewSetLength(array_view->children[0],
+ length *
array_view->layout.child_size_elements);
+ }
default:
break;
}
}
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array) {
+ struct ArrowArray* array,
+ struct ArrowError* error) {
array_view->array = array;
ArrowArrayViewSetLength(array_view, array->offset + array->length);
@@ -171,6 +174,8 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
if (buffers_required != array->n_buffers) {
+ ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d buffer(s)",
+ (int)buffers_required, (int)array->n_buffers);
return EINVAL;
}
@@ -179,7 +184,6 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
// Check child sizes and calculate sizes that depend on data in the array buffers
- int result;
int64_t last_offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_STRING:
@@ -201,14 +205,74 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array_view->n_children; i++) {
if (array->children[i]->length < (array->offset + array->length)) {
+ ArrowErrorSet(
+ error,
+ "Expected struct child %d to have length >= %ld but found child with "
+ "length %ld",
+ (int)(i + 1), (long)(array->offset + array->length),
+ (long)array->children[i]->length);
+ return EINVAL;
+ }
+ }
+ break;
+ case NANOARROW_TYPE_LIST:
+ if (array->n_children != 1) {
+ ArrowErrorSet(error,
+ "Expected 1 child of list array but found %d child arrays",
+ (int)array->n_children);
+ return EINVAL;
+ }
+
+ if (array_view->buffer_views[1].n_bytes != 0) {
+ last_offset =
+ array_view->buffer_views[1].data.as_int32[array->offset +
array->length];
+ if (array->children[0]->length < last_offset) {
+ ArrowErrorSet(
+ error,
+ "Expected child of list array with length >= %ld but found array with "
+ "length %ld",
+ (long)last_offset, (long)array->children[0]->length);
+ return EINVAL;
+ }
+ }
+ break;
+ case NANOARROW_TYPE_LARGE_LIST:
+ if (array->n_children != 1) {
+ ArrowErrorSet(error,
+ "Expected 1 child of large list array but found %d child arrays",
+ (int)array->n_children);
+ return EINVAL;
+ }
+
+ if (array_view->buffer_views[1].n_bytes != 0) {
+ last_offset =
+ array_view->buffer_views[1].data.as_int64[array->offset +
array->length];
+ if (array->children[0]->length < last_offset) {
+ ArrowErrorSet(
+ error,
+ "Expected child of large list array with length >= %ld but found array "
+ "with length %ld",
+ (long)last_offset, (long)array->children[0]->length);
return EINVAL;
}
}
break;
case NANOARROW_TYPE_FIXED_SIZE_LIST:
+ if (array->n_children != 1) {
+ ArrowErrorSet(error,
+ "Expected 1 child of fixed-size array but found %d child arrays",
+ (int)array->n_children);
+ return EINVAL;
+ }
+
last_offset =
(array->offset + array->length) *
array_view->layout.child_size_elements;
- if (array->n_children != 1 || array->children[0]->length < last_offset) {
+ if (array->children[0]->length < last_offset) {
+ ArrowErrorSet(
+ error,
+ "Expected child of fixed-size list array with length >= %ld but found array "
+ "with length %ld",
+ (long)last_offset, (long)array->children[0]->length);
return EINVAL;
}
break;
@@ -217,10 +281,8 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
for (int64_t i = 0; i < array_view->n_children; i++) {
- result = ArrowArrayViewSetArray(array_view->children[i],
array->children[i]);
- if (result != NANOARROW_OK) {
- return result;
- }
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayViewSetArray(array_view->children[i], array->children[i],
error));
}
return NANOARROW_OK;
diff --git a/src/nanoarrow/array_view_test.cc b/src/nanoarrow/array_view_test.cc
index 1ce4156..55b8bf0 100644
--- a/src/nanoarrow/array_view_test.cc
+++ b/src/nanoarrow/array_view_test.cc
@@ -21,6 +21,7 @@
TEST(ArrayTest, ArrayViewTestBasic) {
struct ArrowArrayView array_view;
+ struct ArrowError error;
ArrowArrayViewInit(&array_view, NANOARROW_TYPE_INT32);
EXPECT_EQ(array_view.array, nullptr);
@@ -43,9 +44,9 @@ TEST(ArrayTest, ArrayViewTestBasic) {
ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(&array, 1), 13),
NANOARROW_OK);
array.length = 3;
array.null_count = 0;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, 3 * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[1].data.as_int32[0], 11);
@@ -55,16 +56,16 @@ TEST(ArrayTest, ArrayViewTestBasic) {
// Build with validity buffer
ASSERT_EQ(ArrowBitmapAppend(ArrowArrayValidityBitmap(&array), 1, 3),
NANOARROW_OK);
array.null_count = -1;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, 3 * sizeof(int32_t));
// Expect error for the wrong number of buffers
ArrowArrayViewReset(&array_view);
ArrowArrayViewInit(&array_view, NANOARROW_TYPE_STRING);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
array.release(&array);
ArrowArrayViewReset(&array_view);
@@ -72,6 +73,7 @@ TEST(ArrayTest, ArrayViewTestBasic) {
TEST(ArrayTest, ArrayViewTestString) {
struct ArrowArrayView array_view;
+ struct ArrowError error;
ArrowArrayViewInit(&array_view, NANOARROW_TYPE_STRING);
EXPECT_EQ(array_view.array, nullptr);
@@ -99,7 +101,7 @@ TEST(ArrayTest, ArrayViewTestString) {
// Build + check zero length
ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
array.null_count = 0;
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
@@ -110,9 +112,9 @@ TEST(ArrayTest, ArrayViewTestString) {
ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 4), NANOARROW_OK);
ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), "abcd", 4);
array.length = 1;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, (1 + 1) * sizeof(int32_t));
EXPECT_EQ(array_view.buffer_views[2].n_bytes, 4);
@@ -123,6 +125,7 @@ TEST(ArrayTest, ArrayViewTestString) {
TEST(ArrayTest, ArrayViewTestLargeString) {
struct ArrowArrayView array_view;
+ struct ArrowError error;
ArrowArrayViewInit(&array_view, NANOARROW_TYPE_LARGE_STRING);
EXPECT_EQ(array_view.array, nullptr);
@@ -150,7 +153,7 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
// Build + check zero length
ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRING), NANOARROW_OK);
array.null_count = 0;
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[2].n_bytes, 0);
@@ -161,9 +164,9 @@ TEST(ArrayTest, ArrayViewTestLargeString) {
ASSERT_EQ(ArrowBufferReserve(ArrowArrayBuffer(&array, 2), 4), NANOARROW_OK);
ArrowBufferAppendUnsafe(ArrowArrayBuffer(&array, 2), "abcd", 4);
array.length = 1;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.buffer_views[0].n_bytes, 0);
EXPECT_EQ(array_view.buffer_views[1].n_bytes, (1 + 1) * sizeof(int64_t));
EXPECT_EQ(array_view.buffer_views[2].n_bytes, 4);
@@ -203,6 +206,52 @@ TEST(ArrayTest, ArrayViewTestStruct) {
ArrowArrayViewReset(&array_view);
}
+TEST(ArrayTest, ArrayViewTestList) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInit(&array_view, NANOARROW_TYPE_LIST);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LIST);
+ EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(array_view.layout.element_size_bits[1], 8 * sizeof(int32_t));
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ ArrowArrayViewInit(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, (5 + 1) * sizeof(int32_t));
+
+ ArrowArrayViewReset(&array_view);
+}
+
+TEST(ArrayTest, ArrayViewTestLargeList) {
+ struct ArrowArrayView array_view;
+ ArrowArrayViewInit(&array_view, NANOARROW_TYPE_LARGE_LIST);
+
+ EXPECT_EQ(array_view.array, nullptr);
+ EXPECT_EQ(array_view.storage_type, NANOARROW_TYPE_LARGE_LIST);
+ EXPECT_EQ(array_view.layout.buffer_type[0], NANOARROW_BUFFER_TYPE_VALIDITY);
+ EXPECT_EQ(array_view.layout.element_size_bits[0], 1);
+ EXPECT_EQ(array_view.layout.buffer_type[1],
NANOARROW_BUFFER_TYPE_DATA_OFFSET);
+ EXPECT_EQ(array_view.layout.element_size_bits[1], 8 * sizeof(int64_t));
+
+ EXPECT_EQ(ArrowArrayViewAllocateChildren(&array_view, 1), NANOARROW_OK);
+ EXPECT_EQ(array_view.n_children, 1);
+ ArrowArrayViewInit(array_view.children[0], NANOARROW_TYPE_INT32);
+ EXPECT_EQ(array_view.children[0]->storage_type, NANOARROW_TYPE_INT32);
+
+ ArrowArrayViewSetLength(&array_view, 5);
+ EXPECT_EQ(array_view.buffer_views[0].n_bytes, 1);
+ EXPECT_EQ(array_view.buffer_views[1].n_bytes, (5 + 1) * sizeof(int64_t));
+
+ ArrowArrayViewReset(&array_view);
+}
+
TEST(ArrayTest, ArrayViewTestFixedSizeList) {
struct ArrowArrayView array_view;
ArrowArrayViewInit(&array_view, NANOARROW_TYPE_FIXED_SIZE_LIST);
@@ -242,21 +291,21 @@ TEST(ArrayTest, ArrayViewTestStructArray) {
ASSERT_EQ(ArrowArrayInit(&array, NANOARROW_TYPE_STRUCT), NANOARROW_OK);
// Expect error for the wrong number of children
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
ASSERT_EQ(ArrowArrayAllocateChildren(&array, 1), NANOARROW_OK);
ASSERT_EQ(ArrowArrayInit(array.children[0], NANOARROW_TYPE_INT32),
NANOARROW_OK);
// Expect error for the wrong number of child elements
array.length = 1;
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1),
123),
NANOARROW_OK);
array.children[0]->length = 1;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, nullptr), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
@@ -286,7 +335,7 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
// Expect error for the wrong number of child elements
array.length = 1;
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), EINVAL);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), EINVAL);
ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1),
123),
NANOARROW_OK);
@@ -295,9 +344,9 @@ TEST(ArrayTest, ArrayViewTestFixedSizeListArray) {
ASSERT_EQ(ArrowBufferAppendInt32(ArrowArrayBuffer(array.children[0], 1),
789),
NANOARROW_OK);
array.children[0]->length = 3;
- ASSERT_EQ(ArrowArrayFinishBuilding(&array, false), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayFinishBuilding(&array, &error), NANOARROW_OK);
- EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array), NANOARROW_OK);
+ EXPECT_EQ(ArrowArrayViewSetArray(&array_view, &array, &error), NANOARROW_OK);
EXPECT_EQ(array_view.children[0]->buffer_views[1].n_bytes, 3 *
sizeof(int32_t));
EXPECT_EQ(array_view.children[0]->buffer_views[1].data.as_int32[0], 123);
diff --git a/src/nanoarrow/buffer_inline.h b/src/nanoarrow/buffer_inline.h
index 5400a1b..9b5320c 100644
--- a/src/nanoarrow/buffer_inline.h
+++ b/src/nanoarrow/buffer_inline.h
@@ -179,6 +179,18 @@ static inline ArrowErrorCode ArrowBufferAppendFloat(struct
ArrowBuffer* buffer,
return ArrowBufferAppend(buffer, &value, sizeof(float));
}
+static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
+ uint8_t value, int64_t
size_bytes) {
+ int result = ArrowBufferReserve(buffer, size_bytes);
+ if (result != NANOARROW_OK) {
+ return result;
+ }
+
+ memset(buffer->data + buffer->size_bytes, value, size_bytes);
+ buffer->size_bytes += size_bytes;
+ return NANOARROW_OK;
+}
+
#ifdef __cplusplus
}
#endif
diff --git a/src/nanoarrow/buffer_test.cc b/src/nanoarrow/buffer_test.cc
index 1c7946b..7bb4496 100644
--- a/src/nanoarrow/buffer_test.cc
+++ b/src/nanoarrow/buffer_test.cc
@@ -125,6 +125,29 @@ TEST(BufferTest, BufferTestMove) {
ArrowBufferReset(&buffer_out);
}
+TEST(BufferTest, BufferTestFill) {
+ struct ArrowBuffer buffer;
+ ArrowBufferInit(&buffer);
+
+ EXPECT_EQ(ArrowBufferAppendFill(&buffer, 0xff, 10), NANOARROW_OK);
+ EXPECT_EQ(buffer.size_bytes, 10);
+ for (int i = 0; i < 10; i++) {
+ EXPECT_EQ(buffer.data[i], 0xff);
+ }
+
+ buffer.size_bytes = 0;
+ EXPECT_EQ(ArrowBufferAppendFill(&buffer, 0, 10), NANOARROW_OK);
+ EXPECT_EQ(buffer.size_bytes, 10);
+ for (int i = 0; i < 10; i++) {
+ EXPECT_EQ(buffer.data[i], 0);
+ }
+
+ ArrowBufferReset(&buffer);
+
+ EXPECT_EQ(ArrowBufferAppendFill(&buffer, 0,
std::numeric_limits<int64_t>::max()),
+ ENOMEM);
+}
+
TEST(BufferTest, BufferTestResize0) {
struct ArrowBuffer buffer;
diff --git a/src/nanoarrow/error.c b/src/nanoarrow/error.c
index 74539d3..9af6267 100644
--- a/src/nanoarrow/error.c
+++ b/src/nanoarrow/error.c
@@ -23,6 +23,10 @@
#include "nanoarrow.h"
int ArrowErrorSet(struct ArrowError* error, const char* fmt, ...) {
+ if (error == NULL) {
+ return NANOARROW_OK;
+ }
+
memset(error->message, 0, sizeof(error->message));
va_list args;
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 4635e24..c417eaa 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -262,6 +262,7 @@ struct ArrowSchemaView {
/// interpret the buffers in the array.
enum ArrowType storage_data_type;
+ /// \brief The storage layout represented by the schema
struct ArrowLayout layout;
/// \brief The extension type name if it exists
@@ -404,6 +405,13 @@ static inline void ArrowBufferAppendUnsafe(struct
ArrowBuffer* buffer, const voi
static inline ArrowErrorCode ArrowBufferAppend(struct ArrowBuffer* buffer,
const void* data, int64_t
size_bytes);
+/// \brief Write fill to buffer and increment the buffer size
+///
+/// This function writes the specified number of fill bytes and
+/// ensures that the buffer has the required capacity,
+static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
+ uint8_t value, int64_t
size_bytes);
+
/// \brief Write an 8-bit integer to a buffer
static inline ArrowErrorCode ArrowBufferAppendInt8(struct ArrowBuffer* buffer,
int8_t value);
@@ -526,6 +534,14 @@ static inline void ArrowBitmapReset(struct ArrowBitmap*
bitmap);
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInit(struct ArrowArray* array, enum ArrowType
storage_type);
+/// \brief Initialize the contents of an ArrowArray from an ArrowSchema
+///
+/// Caller is responsible for calling the array->release callback if
+/// NANOARROW_OK is returned.
+ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
+ struct ArrowSchema* schema,
+ struct ArrowError* error);
+
/// \brief Allocate the array->children array
///
/// Includes the memory for each child struct ArrowArray,
@@ -563,11 +579,90 @@ static inline struct ArrowBitmap*
ArrowArrayValidityBitmap(struct ArrowArray* ar
/// array must have been allocated using ArrowArrayInit
static inline struct ArrowBuffer* ArrowArrayBuffer(struct ArrowArray* array,
int64_t i);
+/// \brief Start element-wise appending to an ArrowArray
+///
+/// Initializes any values needed to use ArrowArrayAppend*() functions.
+/// All element-wise appenders append by value and return EINVAL if the exact value
+/// cannot be represented by the underlying storage type.
+/// array must have been allocated using ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayStartAppending(struct ArrowArray*
array);
+
+/// \brief Reserve space for future appends
+///
+/// For buffer sizes that can be calculated (i.e., not string data buffers or
+/// child array sizes for non-fixed-size arrays), recursively reserve space for
+/// additional elements. This is useful for reducing the number of reallocations
+/// that occur using the item-wise appenders.
+ArrowErrorCode ArrowArrayReserve(struct ArrowArray* array,
+ int64_t additional_size_elements);
+
+/// \brief Append a null value to an array
+static inline ArrowErrorCode ArrowArrayAppendNull(struct ArrowArray* array,
int64_t n);
+
+/// \brief Append a signed integer value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range).
+static inline ArrowErrorCode ArrowArrayAppendInt(struct ArrowArray* array,
int64_t value);
+
+/// \brief Append an unsigned integer value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range).
+static inline ArrowErrorCode ArrowArrayAppendUInt(struct ArrowArray* array,
+ uint64_t value);
+
+/// \brief Append a double value to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g., value
+/// is outside the valid array range or there is an attempt to append
+/// a non-integer to an array with an integer storage type).
+static inline ArrowErrorCode ArrowArrayAppendDouble(struct ArrowArray* array,
+ double value);
+
+/// \brief Append a string of bytes to an array
+///
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g.,
+/// the underlying array is not a binary, string, large binary, large string,
+/// or fixed-size binary array, or value is the wrong size for a fixed-size
+/// binary array).
+static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
+ struct ArrowBufferView
value);
+
+/// \brief Append a string value to an array
+/// Returns NANOARROW_OK if value can be exactly represented by
+/// the underlying storage type or EINVAL otherwise (e.g.,
+/// the underlying array is not a string or large string array).
+static inline ArrowErrorCode ArrowArrayAppendString(struct ArrowArray* array,
+ struct ArrowStringView
value);
+
+/// \brief Finish a nested array element
+///
+/// Appends a non-null element to the array based on the first child's current
+/// length. Returns NANOARROW_OK if the item was successfully added or EINVAL
+/// if the underlying storage type is not a struct, list, large list, or fixed-size
+/// list, or if there was an attempt to add a struct or fixed-size list element where the
+/// length of the child array(s) did not match the expected length.
+static inline ArrowErrorCode ArrowArrayFinishElement(struct ArrowArray* array);
+
+/// \brief Shrink buffer capacity to the size required
+///
+/// Also applies shrinking to any child arrays. array must have been allocated using
+/// ArrowArrayInit
+static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array);
+
/// \brief Finish building an ArrowArray
///
+/// Flushes any pointers from internal buffers that may have been reallocated
+/// into the array->buffers array and checks the actual size of the buffers
+/// against the expected size based on the final length.
/// array must have been allocated using ArrowArrayInit
-static inline ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
- char shrink_to_fit);
+ArrowErrorCode ArrowArrayFinishBuilding(struct ArrowArray* array,
+ struct ArrowError* error);
/// }@
@@ -593,7 +688,8 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length);
/// \brief Set buffer sizes and data pointers from an ArrowArray
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array);
+ struct ArrowArray* array,
+ struct ArrowError* error);
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
diff --git a/src/nanoarrow/typedefs_inline.h b/src/nanoarrow/typedefs_inline.h
index 73d839e..0959be5 100644
--- a/src/nanoarrow/typedefs_inline.h
+++ b/src/nanoarrow/typedefs_inline.h
@@ -290,6 +290,9 @@ struct ArrowArrayPrivateData {
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
+
+ // The buffer arrangement for the storage type
+ struct ArrowLayout layout;
};
struct ArrowArrayView {
diff --git a/src/nanoarrow/utils_inline.h b/src/nanoarrow/utils_inline.h
index 4c61555..3083339 100644
--- a/src/nanoarrow/utils_inline.h
+++ b/src/nanoarrow/utils_inline.h
@@ -18,6 +18,7 @@
#ifndef NANOARROW_UTILS_INLINE_H_INCLUDED
#define NANOARROW_UTILS_INLINE_H_INCLUDED
+#include <errno.h>
#include <string.h>
#include "typedefs_inline.h"
@@ -26,6 +27,21 @@
extern "C" {
#endif
+#define _NANOARROW_CONCAT(x, y) x##y
+#define _NANOARROW_MAKE_NAME(x, y) _NANOARROW_CONCAT(x, y)
+
+#define _NANOARROW_RETURN_NOT_OK_IMPL(NAME, EXPR) \
+ do { \
+ const int NAME = (EXPR); \
+ if (NAME) return NAME; \
+ } while (0)
+
+#define NANOARROW_RETURN_NOT_OK(EXPR) \
+ _NANOARROW_RETURN_NOT_OK_IMPL(_NANOARROW_MAKE_NAME(errno_status_,
__COUNTER__), EXPR)
+
+#define _NANOARROW_CHECK_RANGE(x_, min_, max_) \
+ NANOARROW_RETURN_NOT_OK((x_ >= min_ && x_ <= max_) ? NANOARROW_OK : EINVAL)
+
static inline struct ArrowStringView ArrowCharView(const char* value) {
struct ArrowStringView out;