This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 46776e0 Update dist/ for commit
4744498daf166bc3ae8d34d157a2598002447d4e
46776e0 is described below
commit 46776e0ffeec906f6be8607e856283287d4728a9
Author: GitHub Actions <[email protected]>
AuthorDate: Thu Nov 30 01:29:10 2023 +0000
Update dist/ for commit 4744498daf166bc3ae8d34d157a2598002447d4e
---
dist/nanoarrow.c | 54 +++---
dist/nanoarrow.h | 141 +++++++-------
dist/nanoarrow_device.c | 2 +-
dist/nanoarrow_ipc.c | 4 +-
dist/nanoarrow_testing.hpp | 451 ++++++++++++++++++++++++++++++++++++++++++++-
5 files changed, 551 insertions(+), 101 deletions(-)
diff --git a/dist/nanoarrow.c b/dist/nanoarrow.c
index 264eb96..31e0b86 100644
--- a/dist/nanoarrow.c
+++ b/dist/nanoarrow.c
@@ -716,7 +716,7 @@ ArrowErrorCode ArrowSchemaAllocateDictionary(struct
ArrowSchema* schema) {
return NANOARROW_OK;
}
-ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
+ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema,
struct ArrowSchema* schema_out) {
ArrowSchemaInit(schema_out);
@@ -1336,7 +1336,8 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct
ArrowSchemaView* schema_vie
}
ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
- struct ArrowSchema* schema, struct
ArrowError* error) {
+ const struct ArrowSchema* schema,
+ struct ArrowError* error) {
if (schema == NULL) {
ArrowErrorSet(error, "Expected non-NULL schema");
return EINVAL;
@@ -1462,7 +1463,7 @@ static inline void ArrowToStringLogChars(char** out,
int64_t n_chars_last,
}
}
-int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
+int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out,
int64_t n,
char recursive) {
if (schema == NULL) {
return snprintf(out, n, "[invalid: pointer is null]");
@@ -1924,7 +1925,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray*
array,
}
ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
- struct ArrowArrayView* array_view,
+ const struct ArrowArrayView*
array_view,
struct ArrowError* error) {
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowArrayInitFromType(array, array_view->storage_type), error);
@@ -1970,7 +1971,7 @@ ArrowErrorCode ArrowArrayInitFromArrayView(struct
ArrowArray* array,
}
ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
- struct ArrowSchema* schema,
+ const struct ArrowSchema* schema,
struct ArrowError* error) {
struct ArrowArrayView array_view;
NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(&array_view, schema,
error));
@@ -2195,7 +2196,7 @@ static void ArrowArrayFlushInternalPointers(struct
ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- for (int64_t i = 0; i < 3; i++) {
+ for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
}
@@ -2293,7 +2294,7 @@ ArrowErrorCode ArrowArrayViewAllocateDictionary(struct
ArrowArrayView* array_vie
}
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
- struct ArrowSchema* schema,
+ const struct ArrowSchema* schema,
struct ArrowError* error) {
struct ArrowSchemaView schema_view;
int result = ArrowSchemaViewInit(&schema_view, schema, error);
@@ -2379,7 +2380,7 @@ void ArrowArrayViewReset(struct ArrowArrayView*
array_view) {
}
void ArrowArrayViewSetLength(struct ArrowArrayView* array_view, int64_t
length) {
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
switch (array_view->layout.buffer_type[i]) {
@@ -2427,28 +2428,15 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length)
// This version recursively extracts information from the array and stores it
// in the array view, performing any checks that require the original array.
static int ArrowArrayViewSetArrayInternal(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
+ const struct ArrowArray* array,
struct ArrowError* error) {
- // Check length and offset
- if (array->offset < 0) {
- ArrowErrorSet(error, "Expected array offset >= 0 but found array offset of
%ld",
- (long)array->offset);
- return EINVAL;
- }
-
- if (array->length < 0) {
- ArrowErrorSet(error, "Expected array length >= 0 but found array length of
%ld",
- (long)array->length);
- return EINVAL;
- }
-
array_view->array = array;
array_view->offset = array->offset;
array_view->length = array->length;
array_view->null_count = array->null_count;
int64_t buffers_required = 0;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
@@ -2507,6 +2495,18 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
struct ArrowError* error) {
+ if (array_view->length < 0) {
+ ArrowErrorSet(error, "Expected length >= 0 but found length %ld",
+ (long)array_view->length);
+ return EINVAL;
+ }
+
+ if (array_view->offset < 0) {
+ ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld",
+ (long)array_view->offset);
+ return EINVAL;
+ }
+
// Calculate buffer sizes that do not require buffer access. If marked as
// unknown, assign the buffer size; otherwise, validate it.
int64_t offset_plus_length = array_view->offset + array_view->length;
@@ -2767,7 +2767,7 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
}
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
+ const struct ArrowArray* array,
struct ArrowError* error) {
// Extract information from the array into the array view
NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array,
error));
@@ -2780,7 +2780,7 @@ ArrowErrorCode ArrowArrayViewSetArray(struct
ArrowArrayView* array_view,
}
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
+ const struct ArrowArray* array,
struct ArrowError* error) {
// Extract information from the array into the array view
NANOARROW_RETURN_NOT_OK(ArrowArrayViewSetArrayInternal(array_view, array,
error));
@@ -2861,7 +2861,7 @@ static int ArrowAssertInt8In(struct ArrowBufferView view,
const int8_t* values,
static int ArrowArrayViewValidateFull(struct ArrowArrayView* array_view,
struct ArrowError* error) {
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
if (array_view->layout.element_size_bits[i] == 32) {
@@ -3079,7 +3079,7 @@ void ArrowBasicArrayStreamSetArray(struct
ArrowArrayStream* array_stream, int64_
ArrowArrayMove(array, &private_data->arrays[i]);
}
-ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream*
array_stream,
+ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream*
array_stream,
struct ArrowError* error) {
struct BasicArrayStreamPrivate* private_data =
(struct BasicArrayStreamPrivate*)array_stream->private_data;
diff --git a/dist/nanoarrow.h b/dist/nanoarrow.h
index d784633..d0fea4a 100644
--- a/dist/nanoarrow.h
+++ b/dist/nanoarrow.h
@@ -481,6 +481,14 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_DATA
};
+/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
+/// \ingroup nanoarrow-array-view
+///
+/// All currently supported types have 3 buffers or fewer; however, future
types
+/// may involve a variable number of buffers (e.g., string view). These buffers
+/// will be represented by separate members of the ArrowArrayView or
ArrowLayout.
+#define NANOARROW_MAX_FIXED_BUFFERS 3
+
/// \brief An non-owning view of a string
/// \ingroup nanoarrow-utils
struct ArrowStringView {
@@ -593,13 +601,13 @@ struct ArrowBitmap {
/// the length and offset of the array.
struct ArrowLayout {
/// \brief The function of each buffer
- enum ArrowBufferType buffer_type[3];
+ enum ArrowBufferType buffer_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The data type of each buffer
- enum ArrowType buffer_data_type[3];
+ enum ArrowType buffer_data_type[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The size of an element each buffer or 0 if this size is variable
or unknown
- int64_t element_size_bits[3];
+ int64_t element_size_bits[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of elements in the child array per element in this
array for a
/// fixed-size list
@@ -618,7 +626,7 @@ struct ArrowLayout {
struct ArrowArrayView {
/// \brief The underlying ArrowArray or NULL if it has not been set or
/// if the buffers in this ArrowArrayView are not backed by an ArrowArray.
- struct ArrowArray* array;
+ const struct ArrowArray* array;
/// \brief The number of elements from the physical start of the buffers.
int64_t offset;
@@ -641,7 +649,7 @@ struct ArrowArrayView {
struct ArrowLayout layout;
/// \brief This Array's buffers as ArrowBufferView objects
- struct ArrowBufferView buffer_views[3];
+ struct ArrowBufferView buffer_views[NANOARROW_MAX_FIXED_BUFFERS];
/// \brief The number of children of this view
int64_t n_children;
@@ -669,12 +677,12 @@ struct ArrowArrayPrivateData {
struct ArrowBitmap bitmap;
// Holder for additional buffers as required
- struct ArrowBuffer buffers[2];
+ struct ArrowBuffer buffers[NANOARROW_MAX_FIXED_BUFFERS - 1];
// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
// (which may have ben reallocated uring that time)
- const void* buffer_data[3];
+ const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
@@ -760,19 +768,20 @@ static inline void ArrowDecimalInit(struct ArrowDecimal*
decimal, int32_t bitwid
/// This does not check if the decimal's precision sufficiently small to fit
/// within the signed 64-bit integer range (A precision less than or equal
/// to 18 is sufficiently small).
-static inline int64_t ArrowDecimalGetIntUnsafe(struct ArrowDecimal* decimal) {
+static inline int64_t ArrowDecimalGetIntUnsafe(const struct ArrowDecimal*
decimal) {
return (int64_t)decimal->words[decimal->low_word_index];
}
/// \brief Copy the bytes of this decimal into a sufficiently large buffer
/// \ingroup nanoarrow-utils
-static inline void ArrowDecimalGetBytes(struct ArrowDecimal* decimal, uint8_t*
out) {
+static inline void ArrowDecimalGetBytes(const struct ArrowDecimal* decimal,
+ uint8_t* out) {
memcpy(out, decimal->words, decimal->n_words * sizeof(uint64_t));
}
/// \brief Returns 1 if the value represented by decimal is >= 0 or -1
otherwise
/// \ingroup nanoarrow-utils
-static inline int64_t ArrowDecimalSign(struct ArrowDecimal* decimal) {
+static inline int64_t ArrowDecimalSign(const struct ArrowDecimal* decimal) {
return 1 | ((int64_t)(decimal->words[decimal->high_word_index]) >> 63);
}
@@ -1078,7 +1087,7 @@ ArrowErrorCode ArrowSchemaInitFromType(struct
ArrowSchema* schema, enum ArrowTyp
/// and returns the number of characters required for the output if
/// n were sufficiently large. If recursive is non-zero, the result will
/// also include children.
-int64_t ArrowSchemaToString(struct ArrowSchema* schema, char* out, int64_t n,
+int64_t ArrowSchemaToString(const struct ArrowSchema* schema, char* out,
int64_t n,
char recursive);
/// \brief Set the format field of a schema from an ArrowType
@@ -1140,7 +1149,7 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct
ArrowSchema* schema, enum ArrowTyp
/// \brief Make a (recursive) copy of a schema
///
/// Allocates and copies fields of schema into schema_out.
-ArrowErrorCode ArrowSchemaDeepCopy(struct ArrowSchema* schema,
+ArrowErrorCode ArrowSchemaDeepCopy(const struct ArrowSchema* schema,
struct ArrowSchema* schema_out);
/// \brief Copy format into schema->format
@@ -1255,7 +1264,7 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct
ArrowBuffer* buffer,
/// compatibility.
struct ArrowSchemaView {
/// \brief A pointer to the schema represented by this view
- struct ArrowSchema* schema;
+ const struct ArrowSchema* schema;
/// \brief The data type represented by the schema
///
@@ -1338,7 +1347,8 @@ struct ArrowSchemaView {
/// \brief Initialize an ArrowSchemaView
ArrowErrorCode ArrowSchemaViewInit(struct ArrowSchemaView* schema_view,
- struct ArrowSchema* schema, struct
ArrowError* error);
+ const struct ArrowSchema* schema,
+ struct ArrowError* error);
/// @}
@@ -1567,7 +1577,7 @@ ArrowErrorCode ArrowArrayInitFromType(struct ArrowArray*
array,
/// Caller is responsible for calling the array->release callback if
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInitFromSchema(struct ArrowArray* array,
- struct ArrowSchema* schema,
+ const struct ArrowSchema* schema,
struct ArrowError* error);
/// \brief Initialize the contents of an ArrowArray from an ArrowArrayView
@@ -1575,7 +1585,7 @@ ArrowErrorCode ArrowArrayInitFromSchema(struct
ArrowArray* array,
/// Caller is responsible for calling the array->release callback if
/// NANOARROW_OK is returned.
ArrowErrorCode ArrowArrayInitFromArrayView(struct ArrowArray* array,
- struct ArrowArrayView* array_view,
+ const struct ArrowArrayView*
array_view,
struct ArrowError* error);
/// \brief Allocate the array->children array
@@ -1688,14 +1698,14 @@ static inline ArrowErrorCode
ArrowArrayAppendString(struct ArrowArray* array,
/// Returns NANOARROW_OK if value can be exactly represented by
/// the underlying storage type or EINVAL otherwise.
static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array,
- struct ArrowInterval*
value);
+ const struct
ArrowInterval* value);
/// \brief Append a decimal value to an array
///
/// Returns NANOARROW_OK if array is a decimal array with the appropriate
/// bitwidth or EINVAL otherwise.
static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array,
- struct ArrowDecimal*
value);
+ const struct
ArrowDecimal* value);
/// \brief Finish a nested array element
///
@@ -1763,7 +1773,7 @@ static inline void ArrowArrayViewMove(struct
ArrowArrayView* src,
/// \brief Initialize the contents of an ArrowArrayView from an ArrowSchema
ArrowErrorCode ArrowArrayViewInitFromSchema(struct ArrowArrayView* array_view,
- struct ArrowSchema* schema,
+ const struct ArrowSchema* schema,
struct ArrowError* error);
/// \brief Allocate the array_view->children array
@@ -1780,12 +1790,13 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length);
/// \brief Set buffer sizes and data pointers from an ArrowArray
ArrowErrorCode ArrowArrayViewSetArray(struct ArrowArrayView* array_view,
- struct ArrowArray* array, struct
ArrowError* error);
+ const struct ArrowArray* array,
+ struct ArrowError* error);
/// \brief Set buffer sizes and data pointers from an ArrowArray except for
those
/// that require dereferencing buffer content.
ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct ArrowArrayView* array_view,
- struct ArrowArray* array,
+ const struct ArrowArray* array,
struct ArrowError* error);
/// \brief Performs checks on the content of an ArrowArrayView
@@ -1804,59 +1815,60 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
/// \brief Check for a null element in an ArrowArrayView
-static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view,
int64_t i);
+static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView*
array_view,
+ int64_t i);
/// \brief Get the type id of a union array element
-static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView*
array_view,
+static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView*
array_view,
int64_t i);
/// \brief Get the child index of a union array element
-static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView*
array_view,
- int64_t i);
+static inline int8_t ArrowArrayViewUnionChildIndex(
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get the index to use into the relevant union child array
-static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView*
array_view,
- int64_t i);
+static inline int64_t ArrowArrayViewUnionChildOffset(
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an integer
///
/// This function does not check for null values, that values are actually
integers, or
/// that values are within a valid range for an int64.
-static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView*
array_view,
+static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView*
array_view,
int64_t i);
/// \brief Get an element in an ArrowArrayView as an unsigned integer
///
/// This function does not check for null values, that values are actually
integers, or
/// that values are within a valid range for a uint64.
-static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView*
array_view,
- int64_t i);
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as a double
///
/// This function does not check for null values, or
/// that values are within a valid range for a double.
-static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView*
array_view,
- int64_t i);
+static inline double ArrowArrayViewGetDoubleUnsafe(
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowStringView
///
/// This function does not check for null values.
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
- struct ArrowArrayView* array_view, int64_t i);
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowBufferView
///
/// This function does not check for null values.
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
- struct ArrowArrayView* array_view, int64_t i);
+ const struct ArrowArrayView* array_view, int64_t i);
/// \brief Get an element in an ArrowArrayView as an ArrowDecimal
///
/// This function does not check for null values. The out parameter must
/// be initialized with ArrowDecimalInit() with the proper parameters for this
/// type before calling this for the first time.
-static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView*
array_view,
+static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView*
array_view,
int64_t i, struct
ArrowDecimal* out);
/// @}
@@ -1893,7 +1905,7 @@ void ArrowBasicArrayStreamSetArray(struct
ArrowArrayStream* array_stream, int64_
/// array_stream must have been initialized with ArrowBasicArrayStreamInit().
/// This function uses ArrowArrayStreamInitFromSchema() and
ArrowArrayStreamSetArray()
/// to validate the contents of the arrays.
-ArrowErrorCode ArrowBasicArrayStreamValidate(struct ArrowArrayStream*
array_stream,
+ArrowErrorCode ArrowBasicArrayStreamValidate(const struct ArrowArrayStream*
array_stream,
struct ArrowError* error);
/// @}
@@ -2648,7 +2660,7 @@ static inline ArrowErrorCode
ArrowArrayStartAppending(struct ArrowArray* array)
}
// Initialize any data offset buffer with a single zero
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (private_data->layout.buffer_type[i] ==
NANOARROW_BUFFER_TYPE_DATA_OFFSET &&
private_data->layout.element_size_bits[i] == 64) {
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendInt64(ArrowArrayBuffer(array,
i), 0));
@@ -2671,7 +2683,7 @@ static inline ArrowErrorCode
ArrowArrayStartAppending(struct ArrowArray* array)
}
static inline ArrowErrorCode ArrowArrayShrinkToFit(struct ArrowArray* array) {
- for (int64_t i = 0; i < 3; i++) {
+ for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
struct ArrowBuffer* buffer = ArrowArrayBuffer(array, i);
NANOARROW_RETURN_NOT_OK(ArrowBufferResize(buffer, buffer->size_bytes, 1));
}
@@ -2786,7 +2798,7 @@ static inline ArrowErrorCode
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
struct ArrowBuffer* buffer;
int64_t size_bytes;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
buffer = ArrowArrayBuffer(array, i);
size_bytes = private_data->layout.element_size_bits[i] / 8;
@@ -3038,7 +3050,7 @@ static inline ArrowErrorCode
ArrowArrayAppendString(struct ArrowArray* array,
}
static inline ArrowErrorCode ArrowArrayAppendInterval(struct ArrowArray* array,
- struct ArrowInterval*
value) {
+ const struct
ArrowInterval* value) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
@@ -3081,7 +3093,7 @@ static inline ArrowErrorCode
ArrowArrayAppendInterval(struct ArrowArray* array,
}
static inline ArrowErrorCode ArrowArrayAppendDecimal(struct ArrowArray* array,
- struct ArrowDecimal*
value) {
+ const struct
ArrowDecimal* value) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
@@ -3213,7 +3225,8 @@ static inline void ArrowArrayViewMove(struct
ArrowArrayView* src,
ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED);
}
-static inline int8_t ArrowArrayViewIsNull(struct ArrowArrayView* array_view,
int64_t i) {
+static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView*
array_view,
+ int64_t i) {
const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
i += array_view->offset;
switch (array_view->storage_type) {
@@ -3228,7 +3241,7 @@ static inline int8_t ArrowArrayViewIsNull(struct
ArrowArrayView* array_view, int
}
}
-static inline int8_t ArrowArrayViewUnionTypeId(struct ArrowArrayView*
array_view,
+static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView*
array_view,
int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_DENSE_UNION:
@@ -3239,8 +3252,8 @@ static inline int8_t ArrowArrayViewUnionTypeId(struct
ArrowArrayView* array_view
}
}
-static inline int8_t ArrowArrayViewUnionChildIndex(struct ArrowArrayView*
array_view,
- int64_t i) {
+static inline int8_t ArrowArrayViewUnionChildIndex(
+ const struct ArrowArrayView* array_view, int64_t i) {
int8_t type_id = ArrowArrayViewUnionTypeId(array_view, i);
if (array_view->union_type_id_map == NULL) {
return type_id;
@@ -3249,8 +3262,8 @@ static inline int8_t ArrowArrayViewUnionChildIndex(struct
ArrowArrayView* array_
}
}
-static inline int64_t ArrowArrayViewUnionChildOffset(struct ArrowArrayView*
array_view,
- int64_t i) {
+static inline int64_t ArrowArrayViewUnionChildOffset(
+ const struct ArrowArrayView* array_view, int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_DENSE_UNION:
return array_view->buffer_views[1].data.as_int32[i];
@@ -3261,8 +3274,8 @@ static inline int64_t
ArrowArrayViewUnionChildOffset(struct ArrowArrayView* arra
}
}
-static inline int64_t ArrowArrayViewListChildOffset(struct ArrowArrayView*
array_view,
- int64_t i) {
+static inline int64_t ArrowArrayViewListChildOffset(
+ const struct ArrowArrayView* array_view, int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_LIST:
return array_view->buffer_views[1].data.as_int32[i];
@@ -3273,9 +3286,9 @@ static inline int64_t
ArrowArrayViewListChildOffset(struct ArrowArrayView* array
}
}
-static inline int64_t ArrowArrayViewGetIntUnsafe(struct ArrowArrayView*
array_view,
+static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView*
array_view,
int64_t i) {
- struct ArrowBufferView* data_view = &array_view->buffer_views[1];
+ const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
i += array_view->offset;
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
@@ -3305,10 +3318,10 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(struct
ArrowArrayView* array_vi
}
}
-static inline uint64_t ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView*
array_view,
- int64_t i) {
+static inline uint64_t ArrowArrayViewGetUIntUnsafe(
+ const struct ArrowArrayView* array_view, int64_t i) {
i += array_view->offset;
- struct ArrowBufferView* data_view = &array_view->buffer_views[1];
+ const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
return data_view->data.as_int64[i];
@@ -3337,10 +3350,10 @@ static inline uint64_t
ArrowArrayViewGetUIntUnsafe(struct ArrowArrayView* array_
}
}
-static inline double ArrowArrayViewGetDoubleUnsafe(struct ArrowArrayView*
array_view,
- int64_t i) {
+static inline double ArrowArrayViewGetDoubleUnsafe(
+ const struct ArrowArrayView* array_view, int64_t i) {
i += array_view->offset;
- struct ArrowBufferView* data_view = &array_view->buffer_views[1];
+ const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
switch (array_view->storage_type) {
case NANOARROW_TYPE_INT64:
return (double)data_view->data.as_int64[i];
@@ -3370,9 +3383,9 @@ static inline double ArrowArrayViewGetDoubleUnsafe(struct
ArrowArrayView* array_
}
static inline struct ArrowStringView ArrowArrayViewGetStringUnsafe(
- struct ArrowArrayView* array_view, int64_t i) {
+ const struct ArrowArrayView* array_view, int64_t i) {
i += array_view->offset;
- struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
+ const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const char* data_view = array_view->buffer_views[2].data.as_char;
struct ArrowStringView view;
@@ -3403,9 +3416,9 @@ static inline struct ArrowStringView
ArrowArrayViewGetStringUnsafe(
}
static inline struct ArrowBufferView ArrowArrayViewGetBytesUnsafe(
- struct ArrowArrayView* array_view, int64_t i) {
+ const struct ArrowArrayView* array_view, int64_t i) {
i += array_view->offset;
- struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
+ const struct ArrowBufferView* offsets_view = &array_view->buffer_views[1];
const uint8_t* data_view = array_view->buffer_views[2].data.as_uint8;
struct ArrowBufferView view;
@@ -3436,8 +3449,8 @@ static inline struct ArrowBufferView
ArrowArrayViewGetBytesUnsafe(
return view;
}
-static inline void ArrowArrayViewGetIntervalUnsafe(struct ArrowArrayView*
array_view,
- int64_t i, struct
ArrowInterval* out) {
+static inline void ArrowArrayViewGetIntervalUnsafe(
+ const struct ArrowArrayView* array_view, int64_t i, struct ArrowInterval*
out) {
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
switch (array_view->storage_type) {
case NANOARROW_TYPE_INTERVAL_MONTHS: {
@@ -3463,7 +3476,7 @@ static inline void ArrowArrayViewGetIntervalUnsafe(struct
ArrowArrayView* array_
}
}
-static inline void ArrowArrayViewGetDecimalUnsafe(struct ArrowArrayView*
array_view,
+static inline void ArrowArrayViewGetDecimalUnsafe(const struct ArrowArrayView*
array_view,
int64_t i, struct
ArrowDecimal* out) {
i += array_view->offset;
const uint8_t* data_view = array_view->buffer_views[1].data.as_uint8;
diff --git a/dist/nanoarrow_device.c b/dist/nanoarrow_device.c
index 4be7a93..c4df3d1 100644
--- a/dist/nanoarrow_device.c
+++ b/dist/nanoarrow_device.c
@@ -408,7 +408,7 @@ static ArrowErrorCode
ArrowDeviceArrayViewCopyInternal(struct ArrowDevice* devic
dst->offset = src->offset;
dst->null_count = src->null_count;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (src->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
diff --git a/dist/nanoarrow_ipc.c b/dist/nanoarrow_ipc.c
index 437b61a..1e8b47c 100644
--- a/dist/nanoarrow_ipc.c
+++ b/dist/nanoarrow_ipc.c
@@ -21426,7 +21426,7 @@ static void ArrowIpcDecoderInitFields(struct
ArrowIpcField* fields,
field->array = array;
field->buffer_offset = *n_buffers;
- for (int i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
*n_buffers += array_view->layout.buffer_type[i] !=
NANOARROW_BUFFER_TYPE_NONE;
}
@@ -21795,7 +21795,7 @@ static int ArrowIpcDecoderWalkSetArrayView(struct
ArrowIpcArraySetter* setter,
array_view->null_count = ns(FieldNode_null_count(field));
setter->field_i += 1;
- for (int64_t i = 0; i < 3; i++) {
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
diff --git a/dist/nanoarrow_testing.hpp b/dist/nanoarrow_testing.hpp
index 454a6da..103f22e 100644
--- a/dist/nanoarrow_testing.hpp
+++ b/dist/nanoarrow_testing.hpp
@@ -16,6 +16,7 @@
// under the License.
#include <iostream>
+#include <limits>
#include <sstream>
#include <string>
@@ -439,7 +440,7 @@ class TestingJSONWriter {
}
} else {
// No need to quote smaller ints (i.e., 123456)
- out << values[0];
+ out << static_cast<int64_t>(values[0]);
for (int64_t i = 1; i < n_values; i++) {
out << ", " << static_cast<int64_t>(values[i]);
}
@@ -621,36 +622,76 @@ class TestingJSONReader {
using json = nlohmann::json;
public:
- ArrowErrorCode ReadSchema(const std::string& value, ArrowSchema* out,
+ /// \brief Read JSON representing a Schema
+ ///
+ /// Reads a JSON object in the form `{"fields": [...], "metadata": [...]}`,
+ /// propagating `out` on success.
+ ArrowErrorCode ReadSchema(const std::string& schema_json, ArrowSchema* out,
ArrowError* error = nullptr) {
try {
- auto obj = json::parse(value);
+ auto obj = json::parse(schema_json);
nanoarrow::UniqueSchema schema;
NANOARROW_RETURN_NOT_OK(SetSchema(schema.get(), obj, error));
ArrowSchemaMove(schema.get(), out);
return NANOARROW_OK;
- } catch (std::exception& e) {
+ } catch (json::exception& e) {
ArrowErrorSet(error, "Exception in TestingJSONReader::ReadSchema(): %s",
e.what());
return EINVAL;
}
}
- ArrowErrorCode ReadField(const std::string& value, ArrowSchema* out,
+ /// \brief Read JSON representing a Field
+ ///
+ /// Read a JSON object in the form `{"name" : "col", "type": {...}, ...}`,
+ /// propagating `out` on success.
+ ArrowErrorCode ReadField(const std::string& field_json, ArrowSchema* out,
ArrowError* error = nullptr) {
try {
- auto obj = json::parse(value);
+ auto obj = json::parse(field_json);
nanoarrow::UniqueSchema schema;
NANOARROW_RETURN_NOT_OK(SetField(schema.get(), obj, error));
ArrowSchemaMove(schema.get(), out);
return NANOARROW_OK;
- } catch (std::exception& e) {
+ } catch (json::exception& e) {
ArrowErrorSet(error, "Exception in TestingJSONReader::ReadField(): %s",
e.what());
return EINVAL;
}
}
+ /// \brief Read JSON representing a Column
+ ///
+ /// Read a JSON object in the form
+ /// `{"name": "col", "count": 123, "VALIDITY": [...], ...}`, propagating
+ /// `out` on success.
+ ArrowErrorCode ReadColumn(const std::string& column_json, const ArrowSchema*
schema,
+ ArrowArray* out, ArrowError* error = nullptr) {
+ try {
+ auto obj = json::parse(column_json);
+
+ // ArrowArrayView to enable validation
+ nanoarrow::UniqueArrayView array_view;
+ NANOARROW_RETURN_NOT_OK(ArrowArrayViewInitFromSchema(
+ array_view.get(), const_cast<ArrowSchema*>(schema), error));
+
+ // ArrowArray to hold memory
+ nanoarrow::UniqueArray array;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayInitFromSchema(array.get(),
const_cast<ArrowSchema*>(schema), error));
+
+ // Parse the JSON into the array
+ NANOARROW_RETURN_NOT_OK(SetArrayColumn(obj, array_view.get(),
array.get(), error));
+
+ // Return the result
+ ArrowArrayMove(array.get(), out);
+ return NANOARROW_OK;
+ } catch (json::exception& e) {
+ ArrowErrorSet(error, "Exception in TestingJSONReader::ReadColumn(): %s",
e.what());
+ return EINVAL;
+ }
+ }
+
private:
ArrowErrorCode SetSchema(ArrowSchema* schema, const json& value, ArrowError*
error) {
NANOARROW_RETURN_NOT_OK(
@@ -1053,6 +1094,402 @@ class TestingJSONReader {
return NANOARROW_OK;
}
+ // Recursively populate `array` (and its children) from the JSON Column object
+ // `value`, using `array_view` to validate the result. Error messages are
+ // prefixed with `parent_error_prefix` followed by a description of this column.
+ ArrowErrorCode SetArrayColumn(const json& value, ArrowArrayView* array_view,
+                               ArrowArray* array, ArrowError* error,
+                               const std::string& parent_error_prefix = "") {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_object(), error, "Expected Column to be a JSON object"));
+
+   // The name is resolved first so that every later message can mention it
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.contains("name"), error, "Column missing key 'name'"));
+
+   const auto& column_name = value["name"];
+   NANOARROW_RETURN_NOT_OK(Check(column_name.is_null() || column_name.is_string(), error,
+                                 "Column name must be string or null"));
+
+   const std::string column_label =
+       column_name.is_string()
+           ? "-> Column '" + column_name.get<std::string>() + "' "
+           : std::string("-> Column <name is null> ");
+   const std::string error_prefix = parent_error_prefix + column_label;
+
+   // Children are required whenever the view expects any and are recursed
+   // into whenever the key is present
+   const bool has_children_key = value.contains("children");
+   NANOARROW_RETURN_NOT_OK(Check(array_view->n_children == 0 || has_children_key, error,
+                                 error_prefix + "missing key children"));
+
+   if (has_children_key) {
+     const auto& children = value["children"];
+     NANOARROW_RETURN_NOT_OK(
+         Check(children.is_array(), error, error_prefix + "children must be array"));
+     NANOARROW_RETURN_NOT_OK(Check(children.size() == array_view->n_children, error,
+                                   error_prefix + "children has incorrect size"));
+
+     for (int64_t child_i = 0; child_i < array_view->n_children; child_i++) {
+       NANOARROW_RETURN_NOT_OK(
+           SetArrayColumn(children[child_i], array_view->children[child_i],
+                          array->children[child_i], error, error_prefix));
+     }
+   }
+
+   // Fill every fixed buffer slot from the matching JSON key
+   for (int buffer_i = 0; buffer_i < NANOARROW_MAX_FIXED_BUFFERS; buffer_i++) {
+     ArrowErrorCode buffer_result =
+         SetArrayColumnBuffers(value, array_view, array, buffer_i, error);
+     NANOARROW_RETURN_NOT_OK(PrefixError(buffer_result, error, error_prefix));
+   }
+
+   // The element count comes from the mandatory integer "count" key
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.contains("count"), error, error_prefix + "missing key 'count'"));
+   const auto& count_json = value["count"];
+   NANOARROW_RETURN_NOT_OK(Check(count_json.is_number_integer(), error,
+                                 error_prefix + "count must be integer"));
+   array_view->length = count_json.get<int64_t>();
+
+   // Point the ArrayView at the freshly built buffers by hand. This is because
+   // ArrowArrayInitFromSchema() doesn't support custom type ids for unions but
+   // the ArrayView does (otherwise ArrowArrayFinishBuilding() would work).
+   for (int buffer_i = 0; buffer_i < NANOARROW_MAX_FIXED_BUFFERS; buffer_i++) {
+     const ArrowBuffer* owned = ArrowArrayBuffer(array, buffer_i);
+     ArrowBufferView& view = array_view->buffer_views[buffer_i];
+     view.data.as_uint8 = owned->data;
+     view.size_bytes = owned->size_bytes;
+   }
+
+   // Run full validation on the view before touching the array
+   ArrowErrorCode validate_result =
+       ArrowArrayViewValidate(array_view, NANOARROW_VALIDATION_LEVEL_FULL, error);
+   NANOARROW_RETURN_NOT_OK(
+       PrefixError(validate_result, error, error_prefix + "failed to validate: "));
+
+   // Flush length and buffer pointers to the Array
+   array->length = array_view->length;
+   NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+       ArrowArrayFinishBuilding(array, NANOARROW_VALIDATION_LEVEL_NONE, nullptr), error);
+
+   return NANOARROW_OK;
+ }
+
+ // Fill buffer number `buffer_i` of `array` from the JSON Column object
+ // `value`. The JSON key that is read ("VALIDITY", "TYPE_ID", "OFFSET" or
+ // "DATA") is selected by the buffer type in `array_view->layout`; DATA
+ // buffers are further dispatched on `array_view->storage_type`.
+ ArrowErrorCode SetArrayColumnBuffers(const json& value, ArrowArrayView* array_view,
+                                      ArrowArray* array, int buffer_i,
+                                      ArrowError* error) {
+   ArrowBuffer* buffer = ArrowArrayBuffer(array, buffer_i);
+
+   // Produces the same "missing key '<key>'" message for every buffer kind
+   auto require_key = [&](const std::string& key) {
+     return Check(value.contains(key), error, "missing key '" + key + "'");
+   };
+
+   switch (array_view->layout.buffer_type[buffer_i]) {
+     case NANOARROW_BUFFER_TYPE_NONE:
+       break;
+
+     case NANOARROW_BUFFER_TYPE_VALIDITY:
+       NANOARROW_RETURN_NOT_OK(require_key("VALIDITY"));
+       NANOARROW_RETURN_NOT_OK(
+           SetBufferBitmap(value["VALIDITY"], ArrowArrayValidityBitmap(array), error));
+       break;
+
+     case NANOARROW_BUFFER_TYPE_TYPE_ID:
+       NANOARROW_RETURN_NOT_OK(require_key("TYPE_ID"));
+       NANOARROW_RETURN_NOT_OK(SetBufferInt<int8_t>(value["TYPE_ID"], buffer, error));
+       break;
+
+     case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
+       NANOARROW_RETURN_NOT_OK(require_key("OFFSET"));
+       NANOARROW_RETURN_NOT_OK(SetBufferInt<int32_t>(value["OFFSET"], buffer, error));
+       break;
+
+     case NANOARROW_BUFFER_TYPE_DATA_OFFSET: {
+       NANOARROW_RETURN_NOT_OK(require_key("OFFSET"));
+       const auto& offsets_json = value["OFFSET"];
+
+       // Offsets are read as int32_t when the layout says 32 bits, else int64_t
+       const bool is_32_bit = array_view->layout.element_size_bits[buffer_i] == 32;
+       NANOARROW_RETURN_NOT_OK(is_32_bit
+                                   ? SetBufferInt<int32_t>(offsets_json, buffer, error)
+                                   : SetBufferInt<int64_t>(offsets_json, buffer, error));
+       break;
+     }
+
+     case NANOARROW_BUFFER_TYPE_DATA: {
+       NANOARROW_RETURN_NOT_OK(require_key("DATA"));
+       const auto& data_json = value["DATA"];
+
+       switch (array_view->storage_type) {
+         case NANOARROW_TYPE_BOOL: {
+           // Booleans are built as a bitmap whose buffer is then moved into place
+           nanoarrow::UniqueBitmap bits;
+           NANOARROW_RETURN_NOT_OK(SetBufferBitmap(data_json, bits.get(), error));
+           ArrowBufferMove(&bits->buffer, buffer);
+           return NANOARROW_OK;
+         }
+
+         case NANOARROW_TYPE_INT8:
+           return SetBufferInt<int8_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_UINT8:
+           return SetBufferInt<uint8_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_INT16:
+           return SetBufferInt<int16_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_UINT16:
+           return SetBufferInt<uint16_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_INT32:
+           return SetBufferInt<int32_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_UINT32:
+           return SetBufferInt<uint32_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_INT64:
+           return SetBufferInt<int64_t>(data_json, buffer, error);
+         case NANOARROW_TYPE_UINT64:
+           return SetBufferInt<uint64_t, uint64_t>(data_json, buffer, error);
+
+         case NANOARROW_TYPE_FLOAT:
+           return SetBufferFloatingPoint<float>(data_json, buffer, error);
+         case NANOARROW_TYPE_DOUBLE:
+           return SetBufferFloatingPoint<double>(data_json, buffer, error);
+
+         // Variable-length types check DATA against the offsets held in the
+         // preceding buffer slot
+         case NANOARROW_TYPE_STRING:
+           return SetBufferString<int32_t>(
+               data_json, ArrowArrayBuffer(array, buffer_i - 1), buffer, error);
+         case NANOARROW_TYPE_LARGE_STRING:
+           return SetBufferString<int64_t>(
+               data_json, ArrowArrayBuffer(array, buffer_i - 1), buffer, error);
+         case NANOARROW_TYPE_BINARY:
+           return SetBufferBinary<int32_t>(
+               data_json, ArrowArrayBuffer(array, buffer_i - 1), buffer, error);
+         case NANOARROW_TYPE_LARGE_BINARY:
+           return SetBufferBinary<int64_t>(
+               data_json, ArrowArrayBuffer(array, buffer_i - 1), buffer, error);
+
+         case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+           return SetBufferFixedSizeBinary(
+               data_json, buffer, array_view->layout.element_size_bits[buffer_i] / 8,
+               error);
+
+         default:
+           ArrowErrorSet(error, "storage type %s DATA buffer not supported",
+                         ArrowTypeString(array_view->storage_type));
+           return ENOTSUP;
+       }
+       break;
+     }
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append one bit to `bitmap` for each element of the JSON array `value`
+ ArrowErrorCode SetBufferBitmap(const json& value, ArrowBitmap* bitmap,
+                                ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_array(), error, "bitmap buffer must be array"));
+
+   // The documentation specifies [1, 0, 1] but some example files contain
+   // [true, false, true]; both spellings are accepted for simplicity.
+   for (auto it = value.begin(); it != value.end(); ++it) {
+     const json& item = *it;
+     const bool is_supported = item.is_boolean() || item.is_number_integer();
+     NANOARROW_RETURN_NOT_OK(
+         Check(is_supported, error, "bitmap item must be bool or integer"));
+     NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBitmapAppend(bitmap, item.get<int>(), 1),
+                                        error);
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append each element of the JSON array `value` to `buffer` as a T.
+ // BiggerT is forwarded unchanged to SetBufferIntItem().
+ template <typename T, typename BiggerT = int64_t>
+ ArrowErrorCode SetBufferInt(const json& value, ArrowBuffer* buffer, ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(Check(value.is_array(), error, "int buffer must be array"));
+
+   // Checked by hand: NANOARROW_RETURN_NOT_OK() interacts poorly with multiple
+   // template arguments
+   for (const auto& element : value) {
+     const ArrowErrorCode code = SetBufferIntItem<T, BiggerT>(element, buffer, error);
+     if (code != NANOARROW_OK) {
+       return code;
+     }
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append a single JSON integer to `buffer` as a T.
+ //
+ // `item` may be a JSON integer or a JSON string holding one (the string is
+ // re-parsed and the result handled recursively). BiggerT is the intermediate
+ // type the value is converted to before the range check against T. A non-OK
+ // code is returned, with `error` set, when the item is not an integer, is
+ // negative for an unsigned T, or does not fit in T.
+ template <typename T, typename BiggerT = int64_t>
+ ArrowErrorCode SetBufferIntItem(const json& item, ArrowBuffer* buffer,
+                                 ArrowError* error) {
+   if (item.is_string()) {
+     try {
+       // The JSON parser here can handle up to 2^64 - 1
+       auto item_int = json::parse(item.get<std::string>());
+       return SetBufferIntItem<T, BiggerT>(item_int, buffer, error);
+     } catch (json::parse_error& e) {
+       ArrowErrorSet(error,
+                     "integer buffer item encoded as string must parse as integer: %s",
+                     item.dump().c_str());
+       return EINVAL;
+     }
+   }
+
+   NANOARROW_RETURN_NOT_OK(
+       Check(item.is_number_integer(), error,
+             "integer buffer item must be integer number or string"));
+   NANOARROW_RETURN_NOT_OK(
+       Check(std::numeric_limits<T>::is_signed || item.is_number_unsigned(), error,
+             "expected unsigned integer buffer item but found signed integer '" +
+                 item.dump() + "'"));
+
+   // A non-negative JSON integer may be held as an unsigned 64-bit value. If it
+   // exceeds the maximum of BiggerT, the conversion below would wrap (e.g. to a
+   // negative int64_t) and slip past the range check, so reject it up front.
+   if (item.is_number_unsigned()) {
+     NANOARROW_RETURN_NOT_OK(
+         Check(item.get<uint64_t>() <=
+                   static_cast<uint64_t>(std::numeric_limits<BiggerT>::max()),
+               error, "integer buffer item '" + item.dump() + "' outside type limits"));
+   }
+
+   auto item_int = item.get<BiggerT>();
+
+   NANOARROW_RETURN_NOT_OK(
+       Check(item_int >= std::numeric_limits<T>::lowest() &&
+                 item_int <= std::numeric_limits<T>::max(),
+             error, "integer buffer item '" + item.dump() + "' outside type limits"));
+
+   // In range for T at this point, so the narrowing is value-preserving
+   T buffer_value = static_cast<T>(item_int);
+   NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+       ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+
+   return NANOARROW_OK;
+ }
+
+ // Append each element of the JSON array `value` to `buffer` as a T, rejecting
+ // items that are not numbers or that fall outside the finite range of T.
+ template <typename T>
+ ArrowErrorCode SetBufferFloatingPoint(const json& value, ArrowBuffer* buffer,
+                                       ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_array(), error, "floatingpoint buffer must be array"));
+
+   // Limits of T widened to double, the type every item is read as
+   const double type_lowest = std::numeric_limits<T>::lowest();
+   const double type_max = std::numeric_limits<T>::max();
+
+   for (const auto& element : value) {
+     NANOARROW_RETURN_NOT_OK(
+         Check(element.is_number(), error, "floatingpoint buffer item must be number"));
+
+     const double item_dbl = element.get<double>();
+     const bool in_range = item_dbl >= type_lowest && item_dbl <= type_max;
+     NANOARROW_RETURN_NOT_OK(Check(
+         in_range, error,
+         "floatingpoint buffer item '" + element.dump() + "' outside type limits"));
+
+     T buffer_value = static_cast<T>(item_dbl);
+     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+         ArrowBufferAppend(buffer, &buffer_value, sizeof(T)), error);
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append the strings of the JSON array `value` to `data`, checking that the
+ // already-populated `offsets` buffer (elements of type T) agrees with the
+ // cumulative byte sizes of the strings.
+ template <typename T>
+ ArrowErrorCode SetBufferString(const json& value, ArrowBuffer* offsets,
+                                ArrowBuffer* data, ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_array(), error, "utf8 data buffer must be array"));
+
+   // There must be exactly one more offset than there are values
+   const size_t n_items = value.size();
+   NANOARROW_RETURN_NOT_OK(
+       Check(offsets->size_bytes == ((n_items + 1) * sizeof(T)), error,
+             "Expected offset buffer with " + std::to_string(n_items) + " elements"));
+
+   const T* expected_offsets = reinterpret_cast<const T*>(offsets->data);
+   NANOARROW_RETURN_NOT_OK(
+       Check(expected_offsets[0] == 0, error, "first offset must be zero"));
+
+   int64_t running_size = 0;
+   for (size_t i = 0; i < n_items; i++) {
+     const auto& item = value[i];
+     NANOARROW_RETURN_NOT_OK(
+         Check(item.is_string(), error, "utf8 data buffer item must be string"));
+     auto item_str = item.get<std::string>();
+
+     // Copy the bytes of this string onto the end of the data buffer
+     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+         ArrowBufferAppend(data, reinterpret_cast<const uint8_t*>(item_str.data()),
+                           item_str.size()),
+         error);
+
+     // The offset following this item must equal the bytes written so far
+     running_size += item_str.size();
+     NANOARROW_RETURN_NOT_OK(
+         Check(expected_offsets[i + 1] == running_size, error,
+               "Expected offset value " + std::to_string(running_size) +
+                   " at utf8 data buffer item " + item.dump()));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append the hex-encoded items of the JSON array `value` to `data`, checking
+ // that the already-populated `offsets` buffer (elements of type T) agrees
+ // with the size of `data` after each item.
+ template <typename T>
+ ArrowErrorCode SetBufferBinary(const json& value, ArrowBuffer* offsets,
+                                ArrowBuffer* data, ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_array(), error, "binary data buffer must be array"));
+
+   // There must be exactly one more offset than there are values
+   const size_t n_items = value.size();
+   NANOARROW_RETURN_NOT_OK(
+       Check(offsets->size_bytes == ((n_items + 1) * sizeof(T)), error,
+             "Expected offset buffer with " + std::to_string(n_items) + " elements"));
+
+   const T* expected_offsets = reinterpret_cast<const T*>(offsets->data);
+   NANOARROW_RETURN_NOT_OK(
+       Check(expected_offsets[0] == 0, error, "first offset must be zero"));
+
+   for (size_t i = 0; i < n_items; i++) {
+     const auto& item = value[i];
+     NANOARROW_RETURN_NOT_OK(AppendBinaryElement(item, data, error));
+
+     // The offset following this item must equal the size of the data buffer
+     NANOARROW_RETURN_NOT_OK(
+         Check(expected_offsets[i + 1] == data->size_bytes, error,
+               "Expected offset value " + std::to_string(data->size_bytes) +
+                   " at binary data buffer item " + item.dump()));
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Append the hex-encoded items of the JSON array `value` to `data`, requiring
+ // every decoded item to be exactly `fixed_size` bytes long.
+ ArrowErrorCode SetBufferFixedSizeBinary(const json& value, ArrowBuffer* data,
+                                         int64_t fixed_size, ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(value.is_array(), error, "binary data buffer must be array"));
+
+   // Size of `data` before the item currently being appended
+   int64_t size_before_item = 0;
+
+   for (const auto& element : value) {
+     NANOARROW_RETURN_NOT_OK(AppendBinaryElement(element, data, error));
+
+     const int64_t appended_bytes = data->size_bytes - size_before_item;
+     NANOARROW_RETURN_NOT_OK(
+         Check(appended_bytes == fixed_size, error,
+               "Expected fixed size binary value of size " + std::to_string(fixed_size) +
+                   " at binary data buffer item " + element.dump()));
+
+     size_before_item = data->size_bytes;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // Decode the hex-encoded JSON string `item` and append the bytes to `data`.
+ //
+ // The string must consist of an even number of hexadecimal digits (upper or
+ // lower case); anything else produces a non-OK result with `error` set.
+ ArrowErrorCode AppendBinaryElement(const json& item, ArrowBuffer* data,
+                                    ArrowError* error) {
+   NANOARROW_RETURN_NOT_OK(
+       Check(item.is_string(), error, "binary data buffer item must be string"));
+   auto item_str = item.get<std::string>();
+
+   NANOARROW_RETURN_NOT_OK(Check((item_str.size() % 2) == 0, error,
+                                 "binary data buffer item must have even size"));
+   int64_t item_size_bytes = static_cast<int64_t>(item_str.size() / 2);
+
+   // Digits are decoded by hand: strtoul() also accepts leading whitespace and
+   // a sign, so pairs such as " f" or "-1" would otherwise pass as valid bytes.
+   auto hex_digit_value = [](char c) -> int {
+     if (c >= '0' && c <= '9') return c - '0';
+     if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+     if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+     return -1;
+   };
+
+   NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowBufferReserve(data, item_size_bytes), error);
+   for (int64_t i = 0; i < item_size_bytes; i++) {
+     const int high = hex_digit_value(item_str[2 * i]);
+     const int low = hex_digit_value(item_str[2 * i + 1]);
+     NANOARROW_RETURN_NOT_OK(
+         Check(high >= 0 && low >= 0, error,
+               "binary data buffer item must contain a valid hex-encoded byte string"));
+
+     // Space was reserved above, so the byte is written in place
+     data->data[data->size_bytes] = static_cast<uint8_t>((high << 4) | low);
+     data->size_bytes++;
+   }
+
+   return NANOARROW_OK;
+ }
+
+ // When `value` is an error and `error` is non-null, prepend `prefix` to the
+ // message currently stored in `error`. Always returns `value` unchanged.
+ ArrowErrorCode PrefixError(ArrowErrorCode value, ArrowError* error,
+                            const std::string& prefix) {
+   if (value == NANOARROW_OK || error == nullptr) {
+     return value;
+   }
+
+   // Build the combined text first because ArrowErrorSet() writes into `error`
+   const std::string prefixed = prefix + error->message;
+   ArrowErrorSet(error, "%s", prefixed.c_str());
+   return value;
+ }
+
ArrowErrorCode Check(bool value, ArrowError* error, const std::string& err) {
if (value) {
return NANOARROW_OK;