This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-adbc.git
The following commit(s) were added to refs/heads/main by this push:
new 2087d5000 chore(c/vendor): Update nanoarrow to 0.6.0 (#2220)
2087d5000 is described below
commit 2087d5000d73c453340ab1cfe22c63ee2df925f4
Author: Dewey Dunnington <[email protected]>
AuthorDate: Tue Oct 15 23:08:21 2024 +0000
chore(c/vendor): Update nanoarrow to 0.6.0 (#2220)
This PR updates the vendored nanoarrow to 0.6.0!
---
c/subprojects/nanoarrow.wrap | 10 +-
c/vendor/nanoarrow/nanoarrow.c | 701 ++++++++++++++++++++++++++++++++-------
c/vendor/nanoarrow/nanoarrow.h | 600 +++++++++++++++++++++++++++++----
c/vendor/nanoarrow/nanoarrow.hpp | 13 +-
c/vendor/vendor_nanoarrow.sh | 22 +-
5 files changed, 1146 insertions(+), 200 deletions(-)
diff --git a/c/subprojects/nanoarrow.wrap b/c/subprojects/nanoarrow.wrap
index 1a7e85693..612d1118a 100644
--- a/c/subprojects/nanoarrow.wrap
+++ b/c/subprojects/nanoarrow.wrap
@@ -1,10 +1,8 @@
[wrap-file]
-directory = arrow-nanoarrow-apache-arrow-nanoarrow-0.5.0
-source_url =
https://github.com/apache/arrow-nanoarrow/archive/refs/tags/apache-arrow-nanoarrow-0.5.0.tar.gz
-source_filename = apache-arrow-nanoarrow-0.5.0.tar.gz
-source_hash = 0ceeaa1fb005dbc89c8c7d1b39f2dba07344e40aa9d885ee25fb55b4d57e331a
-source_fallback_url =
https://github.com/mesonbuild/wrapdb/releases/download/nanoarrow_0.5.0-1/apache-arrow-nanoarrow-0.5.0.tar.gz
-wrapdb_version = 0.5.0-1
+directory = arrow-nanoarrow-33d2c8b973d8f8f424e02ac92ddeaace2a92f8dd
+source_url =
https://github.com/apache/arrow-nanoarrow/archive/33d2c8b973d8f8f424e02ac92ddeaace2a92f8dd.tar.gz
+source_filename =
arrow-nanoarrow-33d2c8b973d8f8f424e02ac92ddeaace2a92f8dd.tar.gz
+source_hash = be4d2a6f1467793fe1b02c6ecf12383ed9ecf29557531715a3b9e11578ab18e8
[provide]
nanoarrow = nanoarrow_dep
diff --git a/c/vendor/nanoarrow/nanoarrow.c b/c/vendor/nanoarrow/nanoarrow.c
index 9677a0e53..8f2659881 100644
--- a/c/vendor/nanoarrow/nanoarrow.c
+++ b/c/vendor/nanoarrow/nanoarrow.c
@@ -66,6 +66,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum
ArrowType storage_type) {
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
+ case NANOARROW_TYPE_RUN_END_ENCODED:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;
@@ -178,6 +179,16 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum
ArrowType storage_type) {
layout->buffer_data_type[2] = NANOARROW_TYPE_BINARY;
break;
+ case NANOARROW_TYPE_BINARY_VIEW:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_BINARY_VIEW;
+ layout->element_size_bits[1] = 128;
+ break;
+ case NANOARROW_TYPE_STRING_VIEW:
+ layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_DATA;
+ layout->buffer_data_type[1] = NANOARROW_TYPE_STRING_VIEW;
+ layout->element_size_bits[1] = 128;
+
default:
break;
}
@@ -345,6 +356,7 @@ ArrowErrorCode ArrowDecimalSetDigits(struct ArrowDecimal*
decimal,
//
https://github.com/apache/arrow/blob/cd3321b28b0c9703e5d7105d6146c1270bbadd7f/cpp/src/arrow/util/decimal.cc#L365
ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal*
decimal,
struct ArrowBuffer* buffer) {
+ NANOARROW_DCHECK(decimal->n_words == 2 || decimal->n_words == 4);
int is_negative = ArrowDecimalSign(decimal) < 0;
uint64_t words_little_endian[4];
@@ -468,6 +480,7 @@ ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const
struct ArrowDecimal* decim
// under the License.
#include <errno.h>
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -552,8 +565,12 @@ static const char* ArrowSchemaFormatTemplate(enum
ArrowType type) {
return "u";
case NANOARROW_TYPE_LARGE_STRING:
return "U";
+ case NANOARROW_TYPE_STRING_VIEW:
+ return "vu";
case NANOARROW_TYPE_BINARY:
return "z";
+ case NANOARROW_TYPE_BINARY_VIEW:
+ return "vz";
case NANOARROW_TYPE_LARGE_BINARY:
return "Z";
@@ -576,6 +593,8 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType
type) {
return "+s";
case NANOARROW_TYPE_MAP:
return "+m";
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return "+r";
default:
return NULL;
@@ -607,6 +626,13 @@ static int ArrowSchemaInitChildrenIfNeeded(struct
ArrowSchema* schema,
NANOARROW_RETURN_NOT_OK(
ArrowSchemaSetName(schema->children[0]->children[1], "value"));
break;
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 2));
+ ArrowSchemaInit(schema->children[0]);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0],
"run_ends"));
+ schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
+ ArrowSchemaInit(schema->children[1]);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1],
"values"));
default:
break;
}
@@ -676,10 +702,10 @@ ArrowErrorCode ArrowSchemaSetTypeFixedSize(struct
ArrowSchema* schema,
int n_chars;
switch (type) {
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- n_chars = snprintf(buffer, sizeof(buffer), "w:%d", (int)fixed_size);
+ n_chars = snprintf(buffer, sizeof(buffer), "w:%" PRId32, fixed_size);
break;
case NANOARROW_TYPE_FIXED_SIZE_LIST:
- n_chars = snprintf(buffer, sizeof(buffer), "+w:%d", (int)fixed_size);
+ n_chars = snprintf(buffer, sizeof(buffer), "+w:%" PRId32, fixed_size);
break;
default:
return EINVAL;
@@ -729,6 +755,28 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct
ArrowSchema* schema, enum ArrowT
return ArrowSchemaSetFormat(schema, buffer);
}
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+ enum ArrowType run_end_type) {
+ switch (run_end_type) {
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_INT64:
+ break;
+ default:
+ return EINVAL;
+ }
+
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(
+ schema, ArrowSchemaFormatTemplate(NANOARROW_TYPE_RUN_END_ENCODED)));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaInitChildrenIfNeeded(schema, NANOARROW_TYPE_RUN_END_ENCODED));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0],
run_end_type));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_UNINITIALIZED));
+
+ return NANOARROW_OK;
+}
+
static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) {
switch (time_unit) {
case NANOARROW_TIME_UNIT_SECOND:
@@ -850,7 +898,7 @@ ArrowErrorCode ArrowSchemaSetTypeUnion(struct ArrowSchema*
schema, enum ArrowTyp
format_out_size -= n_chars;
for (int64_t i = 1; i < n_children; i++) {
- n_chars = snprintf(format_cursor, format_out_size, ",%d", (int)i);
+ n_chars = snprintf(format_cursor, format_out_size, ",%" PRId64, i);
format_cursor += n_chars;
format_out_size -= n_chars;
}
@@ -1144,8 +1192,9 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_DECIMAL256);
return NANOARROW_OK;
default:
- ArrowErrorSet(error, "Expected decimal bitwidth of 128 or 256 but
found %d",
- (int)schema_view->decimal_bitwidth);
+ ArrowErrorSet(error,
+ "Expected decimal bitwidth of 128 or 256 but found %"
PRId32,
+ schema_view->decimal_bitwidth);
return EINVAL;
}
@@ -1202,6 +1251,13 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
*format_end_out = format + 2;
return NANOARROW_OK;
+ // run end encoded has no buffer at all
+ case 'r':
+ schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED;
+ schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+
// just validity buffer
case 'w':
if (format[2] != ':' || format[3] == '\0') {
@@ -1249,11 +1305,10 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
int64_t n_type_ids =
_ArrowParseUnionTypeIds(schema_view->union_type_ids, NULL);
if (n_type_ids != schema_view->schema->n_children) {
- ArrowErrorSet(
- error,
- "Expected union type_ids parameter to be a comma-separated
list of %ld "
- "values between 0 and 127 but found '%s'",
- (long)schema_view->schema->n_children,
schema_view->union_type_ids);
+ ArrowErrorSet(error,
+ "Expected union type_ids parameter to be a
comma-separated "
+ "list of %" PRId64 " values between 0 and 127 but
found '%s'",
+ schema_view->schema->n_children,
schema_view->union_type_ids);
return EINVAL;
}
*format_end_out = format + strlen(format);
@@ -1432,6 +1487,24 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
return EINVAL;
}
+ // view types
+ case 'v': {
+ switch (format[1]) {
+ case 'u':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_STRING_VIEW);
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+ case 'z':
+ ArrowSchemaViewSetPrimitive(schema_view, NANOARROW_TYPE_BINARY_VIEW);
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+ default:
+ ArrowErrorSet(error, "Expected 'u', or 'z' following 'v' but found
'%s'",
+ format + 1);
+ return EINVAL;
+ }
+ }
+
default:
ArrowErrorSet(error, "Unknown format: '%s'", format);
return EINVAL;
@@ -1441,8 +1514,9 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
static ArrowErrorCode ArrowSchemaViewValidateNChildren(
struct ArrowSchemaView* schema_view, int64_t n_children, struct
ArrowError* error) {
if (n_children != -1 && schema_view->schema->n_children != n_children) {
- ArrowErrorSet(error, "Expected schema with %d children but found %d
children",
- (int)n_children, (int)schema_view->schema->n_children);
+ ArrowErrorSet(
+ error, "Expected schema with %" PRId64 " children but found %" PRId64
" children",
+ n_children, schema_view->schema->n_children);
return EINVAL;
}
@@ -1452,15 +1526,15 @@ static ArrowErrorCode ArrowSchemaViewValidateNChildren(
for (int64_t i = 0; i < schema_view->schema->n_children; i++) {
child = schema_view->schema->children[i];
if (child == NULL) {
- ArrowErrorSet(error,
- "Expected valid schema at schema->children[%ld] but found
NULL",
- (long)i);
+ ArrowErrorSet(
+ error, "Expected valid schema at schema->children[%" PRId64 "] but
found NULL",
+ i);
return EINVAL;
} else if (child->release == NULL) {
- ArrowErrorSet(
- error,
- "Expected valid schema at schema->children[%ld] but found a released
schema",
- (long)i);
+ ArrowErrorSet(error,
+ "Expected valid schema at schema->children[%" PRId64
+ "] but found a released schema",
+ i);
return EINVAL;
}
}
@@ -1478,8 +1552,9 @@ static ArrowErrorCode ArrowSchemaViewValidateMap(struct
ArrowSchemaView* schema_
NANOARROW_RETURN_NOT_OK(ArrowSchemaViewValidateNChildren(schema_view, 1,
error));
if (schema_view->schema->children[0]->n_children != 2) {
- ArrowErrorSet(error, "Expected child of map type to have 2 children but
found %d",
- (int)schema_view->schema->children[0]->n_children);
+ ArrowErrorSet(error,
+ "Expected child of map type to have 2 children but found %"
PRId64,
+ schema_view->schema->children[0]->n_children);
return EINVAL;
}
@@ -1561,6 +1636,8 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct
ArrowSchemaView* schema_vie
case NANOARROW_TYPE_TIME32:
case NANOARROW_TYPE_TIME64:
case NANOARROW_TYPE_DURATION:
+ case NANOARROW_TYPE_BINARY_VIEW:
+ case NANOARROW_TYPE_STRING_VIEW:
return ArrowSchemaViewValidateNChildren(schema_view, 0, error);
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
@@ -1576,6 +1653,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct
ArrowSchemaView* schema_vie
case NANOARROW_TYPE_FIXED_SIZE_LIST:
return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return ArrowSchemaViewValidateNChildren(schema_view, 2, error);
+
case NANOARROW_TYPE_STRUCT:
return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
@@ -1591,7 +1671,7 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct
ArrowSchemaView* schema_vie
default:
ArrowErrorSet(error, "Expected a valid enum ArrowType value but found
%d",
- (int)schema_view->type);
+ schema_view->type);
return EINVAL;
}
@@ -1641,8 +1721,8 @@ ArrowErrorCode ArrowSchemaViewInit(struct
ArrowSchemaView* schema_view,
}
if ((format + format_len) != format_end_out) {
- ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%d
characters",
- format, (int)(format_end_out - format), (int)(format_len));
+ ArrowErrorSet(error, "Error parsing schema->format '%s': parsed %d/%zu
characters",
+ format, (int)(format_end_out - format), format_len);
return EINVAL;
}
@@ -1702,9 +1782,8 @@ static int64_t ArrowSchemaTypeToStringInternal(struct
ArrowSchemaView* schema_vi
switch (schema_view->type) {
case NANOARROW_TYPE_DECIMAL128:
case NANOARROW_TYPE_DECIMAL256:
- return snprintf(out, n, "%s(%d, %d)", type_string,
- (int)schema_view->decimal_precision,
- (int)schema_view->decimal_scale);
+ return snprintf(out, n, "%s(%" PRId32 ", %" PRId32 ")", type_string,
+ schema_view->decimal_precision,
schema_view->decimal_scale);
case NANOARROW_TYPE_TIMESTAMP:
return snprintf(out, n, "%s('%s', '%s')", type_string,
ArrowTimeUnitString(schema_view->time_unit),
schema_view->timezone);
@@ -1715,7 +1794,7 @@ static int64_t ArrowSchemaTypeToStringInternal(struct
ArrowSchemaView* schema_vi
ArrowTimeUnitString(schema_view->time_unit));
case NANOARROW_TYPE_FIXED_SIZE_BINARY:
case NANOARROW_TYPE_FIXED_SIZE_LIST:
- return snprintf(out, n, "%s(%ld)", type_string,
(long)schema_view->fixed_size);
+ return snprintf(out, n, "%s(%" PRId32 ")", type_string,
schema_view->fixed_size);
case NANOARROW_TYPE_SPARSE_UNION:
case NANOARROW_TYPE_DENSE_UNION:
return snprintf(out, n, "%s([%s])", type_string,
schema_view->union_type_ids);
@@ -1731,7 +1810,7 @@ static inline void ArrowToStringLogChars(char** out,
int64_t n_chars_last,
// In the unlikely snprintf() returning a negative value (encoding error),
// ensure the result won't cause an out-of-bounds access.
if (n_chars_last < 0) {
- n_chars = 0;
+ n_chars_last = 0;
}
*n_chars += n_chars_last;
@@ -2070,6 +2149,10 @@ ArrowErrorCode ArrowMetadataBuilderRemove(struct
ArrowBuffer* buffer,
// under the License.
#include <errno.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -2083,6 +2166,12 @@ static void ArrowArrayReleaseInternal(struct ArrowArray*
array) {
ArrowBitmapReset(&private_data->bitmap);
ArrowBufferReset(&private_data->buffers[0]);
ArrowBufferReset(&private_data->buffers[1]);
+ ArrowFree(private_data->buffer_data);
+ for (int32_t i = 0; i < private_data->n_variadic_buffers; ++i) {
+ ArrowBufferReset(&private_data->variadic_buffers[i]);
+ }
+ ArrowFree(private_data->variadic_buffers);
+ ArrowFree(private_data->variadic_buffer_sizes);
ArrowFree(private_data);
}
@@ -2123,6 +2212,7 @@ static ArrowErrorCode ArrowArraySetStorageType(struct
ArrowArray* array,
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
+ case NANOARROW_TYPE_RUN_END_ENCODED:
array->n_buffers = 0;
break;
@@ -2156,7 +2246,10 @@ static ArrowErrorCode ArrowArraySetStorageType(struct
ArrowArray* array,
case NANOARROW_TYPE_DENSE_UNION:
array->n_buffers = 2;
break;
-
+ case NANOARROW_TYPE_BINARY_VIEW:
+ case NANOARROW_TYPE_STRING_VIEW:
+ array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1;
+ break;
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_BINARY:
@@ -2199,12 +2292,36 @@ ArrowErrorCode ArrowArrayInitFromType(struct
ArrowArray* array,
ArrowBitmapInit(&private_data->bitmap);
ArrowBufferInit(&private_data->buffers[0]);
ArrowBufferInit(&private_data->buffers[1]);
- private_data->buffer_data[0] = NULL;
- private_data->buffer_data[1] = NULL;
- private_data->buffer_data[2] = NULL;
+ private_data->buffer_data =
+ (const void**)ArrowMalloc(sizeof(void*) * NANOARROW_MAX_FIXED_BUFFERS);
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; ++i) {
+ private_data->buffer_data[i] = NULL;
+ }
+ private_data->n_variadic_buffers = 0;
+ private_data->variadic_buffers = NULL;
+ private_data->variadic_buffer_sizes = NULL;
array->private_data = private_data;
- array->buffers = (const void**)(&private_data->buffer_data);
+ array->buffers = (const void**)(private_data->buffer_data);
+
+ // These are not technically "storage" in the sense that they do not appear
+ // in the ArrowSchemaView's storage_type member; however, allowing them here
+ // is helpful to maximize the number of types that can avoid going through
+ // ArrowArrayInitFromSchema().
+ switch (storage_type) {
+ case NANOARROW_TYPE_DURATION:
+ case NANOARROW_TYPE_TIMESTAMP:
+ case NANOARROW_TYPE_TIME64:
+ case NANOARROW_TYPE_DATE64:
+ storage_type = NANOARROW_TYPE_INT64;
+ break;
+ case NANOARROW_TYPE_TIME32:
+ case NANOARROW_TYPE_DATE32:
+ storage_type = NANOARROW_TYPE_INT32;
+ break;
+ default:
+ break;
+ }
int result = ArrowArraySetStorageType(array, storage_type);
if (result != NANOARROW_OK) {
@@ -2488,10 +2605,26 @@ static void ArrowArrayFlushInternalPointers(struct
ArrowArray* array) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- for (int64_t i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ const bool is_binary_view = private_data->storage_type ==
NANOARROW_TYPE_STRING_VIEW ||
+ private_data->storage_type ==
NANOARROW_TYPE_BINARY_VIEW;
+ const int32_t nfixed_buf = is_binary_view ? 2 : NANOARROW_MAX_FIXED_BUFFERS;
+
+ for (int32_t i = 0; i < nfixed_buf; i++) {
private_data->buffer_data[i] = ArrowArrayBuffer(array, i)->data;
}
+ if (is_binary_view) {
+ const int32_t nvirt_buf = private_data->n_variadic_buffers;
+ private_data->buffer_data = (const void**)ArrowRealloc(
+ private_data->buffer_data, sizeof(void*) * (nfixed_buf + nvirt_buf +
1));
+ for (int32_t i = 0; i < nvirt_buf; i++) {
+ private_data->buffer_data[nfixed_buf + i] =
private_data->variadic_buffers[i].data;
+ }
+ private_data->buffer_data[nfixed_buf + nvirt_buf] =
+ private_data->variadic_buffer_sizes;
+ array->buffers = (const void**)(private_data->buffer_data);
+ }
+
for (int64_t i = 0; i < array->n_children; i++) {
ArrowArrayFlushInternalPointers(array->children[i]);
}
@@ -2547,6 +2680,11 @@ ArrowErrorCode ArrowArrayViewAllocateChildren(struct
ArrowArrayView* array_view,
return EINVAL;
}
+ if (n_children == 0) {
+ array_view->n_children = 0;
+ return NANOARROW_OK;
+ }
+
array_view->children =
(struct ArrowArrayView**)ArrowMalloc(n_children * sizeof(struct
ArrowArrayView*));
if (array_view->children == NULL) {
@@ -2695,6 +2833,8 @@ void ArrowArrayViewSetLength(struct ArrowArrayView*
array_view, int64_t length)
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
array_view->buffer_views[i].size_bytes = element_size_bytes * length;
continue;
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_NONE:
array_view->buffer_views[i].size_bytes = 0;
continue;
@@ -2727,9 +2867,16 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
array_view->offset = array->offset;
array_view->length = array->length;
array_view->null_count = array->null_count;
+ array_view->variadic_buffer_sizes = NULL;
+ array_view->variadic_buffers = NULL;
+ array_view->n_variadic_buffers = 0;
int64_t buffers_required = 0;
- for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ const int nfixed_buf = array_view->storage_type ==
NANOARROW_TYPE_STRING_VIEW ||
+ array_view->storage_type ==
NANOARROW_TYPE_BINARY_VIEW
+ ? NANOARROW_BINARY_VIEW_FIXED_BUFFERS
+ : NANOARROW_MAX_FIXED_BUFFERS;
+ for (int i = 0; i < nfixed_buf; i++) {
if (array_view->layout.buffer_type[i] == NANOARROW_BUFFER_TYPE_NONE) {
break;
}
@@ -2747,17 +2894,30 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
}
}
- // Check the number of buffers
+ if (array_view->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+ array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+ const int64_t n_buffers = array->n_buffers;
+ const int32_t nfixed_buf = NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
+
+ const int32_t nvariadic_buf = (int32_t)(n_buffers - nfixed_buf - 1);
+ array_view->n_variadic_buffers = nvariadic_buf;
+ buffers_required += nvariadic_buf + 1;
+ array_view->variadic_buffers = array->buffers +
NANOARROW_BINARY_VIEW_FIXED_BUFFERS;
+ array_view->variadic_buffer_sizes = (int64_t*)array->buffers[n_buffers -
1];
+ }
+
if (buffers_required != array->n_buffers) {
- ArrowErrorSet(error, "Expected array with %d buffer(s) but found %d
buffer(s)",
- (int)buffers_required, (int)array->n_buffers);
+ ArrowErrorSet(error,
+ "Expected array with %" PRId64 " buffer(s) but found %"
PRId64
+ " buffer(s)",
+ buffers_required, array->n_buffers);
return EINVAL;
}
// Check number of children
if (array_view->n_children != array->n_children) {
- ArrowErrorSet(error, "Expected %ld children but found %ld children",
- (long)array_view->n_children, (long)array->n_children);
+ ArrowErrorSet(error, "Expected %" PRId64 " children but found %" PRId64 "
children",
+ array_view->n_children, array->n_children);
return EINVAL;
}
@@ -2789,14 +2949,20 @@ static int ArrowArrayViewSetArrayInternal(struct
ArrowArrayView* array_view,
static int ArrowArrayViewValidateMinimal(struct ArrowArrayView* array_view,
struct ArrowError* error) {
if (array_view->length < 0) {
- ArrowErrorSet(error, "Expected length >= 0 but found length %ld",
- (long)array_view->length);
+ ArrowErrorSet(error, "Expected length >= 0 but found length %" PRId64,
+ array_view->length);
return EINVAL;
}
if (array_view->offset < 0) {
- ArrowErrorSet(error, "Expected offset >= 0 but found offset %ld",
- (long)array_view->offset);
+ ArrowErrorSet(error, "Expected offset >= 0 but found offset %" PRId64,
+ array_view->offset);
+ return EINVAL;
+ }
+
+ // Ensure that offset + length fits within an int64 before a possible
overflow
+ if ((uint64_t)array_view->offset + (uint64_t)array_view->length >
(uint64_t)INT64_MAX) {
+ ArrowErrorSet(error, "Offset + length is > INT64_MAX");
return EINVAL;
}
@@ -2809,7 +2975,9 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
for (int i = 0; i < 2; i++) {
int64_t element_size_bytes = array_view->layout.element_size_bits[i] / 8;
// Initialize with a value that will cause an error if accidentally used
uninitialized
- int64_t min_buffer_size_bytes = array_view->buffer_views[i].size_bytes + 1;
+ // Need to suppress the clang-tidy warning because gcc warns for possible
use
+ int64_t min_buffer_size_bytes = //
NOLINT(clang-analyzer-deadcode.DeadStores)
+ array_view->buffer_views[i].size_bytes + 1;
switch (array_view->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_VALIDITY:
@@ -2835,6 +3003,8 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
case NANOARROW_BUFFER_TYPE_UNION_OFFSET:
min_buffer_size_bytes = element_size_bytes * offset_plus_length;
break;
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_NONE:
continue;
}
@@ -2844,11 +3014,11 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
array_view->buffer_views[i].size_bytes = min_buffer_size_bytes;
} else if (array_view->buffer_views[i].size_bytes < min_buffer_size_bytes)
{
ArrowErrorSet(error,
- "Expected %s array buffer %d to have size >= %ld bytes but
found "
- "buffer with %ld bytes",
- ArrowTypeString(array_view->storage_type), (int)i,
- (long)min_buffer_size_bytes,
- (long)array_view->buffer_views[i].size_bytes);
+ "Expected %s array buffer %d to have size >= %" PRId64
+ " bytes but found "
+ "buffer with %" PRId64 " bytes",
+ ArrowTypeString(array_view->storage_type), i,
min_buffer_size_bytes,
+ array_view->buffer_views[i].size_bytes);
return EINVAL;
}
}
@@ -2860,11 +3030,20 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_FIXED_SIZE_LIST:
case NANOARROW_TYPE_MAP:
if (array_view->n_children != 1) {
- ArrowErrorSet(error, "Expected 1 child of %s array but found %ld child
arrays",
- ArrowTypeString(array_view->storage_type),
- (long)array_view->n_children);
+ ArrowErrorSet(error,
+ "Expected 1 child of %s array but found %" PRId64 "
child arrays",
+ ArrowTypeString(array_view->storage_type),
array_view->n_children);
return EINVAL;
}
+ break;
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ if (array_view->n_children != 2) {
+ ArrowErrorSet(
+ error, "Expected 2 children for %s array but found %" PRId64 "
child arrays",
+ ArrowTypeString(array_view->storage_type), array_view->n_children);
+ return EINVAL;
+ }
+ break;
default:
break;
}
@@ -2878,12 +3057,11 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
child_min_length = (array_view->offset + array_view->length);
for (int64_t i = 0; i < array_view->n_children; i++) {
if (array_view->children[i]->length < child_min_length) {
- ArrowErrorSet(
- error,
- "Expected struct child %d to have length >= %ld but found child
with "
- "length %ld",
- (int)(i + 1), (long)(child_min_length),
- (long)array_view->children[i]->length);
+ ArrowErrorSet(error,
+ "Expected struct child %" PRId64 " to have length >=
%" PRId64
+ " but found child with "
+ "length %" PRId64,
+ i + 1, child_min_length,
array_view->children[i]->length);
return EINVAL;
}
}
@@ -2894,12 +3072,78 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
array_view->layout.child_size_elements;
if (array_view->children[0]->length < child_min_length) {
ArrowErrorSet(error,
- "Expected child of fixed_size_list array to have length
>= %ld but "
- "found array with length %ld",
- (long)child_min_length,
(long)array_view->children[0]->length);
+ "Expected child of fixed_size_list array to have length
>= %" PRId64
+ " but "
+ "found array with length %" PRId64,
+ child_min_length, array_view->children[0]->length);
return EINVAL;
}
break;
+
+ case NANOARROW_TYPE_RUN_END_ENCODED: {
+ if (array_view->n_children != 2) {
+ ArrowErrorSet(error,
+ "Expected 2 children for run-end encoded array but found
%" PRId64,
+ array_view->n_children);
+ return EINVAL;
+ }
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ struct ArrowArrayView* values_view = array_view->children[1];
+ int64_t max_length;
+ switch (run_ends_view->storage_type) {
+ case NANOARROW_TYPE_INT16:
+ max_length = INT16_MAX;
+ break;
+ case NANOARROW_TYPE_INT32:
+ max_length = INT32_MAX;
+ break;
+ case NANOARROW_TYPE_INT64:
+ max_length = INT64_MAX;
+ break;
+ default:
+ ArrowErrorSet(
+ error,
+ "Run-end encoded array only supports INT16, INT32 or INT64
run-ends "
+ "but found run-ends type %s",
+ ArrowTypeString(run_ends_view->storage_type));
+ return EINVAL;
+ }
+
+ // There is already a check above that offset_plus_length < INT64_MAX
+ if (offset_plus_length > max_length) {
+ ArrowErrorSet(error,
+ "Offset + length of a run-end encoded array must fit in
a value"
+ " of the run end type %s but is %" PRId64 " + %" PRId64,
+ ArrowTypeString(run_ends_view->storage_type),
array_view->offset,
+ array_view->length);
+ return EINVAL;
+ }
+
+ if (run_ends_view->length > values_view->length) {
+ ArrowErrorSet(error,
+ "Length of run_ends is greater than the length of
values: %" PRId64
+ " > %" PRId64,
+ run_ends_view->length, values_view->length);
+ return EINVAL;
+ }
+
+ if (run_ends_view->length == 0 && values_view->length != 0) {
+ ArrowErrorSet(error,
+ "Run-end encoded array has zero length %" PRId64
+ ", but values array has "
+ "non-zero length",
+ values_view->length);
+ return EINVAL;
+ }
+
+ if (run_ends_view->null_count != 0) {
+ ArrowErrorSet(error, "Null count must be 0 for run ends array, but is
%" PRId64,
+ run_ends_view->null_count);
+ return EINVAL;
+ }
+ break;
+ }
+
default:
break;
}
@@ -2935,24 +3179,30 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
- first_offset = array_view->buffer_views[1].data.as_int32[0];
+ first_offset =
array_view->buffer_views[1].data.as_int32[array_view->offset];
if (first_offset < 0) {
- ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
- (long)first_offset);
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
+ first_offset);
return EINVAL;
}
last_offset =
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+ if (last_offset < 0) {
+ ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
+ last_offset);
+ return EINVAL;
+ }
// If the data buffer size is unknown, assign it; otherwise, check it
if (array_view->buffer_views[2].size_bytes == -1) {
array_view->buffer_views[2].size_bytes = last_offset;
} else if (array_view->buffer_views[2].size_bytes < last_offset) {
ArrowErrorSet(error,
- "Expected %s array buffer 2 to have size >= %ld bytes
but found "
- "buffer with %ld bytes",
- ArrowTypeString(array_view->storage_type),
(long)last_offset,
- (long)array_view->buffer_views[2].size_bytes);
+ "Expected %s array buffer 2 to have size >= %" PRId64
+ " bytes but found "
+ "buffer with %" PRId64 " bytes",
+ ArrowTypeString(array_view->storage_type), last_offset,
+ array_view->buffer_views[2].size_bytes);
return EINVAL;
}
} else if (array_view->buffer_views[2].size_bytes == -1) {
@@ -2965,24 +3215,30 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_LARGE_STRING:
case NANOARROW_TYPE_LARGE_BINARY:
if (array_view->buffer_views[1].size_bytes != 0) {
- first_offset = array_view->buffer_views[1].data.as_int64[0];
+ first_offset =
array_view->buffer_views[1].data.as_int64[array_view->offset];
if (first_offset < 0) {
- ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
- (long)first_offset);
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
+ first_offset);
return EINVAL;
}
last_offset =
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+ if (last_offset < 0) {
+ ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
+ last_offset);
+ return EINVAL;
+ }
// If the data buffer size is unknown, assign it; otherwise, check it
if (array_view->buffer_views[2].size_bytes == -1) {
array_view->buffer_views[2].size_bytes = last_offset;
} else if (array_view->buffer_views[2].size_bytes < last_offset) {
ArrowErrorSet(error,
- "Expected %s array buffer 2 to have size >= %ld bytes
but found "
- "buffer with %ld bytes",
- ArrowTypeString(array_view->storage_type),
(long)last_offset,
- (long)array_view->buffer_views[2].size_bytes);
+ "Expected %s array buffer 2 to have size >= %" PRId64
+ " bytes but found "
+ "buffer with %" PRId64 " bytes",
+ ArrowTypeString(array_view->storage_type), last_offset,
+ array_view->buffer_views[2].size_bytes);
return EINVAL;
}
} else if (array_view->buffer_views[2].size_bytes == -1) {
@@ -2995,12 +3251,11 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_STRUCT:
for (int64_t i = 0; i < array_view->n_children; i++) {
if (array_view->children[i]->length < offset_plus_length) {
- ArrowErrorSet(
- error,
- "Expected struct child %d to have length >= %ld but found child
with "
- "length %ld",
- (int)(i + 1), (long)offset_plus_length,
- (long)array_view->children[i]->length);
+ ArrowErrorSet(error,
+ "Expected struct child %" PRId64 " to have length >=
%" PRId64
+ " but found child with "
+ "length %" PRId64,
+ i + 1, offset_plus_length,
array_view->children[i]->length);
return EINVAL;
}
}
@@ -3009,21 +3264,27 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_LIST:
case NANOARROW_TYPE_MAP:
if (array_view->buffer_views[1].size_bytes != 0) {
- first_offset = array_view->buffer_views[1].data.as_int32[0];
+ first_offset =
array_view->buffer_views[1].data.as_int32[array_view->offset];
if (first_offset < 0) {
- ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
- (long)first_offset);
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
+ first_offset);
return EINVAL;
}
last_offset =
array_view->buffer_views[1].data.as_int32[offset_plus_length];
+ if (last_offset < 0) {
+ ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
+ last_offset);
+ return EINVAL;
+ }
+
if (array_view->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of %s array to have length >= %ld but found
array with "
- "length %ld",
- ArrowTypeString(array_view->storage_type), (long)last_offset,
- (long)array_view->children[0]->length);
+ ArrowErrorSet(error,
+ "Expected child of %s array to have length >= %" PRId64
+ " but found array with "
+ "length %" PRId64,
+ ArrowTypeString(array_view->storage_type), last_offset,
+ array_view->children[0]->length);
return EINVAL;
}
}
@@ -3031,24 +3292,58 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
case NANOARROW_TYPE_LARGE_LIST:
if (array_view->buffer_views[1].size_bytes != 0) {
- first_offset = array_view->buffer_views[1].data.as_int64[0];
+ first_offset =
array_view->buffer_views[1].data.as_int64[array_view->offset];
if (first_offset < 0) {
- ArrowErrorSet(error, "Expected first offset >= 0 but found %ld",
- (long)first_offset);
+ ArrowErrorSet(error, "Expected first offset >= 0 but found %" PRId64,
+ first_offset);
return EINVAL;
}
last_offset =
array_view->buffer_views[1].data.as_int64[offset_plus_length];
+ if (last_offset < 0) {
+ ArrowErrorSet(error, "Expected last offset >= 0 but found %" PRId64,
+ last_offset);
+ return EINVAL;
+ }
+
if (array_view->children[0]->length < last_offset) {
- ArrowErrorSet(
- error,
- "Expected child of large list array to have length >= %ld but
found array "
- "with length %ld",
- (long)last_offset, (long)array_view->children[0]->length);
+ ArrowErrorSet(error,
+ "Expected child of large list array to have length >=
%" PRId64
+ " but found array "
+ "with length %" PRId64,
+ last_offset, array_view->children[0]->length);
return EINVAL;
}
}
break;
+
+ case NANOARROW_TYPE_RUN_END_ENCODED: {
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ if (run_ends_view->length == 0) {
+ break;
+ }
+
+ int64_t first_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+ if (first_run_end < 1) {
+ ArrowErrorSet(
+ error,
+ "All run ends must be greater than 0 but the first run end is %"
PRId64,
+ first_run_end);
+ return EINVAL;
+ }
+
+ // offset + length < INT64_MAX is checked in
ArrowArrayViewValidateMinimal()
+ int64_t last_run_end =
+ ArrowArrayViewGetIntUnsafe(run_ends_view, run_ends_view->length - 1);
+ if (last_run_end < offset_plus_length) {
+ ArrowErrorSet(error,
+ "Last run end is %" PRId64 " but it should be >= (%"
PRId64
+ " + %" PRId64 ")",
+ last_run_end, array_view->offset, array_view->length);
+ return EINVAL;
+ }
+ break;
+ }
default:
break;
}
@@ -3101,7 +3396,7 @@ static int ArrowAssertIncreasingInt32(struct
ArrowBufferView view,
for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int32_t); i++) {
if (view.data.as_int32[i] < view.data.as_int32[i - 1]) {
- ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i);
+ ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i);
return EINVAL;
}
}
@@ -3117,7 +3412,7 @@ static int ArrowAssertIncreasingInt64(struct
ArrowBufferView view,
for (int64_t i = 1; i < view.size_bytes / (int64_t)sizeof(int64_t); i++) {
if (view.data.as_int64[i] < view.data.as_int64[i - 1]) {
- ArrowErrorSet(error, "[%ld] Expected element size >= 0", (long)i);
+ ArrowErrorSet(error, "[%" PRId64 "] Expected element size >= 0", i);
return EINVAL;
}
}
@@ -3130,8 +3425,9 @@ static int ArrowAssertRangeInt8(struct ArrowBufferView
view, int8_t min_value,
for (int64_t i = 0; i < view.size_bytes; i++) {
if (view.data.as_int8[i] < min_value || view.data.as_int8[i] > max_value) {
ArrowErrorSet(error,
- "[%ld] Expected buffer value between %d and %d but found
value %d",
- (long)i, (int)min_value, (int)max_value,
(int)view.data.as_int8[i]);
+ "[%" PRId64 "] Expected buffer value between %" PRId8 "
and %" PRId8
+ " but found value %" PRId8,
+ i, min_value, max_value, view.data.as_int8[i]);
return EINVAL;
}
}
@@ -3151,8 +3447,8 @@ static int ArrowAssertInt8In(struct ArrowBufferView view,
const int8_t* values,
}
if (!item_found) {
- ArrowErrorSet(error, "[%ld] Unexpected buffer value %d", (long)i,
- (int)view.data.as_int8[i]);
+ ArrowErrorSet(error, "[%" PRId64 "] Unexpected buffer value %" PRId8, i,
+ view.data.as_int8[i]);
return EINVAL;
}
}
@@ -3164,13 +3460,24 @@ static int ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
struct ArrowError* error) {
for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
switch (array_view->layout.buffer_type[i]) {
+ // Only validate the portion of the buffer that is strictly required,
+ // which includes not validating the offset buffer of a zero-length
array.
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
+ if (array_view->length == 0) {
+ continue;
+ }
if (array_view->layout.element_size_bits[i] == 32) {
- NANOARROW_RETURN_NOT_OK(
- ArrowAssertIncreasingInt32(array_view->buffer_views[i], error));
+ struct ArrowBufferView sliced_offsets;
+ sliced_offsets.data.as_int32 =
+ array_view->buffer_views[i].data.as_int32 + array_view->offset;
+ sliced_offsets.size_bytes = (array_view->length + 1) *
sizeof(int32_t);
+ NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt32(sliced_offsets,
error));
} else {
- NANOARROW_RETURN_NOT_OK(
- ArrowAssertIncreasingInt64(array_view->buffer_views[i], error));
+ struct ArrowBufferView sliced_offsets;
+ sliced_offsets.data.as_int64 =
+ array_view->buffer_views[i].data.as_int64 + array_view->offset;
+ sliced_offsets.size_bytes = (array_view->length + 1) *
sizeof(int64_t);
+ NANOARROW_RETURN_NOT_OK(ArrowAssertIncreasingInt64(sliced_offsets,
error));
}
break;
default:
@@ -3180,6 +3487,15 @@ static int ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
+ struct ArrowBufferView sliced_type_ids;
+ sliced_type_ids.size_bytes = array_view->length * sizeof(int8_t);
+ if (array_view->length > 0) {
+ sliced_type_ids.data.as_int8 =
+ array_view->buffer_views[0].data.as_int8 + array_view->offset;
+ } else {
+ sliced_type_ids.data.as_int8 = NULL;
+ }
+
if (array_view->union_type_id_map == NULL) {
// If the union_type_id map is NULL (e.g., when using
ArrowArrayInitFromType() +
// ArrowArrayAllocateChildren() + ArrowArrayFinishBuilding()), we don't
have enough
@@ -3191,9 +3507,9 @@ static int ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
array_view->union_type_id_map, array_view->n_children,
array_view->n_children)) {
NANOARROW_RETURN_NOT_OK(ArrowAssertRangeInt8(
- array_view->buffer_views[0], 0, (int8_t)(array_view->n_children -
1), error));
+ sliced_type_ids, 0, (int8_t)(array_view->n_children - 1), error));
} else {
- NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(array_view->buffer_views[0],
+ NANOARROW_RETURN_NOT_OK(ArrowAssertInt8In(sliced_type_ids,
array_view->union_type_id_map
+ 128,
array_view->n_children,
error));
}
@@ -3207,16 +3523,37 @@ static int ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
int64_t offset = ArrowArrayViewUnionChildOffset(array_view, i);
int64_t child_length = array_view->children[child_id]->length;
if (offset < 0 || offset > child_length) {
- ArrowErrorSet(
- error,
- "[%ld] Expected union offset for child id %d to be between 0 and
%ld but "
- "found offset value %ld",
- (long)i, (int)child_id, (long)child_length, (long)offset);
+ ArrowErrorSet(error,
+ "[%" PRId64 "] Expected union offset for child id %"
PRId8
+ " to be between 0 and %" PRId64
+ " but "
+ "found offset value %" PRId64,
+ i, child_id, child_length, offset);
return EINVAL;
}
}
}
+ if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) {
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ if (run_ends_view->length > 0) {
+ int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+ for (int64_t i = 1; i < run_ends_view->length; i++) {
+ const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
+ if (run_end <= last_run_end) {
+ ArrowErrorSet(
+ error,
+ "Every run end must be strictly greater than the previous run
end, "
+ "but run_ends[%" PRId64 " is %" PRId64 " and run_ends[%" PRId64
+ "] is %" PRId64,
+ i, run_end, i - 1, last_run_end);
+ return EINVAL;
+ }
+ last_run_end = run_end;
+ }
+ }
+ }
+
// Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i],
error));
@@ -3249,6 +3586,136 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
ArrowErrorSet(error, "validation_level not recognized");
return EINVAL;
}
+
+struct ArrowComparisonInternalState {
+ enum ArrowCompareLevel level;
+ int is_equal;
+ struct ArrowError* reason;
+};
+
+NANOARROW_CHECK_PRINTF_ATTRIBUTE static void ArrowComparePrependPath(
+ struct ArrowError* out, const char* fmt, ...) {
+ if (out == NULL) {
+ return;
+ }
+
+ char prefix[128];
+ prefix[0] = '\0';
+ va_list args;
+ va_start(args, fmt);
+ int prefix_len = vsnprintf(prefix, sizeof(prefix), fmt, args);
+ va_end(args);
+
+ if (prefix_len <= 0) {
+ return;
+ }
+
+ size_t out_len = strlen(out->message);
+ size_t out_len_to_move = sizeof(struct ArrowError) - prefix_len - 1;
+ if (out_len_to_move > out_len) {
+ out_len_to_move = out_len;
+ }
+
+ memmove(out->message + prefix_len, out->message, out_len_to_move);
+ memcpy(out->message, prefix, prefix_len);
+ out->message[out_len + prefix_len] = '\0';
+}
+
+#define SET_NOT_EQUAL_AND_RETURN_IF_IMPL(cond_, state_, reason_) \
+ do { \
+ if (cond_) { \
+ ArrowErrorSet(state_->reason, ": %s", reason_); \
+ state_->is_equal = 0; \
+ return; \
+ } \
+ } while (0)
+
+#define SET_NOT_EQUAL_AND_RETURN_IF(condition_, state_) \
+ SET_NOT_EQUAL_AND_RETURN_IF_IMPL(condition_, state_, #condition_)
+
+static void ArrowArrayViewCompareBuffer(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView* expected,
int i,
+ struct ArrowComparisonInternalState*
state) {
+ SET_NOT_EQUAL_AND_RETURN_IF(
+ actual->buffer_views[i].size_bytes !=
expected->buffer_views[i].size_bytes, state);
+
+ int64_t buffer_size = actual->buffer_views[i].size_bytes;
+ if (buffer_size > 0) {
+ SET_NOT_EQUAL_AND_RETURN_IF(
+ memcmp(actual->buffer_views[i].data.data,
expected->buffer_views[i].data.data,
+ buffer_size) != 0,
+ state);
+ }
+}
+
+static void ArrowArrayViewCompareIdentical(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView*
expected,
+ struct
ArrowComparisonInternalState* state) {
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->storage_type != expected->storage_type,
state);
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->n_children != expected->n_children,
state);
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary == NULL &&
expected->dictionary != NULL,
+ state);
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->dictionary != NULL &&
expected->dictionary == NULL,
+ state);
+
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->length != expected->length, state);
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->offset != expected->offset, state);
+ SET_NOT_EQUAL_AND_RETURN_IF(actual->null_count != expected->null_count,
state);
+
+ for (int i = 0; i < NANOARROW_MAX_FIXED_BUFFERS; i++) {
+ ArrowArrayViewCompareBuffer(actual, expected, i, state);
+ if (!state->is_equal) {
+ ArrowComparePrependPath(state->reason, ".buffers[%d]", i);
+ return;
+ }
+ }
+
+ for (int64_t i = 0; i < actual->n_children; i++) {
+ ArrowArrayViewCompareIdentical(actual->children[i], expected->children[i],
state);
+ if (!state->is_equal) {
+ ArrowComparePrependPath(state->reason, ".children[%" PRId64 "]", i);
+ return;
+ }
+ }
+
+ if (actual->dictionary != NULL) {
+ ArrowArrayViewCompareIdentical(actual->dictionary, expected->dictionary,
state);
+ if (!state->is_equal) {
+ ArrowComparePrependPath(state->reason, ".dictionary");
+ return;
+ }
+ }
+}
+
+// Top-level entry point to take care of creating, cleaning up, and
+// propagating the ArrowComparisonInternalState to the caller
+ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView* expected,
+ enum ArrowCompareLevel level, int* out,
+ struct ArrowError* reason) {
+ struct ArrowComparisonInternalState state;
+ state.level = level;
+ state.is_equal = 1;
+ state.reason = reason;
+
+ switch (level) {
+ case NANOARROW_COMPARE_IDENTICAL:
+ ArrowArrayViewCompareIdentical(actual, expected, &state);
+ break;
+ default:
+ return EINVAL;
+ }
+
+ *out = state.is_equal;
+ if (!state.is_equal) {
+ ArrowComparePrependPath(state.reason, "root");
+ }
+
+ return NANOARROW_OK;
+}
+
+#undef SET_NOT_EQUAL_AND_RETURN_IF
+#undef SET_NOT_EQUAL_AND_RETURN_IF_IMPL
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
diff --git a/c/vendor/nanoarrow/nanoarrow.h b/c/vendor/nanoarrow/nanoarrow.h
index e845d0ad9..264aad5b6 100644
--- a/c/vendor/nanoarrow/nanoarrow.h
+++ b/c/vendor/nanoarrow/nanoarrow.h
@@ -19,9 +19,9 @@
#define NANOARROW_BUILD_ID_H_INCLUDED
#define NANOARROW_VERSION_MAJOR 0
-#define NANOARROW_VERSION_MINOR 5
+#define NANOARROW_VERSION_MINOR 6
#define NANOARROW_VERSION_PATCH 0
-#define NANOARROW_VERSION "0.5.0"
+#define NANOARROW_VERSION "0.6.0"
#define NANOARROW_VERSION_INT \
(NANOARROW_VERSION_MAJOR * 10000 + NANOARROW_VERSION_MINOR * 100 + \
@@ -181,14 +181,14 @@ struct ArrowArrayStream {
NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL)
#if defined(NANOARROW_DEBUG)
-#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
- do {
\
- const int NAME = (EXPR);
\
- if (NAME) {
\
- ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d",
EXPR_STR, \
- NAME, __FILE__, __LINE__);
\
- return NAME;
\
- }
\
+#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
+ do {
\
+ const int NAME = (EXPR);
\
+ if (NAME) {
\
+ ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d(%s)\n* %s:%d",
EXPR_STR, \
+ NAME, strerror(NAME), __FILE__, __LINE__);
\
+ return NAME;
\
+ }
\
} while (0)
#else
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
@@ -482,7 +482,10 @@ enum ArrowType {
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
- NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
+ NANOARROW_TYPE_RUN_END_ENCODED,
+ NANOARROW_TYPE_BINARY_VIEW,
+ NANOARROW_TYPE_STRING_VIEW
};
/// \brief Get a string value of an enum ArrowType value
@@ -569,6 +572,12 @@ static inline const char* ArrowTypeString(enum ArrowType
type) {
return "large_list";
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
return "interval_month_day_nano";
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return "run_end_encoded";
+ case NANOARROW_TYPE_BINARY_VIEW:
+ return "binary_view";
+ case NANOARROW_TYPE_STRING_VIEW:
+ return "string_view";
default:
return NULL;
}
@@ -605,6 +614,17 @@ enum ArrowValidationLevel {
NANOARROW_VALIDATION_LEVEL_FULL = 3
};
+/// \brief Comparison level enumerator
+/// \ingroup nanoarrow-utils
+enum ArrowCompareLevel {
+ /// \brief Consider arrays equal if buffers contain identical content
+ /// and have identical offset, null count, and length. Note that this is
+ /// a much stricter check than logical equality, which would take into
+ /// account potentially different content of null slots, arrays with a
+ /// non-zero offset, and other considerations.
+ NANOARROW_COMPARE_IDENTICAL,
+};
+
/// \brief Get a string value of an enum ArrowTimeUnit value
/// \ingroup nanoarrow-utils
///
@@ -634,15 +654,13 @@ enum ArrowBufferType {
NANOARROW_BUFFER_TYPE_TYPE_ID,
NANOARROW_BUFFER_TYPE_UNION_OFFSET,
NANOARROW_BUFFER_TYPE_DATA_OFFSET,
- NANOARROW_BUFFER_TYPE_DATA
+ NANOARROW_BUFFER_TYPE_DATA,
+ NANOARROW_BUFFER_TYPE_VARIADIC_DATA,
+ NANOARROW_BUFFER_TYPE_VARIADIC_SIZE
};
-/// \brief The maximum number of buffers in an ArrowArrayView or ArrowLayout
+/// \brief The maximum number of fixed buffers in an ArrowArrayView or
ArrowLayout
/// \ingroup nanoarrow-array-view
-///
-/// All currently supported types have 3 buffers or fewer; however, future
types
-/// may involve a variable number of buffers (e.g., string view). These buffers
-/// will be represented by separate members of the ArrowArrayView or
ArrowLayout.
#define NANOARROW_MAX_FIXED_BUFFERS 3
/// \brief An non-owning view of a string
@@ -689,6 +707,7 @@ union ArrowBufferViewData {
const double* as_double;
const float* as_float;
const char* as_char;
+ const union ArrowBinaryView* as_binary_view;
};
/// \brief An non-owning view of a buffer
@@ -826,6 +845,15 @@ struct ArrowArrayView {
/// type_id == union_type_id_map[128 + child_index]. This value may be
/// NULL in the case where child_id == type_id.
int8_t* union_type_id_map;
+
+ /// \brief Number of variadic buffers
+ int32_t n_variadic_buffers;
+
+ /// \brief Pointers to variadic buffers of binary/string_view arrays
+ const void** variadic_buffers;
+
+ /// \brief Size of each variadic buffer
+ int64_t* variadic_buffer_sizes;
};
// Used as the private data member for ArrowArrays allocated here and accessed
@@ -840,8 +868,8 @@ struct ArrowArrayPrivateData {
// The array of pointers to buffers. This must be updated after a sequence
// of appends to synchronize its values with the actual buffer addresses
- // (which may have ben reallocated uring that time)
- const void* buffer_data[NANOARROW_MAX_FIXED_BUFFERS];
+ // (which may have been reallocated during that time)
+ const void** buffer_data;
// The storage data type, or NANOARROW_TYPE_UNINITIALIZED if unknown
enum ArrowType storage_type;
@@ -853,6 +881,15 @@ struct ArrowArrayPrivateData {
// In the future this could be replaced with a type id<->child mapping
// to support constructing unions in append mode where type_id != child_index
int8_t union_type_id_is_child_index;
+
+ // Number of variadic buffers for binary view types
+ int32_t n_variadic_buffers;
+
+ // Variadic buffers for binary view types
+ struct ArrowBuffer* variadic_buffers;
+
+ // Size of each variadic buffer in bytes
+ int64_t* variadic_buffer_sizes;
};
/// \brief A representation of an interval.
@@ -911,7 +948,7 @@ static inline void ArrowDecimalInit(struct ArrowDecimal*
decimal, int32_t bitwid
memset(decimal->words, 0, sizeof(decimal->words));
decimal->precision = precision;
decimal->scale = scale;
- decimal->n_words = bitwidth / 8 / sizeof(uint64_t);
+ decimal->n_words = (int)(bitwidth / 8 / sizeof(uint64_t));
if (_ArrowIsLittleEndian()) {
decimal->low_word_index = 0;
@@ -1052,6 +1089,8 @@ static inline void ArrowDecimalSetBytes(struct
ArrowDecimal* decimal,
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize)
#define ArrowSchemaSetTypeDecimal \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal)
+#define ArrowSchemaSetTypeRunEndEncoded \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeRunEndEncoded)
#define ArrowSchemaSetTypeDateTime \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime)
#define ArrowSchemaSetTypeUnion \
@@ -1118,6 +1157,7 @@ static inline void ArrowDecimalSetBytes(struct
ArrowDecimal* decimal,
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewSetArrayMinimal)
#define ArrowArrayViewValidate \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowArrayViewValidate)
+#define ArrowArrayViewCompare NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowArrayViewCompare)
#define ArrowArrayViewReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowArrayViewReset)
#define ArrowBasicArrayStreamInit \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowBasicArrayStreamInit)
@@ -1281,6 +1321,12 @@ ArrowErrorCode ArrowDecimalSetDigits(struct
ArrowDecimal* decimal,
ArrowErrorCode ArrowDecimalAppendDigitsToBuffer(const struct ArrowDecimal*
decimal,
struct ArrowBuffer* buffer);
+/// \brief Get the half float value of a float
+static inline uint16_t ArrowFloatToHalfFloat(float value);
+
+/// \brief Get the float value of a half float
+static inline float ArrowHalfFloatToFloat(uint16_t value);
+
/// \brief Resolve a chunk index from increasing int64_t offsets
///
/// Given a buffer of increasing int64_t offsets that begin with 0 (e.g.,
offset buffer
@@ -1358,6 +1404,17 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct
ArrowSchema* schema, enum ArrowT
int32_t decimal_precision,
int32_t decimal_scale);
+/// \brief Set the format field of a run-end encoded schema
+///
+/// Returns EINVAL for run_end_type that is not
+/// NANOARROW_TYPE_INT16, NANOARROW_TYPE_INT32 or NANOARROW_TYPE_INT64.
+/// Schema must have been initialized using ArrowSchemaInit() or
ArrowSchemaDeepCopy().
+/// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to
+/// set the value type. Note that when building arrays using the
`ArrowArrayAppendXXX()`
+/// functions, the run-end encoded array's logical length must be updated
manually.
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+ enum ArrowType run_end_type);
+
/// \brief Set the format field of a time, timestamp, or duration schema
///
/// Returns EINVAL for type that is not
@@ -2025,6 +2082,48 @@ ArrowErrorCode ArrowArrayViewSetArrayMinimal(struct
ArrowArrayView* array_view,
const struct ArrowArray* array,
struct ArrowError* error);
+/// \brief Get the number of buffers
+///
+/// The number of buffers referred to by this ArrowArrayView. In many cases
this can also
+/// be calculated from the ArrowLayout member of the ArrowArrayView or
ArrowSchemaView;
+/// however, for binary view and string view types, the number of total
buffers depends on
+/// the number of variadic buffers.
+static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView*
array_view);
+
+/// \brief Get a view of a specific buffer from an ArrowArrayView
+///
+/// This is the ArrowArrayView equivalent of ArrowArray::buffers[i] that
includes
+/// size information (if known).
+static inline struct ArrowBufferView ArrowArrayViewGetBufferView(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the function of a specific buffer in an ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the function of each buffer may be different between two arrays of the
same type
+/// depending on the number of variadic buffers.
+static inline enum ArrowBufferType ArrowArrayViewGetBufferType(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the data type of a specific buffer in an ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the data type of each buffer may be different between two arrays of the
same type
+/// depending on the number of variadic buffers.
+static inline enum ArrowType ArrowArrayViewGetBufferDataType(
+ struct ArrowArrayView* array_view, int64_t i);
+
+/// \brief Get the element size (in bits) of a specific buffer in an
ArrowArrayView
+///
+/// In many cases this can also be obtained from the ArrowLayout member of the
+/// ArrowArrayView or ArrowSchemaView; however, for binary view and string
view types,
+/// the element width of each buffer may be different between two arrays of
the same type
+/// depending on the number of variadic buffers.
+static inline int64_t ArrowArrayViewGetBufferElementSizeBits(
+ struct ArrowArrayView* array_view, int64_t i);
+
/// \brief Performs checks on the content of an ArrowArrayView
///
/// If using ArrowArrayViewSetArray() to back array_view with an ArrowArray,
@@ -2037,6 +2136,19 @@ ArrowErrorCode ArrowArrayViewValidate(struct
ArrowArrayView* array_view,
enum ArrowValidationLevel
validation_level,
struct ArrowError* error);
+/// \brief Compare two ArrowArrayView objects for equality
+///
+/// Given two ArrowArrayView instances, place either 0 (not equal) or
+/// 1 (equal) at the address pointed to by out. If the comparison determines
+/// that actual and expected are not equal, a reason will be communicated via
+/// reason if reason is non-NULL.
+///
+/// Returns NANOARROW_OK if the comparison completed successfully.
+ArrowErrorCode ArrowArrayViewCompare(const struct ArrowArrayView* actual,
+ const struct ArrowArrayView* expected,
+ enum ArrowCompareLevel level, int* out,
+ struct ArrowError* reason);
+
/// \brief Reset the contents of an ArrowArrayView and frees resources
void ArrowArrayViewReset(struct ArrowArrayView* array_view);
@@ -2044,6 +2156,10 @@ void ArrowArrayViewReset(struct ArrowArrayView*
array_view);
static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView*
array_view,
int64_t i);
+/// \brief Compute null count for an ArrowArrayView
+static inline int64_t ArrowArrayViewComputeNullCount(
+ const struct ArrowArrayView* array_view);
+
/// \brief Get the type id of a union array element
static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView*
array_view,
int64_t i);
@@ -2233,6 +2349,57 @@ static inline int64_t _ArrowGrowByFactor(int64_t
current_capacity, int64_t new_c
}
}
+// float to half float conversion, adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline uint16_t ArrowFloatToHalfFloat(float value) {
+ union {
+ float f;
+ uint32_t b;
+ } u;
+ u.f = value;
+
+ uint16_t sn = (uint16_t)((u.b >> 31) & 0x1);
+ uint16_t exp = (u.b >> 23) & 0xff;
+ int16_t res = (int16_t)(exp - 127 + 15);
+ uint16_t fc = (uint16_t)(u.b >> 13) & 0x3ff;
+
+ if (exp == 0) {
+ res = 0;
+ } else if (exp == 0xff) {
+ res = 0x1f;
+ } else if (res > 0x1e) {
+ res = 0x1f;
+ fc = 0;
+ } else if (res < 0x01) {
+ res = 0;
+ fc = 0;
+ }
+
+ return (uint16_t)((sn << 15) | (uint16_t)(res << 10) | fc);
+}
+
+// half float to float conversion, adapted from Arrow Go
+// https://github.com/apache/arrow/blob/main/go/arrow/float16/float16.go
+static inline float ArrowHalfFloatToFloat(uint16_t value) {
+ uint32_t sn = (uint32_t)((value >> 15) & 0x1);
+ uint32_t exp = (value >> 10) & 0x1f;
+ uint32_t res = exp + 127 - 15;
+ uint32_t fc = value & 0x3ff;
+
+ if (exp == 0) {
+ res = 0;
+ } else if (exp == 0x1f) {
+ res = 0xff;
+ }
+
+ union {
+ float f;
+ uint32_t b;
+ } u;
+ u.b = (uint32_t)(sn << 31) | (uint32_t)(res << 23) | (uint32_t)(fc << 13);
+ return u.f;
+}
+
static inline void ArrowBufferInit(struct ArrowBuffer* buffer) {
buffer->data = NULL;
buffer->size_bytes = 0;
@@ -2316,6 +2483,7 @@ static inline ArrowErrorCode ArrowBufferReserve(struct
ArrowBuffer* buffer,
static inline void ArrowBufferAppendUnsafe(struct ArrowBuffer* buffer, const
void* data,
int64_t size_bytes) {
if (size_bytes > 0) {
+ NANOARROW_DCHECK(buffer->data != NULL);
memcpy(buffer->data + buffer->size_bytes, data, size_bytes);
buffer->size_bytes += size_bytes;
}
@@ -2391,10 +2559,16 @@ static inline ArrowErrorCode
ArrowBufferAppendBufferView(struct ArrowBuffer* buf
static inline ArrowErrorCode ArrowBufferAppendFill(struct ArrowBuffer* buffer,
uint8_t value, int64_t
size_bytes) {
+ if (size_bytes == 0) {
+ return NANOARROW_OK;
+ }
+
NANOARROW_RETURN_NOT_OK(ArrowBufferReserve(buffer, size_bytes));
+ NANOARROW_DCHECK(buffer->data != NULL); // To help clang-tidy
memset(buffer->data + buffer->size_bytes, value, size_bytes);
buffer->size_bytes += size_bytes;
+
return NANOARROW_OK;
}
@@ -2511,6 +2685,8 @@ static inline void ArrowBitsUnpackInt32(const uint8_t*
bits, int64_t start_offse
return;
}
+ NANOARROW_DCHECK(bits != NULL && out != NULL);
+
const int64_t i_begin = start_offset;
const int64_t i_end = start_offset + length;
const int64_t i_last_valid = i_end - 1;
@@ -2553,12 +2729,18 @@ static inline void ArrowBitClear(uint8_t* bits, int64_t
i) {
}
static inline void ArrowBitSetTo(uint8_t* bits, int64_t i, uint8_t bit_is_set)
{
- bits[i / 8] ^=
- ((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i / 8])) &
_ArrowkBitmask[i % 8];
+ bits[i / 8] ^= (uint8_t)(((uint8_t)(-((uint8_t)(bit_is_set != 0)) ^ bits[i /
8])) &
+ _ArrowkBitmask[i % 8]);
}
static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t
length,
uint8_t bits_are_set) {
+ if (length == 0) {
+ return;
+ }
+
+ NANOARROW_DCHECK(bits != NULL);
+
const int64_t i_begin = start_offset;
const int64_t i_end = start_offset + length;
const uint8_t fill_byte = (uint8_t)(-bits_are_set);
@@ -2602,6 +2784,8 @@ static inline int64_t ArrowBitCountSet(const uint8_t*
bits, int64_t start_offset
return 0;
}
+ NANOARROW_DCHECK(bits != NULL);
+
const int64_t i_begin = start_offset;
const int64_t i_end = start_offset + length;
const int64_t i_last_valid = i_end - 1;
@@ -3095,6 +3279,8 @@ static inline ArrowErrorCode
_ArrowArrayAppendEmptyInternal(struct ArrowArray* a
switch (private_data->layout.buffer_type[i]) {
case NANOARROW_BUFFER_TYPE_NONE:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_DATA:
+ case NANOARROW_BUFFER_TYPE_VARIADIC_SIZE:
case NANOARROW_BUFFER_TYPE_VALIDITY:
continue;
case NANOARROW_BUFFER_TYPE_DATA_OFFSET:
@@ -3173,6 +3359,10 @@ static inline ArrowErrorCode ArrowArrayAppendInt(struct
ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
break;
@@ -3223,6 +3413,10 @@ static inline ArrowErrorCode ArrowArrayAppendUInt(struct
ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
case NANOARROW_TYPE_BOOL:
NANOARROW_RETURN_NOT_OK(_ArrowArrayAppendBits(array, 1, value != 0, 1));
break;
@@ -3252,6 +3446,10 @@ static inline ArrowErrorCode
ArrowArrayAppendDouble(struct ArrowArray* array,
case NANOARROW_TYPE_FLOAT:
NANOARROW_RETURN_NOT_OK(ArrowBufferAppendFloat(data_buffer,
(float)value));
break;
+ case NANOARROW_TYPE_HALF_FLOAT:
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppendUInt16(data_buffer,
ArrowFloatToHalfFloat((float)value)));
+ break;
default:
return EINVAL;
}
@@ -3264,52 +3462,151 @@ static inline ArrowErrorCode
ArrowArrayAppendDouble(struct ArrowArray* array,
return NANOARROW_OK;
}
+// Binary views only have two fixed buffers, but be aware that they must also
+// always have 1 more buffer to store variadic buffer sizes (even if there are
+// none)
+#define NANOARROW_BINARY_VIEW_FIXED_BUFFERS 2
+// Capacity in bytes of the data member of struct ArrowBinaryViewInlined; values
+// no longer than this are copied into the view itself by ArrowArrayAppendBytes()
+#define NANOARROW_BINARY_VIEW_INLINE_SIZE 12
+// Number of bytes in the prefix member of struct ArrowBinaryViewRef
+#define NANOARROW_BINARY_VIEW_PREFIX_SIZE 4
+// ArrowArrayAppendBytes() adds a new variadic buffer when appending a value to
+// the last one would make it larger than this many bytes
+#define NANOARROW_BINARY_VIEW_BLOCK_SIZE (32 << 10) // 32KB
+
+// The Arrow C++ implementation uses anonymous structs as members
+// of the ArrowBinaryView. For Cython support in this library, we define
+// those structs outside of the ArrowBinaryView
+// Form of a view whose bytes are stored in the view itself: the value length in
+// bytes followed by NANOARROW_BINARY_VIEW_INLINE_SIZE bytes of storage.
+// ArrowArrayAppendBytes() zero-fills the storage bytes past the value.
+struct ArrowBinaryViewInlined {
+ int32_t size;
+ uint8_t data[NANOARROW_BINARY_VIEW_INLINE_SIZE];
+};
+
+// Form of a view whose bytes are stored in a variadic buffer: the value length
+// in bytes, a copy of the first NANOARROW_BINARY_VIEW_PREFIX_SIZE bytes of the
+// value, the index of the variadic buffer holding the value, and the byte
+// offset of the value within that buffer.
+struct ArrowBinaryViewRef {
+ int32_t size;
+ uint8_t prefix[NANOARROW_BINARY_VIEW_PREFIX_SIZE];
+ int32_t buffer_index;
+ int32_t offset;
+};
+
+// One element of a binary view / string view data buffer, readable in either
+// form. The size member comes first in both forms; readers in this file compare
+// inlined.size against NANOARROW_BINARY_VIEW_INLINE_SIZE to pick the form.
+union ArrowBinaryView {
+ struct ArrowBinaryViewInlined inlined;
+ struct ArrowBinaryViewRef ref;
+ // NOTE(review): not read or written by the code visible here; presumably only
+ // present to give the union int64_t alignment -- confirm
+ int64_t alignment_dummy;
+};
+
+// Returns the n_variadic_buffers field of the private data attached to array.
+// array->private_data is interpreted as a struct ArrowArrayPrivateData*.
+static inline int32_t ArrowArrayVariadicBufferCount(struct ArrowArray* array) {
+  return ((struct ArrowArrayPrivateData*)array->private_data)->n_variadic_buffers;
+}
+
+// Appends nbuffers empty variadic buffers to the private data of array.
+//
+// Grows both the variadic_buffers and variadic_buffer_sizes arrays, initializes
+// each new buffer with ArrowBufferInit() and a recorded size of 0, then updates
+// n_variadic_buffers and array->n_buffers (fixed buffers + variadic buffers + 1
+// for the buffer of sizes).
+//
+// Returns NANOARROW_OK on success or ENOMEM if either array cannot be grown. On
+// ENOMEM the existing variadic buffers and the buffer counts are left as they
+// were.
+static inline ArrowErrorCode ArrowArrayAddVariadicBuffers(struct ArrowArray* array,
+                                                          int32_t nbuffers) {
+  const int32_t n_current_bufs = ArrowArrayVariadicBufferCount(array);
+  const int32_t nvariadic_bufs_needed = n_current_bufs + nbuffers;
+
+  struct ArrowArrayPrivateData* private_data =
+      (struct ArrowArrayPrivateData*)array->private_data;
+
+  // Reallocate through temporaries so that a failed allocation does not replace
+  // the only pointer to the existing buffers with NULL (this assumes
+  // ArrowRealloc(), like realloc(), leaves the old block valid on failure).
+  struct ArrowBuffer* new_buffers = (struct ArrowBuffer*)ArrowRealloc(
+      private_data->variadic_buffers, sizeof(struct ArrowBuffer) * nvariadic_bufs_needed);
+  if (new_buffers == NULL) {
+    return ENOMEM;
+  }
+  private_data->variadic_buffers = new_buffers;
+
+  int64_t* new_buffer_sizes = (int64_t*)ArrowRealloc(
+      private_data->variadic_buffer_sizes, sizeof(int64_t) * nvariadic_bufs_needed);
+  if (new_buffer_sizes == NULL) {
+    // n_variadic_buffers has not been updated yet, so the slots added to
+    // variadic_buffers above are not considered in use
+    return ENOMEM;
+  }
+  private_data->variadic_buffer_sizes = new_buffer_sizes;
+
+  for (int32_t i = n_current_bufs; i < nvariadic_bufs_needed; i++) {
+    ArrowBufferInit(&private_data->variadic_buffers[i]);
+    private_data->variadic_buffer_sizes[i] = 0;
+  }
+  private_data->n_variadic_buffers = nvariadic_bufs_needed;
+  array->n_buffers = NANOARROW_BINARY_VIEW_FIXED_BUFFERS + 1 + nvariadic_bufs_needed;
+
+  return NANOARROW_OK;
+}
+
static inline ArrowErrorCode ArrowArrayAppendBytes(struct ArrowArray* array,
struct ArrowBufferView
value) {
struct ArrowArrayPrivateData* private_data =
(struct ArrowArrayPrivateData*)array->private_data;
- struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
- struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
- array, 1 + (private_data->storage_type !=
NANOARROW_TYPE_FIXED_SIZE_BINARY));
- int32_t offset;
- int64_t large_offset;
- int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
+ if (private_data->storage_type == NANOARROW_TYPE_STRING_VIEW ||
+ private_data->storage_type == NANOARROW_TYPE_BINARY_VIEW) {
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(array, 1);
+ union ArrowBinaryView bvt;
+ bvt.inlined.size = (int32_t)value.size_bytes;
- switch (private_data->storage_type) {
- case NANOARROW_TYPE_STRING:
- case NANOARROW_TYPE_BINARY:
- offset = ((int32_t*)offset_buffer->data)[array->length];
- if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
- return EOVERFLOW;
+ if (value.size_bytes <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+ memcpy(bvt.inlined.data, value.data.as_char, value.size_bytes);
+ memset(bvt.inlined.data + bvt.inlined.size, 0,
+ NANOARROW_BINARY_VIEW_INLINE_SIZE - bvt.inlined.size);
+ } else {
+ int32_t current_n_vbufs = ArrowArrayVariadicBufferCount(array);
+ if (current_n_vbufs == 0 ||
+ private_data->variadic_buffers[current_n_vbufs - 1].size_bytes +
+ value.size_bytes >
+ NANOARROW_BINARY_VIEW_BLOCK_SIZE) {
+ const int32_t additional_bufs_needed = 1;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowArrayAddVariadicBuffers(array, additional_bufs_needed));
+ current_n_vbufs += additional_bufs_needed;
}
- offset += (int32_t)value.size_bytes;
- NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(offset_buffer, &offset,
sizeof(int32_t)));
+ const int32_t buf_index = current_n_vbufs - 1;
+ struct ArrowBuffer* variadic_buf =
&private_data->variadic_buffers[buf_index];
+ memcpy(bvt.ref.prefix, value.data.as_char,
NANOARROW_BINARY_VIEW_PREFIX_SIZE);
+ bvt.ref.buffer_index = (int32_t)buf_index;
+ bvt.ref.offset = (int32_t)variadic_buf->size_bytes;
NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
- break;
+ ArrowBufferAppend(variadic_buf, value.data.as_char,
value.size_bytes));
+ private_data->variadic_buffer_sizes[buf_index] =
variadic_buf->size_bytes;
+ }
+ NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_buffer, &bvt, sizeof(bvt)));
+ } else {
+ struct ArrowBuffer* offset_buffer = ArrowArrayBuffer(array, 1);
+ struct ArrowBuffer* data_buffer = ArrowArrayBuffer(
+ array, 1 + (private_data->storage_type !=
NANOARROW_TYPE_FIXED_SIZE_BINARY));
+ int32_t offset;
+ int64_t large_offset;
+ int64_t fixed_size_bytes = private_data->layout.element_size_bits[1] / 8;
+
+ switch (private_data->storage_type) {
+ case NANOARROW_TYPE_STRING:
+ case NANOARROW_TYPE_BINARY:
+ offset = ((int32_t*)offset_buffer->data)[array->length];
+ if ((((int64_t)offset) + value.size_bytes) > INT32_MAX) {
+ return EOVERFLOW;
+ }
- case NANOARROW_TYPE_LARGE_STRING:
- case NANOARROW_TYPE_LARGE_BINARY:
- large_offset = ((int64_t*)offset_buffer->data)[array->length];
- large_offset += value.size_bytes;
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t)));
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
- break;
+ offset += (int32_t)value.size_bytes;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(offset_buffer, &offset, sizeof(int32_t)));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
+ break;
- case NANOARROW_TYPE_FIXED_SIZE_BINARY:
- if (value.size_bytes != fixed_size_bytes) {
- return EINVAL;
- }
+ case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_LARGE_BINARY:
+ large_offset = ((int64_t*)offset_buffer->data)[array->length];
+ large_offset += value.size_bytes;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(offset_buffer, &large_offset, sizeof(int64_t)));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
+ break;
- NANOARROW_RETURN_NOT_OK(
- ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
- break;
- default:
- return EINVAL;
+ case NANOARROW_TYPE_FIXED_SIZE_BINARY:
+ if (value.size_bytes != fixed_size_bytes) {
+ return EINVAL;
+ }
+
+ NANOARROW_RETURN_NOT_OK(
+ ArrowBufferAppend(data_buffer, value.data.data, value.size_bytes));
+ break;
+ default:
+ return EINVAL;
+ }
}
if (private_data->bitmap.buffer.data != NULL) {
@@ -3332,8 +3629,10 @@ static inline ArrowErrorCode
ArrowArrayAppendString(struct ArrowArray* array,
switch (private_data->storage_type) {
case NANOARROW_TYPE_STRING:
case NANOARROW_TYPE_LARGE_STRING:
+ case NANOARROW_TYPE_STRING_VIEW:
case NANOARROW_TYPE_BINARY:
case NANOARROW_TYPE_LARGE_BINARY:
+ case NANOARROW_TYPE_BINARY_VIEW:
return ArrowArrayAppendBytes(array, buffer_view);
default:
return EINVAL;
@@ -3520,6 +3819,132 @@ static inline void ArrowArrayViewMove(struct
ArrowArrayView* src,
ArrowArrayViewInitFromType(src, NANOARROW_TYPE_UNINITIALIZED);
}
+// Returns the number of buffers described by array_view.
+//
+// For binary view / string view storage this is the fixed buffers plus the
+// variadic buffers plus one buffer of variadic sizes. For every other storage
+// type it is the number of leading entries of layout.buffer_type that are not
+// NANOARROW_BUFFER_TYPE_NONE (at most NANOARROW_MAX_FIXED_BUFFERS).
+static inline int64_t ArrowArrayViewGetNumBuffers(struct ArrowArrayView* array_view) {
+  if (array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ||
+      array_view->storage_type == NANOARROW_TYPE_STRING_VIEW) {
+    return NANOARROW_BINARY_VIEW_FIXED_BUFFERS + array_view->n_variadic_buffers + 1;
+  }
+
+  int64_t count = 0;
+  while (count < NANOARROW_MAX_FIXED_BUFFERS &&
+         array_view->layout.buffer_type[count] != NANOARROW_BUFFER_TYPE_NONE) {
+    count++;
+  }
+
+  return count;
+}
+
+// Returns a view of buffer i of array_view.
+//
+// For binary view / string view storage:
+//   - i below NANOARROW_BINARY_VIEW_FIXED_BUFFERS returns buffer_views[i];
+//   - i at or past the last variadic buffer returns the buffer of variadic
+//     sizes (one int64_t per variadic buffer);
+//   - any i in between returns the corresponding variadic data buffer together
+//     with its recorded size.
+// For every other storage type, buffer_views[i] is returned, or an empty view
+// (NULL data, zero size) when i >= NANOARROW_MAX_FIXED_BUFFERS.
+static inline struct ArrowBufferView ArrowArrayViewGetBufferView(
+    struct ArrowArrayView* array_view, int64_t i) {
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY_VIEW:
+    case NANOARROW_TYPE_STRING_VIEW:
+      if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+        return array_view->buffer_views[i];
+      } else if (i >=
+                 (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+        struct ArrowBufferView view;
+        view.data.as_int64 = array_view->variadic_buffer_sizes;
+        // Elements of the sizes buffer are int64_t, so size it with that type
+        view.size_bytes = array_view->n_variadic_buffers * sizeof(int64_t);
+        return view;
+      } else {
+        struct ArrowBufferView view;
+        view.data.data =
+            array_view->variadic_buffers[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
+        view.size_bytes =
+            array_view->variadic_buffer_sizes[i - NANOARROW_BINARY_VIEW_FIXED_BUFFERS];
+        return view;
+      }
+    default:
+      // We need this check to avoid -Warray-bounds from complaining
+      if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+        struct ArrowBufferView view;
+        view.data.data = NULL;
+        view.size_bytes = 0;
+        return view;
+      } else {
+        return array_view->buffer_views[i];
+      }
+  }
+}
+
+// Returns the buffer type of buffer i of array_view.
+//
+// For binary view / string view storage:
+//   - i below NANOARROW_BINARY_VIEW_FIXED_BUFFERS returns layout.buffer_type[i];
+//   - i equal to n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS
+//     returns NANOARROW_BUFFER_TYPE_VARIADIC_SIZE;
+//   - every other i returns NANOARROW_BUFFER_TYPE_VARIADIC_DATA.
+// For every other storage type, layout.buffer_type[i] is returned, or
+// NANOARROW_BUFFER_TYPE_NONE when i >= NANOARROW_MAX_FIXED_BUFFERS.
+//
+// Defined static inline like the other buffer accessors defined next to it.
+static inline enum ArrowBufferType ArrowArrayViewGetBufferType(
+    struct ArrowArrayView* array_view, int64_t i) {
+  switch (array_view->storage_type) {
+    case NANOARROW_TYPE_BINARY_VIEW:
+    case NANOARROW_TYPE_STRING_VIEW:
+      if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+        return array_view->layout.buffer_type[i];
+      } else if (i ==
+                 (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+        return NANOARROW_BUFFER_TYPE_VARIADIC_SIZE;
+      } else {
+        return NANOARROW_BUFFER_TYPE_VARIADIC_DATA;
+      }
+    default:
+      // We need this check to avoid -Warray-bounds from complaining
+      if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+        return NANOARROW_BUFFER_TYPE_NONE;
+      } else {
+        return array_view->layout.buffer_type[i];
+      }
+  }
+}
+
+// Returns the data type of the elements of buffer i of array_view.
+//
+// For binary view / string view storage: fixed buffers report
+// layout.buffer_data_type[i]; indices at or past the last variadic buffer
+// report NANOARROW_TYPE_INT64; variadic data buffers report
+// NANOARROW_TYPE_BINARY for binary views and NANOARROW_TYPE_STRING for string
+// views. For every other storage type, layout.buffer_data_type[i] is returned,
+// or NANOARROW_TYPE_UNINITIALIZED when i >= NANOARROW_MAX_FIXED_BUFFERS.
+static inline enum ArrowType ArrowArrayViewGetBufferDataType(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int is_binary_view = array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW;
+  const int is_string_view = array_view->storage_type == NANOARROW_TYPE_STRING_VIEW;
+
+  if (!is_binary_view && !is_string_view) {
+    // We need this check to avoid -Warray-bounds from complaining
+    if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+      return NANOARROW_TYPE_UNINITIALIZED;
+    }
+    return array_view->layout.buffer_data_type[i];
+  }
+
+  if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+    return array_view->layout.buffer_data_type[i];
+  }
+
+  if (i >= (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+    return NANOARROW_TYPE_INT64;
+  }
+
+  if (is_binary_view) {
+    return NANOARROW_TYPE_BINARY;
+  }
+  return NANOARROW_TYPE_STRING;
+}
+
+// Returns the element size, in bits, of buffer i of array_view.
+//
+// For binary view / string view storage: fixed buffers report
+// layout.element_size_bits[i]; indices at or past the last variadic buffer
+// report the width of an int64_t; variadic data buffers report 0. For every
+// other storage type, layout.element_size_bits[i] is returned, or 0 when
+// i >= NANOARROW_MAX_FIXED_BUFFERS.
+static inline int64_t ArrowArrayViewGetBufferElementSizeBits(
+    struct ArrowArrayView* array_view, int64_t i) {
+  const int is_view_type = array_view->storage_type == NANOARROW_TYPE_BINARY_VIEW ||
+                           array_view->storage_type == NANOARROW_TYPE_STRING_VIEW;
+
+  if (!is_view_type) {
+    // We need this check to avoid -Warray-bounds from complaining
+    if (i >= NANOARROW_MAX_FIXED_BUFFERS) {
+      return 0;
+    }
+    return array_view->layout.element_size_bits[i];
+  }
+
+  if (i < NANOARROW_BINARY_VIEW_FIXED_BUFFERS) {
+    return array_view->layout.element_size_bits[i];
+  }
+
+  if (i >= (array_view->n_variadic_buffers + NANOARROW_BINARY_VIEW_FIXED_BUFFERS)) {
+    return sizeof(int64_t) * 8;
+  }
+
+  return 0;
+}
+
static inline int8_t ArrowArrayViewIsNull(const struct ArrowArrayView*
array_view,
int64_t i) {
const uint8_t* validity_buffer = array_view->buffer_views[0].data.as_uint8;
@@ -3536,12 +3961,37 @@ static inline int8_t ArrowArrayViewIsNull(const struct
ArrowArrayView* array_vie
}
}
+// Computes the number of null elements of array_view.
+//
+// Returns 0 for a zero-length view, the full length for NANOARROW_TYPE_NA
+// storage, and 0 for dense/sparse unions. Otherwise returns 0 when buffer 0
+// (the validity bitmap) has NULL data, or else the length minus the number of
+// set bits in the bitmap over [offset, offset + length).
+static inline int64_t ArrowArrayViewComputeNullCount(
+    const struct ArrowArrayView* array_view) {
+  const int64_t n_elements = array_view->length;
+  if (n_elements == 0) {
+    return 0;
+  }
+
+  if (array_view->storage_type == NANOARROW_TYPE_NA) {
+    return n_elements;
+  }
+
+  // Unions are "never null" in Arrow land
+  if (array_view->storage_type == NANOARROW_TYPE_DENSE_UNION ||
+      array_view->storage_type == NANOARROW_TYPE_SPARSE_UNION) {
+    return 0;
+  }
+
+  const uint8_t* validity = array_view->buffer_views[0].data.as_uint8;
+  if (validity != NULL) {
+    return n_elements - ArrowBitCountSet(validity, array_view->offset, n_elements);
+  }
+  return 0;
+}
+
static inline int8_t ArrowArrayViewUnionTypeId(const struct ArrowArrayView*
array_view,
int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_DENSE_UNION:
case NANOARROW_TYPE_SPARSE_UNION:
- return array_view->buffer_views[0].data.as_int8[i];
+ return array_view->buffer_views[0].data.as_int8[array_view->offset + i];
default:
return -1;
}
@@ -3561,9 +4011,9 @@ static inline int64_t ArrowArrayViewUnionChildOffset(
const struct ArrowArrayView* array_view, int64_t i) {
switch (array_view->storage_type) {
case NANOARROW_TYPE_DENSE_UNION:
- return array_view->buffer_views[1].data.as_int32[i];
+ return array_view->buffer_views[1].data.as_int32[array_view->offset + i];
case NANOARROW_TYPE_SPARSE_UNION:
- return i;
+ return array_view->offset + i;
default:
return -1;
}
@@ -3581,6 +4031,20 @@ static inline int64_t ArrowArrayViewListChildOffset(
}
}
+// Returns the bytes referenced by element i of a binary view / string view
+// array.
+//
+// Element i is read directly from buffer 1 of array_view; array_view->offset is
+// not applied here and i is not bounds checked. If the stored size is at most
+// NANOARROW_BINARY_VIEW_INLINE_SIZE the returned view points at the bytes held
+// inside the element itself; otherwise it points into the variadic buffer
+// selected by ref.buffer_index, at byte offset ref.offset.
+//
+// Declared static inline (rather than plain static) to match the other helpers
+// defined in this header.
+static inline struct ArrowBufferView ArrowArrayViewGetBytesFromViewArrayUnsafe(
+    const struct ArrowArrayView* array_view, int64_t i) {
+  const union ArrowBinaryView* bv = &array_view->buffer_views[1].data.as_binary_view[i];
+  struct ArrowBufferView out = {{NULL}, bv->inlined.size};
+  if (bv->inlined.size <= NANOARROW_BINARY_VIEW_INLINE_SIZE) {
+    out.data.as_uint8 = bv->inlined.data;
+    return out;
+  }
+
+  out.data.data = array_view->variadic_buffers[bv->ref.buffer_index];
+  out.data.as_uint8 += bv->ref.offset;
+  return out;
+}
+
static inline int64_t ArrowArrayViewGetIntUnsafe(const struct ArrowArrayView*
array_view,
int64_t i) {
const struct ArrowBufferView* data_view = &array_view->buffer_views[1];
@@ -3607,6 +4071,8 @@ static inline int64_t ArrowArrayViewGetIntUnsafe(const
struct ArrowArrayView* ar
return (int64_t)data_view->data.as_double[i];
case NANOARROW_TYPE_FLOAT:
return (int64_t)data_view->data.as_float[i];
+ case NANOARROW_TYPE_HALF_FLOAT:
+ return (int64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
case NANOARROW_TYPE_BOOL:
return ArrowBitGet(data_view->data.as_uint8, i);
default:
@@ -3640,6 +4106,8 @@ static inline uint64_t ArrowArrayViewGetUIntUnsafe(
return (uint64_t)data_view->data.as_double[i];
case NANOARROW_TYPE_FLOAT:
return (uint64_t)data_view->data.as_float[i];
+ case NANOARROW_TYPE_HALF_FLOAT:
+ return (uint64_t)ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
case NANOARROW_TYPE_BOOL:
return ArrowBitGet(data_view->data.as_uint8, i);
default:
@@ -3672,6 +4140,8 @@ static inline double ArrowArrayViewGetDoubleUnsafe(
return data_view->data.as_double[i];
case NANOARROW_TYPE_FLOAT:
return data_view->data.as_float[i];
+ case NANOARROW_TYPE_HALF_FLOAT:
+ return ArrowHalfFloatToFloat(data_view->data.as_uint16[i]);
case NANOARROW_TYPE_BOOL:
return ArrowBitGet(data_view->data.as_uint8, i);
default:
@@ -3703,6 +4173,14 @@ static inline struct ArrowStringView
ArrowArrayViewGetStringUnsafe(
view.size_bytes = array_view->layout.element_size_bits[1] / 8;
view.data = array_view->buffer_views[1].data.as_char + (i *
view.size_bytes);
break;
+ case NANOARROW_TYPE_STRING_VIEW:
+ case NANOARROW_TYPE_BINARY_VIEW: {
+ struct ArrowBufferView buf_view =
+ ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i);
+ view.data = buf_view.data.as_char;
+ view.size_bytes = buf_view.size_bytes;
+ break;
+ }
default:
view.data = NULL;
view.size_bytes = 0;
@@ -3737,6 +4215,10 @@ static inline struct ArrowBufferView
ArrowArrayViewGetBytesUnsafe(
view.data.as_uint8 =
array_view->buffer_views[1].data.as_uint8 + (i * view.size_bytes);
break;
+ case NANOARROW_TYPE_STRING_VIEW:
+ case NANOARROW_TYPE_BINARY_VIEW:
+ view = ArrowArrayViewGetBytesFromViewArrayUnsafe(array_view, i);
+ break;
default:
view.data.data = NULL;
view.size_bytes = 0;
diff --git a/c/vendor/nanoarrow/nanoarrow.hpp b/c/vendor/nanoarrow/nanoarrow.hpp
index 49ba38f0d..16c2e55b9 100644
--- a/c/vendor/nanoarrow/nanoarrow.hpp
+++ b/c/vendor/nanoarrow/nanoarrow.hpp
@@ -15,11 +15,12 @@
// specific language governing permissions and limitations
// under the License.
+#include <cstring>
#include <exception>
#include <string>
#include <vector>
-#include "nanoarrow/nanoarrow.h"
+#include "nanoarrow.h"
#ifndef NANOARROW_HPP_INCLUDED
#define NANOARROW_HPP_INCLUDED
@@ -216,10 +217,16 @@ template <typename T>
class Unique {
public:
/// \brief Construct an invalid instance of T holding no resources
- Unique() { init_pointer(&data_); }
+ Unique() {
+ std::memset(&data_, 0, sizeof(data_));
+ init_pointer(&data_);
+ }
/// \brief Move and take ownership of data
- Unique(T* data) { move_pointer(data, &data_); }
+ Unique(T* data) {
+ std::memset(&data_, 0, sizeof(data_));
+ move_pointer(data, &data_);
+ }
/// \brief Move and take ownership of data wrapped by rhs
Unique(Unique&& rhs) : Unique(rhs.get()) {}
diff --git a/c/vendor/vendor_nanoarrow.sh b/c/vendor/vendor_nanoarrow.sh
index f74ebdeda..9024090fe 100755
--- a/c/vendor/vendor_nanoarrow.sh
+++ b/c/vendor/vendor_nanoarrow.sh
@@ -21,7 +21,7 @@
main() {
local -r repo_url="https://github.com/apache/arrow-nanoarrow"
# Check releases page: https://github.com/apache/arrow-nanoarrow/releases/
- local -r commit_sha=c5fb10035c17b598e6fd688ad9eb7b874c7c631b
+ local -r commit_sha=33d2c8b973d8f8f424e02ac92ddeaace2a92f8dd
echo "Fetching $commit_sha from $repo_url"
SCRATCH=$(mktemp -d)
@@ -34,21 +34,13 @@ main() {
mkdir -p nanoarrow
tar --strip-components 1 -C "$SCRATCH" -xf "$tarball"
- # Build the bundle using cmake. We could also use the dist/ files
- # but this allows us to add the symbol namespace and ensures that the
- # resulting bundle is perfectly synchronized with the commit we've pulled.
- pushd "$SCRATCH"
- mkdir build && cd build
- # Do not use "adbc" in the namespace name since our scripts expose all
- # such symbols
- cmake .. -DNANOARROW_BUNDLE=ON -DNANOARROW_NAMESPACE=Private
- cmake --build .
- cmake --install . --prefix=../dist-adbc
- popd
+ # Build the bundle
+ python "$SCRATCH/ci/scripts/bundle.py" \
+ --symbol-namespace=Private \
+ --include-output-dir=nanoarrow \
+ --source-output-dir=nanoarrow \
+ --header-namespace=
- cp "$SCRATCH/dist-adbc/nanoarrow.c" nanoarrow/
- cp "$SCRATCH/dist-adbc/nanoarrow.h" nanoarrow/
- cp "$SCRATCH/dist-adbc/nanoarrow.hpp" nanoarrow/
mv CMakeLists.nanoarrow.tmp nanoarrow/CMakeLists.txt
}