This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7d4569d ipc: Update dist/ for commit
b617f44644ee91210402c0181560af6f9a579c50
7d4569d is described below
commit 7d4569dc4d212cf6c09e7b481fecd9d2347b03a1
Author: GitHub Actions <[email protected]>
AuthorDate: Thu Feb 23 13:35:59 2023 +0000
ipc: Update dist/ for commit b617f44644ee91210402c0181560af6f9a579c50
---
dist/nanoarrow_ipc.c | 454 +++++++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 443 insertions(+), 11 deletions(-)
diff --git a/dist/nanoarrow_ipc.c b/dist/nanoarrow_ipc.c
index 7cf4909..7300657 100644
--- a/dist/nanoarrow_ipc.c
+++ b/dist/nanoarrow_ipc.c
@@ -20287,6 +20287,7 @@ static inline int
org_apache_arrow_flatbuf_Tensor_verify_as_root_with_type_hash(
// under the License.
#include <errno.h>
+#include <stdio.h>
#include <string.h>
#include "nanoarrow.h"
@@ -20338,6 +20339,59 @@ static inline int32_t ArrowIpcReadInt32LE(struct
ArrowBufferView* data) {
#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+// Copies the key/value pairs of kv_vec into the metadata of schema.
+//
+// Returns NANOARROW_OK immediately (leaving schema untouched) when kv_vec has
+// no pairs, EINVAL when it has more than 2147483647 pairs, or the error code of
+// the first failing nanoarrow call. A message is written to error on failure.
+static int ArrowIpcReaderSetMetadata(struct ArrowSchema* schema,
+                                     ns(KeyValue_vec_t) kv_vec,
+                                     struct ArrowError* error) {
+  int64_t n_pairs = ns(KeyValue_vec_len(kv_vec));
+  if (n_pairs == 0) {
+    return NANOARROW_OK;
+  }
+
+  if (n_pairs > 2147483647) {
+    ArrowErrorSet(error,
+                  "Expected between 0 and 2147483647 key/value pairs but found %ld",
+                  (long)n_pairs);
+    return EINVAL;
+  }
+
+  struct ArrowBuffer buf;
+  int result = ArrowMetadataBuilderInit(&buf, NULL);
+  if (result != NANOARROW_OK) {
+    ArrowBufferReset(&buf);
+    ArrowErrorSet(error, "ArrowMetadataBuilderInit() failed");
+    return result;
+  }
+
+  for (int64_t i = 0; i < n_pairs; i++) {
+    ns(KeyValue_table_t) kv = ns(KeyValue_vec_at(kv_vec, i));
+    struct ArrowStringView key;
+    struct ArrowStringView value;
+
+    // NOTE(review): key and value do not appear to be required fields in the
+    // flatbuffer schema, so the accessors may return NULL. Treat a missing
+    // string as empty instead of handing NULL to strlen().
+    key.data = ns(KeyValue_key(kv));
+    if (key.data == NULL) {
+      key.data = "";
+    }
+    key.size_bytes = strlen(key.data);
+
+    value.data = ns(KeyValue_value(kv));
+    if (value.data == NULL) {
+      value.data = "";
+    }
+    value.size_bytes = strlen(value.data);
+
+    result = ArrowMetadataBuilderAppend(&buf, key, value);
+    if (result != NANOARROW_OK) {
+      ArrowBufferReset(&buf);
+      ArrowErrorSet(error, "ArrowMetadataBuilderAppend() failed");
+      return result;
+    }
+  }
+
+  // The temporary buffer is released right after this call whether or not it
+  // succeeds.
+  result = ArrowSchemaSetMetadata(schema, (const char*)buf.data);
+  ArrowBufferReset(&buf);
+  if (result != NANOARROW_OK) {
+    ArrowErrorSet(error, "ArrowSchemaSetMetadata() failed");
+    return result;
+  }
+
+  return NANOARROW_OK;
+}
+
+
static int ArrowIpcReaderSetTypeSimple(struct ArrowSchema* schema, int
nanoarrow_type,
struct ArrowError* error) {
int result = ArrowSchemaSetType(schema, nanoarrow_type);
@@ -20404,8 +20458,371 @@ static int ArrowIpcReaderSetTypeInt(struct
ArrowSchema* schema,
return ArrowIpcReaderSetTypeSimple(schema, nanoarrow_type, error);
}
+// Sets the type of schema from a flatbuffer FloatingPoint table.
+//
+// HALF, SINGLE and DOUBLE precisions map to the half float, float and double
+// nanoarrow types; any other precision yields EINVAL with a message in error.
+static int ArrowIpcReaderSetTypeFloatingPoint(struct ArrowSchema* schema,
+                                              flatbuffers_generic_t type_generic,
+                                              struct ArrowError* error) {
+  ns(FloatingPoint_table_t) type = (ns(FloatingPoint_table_t))type_generic;
+  int fp_precision = ns(FloatingPoint_precision(type));
+
+  int nanoarrow_type;
+  if (fp_precision == ns(Precision_HALF)) {
+    nanoarrow_type = NANOARROW_TYPE_HALF_FLOAT;
+  } else if (fp_precision == ns(Precision_SINGLE)) {
+    nanoarrow_type = NANOARROW_TYPE_FLOAT;
+  } else if (fp_precision == ns(Precision_DOUBLE)) {
+    nanoarrow_type = NANOARROW_TYPE_DOUBLE;
+  } else {
+    ArrowErrorSet(error, "Unexpected FloatingPoint Precision value: %d",
+                  (int)fp_precision);
+    return EINVAL;
+  }
+
+  return ArrowIpcReaderSetTypeSimple(schema, nanoarrow_type, error);
+}
+
+
+// Sets the type of schema from a flatbuffer Decimal table.
+//
+// A bitWidth of 128 or 256 selects the decimal128 or decimal256 nanoarrow
+// type; any other width yields EINVAL. The precision and scale are forwarded
+// to ArrowSchemaSetTypeDecimal(), whose error code is returned on failure.
+static int ArrowIpcReaderSetTypeDecimal(struct ArrowSchema* schema,
+                                        flatbuffers_generic_t type_generic,
+                                        struct ArrowError* error) {
+  ns(Decimal_table_t) type = (ns(Decimal_table_t))type_generic;
+  int decimal_scale = ns(Decimal_scale(type));
+  int decimal_precision = ns(Decimal_precision(type));
+  int n_bits = ns(Decimal_bitWidth(type));
+
+  int nanoarrow_type;
+  if (n_bits == 128) {
+    nanoarrow_type = NANOARROW_TYPE_DECIMAL128;
+  } else if (n_bits == 256) {
+    nanoarrow_type = NANOARROW_TYPE_DECIMAL256;
+  } else {
+    ArrowErrorSet(error, "Unexpected Decimal bitwidth value: %d", (int)n_bits);
+    return EINVAL;
+  }
+
+  int status =
+      ArrowSchemaSetTypeDecimal(schema, nanoarrow_type, decimal_precision, decimal_scale);
+  if (status == NANOARROW_OK) {
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorSet(error, "ArrowSchemaSetTypeDecimal() failed");
+  return status;
+}
+
+
+// Sets the type of schema to fixed-size binary using the byteWidth stored in
+// a flatbuffer FixedSizeBinary table.
+//
+// Returns NANOARROW_OK on success; otherwise the error code from
+// ArrowSchemaSetTypeFixedSize() with a message written to error, matching the
+// other type setters in this file.
+static int ArrowIpcReaderSetTypeFixedSizeBinary(struct ArrowSchema* schema,
+                                                flatbuffers_generic_t type_generic,
+                                                struct ArrowError* error) {
+  ns(FixedSizeBinary_table_t) type = (ns(FixedSizeBinary_table_t))type_generic;
+  int fixed_size = ns(FixedSizeBinary_byteWidth(type));
+
+  int result =
+      ArrowSchemaSetTypeFixedSize(schema, NANOARROW_TYPE_FIXED_SIZE_BINARY, fixed_size);
+  if (result != NANOARROW_OK) {
+    ArrowErrorSet(error, "ArrowSchemaSetTypeFixedSize() failed");
+    return result;
+  }
+
+  return NANOARROW_OK;
+}
+
+
+// Sets the type of schema from a flatbuffer Date table.
+//
+// DAY maps to date32 and MILLISECOND to date64; any other unit yields EINVAL
+// with a message in error.
+static int ArrowIpcReaderSetTypeDate(struct ArrowSchema* schema,
+                                     flatbuffers_generic_t type_generic,
+                                     struct ArrowError* error) {
+  ns(Date_table_t) type = (ns(Date_table_t))type_generic;
+  int unit = ns(Date_unit(type));
+
+  if (unit == ns(DateUnit_DAY)) {
+    return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_DATE32, error);
+  }
+
+  if (unit == ns(DateUnit_MILLISECOND)) {
+    return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_DATE64, error);
+  }
+
+  ArrowErrorSet(error, "Unexpected Date DateUnit value: %d", (int)unit);
+  return EINVAL;
+}
+
+
+// Sets the type of schema from a flatbuffer Time table.
+//
+// SECOND and MILLISECOND units require a bitWidth of 32 (time32); MICROSECOND
+// and NANOSECOND units require a bitWidth of 64 (time64). A mismatched width
+// or an unknown unit yields EINVAL; a failure of ArrowSchemaSetTypeDateTime()
+// is returned as-is. A message is written to error in every failure case.
+static int ArrowIpcReaderSetTypeTime(struct ArrowSchema* schema,
+                                     flatbuffers_generic_t type_generic,
+                                     struct ArrowError* error) {
+  ns(Time_table_t) type = (ns(Time_table_t))type_generic;
+  int unit = ns(Time_unit(type));
+  int n_bits = ns(Time_bitWidth(type));
+  int nanoarrow_type;
+
+  if (unit == ns(TimeUnit_SECOND) || unit == ns(TimeUnit_MILLISECOND)) {
+    if (n_bits != 32) {
+      ArrowErrorSet(error, "Expected bitwidth of 32 for Time TimeUnit %s but found %d",
+                    ns(TimeUnit_name(unit)), n_bits);
+      return EINVAL;
+    }
+
+    nanoarrow_type = NANOARROW_TYPE_TIME32;
+  } else if (unit == ns(TimeUnit_MICROSECOND) || unit == ns(TimeUnit_NANOSECOND)) {
+    if (n_bits != 64) {
+      ArrowErrorSet(error, "Expected bitwidth of 64 for Time TimeUnit %s but found %d",
+                    ns(TimeUnit_name(unit)), n_bits);
+      return EINVAL;
+    }
+
+    nanoarrow_type = NANOARROW_TYPE_TIME64;
+  } else {
+    ArrowErrorSet(error, "Unexpected Time TimeUnit value: %d", (int)unit);
+    return EINVAL;
+  }
+
+  int status = ArrowSchemaSetTypeDateTime(schema, nanoarrow_type, unit, NULL);
+  if (status == NANOARROW_OK) {
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+  return status;
+}
+
+
+// Sets the type of schema to timestamp from a flatbuffer Timestamp table.
+//
+// The unit is forwarded unchanged; the timezone is the one stored in the table
+// or the empty string when none is present. On failure the error code from
+// ArrowSchemaSetTypeDateTime() is returned and a message is written to error.
+static int ArrowIpcReaderSetTypeTimestamp(struct ArrowSchema* schema,
+                                          flatbuffers_generic_t type_generic,
+                                          struct ArrowError* error) {
+  ns(Timestamp_table_t) type = (ns(Timestamp_table_t))type_generic;
+  int unit = ns(Timestamp_unit(type));
+
+  const char* tz =
+      ns(Timestamp_timezone_is_present(type)) ? ns(Timestamp_timezone_get(type)) : "";
+
+  int status = ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, unit, tz);
+  if (status == NANOARROW_OK) {
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+  return status;
+}
+
+
+// Sets the type of schema to duration using the unit stored in a flatbuffer
+// Duration table.
+//
+// On failure the error code from ArrowSchemaSetTypeDateTime() is returned and
+// a message is written to error.
+static int ArrowIpcReaderSetTypeDuration(struct ArrowSchema* schema,
+                                         flatbuffers_generic_t type_generic,
+                                         struct ArrowError* error) {
+  ns(Duration_table_t) type = (ns(Duration_table_t))type_generic;
+  int unit = ns(Duration_unit(type));
+
+  int status = ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_DURATION, unit, NULL);
+  if (status == NANOARROW_OK) {
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+  return status;
+}
+
+
+// Sets the type of schema from a flatbuffer Interval table.
+//
+// YEAR_MONTH, DAY_TIME and MONTH_DAY_NANO map to the corresponding nanoarrow
+// interval types; any other unit yields EINVAL with a message in error.
+static int ArrowIpcReaderSetTypeInterval(struct ArrowSchema* schema,
+                                         flatbuffers_generic_t type_generic,
+                                         struct ArrowError* error) {
+  ns(Interval_table_t) type = (ns(Interval_table_t))type_generic;
+  int unit = ns(Interval_unit(type));
+
+  int nanoarrow_type;
+  if (unit == ns(IntervalUnit_YEAR_MONTH)) {
+    nanoarrow_type = NANOARROW_TYPE_INTERVAL_MONTHS;
+  } else if (unit == ns(IntervalUnit_DAY_TIME)) {
+    nanoarrow_type = NANOARROW_TYPE_INTERVAL_DAY_TIME;
+  } else if (unit == ns(IntervalUnit_MONTH_DAY_NANO)) {
+    nanoarrow_type = NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO;
+  } else {
+    ArrowErrorSet(error, "Unexpected Interval unit value: %d", (int)unit);
+    return EINVAL;
+  }
+
+  return ArrowIpcReaderSetTypeSimple(schema, nanoarrow_type, error);
+}
+
+
+// nanoarrow's built-in SchemaSet functions for nested types can't quite be
+// used here because the IPC format allows modifying some of the defaults those
+// functions assume. In particular, the allocate + initialize children step is
+// handled outside these setters, so this helper only assigns the format string.
+//
+// On failure the error code from ArrowSchemaSetFormat() is returned and a
+// message naming the format is written to error.
+static int ArrowIpcReaderSetTypeSimpleNested(struct ArrowSchema* schema,
+                                             const char* format,
+                                             struct ArrowError* error) {
+  int status = ArrowSchemaSetFormat(schema, format);
+  if (status == NANOARROW_OK) {
+    return NANOARROW_OK;
+  }
+
+  ArrowErrorSet(error, "ArrowSchemaSetFormat('%s') failed", format);
+  return status;
+}
+
+
+// Sets the format of schema to a fixed-size list ("+w:<listSize>") using the
+// listSize stored in a flatbuffer FixedSizeList table.
+//
+// Returns EINVAL if the format string cannot be built; otherwise the result of
+// ArrowIpcReaderSetTypeSimpleNested().
+static int ArrowIpcReaderSetTypeFixedSizeList(struct ArrowSchema* schema,
+                                              flatbuffers_generic_t type_generic,
+                                              struct ArrowError* error) {
+  ns(FixedSizeList_table_t) type = (ns(FixedSizeList_table_t))type_generic;
+  int32_t fixed_size = ns(FixedSizeList_listSize(type));
+
+  // snprintf() always NUL-terminates when the size is non-zero, so no manual
+  // terminator is needed. Its return value is checked because a negative value
+  // must never be used as an index into the buffer.
+  char fixed_size_str[128];
+  int n_chars =
+      snprintf(fixed_size_str, sizeof(fixed_size_str), "+w:%d", (int)fixed_size);
+  if (n_chars < 0 || (size_t)n_chars >= sizeof(fixed_size_str)) {
+    ArrowErrorSet(error, "Failed to build format string for FixedSizeList of size %d",
+                  (int)fixed_size);
+    return EINVAL;
+  }
+
+  return ArrowIpcReaderSetTypeSimpleNested(schema, fixed_size_str, error);
+}
+
+
+// Sets the format of schema to map ("+m") and mirrors the keysSorted value of
+// a flatbuffer Map table into ARROW_FLAG_MAP_KEYS_SORTED.
+//
+// Returns early with the error from ArrowIpcReaderSetTypeSimpleNested() if the
+// format cannot be set; the flags are only touched after that succeeds.
+static int ArrowIpcReaderSetTypeMap(struct ArrowSchema* schema,
+                                    flatbuffers_generic_t type_generic,
+                                    struct ArrowError* error) {
+  ns(Map_table_t) type = (ns(Map_table_t))type_generic;
+  NANOARROW_RETURN_NOT_OK(ArrowIpcReaderSetTypeSimpleNested(schema, "+m", error));
+
+  // Set or clear the flag explicitly so it always reflects the flatbuffer.
+  if (!ns(Map_keysSorted(type))) {
+    schema->flags &= ~ARROW_FLAG_MAP_KEYS_SORTED;
+    return NANOARROW_OK;
+  }
+
+  schema->flags |= ARROW_FLAG_MAP_KEYS_SORTED;
+  return NANOARROW_OK;
+}
+
+
+// Sets the format of schema to a sparse ("+us:") or dense ("+ud:") union
+// followed by a comma-separated list of type ids.
+//
+// When the flatbuffer Union table carries typeIds, their count must equal
+// n_children and they are written verbatim; otherwise the ids 0..n_children-1
+// are generated. Returns EINVAL (with a message in error) for an unknown union
+// mode, for n_children outside 0..127, or for a typeIds/children count
+// mismatch; otherwise the result of ArrowIpcReaderSetTypeSimpleNested().
+static int ArrowIpcReaderSetTypeUnion(struct ArrowSchema* schema,
+                                      flatbuffers_generic_t type_generic,
+                                      int64_t n_children, struct ArrowError* error) {
+  ns(Union_table_t) type = (ns(Union_table_t))type_generic;
+  int union_mode = ns(Union_mode(type));
+
+  if (n_children < 0 || n_children > 127) {
+    ArrowErrorSet(error,
+                  "Expected between 0 and 127 children for Union type but found %ld",
+                  (long)n_children);
+    return EINVAL;
+  }
+
+  // At most 127 type ids can be present; the longest single id that could
+  // appear here is INT_MIN (11 chars including the sign). With commas and the
+  // prefix the max size would be 1527 characters. (Any ids outside the range
+  // 0...127 are unlikely to be valid elsewhere but they could in theory be
+  // present here).
+  char union_types_str[2048];
+  memset(union_types_str, 0, sizeof(union_types_str));
+  char* format_cursor = union_types_str;
+  int format_out_size = sizeof(union_types_str);
+  int n_chars = 0;
+
+  switch (union_mode) {
+    case ns(UnionMode_Sparse):
+      n_chars = snprintf(format_cursor, format_out_size, "+us:");
+      break;
+    case ns(UnionMode_Dense):
+      n_chars = snprintf(format_cursor, format_out_size, "+ud:");
+      break;
+    default:
+      ArrowErrorSet(error, "Unexpected Union UnionMode value: %d", (int)union_mode);
+      return EINVAL;
+  }
+  format_cursor += n_chars;
+  format_out_size -= n_chars;
+
+  if (ns(Union_typeIds_is_present(type))) {
+    flatbuffers_int32_vec_t type_ids = ns(Union_typeIds(type));
+    int64_t n_type_ids = flatbuffers_int32_vec_len(type_ids);
+
+    if (n_type_ids != n_children) {
+      ArrowErrorSet(
+          error, "Expected %ld children for Union type with %ld typeIds but found %ld",
+          (long)n_type_ids, (long)n_type_ids, (long)n_children);
+      return EINVAL;
+    }
+
+    // The first id has no leading comma; every following id is prefixed by one.
+    for (int64_t i = 0; i < n_type_ids; i++) {
+      n_chars = snprintf(format_cursor, format_out_size, (i == 0) ? "%d" : ",%d",
+                         (int)flatbuffers_int32_vec_at(type_ids, i));
+      format_cursor += n_chars;
+      format_out_size -= n_chars;
+    }
+  } else {
+    // No explicit typeIds: the ids are the child indices 0..n_children-1.
+    for (int64_t i = 0; i < n_children; i++) {
+      n_chars = snprintf(format_cursor, format_out_size, (i == 0) ? "%d" : ",%d", (int)i);
+      format_cursor += n_chars;
+      format_out_size -= n_chars;
+    }
+  }
+
+  return ArrowIpcReaderSetTypeSimpleNested(schema, union_types_str, error);
+}
+
+
+// Dispatches on the type tag of a flatbuffer Field and delegates to the
+// matching type setter for schema.
+//
+// n_children is only forwarded to the Union setter. An unrecognized type tag
+// yields EINVAL with a message in error; otherwise the delegate's result is
+// returned.
+static int ArrowIpcReaderSetType(struct ArrowSchema* schema, ns(Field_table_t) field,
+                                 int64_t n_children, struct ArrowError* error) {
+  int field_type_id = ns(Field_type_type(field));
+
+  switch (field_type_id) {
+    // Types fully described by the tag alone
+    case ns(Type_Null):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_NA, error);
+    case ns(Type_Bool):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_BOOL, error);
+    case ns(Type_Binary):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_BINARY, error);
+    case ns(Type_LargeBinary):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_LARGE_BINARY, error);
+    case ns(Type_Utf8):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_STRING, error);
+    case ns(Type_LargeUtf8):
+      return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_LARGE_STRING, error);
+
+    // Parameterized non-nested types
+    case ns(Type_Int):
+      return ArrowIpcReaderSetTypeInt(schema, ns(Field_type_get(field)), error);
+    case ns(Type_FloatingPoint):
+      return ArrowIpcReaderSetTypeFloatingPoint(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Decimal):
+      return ArrowIpcReaderSetTypeDecimal(schema, ns(Field_type_get(field)), error);
+    case ns(Type_FixedSizeBinary):
+      return ArrowIpcReaderSetTypeFixedSizeBinary(schema, ns(Field_type_get(field)),
+                                                  error);
+    case ns(Type_Date):
+      return ArrowIpcReaderSetTypeDate(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Time):
+      return ArrowIpcReaderSetTypeTime(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Timestamp):
+      return ArrowIpcReaderSetTypeTimestamp(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Duration):
+      return ArrowIpcReaderSetTypeDuration(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Interval):
+      return ArrowIpcReaderSetTypeInterval(schema, ns(Field_type_get(field)), error);
+
+    // Nested types: only the format is set here
+    case ns(Type_Struct_):
+      return ArrowIpcReaderSetTypeSimpleNested(schema, "+s", error);
+    case ns(Type_List):
+      return ArrowIpcReaderSetTypeSimpleNested(schema, "+l", error);
+    case ns(Type_LargeList):
+      return ArrowIpcReaderSetTypeSimpleNested(schema, "+L", error);
+    case ns(Type_FixedSizeList):
+      return ArrowIpcReaderSetTypeFixedSizeList(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Map):
+      return ArrowIpcReaderSetTypeMap(schema, ns(Field_type_get(field)), error);
+    case ns(Type_Union):
+      return ArrowIpcReaderSetTypeUnion(schema, ns(Field_type_get(field)), n_children,
+                                        error);
+
+    default:
+      ArrowErrorSet(error, "Unrecognized Field type with value %d", (int)field_type_id);
+      return EINVAL;
+  }
+}
+
+
+// Forward declaration: ArrowIpcReaderSetField() below calls this function
+// before its definition appears in the file.
+static int ArrowIpcReaderSetChildren(struct ArrowSchema* schema,
ns(Field_vec_t) fields,
+ struct ArrowError* error);
+
static int ArrowIpcReaderSetField(struct ArrowSchema* schema,
ns(Field_table_t) field,
struct ArrowError* error) {
+ // No dictionary support yet
+ if (ns(Field_dictionary_is_present(field))) {
+ ArrowErrorSet(error, "Field DictionaryEncoding not supported");
+ return ENOTSUP;
+ }
+
int result;
if (ns(Field_name_is_present(field))) {
result = ArrowSchemaSetName(schema, ns(Field_name_get(field)));
@@ -20418,22 +20835,35 @@ static int ArrowIpcReaderSetField(struct ArrowSchema*
schema, ns(Field_table_t)
return result;
}
+ // Sets the schema->format and validates type-related inconsistencies
+ // that might exist in the flatbuffer
+ ns(Field_vec_t) children = ns(Field_children(field));
+ int64_t n_children = ns(Field_vec_len(children));
+
+ NANOARROW_RETURN_NOT_OK(ArrowIpcReaderSetType(schema, field, n_children,
error));
+
+ // nanoarrow's type setters set the nullable flag by default, so we might
+ // have to unset it here.
if (ns(Field_nullable_get(field))) {
schema->flags |= ARROW_FLAG_NULLABLE;
+ } else {
+ schema->flags &= ~ARROW_FLAG_NULLABLE;
}
- int type_type = ns(Field_type_type(field));
- switch (type_type) {
- case ns(Type_Int):
- NANOARROW_RETURN_NOT_OK(
- ArrowIpcReaderSetTypeInt(schema, ns(Field_type_get(field)), error));
- break;
- default:
- ArrowErrorSet(error, "Unrecognized Field type with value %d",
(int)type_type);
- return EINVAL;
+ // Children are defined separately in the flatbuffer, so we allocate,
initialize
+ // and set them separately as well.
+ result = ArrowSchemaAllocateChildren(schema, n_children);
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaAllocateChildren() failed");
+ return result;
}
- return NANOARROW_OK;
+ for (int64_t i = 0; i < n_children; i++) {
+ ArrowSchemaInit(schema->children[i]);
+ }
+
+ NANOARROW_RETURN_NOT_OK(ArrowIpcReaderSetChildren(schema, children, error));
+ return ArrowIpcReaderSetMetadata(schema, ns(Field_custom_metadata(field)),
error);
}
static int ArrowIpcReaderSetChildren(struct ArrowSchema* schema,
ns(Field_vec_t) fields,
@@ -20500,7 +20930,9 @@ static int ArrowIpcReaderDecodeSchema(struct
ArrowIpcReader* reader,
return result;
}
- return ArrowIpcReaderSetChildren(&reader->schema, fields, error);
+ NANOARROW_RETURN_NOT_OK(ArrowIpcReaderSetChildren(&reader->schema, fields,
error));
+ return ArrowIpcReaderSetMetadata(&reader->schema,
ns(Schema_custom_metadata(schema)),
+ error);
}
static inline int ArrowIpcReaderCheckHeader(struct ArrowIpcReader* reader,