lidavidm commented on code in PR #115:
URL: https://github.com/apache/arrow-nanoarrow/pull/115#discussion_r1114944816
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_test.cc:
##########
@@ -168,3 +169,58 @@ TEST(NanoarrowIpcTest, NanoarrowIpcDecodeSimpleSchema) {
ArrowIpcReaderReset(&reader);
}
+
+class ArrowTypeParameterizedTestFixture
+ : public ::testing::TestWithParam<std::shared_ptr<arrow::DataType>> {
+ protected:
+ std::shared_ptr<arrow::DataType> data_type;
+};
+
+TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowTypeRoundtrip) {
+ const std::shared_ptr<arrow::DataType>& data_type = GetParam();
+ std::shared_ptr<arrow::Schema> dummy_schema =
+ arrow::schema({arrow::field("dummy_name", data_type)});
+ auto maybe_serialized = arrow::ipc::SerializeSchema(*dummy_schema);
+ ASSERT_TRUE(maybe_serialized.ok());
+
+ struct ArrowBufferView buffer_view;
+ buffer_view.data.data = maybe_serialized.ValueUnsafe()->data();
+ buffer_view.size_bytes = maybe_serialized.ValueOrDie()->size();
+
+ struct ArrowIpcReader reader;
+ ArrowIpcReaderInit(&reader);
+ ASSERT_EQ(ArrowIpcReaderVerify(&reader, buffer_view, nullptr), NANOARROW_OK);
+ EXPECT_EQ(reader.header_size_bytes, buffer_view.size_bytes);
+ EXPECT_EQ(reader.body_size_bytes, 0);
+
+ ASSERT_EQ(ArrowIpcReaderDecode(&reader, buffer_view, nullptr), NANOARROW_OK);
+ auto maybe_schema = arrow::ImportSchema(&reader.schema);
+ ASSERT_TRUE(maybe_schema.ok());
+ EXPECT_TRUE(maybe_schema.ValueUnsafe()->Equals(dummy_schema));
+
+ ArrowIpcReaderReset(&reader);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+ NanoarrowIpcTest, ArrowTypeParameterizedTestFixture,
+ ::testing::Values(
+ arrow::null(), arrow::boolean(), arrow::int8(), arrow::uint8(),
arrow::int16(),
+ arrow::uint16(), arrow::int32(), arrow::uint32(), arrow::int64(),
arrow::uint64(),
+ arrow::utf8(), arrow::float16(), arrow::float32(), arrow::float64(),
+ arrow::decimal128(10, 3), arrow::decimal256(10, 3),
arrow::large_utf8(),
+ arrow::binary(), arrow::large_binary(), arrow::fixed_size_binary(123),
+ arrow::date32(), arrow::date64(),
arrow::time32(arrow::TimeUnit::SECOND),
+ arrow::time32(arrow::TimeUnit::MILLI),
arrow::time64(arrow::TimeUnit::MICRO),
+ arrow::time64(arrow::TimeUnit::NANO),
arrow::timestamp(arrow::TimeUnit::SECOND),
+ arrow::timestamp(arrow::TimeUnit::MILLI),
+ arrow::timestamp(arrow::TimeUnit::MICRO),
arrow::timestamp(arrow::TimeUnit::NANO),
+ arrow::timestamp(arrow::TimeUnit::SECOND, "UTC"),
+ arrow::duration(arrow::TimeUnit::SECOND),
arrow::duration(arrow::TimeUnit::MILLI),
+ arrow::duration(arrow::TimeUnit::MICRO),
arrow::duration(arrow::TimeUnit::NANO),
+ arrow::month_interval(), arrow::day_time_interval(),
+ arrow::month_day_nano_interval(),
+ arrow::list(arrow::field("some_custom_name", arrow::int32())),
+ arrow::large_list(arrow::field("some_custom_name", arrow::int32())),
+ arrow::fixed_size_list(arrow::field("some_custom_name",
arrow::int32()), 123),
+ arrow::struct_({arrow::field("col1", arrow::int32()),
Review Comment:
Unions aren't tested here - I suppose support for them isn't complete yet?
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc.c:
##########
@@ -133,6 +134,307 @@ static int ArrowIpcReaderSetTypeInt(struct ArrowSchema*
schema,
return ArrowIpcReaderSetTypeSimple(schema, nanoarrow_type, error);
}
+static int ArrowIpcReaderSetTypeFloatingPoint(struct ArrowSchema* schema,
+ flatbuffers_generic_t
type_generic,
+ struct ArrowError* error) {
+ ns(FloatingPoint_table_t) type = (ns(FloatingPoint_table_t))type_generic;
+ int precision = ns(FloatingPoint_precision(type));
+ switch (precision) {
+ case ns(Precision_HALF):
+ return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_HALF_FLOAT,
error);
+ case ns(Precision_SINGLE):
+ return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_FLOAT, error);
+ case ns(Precision_DOUBLE):
+ return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_DOUBLE, error);
+ default:
+ ArrowErrorSet(error, "Unexpected FloatingPoint Precision value: %d",
+ (int)precision);
+ return EINVAL;
+ }
+}
+
+static int ArrowIpcReaderSetTypeDecimal(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Decimal_table_t) type = (ns(Decimal_table_t))type_generic;
+ int scale = ns(Decimal_scale(type));
+ int precision = ns(Decimal_precision(type));
+ int bitwidth = ns(Decimal_bitWidth(type));
+
+ int result;
+ switch (bitwidth) {
+ case 128:
+ result =
+ ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL128,
precision, scale);
+ break;
+ case 256:
+ result =
+ ArrowSchemaSetTypeDecimal(schema, NANOARROW_TYPE_DECIMAL256,
precision, scale);
+ break;
+ default:
+ ArrowErrorSet(error, "Unexpected Decimal bitwidth value: %d",
(int)bitwidth);
+ return EINVAL;
+ }
+
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaSetTypeDecimal() failed");
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowIpcReaderSetTypeFixedSizeBinary(struct ArrowSchema* schema,
+ flatbuffers_generic_t
type_generic,
+ struct ArrowError* error) {
+ ns(FixedSizeBinary_table_t) type = (ns(FixedSizeBinary_table_t))type_generic;
+ int fixed_size = ns(FixedSizeBinary_byteWidth(type));
+ return ArrowSchemaSetTypeFixedSize(schema, NANOARROW_TYPE_FIXED_SIZE_BINARY,
+ fixed_size);
+}
+
+static int ArrowIpcReaderSetTypeDate(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Date_table_t) type = (ns(Date_table_t))type_generic;
+ int date_unit = ns(Date_unit(type));
+ switch (date_unit) {
+ case ns(DateUnit_DAY):
+ return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_DATE32, error);
+ case ns(DateUnit_MILLISECOND):
+ return ArrowIpcReaderSetTypeSimple(schema, NANOARROW_TYPE_DATE64, error);
+ default:
+ ArrowErrorSet(error, "Unexpected Date DateUnit value: %d",
(int)date_unit);
+ return EINVAL;
+ }
+}
+
+static int ArrowIpcReaderSetTypeTime(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Time_table_t) type = (ns(Time_table_t))type_generic;
+ int time_unit = ns(Time_unit(type));
+ int bitwidth = ns(Time_bitWidth(type));
+ int nanoarrow_type;
+
+ switch (time_unit) {
+ case ns(TimeUnit_SECOND):
+ case ns(TimeUnit_MILLISECOND):
+ if (bitwidth != 32) {
+ ArrowErrorSet(error, "Expected bitwidth of 32 for Time TimeUnit %s but
found %d",
+ ns(TimeUnit_name(time_unit)), bitwidth);
+ return EINVAL;
+ }
+
+ nanoarrow_type = NANOARROW_TYPE_TIME32;
+ break;
+
+ case ns(TimeUnit_MICROSECOND):
+ case ns(TimeUnit_NANOSECOND):
+ if (bitwidth != 64) {
+ ArrowErrorSet(error, "Expected bitwidth of 64 for Time TimeUnit %s but
found %d",
+ ns(TimeUnit_name(time_unit)), bitwidth);
+ return EINVAL;
+ }
+
+ nanoarrow_type = NANOARROW_TYPE_TIME64;
+ break;
+
+ default:
+ ArrowErrorSet(error, "Unexpected Time TimeUnit value: %d",
(int)time_unit);
+ return EINVAL;
+ }
+
+ int result = ArrowSchemaSetTypeDateTime(schema, nanoarrow_type, time_unit,
NULL);
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowIpcReaderSetTypeTimestamp(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Timestamp_table_t) type = (ns(Timestamp_table_t))type_generic;
+ int time_unit = ns(Timestamp_unit(type));
+
+ const char* timezone = "";
+ if (ns(Timestamp_timezone_is_present(type))) {
+ timezone = ns(Timestamp_timezone_get(type));
+ }
+
+ int result =
+ ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_TIMESTAMP, time_unit,
timezone);
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowIpcReaderSetTypeDuration(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Duration_table_t) type = (ns(Duration_table_t))type_generic;
+ int time_unit = ns(Duration_unit(type));
+
+ int result =
+ ArrowSchemaSetTypeDateTime(schema, NANOARROW_TYPE_DURATION, time_unit,
NULL);
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaSetTypeDateTime() failed");
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowIpcReaderSetTypeInterval(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ struct ArrowError* error) {
+ ns(Interval_table_t) type = (ns(Interval_table_t))type_generic;
+ int interval_unit = ns(Interval_unit(type));
+
+ switch (interval_unit) {
+ case ns(IntervalUnit_YEAR_MONTH):
+ return ArrowIpcReaderSetTypeSimple(schema,
NANOARROW_TYPE_INTERVAL_MONTHS, error);
+ case ns(IntervalUnit_DAY_TIME):
+ return ArrowIpcReaderSetTypeSimple(schema,
NANOARROW_TYPE_INTERVAL_DAY_TIME, error);
+ case ns(IntervalUnit_MONTH_DAY_NANO):
+ return ArrowIpcReaderSetTypeSimple(schema,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
+ error);
+ default:
+ ArrowErrorSet(error, "Unexpected Interval unit value: %d",
(int)interval_unit);
+ return EINVAL;
+ }
+}
+
+// We can't quite use nanoarrow's built-in SchemaSet functions for nested types
+// because the IPC format allows modifying some of the defaults those
functions assume.
+// In particular, the allocate + initialize children step is handled outside
these
+// setters.
+static int ArrowIpcReaderSetTypeSimpleNested(struct ArrowSchema* schema,
+ const char* format,
+ struct ArrowError* error) {
+ int result = ArrowSchemaSetFormat(schema, format);
+ if (result != NANOARROW_OK) {
+ ArrowErrorSet(error, "ArrowSchemaSetFormat('%s') failed", format);
+ return result;
+ }
+
+ return NANOARROW_OK;
+}
+
+static int ArrowIpcReaderSetTypeFixedSizeList(struct ArrowSchema* schema,
+ flatbuffers_generic_t
type_generic,
+ struct ArrowError* error) {
+ ns(FixedSizeList_table_t) type = (ns(FixedSizeList_table_t))type_generic;
+ int32_t fixed_size = ns(FixedSizeList_listSize(type));
+
+ char fixed_size_str[128];
+ int n_chars = snprintf(fixed_size_str, 128, "+w:%d", fixed_size);
+ fixed_size_str[n_chars] = '\0';
+ return ArrowIpcReaderSetTypeSimpleNested(schema, fixed_size_str, error);
+}
+
+static int ArrowIpcReaderSetTypeUnion(struct ArrowSchema* schema,
+ flatbuffers_generic_t type_generic,
+ int64_t n_children, struct ArrowError*
error) {
+ ns(Union_table_t) type = (ns(Union_table_t))type_generic;
+ int union_mode = ns(Union_mode(type));
+
+ // Max valid typeIds size is 127; the longest single ID that could be
present here
+ // is -INT_MIN (11 chars). With commas and the prefix the max size would be
+ // 1527 characters.
+ char union_types_str[2048];
+ memset(union_types_str, 0, sizeof(union_types_str));
+ char* format_cursor = union_types_str;
+ int format_out_size = sizeof(union_types_str);
+ int n_chars = 0;
+
+ const char* format_prefix;
+ switch (union_mode) {
+ case ns(UnionMode_Sparse):
+ n_chars = snprintf(format_cursor, format_out_size, "+us:");
+ format_cursor += n_chars;
+ format_out_size -= n_chars;
+ break;
+ case ns(UnionMode_Dense):
+ n_chars = snprintf(format_cursor, format_out_size, "+ud:");
+ format_cursor += n_chars;
+ format_out_size -= n_chars;
+ break;
+ default:
+ ArrowErrorSet(error, "Unexpected Union UnionMode value: %d",
(int)union_mode);
+ return EINVAL;
+ }
+
+ if (ns(Union_typeIds_is_present(type))) {
+
Review Comment:
Is this an empty branch?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]