This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new c4d20a08 feat: Add support for run-end encoded array (#507)
c4d20a08 is described below
commit c4d20a08514eaa0520e0c5d3f0d51469011a1a4a
Author: Cocoa <[email protected]>
AuthorDate: Fri Jun 7 20:57:23 2024 +0100
feat: Add support for run-end encoded array (#507)
Hi, this PR adds support for run-end encoded arrays, based on the
Arrow columnar format specification here:
https://arrow.apache.org/docs/format/Columnar.html#run-end-encoded-layout
---
src/nanoarrow/array.c | 108 ++++++++++++++++++++++++++++
src/nanoarrow/array_inline.h | 1 +
src/nanoarrow/array_test.cc | 153 ++++++++++++++++++++++++++++++++++++++++
src/nanoarrow/nanoarrow.h | 13 ++++
src/nanoarrow/nanoarrow_types.h | 5 +-
src/nanoarrow/schema.c | 41 +++++++++++
src/nanoarrow/schema_test.cc | 61 ++++++++++++++++
src/nanoarrow/utils.c | 1 +
8 files changed, 382 insertions(+), 1 deletion(-)
diff --git a/src/nanoarrow/array.c b/src/nanoarrow/array.c
index 4fb7b7b1..82236490 100644
--- a/src/nanoarrow/array.c
+++ b/src/nanoarrow/array.c
@@ -69,6 +69,7 @@ static ArrowErrorCode ArrowArraySetStorageType(struct
ArrowArray* array,
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
+ case NANOARROW_TYPE_RUN_END_ENCODED:
array->n_buffers = 0;
break;
@@ -811,6 +812,15 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
(long)array_view->n_children);
return EINVAL;
}
+ break;
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ if (array_view->n_children != 2) {
+ ArrowErrorSet(
+ error, "Expected 2 children for %s array but found %ld child
arrays",
+ ArrowTypeString(array_view->storage_type),
(long)array_view->n_children);
+ return EINVAL;
+ }
+ break;
default:
break;
}
@@ -846,6 +856,68 @@ static int ArrowArrayViewValidateMinimal(struct
ArrowArrayView* array_view,
return EINVAL;
}
break;
+
+ case NANOARROW_TYPE_RUN_END_ENCODED: {
+ if (array_view->n_children != 2) {
+ ArrowErrorSet(error,
+ "Expected 2 children for run-end encoded array but found
%ld",
+ (long)array_view->n_children);
+ return EINVAL;
+ }
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ struct ArrowArrayView* values_view = array_view->children[1];
+ int64_t max_length;
+ switch (run_ends_view->storage_type) {
+ case NANOARROW_TYPE_INT16:
+ max_length = INT16_MAX;
+ break;
+ case NANOARROW_TYPE_INT32:
+ max_length = INT32_MAX;
+ break;
+ case NANOARROW_TYPE_INT64:
+ max_length = INT64_MAX;
+ break;
+ default:
+ ArrowErrorSet(
+ error,
+ "Run-end encoded array only supports INT16, INT32 or INT64
run-ends "
+ "but found run-ends type %s",
+ ArrowTypeString(run_ends_view->storage_type));
+ return EINVAL;
+ }
+ // uint64_t is used here to avoid overflow when adding the offset and
length
+ if ((uint64_t)array_view->offset + (uint64_t)array_view->length >
+ (uint64_t)max_length) {
+ ArrowErrorSet(
+ error,
+ "Offset + length of a run-end encoded array must fit in a value"
+ " of the run end type %s, but offset + length is %lu while the "
+ "allowed maximum is %lu",
+ ArrowTypeString(run_ends_view->storage_type),
+ (unsigned long)array_view->offset + (unsigned
long)array_view->length,
+ (unsigned long)max_length);
+ return EINVAL;
+ }
+ if (run_ends_view->length > values_view->length) {
+ ArrowErrorSet(
+ error, "Length of run_ends is greater than the length of values:
%ld > %ld",
+ (long)run_ends_view->length, (long)values_view->length);
+ return EINVAL;
+ }
+ if (run_ends_view->length == 0 && values_view->length != 0) {
+ ArrowErrorSet(error,
+ "Run-end encoded array has zero length %ld, but values
array has "
+ "non-zero length",
+ (long)values_view->length);
+ return EINVAL;
+ }
+ if (run_ends_view->null_count != 0) {
+ ArrowErrorSet(error, "Null count must be 0 for run ends array, but is
%ld",
+ (long)run_ends_view->null_count);
+ return EINVAL;
+ }
+ break;
+ }
default:
break;
}
@@ -995,6 +1067,18 @@ static int ArrowArrayViewValidateDefault(struct
ArrowArrayView* array_view,
}
}
break;
+
+ case NANOARROW_TYPE_RUN_END_ENCODED: {
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+ if (last_run_end < 1) {
+ ArrowErrorSet(error,
+ "All run ends must be greater than 0 but the first run
end is %ld",
+ (long)last_run_end);
+ return EINVAL;
+ }
+ break;
+ }
default:
break;
}
@@ -1163,6 +1247,30 @@ static int ArrowArrayViewValidateFull(struct
ArrowArrayView* array_view,
}
}
+ if (array_view->storage_type == NANOARROW_TYPE_RUN_END_ENCODED) {
+ struct ArrowArrayView* run_ends_view = array_view->children[0];
+ int64_t last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, 0);
+ for (int64_t i = 1; i < run_ends_view->length; i++) {
+ const int64_t run_end = ArrowArrayViewGetIntUnsafe(run_ends_view, i);
+ if (run_end <= last_run_end) {
+ ArrowErrorSet(error,
+ "Every run end must be strictly greater than the
previous run end, "
+ "but run_ends[%ld] is %ld and run_ends[%ld] is %ld",
+ (long)i, (long)run_end, (long)i - 1, (long)last_run_end);
+ return EINVAL;
+ }
+ last_run_end = run_end;
+ }
+ last_run_end = ArrowArrayViewGetIntUnsafe(run_ends_view,
run_ends_view->length - 1);
+ if (last_run_end < (array_view->offset + array_view->length)) {
+ ArrowErrorSet(error,
+ "Last run end is %ld but it should >= %ld (offset: %ld,
length: %ld)",
+ (long)last_run_end, (long)(array_view->offset +
array_view->length),
+ (long)array_view->offset, (long)array_view->length);
+ return EINVAL;
+ }
+ }
+
// Recurse for children
for (int64_t i = 0; i < array_view->n_children; i++) {
NANOARROW_RETURN_NOT_OK(ArrowArrayViewValidateFull(array_view->children[i],
error));
diff --git a/src/nanoarrow/array_inline.h b/src/nanoarrow/array_inline.h
index 7ee3d943..5c734b46 100644
--- a/src/nanoarrow/array_inline.h
+++ b/src/nanoarrow/array_inline.h
@@ -661,6 +661,7 @@ static inline ArrowErrorCode ArrowArrayFinishElement(struct
ArrowArray* array) {
}
}
break;
+ return NANOARROW_OK;
default:
return EINVAL;
}
diff --git a/src/nanoarrow/array_test.cc b/src/nanoarrow/array_test.cc
index ff9ffcd2..4cc1bd08 100644
--- a/src/nanoarrow/array_test.cc
+++ b/src/nanoarrow/array_test.cc
@@ -26,6 +26,7 @@
#include <arrow/array/builder_decimal.h>
#include <arrow/array/builder_nested.h>
#include <arrow/array/builder_primitive.h>
+#include <arrow/array/builder_run_end.h>
#include <arrow/array/builder_time.h>
#include <arrow/array/builder_union.h>
#include <arrow/c/bridge.h>
@@ -1440,6 +1441,158 @@ TEST(ArrayTest, ArrayTestAppendToStructArray) {
EXPECT_TRUE(arrow_array.ValueUnsafe()->Equals(expected_array.ValueUnsafe()));
}
+TEST(ArrayTest, ArrayTestAppendToRunEndEncodedArray) {
+ struct ArrowArray array;
+ struct ArrowSchema schema;
+ struct ArrowError error;
+
+ // in this test case we construct a run-end encoded array with logical
length = 7
+ // and the values are float32s
+ //
+ // the virtual big array:
+ // type: Float32
+ // [1.0, 1.0, 1.0, 1.0, null, null, 2.0]
+ //
+ // run-end encoded array:
+ // run_ends<INT32>: [4, 6, 7]
+ // values<FLOAT>: [1.0, null, 2.0]
+
+ ArrowSchemaInit(&schema);
+ ASSERT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_INT32),
NANOARROW_OK);
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_FLOAT),
NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayInitFromSchema(&array, &schema, nullptr), NANOARROW_OK);
+
+ ASSERT_EQ(ArrowArrayStartAppending(&array), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 4), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 6), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendInt(array.children[0], 7), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendDouble(array.children[1], 1.0), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendNull(array.children[1], 1), NANOARROW_OK);
+ ASSERT_EQ(ArrowArrayAppendDouble(array.children[1], 2.0), NANOARROW_OK);
+ array.length = 7;
+
+ // Make sure number of children is checked at finish
+ array.n_children = 0;
+ EXPECT_EQ(ArrowArrayFinishBuildingDefault(&array, &error), EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Expected 2 children for run_end_encoded array but found 0
child arrays");
+ array.n_children = 2;
+
+ {
+ array.offset = INT32_MAX;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Offset + length of a run-end encoded array must fit in a value of the
"
+ "run end type int32, but offset + length is 2147483654 while the
allowed "
+ "maximum is 2147483647");
+
+ ((struct
ArrowArrayPrivateData*)(array.children[0]->private_data))->storage_type =
+ NANOARROW_TYPE_INT16;
+ array.offset = INT16_MAX;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Offset + length of a run-end encoded array must fit in a value of the
run end "
+ "type int16, but offset + length is 32774 while the allowed maximum is
32767");
+
+ ((struct
ArrowArrayPrivateData*)(array.children[0]->private_data))->storage_type =
+ NANOARROW_TYPE_INT64;
+ array.offset = INT64_MAX;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array,
NANOARROW_VALIDATION_LEVEL_FULL, &error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Offset + length of a run-end encoded array must fit in a
value of the "
+ "run end type int64, but offset + length is
9223372036854775814 while "
+ "the allowed "
+ "maximum is 9223372036854775807");
+ }
+ ((struct
ArrowArrayPrivateData*)(array.children[0]->private_data))->storage_type =
+ NANOARROW_TYPE_INT32;
+ array.offset = 0;
+
+ // Make sure final child size is checked at finish
+ array.children[0]->length += 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Length of run_ends is greater than the length of values: 4 >
3");
+ array.children[0]->length -= 1;
+
+ array.children[0]->length = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(
+ ArrowErrorMessage(&error),
+ "Run-end encoded array has zero length 3, but values array has non-zero
length");
+ array.children[0]->length = 3;
+
+ array.children[0]->null_count = 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Null count must be 0 for run ends array, but is 1");
+ array.children[0]->null_count = 0;
+
+ // it can be a projection of the virtual big array
+ // [1.0, 1.0, 1.0, 1.0, null, null, 2.0]
+ // ^ ^
+ // |- offset = 1 |- length = 6
+ array.length = 6;
+ array.offset = 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ NANOARROW_OK);
+
+ // checks for one-off errors
+ // this one makes the logical length larger than the last run end
+ // [1.0, 1.0, 1.0, 1.0, null, null, 2.0]
+ // ^ ^
+ // |- offset = 1 |- length = 7 (out of bound)
+ array.length = 7;
+ array.offset = 1;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Last run end is 7 but it should >= 8 (offset: 1, length: 7)");
+
+ // [1.0, 1.0, 1.0, 1.0, null, null, 2.0]
+ // ^ ^
+ // |- offset = 1 |- length = 8 (out of bound)
+ array.length = 8;
+ array.offset = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ EINVAL);
+ EXPECT_STREQ(ArrowErrorMessage(&error),
+ "Last run end is 7 but it should >= 8 (offset: 0, length: 8)");
+
+ array.length = 7;
+ array.offset = 0;
+ EXPECT_EQ(ArrowArrayFinishBuilding(&array, NANOARROW_VALIDATION_LEVEL_FULL,
&error),
+ NANOARROW_OK);
+
+ auto arrow_array = ImportArray(&array, &schema);
+ ARROW_EXPECT_OK(arrow_array);
+
+ auto run_ends_builder = std::make_shared<Int32Builder>();
+ auto values_builder = std::make_shared<FloatBuilder>();
+ auto builder =
+ RunEndEncodedBuilder(default_memory_pool(), run_ends_builder,
values_builder,
+ run_end_encoded(int32(), float32()));
+ ARROW_EXPECT_OK(run_ends_builder->Append(4));
+ ARROW_EXPECT_OK(run_ends_builder->Append(6));
+ ARROW_EXPECT_OK(run_ends_builder->Append(7));
+ ARROW_EXPECT_OK(values_builder->Append(1.0));
+ ARROW_EXPECT_OK(values_builder->AppendNull());
+ ARROW_EXPECT_OK(values_builder->Append(2.0));
+ auto expected_array = builder.Finish();
+ ARROW_EXPECT_OK(expected_array);
+
+ EXPECT_STREQ(arrow_array.ValueUnsafe()->ToString().c_str(),
+ expected_array.ValueUnsafe()->ToString().c_str());
+}
+
TEST(ArrayTest, ArrayTestUnionUtils) {
// Check length calculation with nullptr
EXPECT_EQ(_ArrowParseUnionTypeIds("", nullptr), 0);
diff --git a/src/nanoarrow/nanoarrow.h b/src/nanoarrow/nanoarrow.h
index 4831a40a..84d59850 100644
--- a/src/nanoarrow/nanoarrow.h
+++ b/src/nanoarrow/nanoarrow.h
@@ -60,6 +60,8 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeFixedSize)
#define ArrowSchemaSetTypeDecimal \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDecimal)
+#define ArrowSchemaSetTypeRunEndEncoded \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeRunEndEncoded)
#define ArrowSchemaSetTypeDateTime \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowSchemaSetTypeDateTime)
#define ArrowSchemaSetTypeUnion \
@@ -372,6 +374,17 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct
ArrowSchema* schema, enum ArrowT
int32_t decimal_precision,
int32_t decimal_scale);
+/// \brief Set the format field of a run-end encoded schema
+///
+/// Returns EINVAL for run_end_type that is not
+/// NANOARROW_TYPE_INT16, NANOARROW_TYPE_INT32 or NANOARROW_TYPE_INT64.
+/// Schema must have been initialized using ArrowSchemaInit() or
ArrowSchemaDeepCopy().
+/// The caller must call `ArrowSchemaSetTypeXXX(schema->children[1])` to
+/// set the value type. Note that when building arrays using the
`ArrowArrayAppendXXX()`
+/// functions, the run-end encoded array's logical length must be updated
manually.
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+ enum ArrowType run_end_type);
+
/// \brief Set the format field of a time, timestamp, or duration schema
///
/// Returns EINVAL for type that is not
diff --git a/src/nanoarrow/nanoarrow_types.h b/src/nanoarrow/nanoarrow_types.h
index d814a056..03c5836e 100644
--- a/src/nanoarrow/nanoarrow_types.h
+++ b/src/nanoarrow/nanoarrow_types.h
@@ -450,7 +450,8 @@ enum ArrowType {
NANOARROW_TYPE_LARGE_STRING,
NANOARROW_TYPE_LARGE_BINARY,
NANOARROW_TYPE_LARGE_LIST,
- NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO
+ NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO,
+ NANOARROW_TYPE_RUN_END_ENCODED
};
/// \brief Get a string value of an enum ArrowType value
@@ -537,6 +538,8 @@ static inline const char* ArrowTypeString(enum ArrowType
type) {
return "large_list";
case NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO:
return "interval_month_day_nano";
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return "run_end_encoded";
default:
return NULL;
}
diff --git a/src/nanoarrow/schema.c b/src/nanoarrow/schema.c
index 7451136c..aa8725b2 100644
--- a/src/nanoarrow/schema.c
+++ b/src/nanoarrow/schema.c
@@ -124,6 +124,8 @@ static const char* ArrowSchemaFormatTemplate(enum ArrowType
type) {
return "+s";
case NANOARROW_TYPE_MAP:
return "+m";
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return "+r";
default:
return NULL;
@@ -155,6 +157,13 @@ static int ArrowSchemaInitChildrenIfNeeded(struct
ArrowSchema* schema,
NANOARROW_RETURN_NOT_OK(
ArrowSchemaSetName(schema->children[0]->children[1], "value"));
break;
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaAllocateChildren(schema, 2));
+ ArrowSchemaInit(schema->children[0]);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[0],
"run_ends"));
+ schema->children[0]->flags &= ~ARROW_FLAG_NULLABLE;
+ ArrowSchemaInit(schema->children[1]);
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetName(schema->children[1],
"values"));
default:
break;
}
@@ -277,6 +286,28 @@ ArrowErrorCode ArrowSchemaSetTypeDecimal(struct
ArrowSchema* schema, enum ArrowT
return ArrowSchemaSetFormat(schema, buffer);
}
+ArrowErrorCode ArrowSchemaSetTypeRunEndEncoded(struct ArrowSchema* schema,
+ enum ArrowType run_end_type) {
+ switch (run_end_type) {
+ case NANOARROW_TYPE_INT16:
+ case NANOARROW_TYPE_INT32:
+ case NANOARROW_TYPE_INT64:
+ break;
+ default:
+ return EINVAL;
+ }
+
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetFormat(
+ schema, ArrowSchemaFormatTemplate(NANOARROW_TYPE_RUN_END_ENCODED)));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaInitChildrenIfNeeded(schema, NANOARROW_TYPE_RUN_END_ENCODED));
+ NANOARROW_RETURN_NOT_OK(ArrowSchemaSetType(schema->children[0],
run_end_type));
+ NANOARROW_RETURN_NOT_OK(
+ ArrowSchemaSetType(schema->children[1], NANOARROW_TYPE_UNINITIALIZED));
+
+ return NANOARROW_OK;
+}
+
static const char* ArrowTimeUnitFormatString(enum ArrowTimeUnit time_unit) {
switch (time_unit) {
case NANOARROW_TIME_UNIT_SECOND:
@@ -750,6 +781,13 @@ static ArrowErrorCode ArrowSchemaViewParse(struct
ArrowSchemaView* schema_view,
*format_end_out = format + 2;
return NANOARROW_OK;
+ // run end encoded has no buffer at all
+ case 'r':
+ schema_view->storage_type = NANOARROW_TYPE_RUN_END_ENCODED;
+ schema_view->type = NANOARROW_TYPE_RUN_END_ENCODED;
+ *format_end_out = format + 2;
+ return NANOARROW_OK;
+
// just validity buffer
case 'w':
if (format[2] != ':' || format[3] == '\0') {
@@ -1124,6 +1162,9 @@ static ArrowErrorCode ArrowSchemaViewValidate(struct
ArrowSchemaView* schema_vie
case NANOARROW_TYPE_FIXED_SIZE_LIST:
return ArrowSchemaViewValidateNChildren(schema_view, 1, error);
+ case NANOARROW_TYPE_RUN_END_ENCODED:
+ return ArrowSchemaViewValidateNChildren(schema_view, 2, error);
+
case NANOARROW_TYPE_STRUCT:
return ArrowSchemaViewValidateNChildren(schema_view, -1, error);
diff --git a/src/nanoarrow/schema_test.cc b/src/nanoarrow/schema_test.cc
index 6e521eef..da8c6a1e 100644
--- a/src/nanoarrow/schema_test.cc
+++ b/src/nanoarrow/schema_test.cc
@@ -219,6 +219,47 @@ TEST(SchemaTest, SchemaInitDecimal) {
EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(decimal256(3, 4)));
}
+TEST(SchemaTest, SchemaInitRunEndEncoded) {
+ struct ArrowSchema schema;
+
+ // run-ends type has to be one of INT16, INT32, INT64
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_DOUBLE),
EINVAL);
+
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_UINT16),
EINVAL);
+
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_INT16),
NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "+r");
+
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_FLOAT),
NANOARROW_OK);
+
+ auto arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(run_end_encoded(int16(),
float32())));
+
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_INT32),
NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "+r");
+
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_FLOAT),
NANOARROW_OK);
+
+ arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(run_end_encoded(int32(),
float32())));
+
+ ArrowSchemaInit(&schema);
+ EXPECT_EQ(ArrowSchemaSetTypeRunEndEncoded(&schema, NANOARROW_TYPE_INT64),
NANOARROW_OK);
+ EXPECT_STREQ(schema.format, "+r");
+
+ ASSERT_EQ(ArrowSchemaSetType(schema.children[1], NANOARROW_TYPE_FLOAT),
NANOARROW_OK);
+
+ arrow_type = ImportType(&schema);
+ ARROW_EXPECT_OK(arrow_type);
+ EXPECT_TRUE(arrow_type.ValueUnsafe()->Equals(run_end_encoded(int64(),
float32())));
+}
+
TEST(SchemaTest, SchemaInitDateTime) {
struct ArrowSchema schema;
@@ -501,6 +542,26 @@ TEST(SchemaTest, SchemaCopyDictType) {
ArrowSchemaRelease(&schema_copy);
}
+TEST(SchemaTest, SchemaCopyRunEndEncodedType) {
+ struct ArrowSchema schema;
+ auto struct_type = run_end_encoded(int32(), float32());
+ ARROW_EXPECT_OK(ExportType(*struct_type, &schema));
+
+ struct ArrowSchema schema_copy;
+ ASSERT_EQ(ArrowSchemaDeepCopy(&schema, &schema_copy), NANOARROW_OK);
+
+ ASSERT_NE(schema_copy.release, nullptr);
+ EXPECT_STREQ(schema_copy.format, "+r");
+ EXPECT_EQ(schema_copy.n_children, 2);
+ EXPECT_STREQ(schema_copy.children[0]->format, "i");
+ EXPECT_STREQ(schema_copy.children[0]->name, "run_ends");
+ EXPECT_STREQ(schema_copy.children[1]->format, "f");
+ EXPECT_STREQ(schema_copy.children[1]->name, "values");
+
+ ArrowSchemaRelease(&schema);
+ ArrowSchemaRelease(&schema_copy);
+}
+
TEST(SchemaTest, SchemaCopyFlags) {
struct ArrowSchema schema;
ARROW_EXPECT_OK(ExportType(*int32(), &schema));
diff --git a/src/nanoarrow/utils.c b/src/nanoarrow/utils.c
index c9b4ebd6..4aba25ae 100644
--- a/src/nanoarrow/utils.c
+++ b/src/nanoarrow/utils.c
@@ -66,6 +66,7 @@ void ArrowLayoutInit(struct ArrowLayout* layout, enum
ArrowType storage_type) {
switch (storage_type) {
case NANOARROW_TYPE_UNINITIALIZED:
case NANOARROW_TYPE_NA:
+ case NANOARROW_TYPE_RUN_END_ENCODED:
layout->buffer_type[0] = NANOARROW_BUFFER_TYPE_NONE;
layout->buffer_data_type[0] = NANOARROW_TYPE_UNINITIALIZED;
layout->buffer_type[1] = NANOARROW_BUFFER_TYPE_NONE;