This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 5b7184cf feat: Add Dictionary schema read support in IPC reader (#738)
5b7184cf is described below
commit 5b7184cfd5d17b712454d3e2e738930b3755c219
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Mar 6 08:45:26 2026 -0600
feat: Add Dictionary schema read support in IPC reader (#738)
To start #622, this PR adds read support for Schema messages that
contain dictionary-encoded columns. I opened #844 and #845 to track the
next steps.
---------
Co-authored-by: Copilot <[email protected]>
---
src/nanoarrow/ipc/decoder.c | 187 +++++++++++++--
src/nanoarrow/ipc/decoder_test.cc | 493 ++++++++++++++++++++++++--------------
src/nanoarrow/ipc/files_test.cc | 8 +-
src/nanoarrow/ipc/reader.c | 15 +-
src/nanoarrow/nanoarrow_ipc.h | 13 +
5 files changed, 515 insertions(+), 201 deletions(-)
diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c
index 07bfbe0f..f3b4b443 100644
--- a/src/nanoarrow/ipc/decoder.c
+++ b/src/nanoarrow/ipc/decoder.c
@@ -95,6 +95,8 @@ struct ArrowIpcDecoderPrivate {
int64_t n_union_fields;
// A pointer to the last flatbuffers message.
const void* last_message;
+ // Storage for a DictionaryBatch
+ struct ArrowIpcDictionaryBatch dictionary;
// Storage for a Footer
struct ArrowIpcFooter footer;
// Decompressor for compression support
@@ -865,17 +867,132 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema*
schema, ns(Field_table_t)
}
}
+// A fun corner case when decoding dictionaries: the extension metadata lives with
+// the dictionary (i.e., the non-index type); however, the field metadata still
+// needs to exist on the field.
+static int ArrowIpcMoveNonExtensionFieldMetadataBackToFieldIfNeeded(
+ struct ArrowSchema* schema) {
+ NANOARROW_DCHECK(schema->dictionary != NULL);
+ struct ArrowMetadataReader reader;
+ NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader,
schema->dictionary->metadata));
+
+ // For the most common case (no metadata), nothing needs to be done here
+ if (reader.remaining_keys == 0) {
+ return NANOARROW_OK;
+ }
+
+ struct ArrowBuffer field_metadata;
+ struct ArrowBuffer extension_metadata;
+ NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&field_metadata, NULL));
+ ArrowErrorCode result = ArrowMetadataBuilderInit(&extension_metadata, NULL);
+ if (result != NANOARROW_OK) {
+ ArrowBufferReset(&field_metadata);
+ return result;
+ }
+
+ const struct ArrowStringView extension_name_key =
ArrowCharView("ARROW:extension:name");
+ const struct ArrowStringView extension_metadata_key =
+ ArrowCharView("ARROW:extension:metadata");
+
+ struct ArrowStringView key;
+ struct ArrowStringView value;
+ while (reader.remaining_keys > 0) {
+ result = ArrowMetadataReaderRead(&reader, &key, &value);
+ if (result != NANOARROW_OK) {
+ ArrowBufferReset(&field_metadata);
+ ArrowBufferReset(&extension_metadata);
+ return result;
+ }
+
+ int key_is_extension_name =
+ key.size_bytes == extension_name_key.size_bytes &&
+ strncmp(key.data, extension_name_key.data, key.size_bytes) == 0;
+ int key_is_extension_metadata =
+ key.size_bytes == extension_metadata_key.size_bytes &&
+ strncmp(key.data, extension_metadata_key.data, key.size_bytes) == 0;
+ if (!key_is_extension_name && !key_is_extension_metadata) {
+ result = ArrowMetadataBuilderAppend(&field_metadata, key, value);
+ if (result != NANOARROW_OK) {
+ ArrowBufferReset(&field_metadata);
+ ArrowBufferReset(&extension_metadata);
+ return result;
+ }
+ } else {
+ result = ArrowMetadataBuilderAppend(&extension_metadata, key, value);
+ if (result != NANOARROW_OK) {
+ ArrowBufferReset(&field_metadata);
+ ArrowBufferReset(&extension_metadata);
+ return result;
+ }
+ }
+ }
+
+ result = ArrowSchemaSetMetadata(schema, (char*)field_metadata.data);
+ if (result != NANOARROW_OK) {
+ ArrowBufferReset(&field_metadata);
+ ArrowBufferReset(&extension_metadata);
+ return result;
+ }
+
+ result = ArrowSchemaSetMetadata(schema->dictionary,
(char*)extension_metadata.data);
+ ArrowBufferReset(&field_metadata);
+ ArrowBufferReset(&extension_metadata);
+
+ return result;
+}
+
+static int ArrowIpcSetDictionaryEncoding(
+ struct ArrowSchema* schema, ns(DictionaryEncoding_table_t
dictionary_encoding),
+ struct ArrowError* error) {
+ switch (
+
org_apache_arrow_flatbuf_DictionaryEncoding_dictionaryKind(dictionary_encoding))
{
+ case ns(DictionaryKind_DenseArray):
+ break;
+ default:
+ ArrowErrorSet(error, "Unexpected value for DictionaryKind");
+ return EINVAL;
+ }
+
+ struct ArrowSchema tmp;
+ ArrowSchemaMove(schema, &tmp);
+
+ ArrowSchemaInit(schema);
+ int result = ArrowSchemaAllocateDictionary(schema);
+ if (result != NANOARROW_OK) {
+ ArrowSchemaRelease(&tmp);
+ ArrowErrorSet(error, "ArrowSchemaAllocateDictionary() failed");
+ return result;
+ }
+
+ ArrowSchemaMove(&tmp, schema->dictionary);
+
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetName(schema,
schema->dictionary->name),
+ error);
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetName(schema->dictionary,
""), error);
+
+ NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetTypeInt(
+ schema, ns(DictionaryEncoding_indexType_get(dictionary_encoding)),
error));
+
+ if (ns(DictionaryEncoding_isOrdered_get(dictionary_encoding))) {
+ schema->flags |= ARROW_FLAG_DICTIONARY_ORDERED;
+ }
+
+ // Field metadata should stay with the field; however, we need the extension metadata
+ // to stay with the dictionary.
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowIpcMoveNonExtensionFieldMetadataBackToFieldIfNeeded(schema), error);
+
+ // TODO: Track the dictionary
+ // https://github.com/apache/arrow-nanoarrow/issues/844
+
+ return NANOARROW_OK;
+}
+
static int ArrowIpcDecoderSetChildren(struct ArrowSchema* schema,
ns(Field_vec_t) fields,
struct ArrowError* error);
static int ArrowIpcDecoderSetField(struct ArrowSchema* schema,
ns(Field_table_t) field,
struct ArrowError* error) {
- // No dictionary support yet
- if (ns(Field_dictionary_is_present(field))) {
- ArrowErrorSet(error, "Schema message field with DictionaryEncoding not
supported");
- return ENOTSUP;
- }
-
int result;
if (ns(Field_name_is_present(field))) {
result = ArrowSchemaSetName(schema, ns(Field_name_get(field)));
@@ -916,7 +1033,16 @@ static int ArrowIpcDecoderSetField(struct ArrowSchema*
schema, ns(Field_table_t)
}
NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetChildren(schema, children, error));
- return ArrowIpcDecoderSetMetadata(schema, ns(Field_custom_metadata(field)),
error);
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderSetMetadata(schema, ns(Field_custom_metadata(field)),
error));
+
+ // If this is a dictionary encoded field, set the dictionary encoding
+ if (ns(Field_dictionary_is_present(field))) {
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcSetDictionaryEncoding(schema, ns(Field_dictionary(field)),
error));
+ }
+
+ return NANOARROW_OK;
}
static int ArrowIpcDecoderSetChildren(struct ArrowSchema* schema,
ns(Field_vec_t) fields,
@@ -972,6 +1098,19 @@ static int ArrowIpcDecoderDecodeSchemaHeader(struct
ArrowIpcDecoder* decoder,
return NANOARROW_OK;
}
+static int ArrowIpcDecoderDecodeDictionaryBatchHeader(
+ struct ArrowIpcDecoder* decoder, flatbuffers_generic_t message_header) {
+ struct ArrowIpcDecoderPrivate* private_data =
+ (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+
+ ns(DictionaryBatch_table_t) dictionary =
(ns(DictionaryBatch_table_t))message_header;
+ private_data->dictionary.id = ns(DictionaryBatch_id(dictionary));
+ private_data->dictionary.is_delta = ns(DictionaryBatch_isDelta(dictionary));
+
+ decoder->dictionary = &private_data->dictionary;
+ return NANOARROW_OK;
+}
+
static int ArrowIpcDecoderDecodeRecordBatchHeader(struct ArrowIpcDecoder*
decoder,
flatbuffers_generic_t
message_header,
struct ArrowError* error) {
@@ -1042,6 +1181,8 @@ static inline void ArrowIpcDecoderResetHeaderInfo(struct
ArrowIpcDecoder* decode
decoder->codec = 0;
decoder->header_size_bytes = 0;
decoder->body_size_bytes = 0;
+ decoder->dictionary = NULL;
+ memset(&private_data->dictionary, 0, sizeof(struct ArrowIpcDictionaryBatch));
decoder->footer = NULL;
ArrowIpcFooterReset(&private_data->footer);
private_data->last_message = NULL;
@@ -1284,11 +1425,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct
ArrowIpcDecoder* decoder,
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderDecodeSchemaHeader(decoder, message_header, error));
break;
+ case ns(MessageHeader_DictionaryBatch):
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderDecodeDictionaryBatchHeader(decoder, message_header));
+ break;
case ns(MessageHeader_RecordBatch):
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderDecodeRecordBatchHeader(decoder, message_header,
error));
break;
- case ns(MessageHeader_DictionaryBatch):
case ns(MessageHeader_Tensor):
case ns(MessageHeader_SparseTensor):
ArrowErrorSet(error, "Unsupported message type: '%s'",
@@ -1857,6 +2001,11 @@ static int ArrowIpcDecoderWalkGetArray(struct
ArrowArrayView* array_view,
array_view->children[i], array->children[i], out->children[i], error));
}
+ if (array->dictionary != NULL) {
+ ArrowErrorSet(error, "Decode of dictionary array is not yet supported");
+ return ENOTSUP;
+ }
+
return NANOARROW_OK;
}
@@ -1970,12 +2119,6 @@ static ArrowErrorCode
ArrowIpcDecoderDecodeArrayViewInternal(
struct ArrowIpcDecoderPrivate* private_data =
(struct ArrowIpcDecoderPrivate*)decoder->private_data;
- if (private_data->last_message == NULL ||
- decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
- ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
- return EINVAL;
- }
-
// RecordBatch messages don't count the root node but decoder->fields does
// (decoder->fields[0] is the root field)
if (field_i + 1 >= private_data->n_fields) {
@@ -2038,6 +2181,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeArrayView(struct
ArrowIpcDecoder* decoder,
struct ArrowBufferView body,
int64_t i,
struct ArrowArrayView** out,
struct ArrowError* error) {
+ struct ArrowIpcDecoderPrivate* private_data =
+ (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+ if (private_data->last_message == NULL ||
+ decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
+ ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
+ return EINVAL;
+ }
+
return ArrowIpcDecoderDecodeArrayViewInternal(
decoder, ArrowIpcBufferFactoryFromView(&body), i, out, error);
}
@@ -2047,6 +2198,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeArray(struct
ArrowIpcDecoder* decoder,
struct ArrowArray* out,
enum ArrowValidationLevel
validation_level,
struct ArrowError* error) {
+ struct ArrowIpcDecoderPrivate* private_data =
+ (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+ if (private_data->last_message == NULL ||
+ decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
+ ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
+ return EINVAL;
+ }
+
struct ArrowArrayView* array_view;
NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeArrayViewInternal(
decoder, ArrowIpcBufferFactoryFromView(&body), i, &array_view, error));
diff --git a/src/nanoarrow/ipc/decoder_test.cc
b/src/nanoarrow/ipc/decoder_test.cc
index 81993bce..40322908 100644
--- a/src/nanoarrow/ipc/decoder_test.cc
+++ b/src/nanoarrow/ipc/decoder_test.cc
@@ -21,6 +21,7 @@
#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
#include <arrow/array.h>
#include <arrow/c/bridge.h>
+#include <arrow/extension/uuid.h>
#include <arrow/ipc/api.h>
#include <arrow/util/key_value_metadata.h>
#endif
@@ -54,6 +55,7 @@ struct ArrowIpcDecoderPrivate {
struct ArrowIpcField* fields;
int64_t n_buffers;
const void* last_message;
+ struct ArrowIpcDictionaryBatch dictionary;
struct ArrowIpcFooter footer;
struct ArrowIpcDecompressor decompressor;
};
@@ -159,6 +161,51 @@ alignas(8) static uint8_t
kSimpleRecordBatchUncompressible[] = {
0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
0x00, 0x00, 0x00, 0x00};
+alignas(8) static uint8_t kDictionarySchema[] = {
+ 0xff, 0xff, 0xff, 0xff, 0x50, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x0a, 0x00, 0x0e, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00,
0x00, 0x00,
+ 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00,
0x0c, 0x00,
+ 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0xb0, 0x00,
0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00,
0x08, 0x00,
+ 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x8c, 0x00,
0x00, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x41, 0x0a, 0x33, 0x0a,
0x32, 0x36,
+ 0x33, 0x31, 0x37, 0x30, 0x0a, 0x31, 0x39, 0x37, 0x38, 0x38, 0x38, 0x0a,
0x35, 0x0a,
+ 0x55, 0x54, 0x46, 0x2d, 0x38, 0x0a, 0x35, 0x33, 0x31, 0x0a, 0x31, 0x0a,
0x35, 0x33,
+ 0x31, 0x0a, 0x31, 0x0a, 0x32, 0x35, 0x34, 0x0a, 0x31, 0x30, 0x32, 0x36,
0x0a, 0x31,
+ 0x0a, 0x32, 0x36, 0x32, 0x31, 0x35, 0x33, 0x0a, 0x35, 0x0a, 0x6e, 0x61,
0x6d, 0x65,
+ 0x73, 0x0a, 0x31, 0x36, 0x0a, 0x31, 0x0a, 0x32, 0x36, 0x32, 0x31, 0x35,
0x33, 0x0a,
+ 0x38, 0x0a, 0x73, 0x6f, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x0a, 0x32,
0x35, 0x34,
+ 0x0a, 0x31, 0x30, 0x32, 0x36, 0x0a, 0x35, 0x31, 0x31, 0x0a, 0x31, 0x36,
0x0a, 0x31,
+ 0x0a, 0x32, 0x36, 0x32, 0x31, 0x35, 0x33, 0x0a, 0x37, 0x0a, 0x63, 0x6f,
0x6c, 0x75,
+ 0x6d, 0x6e, 0x73, 0x0a, 0x32, 0x35, 0x34, 0x0a, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00,
+ 0x72, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x10, 0x00,
+ 0x18, 0x00, 0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00,
0x14, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x05, 0x14, 0x00, 0x00, 0x00,
0x48, 0x00,
+ 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x73, 0x6f, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6c,
0x00, 0x00,
+ 0x00, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00,
0x00, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00,
0x08, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00,
0x04, 0x00,
+ 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+alignas(8) static uint8_t kDictionaryBatch[] = {
+ 0xff, 0xff, 0xff, 0xff, 0xa8, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00,
0x0c, 0x00,
+ 0x0c, 0x00, 0x00, 0x00, 0x00, 0x02, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
0x20, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00,
0x04, 0x00,
+ 0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00,
0x18, 0x00,
+ 0x0c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x4c, 0x00,
0x00, 0x00,
+ 0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
0x04, 0x00,
+ 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x7a, 0x65,
0x72, 0x6f,
+ 0x6f, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
TEST(NanoarrowIpcTest, NanoarrowIpcCheckHeader) {
struct ArrowIpcDecoder decoder;
struct ArrowError error;
@@ -538,6 +585,56 @@ TEST(NanoarrowIpcTest,
NanoarrowIpcDecodeSimpleRecordBatchErrors) {
ArrowIpcDecoderReset(&decoder);
}
+TEST(NanoarrowIpcTest, NanoarrowIpcDecodeDictionarySchema) {
+ struct ArrowIpcDecoder decoder;
+ struct ArrowError error;
+ struct ArrowSchema schema;
+
+ struct ArrowBufferView data;
+ data.data.as_uint8 = kDictionarySchema;
+ data.size_bytes = sizeof(kDictionarySchema);
+
+ ASSERT_EQ(ArrowIpcDecoderInit(&decoder), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, data, &error), NANOARROW_OK);
+ ASSERT_EQ(decoder.message_type, NANOARROW_IPC_MESSAGE_TYPE_SCHEMA);
+
+ ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, &error),
NANOARROW_OK);
+ ASSERT_EQ(schema.n_children, 1);
+ EXPECT_STREQ(schema.children[0]->name, "some_col");
+ EXPECT_EQ(schema.children[0]->flags, ARROW_FLAG_NULLABLE);
+ EXPECT_STREQ(schema.children[0]->format, "c");
+
+ ASSERT_NE(schema.children[0]->dictionary, nullptr);
+ EXPECT_STREQ(schema.children[0]->dictionary->format, "u");
+
+ ArrowSchemaRelease(&schema);
+ ArrowIpcDecoderReset(&decoder);
+}
+
+TEST(NanoarrowIpcTest, NanoarrowIpcDecodeDictionaryBatch) {
+ struct ArrowIpcDecoder decoder;
+ struct ArrowError error;
+
+ struct ArrowBufferView data;
+ data.data.as_uint8 = kDictionaryBatch;
+ data.size_bytes = sizeof(kDictionaryBatch);
+
+ ASSERT_EQ(ArrowIpcDecoderInit(&decoder), NANOARROW_OK);
+
+ EXPECT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, data, &error), NANOARROW_OK);
+ ASSERT_EQ(decoder.message_type, NANOARROW_IPC_MESSAGE_TYPE_DICTIONARY_BATCH);
+
+ ASSERT_NE(decoder.dictionary, nullptr);
+ EXPECT_EQ(decoder.dictionary->id, 0);
+ EXPECT_FALSE(decoder.dictionary->is_delta);
+
+ // TODO: Access RecordBatch content
+ // https://github.com/apache/arrow-nanoarrow/issues/845
+
+ ArrowIpcDecoderReset(&decoder);
+}
+
TEST(NanoarrowIpcTest, NanoarrowIpcSetSchema) {
struct ArrowIpcDecoder decoder;
struct ArrowSchema schema;
@@ -605,139 +702,6 @@ TEST(NanoarrowIpcTest, NanoarrowIpcSetDecompressor) {
ArrowIpcDecoderReset(&decoder);
}
-#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
-class ArrowTypeParameterizedTestFixture
- : public ::testing::TestWithParam<std::shared_ptr<arrow::DataType>> {
- protected:
- std::shared_ptr<arrow::DataType> data_type;
-};
-
-TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowTypeRoundtrip) {
- const std::shared_ptr<arrow::DataType>& data_type = GetParam();
- std::shared_ptr<arrow::Schema> dummy_schema =
- arrow::schema({arrow::field("dummy_name", data_type)});
- auto maybe_serialized = arrow::ipc::SerializeSchema(*dummy_schema);
- ASSERT_TRUE(maybe_serialized.ok());
-
- struct ArrowBufferView buffer_view;
- buffer_view.data.data = maybe_serialized.ValueUnsafe()->data();
- buffer_view.size_bytes = maybe_serialized.ValueOrDie()->size();
-
- struct ArrowIpcDecoder decoder;
- ArrowIpcDecoderInit(&decoder);
- ASSERT_EQ(ArrowIpcDecoderVerifyHeader(&decoder, buffer_view, nullptr),
NANOARROW_OK);
- EXPECT_EQ(decoder.header_size_bytes, buffer_view.size_bytes);
- EXPECT_EQ(decoder.body_size_bytes, 0);
-
- ASSERT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, buffer_view, nullptr),
NANOARROW_OK);
- struct ArrowSchema schema;
- ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, nullptr),
NANOARROW_OK);
- auto maybe_schema = arrow::ImportSchema(&schema);
- ASSERT_TRUE(maybe_schema.ok());
-
- // Better failure message if we first check for string equality
- EXPECT_EQ(maybe_schema.ValueUnsafe()->ToString(), dummy_schema->ToString());
- EXPECT_TRUE(maybe_schema.ValueUnsafe()->Equals(dummy_schema, true));
-
- ArrowIpcDecoderReset(&decoder);
-}
-#endif
-
-std::string ArrowSchemaMetadataToString(const char* metadata) {
- struct ArrowMetadataReader reader;
- auto st = ArrowMetadataReaderInit(&reader, metadata);
- EXPECT_EQ(st, NANOARROW_OK);
-
- bool comma = false;
- std::string out;
- while (reader.remaining_keys > 0) {
- struct ArrowStringView key, value;
- auto st = ArrowMetadataReaderRead(&reader, &key, &value);
- EXPECT_EQ(st, NANOARROW_OK);
- if (comma) {
- out += ", ";
- }
- comma = true;
-
- out.append(key.data, key.size_bytes);
- out += "=";
- out.append(value.data, value.size_bytes);
- }
- return out;
-}
-
-std::string ArrowSchemaToString(const struct ArrowSchema* schema) {
- int64_t n = ArrowSchemaToString(schema, nullptr, 0, /*recursive=*/false);
- std::vector<char> out_vec(n, '\0');
- ArrowSchemaToString(schema, out_vec.data(), n, /*recursive=*/false);
- std::string out(out_vec.data(), out_vec.size());
-
- std::string metadata = ArrowSchemaMetadataToString(schema->metadata);
- if (!metadata.empty()) {
- out += "{" + metadata + "}";
- }
-
- bool comma = false;
- if (schema->format[0] == '+') {
- out += "<";
- for (int64_t i = 0; i < schema->n_children; ++i) {
- if (comma) {
- out += ", ";
- }
- comma = true;
-
- auto* child = schema->children[i];
- if (child && child->name[0] != '\0') {
- out += child->name;
- out += ": ";
- }
- out += ArrowSchemaToString(schema->children[i]);
- }
- out += ">";
- }
-
- return out;
-}
-
-#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
-TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowTypeRoundtrip) {
- nanoarrow::UniqueSchema schema;
- ASSERT_TRUE(
- arrow::ExportSchema(arrow::Schema({arrow::field("", GetParam())}),
schema.get())
- .ok());
-
- nanoarrow::ipc::UniqueEncoder encoder;
- EXPECT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
-
- struct ArrowError error;
- EXPECT_EQ(ArrowIpcEncoderEncodeSchema(encoder.get(), schema.get(), &error),
- NANOARROW_OK)
- << error.message;
-
- nanoarrow::UniqueBuffer buffer;
- EXPECT_EQ(
- ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/true,
buffer.get()),
- NANOARROW_OK);
-
- struct ArrowBufferView buffer_view;
- buffer_view.data.data = buffer->data;
- buffer_view.size_bytes = buffer->size_bytes;
-
- nanoarrow::ipc::UniqueDecoder decoder;
- ArrowIpcDecoderInit(decoder.get());
- ASSERT_EQ(ArrowIpcDecoderVerifyHeader(decoder.get(), buffer_view, nullptr),
- NANOARROW_OK);
- ASSERT_EQ(ArrowIpcDecoderDecodeHeader(decoder.get(), buffer_view, nullptr),
- NANOARROW_OK);
-
- nanoarrow::UniqueSchema roundtripped;
- ASSERT_EQ(ArrowIpcDecoderDecodeSchema(decoder.get(), roundtripped.get(),
nullptr),
- NANOARROW_OK);
-
- EXPECT_EQ(ArrowSchemaToString(roundtripped.get()),
ArrowSchemaToString(schema.get()));
-}
-#endif
-
TEST(NanoarrowIpcTest, NanoarrowIpcDecodeSimpleRecordBatchFromShared) {
struct ArrowIpcDecoder decoder;
struct ArrowError error;
@@ -871,8 +835,190 @@ TEST(NanoarrowIpcTest,
NanoarrowIpcSharedBufferThreadSafeDecode) {
// We will get a (occasional) memory leak if the atomic counter does not work
}
+TEST(NanoarrowIpcTest, NanoarrowIpcFooterDecodingErrors) {
+ struct ArrowError error;
+
+ nanoarrow::ipc::UniqueDecoder decoder;
+ ArrowIpcDecoderInit(decoder.get());
+
+ // not enough data to get the size+magic
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{nullptr}, 3}, &error),
ESPIPE)
+ << error.message;
+
+ // doesn't end with magic
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{"\0\0\0\0blargh"}, 10},
&error),
+ EINVAL)
+ << error.message;
+
+ // negative size
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(),
+ {{"\xFF\xFF\xFF\xFF"
+ "ARROW1"},
+ 10},
+ &error),
+ EINVAL)
+ << error.message;
+
+ // PeekFooter doesn't check for available data
+ EXPECT_EQ(
+ ArrowIpcDecoderPeekFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10},
&error),
+ NANOARROW_OK)
+ << error.message;
+ EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
+
+ decoder->header_size_bytes = -1;
+
+ // VerifyFooter *does* check for enough available data
+ EXPECT_EQ(
+ ArrowIpcDecoderVerifyFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10},
&error),
+ ESPIPE)
+ << error.message;
+ EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
+}
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+class ArrowTypeParameterizedTestFixture
+ : public ::testing::TestWithParam<std::shared_ptr<arrow::DataType>> {
+ protected:
+ std::shared_ptr<arrow::DataType> data_type;
+};
+
+TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowTypeRoundtrip) {
+ const std::shared_ptr<arrow::DataType>& data_type = GetParam();
+ std::shared_ptr<arrow::Schema> dummy_schema =
+ arrow::schema({arrow::field("dummy_name", data_type)});
+ auto maybe_serialized = arrow::ipc::SerializeSchema(*dummy_schema);
+ ASSERT_TRUE(maybe_serialized.ok());
+
+ struct ArrowBufferView buffer_view;
+ buffer_view.data.data = maybe_serialized.ValueUnsafe()->data();
+ buffer_view.size_bytes = maybe_serialized.ValueOrDie()->size();
+
+ struct ArrowIpcDecoder decoder;
+ ArrowIpcDecoderInit(&decoder);
+ ASSERT_EQ(ArrowIpcDecoderVerifyHeader(&decoder, buffer_view, nullptr),
NANOARROW_OK);
+ EXPECT_EQ(decoder.header_size_bytes, buffer_view.size_bytes);
+ EXPECT_EQ(decoder.body_size_bytes, 0);
+
+ ASSERT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, buffer_view, nullptr),
NANOARROW_OK);
+ struct ArrowSchema schema;
+ ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, nullptr),
NANOARROW_OK);
+ auto maybe_schema = arrow::ImportSchema(&schema);
+ ASSERT_TRUE(maybe_schema.ok());
+
+ // Better failure message if we first check for string equality
+ EXPECT_EQ(maybe_schema.ValueUnsafe()->ToString(), dummy_schema->ToString());
+ EXPECT_TRUE(maybe_schema.ValueUnsafe()->Equals(dummy_schema, true));
+
+ ArrowIpcDecoderReset(&decoder);
+}
+#endif
+
+std::string ArrowSchemaMetadataToString(const char* metadata) {
+ struct ArrowMetadataReader reader;
+ auto st = ArrowMetadataReaderInit(&reader, metadata);
+ EXPECT_EQ(st, NANOARROW_OK);
+
+ bool comma = false;
+ std::string out;
+ while (reader.remaining_keys > 0) {
+ struct ArrowStringView key, value;
+ auto st = ArrowMetadataReaderRead(&reader, &key, &value);
+ EXPECT_EQ(st, NANOARROW_OK);
+ if (comma) {
+ out += ", ";
+ }
+ comma = true;
+
+ out.append(key.data, key.size_bytes);
+ out += "=";
+ out.append(value.data, value.size_bytes);
+ }
+ return out;
+}
+
+std::string ArrowSchemaToString(const struct ArrowSchema* schema) {
+ int64_t n = ArrowSchemaToString(schema, nullptr, 0, /*recursive=*/false);
+ std::vector<char> out_vec(n, '\0');
+ ArrowSchemaToString(schema, out_vec.data(), n, /*recursive=*/false);
+ std::string out(out_vec.data(), out_vec.size());
+
+ std::string metadata = ArrowSchemaMetadataToString(schema->metadata);
+ if (!metadata.empty()) {
+ out += "{" + metadata + "}";
+ }
+
+ bool comma = false;
+ if (schema->format[0] == '+') {
+ out += "<";
+ for (int64_t i = 0; i < schema->n_children; ++i) {
+ if (comma) {
+ out += ", ";
+ }
+ comma = true;
+
+ auto* child = schema->children[i];
+ if (child && child->name[0] != '\0') {
+ out += child->name;
+ out += ": ";
+ }
+ out += ArrowSchemaToString(schema->children[i]);
+ }
+ out += ">";
+ }
+
+ return out;
+}
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowTypeRoundtrip) {
+ if (GetParam()->id() == arrow::Type::DICTIONARY) {
+ GTEST_SKIP() << "Dictionary array decode is not yet supported";
+ }
+
+ nanoarrow::UniqueSchema schema;
+ ASSERT_TRUE(
+ arrow::ExportSchema(arrow::Schema({arrow::field("", GetParam())}),
schema.get())
+ .ok());
+
+ nanoarrow::ipc::UniqueEncoder encoder;
+ EXPECT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
+
+ struct ArrowError error;
+ EXPECT_EQ(ArrowIpcEncoderEncodeSchema(encoder.get(), schema.get(), &error),
+ NANOARROW_OK)
+ << error.message;
+
+ nanoarrow::UniqueBuffer buffer;
+ EXPECT_EQ(
+ ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/true,
buffer.get()),
+ NANOARROW_OK);
+
+ struct ArrowBufferView buffer_view;
+ buffer_view.data.data = buffer->data;
+ buffer_view.size_bytes = buffer->size_bytes;
+
+ nanoarrow::ipc::UniqueDecoder decoder;
+ ArrowIpcDecoderInit(decoder.get());
+ ASSERT_EQ(ArrowIpcDecoderVerifyHeader(decoder.get(), buffer_view, nullptr),
+ NANOARROW_OK);
+ ASSERT_EQ(ArrowIpcDecoderDecodeHeader(decoder.get(), buffer_view, nullptr),
+ NANOARROW_OK);
+
+ nanoarrow::UniqueSchema roundtripped;
+ ASSERT_EQ(ArrowIpcDecoderDecodeSchema(decoder.get(), roundtripped.get(),
nullptr),
+ NANOARROW_OK);
+
+ EXPECT_EQ(ArrowSchemaToString(roundtripped.get()),
ArrowSchemaToString(schema.get()));
+}
+#endif
+
#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowArrayRoundtrip) {
+ if (GetParam()->id() == arrow::Type::DICTIONARY) {
+ GTEST_SKIP() << "Dictionary array decode is not yet supported";
+ }
+
const std::shared_ptr<arrow::DataType>& data_type = GetParam();
std::shared_ptr<arrow::Schema> dummy_schema =
arrow::schema({arrow::field("dummy_name", data_type)});
@@ -964,6 +1110,10 @@ void AssertArrayViewIdentical(const struct
ArrowArrayView* actual,
#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowArrayRoundtrip)
{
+ if (GetParam()->id() == arrow::Type::DICTIONARY) {
+ GTEST_SKIP() << "nanoarrow encoder cannot yet encode dictionaries";
+ }
+
struct ArrowError error;
nanoarrow::UniqueSchema schema;
ASSERT_TRUE(
@@ -1043,8 +1193,7 @@ INSTANTIATE_TEST_SUITE_P(
arrow::map(arrow::utf8(), arrow::int64(), true),
arrow::struct_({arrow::field("col1", arrow::int32()),
arrow::field("col2", arrow::utf8())}),
- // Zero-size union doesn't roundtrip through the C Data interface until
- // Arrow 11 (which is not yet available on all platforms)
+ // Zero-size union
// arrow::sparse_union(FieldVector()),
arrow::dense_union(FieldVector()),
// No custom type IDs
arrow::sparse_union({arrow::field("col1", arrow::int32()),
@@ -1061,9 +1210,14 @@ INSTANTIATE_TEST_SUITE_P(
// Type with nested metadata
arrow::list(arrow::field("some_custom_name", arrow::int32(),
- arrow::KeyValueMetadata::Make({"key1"},
{"value1"})))
-
- ));
+ arrow::KeyValueMetadata::Make({"key1"},
{"value1"}))),
+ // Dictionary encoding
+ arrow::dictionary(arrow::int32(), arrow::utf8()),
+ arrow::dictionary(arrow::int32(), arrow::utf8(), true),
+ // Extension type
+ arrow::extension::uuid(),
+ // Dictionary-encoded extension
+ arrow::dictionary(arrow::int32(), arrow::extension::uuid())));
class ArrowSchemaParameterizedTestFixture
: public ::testing::TestWithParam<std::shared_ptr<arrow::Schema>> {
@@ -1101,6 +1255,12 @@ TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcArrowSchemaRoundtrip) {
}
TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcNanoarrowSchemaRoundtrip) {
+ for (const auto& field : GetParam()->fields()) {
+ if (field->type()->id() == arrow::Type::DICTIONARY) {
+ GTEST_SKIP() << "nanoarrow cannot yet encode arrays with dictionaries";
+ }
+ }
+
const std::shared_ptr<arrow::Schema>& arrow_schema = GetParam();
nanoarrow::UniqueSchema schema;
@@ -1138,6 +1298,12 @@ TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcNanoarrowSchemaRoundtrip
}
TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcNanoarrowFooterRoundtrip) {
+ for (const auto& field : GetParam()->fields()) {
+ if (field->type()->id() == arrow::Type::DICTIONARY) {
+ GTEST_SKIP() << "nanoarrow cannot yet encode arrays with dictionaries";
+ }
+ }
+
using namespace nanoarrow::literals;
const std::shared_ptr<arrow::Schema>& arrow_schema = GetParam();
@@ -1208,7 +1374,15 @@ INSTANTIATE_TEST_SUITE_P(
// Schema metadata
arrow::schema({}, arrow::KeyValueMetadata::Make({"key1"}, {"value1"})),
// Non-nullable field
- arrow::schema({arrow::field("some_name", arrow::int32(), false)})));
+ arrow::schema({arrow::field("some_name", arrow::int32(), false)}),
+ // Dictionary with field metadata
+ arrow::schema({arrow::field(
+ "some_name", arrow::dictionary(arrow::int32(), arrow::utf8()),
+ arrow::KeyValueMetadata::Make({"key1", "key2"}, {"value1",
"value2"}))}),
+ // Dictionary-encoded extension type with field metadata
+ arrow::schema({arrow::field(
+ "some_name", arrow::dictionary(arrow::int32(),
arrow::extension::uuid()),
+ arrow::KeyValueMetadata::Make({"key1", "key2"}, {"value1",
"value2"}))})));
class ArrowTypeIdParameterizedTestFixture
: public ::testing::TestWithParam<enum ArrowType> {
@@ -1372,44 +1546,3 @@ INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest,
ArrowTypeIdParameterizedTestFixture,
NANOARROW_TYPE_DECIMAL256,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO));
#endif
-
-TEST(NanoarrowIpcTest, NanoarrowIpcFooterDecodingErrors) {
- struct ArrowError error;
-
- nanoarrow::ipc::UniqueDecoder decoder;
- ArrowIpcDecoderInit(decoder.get());
-
- // not enough data to get the size+magic
- EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{nullptr}, 3}, &error),
ESPIPE)
- << error.message;
-
- // doesn't end with magic
- EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{"\0\0\0\0blargh"}, 10},
&error),
- EINVAL)
- << error.message;
-
- // negative size
- EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(),
- {{"\xFF\xFF\xFF\xFF"
- "ARROW1"},
- 10},
- &error),
- EINVAL)
- << error.message;
-
- // PeekFooter doesn't check for available data
- EXPECT_EQ(
- ArrowIpcDecoderPeekFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10},
&error),
- NANOARROW_OK)
- << error.message;
- EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
-
- decoder->header_size_bytes = -1;
-
- // VerifyFooter *does* check for enough available data
- EXPECT_EQ(
- ArrowIpcDecoderVerifyFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10},
&error),
- ESPIPE)
- << error.message;
- EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
-}
diff --git a/src/nanoarrow/ipc/files_test.cc b/src/nanoarrow/ipc/files_test.cc
index ee2f384c..6a1d7c78 100644
--- a/src/nanoarrow/ipc/files_test.cc
+++ b/src/nanoarrow/ipc/files_test.cc
@@ -481,16 +481,16 @@ INSTANTIATE_TEST_SUITE_P(
// Files with features that are not yet supported (Dictionary encoding)
TestFile::NotSupported(
"generated_dictionary_unsigned.stream",
- "Schema message field with DictionaryEncoding not supported"),
+ "Found valid dictionary batch but dictionary encoding is not yet
supported"),
TestFile::NotSupported(
"generated_dictionary.stream",
- "Schema message field with DictionaryEncoding not supported"),
+ "Found valid dictionary batch but dictionary encoding is not yet
supported"),
TestFile::NotSupported(
"generated_nested_dictionary.stream",
- "Schema message field with DictionaryEncoding not supported"),
+ "Found valid dictionary batch but dictionary encoding is not yet
supported"),
TestFile::NotSupported(
"generated_extension.stream",
- "Schema message field with DictionaryEncoding not supported")
+ "Found valid dictionary batch but dictionary encoding is not yet
supported")
// Comment to keep last line from wrapping
));
diff --git a/src/nanoarrow/ipc/reader.c b/src/nanoarrow/ipc/reader.c
index 53937b80..7ecc4ccd 100644
--- a/src/nanoarrow/ipc/reader.c
+++ b/src/nanoarrow/ipc/reader.c
@@ -455,9 +455,18 @@ static int ArrowIpcArrayStreamReaderGetNext(struct
ArrowArrayStream* stream,
}
// Make sure we have a RecordBatch message
- if (private_data->decoder.message_type !=
NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
- ArrowErrorSet(&private_data->error, "Unexpected message type (expected
RecordBatch)");
- return EINVAL;
+ switch (private_data->decoder.message_type) {
+ case NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH:
+ break;
+ case NANOARROW_IPC_MESSAGE_TYPE_DICTIONARY_BATCH:
+ ArrowErrorSet(
+ &private_data->error,
+ "Found valid dictionary batch but dictionary encoding is not yet
supported");
+ return ENOTSUP;
+ default:
+ ArrowErrorSet(&private_data->error,
+ "Unexpected message type (expected RecordBatch)");
+ return EINVAL;
}
// Read in the body
diff --git a/src/nanoarrow/nanoarrow_ipc.h b/src/nanoarrow/nanoarrow_ipc.h
index b9251a6b..a1a172cb 100644
--- a/src/nanoarrow/nanoarrow_ipc.h
+++ b/src/nanoarrow/nanoarrow_ipc.h
@@ -162,6 +162,15 @@ enum ArrowIpcCompressionType {
/// \brief Feature flag for a stream that uses compression
#define NANOARROW_IPC_FEATURE_COMPRESSED_BODY 2
+/// \brief Description of an Arrow IPC DictionaryBatch message
+struct ArrowIpcDictionaryBatch {
+ /// \brief The identifier for this dictionary
+ int64_t id;
+ /// \brief If non-zero, values should be appended to the existing dictionary.
+ /// Otherwise, values should replace the existing dictionary.
+ int is_delta;
+};
+
/// \brief Checks the nanoarrow runtime to make sure the run/build versions
match
NANOARROW_DLL ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error);
@@ -307,6 +316,9 @@ struct ArrowIpcDecoder {
/// \brief The number of bytes in the forthcoming body message.
int64_t body_size_bytes;
+ /// \brief The last decoded DictionaryBatch
+ const struct ArrowIpcDictionaryBatch* dictionary;
+
/// \brief The last decoded Footer
///
/// \warning This API is currently only public for use in integration
testing;
@@ -670,6 +682,7 @@ ArrowErrorCode ArrowIpcWriterStartFile(struct
ArrowIpcWriter* writer,
/// Writes the IPC file's footer, footer size, and ending magic.
NANOARROW_DLL ArrowErrorCode ArrowIpcWriterFinalizeFile(struct ArrowIpcWriter*
writer,
struct ArrowError*
error);
+
/// @}
// Internal APIs: