(arrow-nanoarrow) branch main updated: feat: Add Dictionary schema read support in IPC reader (#738)

paleolimbot Fri, 06 Mar 2026 06:45:43 -0800

This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git



The following commit(s) were added to refs/heads/main by this push:
     new 5b7184cf feat: Add Dictionary schema read support in IPC reader (#738)
5b7184cf is described below

commit 5b7184cfd5d17b712454d3e2e738930b3755c219
Author: Dewey Dunnington <[email protected]>
AuthorDate: Fri Mar 6 08:45:26 2026 -0600

    feat: Add Dictionary schema read support in IPC reader (#738)
    
    To start #622, this PR adds read support for Schema messages that
    contain dictionary-encoded columns. I opened #844 and #845 to track the
    next steps.
    
    ---------
    
    Co-authored-by: Copilot <[email protected]>
---
 src/nanoarrow/ipc/decoder.c       | 187 +++++++++++++--
 src/nanoarrow/ipc/decoder_test.cc | 493 ++++++++++++++++++++++++--------------
 src/nanoarrow/ipc/files_test.cc   |   8 +-
 src/nanoarrow/ipc/reader.c        |  15 +-
 src/nanoarrow/nanoarrow_ipc.h     |  13 +
 5 files changed, 515 insertions(+), 201 deletions(-)

diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c
index 07bfbe0f..f3b4b443 100644
--- a/src/nanoarrow/ipc/decoder.c
+++ b/src/nanoarrow/ipc/decoder.c
@@ -95,6 +95,8 @@ struct ArrowIpcDecoderPrivate {
   int64_t n_union_fields;
   // A pointer to the last flatbuffers message.
   const void* last_message;
+  // Storage for a DictionaryBatch
+  struct ArrowIpcDictionaryBatch dictionary;
   // Storage for a Footer
   struct ArrowIpcFooter footer;
   // Decompressor for compression support
@@ -865,17 +867,132 @@ static int ArrowIpcDecoderSetType(struct ArrowSchema* 
schema, ns(Field_table_t)
   }
 }
 
+// A fun corner case when decoding dictionaries: the extension metadata lives 
with
+// the dictionary (i.e., the non-index type); however, the field metadata still
+// needs to exist on the field.
+static int ArrowIpcMoveNonExtensionFieldMetadataBackToFieldIfNeeded(
+    struct ArrowSchema* schema) {
+  NANOARROW_DCHECK(schema->dictionary != NULL);
+  struct ArrowMetadataReader reader;
+  NANOARROW_RETURN_NOT_OK(ArrowMetadataReaderInit(&reader, 
schema->dictionary->metadata));
+
+  // For the most common case (no metadata), nothing needs to be done here
+  if (reader.remaining_keys == 0) {
+    return NANOARROW_OK;
+  }
+
+  struct ArrowBuffer field_metadata;
+  struct ArrowBuffer extension_metadata;
+  NANOARROW_RETURN_NOT_OK(ArrowMetadataBuilderInit(&field_metadata, NULL));
+  ArrowErrorCode result = ArrowMetadataBuilderInit(&extension_metadata, NULL);
+  if (result != NANOARROW_OK) {
+    ArrowBufferReset(&field_metadata);
+    return result;
+  }
+
+  const struct ArrowStringView extension_name_key = 
ArrowCharView("ARROW:extension:name");
+  const struct ArrowStringView extension_metadata_key =
+      ArrowCharView("ARROW:extension:metadata");
+
+  struct ArrowStringView key;
+  struct ArrowStringView value;
+  while (reader.remaining_keys > 0) {
+    result = ArrowMetadataReaderRead(&reader, &key, &value);
+    if (result != NANOARROW_OK) {
+      ArrowBufferReset(&field_metadata);
+      ArrowBufferReset(&extension_metadata);
+      return result;
+    }
+
+    int key_is_extension_name =
+        key.size_bytes == extension_name_key.size_bytes &&
+        strncmp(key.data, extension_name_key.data, key.size_bytes) == 0;
+    int key_is_extension_metadata =
+        key.size_bytes == extension_metadata_key.size_bytes &&
+        strncmp(key.data, extension_metadata_key.data, key.size_bytes) == 0;
+    if (!key_is_extension_name && !key_is_extension_metadata) {
+      result = ArrowMetadataBuilderAppend(&field_metadata, key, value);
+      if (result != NANOARROW_OK) {
+        ArrowBufferReset(&field_metadata);
+        ArrowBufferReset(&extension_metadata);
+        return result;
+      }
+    } else {
+      result = ArrowMetadataBuilderAppend(&extension_metadata, key, value);
+      if (result != NANOARROW_OK) {
+        ArrowBufferReset(&field_metadata);
+        ArrowBufferReset(&extension_metadata);
+        return result;
+      }
+    }
+  }
+
+  result = ArrowSchemaSetMetadata(schema, (char*)field_metadata.data);
+  if (result != NANOARROW_OK) {
+    ArrowBufferReset(&field_metadata);
+    ArrowBufferReset(&extension_metadata);
+    return result;
+  }
+
+  result = ArrowSchemaSetMetadata(schema->dictionary, 
(char*)extension_metadata.data);
+  ArrowBufferReset(&field_metadata);
+  ArrowBufferReset(&extension_metadata);
+
+  return result;
+}
+
+static int ArrowIpcSetDictionaryEncoding(
+    struct ArrowSchema* schema, ns(DictionaryEncoding_table_t 
dictionary_encoding),
+    struct ArrowError* error) {
+  switch (
+      
org_apache_arrow_flatbuf_DictionaryEncoding_dictionaryKind(dictionary_encoding))
 {
+    case ns(DictionaryKind_DenseArray):
+      break;
+    default:
+      ArrowErrorSet(error, "Unexpected value for DictionaryKind");
+      return EINVAL;
+  }
+
+  struct ArrowSchema tmp;
+  ArrowSchemaMove(schema, &tmp);
+
+  ArrowSchemaInit(schema);
+  int result = ArrowSchemaAllocateDictionary(schema);
+  if (result != NANOARROW_OK) {
+    ArrowSchemaRelease(&tmp);
+    ArrowErrorSet(error, "ArrowSchemaAllocateDictionary() failed");
+    return result;
+  }
+
+  ArrowSchemaMove(&tmp, schema->dictionary);
+
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetName(schema, 
schema->dictionary->name),
+                                     error);
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaSetName(schema->dictionary, 
""), error);
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetTypeInt(
+      schema, ns(DictionaryEncoding_indexType_get(dictionary_encoding)), 
error));
+
+  if (ns(DictionaryEncoding_isOrdered_get(dictionary_encoding))) {
+    schema->flags |= ARROW_FLAG_DICTIONARY_ORDERED;
+  }
+
+  // Field metadata should stay with the field; however, we need the extension 
metadata
+  // to stay with the dictionary.
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowIpcMoveNonExtensionFieldMetadataBackToFieldIfNeeded(schema), error);
+
+  // TODO: Track the dictionary
+  // https://github.com/apache/arrow-nanoarrow/issues/844
+
+  return NANOARROW_OK;
+}
+
 static int ArrowIpcDecoderSetChildren(struct ArrowSchema* schema, 
ns(Field_vec_t) fields,
                                       struct ArrowError* error);
 
 static int ArrowIpcDecoderSetField(struct ArrowSchema* schema, 
ns(Field_table_t) field,
                                    struct ArrowError* error) {
-  // No dictionary support yet
-  if (ns(Field_dictionary_is_present(field))) {
-    ArrowErrorSet(error, "Schema message field with DictionaryEncoding not 
supported");
-    return ENOTSUP;
-  }
-
   int result;
   if (ns(Field_name_is_present(field))) {
     result = ArrowSchemaSetName(schema, ns(Field_name_get(field)));
@@ -916,7 +1033,16 @@ static int ArrowIpcDecoderSetField(struct ArrowSchema* 
schema, ns(Field_table_t)
   }
 
   NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderSetChildren(schema, children, error));
-  return ArrowIpcDecoderSetMetadata(schema, ns(Field_custom_metadata(field)), 
error);
+  NANOARROW_RETURN_NOT_OK(
+      ArrowIpcDecoderSetMetadata(schema, ns(Field_custom_metadata(field)), 
error));
+
+  // If this is a dictionary encoded field, set the dictionary encoding
+  if (ns(Field_dictionary_is_present(field))) {
+    NANOARROW_RETURN_NOT_OK(
+        ArrowIpcSetDictionaryEncoding(schema, ns(Field_dictionary(field)), 
error));
+  }
+
+  return NANOARROW_OK;
 }
 
 static int ArrowIpcDecoderSetChildren(struct ArrowSchema* schema, 
ns(Field_vec_t) fields,
@@ -972,6 +1098,19 @@ static int ArrowIpcDecoderDecodeSchemaHeader(struct 
ArrowIpcDecoder* decoder,
   return NANOARROW_OK;
 }
 
+static int ArrowIpcDecoderDecodeDictionaryBatchHeader(
+    struct ArrowIpcDecoder* decoder, flatbuffers_generic_t message_header) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+
+  ns(DictionaryBatch_table_t) dictionary = 
(ns(DictionaryBatch_table_t))message_header;
+  private_data->dictionary.id = ns(DictionaryBatch_id(dictionary));
+  private_data->dictionary.is_delta = ns(DictionaryBatch_isDelta(dictionary));
+
+  decoder->dictionary = &private_data->dictionary;
+  return NANOARROW_OK;
+}
+
 static int ArrowIpcDecoderDecodeRecordBatchHeader(struct ArrowIpcDecoder* 
decoder,
                                                   flatbuffers_generic_t 
message_header,
                                                   struct ArrowError* error) {
@@ -1042,6 +1181,8 @@ static inline void ArrowIpcDecoderResetHeaderInfo(struct 
ArrowIpcDecoder* decode
   decoder->codec = 0;
   decoder->header_size_bytes = 0;
   decoder->body_size_bytes = 0;
+  decoder->dictionary = NULL;
+  memset(&private_data->dictionary, 0, sizeof(struct ArrowIpcDictionaryBatch));
   decoder->footer = NULL;
   ArrowIpcFooterReset(&private_data->footer);
   private_data->last_message = NULL;
@@ -1284,11 +1425,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct 
ArrowIpcDecoder* decoder,
       NANOARROW_RETURN_NOT_OK(
           ArrowIpcDecoderDecodeSchemaHeader(decoder, message_header, error));
       break;
+    case ns(MessageHeader_DictionaryBatch):
+      NANOARROW_RETURN_NOT_OK(
+          ArrowIpcDecoderDecodeDictionaryBatchHeader(decoder, message_header));
+      break;
     case ns(MessageHeader_RecordBatch):
       NANOARROW_RETURN_NOT_OK(
           ArrowIpcDecoderDecodeRecordBatchHeader(decoder, message_header, 
error));
       break;
-    case ns(MessageHeader_DictionaryBatch):
     case ns(MessageHeader_Tensor):
     case ns(MessageHeader_SparseTensor):
       ArrowErrorSet(error, "Unsupported message type: '%s'",
@@ -1857,6 +2001,11 @@ static int ArrowIpcDecoderWalkGetArray(struct 
ArrowArrayView* array_view,
         array_view->children[i], array->children[i], out->children[i], error));
   }
 
+  if (array->dictionary != NULL) {
+    ArrowErrorSet(error, "Decode of dictionary array is not yet supported");
+    return ENOTSUP;
+  }
+
   return NANOARROW_OK;
 }
 
@@ -1970,12 +2119,6 @@ static ArrowErrorCode 
ArrowIpcDecoderDecodeArrayViewInternal(
   struct ArrowIpcDecoderPrivate* private_data =
       (struct ArrowIpcDecoderPrivate*)decoder->private_data;
 
-  if (private_data->last_message == NULL ||
-      decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
-    ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
-    return EINVAL;
-  }
-
   // RecordBatch messages don't count the root node but decoder->fields does
   // (decoder->fields[0] is the root field)
   if (field_i + 1 >= private_data->n_fields) {
@@ -2038,6 +2181,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeArrayView(struct 
ArrowIpcDecoder* decoder,
                                               struct ArrowBufferView body, 
int64_t i,
                                               struct ArrowArrayView** out,
                                               struct ArrowError* error) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+  if (private_data->last_message == NULL ||
+      decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
+    ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
+    return EINVAL;
+  }
+
   return ArrowIpcDecoderDecodeArrayViewInternal(
       decoder, ArrowIpcBufferFactoryFromView(&body), i, out, error);
 }
@@ -2047,6 +2198,14 @@ ArrowErrorCode ArrowIpcDecoderDecodeArray(struct 
ArrowIpcDecoder* decoder,
                                           struct ArrowArray* out,
                                           enum ArrowValidationLevel 
validation_level,
                                           struct ArrowError* error) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+  if (private_data->last_message == NULL ||
+      decoder->message_type != NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
+    ArrowErrorSet(error, "decoder did not just decode a RecordBatch message");
+    return EINVAL;
+  }
+
   struct ArrowArrayView* array_view;
   NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeArrayViewInternal(
       decoder, ArrowIpcBufferFactoryFromView(&body), i, &array_view, error));
diff --git a/src/nanoarrow/ipc/decoder_test.cc 
b/src/nanoarrow/ipc/decoder_test.cc
index 81993bce..40322908 100644
--- a/src/nanoarrow/ipc/decoder_test.cc
+++ b/src/nanoarrow/ipc/decoder_test.cc
@@ -21,6 +21,7 @@
 #if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
 #include <arrow/array.h>
 #include <arrow/c/bridge.h>
+#include <arrow/extension/uuid.h>
 #include <arrow/ipc/api.h>
 #include <arrow/util/key_value_metadata.h>
 #endif
@@ -54,6 +55,7 @@ struct ArrowIpcDecoderPrivate {
   struct ArrowIpcField* fields;
   int64_t n_buffers;
   const void* last_message;
+  struct ArrowIpcDictionaryBatch dictionary;
   struct ArrowIpcFooter footer;
   struct ArrowIpcDecompressor decompressor;
 };
@@ -159,6 +161,51 @@ alignas(8) static uint8_t 
kSimpleRecordBatchUncompressible[] = {
     0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
     0x00, 0x00, 0x00, 0x00};
 
+alignas(8) static uint8_t kDictionarySchema[] = {
+    0xff, 0xff, 0xff, 0xff, 0x50, 0x01, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x0a, 0x00, 0x0e, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 0x0a, 0x00, 
0x00, 0x00,
+    0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 
0x0c, 0x00,
+    0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0xb0, 0x00, 
0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 
0x08, 0x00,
+    0x0c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x8c, 0x00, 
0x00, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x7e, 0x00, 0x00, 0x00, 0x41, 0x0a, 0x33, 0x0a, 
0x32, 0x36,
+    0x33, 0x31, 0x37, 0x30, 0x0a, 0x31, 0x39, 0x37, 0x38, 0x38, 0x38, 0x0a, 
0x35, 0x0a,
+    0x55, 0x54, 0x46, 0x2d, 0x38, 0x0a, 0x35, 0x33, 0x31, 0x0a, 0x31, 0x0a, 
0x35, 0x33,
+    0x31, 0x0a, 0x31, 0x0a, 0x32, 0x35, 0x34, 0x0a, 0x31, 0x30, 0x32, 0x36, 
0x0a, 0x31,
+    0x0a, 0x32, 0x36, 0x32, 0x31, 0x35, 0x33, 0x0a, 0x35, 0x0a, 0x6e, 0x61, 
0x6d, 0x65,
+    0x73, 0x0a, 0x31, 0x36, 0x0a, 0x31, 0x0a, 0x32, 0x36, 0x32, 0x31, 0x35, 
0x33, 0x0a,
+    0x38, 0x0a, 0x73, 0x6f, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 0x0a, 0x32, 
0x35, 0x34,
+    0x0a, 0x31, 0x30, 0x32, 0x36, 0x0a, 0x35, 0x31, 0x31, 0x0a, 0x31, 0x36, 
0x0a, 0x31,
+    0x0a, 0x32, 0x36, 0x32, 0x31, 0x35, 0x33, 0x0a, 0x37, 0x0a, 0x63, 0x6f, 
0x6c, 0x75,
+    0x6d, 0x6e, 0x73, 0x0a, 0x32, 0x35, 0x34, 0x0a, 0x00, 0x00, 0x01, 0x00, 
0x00, 0x00,
+    0x72, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 
0x10, 0x00,
+    0x18, 0x00, 0x08, 0x00, 0x06, 0x00, 0x07, 0x00, 0x0c, 0x00, 0x10, 0x00, 
0x14, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x05, 0x14, 0x00, 0x00, 0x00, 
0x48, 0x00,
+    0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x73, 0x6f, 0x6d, 0x65, 0x5f, 0x63, 0x6f, 0x6c, 
0x00, 0x00,
+    0x00, 0x00, 0x08, 0x00, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 
0x00, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0c, 0x00, 0x08, 0x00, 0x07, 0x00, 
0x08, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x08, 0x00, 0x00, 0x00, 0x04, 0x00, 
0x04, 0x00,
+    0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
+
+alignas(8) static uint8_t kDictionaryBatch[] = {
+    0xff, 0xff, 0xff, 0xff, 0xa8, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x0c, 0x00, 0x14, 0x00, 0x06, 0x00, 0x05, 0x00, 0x08, 0x00, 
0x0c, 0x00,
+    0x0c, 0x00, 0x00, 0x00, 0x00, 0x02, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, 
0x20, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 
0x04, 0x00,
+    0x08, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 
0x18, 0x00,
+    0x0c, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x4c, 0x00, 
0x00, 0x00,
+    0x10, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x04, 0x00,
+    0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x7a, 0x65, 
0x72, 0x6f,
+    0x6f, 0x6e, 0x65, 0x74, 0x77, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 TEST(NanoarrowIpcTest, NanoarrowIpcCheckHeader) {
   struct ArrowIpcDecoder decoder;
   struct ArrowError error;
@@ -538,6 +585,56 @@ TEST(NanoarrowIpcTest, 
NanoarrowIpcDecodeSimpleRecordBatchErrors) {
   ArrowIpcDecoderReset(&decoder);
 }
 
+TEST(NanoarrowIpcTest, NanoarrowIpcDecodeDictionarySchema) {
+  struct ArrowIpcDecoder decoder;
+  struct ArrowError error;
+  struct ArrowSchema schema;
+
+  struct ArrowBufferView data;
+  data.data.as_uint8 = kDictionarySchema;
+  data.size_bytes = sizeof(kDictionarySchema);
+
+  ASSERT_EQ(ArrowIpcDecoderInit(&decoder), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, data, &error), NANOARROW_OK);
+  ASSERT_EQ(decoder.message_type, NANOARROW_IPC_MESSAGE_TYPE_SCHEMA);
+
+  ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, &error), 
NANOARROW_OK);
+  ASSERT_EQ(schema.n_children, 1);
+  EXPECT_STREQ(schema.children[0]->name, "some_col");
+  EXPECT_EQ(schema.children[0]->flags, ARROW_FLAG_NULLABLE);
+  EXPECT_STREQ(schema.children[0]->format, "c");
+
+  ASSERT_NE(schema.children[0]->dictionary, nullptr);
+  EXPECT_STREQ(schema.children[0]->dictionary->format, "u");
+
+  ArrowSchemaRelease(&schema);
+  ArrowIpcDecoderReset(&decoder);
+}
+
+TEST(NanoarrowIpcTest, NanoarrowIpcDecodeDictionaryBatch) {
+  struct ArrowIpcDecoder decoder;
+  struct ArrowError error;
+
+  struct ArrowBufferView data;
+  data.data.as_uint8 = kDictionaryBatch;
+  data.size_bytes = sizeof(kDictionaryBatch);
+
+  ASSERT_EQ(ArrowIpcDecoderInit(&decoder), NANOARROW_OK);
+
+  EXPECT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, data, &error), NANOARROW_OK);
+  ASSERT_EQ(decoder.message_type, NANOARROW_IPC_MESSAGE_TYPE_DICTIONARY_BATCH);
+
+  ASSERT_NE(decoder.dictionary, nullptr);
+  EXPECT_EQ(decoder.dictionary->id, 0);
+  EXPECT_FALSE(decoder.dictionary->is_delta);
+
+  // TODO: Access RecordBatch content
+  // https://github.com/apache/arrow-nanoarrow/issues/845
+
+  ArrowIpcDecoderReset(&decoder);
+}
+
 TEST(NanoarrowIpcTest, NanoarrowIpcSetSchema) {
   struct ArrowIpcDecoder decoder;
   struct ArrowSchema schema;
@@ -605,139 +702,6 @@ TEST(NanoarrowIpcTest, NanoarrowIpcSetDecompressor) {
   ArrowIpcDecoderReset(&decoder);
 }
 
-#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
-class ArrowTypeParameterizedTestFixture
-    : public ::testing::TestWithParam<std::shared_ptr<arrow::DataType>> {
- protected:
-  std::shared_ptr<arrow::DataType> data_type;
-};
-
-TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowTypeRoundtrip) {
-  const std::shared_ptr<arrow::DataType>& data_type = GetParam();
-  std::shared_ptr<arrow::Schema> dummy_schema =
-      arrow::schema({arrow::field("dummy_name", data_type)});
-  auto maybe_serialized = arrow::ipc::SerializeSchema(*dummy_schema);
-  ASSERT_TRUE(maybe_serialized.ok());
-
-  struct ArrowBufferView buffer_view;
-  buffer_view.data.data = maybe_serialized.ValueUnsafe()->data();
-  buffer_view.size_bytes = maybe_serialized.ValueOrDie()->size();
-
-  struct ArrowIpcDecoder decoder;
-  ArrowIpcDecoderInit(&decoder);
-  ASSERT_EQ(ArrowIpcDecoderVerifyHeader(&decoder, buffer_view, nullptr), 
NANOARROW_OK);
-  EXPECT_EQ(decoder.header_size_bytes, buffer_view.size_bytes);
-  EXPECT_EQ(decoder.body_size_bytes, 0);
-
-  ASSERT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, buffer_view, nullptr), 
NANOARROW_OK);
-  struct ArrowSchema schema;
-  ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, nullptr), 
NANOARROW_OK);
-  auto maybe_schema = arrow::ImportSchema(&schema);
-  ASSERT_TRUE(maybe_schema.ok());
-
-  // Better failure message if we first check for string equality
-  EXPECT_EQ(maybe_schema.ValueUnsafe()->ToString(), dummy_schema->ToString());
-  EXPECT_TRUE(maybe_schema.ValueUnsafe()->Equals(dummy_schema, true));
-
-  ArrowIpcDecoderReset(&decoder);
-}
-#endif
-
-std::string ArrowSchemaMetadataToString(const char* metadata) {
-  struct ArrowMetadataReader reader;
-  auto st = ArrowMetadataReaderInit(&reader, metadata);
-  EXPECT_EQ(st, NANOARROW_OK);
-
-  bool comma = false;
-  std::string out;
-  while (reader.remaining_keys > 0) {
-    struct ArrowStringView key, value;
-    auto st = ArrowMetadataReaderRead(&reader, &key, &value);
-    EXPECT_EQ(st, NANOARROW_OK);
-    if (comma) {
-      out += ", ";
-    }
-    comma = true;
-
-    out.append(key.data, key.size_bytes);
-    out += "=";
-    out.append(value.data, value.size_bytes);
-  }
-  return out;
-}
-
-std::string ArrowSchemaToString(const struct ArrowSchema* schema) {
-  int64_t n = ArrowSchemaToString(schema, nullptr, 0, /*recursive=*/false);
-  std::vector<char> out_vec(n, '\0');
-  ArrowSchemaToString(schema, out_vec.data(), n, /*recursive=*/false);
-  std::string out(out_vec.data(), out_vec.size());
-
-  std::string metadata = ArrowSchemaMetadataToString(schema->metadata);
-  if (!metadata.empty()) {
-    out += "{" + metadata + "}";
-  }
-
-  bool comma = false;
-  if (schema->format[0] == '+') {
-    out += "<";
-    for (int64_t i = 0; i < schema->n_children; ++i) {
-      if (comma) {
-        out += ", ";
-      }
-      comma = true;
-
-      auto* child = schema->children[i];
-      if (child && child->name[0] != '\0') {
-        out += child->name;
-        out += ": ";
-      }
-      out += ArrowSchemaToString(schema->children[i]);
-    }
-    out += ">";
-  }
-
-  return out;
-}
-
-#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
-TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowTypeRoundtrip) {
-  nanoarrow::UniqueSchema schema;
-  ASSERT_TRUE(
-      arrow::ExportSchema(arrow::Schema({arrow::field("", GetParam())}), 
schema.get())
-          .ok());
-
-  nanoarrow::ipc::UniqueEncoder encoder;
-  EXPECT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
-
-  struct ArrowError error;
-  EXPECT_EQ(ArrowIpcEncoderEncodeSchema(encoder.get(), schema.get(), &error),
-            NANOARROW_OK)
-      << error.message;
-
-  nanoarrow::UniqueBuffer buffer;
-  EXPECT_EQ(
-      ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/true, 
buffer.get()),
-      NANOARROW_OK);
-
-  struct ArrowBufferView buffer_view;
-  buffer_view.data.data = buffer->data;
-  buffer_view.size_bytes = buffer->size_bytes;
-
-  nanoarrow::ipc::UniqueDecoder decoder;
-  ArrowIpcDecoderInit(decoder.get());
-  ASSERT_EQ(ArrowIpcDecoderVerifyHeader(decoder.get(), buffer_view, nullptr),
-            NANOARROW_OK);
-  ASSERT_EQ(ArrowIpcDecoderDecodeHeader(decoder.get(), buffer_view, nullptr),
-            NANOARROW_OK);
-
-  nanoarrow::UniqueSchema roundtripped;
-  ASSERT_EQ(ArrowIpcDecoderDecodeSchema(decoder.get(), roundtripped.get(), 
nullptr),
-            NANOARROW_OK);
-
-  EXPECT_EQ(ArrowSchemaToString(roundtripped.get()), 
ArrowSchemaToString(schema.get()));
-}
-#endif
-
 TEST(NanoarrowIpcTest, NanoarrowIpcDecodeSimpleRecordBatchFromShared) {
   struct ArrowIpcDecoder decoder;
   struct ArrowError error;
@@ -871,8 +835,190 @@ TEST(NanoarrowIpcTest, 
NanoarrowIpcSharedBufferThreadSafeDecode) {
   // We will get a (occasional) memory leak if the atomic counter does not work
 }
 
+TEST(NanoarrowIpcTest, NanoarrowIpcFooterDecodingErrors) {
+  struct ArrowError error;
+
+  nanoarrow::ipc::UniqueDecoder decoder;
+  ArrowIpcDecoderInit(decoder.get());
+
+  // not enough data to get the size+magic
+  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{nullptr}, 3}, &error), 
ESPIPE)
+      << error.message;
+
+  // doesn't end with magic
+  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{"\0\0\0\0blargh"}, 10}, 
&error),
+            EINVAL)
+      << error.message;
+
+  // negative size
+  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(),
+                                      {{"\xFF\xFF\xFF\xFF"
+                                        "ARROW1"},
+                                       10},
+                                      &error),
+            EINVAL)
+      << error.message;
+
+  // PeekFooter doesn't check for available data
+  EXPECT_EQ(
+      ArrowIpcDecoderPeekFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10}, 
&error),
+      NANOARROW_OK)
+      << error.message;
+  EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
+
+  decoder->header_size_bytes = -1;
+
+  // VerifyFooter *does* check for enough available data
+  EXPECT_EQ(
+      ArrowIpcDecoderVerifyFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10}, 
&error),
+      ESPIPE)
+      << error.message;
+  EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
+}
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+class ArrowTypeParameterizedTestFixture
+    : public ::testing::TestWithParam<std::shared_ptr<arrow::DataType>> {
+ protected:
+  std::shared_ptr<arrow::DataType> data_type;
+};
+
+TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowTypeRoundtrip) {
+  const std::shared_ptr<arrow::DataType>& data_type = GetParam();
+  std::shared_ptr<arrow::Schema> dummy_schema =
+      arrow::schema({arrow::field("dummy_name", data_type)});
+  auto maybe_serialized = arrow::ipc::SerializeSchema(*dummy_schema);
+  ASSERT_TRUE(maybe_serialized.ok());
+
+  struct ArrowBufferView buffer_view;
+  buffer_view.data.data = maybe_serialized.ValueUnsafe()->data();
+  buffer_view.size_bytes = maybe_serialized.ValueOrDie()->size();
+
+  struct ArrowIpcDecoder decoder;
+  ArrowIpcDecoderInit(&decoder);
+  ASSERT_EQ(ArrowIpcDecoderVerifyHeader(&decoder, buffer_view, nullptr), 
NANOARROW_OK);
+  EXPECT_EQ(decoder.header_size_bytes, buffer_view.size_bytes);
+  EXPECT_EQ(decoder.body_size_bytes, 0);
+
+  ASSERT_EQ(ArrowIpcDecoderDecodeHeader(&decoder, buffer_view, nullptr), 
NANOARROW_OK);
+  struct ArrowSchema schema;
+  ASSERT_EQ(ArrowIpcDecoderDecodeSchema(&decoder, &schema, nullptr), 
NANOARROW_OK);
+  auto maybe_schema = arrow::ImportSchema(&schema);
+  ASSERT_TRUE(maybe_schema.ok());
+
+  // Better failure message if we first check for string equality
+  EXPECT_EQ(maybe_schema.ValueUnsafe()->ToString(), dummy_schema->ToString());
+  EXPECT_TRUE(maybe_schema.ValueUnsafe()->Equals(dummy_schema, true));
+
+  ArrowIpcDecoderReset(&decoder);
+}
+#endif
+
+std::string ArrowSchemaMetadataToString(const char* metadata) {
+  struct ArrowMetadataReader reader;
+  auto st = ArrowMetadataReaderInit(&reader, metadata);
+  EXPECT_EQ(st, NANOARROW_OK);
+
+  bool comma = false;
+  std::string out;
+  while (reader.remaining_keys > 0) {
+    struct ArrowStringView key, value;
+    auto st = ArrowMetadataReaderRead(&reader, &key, &value);
+    EXPECT_EQ(st, NANOARROW_OK);
+    if (comma) {
+      out += ", ";
+    }
+    comma = true;
+
+    out.append(key.data, key.size_bytes);
+    out += "=";
+    out.append(value.data, value.size_bytes);
+  }
+  return out;
+}
+
+std::string ArrowSchemaToString(const struct ArrowSchema* schema) {
+  int64_t n = ArrowSchemaToString(schema, nullptr, 0, /*recursive=*/false);
+  std::vector<char> out_vec(n, '\0');
+  ArrowSchemaToString(schema, out_vec.data(), n, /*recursive=*/false);
+  std::string out(out_vec.data(), out_vec.size());
+
+  std::string metadata = ArrowSchemaMetadataToString(schema->metadata);
+  if (!metadata.empty()) {
+    out += "{" + metadata + "}";
+  }
+
+  bool comma = false;
+  if (schema->format[0] == '+') {
+    out += "<";
+    for (int64_t i = 0; i < schema->n_children; ++i) {
+      if (comma) {
+        out += ", ";
+      }
+      comma = true;
+
+      auto* child = schema->children[i];
+      if (child && child->name[0] != '\0') {
+        out += child->name;
+        out += ": ";
+      }
+      out += ArrowSchemaToString(schema->children[i]);
+    }
+    out += ">";
+  }
+
+  return out;
+}
+
+#if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
+TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowTypeRoundtrip) {
+  if (GetParam()->id() == arrow::Type::DICTIONARY) {
+    GTEST_SKIP() << "Dictionary array decode is not yet supported";
+  }
+
+  nanoarrow::UniqueSchema schema;
+  ASSERT_TRUE(
+      arrow::ExportSchema(arrow::Schema({arrow::field("", GetParam())}), 
schema.get())
+          .ok());
+
+  nanoarrow::ipc::UniqueEncoder encoder;
+  EXPECT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
+
+  struct ArrowError error;
+  EXPECT_EQ(ArrowIpcEncoderEncodeSchema(encoder.get(), schema.get(), &error),
+            NANOARROW_OK)
+      << error.message;
+
+  nanoarrow::UniqueBuffer buffer;
+  EXPECT_EQ(
+      ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/true, 
buffer.get()),
+      NANOARROW_OK);
+
+  struct ArrowBufferView buffer_view;
+  buffer_view.data.data = buffer->data;
+  buffer_view.size_bytes = buffer->size_bytes;
+
+  nanoarrow::ipc::UniqueDecoder decoder;
+  ArrowIpcDecoderInit(decoder.get());
+  ASSERT_EQ(ArrowIpcDecoderVerifyHeader(decoder.get(), buffer_view, nullptr),
+            NANOARROW_OK);
+  ASSERT_EQ(ArrowIpcDecoderDecodeHeader(decoder.get(), buffer_view, nullptr),
+            NANOARROW_OK);
+
+  nanoarrow::UniqueSchema roundtripped;
+  ASSERT_EQ(ArrowIpcDecoderDecodeSchema(decoder.get(), roundtripped.get(), 
nullptr),
+            NANOARROW_OK);
+
+  EXPECT_EQ(ArrowSchemaToString(roundtripped.get()), 
ArrowSchemaToString(schema.get()));
+}
+#endif
+
 #if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
 TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcArrowArrayRoundtrip) {
+  if (GetParam()->id() == arrow::Type::DICTIONARY) {
+    GTEST_SKIP() << "Dictionary array decode is not yet supported";
+  }
+
   const std::shared_ptr<arrow::DataType>& data_type = GetParam();
   std::shared_ptr<arrow::Schema> dummy_schema =
       arrow::schema({arrow::field("dummy_name", data_type)});
@@ -964,6 +1110,10 @@ void AssertArrayViewIdentical(const struct 
ArrowArrayView* actual,
 
 #if defined(NANOARROW_BUILD_TESTS_WITH_ARROW)
 TEST_P(ArrowTypeParameterizedTestFixture, NanoarrowIpcNanoarrowArrayRoundtrip) 
{
+  if (GetParam()->id() == arrow::Type::DICTIONARY) {
+    GTEST_SKIP() << "nanoarrow encoder cannot yet encode dictionaries";
+  }
+
   struct ArrowError error;
   nanoarrow::UniqueSchema schema;
   ASSERT_TRUE(
@@ -1043,8 +1193,7 @@ INSTANTIATE_TEST_SUITE_P(
         arrow::map(arrow::utf8(), arrow::int64(), true),
         arrow::struct_({arrow::field("col1", arrow::int32()),
                         arrow::field("col2", arrow::utf8())}),
-        // Zero-size union doesn't roundtrip through the C Data interface until
-        // Arrow 11 (which is not yet available on all platforms)
+        // Zero-size union
         // arrow::sparse_union(FieldVector()), 
arrow::dense_union(FieldVector()),
         // No custom type IDs
         arrow::sparse_union({arrow::field("col1", arrow::int32()),
@@ -1061,9 +1210,14 @@ INSTANTIATE_TEST_SUITE_P(
 
         // Type with nested metadata
         arrow::list(arrow::field("some_custom_name", arrow::int32(),
-                                 arrow::KeyValueMetadata::Make({"key1"}, 
{"value1"})))
-
-            ));
+                                 arrow::KeyValueMetadata::Make({"key1"}, 
{"value1"}))),
+        // Dictionary encoding
+        arrow::dictionary(arrow::int32(), arrow::utf8()),
+        arrow::dictionary(arrow::int32(), arrow::utf8(), true),
+        // Extension type
+        arrow::extension::uuid(),
+        // Dictionary-encoded extension
+        arrow::dictionary(arrow::int32(), arrow::extension::uuid())));
 
 class ArrowSchemaParameterizedTestFixture
     : public ::testing::TestWithParam<std::shared_ptr<arrow::Schema>> {
@@ -1101,6 +1255,12 @@ TEST_P(ArrowSchemaParameterizedTestFixture, 
NanoarrowIpcArrowSchemaRoundtrip) {
 }
 
 TEST_P(ArrowSchemaParameterizedTestFixture, 
NanoarrowIpcNanoarrowSchemaRoundtrip) {
+  for (const auto& field : GetParam()->fields()) {
+    if (field->type()->id() == arrow::Type::DICTIONARY) {
+      GTEST_SKIP() << "nanoarrow cannot yet encode arrays with dictionaries";
+    }
+  }
+
   const std::shared_ptr<arrow::Schema>& arrow_schema = GetParam();
 
   nanoarrow::UniqueSchema schema;
@@ -1138,6 +1298,12 @@ TEST_P(ArrowSchemaParameterizedTestFixture, 
NanoarrowIpcNanoarrowSchemaRoundtrip
 }
 
 TEST_P(ArrowSchemaParameterizedTestFixture, 
NanoarrowIpcNanoarrowFooterRoundtrip) {
+  for (const auto& field : GetParam()->fields()) {
+    if (field->type()->id() == arrow::Type::DICTIONARY) {
+      GTEST_SKIP() << "nanoarrow cannot yet encode arrays with dictionaries";
+    }
+  }
+
   using namespace nanoarrow::literals;
   const std::shared_ptr<arrow::Schema>& arrow_schema = GetParam();
 
@@ -1208,7 +1374,15 @@ INSTANTIATE_TEST_SUITE_P(
         // Schema metadata
         arrow::schema({}, arrow::KeyValueMetadata::Make({"key1"}, {"value1"})),
         // Non-nullable field
-        arrow::schema({arrow::field("some_name", arrow::int32(), false)})));
+        arrow::schema({arrow::field("some_name", arrow::int32(), false)}),
+        // Dictionary with field metadata
+        arrow::schema({arrow::field(
+            "some_name", arrow::dictionary(arrow::int32(), arrow::utf8()),
+            arrow::KeyValueMetadata::Make({"key1", "key2"}, {"value1", 
"value2"}))}),
+        // Dictionary-encoded extension type with field metadata
+        arrow::schema({arrow::field(
+            "some_name", arrow::dictionary(arrow::int32(), 
arrow::extension::uuid()),
+            arrow::KeyValueMetadata::Make({"key1", "key2"}, {"value1", 
"value2"}))})));
 
 class ArrowTypeIdParameterizedTestFixture
     : public ::testing::TestWithParam<enum ArrowType> {
@@ -1372,44 +1546,3 @@ INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest, 
ArrowTypeIdParameterizedTestFixture,
                                            NANOARROW_TYPE_DECIMAL256,
                                            
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO));
 #endif
-
-TEST(NanoarrowIpcTest, NanoarrowIpcFooterDecodingErrors) {
-  struct ArrowError error;
-
-  nanoarrow::ipc::UniqueDecoder decoder;
-  ArrowIpcDecoderInit(decoder.get());
-
-  // not enough data to get the size+magic
-  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{nullptr}, 3}, &error), 
ESPIPE)
-      << error.message;
-
-  // doesn't end with magic
-  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {{"\0\0\0\0blargh"}, 10}, 
&error),
-            EINVAL)
-      << error.message;
-
-  // negative size
-  EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(),
-                                      {{"\xFF\xFF\xFF\xFF"
-                                        "ARROW1"},
-                                       10},
-                                      &error),
-            EINVAL)
-      << error.message;
-
-  // PeekFooter doesn't check for available data
-  EXPECT_EQ(
-      ArrowIpcDecoderPeekFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10}, 
&error),
-      NANOARROW_OK)
-      << error.message;
-  EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
-
-  decoder->header_size_bytes = -1;
-
-  // VerifyFooter *does* check for enough available data
-  EXPECT_EQ(
-      ArrowIpcDecoderVerifyFooter(decoder.get(), {{"\xFF\xFF\0\0ARROW1"}, 10}, 
&error),
-      ESPIPE)
-      << error.message;
-  EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
-}
diff --git a/src/nanoarrow/ipc/files_test.cc b/src/nanoarrow/ipc/files_test.cc
index ee2f384c..6a1d7c78 100644
--- a/src/nanoarrow/ipc/files_test.cc
+++ b/src/nanoarrow/ipc/files_test.cc
@@ -481,16 +481,16 @@ INSTANTIATE_TEST_SUITE_P(
         // Files with features that are not yet supported (Dictionary encoding)
         TestFile::NotSupported(
             "generated_dictionary_unsigned.stream",
-            "Schema message field with DictionaryEncoding not supported"),
+            "Found valid dictionary batch but dictionary encoding is not yet 
supported"),
         TestFile::NotSupported(
             "generated_dictionary.stream",
-            "Schema message field with DictionaryEncoding not supported"),
+            "Found valid dictionary batch but dictionary encoding is not yet 
supported"),
         TestFile::NotSupported(
             "generated_nested_dictionary.stream",
-            "Schema message field with DictionaryEncoding not supported"),
+            "Found valid dictionary batch but dictionary encoding is not yet 
supported"),
         TestFile::NotSupported(
             "generated_extension.stream",
-            "Schema message field with DictionaryEncoding not supported")
+            "Found valid dictionary batch but dictionary encoding is not yet 
supported")
         // Comment to keep last line from wrapping
         ));
 
diff --git a/src/nanoarrow/ipc/reader.c b/src/nanoarrow/ipc/reader.c
index 53937b80..7ecc4ccd 100644
--- a/src/nanoarrow/ipc/reader.c
+++ b/src/nanoarrow/ipc/reader.c
@@ -455,9 +455,18 @@ static int ArrowIpcArrayStreamReaderGetNext(struct 
ArrowArrayStream* stream,
   }
 
   // Make sure we have a RecordBatch message
-  if (private_data->decoder.message_type != 
NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH) {
-    ArrowErrorSet(&private_data->error, "Unexpected message type (expected 
RecordBatch)");
-    return EINVAL;
+  switch (private_data->decoder.message_type) {
+    case NANOARROW_IPC_MESSAGE_TYPE_RECORD_BATCH:
+      break;
+    case NANOARROW_IPC_MESSAGE_TYPE_DICTIONARY_BATCH:
+      ArrowErrorSet(
+          &private_data->error,
+          "Found valid dictionary batch but dictionary encoding is not yet 
supported");
+      return ENOTSUP;
+    default:
+      ArrowErrorSet(&private_data->error,
+                    "Unexpected message type (expected RecordBatch)");
+      return EINVAL;
   }
 
   // Read in the body
diff --git a/src/nanoarrow/nanoarrow_ipc.h b/src/nanoarrow/nanoarrow_ipc.h
index b9251a6b..a1a172cb 100644
--- a/src/nanoarrow/nanoarrow_ipc.h
+++ b/src/nanoarrow/nanoarrow_ipc.h
@@ -162,6 +162,15 @@ enum ArrowIpcCompressionType {
 /// \brief Feature flag for a stream that uses compression
 #define NANOARROW_IPC_FEATURE_COMPRESSED_BODY 2
 
+/// \brief Description of an Arrow IPC DictionaryBatch message
+struct ArrowIpcDictionaryBatch {
+  /// \brief The identifier for this dictionary
+  int64_t id;
+  /// \brief If non-zero, values should be appended to the existing dictionary.
+  /// Otherwise, values should replace the existing dictionary.
+  int is_delta;
+};
+
 /// \brief Checks the nanoarrow runtime to make sure the run/build versions 
match
 NANOARROW_DLL ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error);
 
@@ -307,6 +316,9 @@ struct ArrowIpcDecoder {
   /// \brief The number of bytes in the forthcoming body message.
   int64_t body_size_bytes;
 
+  /// \brief The last decoded DictionaryBatch
+  const struct ArrowIpcDictionaryBatch* dictionary;
+
   /// \brief The last decoded Footer
   ///
   /// \warning This API is currently only public for use in integration 
testing;
@@ -670,6 +682,7 @@ ArrowErrorCode ArrowIpcWriterStartFile(struct 
ArrowIpcWriter* writer,
 /// Writes the IPC file's footer, footer size, and ending magic.
 NANOARROW_DLL ArrowErrorCode ArrowIpcWriterFinalizeFile(struct ArrowIpcWriter* 
writer,
                                                         struct ArrowError* 
error);
+
 /// @}
 
 // Internal APIs:

(arrow-nanoarrow) branch main updated: feat: Add Dictionary schema read support in IPC reader (#738)

Reply via email to