This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new cf388965 feat: add Footer decoding (#598)
cf388965 is described below
commit cf38896523c2407cc021f552b73cccd8f57dea83
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Fri Aug 30 14:22:42 2024 -0500
feat: add Footer decoding (#598)
- Adds ArrowIpcDecoderPeekFooter(), ArrowIpcDecoderVerifyFooter(), and
ArrowIpcDecoderDecodeFooter()
- Uses these to read IPC files in the integration test executable
---
src/nanoarrow/integration/ipc_integration.cc | 66 ++++++++---
src/nanoarrow/ipc/decoder.c | 167 +++++++++++++++++++++++----
src/nanoarrow/ipc/decoder_test.cc | 91 +++++++++++++++
src/nanoarrow/ipc/encoder.c | 6 +-
src/nanoarrow/nanoarrow_ipc.h | 69 ++++++++++-
5 files changed, 352 insertions(+), 47 deletions(-)
diff --git a/src/nanoarrow/integration/ipc_integration.cc
b/src/nanoarrow/integration/ipc_integration.cc
index b2a092bd..84f12ee5 100644
--- a/src/nanoarrow/integration/ipc_integration.cc
+++ b/src/nanoarrow/integration/ipc_integration.cc
@@ -95,6 +95,9 @@ int main(int argc, char** argv) try {
}
struct File {
+ File(FILE* file) : file_{file} {}
+ File() = default;
+
~File() {
if (file_ != nullptr) {
fclose(file_);
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
// Footer).
File ipc_file;
NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
- return FromIpcFile(ipc_file, error);
- }
+ auto bytes = ipc_file.read();
- ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
- char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
- if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
- ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
- sizeof(prefix), ftell(ipc_file));
+ auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+ strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+ if (bytes.size() < min_size) {
+ ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
min_size,
+ bytes.size());
return EINVAL;
}
- if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0)
{
+ if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+ sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
return EINVAL;
}
- nanoarrow::ipc::UniqueInputStream input_stream;
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
- /*close_on_release=*/false),
- error);
+ nanoarrow::ipc::UniqueDecoder decoder;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()),
error);
+ NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+ decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())},
error));
+ NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+ decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())},
error));
- nanoarrow::UniqueArrayStream array_stream;
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(),
- /*options=*/nullptr),
- error);
+ ArrowSchemaDeepCopy(&decoder->footer->schema, schema.get()), error);
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderSetSchema(decoder.get(), &decoder->footer->schema,
error));
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowIpcDecoderSetEndianness(decoder.get(), decoder->endianness),
error);
+
+ nanoarrow::UniqueBuffer record_batch_blocks;
+ ArrowBufferMove(&decoder->footer->record_batch_blocks,
record_batch_blocks.get());
+
+ for (int i = 0;
+ i < record_batch_blocks->size_bytes / sizeof(struct
ArrowIpcFileBlock); i++) {
+ const auto& block =
+ reinterpret_cast<struct
ArrowIpcFileBlock*>(record_batch_blocks->data)[i];
+ struct ArrowBufferView metadata_view = {
+ {bytes.data() + block.offset},
+ block.metadata_length,
+ };
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderDecodeHeader(decoder.get(), metadata_view, error));
- return From(array_stream.get(), error);
+ struct ArrowBufferView body_view = {
+ {metadata_view.data.as_uint8 + metadata_view.size_bytes},
+ block.body_length,
+ };
+ nanoarrow::UniqueArray batch;
+ NANOARROW_RETURN_NOT_OK(
+ ArrowIpcDecoderDecodeArray(decoder.get(), body_view, -1, batch.get(),
+ NANOARROW_VALIDATION_LEVEL_FULL, error));
+ batches.push_back(std::move(batch));
+ }
+
+ return NANOARROW_OK;
}
ArrowErrorCode Write(struct ArrowIpcOutputStream* output_stream, bool
write_file,
diff --git a/src/nanoarrow/ipc/decoder.c b/src/nanoarrow/ipc/decoder.c
index 6e37e20d..f2118db2 100644
--- a/src/nanoarrow/ipc/decoder.c
+++ b/src/nanoarrow/ipc/decoder.c
@@ -56,6 +56,8 @@
// at the beginning of every message header.
static const int32_t kMessageHeaderPrefixSize = 8;
+#define NANOARROW_IPC_MAGIC "ARROW1"
+
// Internal representation of a parsed "Field" from flatbuffers. This
// represents a field in a depth-first walk of column arrays and their
// children.
@@ -95,6 +97,8 @@ struct ArrowIpcDecoderPrivate {
int64_t n_buffers;
// A pointer to the last flatbuffers message.
const void* last_message;
+ // Storage for a Footer
+ struct ArrowIpcFooter footer;
};
ArrowErrorCode ArrowIpcCheckRuntime(struct ArrowError* error) {
@@ -236,6 +240,7 @@ ArrowErrorCode ArrowIpcDecoderInit(struct ArrowIpcDecoder*
decoder) {
memset(private_data, 0, sizeof(struct ArrowIpcDecoderPrivate));
private_data->system_endianness = ArrowIpcSystemEndianness();
+ ArrowIpcFooterInit(&private_data->footer);
decoder->private_data = private_data;
return NANOARROW_OK;
}
@@ -256,6 +261,8 @@ void ArrowIpcDecoderReset(struct ArrowIpcDecoder* decoder) {
private_data->n_fields = 0;
}
+ ArrowIpcFooterReset(&private_data->footer);
+
ArrowFree(private_data);
memset(decoder, 0, sizeof(struct ArrowIpcDecoder));
}
@@ -959,6 +966,8 @@ static inline void ArrowIpcDecoderResetHeaderInfo(struct
ArrowIpcDecoder* decode
decoder->codec = 0;
decoder->header_size_bytes = 0;
decoder->body_size_bytes = 0;
+ decoder->footer = NULL;
+ ArrowIpcFooterReset(&private_data->footer);
private_data->last_message = NULL;
}
@@ -1053,6 +1062,85 @@ ArrowErrorCode ArrowIpcDecoderVerifyHeader(struct
ArrowIpcDecoder* decoder,
return NANOARROW_OK;
}
+// Peek at the trailing footer size and magic of an IPC file.
+//
+// Resets the decoder's header info, then inspects only the final
+// sizeof(int32_t) + strlen(NANOARROW_IPC_MAGIC) bytes of data:
+//   - returns ESPIPE if data is too short to hold those bytes,
+//   - returns EINVAL if data does not end with the ARROW1 magic or if the stored
+//     footer size is negative,
+//   - otherwise stores the footer size in decoder->header_size_bytes and returns
+//     NANOARROW_OK.
+// It does not check that data actually contains the footer itself.
+ArrowErrorCode ArrowIpcDecoderPeekFooter(struct ArrowIpcDecoder* decoder,
+                                         struct ArrowBufferView data,
+                                         struct ArrowError* error) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+
+  ArrowIpcDecoderResetHeaderInfo(decoder);
+
+  const int64_t magic_size = (int64_t)strlen(NANOARROW_IPC_MAGIC);
+  if (data.size_bytes < magic_size + (int64_t)sizeof(int32_t)) {
+    ArrowErrorSet(error,
+                  "Expected data of at least 10 bytes but only %" PRId64
+                  " bytes are available",
+                  data.size_bytes);
+    return ESPIPE;
+  }
+
+  // Layout of the tail of data: [... footer ...][int32 footer size][magic]
+  const char* data_end = data.data.as_char + data.size_bytes;
+  const char* magic = data_end - magic_size;
+  const char* footer_size_data = magic - sizeof(int32_t);
+
+  if (memcmp(magic, NANOARROW_IPC_MAGIC, (size_t)magic_size) != 0) {
+    // Report the bytes found where the magic was expected. They are not
+    // NUL-terminated, so the read is bounded with an explicit precision
+    // (formatting data_end would read past the end of the buffer).
+    ArrowErrorSet(error, "Expected file to end with ARROW1 but got %.*s",
+                  (int)magic_size, magic);
+    return EINVAL;
+  }
+
+  // memcpy because the size field has no alignment guarantee within data
+  int32_t footer_size;
+  memcpy(&footer_size, footer_size_data, sizeof(footer_size));
+  if (private_data->system_endianness == NANOARROW_IPC_ENDIANNESS_BIG) {
+    footer_size = bswap32(footer_size);
+  }
+
+  if (footer_size < 0) {
+    ArrowErrorSet(error, "Expected footer size >= 0 but found footer size of %d bytes",
+                  (int)footer_size);
+    return EINVAL;
+  }
+
+  decoder->header_size_bytes = footer_size;
+  return NANOARROW_OK;
+}
+
+// Verify that data ends with a complete, structurally valid Footer flatbuffer.
+//
+// Runs ArrowIpcDecoderPeekFooter() first (propagating its error codes), then:
+//   - returns ESPIPE if data is shorter than footer + footer size + magic,
+//   - returns EINVAL if flatbuffer verification fails or the Footer has no schema,
+//   - otherwise sets decoder->metadata_version and decoder->body_size_bytes (0)
+//     and returns NANOARROW_OK.
+ArrowErrorCode ArrowIpcDecoderVerifyFooter(struct ArrowIpcDecoder* decoder,
+                                           struct ArrowBufferView data,
+                                           struct ArrowError* error) {
+  NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderPeekFooter(decoder, data, error));
+
+  // Check that data contains at least the entire footer (return ESPIPE to signal
+  // that reading more data may help). The sum is computed in 64 bits: the footer
+  // size may be any non-negative int32_t, so 32-bit arithmetic could wrap to a
+  // negative value and defeat this check.
+  int64_t footer_and_size_and_magic_size = (int64_t)decoder->header_size_bytes +
+                                           (int64_t)sizeof(int32_t) +
+                                           (int64_t)strlen(NANOARROW_IPC_MAGIC);
+  if (data.size_bytes < footer_and_size_and_magic_size) {
+    ArrowErrorSet(error,
+                  "Expected >= %" PRId64 " bytes of data but only %" PRId64
+                  " bytes are in the buffer",
+                  footer_and_size_and_magic_size, data.size_bytes);
+    return ESPIPE;
+  }
+
+  const uint8_t* footer_data =
+      data.data.as_uint8 + data.size_bytes - footer_and_size_and_magic_size;
+
+  // Run flatbuffers verification
+  if (ns(Footer_verify_as_root(footer_data, decoder->header_size_bytes)) !=
+      flatcc_verify_ok) {
+    ArrowErrorSet(error, "Footer flatbuffer verification failed");
+    return EINVAL;
+  }
+
+  // Read some basic information from the message
+  ns(Footer_table_t) footer = ns(Footer_as_root(footer_data));
+  if (ns(Footer_schema(footer)) == NULL) {
+    ArrowErrorSet(error, "Footer has no schema");
+    return EINVAL;
+  }
+
+  decoder->metadata_version = ns(Footer_version(footer));
+  decoder->body_size_bytes = 0;
+  return NANOARROW_OK;
+}
+
ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct ArrowIpcDecoder* decoder,
struct ArrowBufferView data,
struct ArrowError* error) {
@@ -1126,6 +1214,29 @@ ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct
ArrowIpcDecoder* decoder,
return NANOARROW_OK;
}
+// Build a struct-typed ArrowSchema in out from a flatbuffers Schema table.
+//
+// out is initialized unconditionally, so on failure it may be partially
+// populated; this function does not release it.
+static ArrowErrorCode ArrowIpcDecoderDecodeSchemaImpl(ns(Schema_table_t) schema,
+                                                      struct ArrowSchema* out,
+                                                      struct ArrowError* error) {
+  ns(Field_vec_t) field_vec = ns(Schema_fields(schema));
+  int64_t field_count = ns(Schema_vec_len(field_vec));
+
+  ArrowSchemaInit(out);
+  // Top-level batch schema is typically non-nullable
+  out->flags = 0;
+
+  ArrowErrorCode status = ArrowSchemaSetTypeStruct(out, field_count);
+  if (status != NANOARROW_OK) {
+    ArrowErrorSet(error, "Failed to allocate struct schema with %" PRId64 " children",
+                  field_count);
+    return status;
+  }
+
+  // One child per field, then the schema-level key/value metadata
+  status = ArrowIpcDecoderSetChildren(out, field_vec, error);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  status = ArrowIpcDecoderSetMetadata(out, ns(Schema_custom_metadata(schema)), error);
+  if (status != NANOARROW_OK) {
+    return status;
+  }
+
+  return NANOARROW_OK;
+}
+
ArrowErrorCode ArrowIpcDecoderDecodeSchema(struct ArrowIpcDecoder* decoder,
struct ArrowSchema* out,
struct ArrowError* error) {
@@ -1138,37 +1249,47 @@ ArrowErrorCode ArrowIpcDecoderDecodeSchema(struct
ArrowIpcDecoder* decoder,
return EINVAL;
}
- ns(Schema_table_t) schema = (ns(Schema_table_t))private_data->last_message;
-
- ns(Field_vec_t) fields = ns(Schema_fields(schema));
- int64_t n_fields = ns(Schema_vec_len(fields));
-
struct ArrowSchema tmp;
- ArrowSchemaInit(&tmp);
- int result = ArrowSchemaSetTypeStruct(&tmp, n_fields);
- if (result != NANOARROW_OK) {
- ArrowSchemaRelease(&tmp);
- ArrowErrorSet(error, "Failed to allocate struct schema with %" PRId64 "
children",
- n_fields);
- return result;
- }
-
- // Top-level batch schema is typically non-nullable
- tmp.flags = 0;
+ ArrowErrorCode result = ArrowIpcDecoderDecodeSchemaImpl(
+ (ns(Schema_table_t))private_data->last_message, &tmp, error);
- result = ArrowIpcDecoderSetChildren(&tmp, fields, error);
if (result != NANOARROW_OK) {
ArrowSchemaRelease(&tmp);
return result;
}
+ ArrowSchemaMove(&tmp, out);
+ return NANOARROW_OK;
+}
- result = ArrowIpcDecoderSetMetadata(&tmp,
ns(Schema_custom_metadata(schema)), error);
- if (result != NANOARROW_OK) {
- ArrowSchemaRelease(&tmp);
- return result;
- }
+// Decode the Footer flatbuffer at the end of data into the decoder's private
+// footer storage and expose it through decoder->footer.
+//
+// The footer size is taken from decoder->header_size_bytes, so a prior call to
+// ArrowIpcDecoderPeekFooter() or ArrowIpcDecoderVerifyFooter() is required.
+// Returns ESPIPE if data is too short to contain footer + footer size + magic,
+// otherwise propagates errors from schema decoding and buffer allocation.
+// decoder->footer is only set on success.
+ArrowErrorCode ArrowIpcDecoderDecodeFooter(struct ArrowIpcDecoder* decoder,
+                                           struct ArrowBufferView data,
+                                           struct ArrowError* error) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
- ArrowSchemaMove(&tmp, out);
+  // Computed in 64 bits so a large footer size cannot wrap the sum negative
+  int64_t footer_and_size_and_magic_size = (int64_t)decoder->header_size_bytes +
+                                           (int64_t)sizeof(int32_t) +
+                                           (int64_t)strlen(NANOARROW_IPC_MAGIC);
+  if (data.size_bytes < footer_and_size_and_magic_size) {
+    ArrowErrorSet(error,
+                  "Expected >= %" PRId64 " bytes of data but only %" PRId64
+                  " bytes are in the buffer",
+                  footer_and_size_and_magic_size, data.size_bytes);
+    return ESPIPE;
+  }
+
+  const uint8_t* footer_data =
+      data.data.as_uint8 + data.size_bytes - footer_and_size_and_magic_size;
+  ns(Footer_table_t) footer = ns(Footer_as_root(footer_data));
+
+  NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeSchemaImpl(
+      ns(Footer_schema(footer)), &private_data->footer.schema, error));
+
+  ns(Block_vec_t) blocks = ns(Footer_recordBatches(footer));
+  int64_t n = ns(Block_vec_len(blocks));
+
+  NANOARROW_RETURN_NOT_OK(ArrowBufferResize(&private_data->footer.record_batch_blocks,
+                                            sizeof(struct ArrowIpcFileBlock) * n,
+                                            /*shrink_to_fit=*/0));
+  struct ArrowIpcFileBlock* record_batches =
+      (struct ArrowIpcFileBlock*)private_data->footer.record_batch_blocks.data;
+  for (int64_t i = 0; i < n; i++) {
+    // Use the generated accessors rather than raw struct members so the values
+    // are converted from the flatbuffer's byte order on big-endian hosts.
+    record_batches[i].offset = ns(Block_offset(blocks + i));
+    record_batches[i].metadata_length = ns(Block_metaDataLength(blocks + i));
+    record_batches[i].body_length = ns(Block_bodyLength(blocks + i));
+  }
+
+  decoder->footer = &private_data->footer;
  return NANOARROW_OK;
 }
diff --git a/src/nanoarrow/ipc/decoder_test.cc
b/src/nanoarrow/ipc/decoder_test.cc
index dd50201c..8d56db7a 100644
--- a/src/nanoarrow/ipc/decoder_test.cc
+++ b/src/nanoarrow/ipc/decoder_test.cc
@@ -48,6 +48,7 @@ struct ArrowIpcDecoderPrivate {
struct ArrowIpcField* fields;
int64_t n_buffers;
const void* last_message;
+ struct ArrowIpcFooter footer;
};
}
@@ -959,6 +960,56 @@ TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcNanoarrowSchemaRoundtrip
EXPECT_EQ(ArrowSchemaToString(roundtripped.get()),
ArrowSchemaToString(schema.get()));
}
+TEST_P(ArrowSchemaParameterizedTestFixture,
NanoarrowIpcNanoarrowFooterRoundtrip) {  // Encode a footer, then check that decoding it reproduces the schema and block
+ using namespace nanoarrow::literals;  // provides the _asv literal used for the magic below
+ const std::shared_ptr<arrow::Schema>& arrow_schema = GetParam();
+
+ nanoarrow::ipc::UniqueFooter footer;
+ ASSERT_TRUE(arrow::ExportSchema(*arrow_schema, &footer->schema).ok());  // footer schema comes from the test parameter
+
+ struct ArrowIpcFileBlock dummy_block = {1, 2, 3};  // arbitrary placeholder values; only round-tripping is checked
+ EXPECT_EQ(
+ ArrowBufferAppend(&footer->record_batch_blocks, &dummy_block,
sizeof(dummy_block)),
+ NANOARROW_OK);
+
+ nanoarrow::ipc::UniqueEncoder encoder;
+ EXPECT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
+
+ struct ArrowError error;
+ EXPECT_EQ(ArrowIpcEncoderEncodeFooter(encoder.get(), footer.get(), &error),
+ NANOARROW_OK)
+ << error.message;
+
+ nanoarrow::UniqueBuffer buffer;
+ EXPECT_EQ(
+ ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/false,
buffer.get()),
+ NANOARROW_OK);
+ EXPECT_EQ(ArrowBufferAppendInt32(buffer.get(), buffer->size_bytes),
NANOARROW_OK);  // append the size of the encoded footer as an int32
+ EXPECT_EQ(ArrowBufferAppendStringView(buffer.get(), "ARROW1"_asv),
NANOARROW_OK);  // append the trailing magic, giving the footer/size/magic tail the decoder looks for
+
+ struct ArrowBufferView buffer_view;
+ buffer_view.data.data = buffer->data;
+ buffer_view.size_bytes = buffer->size_bytes;
+
+ nanoarrow::ipc::UniqueDecoder decoder;
+ ArrowIpcDecoderInit(decoder.get());
+ ASSERT_EQ(ArrowIpcDecoderVerifyFooter(decoder.get(), buffer_view, &error),
NANOARROW_OK)
+ << error.message;
+ ASSERT_EQ(ArrowIpcDecoderDecodeFooter(decoder.get(), buffer_view, &error),
NANOARROW_OK)
+ << error.message;
+
+ EXPECT_EQ(ArrowSchemaToString(&decoder->footer->schema),
+ ArrowSchemaToString(&footer->schema));  // schemas are compared via their string form
+ EXPECT_EQ(decoder->footer->record_batch_blocks.size_bytes,
sizeof(dummy_block));  // exactly one block is expected back
+
+ struct ArrowIpcFileBlock roundtripped_block;
+ memcpy(&roundtripped_block, decoder->footer->record_batch_blocks.data,
+ sizeof(roundtripped_block));  // copy the block out of the raw buffer before comparing its fields
+ EXPECT_EQ(roundtripped_block.offset, dummy_block.offset);
+ EXPECT_EQ(roundtripped_block.metadata_length, dummy_block.metadata_length);
+ EXPECT_EQ(roundtripped_block.body_length, dummy_block.body_length);
+}
+
INSTANTIATE_TEST_SUITE_P(
NanoarrowIpcTest, ArrowSchemaParameterizedTestFixture,
::testing::Values(
@@ -1136,3 +1187,43 @@ INSTANTIATE_TEST_SUITE_P(NanoarrowIpcTest,
ArrowTypeIdParameterizedTestFixture,
NANOARROW_TYPE_DECIMAL128,
NANOARROW_TYPE_DECIMAL256,
NANOARROW_TYPE_INTERVAL_MONTH_DAY_NANO));
+
+TEST(NanoarrowIpcTest, NanoarrowIpcFooterDecodingErrors) {  // Error codes of PeekFooter/VerifyFooter on malformed or truncated input
+ struct ArrowError error;
+
+ nanoarrow::ipc::UniqueDecoder decoder;
+ ArrowIpcDecoderInit(decoder.get());
+
+ // Fewer than 10 bytes: not enough data to hold the footer size + magic
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {nullptr, 3}, &error),
ESPIPE)
+ << error.message;
+
+ // 10 bytes whose last 6 ("blargh") are not the ARROW1 magic
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {"\0\0\0\0blargh", 10},
&error),
+ EINVAL)
+ << error.message;
+
+ // Valid magic, but the four size bytes (all 0xFF) decode to a negative footer size
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(),
+ {"\xFF\xFF\xFF\xFF"
+ "ARROW1",
+ 10},
+ &error),
+ EINVAL)
+ << error.message;
+
+ // PeekFooter doesn't check for available data: a footer size of 0xFFFF is
+ // accepted even though only the 10 trailing bytes are provided
+ EXPECT_EQ(ArrowIpcDecoderPeekFooter(decoder.get(), {"\xFF\xFF\0\0ARROW1",
10}, &error),
+ NANOARROW_OK)
+ << error.message;
+ EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);
+
+ decoder->header_size_bytes = -1;  // sentinel: the check below shows VerifyFooter overwrites it
+
+ // VerifyFooter *does* check for enough available data
+ EXPECT_EQ(
+ ArrowIpcDecoderVerifyFooter(decoder.get(), {"\xFF\xFF\0\0ARROW1", 10},
&error),
+ ESPIPE)
+ << error.message;
+ EXPECT_EQ(decoder->header_size_bytes, 0xFFFF);  // the footer size is stored even though ESPIPE was returned
+}
diff --git a/src/nanoarrow/ipc/encoder.c b/src/nanoarrow/ipc/encoder.c
index 13b72c74..d02c8cb6 100644
--- a/src/nanoarrow/ipc/encoder.c
+++ b/src/nanoarrow/ipc/encoder.c
@@ -238,8 +238,10 @@ static ArrowErrorCode
ArrowIpcEncodeFieldType(flatcc_builder_t* builder,
FLATCC_RETURN_UNLESS_0(
Timestamp_unit_add(builder,
(ns(TimeUnit_enum_t))schema_view->time_unit),
error);
- FLATCC_RETURN_UNLESS_0(
- Timestamp_timezone_create_str(builder, schema_view->timezone),
error);
+ if (schema_view->timezone && schema_view->timezone[0] != 0) {
+ FLATCC_RETURN_UNLESS_0(
+ Timestamp_timezone_create_str(builder, schema_view->timezone),
error);
+ }
FLATCC_RETURN_UNLESS_0(Field_type_Timestamp_end(builder), error);
return NANOARROW_OK;
diff --git a/src/nanoarrow/nanoarrow_ipc.h b/src/nanoarrow/nanoarrow_ipc.h
index b11a0360..ebeb3222 100644
--- a/src/nanoarrow/nanoarrow_ipc.h
+++ b/src/nanoarrow/nanoarrow_ipc.h
@@ -49,6 +49,12 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderSetSchema)
#define ArrowIpcDecoderSetEndianness \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderSetEndianness)
+#define ArrowIpcDecoderPeekFooter \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderPeekFooter)
+#define ArrowIpcDecoderVerifyFooter \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderVerifyFooter)
+#define ArrowIpcDecoderDecodeFooter \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcDecoderDecodeFooter)
#define ArrowIpcInputStreamInitBuffer \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcInputStreamInitBuffer)
#define ArrowIpcInputStreamInitFile \
@@ -222,6 +228,12 @@ struct ArrowIpcDecoder {
/// \brief The number of bytes in the forthcoming body message.
int64_t body_size_bytes;
+ /// \brief The last decoded Footer
+ ///
+ /// \warning This API is currently only public for use in integration
testing;
+ /// use at your own risk.
+ struct ArrowIpcFooter* footer;
+
/// \brief Private resources managed by this library
void* private_data;
};
@@ -569,7 +581,7 @@ ArrowErrorCode ArrowIpcWriterStartFile(struct
ArrowIpcWriter* writer,
/// \brief Finish writing an IPC file
///
-/// Writes the IPC file's Footer, footer size, and ending magic.
+/// Writes the IPC file's footer, footer size, and ending magic.
ArrowErrorCode ArrowIpcWriterFinalizeFile(struct ArrowIpcWriter* writer,
struct ArrowError* error);
/// @}
@@ -589,7 +601,7 @@ struct ArrowIpcFileBlock {
int64_t body_length;
};
-/// \brief A Footer for use in an IPC file
+/// \brief A footer for use in an IPC file
///
/// \warning This API is currently only public for use in integration testing;
/// use at your own risk.
@@ -603,7 +615,7 @@ struct ArrowIpcFooter {
struct ArrowBuffer record_batch_blocks;
};
-/// \brief Initialize a Footer
+/// \brief Initialize a footer
///
/// \warning This API is currently only public for use in integration testing;
/// use at your own risk.
@@ -615,7 +627,7 @@ void ArrowIpcFooterInit(struct ArrowIpcFooter* footer);
/// use at your own risk.
void ArrowIpcFooterReset(struct ArrowIpcFooter* footer);
-/// \brief Encode a Footer for use in an IPC file
+/// \brief Encode a footer for use in an IPC file
///
/// \warning This API is currently only public for use in integration testing;
/// use at your own risk.
@@ -625,6 +637,55 @@ ArrowErrorCode ArrowIpcEncoderEncodeFooter(struct
ArrowIpcEncoder* encoder,
const struct ArrowIpcFooter* footer,
struct ArrowError* error);
+/// \brief Peek at a footer
+///
+/// The last 10 bytes of an Arrow IPC file are the footer size as a little-endian
+/// 32-bit integer followed by the ARROW1 magic. ArrowIpcDecoderPeekFooter() reads
+/// these bytes and returns ESPIPE if data contains fewer than 10 bytes, EINVAL if
+/// the last 10 bytes are not valid (missing magic or negative footer size), or
+/// NANOARROW_OK otherwise. It does not check that data is large enough to contain
+/// the footer itself; use ArrowIpcDecoderVerifyFooter() for that.
+///
+/// The footer size will be stored in decoder.header_size_bytes.
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+ArrowErrorCode ArrowIpcDecoderPeekFooter(struct ArrowIpcDecoder* decoder,
+ struct ArrowBufferView data,
+ struct ArrowError* error);
+
+/// \brief Verify a footer
+///
+/// Runs ArrowIpcDecoderPeekFooter(), checks that data contains the entire footer,
+/// and additionally runs flatbuffer verification to ensure that decoding the data
+/// will not access memory outside of the buffer specified by data.
+/// ArrowIpcDecoderVerifyFooter() will also set decoder.header_size_bytes and
+/// decoder.metadata_version.
+///
+/// Returns the same errors as ArrowIpcDecoderPeekFooter(), ESPIPE if data does
+/// not contain the entire footer, and EINVAL if flatbuffer verification fails or
+/// the footer has no schema.
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+ArrowErrorCode ArrowIpcDecoderVerifyFooter(struct ArrowIpcDecoder* decoder,
+ struct ArrowBufferView data,
+ struct ArrowError* error);
+
+/// \brief Decode a footer
+///
+/// Decodes the content of the footer located at the end of data, using the footer
+/// size previously stored in decoder.header_size_bytes by
+/// ArrowIpcDecoderPeekFooter() or ArrowIpcDecoderVerifyFooter(). decoder.footer
+/// will be set for access to the file's schema and record batches. In almost all
+/// cases this should be preceded by a call to ArrowIpcDecoderVerifyFooter() to
+/// ensure decoding does not access data outside of the specified buffer.
+///
+/// Returns EINVAL if the content of the footer cannot be decoded or ENOTSUP if the
+/// content of the footer uses features not supported by this library.
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+ArrowErrorCode ArrowIpcDecoderDecodeFooter(struct ArrowIpcDecoder* decoder,
+ struct ArrowBufferView data,
+ struct ArrowError* error);
+
#ifdef __cplusplus
}
#endif