This is an automated email from the ASF dual-hosted git repository.
bkietz pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 2c422684 feat: Add IPC integration test executable (#585)
2c422684 is described below
commit 2c42268460c1994fe35479b92e0e5bbebe4b9d9b
Author: Benjamin Kietzman <[email protected]>
AuthorDate: Thu Aug 15 14:40:39 2024 -0500
feat: Add IPC integration test executable (#585)
- Adds a new executable for use with the archery integration tests
- When reading files, it ignores the Footer entirely
- A few private workarounds are added, most notably the stream writer
tracks Blocks in order to populate the Footer
- Tested against patched archery
https://github.com/apache/arrow/compare/main...bkietz:arrow:nanoarrow-integration-tests
- issue to add nanoarrow to the tests
https://github.com/apache/arrow/issues/43680
---
CMakeLists.txt | 14 +-
src/nanoarrow/common/array.c | 1 +
src/nanoarrow/common/inline_types.h | 16 +-
src/nanoarrow/integration/ipc_integration.cc | 331 +++++++++++++++++++++++++++
src/nanoarrow/ipc/encoder.c | 96 +++++++-
src/nanoarrow/ipc/encoder_test.cc | 31 +++
src/nanoarrow/ipc/reader.c | 5 +-
src/nanoarrow/ipc/writer.c | 132 +++++++++--
src/nanoarrow/ipc/writer_test.cc | 103 ++++++++-
src/nanoarrow/nanoarrow_ipc.h | 72 ++++++
src/nanoarrow/nanoarrow_ipc.hpp | 19 ++
11 files changed, 780 insertions(+), 40 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index a954d215..9b115840 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -225,6 +225,17 @@ if(NANOARROW_IPC)
DESTINATION include/nanoarrow)
endif()
+if(NANOARROW_IPC AND (NANOARROW_BUILD_INTEGRATION_TESTS OR NANOARROW_BUILD_TESTS))
+  add_executable(nanoarrow_ipc_integration src/nanoarrow/integration/ipc_integration.cc)
+  target_include_directories(nanoarrow_ipc_integration
+                             PUBLIC $<BUILD_INTERFACE:${NANOARROW_BUILD_INCLUDE_DIR}>
+                                    $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}/src>
+                                    $<INSTALL_INTERFACE:include>)
+  target_link_libraries(nanoarrow_ipc_integration
+                        PRIVATE nanoarrow_testing nanoarrow_ipc flatccrt
+                                nanoarrow_coverage_config)
+endif()
+
if(NANOARROW_DEVICE)
if(NANOARROW_DEVICE_WITH_METAL)
if(NOT EXISTS "${CMAKE_BINARY_DIR}/metal-cpp")
@@ -348,7 +359,8 @@ foreach(target
nanoarrow_ipc
nanoarrow_device
nanoarrow_testing
- nanoarrow_c_data_integration)
+ nanoarrow_c_data_integration
+ nanoarrow_ipc_json_integration)
if(TARGET ${target})
target_compile_definitions(${target} PUBLIC
"$<$<CONFIG:Debug>:NANOARROW_DEBUG>")
diff --git a/src/nanoarrow/common/array.c b/src/nanoarrow/common/array.c
index be99cab8..5a9635e5 100644
--- a/src/nanoarrow/common/array.c
+++ b/src/nanoarrow/common/array.c
@@ -18,6 +18,7 @@
#include <errno.h>
#include <inttypes.h>
#include <stdarg.h>
+#include <stdio.h>
#include <stdlib.h>
#include <string.h>
diff --git a/src/nanoarrow/common/inline_types.h
b/src/nanoarrow/common/inline_types.h
index ac513279..fae01bb2 100644
--- a/src/nanoarrow/common/inline_types.h
+++ b/src/nanoarrow/common/inline_types.h
@@ -149,14 +149,14 @@ struct ArrowArrayStream {
NANOARROW_RETURN_NOT_OK((x_ <= max_) ? NANOARROW_OK : EINVAL)
#if defined(NANOARROW_DEBUG)
-#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
- do {
\
- const int NAME = (EXPR);
\
- if (NAME) {
\
- ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d\n* %s:%d",
EXPR_STR, \
- NAME, __FILE__, __LINE__);
\
- return NAME;
\
- }
\
+#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
+ do {
\
+ const int NAME = (EXPR);
\
+ if (NAME) {
\
+ ArrowErrorSet((ERROR_PTR_EXPR), "%s failed with errno %d(%s)\n* %s:%d",
EXPR_STR, \
+ NAME, strerror(NAME), __FILE__, __LINE__);
\
+ return NAME;
\
+ }
\
} while (0)
#else
#define _NANOARROW_RETURN_NOT_OK_WITH_ERROR_IMPL(NAME, EXPR, ERROR_PTR_EXPR,
EXPR_STR) \
diff --git a/src/nanoarrow/integration/ipc_integration.cc
b/src/nanoarrow/integration/ipc_integration.cc
new file mode 100644
index 00000000..52392c8d
--- /dev/null
+++ b/src/nanoarrow/integration/ipc_integration.cc
@@ -0,0 +1,331 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <cstdlib>
+
+#include <nanoarrow/nanoarrow_ipc.hpp>
+#include <nanoarrow/nanoarrow_testing.hpp>
+
+#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0"
+
+std::string GetEnv(char const* name) {
+ char const* val = std::getenv(name);
+ return val ? val : "";
+}
+
+// Usage text written to stderr when COMMAND is unset or not recognized.
+// The executable name matches the CMake target (nanoarrow_ipc_integration).
+constexpr auto kUsage = R"(USAGE:
+  # assert that f.arrow reads identical to f.json
+  env COMMAND=VALIDATE \
+      ARROW_PATH=f.arrow \
+      JSON_PATH=f.json \
+      nanoarrow_ipc_integration
+
+  # produce f.arrow from f.json
+  env COMMAND=JSON_TO_ARROW \
+      ARROW_PATH=f.arrow \
+      JSON_PATH=f.json \
+      nanoarrow_ipc_integration
+
+  # copy f.stream into f.arrow
+  env COMMAND=STREAM_TO_FILE \
+      ARROW_PATH=f.arrow \
+      nanoarrow_ipc_integration < f.stream
+
+  # copy f.arrow into f.stream
+  env COMMAND=FILE_TO_STREAM \
+      ARROW_PATH=f.arrow \
+      nanoarrow_ipc_integration > f.stream
+
+  # print this usage message (exits with status 1)
+  nanoarrow_ipc_integration
+)";
+
+// One handler per supported COMMAND value. Each reads its paths from the
+// ARROW_PATH / JSON_PATH environment variables and reports failures via `error`.
+ArrowErrorCode Validate(struct ArrowError*);
+ArrowErrorCode JsonToArrow(struct ArrowError*);
+ArrowErrorCode StreamToFile(struct ArrowError*);
+ArrowErrorCode FileToStream(struct ArrowError*);
+
+// Entry point. Dispatches on the COMMAND environment variable and returns the
+// handler's ArrowErrorCode (0 on success) as the process exit status. A missing
+// or unknown COMMAND prints the usage text and exits with status 1, as does any
+// uncaught std::exception.
+int main(int argc, char** argv) try {
+  // Configuration comes from the environment, not the command line.
+  (void)argc;
+  (void)argv;
+
+  std::string command = GetEnv("COMMAND");
+
+  ArrowErrorCode error_code;
+  struct ArrowError error;
+  // Not every failure path is guaranteed to fill in the message; start from an
+  // empty string so the report below never prints uninitialized bytes.
+  error.message[0] = '\0';
+
+  if (command == "VALIDATE") {
+    std::cout << "Validating that " << GetEnv("ARROW_PATH") << " reads identical to "
+              << GetEnv("JSON_PATH") << std::endl;
+
+    error_code = Validate(&error);
+  } else if (command == "JSON_TO_ARROW") {
+    std::cout << "Producing " << GetEnv("ARROW_PATH") << " from " << GetEnv("JSON_PATH")
+              << std::endl;
+
+    error_code = JsonToArrow(&error);
+  } else if (command == "STREAM_TO_FILE") {
+    // No progress message here or below: stdin/stdout carry the IPC stream.
+    error_code = StreamToFile(&error);
+  } else if (command == "FILE_TO_STREAM") {
+    error_code = FileToStream(&error);
+  } else {
+    std::cerr << kUsage;
+    return 1;
+  }
+
+  if (error_code != NANOARROW_OK) {
+    std::cerr << "Command " << command << " failed (" << error_code << "="
+              << strerror(error_code) << "): " << error.message << std::endl;
+  }
+  return error_code;
+} catch (std::exception const& e) {
+  std::cerr << "Uncaught exception: " << e.what() << std::endl;
+  return 1;
+}
+
+// Minimal owning wrapper around a C FILE* which closes the handle on destruction.
+struct File {
+  File() = default;
+  // The handle has exactly one owner: a copy would fclose() it twice.
+  File(const File&) = delete;
+  File& operator=(const File&) = delete;
+
+  ~File() {
+    if (file_ != nullptr) {
+      fclose(file_);
+    }
+  }
+
+  // Opens `path` with the given fopen() `mode`.
+  // Returns NANOARROW_OK on success; otherwise sets `error` and returns EINVAL.
+  ArrowErrorCode open(std::string path, std::string mode, struct ArrowError* error) {
+    // Release a handle left over from an earlier open() instead of leaking it.
+    if (file_ != nullptr) {
+      fclose(file_);
+      file_ = nullptr;
+    }
+
+    file_ = fopen(path.c_str(), mode.c_str());
+    if (file_ != nullptr) {
+      return NANOARROW_OK;
+    }
+    ArrowErrorSet(error, "Opening file '%s' failed with errno=%d", path.c_str(), errno);
+    return EINVAL;
+  }
+
+  // Returns the contents of the file from its beginning. If the size cannot be
+  // determined an empty string is returned; if fewer bytes can be read than
+  // ftell() reported (EOF or a read error) the bytes read so far are returned.
+  std::string read() const {
+    if (fseek(file_, 0, SEEK_END) != 0) {
+      return std::string();
+    }
+    const long end_offset = ftell(file_);
+    rewind(file_);
+    if (end_offset <= 0) {
+      return std::string();
+    }
+
+    std::string contents(static_cast<size_t>(end_offset), '\0');
+    size_t bytes_read = 0;
+    while (bytes_read < contents.size()) {
+      const size_t chunk =
+          fread(&contents[bytes_read], 1, contents.size() - bytes_read, file_);
+      if (chunk == 0) {
+        // No progress is possible (EOF or error); looping again would never end.
+        break;
+      }
+      bytes_read += chunk;
+    }
+    contents.resize(bytes_read);
+    return contents;
+  }
+
+  // Allows a File to be passed directly where a FILE* is expected.
+  operator FILE*() const { return file_; }
+
+  FILE* file_ = nullptr;
+};
+
+// A schema plus every batch of an array stream, held in memory so that the data
+// can be compared against another source or written back out.
+struct MaterializedArrayStream {
+  nanoarrow::UniqueSchema schema;
+  std::vector<nanoarrow::UniqueArray> batches;
+
+  // Reads the schema and then every batch from `stream`, appending to `batches`
+  // until the stream yields a released (end-of-stream) array.
+  ArrowErrorCode From(struct ArrowArrayStream* stream, struct ArrowError* error) {
+    NANOARROW_RETURN_NOT_OK(ArrowArrayStreamGetSchema(stream, schema.get(), error));
+
+    while (true) {
+      nanoarrow::UniqueArray batch;
+      NANOARROW_RETURN_NOT_OK(ArrowArrayStreamGetNext(stream, batch.get(), error));
+      if (batch->release == nullptr) {
+        break;
+      }
+      batches.push_back(std::move(batch));
+    }
+
+    return NANOARROW_OK;
+  }
+
+  // Populates this object from the integration-test JSON file at `path`.
+  ArrowErrorCode FromJsonFile(std::string const& path, struct ArrowError* error) {
+    File json_file;
+    NANOARROW_RETURN_NOT_OK(json_file.open(path, "r", error));
+    auto json = json_file.read();
+
+    nanoarrow::testing::TestingJSONReader reader;
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_RETURN_NOT_OK(
+        reader.ReadDataFile(json, array_stream.get(), reader.kNumBatchReadAll, error));
+    return From(array_stream.get(), error);
+  }
+
+  // Populates this object from the Arrow IPC file at `path`.
+  ArrowErrorCode FromIpcFile(std::string const& path, struct ArrowError* error) {
+    // FIXME this API needs to be public; it's a bit smelly to pretend that we support
+    // reading files when this bespoke program is the only one which can do it
+    //
+    // For now: just check the first 8 bytes of the file and read a stream (ignoring the
+    // Footer).
+    File ipc_file;
+    NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
+    return FromIpcFile(ipc_file, error);
+  }
+
+  // Populates this object from an already opened Arrow IPC file. The leading
+  // padded magic is verified, then the remainder is read as an IPC stream.
+  // `ipc_file` is not closed by this function.
+  ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
+    char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
+    if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
+      // sizeof yields size_t, which is not unsigned long on every platform;
+      // convert explicitly so the argument matches the %lu conversion.
+      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
+                    static_cast<unsigned long>(sizeof(prefix)), ftell(ipc_file));
+      return EINVAL;
+    }
+
+    if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0) {
+      ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
+      return EINVAL;
+    }
+
+    nanoarrow::ipc::UniqueInputStream input_stream;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
+                                    /*close_on_release=*/false),
+        error);
+
+    nanoarrow::UniqueArrayStream array_stream;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(),
+                                      /*options=*/nullptr),
+        error);
+
+    return From(array_stream.get(), error);
+  }
+
+  // Writes the schema, every batch and an end-of-stream marker to
+  // `output_stream`. When `write_file` is true the output is additionally
+  // framed as an IPC file (StartFile before the schema, FinalizeFile at the end).
+  ArrowErrorCode Write(struct ArrowIpcOutputStream* output_stream, bool write_file,
+                       struct ArrowError* error) {
+    nanoarrow::ipc::UniqueWriter writer;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcWriterInit(writer.get(), output_stream),
+                                       error);
+
+    if (write_file) {
+      NANOARROW_RETURN_NOT_OK(ArrowIpcWriterStartFile(writer.get(), error));
+    }
+
+    NANOARROW_RETURN_NOT_OK(ArrowIpcWriterWriteSchema(writer.get(), schema.get(), error));
+
+    nanoarrow::UniqueArrayView array_view;
+    NANOARROW_RETURN_NOT_OK(
+        ArrowArrayViewInitFromSchema(array_view.get(), schema.get(), error));
+
+    for (const auto& batch : batches) {
+      NANOARROW_RETURN_NOT_OK(
+          ArrowArrayViewSetArray(array_view.get(), batch.get(), error));
+      NANOARROW_RETURN_NOT_OK(
+          ArrowIpcWriterWriteArrayView(writer.get(), array_view.get(), error));
+    }
+
+    // A null array view asks the writer to emit the end-of-stream marker.
+    NANOARROW_RETURN_NOT_OK(ArrowIpcWriterWriteArrayView(writer.get(), nullptr, error));
+
+    if (write_file) {
+      NANOARROW_RETURN_NOT_OK(ArrowIpcWriterFinalizeFile(writer.get(), error));
+    }
+
+    return NANOARROW_OK;
+  }
+
+  // Writes this object to `path` as an Arrow IPC file.
+  ArrowErrorCode WriteIpcFile(std::string const& path, struct ArrowError* error) {
+    // FIXME this API needs to be public; it's a bit smelly to pretend that we support
+    // writing files when this bespoke program is the only one which can do it
+    //
+    // For now: just write the leading magic, the stream + EOS, and a manual Footer.
+    File ipc_file;
+    NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "wb", error));
+
+    nanoarrow::ipc::UniqueOutputStream output_stream;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowIpcOutputStreamInitFile(output_stream.get(), ipc_file,
+                                     /*close_on_release=*/false),
+        error);
+
+    return Write(output_stream.get(), /*write_file=*/true, error);
+  }
+};
+
+// VALIDATE: loads JSON_PATH and ARROW_PATH and checks that the IPC file holds
+// the same schema and the same batches as the JSON file. Returns EINVAL with a
+// description of the differences in `error` when they do not match.
+ArrowErrorCode Validate(struct ArrowError* error) {
+  const std::string json_path = GetEnv("JSON_PATH");
+  MaterializedArrayStream json_table;
+  if (const ArrowErrorCode status = json_table.FromJsonFile(json_path, error)) {
+    return status;
+  }
+
+  const std::string arrow_path = GetEnv("ARROW_PATH");
+  MaterializedArrayStream arrow_table;
+  if (const ArrowErrorCode status = arrow_table.FromIpcFile(arrow_path, error)) {
+    return status;
+  }
+
+  // Schemas first: comparing batches is meaningless if these disagree.
+  nanoarrow::testing::TestingJSONComparison comparison;
+  if (const ArrowErrorCode status = comparison.CompareSchema(
+          arrow_table.schema.get(), json_table.schema.get(), error)) {
+    return status;
+  }
+  if (comparison.num_differences() != 0) {
+    std::stringstream report;
+    comparison.WriteDifferences(report);
+    ArrowErrorSet(error, "Found %d differences between schemas:\n%s\n",
+                  static_cast<int>(comparison.num_differences()), report.str().c_str());
+    return EINVAL;
+  }
+
+  const size_t num_batches = arrow_table.batches.size();
+  if (num_batches != json_table.batches.size()) {
+    ArrowErrorSet(error, "%s had %d batches but\n%s had %d batches\n",  //
+                  json_path.c_str(), static_cast<int>(json_table.batches.size()),  //
+                  arrow_path.c_str(), static_cast<int>(arrow_table.batches.size()));
+    return EINVAL;
+  }
+
+  if (const ArrowErrorCode status =
+          comparison.SetSchema(arrow_table.schema.get(), error)) {
+    return status;
+  }
+  for (size_t index = 0; index != num_batches; ++index) {
+    const auto& expected = json_table.batches[index];
+    const auto& actual = arrow_table.batches[index];
+    if (const ArrowErrorCode status = comparison.CompareBatch(
+            actual.get(), expected.get(), error, "Batch " + std::to_string(index))) {
+      return status;
+    }
+  }
+  if (comparison.num_differences() != 0) {
+    std::stringstream report;
+    comparison.WriteDifferences(report);
+    ArrowErrorSet(error, "Found %d differences between batches:\n%s\n",
+                  static_cast<int>(comparison.num_differences()), report.str().c_str());
+    return EINVAL;
+  }
+
+  return NANOARROW_OK;
+}
+
+// JSON_TO_ARROW: reads the JSON file named by JSON_PATH and writes its
+// contents to ARROW_PATH as an IPC file.
+ArrowErrorCode JsonToArrow(struct ArrowError* error) {
+  MaterializedArrayStream materialized;
+  const ArrowErrorCode read_status =
+      materialized.FromJsonFile(GetEnv("JSON_PATH"), error);
+  if (read_status != NANOARROW_OK) {
+    return read_status;
+  }
+  return materialized.WriteIpcFile(GetEnv("ARROW_PATH"), error);
+}
+
+// STREAM_TO_FILE: reads an IPC stream from stdin and writes it to ARROW_PATH
+// as an IPC file.
+ArrowErrorCode StreamToFile(struct ArrowError* error) {
+  // stdin becomes the source of an ArrowIpcInputStream
+  nanoarrow::ipc::UniqueInputStream input_stream;
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowIpcInputStreamInitFile(input_stream.get(), stdin, /*close_on_release=*/true),
+      error);
+
+  // which in turn is decoded into an array stream
+  nanoarrow::UniqueArrayStream array_stream;
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(),
+                                    /*options=*/nullptr),
+      error);
+
+  MaterializedArrayStream materialized;
+  const ArrowErrorCode read_status = materialized.From(array_stream.get(), error);
+  if (read_status != NANOARROW_OK) {
+    return read_status;
+  }
+  return materialized.WriteIpcFile(GetEnv("ARROW_PATH"), error);
+}
+
+// FILE_TO_STREAM: reads the IPC file named by ARROW_PATH and writes its
+// contents to stdout as an IPC stream.
+ArrowErrorCode FileToStream(struct ArrowError* error) {
+  MaterializedArrayStream materialized;
+  const ArrowErrorCode read_status =
+      materialized.FromIpcFile(GetEnv("ARROW_PATH"), error);
+  if (read_status != NANOARROW_OK) {
+    return read_status;
+  }
+
+  // stdout becomes the sink of an ArrowIpcOutputStream
+  nanoarrow::ipc::UniqueOutputStream output_stream;
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowIpcOutputStreamInitFile(output_stream.get(), stdout,
+                                   /*close_on_release=*/true),
+      error);
+
+  return materialized.Write(output_stream.get(), /*write_file=*/false, error);
+}
diff --git a/src/nanoarrow/ipc/encoder.c b/src/nanoarrow/ipc/encoder.c
index 5ab4140d..890fe65e 100644
--- a/src/nanoarrow/ipc/encoder.c
+++ b/src/nanoarrow/ipc/encoder.c
@@ -16,7 +16,6 @@
// under the License.
#include <errno.h>
-#include <inttypes.h>
#include <stdio.h>
#include <string.h>
@@ -45,6 +44,7 @@ struct ArrowIpcEncoderPrivate {
flatcc_builder_t builder;
struct ArrowBuffer buffers;
struct ArrowBuffer nodes;
+ int encoding_footer;
};
ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder* encoder) {
@@ -60,6 +60,7 @@ ArrowErrorCode ArrowIpcEncoderInit(struct ArrowIpcEncoder*
encoder) {
ArrowFree(private);
return ESPIPE;
}
+ private->encoding_footer = 0;
ArrowBufferInit(&private->buffers);
ArrowBufferInit(&private->nodes);
return NANOARROW_OK;
@@ -402,20 +403,18 @@ static ArrowErrorCode
ArrowIpcEncodeField(flatcc_builder_t* builder,
return NANOARROW_OK;
}
-ArrowErrorCode ArrowIpcEncoderEncodeSchema(struct ArrowIpcEncoder* encoder,
+static ArrowErrorCode ArrowIpcEncodeSchema(flatcc_builder_t* builder,
const struct ArrowSchema* schema,
struct ArrowError* error) {
- NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL && schema
!= NULL);
-
- struct ArrowIpcEncoderPrivate* private =
- (struct ArrowIpcEncoderPrivate*)encoder->private_data;
-
- flatcc_builder_t* builder = &private->builder;
+ NANOARROW_DCHECK(schema->release != NULL);
- FLATCC_RETURN_UNLESS_0(Message_start_as_root(builder), error);
- FLATCC_RETURN_UNLESS_0(Message_version_add(builder, ns(MetadataVersion_V5)),
error);
-
- FLATCC_RETURN_UNLESS_0(Message_header_Schema_start(builder), error);
+ if (strcmp(schema->format, "+s") != 0) {
+ ArrowErrorSet(
+ error,
+ "Cannot encode schema with format '%s'; top level schema must have
struct type",
+ schema->format);
+ return EINVAL;
+ }
if (ArrowIpcSystemEndianness() == NANOARROW_IPC_ENDIANNESS_LITTLE) {
FLATCC_RETURN_UNLESS_0(Schema_endianness_add(builder,
ns(Endianness_Little)), error);
@@ -440,9 +439,29 @@ ArrowErrorCode ArrowIpcEncoderEncodeSchema(struct
ArrowIpcEncoder* encoder,
FLATCC_RETURN_UNLESS_0(Schema_features_start(builder), error);
FLATCC_RETURN_UNLESS_0(Schema_features_end(builder), error);
+ return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowIpcEncoderEncodeSchema(struct ArrowIpcEncoder* encoder,
+ const struct ArrowSchema* schema,
+ struct ArrowError* error) {
+ NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL && schema
!= NULL);
+
+ struct ArrowIpcEncoderPrivate* private =
+ (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+
+ flatcc_builder_t* builder = &private->builder;
+
+ FLATCC_RETURN_UNLESS_0(Message_start_as_root(builder), error);
+
+ FLATCC_RETURN_UNLESS_0(Message_version_add(builder, ns(MetadataVersion_V5)),
error);
+
+ FLATCC_RETURN_UNLESS_0(Message_header_Schema_start(builder), error);
+ NANOARROW_RETURN_NOT_OK(ArrowIpcEncodeSchema(builder, schema, error));
FLATCC_RETURN_UNLESS_0(Message_header_Schema_end(builder), error);
FLATCC_RETURN_UNLESS_0(Message_bodyLength_add(builder, 0), error);
+
FLATCC_RETURN_IF_NULL(ns(Message_end_as_root(builder)), error);
return NANOARROW_OK;
}
@@ -602,3 +621,56 @@ ArrowErrorCode ArrowIpcEncoderEncodeSimpleRecordBatch(
return ArrowIpcEncoderEncodeRecordBatch(encoder, &buffer_encoder,
array_view, error);
}
+
+// Puts `footer` into its empty state: an initialized, empty block buffer and a
+// schema marked as released (release == NULL).
+void ArrowIpcFooterInit(struct ArrowIpcFooter* footer) {
+  ArrowBufferInit(&footer->record_batch_blocks);
+  footer->schema.release = NULL;
+}
+
+// Releases the schema held by `footer` (if there is one) and resets its
+// buffer of record batch blocks.
+void ArrowIpcFooterReset(struct ArrowIpcFooter* footer) {
+  struct ArrowSchema* held_schema = &footer->schema;
+  if (held_schema->release != NULL) {
+    ArrowSchemaRelease(held_schema);
+  }
+
+  ArrowBufferReset(&footer->record_batch_blocks);
+}
+
+// Encodes `footer` as the root Footer table of the encoder's flatcc builder:
+// metadata version V5, the footer's schema, then one Block per entry stored in
+// footer->record_batch_blocks. The encoded bytes are left in the builder.
+ArrowErrorCode ArrowIpcEncoderEncodeFooter(struct ArrowIpcEncoder* encoder,
+                                           const struct ArrowIpcFooter* footer,
+                                           struct ArrowError* error) {
+  NANOARROW_DCHECK(encoder != NULL && encoder->private_data != NULL && footer != NULL);
+
+  struct ArrowIpcEncoderPrivate* encoder_private =
+      (struct ArrowIpcEncoderPrivate*)encoder->private_data;
+  flatcc_builder_t* builder = &encoder_private->builder;
+
+  FLATCC_RETURN_UNLESS_0(Footer_start_as_root(builder), error);
+  FLATCC_RETURN_UNLESS_0(Footer_version_add(builder, ns(MetadataVersion_V5)), error);
+
+  // Footer.schema
+  FLATCC_RETURN_UNLESS_0(Footer_schema_start(builder), error);
+  const ArrowErrorCode schema_status =
+      ArrowIpcEncodeSchema(builder, &footer->schema, error);
+  if (schema_status != NANOARROW_OK) {
+    return schema_status;
+  }
+  FLATCC_RETURN_UNLESS_0(Footer_schema_end(builder), error);
+
+  // Footer.recordBatches: the buffer is a packed array of ArrowIpcFileBlock
+  const struct ArrowIpcFileBlock* file_blocks =
+      (struct ArrowIpcFileBlock*)footer->record_batch_blocks.data;
+  int64_t block_count =
+      footer->record_batch_blocks.size_bytes / sizeof(struct ArrowIpcFileBlock);
+
+  FLATCC_RETURN_UNLESS_0(Footer_recordBatches_start(builder), error);
+  struct ns(Block)* flatcc_RecordBatch_blocks =
+      ns(Footer_recordBatches_extend(builder, block_count));
+  FLATCC_RETURN_IF_NULL(flatcc_RecordBatch_blocks, error);
+
+  int64_t index = 0;
+  while (index < block_count) {
+    const struct ArrowIpcFileBlock* source = &file_blocks[index];
+    struct ns(Block) encoded = {
+        source->offset,
+        source->metadata_length,
+        source->body_length,
+    };
+    flatcc_RecordBatch_blocks[index] = encoded;
+    index++;
+  }
+  FLATCC_RETURN_UNLESS_0(Footer_recordBatches_end(builder), error);
+
+  FLATCC_RETURN_IF_NULL(ns(Footer_end_as_root(builder)), error);
+  return NANOARROW_OK;
+}
diff --git a/src/nanoarrow/ipc/encoder_test.cc
b/src/nanoarrow/ipc/encoder_test.cc
index 78ca0978..bbeb06f7 100644
--- a/src/nanoarrow/ipc/encoder_test.cc
+++ b/src/nanoarrow/ipc/encoder_test.cc
@@ -30,6 +30,9 @@ struct ArrowIpcEncoderPrivate {
};
}
+// "ARROW1" followed by an explicit NUL and the literal's implicit terminator.
+#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0"
+// The two-argument form also compiles before C++17.
+static_assert(sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) == 8,
+              "the padded magic must occupy exactly 8 bytes");
+
TEST(NanoarrowIpcTest, NanoarrowIpcEncoderConstruction) {
nanoarrow::ipc::UniqueEncoder encoder;
@@ -70,3 +73,31 @@ TEST(NanoarrowIpcTest, NanoarrowIpcEncoderConstruction) {
EXPECT_GT(buffer->size_bytes, 8 + sizeof("hello world"));
EXPECT_EQ(buffer->size_bytes % 8, 0);
}
+
+// A footer wrapping an empty struct schema must encode to more bytes than the
+// schema message alone.
+TEST(NanoarrowIpcTest, NanoarrowIpcFooterEncoding) {
+  struct ArrowError error;
+
+  nanoarrow::ipc::UniqueEncoder encoder;
+  ASSERT_EQ(ArrowIpcEncoderInit(encoder.get()), NANOARROW_OK);
+
+  nanoarrow::ipc::UniqueFooter footer;
+  ASSERT_EQ(ArrowSchemaInitFromType(&footer->schema, NANOARROW_TYPE_STRUCT),
+            NANOARROW_OK);
+
+  // Encode the footer.
+  nanoarrow::UniqueBuffer footer_buffer;
+  EXPECT_EQ(ArrowIpcEncoderEncodeFooter(encoder.get(), footer.get(), &error),
+            NANOARROW_OK)
+      << error.message;
+  EXPECT_EQ(ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/false,
+                                          footer_buffer.get()),
+            NANOARROW_OK);
+
+  // Encode the very same schema on its own, reusing the encoder.
+  nanoarrow::UniqueBuffer raw_schema_buffer;
+  EXPECT_EQ(ArrowIpcEncoderEncodeSchema(encoder.get(), &footer->schema, &error),
+            NANOARROW_OK)
+      << error.message;
+  EXPECT_EQ(ArrowIpcEncoderFinalizeBuffer(encoder.get(), /*encapsulate=*/false,
+                                          raw_schema_buffer.get()),
+            NANOARROW_OK);
+
+  EXPECT_GT(footer_buffer->size_bytes, raw_schema_buffer->size_bytes);
+}
diff --git a/src/nanoarrow/ipc/reader.c b/src/nanoarrow/ipc/reader.c
index 5415306d..3b22acf8 100644
--- a/src/nanoarrow/ipc/reader.c
+++ b/src/nanoarrow/ipc/reader.c
@@ -79,6 +79,8 @@ static void ArrowIpcInputStreamBufferRelease(struct
ArrowIpcInputStream* stream)
ArrowErrorCode ArrowIpcInputStreamInitBuffer(struct ArrowIpcInputStream*
stream,
struct ArrowBuffer* input) {
+ NANOARROW_DCHECK(stream != NULL);
+
struct ArrowIpcInputStreamBufferPrivate* private_data =
(struct ArrowIpcInputStreamBufferPrivate*)ArrowMalloc(
sizeof(struct ArrowIpcInputStreamBufferPrivate));
@@ -154,8 +156,9 @@ static ArrowErrorCode ArrowIpcInputStreamFileRead(struct
ArrowIpcInputStream* st
ArrowErrorCode ArrowIpcInputStreamInitFile(struct ArrowIpcInputStream* stream,
void* file_ptr, int
close_on_release) {
+ NANOARROW_DCHECK(stream != NULL);
if (file_ptr == NULL) {
- return EINVAL;
+ return errno ? errno : EINVAL;
}
struct ArrowIpcInputStreamFilePrivate* private_data =
diff --git a/src/nanoarrow/ipc/writer.c b/src/nanoarrow/ipc/writer.c
index 6a5372a4..bff7b527 100644
--- a/src/nanoarrow/ipc/writer.c
+++ b/src/nanoarrow/ipc/writer.c
@@ -16,13 +16,16 @@
// under the License.
#include <errno.h>
-#include <inttypes.h>
#include <stdio.h>
#include <string.h>
+#include "flatcc/flatcc_builder.h"
+#include "nanoarrow/ipc/flatcc_generated.h"
#include "nanoarrow/nanoarrow.h"
#include "nanoarrow/nanoarrow_ipc.h"
+#define ns(x) FLATBUFFERS_WRAP_NAMESPACE(org_apache_arrow_flatbuf, x)
+
void ArrowIpcOutputStreamMove(struct ArrowIpcOutputStream* src,
struct ArrowIpcOutputStream* dst) {
NANOARROW_DCHECK(src != NULL && dst != NULL);
@@ -31,6 +34,19 @@ void ArrowIpcOutputStreamMove(struct ArrowIpcOutputStream*
src,
src->release = NULL;
}
+// Writes all of `data` to `stream`, calling stream->write() again after each
+// partial write until nothing remains.
+//
+// Returns NANOARROW_OK once every byte was accepted, the callback's error code
+// if a write fails, or EIO if the callback reports success with a byte count
+// outside (0, remaining].
+ArrowErrorCode ArrowIpcOutputStreamWrite(struct ArrowIpcOutputStream* stream,
+                                         struct ArrowBufferView data,
+                                         struct ArrowError* error) {
+  while (data.size_bytes != 0) {
+    int64_t bytes_written = 0;
+    NANOARROW_RETURN_NOT_OK(stream->write(stream, data.data.as_uint8, data.size_bytes,
+                                          &bytes_written, error));
+
+    // A count of zero would make this loop spin forever; a negative or too
+    // large count would move the view outside of the caller's buffer.
+    if (bytes_written <= 0 || bytes_written > data.size_bytes) {
+      ArrowErrorSet(error, "Output stream reported writing %ld of %ld requested bytes",
+                    (long)bytes_written, (long)data.size_bytes);
+      return EIO;
+    }
+
+    data.size_bytes -= bytes_written;
+    data.data.as_uint8 += bytes_written;
+  }
+  return NANOARROW_OK;
+}
+
struct ArrowIpcOutputStreamBufferPrivate {
struct ArrowBuffer* output;
};
@@ -136,7 +152,7 @@ ArrowErrorCode ArrowIpcOutputStreamInitFile(struct
ArrowIpcOutputStream* stream,
void* file_ptr, int
close_on_release) {
NANOARROW_DCHECK(stream != NULL);
if (file_ptr == NULL) {
- return EINVAL;
+ return errno ? errno : EINVAL;
}
struct ArrowIpcOutputStreamFilePrivate* private_data =
@@ -161,20 +177,11 @@ struct ArrowIpcWriterPrivate {
struct ArrowIpcOutputStream output_stream;
struct ArrowBuffer buffer;
struct ArrowBuffer body_buffer;
-};
-ArrowErrorCode ArrowIpcOutputStreamWrite(struct ArrowIpcOutputStream* stream,
- struct ArrowBufferView data,
- struct ArrowError* error) {
- while (data.size_bytes != 0) {
- int64_t bytes_written = 0;
- NANOARROW_RETURN_NOT_OK(stream->write(stream, data.data.as_uint8,
data.size_bytes,
- &bytes_written, error));
- data.size_bytes -= bytes_written;
- data.data.as_uint8 += bytes_written;
- }
- return NANOARROW_OK;
-}
+ int writing_file;
+ int64_t bytes_written;
+ struct ArrowIpcFooter footer;
+};
ArrowErrorCode ArrowIpcWriterInit(struct ArrowIpcWriter* writer,
struct ArrowIpcOutputStream* output_stream) {
@@ -192,6 +199,10 @@ ArrowErrorCode ArrowIpcWriterInit(struct ArrowIpcWriter*
writer,
ArrowBufferInit(&private->buffer);
ArrowBufferInit(&private->body_buffer);
+ private->writing_file = 0;
+ private->bytes_written = 0;
+ ArrowIpcFooterInit(&private->footer);
+
writer->private_data = private;
return NANOARROW_OK;
}
@@ -208,6 +219,8 @@ void ArrowIpcWriterReset(struct ArrowIpcWriter* writer) {
ArrowBufferReset(&private->buffer);
ArrowBufferReset(&private->body_buffer);
+ ArrowIpcFooterReset(&private->footer);
+
ArrowFree(private);
}
memset(writer, 0, sizeof(struct ArrowIpcWriter));
@@ -242,12 +255,19 @@ ArrowErrorCode ArrowIpcWriterWriteSchema(struct
ArrowIpcWriter* writer,
(struct ArrowIpcWriterPrivate*)writer->private_data;
NANOARROW_ASSERT_OK(ArrowBufferResize(&private->buffer, 0, 0));
+
NANOARROW_RETURN_NOT_OK(ArrowIpcEncoderEncodeSchema(&private->encoder, in,
error));
NANOARROW_RETURN_NOT_OK_WITH_ERROR(
ArrowIpcEncoderFinalizeBuffer(&private->encoder, /*encapsulate=*/1,
&private->buffer),
error);
+ if (private->writing_file) {
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowSchemaDeepCopy(in,
&private->footer.schema),
+ error);
+ }
+ private->bytes_written += private->buffer.size_bytes;
+
return ArrowIpcOutputStreamWrite(&private->output_stream,
ArrowBufferToBufferView(&private->buffer),
error);
}
@@ -261,8 +281,9 @@ ArrowErrorCode ArrowIpcWriterWriteArrayView(struct
ArrowIpcWriter* writer,
if (in == NULL) {
int32_t eos[] = {-1, 0};
- struct ArrowBufferView data = {.data.as_int32 = eos, .size_bytes =
sizeof(eos)};
- return ArrowIpcOutputStreamWrite(&private->output_stream, data, error);
+ private->bytes_written += sizeof(eos);
+ struct ArrowBufferView eos_view = {.data.as_int32 = eos, .size_bytes =
sizeof(eos)};
+ return ArrowIpcOutputStreamWrite(&private->output_stream, eos_view, error);
}
NANOARROW_ASSERT_OK(ArrowBufferResize(&private->buffer, 0, 0));
@@ -275,6 +296,20 @@ ArrowErrorCode ArrowIpcWriterWriteArrayView(struct
ArrowIpcWriter* writer,
&private->buffer),
error);
+ if (private->writing_file) {
+ _NANOARROW_CHECK_RANGE(private->buffer.size_bytes, 0, INT32_MAX);
+ struct ArrowIpcFileBlock block = {
+ .offset = private->bytes_written,
+ .metadata_length = (int32_t) private->buffer.size_bytes,
+ .body_length = private->body_buffer.size_bytes,
+ };
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+ ArrowBufferAppend(&private->footer.record_batch_blocks, &block,
sizeof(block)),
+ error);
+ }
+ private->bytes_written += private->buffer.size_bytes;
+ private->bytes_written += private->body_buffer.size_bytes;
+
NANOARROW_RETURN_NOT_OK(ArrowIpcOutputStreamWrite(
&private->output_stream, ArrowBufferToBufferView(&private->buffer),
error));
NANOARROW_RETURN_NOT_OK(ArrowIpcOutputStreamWrite(
@@ -329,3 +364,66 @@ ArrowErrorCode ArrowIpcWriterWriteArrayStream(struct
ArrowIpcWriter* writer,
ArrowArrayViewReset(&array_view);
return NANOARROW_OK;
}
+
+// "ARROW1" plus an explicit NUL; sizeof() also counts the implicit terminator.
+#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0"
+
+// Switches `writer` into file mode by writing the padded leading magic to its
+// output stream. Debug builds assert that nothing has been written yet and
+// that file mode is not already active.
+ArrowErrorCode ArrowIpcWriterStartFile(struct ArrowIpcWriter* writer,
+                                       struct ArrowError* error) {
+  NANOARROW_DCHECK(writer != NULL && writer->private_data != NULL);
+
+  struct ArrowIpcWriterPrivate* private =
+      (struct ArrowIpcWriterPrivate*)writer->private_data;
+
+  NANOARROW_DCHECK(!private->writing_file && private->bytes_written == 0);
+
+  struct ArrowBufferView leading_magic;
+  leading_magic.data.data = NANOARROW_IPC_FILE_PADDED_MAGIC;
+  leading_magic.size_bytes = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC);
+
+  const ArrowErrorCode write_status =
+      ArrowIpcOutputStreamWrite(&private->output_stream, leading_magic, error);
+  if (write_status != NANOARROW_OK) {
+    return write_status;
+  }
+
+  // Only record the mode switch once the magic is actually in the stream.
+  private->bytes_written = leading_magic.size_bytes;
+  private->writing_file = 1;
+  return NANOARROW_OK;
+}
+
+// Completes a file started with ArrowIpcWriterStartFile(): encodes the footer
+// collected so far, then writes the footer, its int32 size and the unpadded
+// trailing magic in a single call to the output stream.
+ArrowErrorCode ArrowIpcWriterFinalizeFile(struct ArrowIpcWriter* writer,
+                                          struct ArrowError* error) {
+  NANOARROW_DCHECK(writer != NULL && writer->private_data != NULL);
+
+  struct ArrowIpcWriterPrivate* private =
+      (struct ArrowIpcWriterPrivate*)writer->private_data;
+
+  NANOARROW_DCHECK(private->writing_file);
+
+  // Encode the footer into the (emptied) scratch buffer.
+  NANOARROW_ASSERT_OK(ArrowBufferResize(&private->buffer, 0, 0));
+  const ArrowErrorCode encode_status =
+      ArrowIpcEncoderEncodeFooter(&private->encoder, &private->footer, error);
+  if (encode_status != NANOARROW_OK) {
+    return encode_status;
+  }
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowIpcEncoderFinalizeBuffer(&private->encoder, /*encapsulate=*/0,
+                                    &private->buffer),
+      error);
+
+  // The footer size is stored as an int32, so it has to fit in one.
+  _NANOARROW_CHECK_RANGE(private->buffer.size_bytes, 0, INT32_MAX);
+  int32_t size = (int32_t) private->buffer.size_bytes;
+  // we don't pad the magic at the end of the file
+  struct ArrowStringView unpadded_magic = ArrowCharView(NANOARROW_IPC_FILE_PADDED_MAGIC);
+  NANOARROW_DCHECK(unpadded_magic.size_bytes == 6);
+
+  // just append to private->buffer instead of queueing two more tiny writes
+  NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+      ArrowBufferReserve(&private->buffer, sizeof(size) + unpadded_magic.size_bytes),
+      error);
+
+  // The size is written in little-endian byte order.
+  if (ArrowIpcSystemEndianness() == NANOARROW_IPC_ENDIANNESS_BIG) {
+    size = (int32_t)bswap32((uint32_t)size);
+  }
+  NANOARROW_ASSERT_OK(ArrowBufferAppendInt32(&private->buffer, size));
+  NANOARROW_ASSERT_OK(ArrowBufferAppendStringView(&private->buffer, unpadded_magic));
+
+  const ArrowErrorCode write_status = ArrowIpcOutputStreamWrite(
+      &private->output_stream, ArrowBufferToBufferView(&private->buffer), error);
+  if (write_status != NANOARROW_OK) {
+    return write_status;
+  }
+  private->bytes_written += private->buffer.size_bytes;
+  return NANOARROW_OK;
+}
diff --git a/src/nanoarrow/ipc/writer_test.cc b/src/nanoarrow/ipc/writer_test.cc
index 1730bd4d..5eed0809 100644
--- a/src/nanoarrow/ipc/writer_test.cc
+++ b/src/nanoarrow/ipc/writer_test.cc
@@ -63,7 +63,8 @@ TEST(NanoarrowIpcWriter, OutputStreamFile) {
fseek(file_ptr, 6, SEEK_SET);
nanoarrow::ipc::UniqueOutputStream stream;
- ASSERT_EQ(ArrowIpcOutputStreamInitFile(stream.get(), file_ptr, 1),
NANOARROW_OK);
+ ASSERT_EQ(ArrowIpcOutputStreamInitFile(stream.get(), file_ptr,
/*close_on_release=*/1),
+ NANOARROW_OK);
struct ArrowError error;
@@ -88,3 +89,103 @@ TEST(NanoarrowIpcWriter, OutputStreamFile) {
EXPECT_EQ(std::string(buffer.data(), buffer.size()),
"HELLO " + message + message + message + message);
}
+
+TEST(NanoarrowIpcWriter, OutputStreamFileError) {  // error codes expected when the FILE* handed to the output stream is null
+ nanoarrow::ipc::UniqueOutputStream stream;
+ EXPECT_EQ(ArrowIpcOutputStreamInitFile(stream.get(), nullptr,
/*close_on_release=*/1),
+ EINVAL);  // null FILE* with no preceding fopen() failure: EINVAL is expected
+
+ auto phony_path = __FILE__ + std::string(".phony");  // this source file's path plus ".phony"; presumed not to exist
+ FILE* file_ptr = fopen(phony_path.c_str(), "rb");
+ ASSERT_EQ(file_ptr, nullptr);  // opening the phony path for reading must have failed
+ EXPECT_EQ(ArrowIpcOutputStreamInitFile(stream.get(), file_ptr,
/*close_on_release=*/1),
+ ENOENT);  // NOTE(review): presumably the errno left by the failed fopen() is reported here; confirm in ArrowIpcOutputStreamInitFile
+}
+
+struct ArrowIpcWriterPrivate {  // test-local declaration used to inspect writer->private_data; NOTE(review): presumably must match the definition in writer.c field-for-field; confirm
+ struct ArrowIpcEncoder encoder;
+ struct ArrowIpcOutputStream output_stream;
+ struct ArrowBuffer buffer;
+ struct ArrowBuffer body_buffer;
+
+ int writing_file;  // the FileWriting test expects this to be false after init and true after ArrowIpcWriterStartFile()
+ int64_t bytes_written;  // the FileWriting test expects this to grow after every write step
+ struct ArrowIpcFooter footer;  // the FileWriting test inspects the cached schema and record batch blocks here
+};
+
+#define NANOARROW_IPC_FILE_PADDED_MAGIC "ARROW1\0"  // sizeof() is 8: six characters, the explicit NUL and the literal's implicit terminator
+
+TEST(NanoarrowIpcWriter, FileWriting) {  // walks a writer through StartFile, schema, batch, end-of-stream and FinalizeFile, checking its private state after each step
+ struct ArrowError error;
+
+ nanoarrow::UniqueBuffer output;
+ nanoarrow::ipc::UniqueOutputStream stream;
+ ASSERT_EQ(ArrowIpcOutputStreamInitBuffer(stream.get(), output.get()),
NANOARROW_OK);
+
+ nanoarrow::ipc::UniqueWriter writer;
+ ASSERT_EQ(ArrowIpcWriterInit(writer.get(), stream.get()), NANOARROW_OK);
+
+ // the writer starts out in stream mode (verified by peeking at its private data)
+ auto* p = static_cast<struct ArrowIpcWriterPrivate*>(writer->private_data);
+ EXPECT_FALSE(p->writing_file);
+ EXPECT_EQ(p->bytes_written, 0);
+ EXPECT_EQ(p->footer.schema.release, nullptr);
+ EXPECT_EQ(p->footer.record_batch_blocks.size_bytes, 0);
+
+ // now it switches to file mode
+ EXPECT_EQ(ArrowIpcWriterStartFile(writer.get(), &error), NANOARROW_OK) <<
error.message;
+ EXPECT_TRUE(p->writing_file);
+ // and has written the leading magic (sizeof counts the literal's trailing NUL bytes too)
+ EXPECT_EQ(p->bytes_written, sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC));
+ // but not a schema or any record batches
+ EXPECT_EQ(p->footer.schema.release, nullptr);
+ EXPECT_EQ(p->footer.record_batch_blocks.size_bytes, 0);
+
+ // write a schema (a struct type initialized without any further setup)
+ nanoarrow::UniqueSchema schema;
+ ASSERT_EQ(ArrowSchemaInitFromType(schema.get(), NANOARROW_TYPE_STRUCT),
NANOARROW_OK);
+ EXPECT_EQ(ArrowIpcWriterWriteSchema(writer.get(), schema.get(), &error),
NANOARROW_OK)
+ << error.message;
+ // more has been written
+ auto after_schema = p->bytes_written;
+ EXPECT_GT(after_schema, sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC));
+ // the schema is cached in the writer's footer for later finalization
+ EXPECT_NE(p->footer.schema.release, nullptr);
+ // still no record batches
+ EXPECT_EQ(p->footer.record_batch_blocks.size_bytes, 0);
+
+ // write a batch (an array initialized from the same schema, wrapped in an array view)
+ nanoarrow::UniqueArray array;
+ nanoarrow::UniqueArrayView array_view;
+ ASSERT_EQ(ArrowArrayInitFromSchema(array.get(), schema.get(), &error),
NANOARROW_OK)
+ << error.message;
+ ASSERT_EQ(ArrowArrayViewInitFromSchema(array_view.get(), schema.get(),
&error),
+ NANOARROW_OK)
+ << error.message;
+ ASSERT_EQ(ArrowArrayViewSetArray(array_view.get(), array.get(), &error),
NANOARROW_OK)
+ << error.message;
+ EXPECT_EQ(ArrowIpcWriterWriteArrayView(writer.get(), array_view.get(),
&error),
+ NANOARROW_OK)
+ << error.message;
+ // more has been written
+ auto after_batch = p->bytes_written;
+ EXPECT_GT(after_batch, after_schema);
+ // one record batch's block is stored
+ EXPECT_EQ(p->footer.record_batch_blocks.size_bytes, sizeof(struct
ArrowIpcFileBlock));
+
+ // end the stream by writing a null array view
+ EXPECT_EQ(ArrowIpcWriterWriteArrayView(writer.get(), nullptr, &error),
NANOARROW_OK)
+ << error.message;
+ // more has been written
+ auto after_eos = p->bytes_written;
+ EXPECT_GT(after_eos, after_batch);
+ // EOS isn't stored in the blocks
+ EXPECT_EQ(p->footer.record_batch_blocks.size_bytes, sizeof(struct
ArrowIpcFileBlock));
+
+ // finalize the file
+ EXPECT_EQ(ArrowIpcWriterFinalizeFile(writer.get(), &error), NANOARROW_OK)
+ << error.message;
+ // more has been written (only growth is checked here, not the footer's contents)
+ auto after_footer = p->bytes_written;
+ EXPECT_GT(after_footer, after_eos);
+}
diff --git a/src/nanoarrow/nanoarrow_ipc.h b/src/nanoarrow/nanoarrow_ipc.h
index 1e58c8dc..b11a0360 100644
--- a/src/nanoarrow/nanoarrow_ipc.h
+++ b/src/nanoarrow/nanoarrow_ipc.h
@@ -81,6 +81,14 @@
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterWriteArrayView)
#define ArrowIpcWriterWriteArrayStream \
NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterWriteArrayStream)
+#define ArrowIpcWriterStartFile \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterStartFile)
+#define ArrowIpcWriterFinalizeFile \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcWriterFinalizeFile)
+#define ArrowIpcFooterInit NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowIpcFooterInit)
+#define ArrowIpcFooterReset NANOARROW_SYMBOL(NANOARROW_NAMESPACE,
ArrowIpcFooterReset)
+#define ArrowIpcEncoderEncodeFooter \
+ NANOARROW_SYMBOL(NANOARROW_NAMESPACE, ArrowIpcEncoderEncodeFooter)
#endif
@@ -552,7 +560,71 @@ ArrowErrorCode ArrowIpcWriterWriteArrayView(struct
ArrowIpcWriter* writer,
ArrowErrorCode ArrowIpcWriterWriteArrayStream(struct ArrowIpcWriter* writer,
struct ArrowArrayStream* in,
struct ArrowError* error);
+
+/// \brief Start writing an IPC file
+///
+/// Writes the Arrow IPC magic and sets the writer up to track written blocks.
+ArrowErrorCode ArrowIpcWriterStartFile(struct ArrowIpcWriter* writer,
+ struct ArrowError* error);
+
+/// \brief Finish writing an IPC file
+///
+/// Writes the IPC file's Footer, footer size, and ending magic.
+ArrowErrorCode ArrowIpcWriterFinalizeFile(struct ArrowIpcWriter* writer,
+ struct ArrowError* error);
/// @}
+
+// Internal APIs:
+
+/// \brief Represents a byte range in an IPC file.
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+struct ArrowIpcFileBlock {
+ /// \brief offset relative to the first byte of the file.
+ int64_t offset;
+ /// \brief length of encapsulated metadata Message (including padding)
+ int32_t metadata_length;
+ /// \brief length of contiguous body buffers (including padding)
+ int64_t body_length;
+};
+
+/// \brief A Footer for use in an IPC file
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+///
+/// This structure is intended to be allocated by the caller, initialized using
+/// ArrowIpcFooterInit(), and released with ArrowIpcFooterReset().
+struct ArrowIpcFooter {
+ /// \brief the Footer's embedded Schema
+ struct ArrowSchema schema;
+ /// \brief all blocks containing RecordBatch Messages, stored as consecutive struct ArrowIpcFileBlock entries
+ struct ArrowBuffer record_batch_blocks;
+};
+
+/// \brief Initialize a Footer
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+void ArrowIpcFooterInit(struct ArrowIpcFooter* footer);
+
+/// \brief Release all resources attached to a footer
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+void ArrowIpcFooterReset(struct ArrowIpcFooter* footer);
+
+/// \brief Encode a Footer for use in an IPC file
+///
+/// \warning This API is currently only public for use in integration testing;
+/// use at your own risk.
+///
+/// Returns ENOMEM if allocation fails, NANOARROW_OK otherwise.
+ArrowErrorCode ArrowIpcEncoderEncodeFooter(struct ArrowIpcEncoder* encoder,
+ const struct ArrowIpcFooter* footer,
+ struct ArrowError* error);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/nanoarrow/nanoarrow_ipc.hpp b/src/nanoarrow/nanoarrow_ipc.hpp
index 6cf7bc22..e1b90cf1 100644
--- a/src/nanoarrow/nanoarrow_ipc.hpp
+++ b/src/nanoarrow/nanoarrow_ipc.hpp
@@ -41,6 +41,22 @@ inline void release_pointer(struct ArrowIpcDecoder* data) {
ArrowIpcDecoderReset(data);
}
+template <>  // initialization hook for struct ArrowIpcFooter: delegates to ArrowIpcFooterInit()
+inline void init_pointer(struct ArrowIpcFooter* data) {
+ ArrowIpcFooterInit(data);
+}
+
+template <>  // move hook for struct ArrowIpcFooter: each member is moved with its own Move function
+inline void move_pointer(struct ArrowIpcFooter* src, struct ArrowIpcFooter*
dst) {
+ ArrowSchemaMove(&src->schema, &dst->schema);
+ ArrowBufferMove(&src->record_batch_blocks, &dst->record_batch_blocks);
+}
+
+template <>  // release hook for struct ArrowIpcFooter: delegates to ArrowIpcFooterReset()
+inline void release_pointer(struct ArrowIpcFooter* data) {
+ ArrowIpcFooterReset(data);
+}
+
template <>
inline void init_pointer(struct ArrowIpcEncoder* data) {
data->private_data = nullptr;
@@ -128,6 +144,9 @@ namespace ipc {
/// \brief Class wrapping a unique struct ArrowIpcDecoder
using UniqueDecoder = internal::Unique<struct ArrowIpcDecoder>;
+/// \brief Class wrapping a unique struct ArrowIpcFooter
+using UniqueFooter = internal::Unique<struct ArrowIpcFooter>;
+
/// \brief Class wrapping a unique struct ArrowIpcEncoder
using UniqueEncoder = internal::Unique<struct ArrowIpcEncoder>;