paleolimbot commented on code in PR #598:
URL: https://github.com/apache/arrow-nanoarrow/pull/598#discussion_r1742621810
##########
src/nanoarrow/integration/ipc_integration.cc:
##########
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
// Footer).
File ipc_file;
NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
- return FromIpcFile(ipc_file, error);
- }
+ auto bytes = ipc_file.read();
- ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
- char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
- if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
- ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
- sizeof(prefix), ftell(ipc_file));
+ auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+ strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+ if (bytes.size() < min_size) {
+ ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
min_size,
+ bytes.size());
return EINVAL;
}
- if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0)
{
+ if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+ sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
return EINVAL;
}
- nanoarrow::ipc::UniqueInputStream input_stream;
- NANOARROW_RETURN_NOT_OK_WITH_ERROR(
- ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
- /*close_on_release=*/false),
- error);
+ nanoarrow::ipc::UniqueDecoder decoder;
+ NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()),
error);
+ NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+ decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())},
error));
+ NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+ decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())},
error));
Review Comment:
Definitely possible to consolidate them...I think I separated them initially
in case somebody owned their whole pipeline and wanted to skip it as an
optimization (which is probably not actually any faster, and probably nobody
will want do do). For some flatbuffer uses this matters (
http://flatgeobuf.org/#why-am-i-not-getting-expected-performance-in-gdal ).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]