Re: [PR] feat: add Footer decoding [arrow-nanoarrow]

via GitHub Tue, 03 Sep 2024 13:06:16 -0700


paleolimbot commented on code in PR #598:
URL: https://github.com/apache/arrow-nanoarrow/pull/598#discussion_r1742621810



##########
src/nanoarrow/integration/ipc_integration.cc:
##########
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
     // Footer).
     File ipc_file;
     NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
-    return FromIpcFile(ipc_file, error);
-  }
+    auto bytes = ipc_file.read();
 
-  ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
-    char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
-    if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
-      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
-                    sizeof(prefix), ftell(ipc_file));
+    auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+                    strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+    if (bytes.size() < min_size) {
+      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld", 
min_size,
+                    bytes.size());
       return EINVAL;
     }
 
-    if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0) 
{
+    if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+               sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
       ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
       return EINVAL;
     }
 
-    nanoarrow::ipc::UniqueInputStream input_stream;
-    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
-                                    /*close_on_release=*/false),
-        error);
+    nanoarrow::ipc::UniqueDecoder decoder;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()), 
error);
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));

Review Comment:
   Definitely possible to consolidate them...I think I separated them initially 
in case somebody owned their whole pipeline and wanted to skip it as an 
optimization (which is probably not actually any faster, and probably nobody 
will want do do). For some flatbuffer uses this matters ( 
http://flatgeobuf.org/#why-am-i-not-getting-expected-performance-in-gdal ).



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: add Footer decoding [arrow-nanoarrow]

Reply via email to