Re: [PR] feat: add Footer decoding [arrow-nanoarrow]

via GitHub Fri, 30 Aug 2024 12:17:55 -0700


bkietz commented on code in PR #598:
URL: https://github.com/apache/arrow-nanoarrow/pull/598#discussion_r1739298235



##########
src/nanoarrow/integration/ipc_integration.cc:
##########
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
     // Footer).
     File ipc_file;
     NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
-    return FromIpcFile(ipc_file, error);
-  }
+    auto bytes = ipc_file.read();
 
-  ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
-    char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
-    if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
-      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
-                    sizeof(prefix), ftell(ipc_file));
+    auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+                    strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+    if (bytes.size() < min_size) {
+      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld", 
min_size,
+                    bytes.size());
       return EINVAL;
     }
 
-    if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0) 
{
+    if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+               sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
       ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
       return EINVAL;
     }
 
-    nanoarrow::ipc::UniqueInputStream input_stream;
-    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
-                                    /*close_on_release=*/false),
-        error);
+    nanoarrow::ipc::UniqueDecoder decoder;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()), 
error);
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));

Review Comment:
   I'm following the pattern of schema decoding, where the two are separated. 
Verification checks offsets to ensure all flatbuffers are in bounds, while 
decode actually copies flatbuffers into nanoarrow data structures. I'd be fine 
consolidating them but that should be a follow up
   
   @paleolimbot 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat: add Footer decoding [arrow-nanoarrow]

Reply via email to