zeroshade commented on code in PR #598:
URL: https://github.com/apache/arrow-nanoarrow/pull/598#discussion_r1739011979


##########
src/nanoarrow/integration/ipc_integration.cc:
##########
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
     // Footer).
     File ipc_file;
     NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
-    return FromIpcFile(ipc_file, error);
-  }
+    auto bytes = ipc_file.read();
 
-  ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
-    char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
-    if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
-      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
-                    sizeof(prefix), ftell(ipc_file));
+    auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+                    strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+    if (bytes.size() < min_size) {
+      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld", 
min_size,
+                    bytes.size());
       return EINVAL;
     }
 
-    if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0) 
{
+    if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+               sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
       ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
       return EINVAL;
     }
 
-    nanoarrow::ipc::UniqueInputStream input_stream;
-    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
-                                    /*close_on_release=*/false),
-        error);
+    nanoarrow::ipc::UniqueDecoder decoder;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()), 
error);
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));
 
-    nanoarrow::UniqueArrayStream array_stream;
     NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowIpcArrayStreamReaderInit(array_stream.get(), input_stream.get(),
-                                      /*options=*/nullptr),
-        error);
+        ArrowSchemaDeepCopy(&decoder->footer->schema, schema.get()), error);
+    NANOARROW_RETURN_NOT_OK(
+        ArrowIpcDecoderSetSchema(decoder.get(), &decoder->footer->schema, 
error));
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
+        ArrowIpcDecoderSetEndianness(decoder.get(), decoder->endianness), 
error);

Review Comment:
   Isn't endianness a flag in the schema?



##########
src/nanoarrow/integration/ipc_integration.cc:
##########
@@ -166,35 +169,62 @@ struct MaterializedArrayStream {
     // Footer).
     File ipc_file;
     NANOARROW_RETURN_NOT_OK(ipc_file.open(path, "rb", error));
-    return FromIpcFile(ipc_file, error);
-  }
+    auto bytes = ipc_file.read();
 
-  ArrowErrorCode FromIpcFile(FILE* ipc_file, struct ArrowError* error) {
-    char prefix[sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)] = {};
-    if (fread(&prefix, 1, sizeof(prefix), ipc_file) < sizeof(prefix)) {
-      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld",
-                    sizeof(prefix), ftell(ipc_file));
+    auto min_size = sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC) + sizeof(int32_t) +
+                    strlen(NANOARROW_IPC_FILE_PADDED_MAGIC);
+    if (bytes.size() < min_size) {
+      ArrowErrorSet(error, "Expected file of more than %lu bytes, got %ld", 
min_size,
+                    bytes.size());
       return EINVAL;
     }
 
-    if (memcmp(&prefix, NANOARROW_IPC_FILE_PADDED_MAGIC, sizeof(prefix)) != 0) 
{
+    if (memcmp(bytes.data(), NANOARROW_IPC_FILE_PADDED_MAGIC,
+               sizeof(NANOARROW_IPC_FILE_PADDED_MAGIC)) != 0) {
       ArrowErrorSet(error, "File did not begin with 'ARROW1\\0\\0'");
       return EINVAL;
     }
 
-    nanoarrow::ipc::UniqueInputStream input_stream;
-    NANOARROW_RETURN_NOT_OK_WITH_ERROR(
-        ArrowIpcInputStreamInitFile(input_stream.get(), ipc_file,
-                                    /*close_on_release=*/false),
-        error);
+    nanoarrow::ipc::UniqueDecoder decoder;
+    NANOARROW_RETURN_NOT_OK_WITH_ERROR(ArrowIpcDecoderInit(decoder.get()), 
error);
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderVerifyFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));
+    NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderDecodeFooter(
+        decoder.get(), {{bytes.data()}, static_cast<int64_t>(bytes.size())}, 
error));

Review Comment:
   Should verify and decode be the same function rather than separate functions?



##########
src/nanoarrow/ipc/decoder.c:
##########
@@ -1053,6 +1062,85 @@ ArrowErrorCode ArrowIpcDecoderVerifyHeader(struct 
ArrowIpcDecoder* decoder,
   return NANOARROW_OK;
 }
 
+ArrowErrorCode ArrowIpcDecoderPeekFooter(struct ArrowIpcDecoder* decoder,
+                                         struct ArrowBufferView data,
+                                         struct ArrowError* error) {
+  struct ArrowIpcDecoderPrivate* private_data =
+      (struct ArrowIpcDecoderPrivate*)decoder->private_data;
+
+  ArrowIpcDecoderResetHeaderInfo(decoder);
+  if (data.size_bytes < (int)strlen(NANOARROW_IPC_MAGIC) + 
(int)sizeof(int32_t)) {
+    ArrowErrorSet(error,
+                  "Expected data of at least 10 bytes but only %" PRId64
+                  " bytes are available",
+                  data.size_bytes);
+    return ESPIPE;
+  }
+
+  const char* data_end = data.data.as_char + data.size_bytes;
+  const char* magic = data_end - strlen(NANOARROW_IPC_MAGIC);
+  const char* footer_size_data = magic - sizeof(int32_t);
+
+  if (memcmp(magic, NANOARROW_IPC_MAGIC, strlen(NANOARROW_IPC_MAGIC)) != 0) {
+    ArrowErrorSet(error, "Expected file to end with ARROW1 but got %s", 
data_end);
+    return EINVAL;
+  }
+
+  int32_t footer_size;
+  memcpy(&footer_size, footer_size_data, sizeof(footer_size));
+  if (private_data->system_endianness == NANOARROW_IPC_ENDIANNESS_BIG) {
+    footer_size = bswap32(footer_size);
+  }
+
+  if (footer_size < 0) {
+    ArrowErrorSet(error, "Expected footer size > 0 but found footer size of %d 
bytes",
+                  footer_size);
+    return EINVAL;
+  }
+
+  decoder->header_size_bytes = footer_size;
+  return NANOARROW_OK;
+}
+
+ArrowErrorCode ArrowIpcDecoderVerifyFooter(struct ArrowIpcDecoder* decoder,
+                                           struct ArrowBufferView data,
+                                           struct ArrowError* error) {
+  NANOARROW_RETURN_NOT_OK(ArrowIpcDecoderPeekFooter(decoder, data, error));
+
+  // Check that data contains at least the entire footer (return ESPIPE to 
signal
+  // that reading more data may help).
+  int32_t footer_and_size_and_magic_size =
+      decoder->header_size_bytes + sizeof(int32_t) + 
strlen(NANOARROW_IPC_MAGIC);
+  if (data.size_bytes < footer_and_size_and_magic_size) {
+    ArrowErrorSet(error,
+                  "Expected >= %d bytes of data but only %" PRId64
+                  " bytes are in the buffer",
+                  footer_and_size_and_magic_size, data.size_bytes);
+    return ESPIPE;
+  }
+
+  const uint8_t* footer_data =
+      data.data.as_uint8 + data.size_bytes - footer_and_size_and_magic_size;
+
+  // Run flatbuffers verification
+  if (ns(Footer_verify_as_root(footer_data, decoder->header_size_bytes) !=

Review Comment:
   What's `ns`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to