This is an automated email from the ASF dual-hosted git repository.

paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 03a0e69  fix(extensions/nanoarrow_ipc): Check number of bytes read 
when reading buffer body (#295)
03a0e69 is described below

commit 03a0e691288ea518534059dd21c99f8ec5ffe9a2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Sep 6 11:16:50 2023 -0300

    fix(extensions/nanoarrow_ipc): Check number of bytes read when reading 
buffer body (#295)
    
    This PR adds a check in the `ArrowArrayStream` implementation when
    reading the message body. The decoder checks for buffer overrun when
    accessing specific buffers; however because message sizes are rounded up
    to a multiple of 8 bytes, removing parts of a record batch message body
    could lead to corrupted data being returned (even though no
    out-of-bounds memory accesses occurred). This PR adds that check.
---
 .../src/nanoarrow/nanoarrow_ipc_reader.c           |  9 ++++-
 .../src/nanoarrow/nanoarrow_ipc_reader_test.cc     | 44 ++++++++++++++++++++++
 2 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c 
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
index 812cb8c..275c016 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
@@ -270,7 +270,14 @@ static int ArrowIpcArrayStreamReaderNextBody(
                                                    &bytes_read, 
&private_data->error));
   private_data->body.size_bytes += bytes_read;
 
-  return NANOARROW_OK;
+  if (bytes_read != bytes_to_read) {
+    ArrowErrorSet(&private_data->error,
+                  "Expected to be able to read %ld bytes for message body but 
got %ld",
+                  (long)bytes_to_read, bytes_read);
+    return ESPIPE;
+  } else {
+    return NANOARROW_OK;
+  }
 }
 
 static int ArrowIpcArrayStreamReaderReadSchemaIfNeeded(
diff --git 
a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc 
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
index 9ad918d..cfdf382 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
@@ -244,6 +244,50 @@ TEST(NanoarrowIpcReader, StreamReaderBasicWithEndOfStream) 
{
   stream.release(&stream);
 }
 
+TEST(NanoarrowIpcReader, StreamReaderIncompleteMessageHeader) {
+  struct ArrowBuffer input_buffer;
+  ArrowBufferInit(&input_buffer);
+  ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleSchema, 
sizeof(kSimpleSchema) - 1),
+            NANOARROW_OK);
+
+  struct ArrowIpcInputStream input;
+  ASSERT_EQ(ArrowIpcInputStreamInitBuffer(&input, &input_buffer), 
NANOARROW_OK);
+
+  struct ArrowArrayStream stream;
+  ASSERT_EQ(ArrowIpcArrayStreamReaderInit(&stream, &input, nullptr), 
NANOARROW_OK);
+
+  struct ArrowSchema schema;
+  ASSERT_EQ(stream.get_schema(&stream, &schema), ESPIPE);
+  EXPECT_STREQ(stream.get_last_error(&stream),
+               "Expected >= 280 bytes of remaining data but found 279 bytes in 
buffer");
+
+  stream.release(&stream);
+}
+
+TEST(NanoarrowIpcReader, StreamReaderIncompleteMessageBody) {
+  struct ArrowBuffer input_buffer;
+  ArrowBufferInit(&input_buffer);
+  ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleSchema, 
sizeof(kSimpleSchema)),
+            NANOARROW_OK);
+  // Truncate the record batch at the very end of the body
+  ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleRecordBatch,
+                              sizeof(kSimpleRecordBatch) - 1),
+            NANOARROW_OK);
+
+  struct ArrowIpcInputStream input;
+  ASSERT_EQ(ArrowIpcInputStreamInitBuffer(&input, &input_buffer), 
NANOARROW_OK);
+
+  struct ArrowArrayStream stream;
+  ASSERT_EQ(ArrowIpcArrayStreamReaderInit(&stream, &input, nullptr), 
NANOARROW_OK);
+
+  struct ArrowArray array;
+  ASSERT_EQ(stream.get_next(&stream, &array), ESPIPE);
+  EXPECT_STREQ(stream.get_last_error(&stream),
+               "Expected to be able to read 16 bytes for message body but got 
15");
+
+  stream.release(&stream);
+}
+
 TEST(NanoarrowIpcReader, StreamReaderExpectedRecordBatch) {
   struct ArrowBuffer input_buffer;
   ArrowBufferInit(&input_buffer);

Reply via email to