This is an automated email from the ASF dual-hosted git repository.
paleolimbot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-nanoarrow.git
The following commit(s) were added to refs/heads/main by this push:
new 03a0e69 fix(extensions/nanoarrow_ipc): Check number of bytes read
when reading buffer body (#295)
03a0e69 is described below
commit 03a0e691288ea518534059dd21c99f8ec5ffe9a2
Author: Dewey Dunnington <[email protected]>
AuthorDate: Wed Sep 6 11:16:50 2023 -0300
fix(extensions/nanoarrow_ipc): Check number of bytes read when reading
buffer body (#295)
This PR adds a check in the `ArrowArrayStream` implementation when
reading the message body. The decoder checks for buffer overrun when
accessing specific buffers; however because message sizes are rounded up
to a multiple of 8 bytes, removing parts of a record batch message body
could lead to corrupted data being returned (even though no
out-of-bounds memory accesses occurred). This PR adds that check.
---
.../src/nanoarrow/nanoarrow_ipc_reader.c | 9 ++++-
.../src/nanoarrow/nanoarrow_ipc_reader_test.cc | 44 ++++++++++++++++++++++
2 files changed, 52 insertions(+), 1 deletion(-)
diff --git a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
index 812cb8c..275c016 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader.c
@@ -270,7 +270,14 @@ static int ArrowIpcArrayStreamReaderNextBody(
&bytes_read,
&private_data->error));
private_data->body.size_bytes += bytes_read;
- return NANOARROW_OK;
+ if (bytes_read != bytes_to_read) {
+ ArrowErrorSet(&private_data->error,
+ "Expected to be able to read %ld bytes for message body but
got %ld",
+ (long)bytes_to_read, bytes_read);
+ return ESPIPE;
+ } else {
+ return NANOARROW_OK;
+ }
}
static int ArrowIpcArrayStreamReaderReadSchemaIfNeeded(
diff --git
a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
index 9ad918d..cfdf382 100644
--- a/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
+++ b/extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc
@@ -244,6 +244,50 @@ TEST(NanoarrowIpcReader, StreamReaderBasicWithEndOfStream)
{
stream.release(&stream);
}
+TEST(NanoarrowIpcReader, StreamReaderIncompleteMessageHeader) {
+ struct ArrowBuffer input_buffer;
+ ArrowBufferInit(&input_buffer);
+ ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleSchema,
sizeof(kSimpleSchema) - 1),
+ NANOARROW_OK);
+
+ struct ArrowIpcInputStream input;
+ ASSERT_EQ(ArrowIpcInputStreamInitBuffer(&input, &input_buffer),
NANOARROW_OK);
+
+ struct ArrowArrayStream stream;
+ ASSERT_EQ(ArrowIpcArrayStreamReaderInit(&stream, &input, nullptr),
NANOARROW_OK);
+
+ struct ArrowSchema schema;
+ ASSERT_EQ(stream.get_schema(&stream, &schema), ESPIPE);
+ EXPECT_STREQ(stream.get_last_error(&stream),
+ "Expected >= 280 bytes of remaining data but found 279 bytes in
buffer");
+
+ stream.release(&stream);
+}
+
+TEST(NanoarrowIpcReader, StreamReaderIncompleteMessageBody) {
+ struct ArrowBuffer input_buffer;
+ ArrowBufferInit(&input_buffer);
+ ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleSchema,
sizeof(kSimpleSchema)),
+ NANOARROW_OK);
+ // Truncate the record batch at the very end of the body
+ ASSERT_EQ(ArrowBufferAppend(&input_buffer, kSimpleRecordBatch,
+ sizeof(kSimpleRecordBatch) - 1),
+ NANOARROW_OK);
+
+ struct ArrowIpcInputStream input;
+ ASSERT_EQ(ArrowIpcInputStreamInitBuffer(&input, &input_buffer),
NANOARROW_OK);
+
+ struct ArrowArrayStream stream;
+ ASSERT_EQ(ArrowIpcArrayStreamReaderInit(&stream, &input, nullptr),
NANOARROW_OK);
+
+ struct ArrowArray array;
+ ASSERT_EQ(stream.get_next(&stream, &array), ESPIPE);
+ EXPECT_STREQ(stream.get_last_error(&stream),
+ "Expected to be able to read 16 bytes for message body but got
15");
+
+ stream.release(&stream);
+}
+
TEST(NanoarrowIpcReader, StreamReaderExpectedRecordBatch) {
struct ArrowBuffer input_buffer;
ArrowBufferInit(&input_buffer);