lidavidm commented on code in PR #289:
URL: https://github.com/apache/arrow-nanoarrow/pull/289#discussion_r1305996098
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c:
##########
@@ -930,7 +930,8 @@ static inline void ArrowIpcDecoderResetHeaderInfo(struct
ArrowIpcDecoder* decode
}
// Returns NANOARROW_OK if data is large enough to read the message header,
Review Comment:
I guess this docstring is slightly misleading then because it may return 0
even if the buffer technically does not contain the entire header; it just has
to contain the continuation token and size?
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_decoder.c:
##########
@@ -1025,6 +1029,16 @@ ArrowErrorCode ArrowIpcDecoderDecodeHeader(struct
ArrowIpcDecoder* decoder,
NANOARROW_RETURN_NOT_OK(
ArrowIpcDecoderCheckHeader(decoder, &data, &decoder->header_size_bytes,
error));
+ // Check that data contains at least the entire header (return ESPIPE to
signal
+ // that reading more data may help).
+ int64_t message_body_size = decoder->header_size_bytes - 8;
Review Comment:
nit: above we use `2 * sizeof(int32_t)` but here we hardcode 8; maybe just
pull it out into a constant?
##########
extensions/nanoarrow_ipc/src/nanoarrow/nanoarrow_ipc_reader_test.cc:
##########
@@ -291,6 +291,62 @@ TEST(NanoarrowIpcReader, StreamReaderExpectedSchema) {
stream.release(&stream);
}
+TEST(NanoarrowIpcTest, StreamReaderInvalidBuffer) {
+ struct ArrowBuffer input_buffer;
+ struct ArrowIpcInputStream input;
+ struct ArrowArrayStream stream;
+ struct ArrowSchema schema;
+ struct ArrowArray array;
+
+ uint8_t simple_stream_invalid[sizeof(kSimpleSchema) +
sizeof(kSimpleRecordBatch)];
+ struct ArrowBufferView data;
+ data.data.as_uint8 = simple_stream_invalid;
+
+ // Create invalid data by removing bytes one at a time and ensuring an error
code and
+ // a null-terminated error. After byte 273/280 this passes because the bytes
are just
+ // padding.
+ data.size_bytes = sizeof(kSimpleSchema);
+ for (int64_t i = 1; i < 273; i++) {
+ memcpy(simple_stream_invalid, kSimpleSchema, i);
+ memcpy(simple_stream_invalid + i, kSimpleSchema + (i + 1),
+ (sizeof(kSimpleSchema) - i));
+
+ ArrowBufferInit(&input_buffer);
+ ASSERT_EQ(ArrowBufferAppendBufferView(&input_buffer, data), NANOARROW_OK);
+ ASSERT_EQ(ArrowIpcInputStreamInitBuffer(&input, &input_buffer),
NANOARROW_OK);
+ ASSERT_EQ(ArrowIpcArrayStreamReaderInit(&stream, &input, nullptr),
NANOARROW_OK);
+
+ ASSERT_NE(stream.get_schema(&stream, &schema), NANOARROW_OK)
+ << "After removing Schema message byte " << i;
Review Comment:
FWIW, you can `SCOPED_TRACE` inside the loop and have it apply to all the
assertions (though building up the message is a little annoying)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]