This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new fed23f388b GH-48844: [C++] Check IPC Message body length consistency
in IPC file (#48845)
fed23f388b is described below
commit fed23f388bc1ef0e574e9a818d64d10c38193e97
Author: Antoine Pitrou <[email protected]>
AuthorDate: Tue Jan 13 20:36:07 2026 +0100
GH-48844: [C++] Check IPC Message body length consistency in IPC file
(#48845)
### Rationale for this change
The IPC file exposes [redundant
information](https://github.com/apache/arrow/blob/d54a2051cf9020a0fdf50836420c38ad14787abb/format/File.fbs#L39-L50)
about Message sizes so as to allow for random access from the file footer.
We tried adding [consistency
checks](https://github.com/apache/arrow/issues/19596) in the past but this hit
a bug in the JavaScript IPC writer at the time, so the checks were left
disabled.
The JavaScript implementation was fixed soon after (7 years ago), so this
PR re-enables those checks so as to more easily detect potentially invalid IPC
files.
### Are these changes tested?
By existing tests.
### Are there any user-facing changes?
No, unless they try reading invalid IPC files.
* GitHub Issue: #48844
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/ipc/message.cc | 2 ++
cpp/src/arrow/ipc/reader.cc | 33 +++++++++++++++++++--------------
2 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/cpp/src/arrow/ipc/message.cc b/cpp/src/arrow/ipc/message.cc
index 7919878f14..8be09956f1 100644
--- a/cpp/src/arrow/ipc/message.cc
+++ b/cpp/src/arrow/ipc/message.cc
@@ -375,6 +375,8 @@ Result<std::unique_ptr<Message>> ReadMessage(int64_t
offset, int32_t metadata_le
decoder.next_required_size());
}
+ // TODO(GH-48846): we should take a body_length just like ReadMessageAsync
+ // and read metadata + body in one go.
ARROW_ASSIGN_OR_RAISE(auto metadata, file->ReadAt(offset, metadata_length));
if (metadata->size() < metadata_length) {
return Status::Invalid("Expected to read ", metadata_length,
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 4910b1596c..6a20dbb8c8 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -1180,31 +1180,36 @@ Status CheckAligned(const FileBlock& block) {
return Status::OK();
}
+template <typename MessagePtr>
+Result<MessagePtr> CheckBodyLength(MessagePtr message, const FileBlock& block)
{
+ if (message->body_length() != block.body_length) {
+ return Status::Invalid(
+ "Mismatching body length for IPC message "
+ "(Block.bodyLength: ",
+ block.body_length, " vs. Message.bodyLength: ",
message->body_length(), ")");
+ }
+ // NOTE: we cannot check metadata length as easily as we would have to
account
+ // for the additional IPC signalisation (such as optional continuation
bytes).
+ return message;
+}
+
Result<std::unique_ptr<Message>> ReadMessageFromBlock(
const FileBlock& block, io::RandomAccessFile* file,
const FieldsLoaderFunction& fields_loader) {
RETURN_NOT_OK(CheckAligned(block));
- // TODO(wesm): this breaks integration tests, see ARROW-3256
- // DCHECK_EQ((*out)->body_length(), block.body_length);
-
ARROW_ASSIGN_OR_RAISE(auto message, ReadMessage(block.offset,
block.metadata_length,
file, fields_loader));
- return message;
+ return CheckBodyLength(std::move(message), block);
}
Future<std::shared_ptr<Message>> ReadMessageFromBlockAsync(
const FileBlock& block, io::RandomAccessFile* file, const io::IOContext&
io_context) {
- if (!bit_util::IsMultipleOf8(block.offset) ||
- !bit_util::IsMultipleOf8(block.metadata_length) ||
- !bit_util::IsMultipleOf8(block.body_length)) {
- return Status::Invalid("Unaligned block in IPC file");
- }
-
- // TODO(wesm): this breaks integration tests, see ARROW-3256
- // DCHECK_EQ((*out)->body_length(), block.body_length);
-
+ RETURN_NOT_OK(CheckAligned(block));
return ReadMessageAsync(block.offset, block.metadata_length,
block.body_length, file,
- io_context);
+ io_context)
+ .Then([block](std::shared_ptr<Message> message) {
+ return CheckBodyLength(std::move(message), block);
+ });
}
class RecordBatchFileReaderImpl;