This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 4ba70c502f GH-49229: [C++] Fix abort when reading IPC file with a
union validity bitmap and pre-buffering enabled (#49230)
4ba70c502f is described below
commit 4ba70c502f76f07eff9b3aaa5897aa50ffcbf006
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon Feb 16 11:11:18 2026 +0100
GH-49229: [C++] Fix abort when reading IPC file with a union validity
bitmap and pre-buffering enabled (#49230)
### Rationale for this change
The logic for loading a Union array from a IPC file was inquiring whether a
validity bitmap is present in a V4 metadata file (i.e. `buffers[0] !=
nullptr`). However, in the pre-buffering case, the buffers haven't been
populated yet at the point, so the check would be ignored and the IPC file
reader could happily create a Union array with a top validity bitmap. This
could crash later in `UnionArray::SetData`.
Found by OSS-Fuzz in https://issues.oss-fuzz.com/issues/482161154
### Are these changes tested?
By integration test and fuzz regression file. There are no unit tests in
the C++ test suite that exercise V4 metadata IPC files with top-level union
validity bitmaps.
### Are there any user-facing changes?
No.
**This PR contains a "Critical Fix".** This fixes a controlled crash when
reading a pre-V5 IPC file with a top-level union validity bitmap and
pre-buffering enabled. Instead a regular error will be returned. There are no
known security implications.
* GitHub Issue: #49229
Authored-by: Antoine Pitrou <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/ipc/reader.cc | 28 ++++++++++++++++------------
testing | 2 +-
2 files changed, 17 insertions(+), 13 deletions(-)
diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 046eacb6ce..a47a629072 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -296,7 +296,7 @@ class ArrayLoader {
return Status::OK();
}
- Status LoadCommon(Type::type type_id) {
+ Status LoadCommon(Type::type type_id, bool allow_validity_bitmap = true) {
DCHECK_NE(out_, nullptr);
// This only contains the length and null count, which we need to figure
// out what to do with the buffers. For example, if null_count == 0, then
@@ -314,10 +314,16 @@ class ArrayLoader {
}
if (internal::HasValidityBitmap(type_id, metadata_version_)) {
- // Extract null_bitmap which is common to all arrays except for unions
+ // Extract null bitmap which is common to all arrays except for unions
// and nulls.
if (out_->null_count != 0) {
- RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+ if (allow_validity_bitmap) {
+ RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+ } else {
+ // Caller did not allow this
+ return Status::Invalid("Cannot read ",
::arrow::internal::ToTypeName(type_id),
+ " array with top-level validity bitmap");
+ }
}
buffer_index_++;
}
@@ -471,9 +477,10 @@ class ArrayLoader {
int n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
out_->buffers.resize(n_buffers);
- RETURN_NOT_OK(LoadCommon(type.id()));
-
- // With metadata V4, we can get a validity bitmap.
+ // With metadata V4, we can get a validity bitmap. The bitmap may be there
+ // if we're loading eagerly, or it might be scheduled for loading if we're
+ // using a BatchDataReadRequest.
+ //
// Trying to fix up union data to do without the top-level validity bitmap
// is hairy:
// - type ids must be rewritten to all have valid values (even for former
@@ -482,12 +489,9 @@ class ArrayLoader {
// by ANDing the top-level validity bitmap
// - dense union children must be rewritten (at least one of them)
// to insert the required null slots that were formerly omitted
- // So instead we bail out.
- if (out_->null_count != 0 && out_->buffers[0] != nullptr) {
- return Status::Invalid(
- "Cannot read pre-1.0.0 Union array with top-level validity bitmap");
- }
- out_->buffers[0] = nullptr;
+ //
+ // So instead we disallow validity bitmaps.
+ RETURN_NOT_OK(LoadCommon(type.id(), /*allow_validity_bitmap=*/false));
out_->null_count = 0;
if (out_->length > 0) {
diff --git a/testing b/testing
index df428ddaa2..ca49b7795c 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit df428ddaa22d94dfb525af4c0951f3dafb463795
+Subproject commit ca49b7795c09181c2915b0a5e762a8fac70f9556