This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 4ba70c502f GH-49229: [C++] Fix abort when reading IPC file with a 
union validity bitmap and pre-buffering enabled (#49230)
4ba70c502f is described below

commit 4ba70c502f76f07eff9b3aaa5897aa50ffcbf006
Author: Antoine Pitrou <[email protected]>
AuthorDate: Mon Feb 16 11:11:18 2026 +0100

    GH-49229: [C++] Fix abort when reading IPC file with a union validity 
bitmap and pre-buffering enabled (#49230)
    
    ### Rationale for this change
    
    The logic for loading a Union array from a IPC file was inquiring whether a 
validity bitmap is present in a V4 metadata file (i.e. `buffers[0] != 
nullptr`). However, in the pre-buffering case, the buffers haven't been 
populated yet at the point, so the check would be ignored and the IPC file 
reader could happily create a Union array with a top validity bitmap. This 
could crash later in `UnionArray::SetData`.
    
    Found by OSS-Fuzz in https://issues.oss-fuzz.com/issues/482161154
    
    ### Are these changes tested?
    
    By integration test and fuzz regression file. There are no unit tests in 
the C++ test suite that exercise V4 metadata IPC files with top-level union 
validity bitmaps.
    
    ### Are there any user-facing changes?
    
    No.
    
    **This PR contains a "Critical Fix".** This fixes a controlled crash when 
reading a pre-V5 IPC file with a top-level union validity bitmap and 
pre-buffering enabled. Instead a regular error will be returned. There are no 
known security implications.
    
    * GitHub Issue: #49229
    
    Authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/ipc/reader.cc | 28 ++++++++++++++++------------
 testing                     |  2 +-
 2 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/cpp/src/arrow/ipc/reader.cc b/cpp/src/arrow/ipc/reader.cc
index 046eacb6ce..a47a629072 100644
--- a/cpp/src/arrow/ipc/reader.cc
+++ b/cpp/src/arrow/ipc/reader.cc
@@ -296,7 +296,7 @@ class ArrayLoader {
     return Status::OK();
   }
 
-  Status LoadCommon(Type::type type_id) {
+  Status LoadCommon(Type::type type_id, bool allow_validity_bitmap = true) {
     DCHECK_NE(out_, nullptr);
     // This only contains the length and null count, which we need to figure
     // out what to do with the buffers. For example, if null_count == 0, then
@@ -314,10 +314,16 @@ class ArrayLoader {
     }
 
     if (internal::HasValidityBitmap(type_id, metadata_version_)) {
-      // Extract null_bitmap which is common to all arrays except for unions
+      // Extract null bitmap which is common to all arrays except for unions
       // and nulls.
       if (out_->null_count != 0) {
-        RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+        if (allow_validity_bitmap) {
+          RETURN_NOT_OK(GetBuffer(buffer_index_, &out_->buffers[0]));
+        } else {
+          // Caller did not allow this
+          return Status::Invalid("Cannot read ", 
::arrow::internal::ToTypeName(type_id),
+                                 " array with top-level validity bitmap");
+        }
       }
       buffer_index_++;
     }
@@ -471,9 +477,10 @@ class ArrayLoader {
     int n_buffers = type.mode() == UnionMode::SPARSE ? 2 : 3;
     out_->buffers.resize(n_buffers);
 
-    RETURN_NOT_OK(LoadCommon(type.id()));
-
-    // With metadata V4, we can get a validity bitmap.
+    // With metadata V4, we can get a validity bitmap. The bitmap may be there
+    // if we're loading eagerly, or it might be scheduled for loading if we're
+    // using a BatchDataReadRequest.
+    //
     // Trying to fix up union data to do without the top-level validity bitmap
     // is hairy:
     // - type ids must be rewritten to all have valid values (even for former
@@ -482,12 +489,9 @@ class ArrayLoader {
     //   by ANDing the top-level validity bitmap
     // - dense union children must be rewritten (at least one of them)
     //   to insert the required null slots that were formerly omitted
-    // So instead we bail out.
-    if (out_->null_count != 0 && out_->buffers[0] != nullptr) {
-      return Status::Invalid(
-          "Cannot read pre-1.0.0 Union array with top-level validity bitmap");
-    }
-    out_->buffers[0] = nullptr;
+    //
+    // So instead we disallow validity bitmaps.
+    RETURN_NOT_OK(LoadCommon(type.id(), /*allow_validity_bitmap=*/false));
     out_->null_count = 0;
 
     if (out_->length > 0) {
diff --git a/testing b/testing
index df428ddaa2..ca49b7795c 160000
--- a/testing
+++ b/testing
@@ -1 +1 @@
-Subproject commit df428ddaa22d94dfb525af4c0951f3dafb463795
+Subproject commit ca49b7795c09181c2915b0a5e762a8fac70f9556

Reply via email to