This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 79d645877e GH-48062: [C++] Fix null pointer dereference in 
MakeExecBatch (#48063)
79d645877e is described below

commit 79d645877e65fd892d662c40b77cfe788bec6a96
Author: Sirui Mu <[email protected]>
AuthorDate: Thu Nov 27 00:56:23 2025 +0800

    GH-48062: [C++] Fix null pointer dereference in MakeExecBatch (#48063)
    
    ### Rationale for this change
    
    The `arrow::compute::MakeExecBatch` function calls `DataType::id()` on 
`partial.type()`, which can be a null pointer when `partial` is a datum 
representing a table. `MakeExecBatch` fails to check for this, and thus a null 
pointer dereference occurs.
    
    ### What changes are included in this PR?
    
    This patch adds a simple check in `MakeExecBatch` before calling 
`DataType::id()` to make sure it won't get called on a null pointer.
    
    ### Are these changes tested?
    
    Yes. This patch updates the unit test `ExpressionUtils.MakeExecBatch` and 
includes a case that calls `MakeExecBatch` with a table.
    
    ### Are there any user-facing changes?
    
    No.
    
    This PR does resolve a crash, but I'm not quite sure whether this 
should be classified as a "critical fix".
    
    Resolves #48062.
    * GitHub Issue: #48062
    
    Authored-by: Sirui Mu <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/expression.cc      | 3 ++-
 cpp/src/arrow/compute/expression_test.cc | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/expression.cc 
b/cpp/src/arrow/compute/expression.cc
index 3c2ec10040..2563674a59 100644
--- a/cpp/src/arrow/compute/expression.cc
+++ b/cpp/src/arrow/compute/expression.cc
@@ -687,7 +687,8 @@ Result<ExecBatch> MakeExecBatch(const Schema& full_schema, 
const Datum& partial,
   }
 
   // wasteful but useful for testing:
-  if (partial.type()->id() == Type::STRUCT) {
+  const auto& partial_type = partial.type();
+  if (partial_type && partial_type->id() == Type::STRUCT) {
     if (partial.is_array()) {
       ARROW_ASSIGN_OR_RAISE(auto partial_batch,
                             
RecordBatch::FromStructArray(partial.make_array()));
diff --git a/cpp/src/arrow/compute/expression_test.cc 
b/cpp/src/arrow/compute/expression_test.cc
index bbab57feeb..5e1f3c093e 100644
--- a/cpp/src/arrow/compute/expression_test.cc
+++ b/cpp/src/arrow/compute/expression_test.cc
@@ -224,6 +224,9 @@ TEST(ExpressionUtils, MakeExecBatch) {
   auto duplicated_names =
       RecordBatch::Make(schema({GetField("i32"), GetField("i32")}), kNumRows, 
{i32, i32});
   ASSERT_RAISES(Invalid, MakeExecBatch(*kBoringSchema, duplicated_names));
+
+  ASSERT_OK_AND_ASSIGN(auto boring_table, Table::MakeEmpty(kBoringSchema));
+  ASSERT_RAISES(NotImplemented, MakeExecBatch(*kBoringSchema, boring_table));
 }
 
 class WidgetifyOptions : public compute::FunctionOptions {

Reply via email to