This is an automated email from the ASF dual-hosted git repository.
apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 79d645877e GH-48062: [C++] Fix null pointer dereference in
MakeExecBatch (#48063)
79d645877e is described below
commit 79d645877e65fd892d662c40b77cfe788bec6a96
Author: Sirui Mu <[email protected]>
AuthorDate: Thu Nov 27 00:56:23 2025 +0800
GH-48062: [C++] Fix null pointer dereference in MakeExecBatch (#48063)
### Rationale for this change
The `arrow::compute::MakeExecBatch` function calls `DataType::id()` on
`partial.type()` which could be a null pointer when `partial` is a datum
representing a table. `MakeExecBatch` fails to check for this, and thus a null
pointer dereference indeed happens.
### What changes are included in this PR?
This patch adds a simple check in `MakeExecBatch` before calling
`DataType::id()` to make sure it won't get called on a null pointer.
### Are these changes tested?
Yes. This patch updates the unit test `ExpressionUtils.MakeExecBatch` and
includes a case that calls `MakeExecBatch` with a table.
### Are there any user-facing changes?
No.
This PR indeed resolves a crash problem, but I'm not quite sure whether this
should be classified as a "critical fix".
Resolves #48062.
* GitHub Issue: #48062
Authored-by: Sirui Mu <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/src/arrow/compute/expression.cc | 3 ++-
cpp/src/arrow/compute/expression_test.cc | 3 +++
2 files changed, 5 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/compute/expression.cc
b/cpp/src/arrow/compute/expression.cc
index 3c2ec10040..2563674a59 100644
--- a/cpp/src/arrow/compute/expression.cc
+++ b/cpp/src/arrow/compute/expression.cc
@@ -687,7 +687,8 @@ Result<ExecBatch> MakeExecBatch(const Schema& full_schema,
const Datum& partial,
}
// wasteful but useful for testing:
- if (partial.type()->id() == Type::STRUCT) {
+ const auto& partial_type = partial.type();
+ if (partial_type && partial_type->id() == Type::STRUCT) {
if (partial.is_array()) {
ARROW_ASSIGN_OR_RAISE(auto partial_batch,
RecordBatch::FromStructArray(partial.make_array()));
diff --git a/cpp/src/arrow/compute/expression_test.cc
b/cpp/src/arrow/compute/expression_test.cc
index bbab57feeb..5e1f3c093e 100644
--- a/cpp/src/arrow/compute/expression_test.cc
+++ b/cpp/src/arrow/compute/expression_test.cc
@@ -224,6 +224,9 @@ TEST(ExpressionUtils, MakeExecBatch) {
auto duplicated_names =
RecordBatch::Make(schema({GetField("i32"), GetField("i32")}), kNumRows,
{i32, i32});
ASSERT_RAISES(Invalid, MakeExecBatch(*kBoringSchema, duplicated_names));
+
+ ASSERT_OK_AND_ASSIGN(auto boring_table, Table::MakeEmpty(kBoringSchema));
+ ASSERT_RAISES(NotImplemented, MakeExecBatch(*kBoringSchema, boring_table));
}
class WidgetifyOptions : public compute::FunctionOptions {