This is an automated email from the ASF dual-hosted git repository.
rui-mo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git
The following commit(s) were added to refs/heads/main by this push:
new e2300d86ba [VL] Throws user error for invalid index in the nested
field reference into array/map (#12290)
e2300d86ba is described below
commit e2300d86baf58d935a087951f097a52f13ac30f3
Author: Felipe Pessoto <[email protected]>
AuthorDate: Mon Jun 22 07:06:20 2026 -0700
[VL] Throws user error for invalid index in the nested field reference into
array/map (#12290)
---
cpp/velox/substrait/SubstraitToVeloxExpr.cc | 20 ++++++-
cpp/velox/tests/CMakeLists.txt | 1 +
cpp/velox/tests/SubstraitVeloxExprConverterTest.cc | 69 ++++++++++++++++++++++
3 files changed, 88 insertions(+), 2 deletions(-)
diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index b7615d844d..6ef99685c1 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -217,13 +217,29 @@ std::shared_ptr<const core::FieldAccessTypedExpr>
SubstraitVeloxExprConverter::t
auto inputColumnType = inputType;
for (;;) {
auto idx = tmp->field();
- fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx),
inputColumnType->childAt(idx), fieldAccess);
+ VELOX_USER_CHECK(
+ idx >= 0 && static_cast<uint32_t>(idx) < inputColumnType->size(),
+ "Field reference index {} is out of range for the {}-field row
type.",
+ idx,
+ inputColumnType->size());
+ const TypePtr childType = inputColumnType->childAt(idx);
+ fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx),
childType, fieldAccess);
if (!tmp->has_child()) {
break;
}
- inputColumnType = asRowType(inputColumnType->childAt(idx));
+ // Descending into a nested field is only valid when the current child
is
+ // itself a struct/row. For array/map/primitive children (e.g. a field
+ // nested under an array, as in Delta's "updating array type" case)
+ // asRowType() returns null; previously the next loop iteration
+ // dereferenced that null RowType and crashed the process with a
SIGSEGV.
+ // Throw a user error instead so plan validation fails cleanly and the
+ // query falls back to vanilla execution.
+ inputColumnType = asRowType(childType);
+ VELOX_USER_CHECK_NOT_NULL(
+ inputColumnType,
+ "Nested field reference into a non-struct type (e.g. an array or
map element) is not supported.");
tmp = &tmp->child().struct_field();
}
return fieldAccess;
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index 3052ce23f6..0f5e634b9d 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -129,6 +129,7 @@ add_velox_test(
Substrait2VeloxPlanValidatorTest.cc
Substrait2VeloxValuesNodeConversionTest.cc
SubstraitExtensionCollectorTest.cc
+ SubstraitVeloxExprConverterTest.cc
VeloxSubstraitRoundTripTest.cc
VeloxSubstraitSignatureTest.cc
VeloxToSubstraitTypeTest.cc)
diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
new file mode 100644
index 0000000000..784ba0c13d
--- /dev/null
+++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "substrait/SubstraitToVeloxExpr.h"
+
+#include "velox/common/base/tests/GTestUtils.h"
+#include "velox/type/Type.h"
+
+using namespace facebook::velox;
+
+namespace gluten {
+
+// Regression test for a SIGSEGV in
+// SubstraitVeloxExprConverter::toVeloxExpr(Expression::FieldReference, ...).
+// The direct-reference loop descends one nested struct_field at a time with
+// `inputColumnType = asRowType(childAt(idx))`. When the field path traverses a
+// non-struct child -- e.g. a field nested under an array, as produced by
Delta's
+// nested-array UPDATE rewrite ("nested data support - ... updating array
type")
+// -- asRowType() returns null and the next iteration dereferenced that null
+// RowType, crashing the whole forked JVM. A SIGSEGV is not catchable, so plan
+// validation could not fall back. The converter must instead throw a
+// VeloxUserError, which SubstraitToVeloxPlanValidator catches to fall back to
+// vanilla execution.
+TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows)
{
+ // Schema with a single array column.
+ RowTypePtr inputType = ROW({"arr"}, {ARRAY(INTEGER())});
+
+ // Reference column 0 (the array), then descend one more level via a child
+ // struct_field -- i.e. into the array's element, which is not a struct/row.
+ ::substrait::Expression::FieldReference fieldReference;
+ auto* structField =
fieldReference.mutable_direct_reference()->mutable_struct_field();
+ structField->set_field(0);
+ structField->mutable_child()->mutable_struct_field()->set_field(0);
+
+ VELOX_ASSERT_THROW(
+ SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType),
+ "Nested field reference into a non-struct type");
+}
+
+// A field-reference index past the end of the row type must be rejected
cleanly.
+// The out-of-range check is defense-in-depth: Spark's analyzer keeps field
indices
+// in bounds, but a malformed Substrait plan might have a negative index or an
index
+// exceeding the row size. Without the check, a negative index casts to a huge
uint32_t
+// and causes undefined behavior; an out-of-bounds index would throw from
Velox's
+// RowType::childAt but with a less clear error message.
+TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) {
+ RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()});
+
+ ::substrait::Expression::FieldReference fieldReference;
+
fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5);
+
+
VELOX_ASSERT_USER_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference,
inputType), "out of range");
+}
+
+} // namespace gluten
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]