This is an automated email from the ASF dual-hosted git repository.

rui-mo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new e2300d86ba [VL] Throws user error for invalid index in the nested 
field reference into array/map (#12290)
e2300d86ba is described below

commit e2300d86baf58d935a087951f097a52f13ac30f3
Author: Felipe Pessoto <[email protected]>
AuthorDate: Mon Jun 22 07:06:20 2026 -0700

    [VL] Throws user error for invalid index in the nested field reference into 
array/map (#12290)
---
 cpp/velox/substrait/SubstraitToVeloxExpr.cc        | 20 ++++++-
 cpp/velox/tests/CMakeLists.txt                     |  1 +
 cpp/velox/tests/SubstraitVeloxExprConverterTest.cc | 69 ++++++++++++++++++++++
 3 files changed, 88 insertions(+), 2 deletions(-)

diff --git a/cpp/velox/substrait/SubstraitToVeloxExpr.cc 
b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
index b7615d844d..6ef99685c1 100755
--- a/cpp/velox/substrait/SubstraitToVeloxExpr.cc
+++ b/cpp/velox/substrait/SubstraitToVeloxExpr.cc
@@ -217,13 +217,29 @@ std::shared_ptr<const core::FieldAccessTypedExpr> 
SubstraitVeloxExprConverter::t
       auto inputColumnType = inputType;
       for (;;) {
         auto idx = tmp->field();
-        fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), 
inputColumnType->childAt(idx), fieldAccess);
+        VELOX_USER_CHECK(
+            idx >= 0 && static_cast<uint32_t>(idx) < inputColumnType->size(),
+            "Field reference index {} is out of range for the {}-field row 
type.",
+            idx,
+            inputColumnType->size());
+        const TypePtr childType = inputColumnType->childAt(idx);
+        fieldAccess = makeFieldAccessExpr(inputColumnType->nameOf(idx), 
childType, fieldAccess);
 
         if (!tmp->has_child()) {
           break;
         }
 
-        inputColumnType = asRowType(inputColumnType->childAt(idx));
+        // Descending into a nested field is only valid when the current child 
is
+        // itself a struct/row. For array/map/primitive children (e.g. a field
+        // nested under an array, as in Delta's "updating array type" case)
+        // asRowType() returns null; previously the next loop iteration
+        // dereferenced that null RowType and crashed the process with a 
SIGSEGV.
+        // Throw a user error instead so plan validation fails cleanly and the
+        // query falls back to vanilla execution.
+        inputColumnType = asRowType(childType);
+        VELOX_USER_CHECK_NOT_NULL(
+            inputColumnType,
+            "Nested field reference into a non-struct type (e.g. an array or 
map element) is not supported.");
         tmp = &tmp->child().struct_field();
       }
       return fieldAccess;
diff --git a/cpp/velox/tests/CMakeLists.txt b/cpp/velox/tests/CMakeLists.txt
index 3052ce23f6..0f5e634b9d 100644
--- a/cpp/velox/tests/CMakeLists.txt
+++ b/cpp/velox/tests/CMakeLists.txt
@@ -129,6 +129,7 @@ add_velox_test(
   Substrait2VeloxPlanValidatorTest.cc
   Substrait2VeloxValuesNodeConversionTest.cc
   SubstraitExtensionCollectorTest.cc
+  SubstraitVeloxExprConverterTest.cc
   VeloxSubstraitRoundTripTest.cc
   VeloxSubstraitSignatureTest.cc
   VeloxToSubstraitTypeTest.cc)
diff --git a/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc 
b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
new file mode 100644
index 0000000000..784ba0c13d
--- /dev/null
+++ b/cpp/velox/tests/SubstraitVeloxExprConverterTest.cc
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "substrait/SubstraitToVeloxExpr.h"
+
+#include "velox/common/base/tests/GTestUtils.h"
+#include "velox/type/Type.h"
+
+using namespace facebook::velox;
+
+namespace gluten {
+
+// Regression test for a SIGSEGV in
+// SubstraitVeloxExprConverter::toVeloxExpr(Expression::FieldReference, ...).
+// The direct-reference loop descends one nested struct_field at a time with
+// `inputColumnType = asRowType(childAt(idx))`. When the field path traverses a
+// non-struct child -- e.g. a field nested under an array, as produced by 
Delta's
+// nested-array UPDATE rewrite ("nested data support - ... updating array 
type")
+// -- asRowType() returns null and the next iteration dereferenced that null
+// RowType, crashing the whole forked JVM. A SIGSEGV is not catchable, so plan
+// validation could not fall back. The converter must instead throw a
+// VeloxUserError, which SubstraitToVeloxPlanValidator catches to fall back to
+// vanilla execution.
+TEST(SubstraitVeloxExprConverterTest, nestedFieldReferenceIntoNonStructThrows) 
{
+  // Schema with a single array column.
+  RowTypePtr inputType = ROW({"arr"}, {ARRAY(INTEGER())});
+
+  // Reference column 0 (the array), then descend one more level via a child
+  // struct_field -- i.e. into the array's element, which is not a struct/row.
+  ::substrait::Expression::FieldReference fieldReference;
+  auto* structField = 
fieldReference.mutable_direct_reference()->mutable_struct_field();
+  structField->set_field(0);
+  structField->mutable_child()->mutable_struct_field()->set_field(0);
+
+  VELOX_ASSERT_THROW(
+      SubstraitVeloxExprConverter::toVeloxExpr(fieldReference, inputType),
+      "Nested field reference into a non-struct type");
+}
+
+// A field-reference index past the end of the row type must be rejected 
cleanly.
+// The out-of-range check is defense-in-depth: Spark's analyzer keeps field 
indices
+// in bounds, but a malformed Substrait plan might have a negative index or an 
index
+// exceeding the row size. Without the check, a negative index casts to a huge 
uint32_t
+// and causes undefined behavior; an out-of-bounds index would throw from 
Velox's
+// RowType::childAt but with a less clear error message.
+TEST(SubstraitVeloxExprConverterTest, fieldReferenceIndexOutOfRangeThrows) {
+  RowTypePtr inputType = ROW({"a", "b"}, {INTEGER(), INTEGER()});
+
+  ::substrait::Expression::FieldReference fieldReference;
+  
fieldReference.mutable_direct_reference()->mutable_struct_field()->set_field(5);
+
+  
VELOX_ASSERT_USER_THROW(SubstraitVeloxExprConverter::toVeloxExpr(fieldReference,
 inputType), "out of range");
+}
+
+} // namespace gluten


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to