This is an automated email from the ASF dual-hosted git repository.
924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 91ff38d98d0 [fix](nereids) Run CheckCast on INSERT VALUES to reject
illegal casts (#62905)
91ff38d98d0 is described below
commit 91ff38d98d0a487576e278230cbf2e281016fac8
Author: Chenyang Sun <[email protected]>
AuthorDate: Tue May 12 11:51:15 2026 +0800
[fix](nereids) Run CheckCast on INSERT VALUES to reject illegal casts
(#62905)
## insert into var1 select CAST('123' AS variant)
INSERT VALUES generates `Cast` nodes via:
```
public static Expression castUnbound(Expression expression, DataType
targetType) {
if (expression instanceof Literal) {
return TypeCoercionUtils.castIfNotSameType(expression,
targetType); // SAFE: checkCanCastTo
} else {
return TypeCoercionUtils.unSafeCast(expression, targetType); // UNSAFE: no validation
}
}
```
The non-Literal branch goes through `unSafeCast`, which by design **does
not validate** — validation is delegated to the rule
`CheckCast.INSTANCE`.
That rule is wired into the standard `ExpressionNormalization`
(`ExpressionNormalization.java:78`), but
`InsertIntoValuesAnalyzer.{INSERT_JOBS, BATCH_INSERT_JOBS}` defines its
own minimal pipeline and **omits `CheckCast.INSTANCE`**. Any non-Literal
cast on the INSERT VALUES path therefore reaches BE unchecked.
The hole is not variant-specific — it lets through any illegal cast
(`BITMAP → INT`, `HLL → VARCHAR`, `VARCHAR → BITMAP`, …). Variant → Variant
(with differing configurations) is the only pair that crashes BE; the others
merely surface as BE-side runtime errors, which is why the hole went
undetected for so long.
## Verification
| INSERT | Before | After |
| --- | --- | --- |
| `CAST(... AS variant<A>)` into `variant<B>` | **BE crash / corruption** | `cannot cast variant<...> to variant<...>` (FE) |
| `CAST(bitmap_hash('x') AS INT)` into `INT` | BE error | `cannot cast BITMAP to INT` (FE) |
| `bitmap_hash('x')` into `INT` (implicit) | BE error | `cannot cast BITMAP to INT` (FE) |
| `CAST('123' AS BITMAP)` into `BITMAP` | BE error | `cannot cast VARCHAR(3) to BITMAP` (FE) |
---
.../doris/nereids/util/TypeCoercionUtils.java | 18 +++
.../doc_mode/test_variant_cast_strict.out | 10 ++
.../doc_mode/test_variant_cast_strict.groovy | 129 +++++++++++++++++++++
3 files changed, 157 insertions(+)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index 2970e9bbe46..3701da979ea 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.util;
import org.apache.doris.common.Pair;
import org.apache.doris.nereids.annotation.Developing;
import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.UnboundException;
import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
import org.apache.doris.nereids.rules.expression.check.CheckCast;
import org.apache.doris.nereids.rules.expression.rules.FoldConstantRuleOnFE;
@@ -476,10 +477,27 @@ public class TypeCoercionUtils {
}
}
+ /**
+ * Wrap {@code expression} in a cast to {@code targetType} when the source
type can
+ * already be resolved (Literal, or any expression whose {@code
getDataType()} does
+ * not throw {@link UnboundException}). For Literals, defers to
+ * {@link #castIfNotSameType}; for other resolvable expressions, validates
the cast
+ * via {@link #checkCanCastTo} before constructing it. Used by INSERT
VALUES so that
+ * illegal casts (e.g. {@code variant<config_A>} → {@code
variant<config_B>}) are
+ * rejected at parse time instead of slipping past analysis and crashing
BE.
+ * Truly unbound expressions are wrapped without validation; the normal
analysis
+ * pipeline's CheckCast pass will validate them after binding.
+ */
public static Expression castUnbound(Expression expression, DataType
targetType) {
if (expression instanceof Literal) {
return TypeCoercionUtils.castIfNotSameType(expression, targetType);
} else {
+ try {
+ checkCanCastTo(expression.getDataType(), targetType);
+ } catch (UnboundException e) {
+ // Source type not yet known (UnboundFunction, UnboundSlot,
...);
+ // CheckCast in the normal analysis pipeline validates after
binding.
+ }
return TypeCoercionUtils.unSafeCast(expression, targetType);
}
}
diff --git
a/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out
b/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out
new file mode 100644
index 00000000000..2c5e7e4cb18
--- /dev/null
+++ b/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !case2 --
+15002 15001
+
+-- !case3 --
+15003 15001
+
+-- !case4b --
+15004 15001
+
diff --git
a/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy
b/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy
new file mode 100644
index 00000000000..d258e2bcd22
--- /dev/null
+++ b/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// INSERT VALUES with explicit cast(<json> as variant) into a doc-mode
+// variant column previously aborted BE in MutableBlock::merge_impl.
+// The root cause: FE allowed implicit Variant->Variant cast even when
+// configurations (max_subcolumns_count, enable_doc_mode, ...) differ,
+// and BE has no real Variant->Variant conversion.
+//
+// Fix: FE rejects Variant->Variant cast when configurations differ.
+// User-visible behavior:
+// * BAD : cast('<json>' as variant) with mismatched config ->
AnalysisException
+// * GOOD: '<json>' -> auto-coerce
to target
+// * GOOD: cast('<json>' as variant<...matching...>) -> direct
+suite("test_variant_cast_strict", "p0") {
+ // Use session variables to set variant defaults (column-level properties
+ // forbid setting max_subcolumns_count and enable_doc_mode together).
+ sql """ set default_variant_enable_doc_mode = true """
+ sql """ set default_variant_max_subcolumns_count = 37 """
+ sql """ set default_variant_doc_materialization_min_rows = 8 """
+ sql """ set default_variant_doc_hash_shard_count = 7 """
+
+ def t = "variant_cast_strict"
+ sql """ DROP TABLE IF EXISTS ${t} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${t} (
+ id bigint,
+ v variant
+ )
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1", "disable_auto_compaction" =
"true");
+ """
+
+ def jsonValue =
'{"anchors":{"common_int":150025,"phase_marker":"phase_a","present":true,"row_id":15001},"dynamic":{"path_00000":15001000,"path_00001":15001001},"parent":{"child":{"name":"phase_a_15001"}},"phase_a_small":{"leaf":15001}}'
+
+ // ---- Case 1: BAD path — implicit Variant->Variant with mismatched
config should be
+ // rejected by FE. Before fix: BE aborts in MutableBlock::merge_impl.
After fix: FE
+ // throws AnalysisException; BE never receives the plan.
+ // We force a config mismatch on `variant_doc_materialization_min_rows`
(target=8 from
+ // session, source=999 here).
+ test {
+ sql """ insert into ${t} values (15001, cast('${jsonValue}' as
variant<properties(
+ "variant_enable_doc_mode" = "true",
+ "variant_doc_materialization_min_rows" = "999",
+ "variant_doc_hash_shard_count" = "7"
+ )>)); """
+ exception "cast"
+ }
+
+ // ---- Case 2: GOOD — drop the cast, let FE coerce String -> target
Variant directly.
+ sql """ insert into ${t} values (15002, '${jsonValue}'); """
+ qt_case2 """ select id, cast(v['anchors']['row_id'] as bigint) from ${t}
where id = 15002; """
+
+ // ---- Case 3: GOOD — explicit cast with matching parameters (from same
session).
+ // Bare `as variant` reads session-default config and matches target
exactly.
+ sql """ insert into ${t} values (15003, cast('${jsonValue}' as variant));
"""
+ qt_case3 """ select id, cast(v['anchors']['row_id'] as bigint) from ${t}
where id = 15003; """
+
+ // ---- Case 4: cross-table — different variant configs need an explicit
JSONB hop.
+ def t_src = "variant_cast_strict_src"
+ // Create source table with NO doc-mode by clearing session vars first,
then restore.
+ sql """ set default_variant_enable_doc_mode = false """
+ sql """ set default_variant_max_subcolumns_count = 0 """
+ sql """ DROP TABLE IF EXISTS ${t_src} """
+ sql """
+ CREATE TABLE IF NOT EXISTS ${t_src} (
+ id bigint,
+ v variant
+ )
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES("replication_num" = "1", "disable_auto_compaction" =
"true");
+ """
+ sql """ insert into ${t_src} values (15004, '${jsonValue}'); """
+ // Restore session vars so a bare `as variant` cast again resolves to the target column's config.
+ sql """ set default_variant_enable_doc_mode = true """
+ sql """ set default_variant_max_subcolumns_count = 37 """
+
+ // 4a: direct copy is rejected (configs differ).
+ test {
+ sql """ insert into ${t} select id, v from ${t_src}; """
+ exception "cast"
+ }
+
+ // 4b: routing through JSONB works.
+ sql """ insert into ${t} select id, cast(cast(v as JSONB) as variant) from
${t_src}; """
+ qt_case4b """ select id, cast(v['anchors']['row_id'] as bigint) from ${t}
where id = 15004; """
+
+ // ---- Case 5: BAD — multi-row VALUES inside a transaction takes the
+ // BatchInsertIntoTableCommand (BATCH_INSERT_JOBS) path. The fix in
+ // TypeCoercionUtils.castUnbound rejects each row's illegal cast at parse
time,
+ // so this path is now covered without any pipeline-level rewrite.
+ sql """ begin """
+ try {
+ test {
+ sql """
+ insert into ${t} values
+ (15005, cast('${jsonValue}' as variant<properties(
+ "variant_enable_doc_mode" = "true",
+ "variant_doc_materialization_min_rows" = "999",
+ "variant_doc_hash_shard_count" = "7"
+ )>)),
+ (15006, cast('${jsonValue}' as variant<properties(
+ "variant_enable_doc_mode" = "true",
+ "variant_doc_materialization_min_rows" = "999",
+ "variant_doc_hash_shard_count" = "7"
+ )>));
+ """
+ exception "cast"
+ }
+ } finally {
+ sql """ rollback """
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]