This is an automated email from the ASF dual-hosted git repository.

924060929 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 91ff38d98d0 [fix](nereids) Run CheckCast on INSERT VALUES to reject 
illegal  casts (#62905)
91ff38d98d0 is described below

commit 91ff38d98d0a487576e278230cbf2e281016fac8
Author: Chenyang Sun <[email protected]>
AuthorDate: Tue May 12 11:51:15 2026 +0800

    [fix](nereids) Run CheckCast on INSERT VALUES to reject illegal  casts 
(#62905)
    
    ## insert into var1 select CAST('123' AS variant)
    
      INSERT VALUES generates `Cast` nodes via:
    
      ```
      public static Expression castUnbound(Expression expression, DataType
      targetType) {
          if (expression instanceof Literal) {
              return TypeCoercionUtils.castIfNotSameType(expression,
      targetType);  // SAFE: checkCanCastTo
          } else {
              return TypeCoercionUtils.unSafeCast(expression, targetType); // UNSAFE: no validation
          }
      }
      ```
    
    The non-Literal branch goes through `unSafeCast`, which by design **does
    not validate** — validation is delegated to the rule
    `CheckCast.INSTANCE`.
    
    That rule is wired into the standard `ExpressionNormalization`
    (`ExpressionNormalization.java:78`), but
    `InsertIntoValuesAnalyzer.{INSERT_JOBS, BATCH_INSERT_JOBS}` defines its
    own minimal pipeline and **omits `CheckCast.INSTANCE`**. Any non-Literal
    cast on the INSERT VALUES path therefore reaches BE unchecked.
    
    The hole is not variant-specific — it lets through any illegal cast
    (`BITMAP → INT`, `HLL → VARCHAR`, `VARCHAR → BITMAP`, …). Variant → Variant
    is the only pair that crashes BE; the others surface as BE-side runtime
    errors, which is why the hole stayed undetected.
    
      ## Verification
    
      | INSERT | Before | After |
      | --- | --- | --- |
      | `CAST(... AS variant<A>)` into `variant<B>` | **BE crash / corruption** | `cannot cast variant<...> to variant<...>` (FE) |
      | `CAST(bitmap_hash('x') AS INT)` into `INT` | BE error | `cannot cast BITMAP to INT` (FE) |
      | `bitmap_hash('x')` into `INT` (implicit) | BE error | `cannot cast BITMAP to INT` (FE) |
      | `CAST('123' AS BITMAP)` into `BITMAP` | BE error | `cannot cast VARCHAR(3) to BITMAP` (FE) |
---
 .../doris/nereids/util/TypeCoercionUtils.java      |  18 +++
 .../doc_mode/test_variant_cast_strict.out          |  10 ++
 .../doc_mode/test_variant_cast_strict.groovy       | 129 +++++++++++++++++++++
 3 files changed, 157 insertions(+)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
index 2970e9bbe46..3701da979ea 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/TypeCoercionUtils.java
@@ -20,6 +20,7 @@ package org.apache.doris.nereids.util;
 import org.apache.doris.common.Pair;
 import org.apache.doris.nereids.annotation.Developing;
 import org.apache.doris.nereids.exceptions.AnalysisException;
+import org.apache.doris.nereids.exceptions.UnboundException;
 import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext;
 import org.apache.doris.nereids.rules.expression.check.CheckCast;
 import org.apache.doris.nereids.rules.expression.rules.FoldConstantRuleOnFE;
@@ -476,10 +477,27 @@ public class TypeCoercionUtils {
         }
     }
 
+    /**
+     * Wrap {@code expression} in a cast to {@code targetType} when the source 
type can
+     * already be resolved (Literal, or any expression whose {@code 
getDataType()} does
+     * not throw {@link UnboundException}). For Literals, defers to
+     * {@link #castIfNotSameType}; for other resolvable expressions, validates 
the cast
+     * via {@link #checkCanCastTo} before constructing it. Used by INSERT 
VALUES so that
+     * illegal casts (e.g. {@code variant<config_A>} → {@code 
variant<config_B>}) are
+     * rejected at parse time instead of slipping past analysis and crashing 
BE.
+     * Truly unbound expressions are wrapped without validation; the normal 
analysis
+     * pipeline's CheckCast pass will validate them after binding.
+     */
     public static Expression castUnbound(Expression expression, DataType 
targetType) {
         if (expression instanceof Literal) {
             return TypeCoercionUtils.castIfNotSameType(expression, targetType);
         } else {
+            try {
+                checkCanCastTo(expression.getDataType(), targetType);
+            } catch (UnboundException e) {
+                // Source type not yet known (UnboundFunction, UnboundSlot, 
...);
+                // CheckCast in the normal analysis pipeline validates after 
binding.
+            }
             return TypeCoercionUtils.unSafeCast(expression, targetType);
         }
     }
diff --git 
a/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out 
b/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out
new file mode 100644
index 00000000000..2c5e7e4cb18
--- /dev/null
+++ b/regression-test/data/variant_p0/doc_mode/test_variant_cast_strict.out
@@ -0,0 +1,10 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !case2 --
+15002  15001
+
+-- !case3 --
+15003  15001
+
+-- !case4b --
+15004  15001
+
diff --git 
a/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy 
b/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy
new file mode 100644
index 00000000000..d258e2bcd22
--- /dev/null
+++ b/regression-test/suites/variant_p0/doc_mode/test_variant_cast_strict.groovy
@@ -0,0 +1,129 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// INSERT VALUES with explicit cast(<json> as variant) into a doc-mode
+// variant column previously aborted BE in MutableBlock::merge_impl.
+// The root cause: FE allowed implicit Variant->Variant cast even when
+// configurations (max_subcolumns_count, enable_doc_mode, ...) differ,
+// and BE has no real Variant->Variant conversion.
+//
+// Fix: FE rejects Variant->Variant cast when configurations differ.
+// User-visible behavior:
+//   * BAD : cast('<json>' as variant) with mismatched config -> 
AnalysisException
+//   * GOOD: '<json>'                                          -> auto-coerce 
to target
+//   * GOOD: cast('<json>' as variant<...matching...>)         -> direct
+suite("test_variant_cast_strict", "p0") {
+    // Use session variables to set variant defaults (column-level properties
+    // forbid setting max_subcolumns_count and enable_doc_mode together).
+    sql """ set default_variant_enable_doc_mode = true """
+    sql """ set default_variant_max_subcolumns_count = 37 """
+    sql """ set default_variant_doc_materialization_min_rows = 8 """
+    sql """ set default_variant_doc_hash_shard_count = 7 """
+
+    def t = "variant_cast_strict"
+    sql """ DROP TABLE IF EXISTS ${t} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${t} (
+            id bigint,
+            v variant
+        )
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1", "disable_auto_compaction" = 
"true");
+    """
+
+    def jsonValue = 
'{"anchors":{"common_int":150025,"phase_marker":"phase_a","present":true,"row_id":15001},"dynamic":{"path_00000":15001000,"path_00001":15001001},"parent":{"child":{"name":"phase_a_15001"}},"phase_a_small":{"leaf":15001}}'
+
+    // ---- Case 1: BAD path — implicit Variant->Variant with mismatched 
config should be
+    // rejected by FE. Before fix: BE aborts in MutableBlock::merge_impl. 
After fix: FE
+    // throws AnalysisException; BE never receives the plan.
+    // We force a config mismatch on `variant_doc_materialization_min_rows` 
(target=8 from
+    // session, source=999 here).
+    test {
+        sql """ insert into ${t} values (15001, cast('${jsonValue}' as 
variant<properties(
+            "variant_enable_doc_mode" = "true",
+            "variant_doc_materialization_min_rows" = "999",
+            "variant_doc_hash_shard_count" = "7"
+        )>)); """
+        exception "cast"
+    }
+
+    // ---- Case 2: GOOD — drop the cast, let FE coerce String -> target 
Variant directly.
+    sql """ insert into ${t} values (15002, '${jsonValue}'); """
+    qt_case2 """ select id, cast(v['anchors']['row_id'] as bigint) from ${t} 
where id = 15002; """
+
+    // ---- Case 3: GOOD — explicit cast with matching parameters (from same 
session).
+    // Bare `as variant` reads session-default config and matches target 
exactly.
+    sql """ insert into ${t} values (15003, cast('${jsonValue}' as variant)); 
"""
+    qt_case3 """ select id, cast(v['anchors']['row_id'] as bigint) from ${t} 
where id = 15003; """
+
+    // ---- Case 4: cross-table — different variant configs need an explicit 
JSONB hop.
+    def t_src = "variant_cast_strict_src"
+    // Create source table with NO doc-mode by clearing session vars first, 
then restore.
+    sql """ set default_variant_enable_doc_mode = false """
+    sql """ set default_variant_max_subcolumns_count = 0 """
+    sql """ DROP TABLE IF EXISTS ${t_src} """
+    sql """
+        CREATE TABLE IF NOT EXISTS ${t_src} (
+            id bigint,
+            v variant
+        )
+        DUPLICATE KEY(`id`)
+        DISTRIBUTED BY HASH(id) BUCKETS 1
+        PROPERTIES("replication_num" = "1", "disable_auto_compaction" = 
"true");
+    """
+    sql """ insert into ${t_src} values (15004, '${jsonValue}'); """
+    // Restore session vars so target's column-level config keeps matching.
+    sql """ set default_variant_enable_doc_mode = true """
+    sql """ set default_variant_max_subcolumns_count = 37 """
+
+    // 4a: direct copy is rejected (configs differ).
+    test {
+        sql """ insert into ${t} select id, v from ${t_src}; """
+        exception "cast"
+    }
+
+    // 4b: routing through JSONB works.
+    sql """ insert into ${t} select id, cast(cast(v as JSONB) as variant) from 
${t_src}; """
+    qt_case4b """ select id, cast(v['anchors']['row_id'] as bigint) from ${t} 
where id = 15004; """
+
+    // ---- Case 5: BAD — multi-row VALUES inside a transaction takes the
+    // BatchInsertIntoTableCommand (BATCH_INSERT_JOBS) path. The fix in
+    // TypeCoercionUtils.castUnbound rejects each row's illegal cast at parse 
time,
+    // so this path is now covered without any pipeline-level rewrite.
+    sql """ begin """
+    try {
+        test {
+            sql """
+                insert into ${t} values
+                    (15005, cast('${jsonValue}' as variant<properties(
+                        "variant_enable_doc_mode" = "true",
+                        "variant_doc_materialization_min_rows" = "999",
+                        "variant_doc_hash_shard_count" = "7"
+                    )>)),
+                    (15006, cast('${jsonValue}' as variant<properties(
+                        "variant_enable_doc_mode" = "true",
+                        "variant_doc_materialization_min_rows" = "999",
+                        "variant_doc_hash_shard_count" = "7"
+                    )>));
+            """
+            exception "cast"
+        }
+    } finally {
+        sql """ rollback """
+    }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to