This is an automated email from the ASF dual-hosted git repository.
eldenmoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 8cb5a4210f7 [fix](variant) Disable strict mode for variant internal
cast to fix INSERT INTO SELECT returning all NULLs (#60881)
8cb5a4210f7 is described below
commit 8cb5a4210f794abca49b9aa0355120877aba2f32
Author: lihangyu <[email protected]>
AuthorDate: Sat Feb 28 10:02:43 2026 +0800
[fix](variant) Disable strict mode for variant internal cast to fix INSERT
INTO SELECT returning all NULLs (#60881)
### Bug
INSERT INTO SELECT with LEFT JOIN on CTE that casts variant subcolumns
(e.g., cast(content['val'] as decimal)) returns all NULLs for the
right-side
CTE fields, while a direct SELECT produces correct results.
### Root cause
In INSERT context, strict mode is enabled. When cast_from_variant_impl
clones the FunctionContext, it inherits strict mode. The variant root
column may
contain null/empty JSONB entries for rows where the subcolumn doesn't
exist
(mixed-schema variant data). In strict mode, these null entries cause
the ENTIRE
cast to fail and return all NULLs.
### Fix
Explicitly disable strict mode in the cloned FunctionContext inside
cast_from_variant_impl, since this is an internal type conversion within
variant
processing, not user-provided INSERT data validation.
### Tests:
- BE unit test: CastFromVariantStrictModeRegression (variant→Int32,
string→Int32 with strict mode enabled)
- Regression test: test_variant_cast_strict_mode (INSERT INTO SELECT
with LEFT JOIN on CTE with variant subcolumn casts, verifying results
match SELECT)
---
be/src/vec/functions/cast/cast_to_variant.h | 8 ++
.../function/cast/function_variant_cast_test.cpp | 148 +++++++++++++++++++++
.../variant_p0/test_variant_cast_strict_mode.out | 19 +++
.../test_variant_cast_strict_mode.groovy | 131 ++++++++++++++++++
4 files changed, 306 insertions(+)
diff --git a/be/src/vec/functions/cast/cast_to_variant.h
b/be/src/vec/functions/cast/cast_to_variant.h
index 92139e86e28..491a3f1a92b 100644
--- a/be/src/vec/functions/cast/cast_to_variant.h
+++ b/be/src/vec/functions/cast/cast_to_variant.h
@@ -57,6 +57,14 @@ inline Status cast_from_variant_impl(FunctionContext*
context, Block& block,
auto new_context = context == nullptr ? nullptr : context->clone();
if (new_context != nullptr) {
new_context->set_jsonb_string_as_string(true);
+ // Disable strict mode for the inner JSONB→target conversion.
+ // The variant root column may contain null/empty JSONB entries
for rows
+ // where the subcolumn doesn't exist (e.g., mixed-schema variant
data).
+ // In strict mode (INSERT context), these null entries cause the
ENTIRE
+ // cast to fail and return all NULLs. Since this is an internal
type
+ // conversion within variant, not user-provided INSERT data
validation,
+ // strict mode should not apply here.
+ new_context->set_enable_strict_mode(false);
}
// dst type nullable has been removed, so we should remove the inner
nullable of root column
auto wrapper =
diff --git a/be/test/vec/function/cast/function_variant_cast_test.cpp
b/be/test/vec/function/cast/function_variant_cast_test.cpp
index 203606c236d..acfcb9e255e 100644
--- a/be/test/vec/function/cast/function_variant_cast_test.cpp
+++ b/be/test/vec/function/cast/function_variant_cast_test.cpp
@@ -24,9 +24,11 @@
#include "runtime/primitive_type.h"
#include "runtime/runtime_state.h"
#include "vec/columns/column_array.h"
+#include "vec/columns/column_decimal.h"
#include "vec/columns/column_variant.h"
#include "vec/core/field.h"
#include "vec/data_types/data_type_array.h"
+#include "vec/data_types/data_type_decimal.h"
#include "vec/data_types/data_type_nullable.h"
#include "vec/data_types/data_type_number.h"
#include "vec/data_types/data_type_string.h"
@@ -465,4 +467,150 @@ TEST(FunctionVariantCast, CastFromVariantWithEmptyRoot) {
}
}
+// Regression test for JIRA-233:
+// INSERT INTO SELECT with variant→target cast returns all NULLs in strict
mode.
+// The bug was that cast_from_variant_impl inherited strict mode from the
INSERT context,
+// causing internal JSONB→target conversion to fail for null entries, which
made the
+// entire cast return all NULLs.
+TEST(FunctionVariantCast, CastFromVariantStrictModeRegression) {
+ // Test: variant with nullable root → Int32 with strict mode enabled
+ // Before fix: strict mode causes all NULLs
+ // After fix: non-null entries produce correct values, null entries stay
NULL
+ {
+ auto variant_type = std::make_shared<DataTypeVariant>();
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto nullable_int32_type =
std::make_shared<DataTypeNullable>(int32_type);
+
+ // Create variant column with nullable integer root (some null, some
not)
+ auto variant_col = ColumnVariant::create(0);
+ variant_col->create_root(
+ nullable_int32_type,
+ ColumnNullable::create(ColumnInt32::create(),
ColumnUInt8::create()));
+ MutableColumnPtr data = variant_col->get_root();
+
+ // Row 0: value 42
+ data->insert(Field::create_field<TYPE_INT>(42));
+ // Row 1: NULL (simulating a row where the variant subcolumn doesn't
exist)
+ data->insert(Field::create_field<TYPE_NULL>(Null()));
+ // Row 2: value 100
+ data->insert(Field::create_field<TYPE_INT>(100));
+ // Row 3: NULL
+ data->insert(Field::create_field<TYPE_NULL>(Null()));
+ // Row 4: value -5
+ data->insert(Field::create_field<TYPE_INT>(-5));
+
+ variant_col->finalize();
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, int32_type, "int32_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, int32_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+
+ // Enable strict mode to simulate INSERT context (this is the key!)
+ ctx->set_enable_strict_mode(true);
+
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
5).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+
+ // The result should be a nullable column
+ const auto* nullable_result = assert_cast<const
ColumnNullable*>(result_col.get());
+ ASSERT_EQ(nullable_result->size(), 5);
+
+ const auto& result_data =
+ assert_cast<const
ColumnInt32&>(nullable_result->get_nested_column());
+ const auto& null_map = nullable_result->get_null_map_data();
+
+ // Row 0: value 42, not null
+ ASSERT_EQ(null_map[0], 0);
+ ASSERT_EQ(result_data.get_element(0), 42);
+
+ // Row 1: NULL
+ ASSERT_EQ(null_map[1], 1);
+
+ // Row 2: value 100, not null
+ ASSERT_EQ(null_map[2], 0);
+ ASSERT_EQ(result_data.get_element(2), 100);
+
+ // Row 3: NULL
+ ASSERT_EQ(null_map[3], 1);
+
+ // Row 4: value -5, not null
+ ASSERT_EQ(null_map[4], 0);
+ ASSERT_EQ(result_data.get_element(4), -5);
+ }
+
+ // Test 2: variant with string root → Int32 with strict mode
+ // Simulates casting variant['field'] that stored as string "123" to Int32
+ {
+ auto variant_type = std::make_shared<DataTypeVariant>();
+ auto int32_type = std::make_shared<DataTypeInt32>();
+ auto nullable_string_type =
+
std::make_shared<DataTypeNullable>(std::make_shared<DataTypeString>());
+
+ auto variant_col = ColumnVariant::create(0);
+ variant_col->create_root(
+ nullable_string_type,
+ ColumnNullable::create(ColumnString::create(),
ColumnUInt8::create()));
+ MutableColumnPtr data = variant_col->get_root();
+
+ // Row 0: "42"
+ data->insert(Field::create_field<TYPE_STRING>(String("42")));
+ // Row 1: NULL
+ data->insert(Field::create_field<TYPE_NULL>(Null()));
+ // Row 2: "100"
+ data->insert(Field::create_field<TYPE_STRING>(String("100")));
+
+ variant_col->finalize();
+
+ ColumnsWithTypeAndName arguments {{variant_col->get_ptr(),
variant_type, "variant_col"},
+ {nullptr, int32_type, "int32_type"}};
+
+ auto function =
+ SimpleFunctionFactory::instance().get_function("CAST",
arguments, int32_type);
+ ASSERT_NE(function, nullptr);
+
+ Block block {arguments};
+ size_t result_column = block.columns();
+ block.insert({nullptr, int32_type, "result"});
+
+ RuntimeState state;
+ auto ctx = FunctionContext::create_context(&state, {}, {});
+
+ // Enable strict mode (INSERT context)
+ ctx->set_enable_strict_mode(true);
+
+ ASSERT_TRUE(function->execute(ctx.get(), block, {0}, result_column,
3).ok());
+
+ auto result_col = block.get_by_position(result_column).column;
+ ASSERT_NE(result_col.get(), nullptr);
+
+ const auto* nullable_result = assert_cast<const
ColumnNullable*>(result_col.get());
+ ASSERT_EQ(nullable_result->size(), 3);
+
+ const auto& result_data =
+ assert_cast<const
ColumnInt32&>(nullable_result->get_nested_column());
+ const auto& null_map = nullable_result->get_null_map_data();
+
+ // After fix: non-null entries should produce correct values
+ ASSERT_EQ(null_map[0], 0);
+ ASSERT_EQ(result_data.get_element(0), 42);
+
+ ASSERT_EQ(null_map[1], 1); // NULL stays NULL
+
+ ASSERT_EQ(null_map[2], 0);
+ ASSERT_EQ(result_data.get_element(2), 100);
+ }
+}
+
} // namespace doris::vectorized
\ No newline at end of file
diff --git a/regression-test/data/variant_p0/test_variant_cast_strict_mode.out
b/regression-test/data/variant_p0/test_variant_cast_strict_mode.out
new file mode 100644
index 00000000000..ddd52dd911a
--- /dev/null
+++ b/regression-test/data/variant_p0/test_variant_cast_strict_mode.out
@@ -0,0 +1,19 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !select --
+1 100.50 a
+2 200.75 b
+3 \N c
+4 300.25 d
+
+-- !insert --
+1 100.50 a
+2 200.75 b
+3 \N c
+4 300.25 d
+
+-- !direct_insert --
+1 100.50 a
+2 200.75 b
+3 \N c
+4 300.25 d
+
diff --git
a/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy
b/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy
new file mode 100644
index 00000000000..4e5d88738e8
--- /dev/null
+++ b/regression-test/suites/variant_p0/test_variant_cast_strict_mode.groovy
@@ -0,0 +1,131 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Regression test for JIRA-233:
+// INSERT INTO SELECT with LEFT JOIN on CTE that casts variant subcolumns
+// returns all NULLs for the right-side CTE fields, while SELECT returns
correct results.
+//
+// Root cause: In INSERT context, strict mode is enabled. When
cast_from_variant_impl
+// clones the FunctionContext, it inherits strict mode. The variant root
column may have
+// null/empty JSONB entries for rows where the subcolumn doesn't exist. In
strict mode,
+// these cause the entire cast to fail and return all NULLs.
+
+suite("test_variant_cast_strict_mode", "variant_type") {
+
+ sql """ set default_variant_enable_doc_mode = false """
+
+ def variant_src = "test_variant_cast_strict_mode_src"
+ def target_tbl = "test_variant_cast_strict_mode_target"
+
+ sql "DROP TABLE IF EXISTS ${variant_src}"
+ sql "DROP TABLE IF EXISTS ${target_tbl}"
+
+ sql """
+ CREATE TABLE ${variant_src} (
+ `id` int NOT NULL,
+ `content` variant NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ // Insert rows: some have "val" field, some don't (mixed schema)
+ sql """
+ INSERT INTO ${variant_src} VALUES
+ (1, '{"val":"100.50", "name":"a"}'),
+ (2, '{"val":"200.75", "name":"b"}'),
+ (3, '{"name":"c"}'),
+ (4, '{"val":"300.25", "name":"d"}')
+ """
+
+ sql """
+ CREATE TABLE ${target_tbl} (
+ `id` int NULL,
+ `val_decimal` decimal(20,2) NULL,
+ `name_str` varchar(100) NULL
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ // Step 1: Verify SELECT with variant cast returns correct results
+ qt_select """
+ WITH cte AS (
+ SELECT
+ id,
+ cast(content['val'] AS decimal(20,2)) AS val_decimal,
+ cast(content['name'] AS varchar) AS name_str
+ FROM ${variant_src}
+ )
+ SELECT * FROM cte ORDER BY id
+ """
+
+ // Step 2: INSERT INTO SELECT with LEFT JOIN pattern (mirrors the original
issue)
+ // The LEFT JOIN pattern is key: v1 LEFT JOIN v3
+ // v3 contains the variant->decimal cast
+ sql """
+ INSERT INTO ${target_tbl}
+ WITH
+ v1 AS (
+ SELECT id FROM ${variant_src}
+ ),
+ v3 AS (
+ SELECT
+ id,
+ cast(content['val'] AS decimal(20,2)) AS val_decimal,
+ cast(content['name'] AS varchar) AS name_str
+ FROM ${variant_src}
+ )
+ SELECT
+ v1.id,
+ v3.val_decimal,
+ v3.name_str
+ FROM v1
+ LEFT JOIN v3 ON v3.id = v1.id
+ """
+
+ // Step 3: Verify inserted results match SELECT results
+ // Before fix: val_decimal and name_str were all NULL
+ // After fix: should match the SELECT results
+ qt_insert """
+ SELECT * FROM ${target_tbl} ORDER BY id
+ """
+
+ // Step 4: Test direct INSERT INTO SELECT without LEFT JOIN (simpler case)
+ sql "TRUNCATE TABLE ${target_tbl}"
+ sql """
+ INSERT INTO ${target_tbl}
+ SELECT
+ id,
+ cast(content['val'] AS decimal(20,2)) AS val_decimal,
+ cast(content['name'] AS varchar) AS name_str
+ FROM ${variant_src}
+ """
+ qt_direct_insert """
+ SELECT * FROM ${target_tbl} ORDER BY id
+ """
+
+ // Cleanup
+ sql "DROP TABLE IF EXISTS ${variant_src}"
+ sql "DROP TABLE IF EXISTS ${target_tbl}"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]