This is an automated email from the ASF dual-hosted git repository.
huajianlan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new dc57a0542c3 [feat](nereids) Add session variable
enable_insert_value_auto_cast for insert value truncate long string (#55325)
dc57a0542c3 is described below
commit dc57a0542c3b8818277618c03e3c9607a54dbc82
Author: yujun <[email protected]>
AuthorDate: Thu Aug 28 15:10:48 2025 +0800
[feat](nereids) Add session variable enable_insert_value_auto_cast for
insert value truncate long string (#55325)
for below sql:
```sql
create table t(a int, b varchar(2));
insert into t values(1, 'abcde');
```
For Doris 2.0, the insert command throws the error 'Insert has filtered
data in strict mode', because the length of the value 'abcde' exceeds
varchar(2).
MySQL and PostgreSQL also reject the above SQL with an error for the same reason.
For Doris versions >= 2.1.0, it does not throw an error and just truncates
'abcde' to 'ab'.
What's more, for stream load, regardless of the version (2.0, 2.1, ...),
the stream load fails.
However, since 2.1 has been released for more than 1.5 years, we don't want
to make a behaviour change for 2.1. Instead, this commit adds a session
variable, enable_insert_value_auto_cast. When inserting a string value that
is longer than the target column allows, the behaviour is:
- if enable_insert_value_auto_cast = true (default), the longer string
is truncated and the insert succeeds;
- if enable_insert_value_auto_cast = false and enable_insert_strict =
true (default), the exception 'Insert has filtered data in strict
mode' is thrown;
- if enable_insert_value_auto_cast = false and enable_insert_strict =
false, the row with the longer string is filtered out and the other
rows are inserted successfully.
Related PR: #52802
---
.../doris/nereids/rules/analysis/BindSink.java | 5 +++-
.../java/org/apache/doris/qe/SessionVariable.java | 12 +++++++++
.../nereids_p0/insert_into_table/insert_values.out | Bin 2621 -> 2606 bytes
.../insert_into_table/insert_values.groovy | 28 +++++++++++++++------
4 files changed, 37 insertions(+), 8 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java
index b54248c1ad7..d0ad141c802 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java
@@ -289,6 +289,9 @@ public class BindSink implements AnalysisRuleFactory {
// add cast project
List<NamedExpression> castExprs = Lists.newArrayList();
+ ConnectContext connCtx = ConnectContext.get();
+ final boolean truncateString = needTruncateStringWhenInsert
+ && (connCtx == null ||
connCtx.getSessionVariable().enableInsertValueAutoCast);
for (int i = 0; i < tableSchema.size(); ++i) {
Column col = tableSchema.get(i);
NamedExpression expr = columnToOutput.get(col.getName()); //
relative outputExpr
@@ -308,7 +311,7 @@ public class BindSink implements AnalysisRuleFactory {
int targetLength = ((CharacterType) targetType).getLen();
if (sourceLength == targetLength) {
castExpr = TypeCoercionUtils.castIfNotSameType(castExpr,
targetType);
- } else if (needTruncateStringWhenInsert && sourceLength >
targetLength && targetLength >= 0) {
+ } else if (truncateString && sourceLength > targetLength &&
targetLength >= 0) {
castExpr = new Substring(castExpr, Literal.of(1),
Literal.of(targetLength));
} else if (targetType.isStringType()) {
castExpr = new Cast(castExpr, StringType.INSTANCE);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index eb4474862d4..55d130e945e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -146,6 +146,7 @@ public class SessionVariable implements Serializable,
Writable {
public static final String MAX_INSTANCE_NUM = "max_instance_num";
public static final String DML_PLAN_RETRY_TIMES = "DML_PLAN_RETRY_TIMES";
public static final String ENABLE_INSERT_STRICT = "enable_insert_strict";
+ public static final String ENABLE_INSERT_VALUE_AUTO_CAST =
"enable_insert_value_auto_cast";
public static final String INSERT_MAX_FILTER_RATIO =
"insert_max_filter_ratio";
public static final String ENABLE_SERVER_SIDE_PREPARED_STATEMENT =
"enable_server_side_prepared_statement";
@@ -1143,6 +1144,13 @@ public class SessionVariable implements Serializable,
Writable {
@VariableMgr.VarAttr(name = ENABLE_INSERT_STRICT, needForward = true)
public boolean enableInsertStrict = true;
+ @VariableMgr.VarAttr(name = ENABLE_INSERT_VALUE_AUTO_CAST, needForward =
true, description = {
+ "INSERT VALUE 语句是否自动类型转换。当前只针对长字符串自动截短。默认开。",
+ "INSERT VALUE statement whether to automatically type cast. Only
use for truncate long string. "
+ + "ON by default."
+ })
+ public boolean enableInsertValueAutoCast = true;
+
@VariableMgr.VarAttr(name = INSERT_MAX_FILTER_RATIO, needForward = true)
public double insertMaxFilterRatio = 1.0;
@@ -3540,6 +3548,10 @@ public class SessionVariable implements Serializable,
Writable {
this.enableInsertStrict = enableInsertStrict;
}
+ public boolean getEnableInsertValueAutoCast() {
+ return enableInsertValueAutoCast;
+ }
+
public double getInsertMaxFilterRatio() {
return insertMaxFilterRatio;
}
diff --git
a/regression-test/data/nereids_p0/insert_into_table/insert_values.out
b/regression-test/data/nereids_p0/insert_into_table/insert_values.out
index 62d824e5e6f..59ba6ff19ac 100644
Binary files
a/regression-test/data/nereids_p0/insert_into_table/insert_values.out and
b/regression-test/data/nereids_p0/insert_into_table/insert_values.out differ
diff --git
a/regression-test/suites/nereids_p0/insert_into_table/insert_values.groovy
b/regression-test/suites/nereids_p0/insert_into_table/insert_values.groovy
index 98e42426321..2bd6eb54f84 100644
--- a/regression-test/suites/nereids_p0/insert_into_table/insert_values.groovy
+++ b/regression-test/suites/nereids_p0/insert_into_table/insert_values.groovy
@@ -21,8 +21,6 @@ suite('nereids_insert_into_values') {
sql 'set enable_nereids_dml=true'
sql 'set enable_strict_consistency_dml=true'
- sql 'use nereids_insert_into_table_test'
-
def t1 = 'value_t1'
def t2 = 'value_t2'
def t3 = 'value_t3'
@@ -165,16 +163,32 @@ suite('nereids_insert_into_values') {
drop table if exists test_insert_more_string;
CREATE TABLE test_insert_more_string (
`r_regionkey` int NULL,
- `r_name` varchar(25) NULL,
- `r_comment` varchar(152) NULL
+ `r_name` varchar(4) NULL
)
DUPLICATE KEY(`r_regionkey`)
DISTRIBUTED BY HASH(`r_regionkey`)
BUCKETS 1 PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
- insert into test_insert_more_string values (3,
"akljalkjbalkjsldkrjewokjfalksdjflaksjfdlaskjfalsdkfjalsdfjkasfdl", "aa")
"""
- qt_select_test_insert_more_string "select * from test_insert_more_string"
-}
\ No newline at end of file
+ // shorter varchar is ok
+ sql "insert into test_insert_more_string values (1, 'ab')"
+
+ // set enable_insert_value_auto_cast = true
+ // longer varchar will truncate
+ sql "insert into test_insert_more_string values (2, 'abcdefg')"
+
+ // when disable string auto cast and in insert strict mode, insert will
failed
+ sql 'set enable_insert_value_auto_cast = false'
+ test {
+ sql "insert into test_insert_more_string values (3, 'hi'), (4,
'jklmn')"
+ exception 'Insert has filtered data in strict mode'
+ }
+
+ // when disable insert strict, the longer varchar row will be filtered,
other rows will succ
+ sql 'set enable_insert_strict = false'
+ sql "insert into test_insert_more_string values (5, 'o'), (6, 'pqrst')"
+
+ order_qt_select_test_insert_more_string "select * from
test_insert_more_string"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]