This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch branch-3.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-3.0 by this push:
new b59a268ce5f [function](cast)Make string casting to integers more like
MySQL's behavior (#38847)
b59a268ce5f is described below
commit b59a268ce5fe0c31d607287042729123a53bd391
Author: Mryange <[email protected]>
AuthorDate: Thu Aug 22 20:56:12 2024 +0800
[function](cast)Make string casting to integers more like MySQL's behavior
(#38847)
## Proposed changes
There are two issues here. First, the results of casting a string to an
integer are inconsistent between FE and BE.
```
FE
mysql [(none)]>select cast('3.000' as int);
+----------------------+
| cast('3.000' as INT) |
+----------------------+
| 3 |
+----------------------+
mysql [(none)]>set debug_skip_fold_constant = true;
BE
mysql [(none)]>select cast('3.000' as int);
+----------------------+
| cast('3.000' as INT) |
+----------------------+
| NULL |
+----------------------+
```
The second issue is that casting on BE converts '3.0' to NULL. This change
unifies the casting logic of FE and BE.
<!--Describe your changes.-->
---
be/src/util/string_parser.hpp | 20 ++++++++++++++++--
.../data/datatype_p0/string/test_string_basic.out | 6 ++++++
.../load_p0/stream_load/test_stream_load_cast.out | 9 +++++---
.../cast_function/test_cast_function.out | 6 +++---
.../data/variant_p0/compaction/test_compaction.out | 16 +++++++--------
regression-test/data/variant_p0/load.out | 2 +-
.../datatype_p0/string/test_string_basic.groovy | 8 +++++++-
.../stream_load/test_stream_load_cast.groovy | 24 +++++++++++-----------
8 files changed, 61 insertions(+), 30 deletions(-)
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 8bc6ecae914..a9cea7b8d29 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -243,6 +243,20 @@ private:
return true;
}
+ // For strings like "3.0", "3.123", and "3.", can parse them as 3.
+ static inline bool is_float_suffix(const char* __restrict s, int len) {
+ return (s[0] == '.' && is_all_digit(s + 1, len - 1));
+ }
+
+ static inline bool is_all_digit(const char* __restrict s, int len) {
+ for (int i = 0; i < len; ++i) {
+ if (!LIKELY(s[i] >= '0' && s[i] <= '9')) {
+ return false;
+ }
+ }
+ return true;
+ }
+
// Returns the position of the first non-whitespace character in s.
static inline int skip_leading_whitespace(const char* __restrict s, int
len) {
int i = 0;
@@ -306,7 +320,8 @@ T StringParser::string_to_int_internal(const char*
__restrict s, int len, ParseR
}
val = val * 10 + digit;
} else {
- if ((UNLIKELY(i == first || !is_all_whitespace(s + i, len - i)))) {
+ if ((UNLIKELY(i == first || (!is_all_whitespace(s + i, len - i) &&
+ !is_float_suffix(s + i, len - i))))) {
// Reject the string because either the first char was not a
digit,
// or the remaining chars are not all whitespace
*result = PARSE_FAILURE;
@@ -448,7 +463,8 @@ T StringParser::string_to_int_no_overflow(const char*
__restrict s, int len, Par
T digit = s[i] - '0';
val = val * 10 + digit;
} else {
- if ((UNLIKELY(!is_all_whitespace(s + i, len - i)))) {
+ if ((UNLIKELY(!is_all_whitespace(s + i, len - i) &&
+ !is_float_suffix(s + i, len - i)))) {
*result = PARSE_FAILURE;
return 0;
}
diff --git a/regression-test/data/datatype_p0/string/test_string_basic.out
b/regression-test/data/datatype_p0/string/test_string_basic.out
index 6270f4f5d89..af2c8f666e3 100644
--- a/regression-test/data/datatype_p0/string/test_string_basic.out
+++ b/regression-test/data/datatype_p0/string/test_string_basic.out
@@ -98,3 +98,9 @@
-- !test --
a
+-- !cast_string_to_int --
+3 3 0 0 3 \N 3
+
+-- !cast_string_to_int --
+3 3 0 0 3 \N 3
+
diff --git a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
index 23256d7aeb0..d2373568a6f 100644
--- a/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
+++ b/regression-test/data/load_p0/stream_load/test_stream_load_cast.out
@@ -1,18 +1,21 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !sql1 --
-\N \N \N \N \N \N \N \N \N \N
\N \N
+3 3 3 3 3 \N \N \N \N \N
\N \N
-- !sql2 --
-\N \N \N \N \N \N \N \N \N \N
\N \N
+3 3 3 3 3 \N \N \N \N \N
\N \N
+3 3 3 3 3 \N \N \N \N \N
\N \N
-- !sql3 --
\N \N \N \N \N \N \N \N \N \N
\N \N
+3 3 3 3 3 \N \N \N \N \N
\N \N
-- !sql4 --
-\N \N \N \N \N 3.12 3.12 3.1 2024-04-02
2024-04-02 2024-04-02T17:00 2024-04-02T17:00
+3 3 3 3 3 3.12 3.12 3.1 2024-04-02
2024-04-02 2024-04-02T17:00 2024-04-02T17:00
-- !sql5 --
\N \N \N \N \N \N \N 99999999.9 \N
\N \N \N
+3 3 3 3 3 3.12 3.12 3.1 2024-04-02
2024-04-02 2024-04-02T17:00 2024-04-02T17:00
-- !sql6 --
\N \N \N \N \N \N \N 99999999.9 \N
\N \N \N
diff --git
a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
index 1eed1411842..6b34e73bd2e 100644
---
a/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
+++
b/regression-test/data/nereids_p0/sql_functions/cast_function/test_cast_function.out
@@ -30,11 +30,11 @@ true
\N
-- !sql_to_small --
-\N
+1212
-- !sql_to_int --
-\N
+1212
-- !sql_to_big --
-\N
+1212
diff --git a/regression-test/data/variant_p0/compaction/test_compaction.out
b/regression-test/data/variant_p0/compaction/test_compaction.out
index 57a5c142fbb..0b905e3930f 100644
--- a/regression-test/data/variant_p0/compaction/test_compaction.out
+++ b/regression-test/data/variant_p0/compaction/test_compaction.out
@@ -58,8 +58,8 @@
14 [null]
17 [1]
17 [1]
-18 [1, 2, null]
-18 [1, 2, null]
+18 [1, 2, 1]
+18 [1, 2, 1]
-- !sql_3 --
19 1 {"c":1}
@@ -146,8 +146,8 @@
14 [null]
17 [1]
17 [1]
-18 [1, 2, null]
-18 [1, 2, null]
+18 [1, 2, 1]
+18 [1, 2, 1]
-- !sql_33 --
19 1 {"c":1}
@@ -206,7 +206,7 @@
-- !sql_2 --
14 [null]
17 [1]
-18 [1, 2, null]
+18 [1, 2, 1]
-- !sql_3 --
19 1 {"c":1}
@@ -259,7 +259,7 @@
-- !sql_22 --
14 [null]
17 [1]
-18 [1, 2, null]
+18 [1, 2, 1]
-- !sql_33 --
19 1 {"c":1}
@@ -310,7 +310,7 @@
-- !sql_2 --
14 [null]
17 [1]
-18 [1, 2, null]
+18 [1, 2, 1]
-- !sql_3 --
19 1 {"c":1}
@@ -363,7 +363,7 @@
-- !sql_22 --
14 [null]
17 [1]
-18 [1, 2, null]
+18 [1, 2, 1]
-- !sql_33 --
19 1 {"c":1}
diff --git a/regression-test/data/variant_p0/load.out
b/regression-test/data/variant_p0/load.out
index 0127f11452b..988f3fa819f 100644
--- a/regression-test/data/variant_p0/load.out
+++ b/regression-test/data/variant_p0/load.out
@@ -1,6 +1,6 @@
-- This file is automatically generated. You should know what you did if you
want to edit this
-- !sql1 --
-1 [1, 2, null]
+1 [1, 2, 1]
1 [1]
1 [1]
1 [null]
diff --git a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
index 625239f7369..2e1af427fc3 100644
--- a/regression-test/suites/datatype_p0/string/test_string_basic.groovy
+++ b/regression-test/suites/datatype_p0/string/test_string_basic.groovy
@@ -376,5 +376,11 @@ suite("test_string_basic") {
}
assertEquals(table_too_long, "fail")
sql "drop table if exists varchar_table_too_long;"
-}
+ // calculations on the BE.
+ sql """ set debug_skip_fold_constant = true;"""
+ qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int)
, cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int),
cast('3.000 ' as int), cast('3.' as int)"""
+ // calculations on the FE.
+ sql """ set debug_skip_fold_constant = false;"""
+ qt_cast_string_to_int""" select cast('3.123' as int),cast('3.000' as int)
, cast('0000.0000' as int) , cast('0000' as int), cast('3.123' as int),
cast('3.000 ' as int), cast('3.' as int)"""
+}
\ No newline at end of file
diff --git
a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
index cf2ef15333a..d173c0e76e1 100644
--- a/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
+++ b/regression-test/suites/load_p0/stream_load/test_stream_load_cast.groovy
@@ -62,7 +62,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql1 "select * from ${tableName}"
+ qt_sql1 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
@@ -83,9 +83,9 @@ suite("test_stream_load_cast", "p0") {
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
- assertEquals("fail", json.Status.toLowerCase())
+ assertEquals("success", json.Status.toLowerCase())
assertEquals(1, json.NumberTotalRows)
- assertEquals(1, json.NumberFilteredRows)
+ assertEquals(0, json.NumberFilteredRows)
}
}
@@ -112,7 +112,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql2 "select * from ${tableName}"
+ qt_sql2 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
@@ -134,9 +134,9 @@ suite("test_stream_load_cast", "p0") {
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
- assertEquals("fail", json.Status.toLowerCase())
+ assertEquals("success", json.Status.toLowerCase())
assertEquals(1, json.NumberTotalRows)
- assertEquals(1, json.NumberFilteredRows)
+ assertEquals(0, json.NumberFilteredRows)
}
}
@@ -162,7 +162,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql3 "select * from ${tableName}"
+ qt_sql3 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
@@ -210,7 +210,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql4 "select * from ${tableName}"
+ qt_sql4 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
@@ -231,9 +231,9 @@ suite("test_stream_load_cast", "p0") {
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
- assertEquals("fail", json.Status.toLowerCase())
+ assertEquals("success", json.Status.toLowerCase())
assertEquals(1, json.NumberTotalRows)
- assertEquals(1, json.NumberFilteredRows)
+ assertEquals(0, json.NumberFilteredRows)
}
}
@@ -259,7 +259,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql5 "select * from ${tableName}"
+ qt_sql5 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
@@ -311,7 +311,7 @@ suite("test_stream_load_cast", "p0") {
}
}
sql "sync"
- qt_sql6 "select * from ${tableName}"
+ qt_sql6 "select * from ${tableName} order by k0"
sql "sync"
sql "truncate table ${tableName}"
sql "sync"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]