This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new c23384ff075 [fix](decimal) Fix long string casting to decimalv2
(#35121)
c23384ff075 is described below
commit c23384ff0759b442c8d540110ee3d2bbf079a0e1
Author: Gabriel <[email protected]>
AuthorDate: Wed May 22 14:24:22 2024 +0800
[fix](decimal) Fix long string casting to decimalv2 (#35121)
---
be/src/util/string_parser.hpp | 169 ++++++++-------------
be/test/vec/data_types/from_string_test.cpp | 2 +-
.../data_types/serde/data_type_serde_csv_test.cpp | 31 ++--
.../data_types/serde/data_type_serde_text_test.cpp | 37 ++---
.../datatype_p0/decimalv2/test_decimalv2_load.out | 8 -
.../decimalv2/test_decimalv2_overflow2.out | 11 +-
.../decimalv2/test_decimalv2_load.groovy | 36 -----
.../decimalv2/test_decimalv2_overflow2.groovy | 18 +++
8 files changed, 121 insertions(+), 191 deletions(-)
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 521d1a6f39c..34bd678c947 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -594,123 +594,74 @@ T StringParser::string_to_decimal(const char* __restrict
s, int len, int type_pr
bool found_exponent = false;
int8_t exponent = 0;
T value = 0;
- if constexpr (TYPE_DECIMALV2 == P) {
- // decimalv2 do not care type_scale and type_precision,just keep the
origin logic
- for (int i = 0; i < len; ++i) {
- const char& c = s[i];
- if (LIKELY('0' <= c && c <= '9')) {
- found_value = true;
- // Ignore digits once the type's precision limit is reached.
This avoids
- // overflowing the underlying storage while handling a string
like
- // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for
ignored digits and
- // an exponent will be made later.
- if (LIKELY(type_precision > precision)) {
- value = (value * 10) + (c - '0'); // Benchmarks are faster
with parenthesis...
- } else {
- *result = StringParser::PARSE_OVERFLOW;
- value = is_negative
- ?
vectorized::min_decimal_value<DecimalType>(type_precision)
- :
vectorized::max_decimal_value<DecimalType>(type_precision);
- return value;
- }
- DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't
work with __int128.
+ bool has_round = false;
+ for (int i = 0; i < len; ++i) {
+ const char& c = s[i];
+ if (LIKELY('0' <= c && c <= '9')) {
+ found_value = true;
+ // Ignore digits once the type's precision limit is reached. This
avoids
+ // overflowing the underlying storage while handling a string like
+ // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored
digits and
+ // an exponent will be made later.
+ if (LIKELY(type_precision > precision) && !has_round) {
+ value = (value * 10) + (c - '0'); // Benchmarks are faster
with parenthesis...
++precision;
scale += found_dot;
- } else if (c == '.' && LIKELY(!found_dot)) {
- found_dot = 1;
- } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
- found_exponent = true;
- exponent = string_to_int_internal<int8_t>(s + i + 1, len - i -
1, result);
- if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
- if (*result == StringParser::PARSE_OVERFLOW && exponent <
0) {
- *result = StringParser::PARSE_UNDERFLOW;
- }
- return 0;
- }
- break;
- } else {
- if (value == 0) {
- *result = StringParser::PARSE_FAILURE;
- return 0;
- }
- *result = StringParser::PARSE_SUCCESS;
- value *= get_scale_multiplier<T>(type_scale - scale);
-
- return is_negative ? T(-value) : T(value);
- }
- }
- } else {
- // decimalv3
- bool has_round = false;
- for (int i = 0; i < len; ++i) {
- const char& c = s[i];
- if (LIKELY('0' <= c && c <= '9')) {
- found_value = true;
- // Ignore digits once the type's precision limit is reached.
This avoids
- // overflowing the underlying storage while handling a string
like
- // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for
ignored digits and
- // an exponent will be made later.
- if (LIKELY(type_precision > precision) && !has_round) {
- value = (value * 10) + (c - '0'); // Benchmarks are faster
with parenthesis...
- ++precision;
- scale += found_dot;
- cur_digit = precision - scale;
- } else if (!found_dot && max_digit < (precision - scale)) {
- *result = StringParser::PARSE_OVERFLOW;
- value = is_negative
- ?
vectorized::min_decimal_value<DecimalType>(type_precision)
+ cur_digit = precision - scale;
+ } else if (!found_dot && max_digit < (precision - scale)) {
+ *result = StringParser::PARSE_OVERFLOW;
+ value = is_negative ?
vectorized::min_decimal_value<DecimalType>(type_precision)
:
vectorized::max_decimal_value<DecimalType>(type_precision);
- return value;
- } else if (found_dot && scale >= type_scale && !has_round) {
- // make rounding cases
- if (c > '4') {
- value += 1;
- }
- has_round = true;
- continue;
- } else if (!found_dot) {
- ++cur_digit;
- }
- DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't
work with __int128.
- } else if (c == '.' && LIKELY(!found_dot)) {
- found_dot = 1;
- } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
- found_exponent = true;
- exponent = string_to_int_internal<int8_t>(s + i + 1, len - i -
1, result);
- if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
- if (*result == StringParser::PARSE_OVERFLOW && exponent <
0) {
- *result = StringParser::PARSE_UNDERFLOW;
- }
- return 0;
+ return value;
+ } else if (found_dot && scale >= type_scale && !has_round) {
+ // make rounding cases
+ if (c > '4') {
+ value += 1;
}
- break;
- } else {
- if (value == 0) {
- *result = StringParser::PARSE_FAILURE;
- return 0;
+ has_round = true;
+ continue;
+ } else if (!found_dot) {
+ ++cur_digit;
+ }
+ DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work
with __int128.
+ } else if (c == '.' && LIKELY(!found_dot)) {
+ found_dot = 1;
+ } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
+ found_exponent = true;
+ exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1,
result);
+ if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
+ if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
+ *result = StringParser::PARSE_UNDERFLOW;
}
- // here to handle
- *result = StringParser::PARSE_SUCCESS;
- if (type_scale >= scale) {
- value *= get_scale_multiplier<T>(type_scale - scale);
- // here meet non-valid character, should return the value,
keep going to meet
- // the E/e character because we make right user-given
type_precision
- // not max number type_precision
- if (!is_numeric_ascii(c)) {
- if (cur_digit > type_precision) {
- *result = StringParser::PARSE_OVERFLOW;
- value = is_negative ?
vectorized::min_decimal_value<DecimalType>(
- type_precision)
- :
vectorized::max_decimal_value<DecimalType>(
- type_precision);
- return value;
- }
- return is_negative ? T(-value) : T(value);
+ return 0;
+ }
+ break;
+ } else {
+ if (value == 0) {
+ *result = StringParser::PARSE_FAILURE;
+ return 0;
+ }
+ // here to handle
+ *result = StringParser::PARSE_SUCCESS;
+ if (type_scale >= scale) {
+ value *= get_scale_multiplier<T>(type_scale - scale);
+ // here meet non-valid character, should return the value,
keep going to meet
+ // the E/e character because we make right user-given
type_precision
+ // not max number type_precision
+ if (!is_numeric_ascii(c)) {
+ if (cur_digit > type_precision) {
+ *result = StringParser::PARSE_OVERFLOW;
+ value = is_negative
+ ?
vectorized::min_decimal_value<DecimalType>(type_precision)
+ :
vectorized::max_decimal_value<DecimalType>(
+ type_precision);
+ return value;
}
+ return is_negative ? T(-value) : T(value);
}
-
- return is_negative ? T(-value) : T(value);
}
+
+ return is_negative ? T(-value) : T(value);
}
}
diff --git a/be/test/vec/data_types/from_string_test.cpp
b/be/test/vec/data_types/from_string_test.cpp
index c19a5f1706d..83b65f0fa3a 100644
--- a/be/test/vec/data_types/from_string_test.cpp
+++ b/be/test/vec/data_types/from_string_test.cpp
@@ -103,7 +103,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
"12345678901234567.012345677",
"12345678901234567.012345677",
"999999999999999999.999999999"},
{"12345678901234567.012345678",
"123456789012345678.012345670",
- "12345678901234567.012345678", "", ""}),
+ "12345678901234567.012345678",
"12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
// (7,2) (6,3) (7,3)
(8,1)
diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
index 315ecded490..ca730fe7bc9 100644
--- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
@@ -74,21 +74,22 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris
be better"},
{"doris be better"}),
// decimal ==> decimalv2(decimal<128>(27,9))
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
- {
- // (17, 9)(first 0 will ignore)
- "012345678901234567.012345678",
- // (18, 8) (automatically fill 0 for
scala)
- "123456789012345678.01234567",
- // (17, 10) (rounding last to make
it fit)
- "12345678901234567.0123456779",
- // (17, 11) (rounding last to make
it fit)
- "12345678901234567.01234567791",
- // (19, 8) (wrong)
- "1234567890123456789.01234567",
- },
- {"12345678901234567.012345678",
"123456789012345678.012345670",
- "12345678901234567.012345678", "", ""}),
+ FieldType_RandStr(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL,
+ {
+ // (17, 9)(first 0 will ignore)
+ "012345678901234567.012345678",
+ // (18, 8) (automatically fill 0 for scala)
+ "123456789012345678.01234567",
+ // (17, 10) (rounding last to make it fit)
+ "12345678901234567.0123456779",
+ // (17, 11) (rounding last to make it fit)
+ "12345678901234567.01234567791",
+ // (19, 8) (wrong)
+ "1234567890123456789.01234567",
+ },
+ {"12345678901234567.012345678",
"123456789012345678.012345670",
+ "12345678901234567.012345678",
"12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2) (7,2)
(6,3) (7,3) (8,1)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
{"1234567.12", "123456.123", "1234567.123",
"12345679.1"},
diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
index 7091f3b30fd..2e85c2c3dc1 100644
--- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
@@ -74,21 +74,22 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris
be better"},
{"doris be better"}),
// decimal ==> decimalv2(decimal<128>(27,9))
- FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
- {
- // (17, 9)(first 0 will ignore)
- "012345678901234567.012345678",
- // (18, 8) (automatically fill 0 for
scala)
- "123456789012345678.01234567",
- // (17, 10) (rounding last to make
it fit)
- "12345678901234567.0123456779",
- // (17, 11) (rounding last to make
it fit)
- "12345678901234567.01234567791",
- // (19, 8) (wrong)
- "1234567890123456789.01234567",
- },
- {"12345678901234567.012345678",
"123456789012345678.012345670",
- "12345678901234567.012345678", "", ""}),
+ FieldType_RandStr(
+ FieldType::OLAP_FIELD_TYPE_DECIMAL,
+ {
+ // (17, 9)(first 0 will ignore)
+ "012345678901234567.012345678",
+ // (18, 8) (automatically fill 0 for scala)
+ "123456789012345678.01234567",
+ // (17, 10) (rounding last to make it fit)
+ "12345678901234567.0123456779",
+ // (17, 11) (rounding last to make it fit)
+ "12345678901234567.01234567791",
+ // (19, 8) (wrong)
+ "1234567890123456789.01234567",
+ },
+ {"12345678901234567.012345678",
"123456789012345678.012345670",
+ "12345678901234567.012345678",
"12345678901234567.012345678", ""}),
// decimal32 ==> decimal32(9,2) (7,2)
(6,3) (7,3) (8,1)
FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
{"1234567.12", "123456.123", "1234567.123",
"12345679.1"},
@@ -429,13 +430,13 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
"[\\1234567890123456789.01234567\\]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678,
123456789012345678.012345670, "
- "12345678901234567.012345678, null, null]",
+ "12345678901234567.012345678,
12345678901234567.012345678, null]",
"[null, null, null, null, null]", "[null]"},
{"[4.000000000, 5.500000000, 6.670000000]",
"[12345678901234567.012345678,
123456789012345678.012345670, "
- "12345678901234567.012345678, null, null]",
+ "12345678901234567.012345678,
12345678901234567.012345678, null]",
"[12345678901234567.012345678,
123456789012345678.012345670, "
- "12345678901234567.012345678, null, null]",
+ "12345678901234567.012345678,
12345678901234567.012345678, null]",
"[null]"}),
};
// array type
diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
index ae9921a0f37..8156a9144aa 100644
--- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
+++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
@@ -15,11 +15,3 @@
11.99990
837.43444
--- !decimalv2_insert --
-999999999999999999.999999999 1.000000000
--999999999999999999.999999999 2.000000000
-999999999999999999.999999999 3.000000000
--999999999999999999.999999999 4.000000000
-999999999999999999.999999999 5.000000000
--999999999999999999.999999999 6.000000000
-
diff --git
a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
index ecce20f1b22..fdd14e48bad 100644
--- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
+++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
@@ -27,10 +27,10 @@
999999999999999999.999999999
-- !multi_overflow2 --
-999999999999999999.999999999 999999999999999999.999999999000000000
+999999999999999999.999999999 999999999999999999.999999999
-- !multi_overflow3 --
-999999999999999999.999999999 999999999999999999.999999999000000000
+999999999999999999.999999999 999999999999999999.999999999
-- !multi_overflow4 --
999999999999999999.999999999 1.000000000 999999999999999999.999999999
@@ -39,10 +39,10 @@
99999999999999999.999999999 0.100000000 999999999999999999.999999990
-- !div_overflow2 --
-999999999999999999.999999990
+999999999999999999.99999999
-- !div_overflow3 --
-99999999999999999.999999999 0.1 999999999999999999.9999999900000
+99999999999999999.999999999 0.1 999999999999999999.999999990
-- !div_overflow4 --
999999999999999999.999999990
@@ -59,3 +59,6 @@
-- !mod4 --
0.099999999
+-- !sql --
+2023-12-18T00:00 95357.10
+
diff --git
a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
index 08027c96d1d..5c065a921a0 100644
--- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
+++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
@@ -84,42 +84,6 @@ suite("test_decimalv2_load", "nonConcurrent") {
select * from ${tableName2} order by 1;
"""
- sql """
- drop table if exists test_decimalv2_insert;
- """
- sql """
- CREATE TABLE `test_decimalv2_insert` (
- `k1` decimalv2(27, 9) null,
- `k2` decimalv2(27, 9) null
- )
- DISTRIBUTED BY HASH(`k1`) BUCKETS 10
- PROPERTIES (
- "replication_num" = "1"
- );
- """
- sql "set enable_insert_strict=true;"
- // overflow, max is inserted
- sql """
- insert into test_decimalv2_insert
values("999999999999999999999999999999",1);
- """
- // underflow, min is inserted
- sql """
- insert into test_decimalv2_insert
values("-999999999999999999999999999999",2);
- """
- sql """
- insert into test_decimalv2_insert
values("999999999999999999.9999999991",3);
- """
- sql """
- insert into test_decimalv2_insert
values("-999999999999999999.9999999991",4);
- """
- sql """
- insert into test_decimalv2_insert
values("999999999999999999.9999999995",5);
- """
- sql """
- insert into test_decimalv2_insert
values("-999999999999999999.9999999995",6);
- """
- qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; "
-
sql """
admin set frontend config("enable_decimal_conversion" = "true");
"""
diff --git
a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
index ad6dea6765c..b183e00243f 100644
---
a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
+++
b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
@@ -269,6 +269,24 @@ suite("test_decimalv2_overflow2") {
"""
+ sql """ drop TABLE if exists test_table """
+ sql """ CREATE TABLE `test_table` (
+ `day_date` datetime NULL COMMENT '',
+ `growth_money` decimalv2(18, 2) NULL COMMENT ''
+ ) ENGINE=OLAP
+ UNIQUE KEY(`day_date`)
+ COMMENT ''
+ DISTRIBUTED BY HASH(`day_date`) BUCKETS 4
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1",
+ "is_being_synced" = "false",
+ "storage_format" = "V2",
+ "disable_auto_compaction" = "false",
+ "enable_single_replica_compaction" = "false"
+ ); """
+ sql """ insert into test_table values ('2023-12-18',
'95357.100000000000000000000000000000000000')"""
+ qt_sql """ select * from test_table """
+ sql """ drop TABLE if exists test_table """
// TODO
// decimalv2 +-*/ integer
// integer +-*/ decimalv2
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]