(doris) branch branch-2.1 updated: [fix](decimal) Fix long string casting to decimalv2 (#35121)

yiguolei Tue, 21 May 2024 23:32:49 -0700

This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.1 by this push:
     new c23384ff075 [fix](decimal) Fix long string casting to decimalv2 (#35121)
c23384ff075 is described below

commit c23384ff0759b442c8d540110ee3d2bbf079a0e1
Author: Gabriel <[email protected]>
AuthorDate: Wed May 22 14:24:22 2024 +0800

    [fix](decimal) Fix long string casting to decimalv2 (#35121)
---
 be/src/util/string_parser.hpp                      | 169 ++++++++-------------
 be/test/vec/data_types/from_string_test.cpp        |   2 +-
 .../data_types/serde/data_type_serde_csv_test.cpp  |  31 ++--
 .../data_types/serde/data_type_serde_text_test.cpp |  37 ++---
 .../datatype_p0/decimalv2/test_decimalv2_load.out  |   8 -
 .../decimalv2/test_decimalv2_overflow2.out         |  11 +-
 .../decimalv2/test_decimalv2_load.groovy           |  36 -----
 .../decimalv2/test_decimalv2_overflow2.groovy      |  18 +++
 8 files changed, 121 insertions(+), 191 deletions(-)

diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index 521d1a6f39c..34bd678c947 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -594,123 +594,74 @@ T StringParser::string_to_decimal(const char* __restrict s, int len, int type_pr
     bool found_exponent = false;
     int8_t exponent = 0;
     T value = 0;
-    if constexpr (TYPE_DECIMALV2 == P) {
-        // decimalv2 do not care type_scale and type_precision,just keep the 
origin logic
-        for (int i = 0; i < len; ++i) {
-            const char& c = s[i];
-            if (LIKELY('0' <= c && c <= '9')) {
-                found_value = true;
-                // Ignore digits once the type's precision limit is reached. 
This avoids
-                // overflowing the underlying storage while handling a string 
like
-                // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for 
ignored digits and
-                // an exponent will be made later.
-                if (LIKELY(type_precision > precision)) {
-                    value = (value * 10) + (c - '0'); // Benchmarks are faster 
with parenthesis...
-                } else {
-                    *result = StringParser::PARSE_OVERFLOW;
-                    value = is_negative
-                                    ? 
vectorized::min_decimal_value<DecimalType>(type_precision)
-                                    : 
vectorized::max_decimal_value<DecimalType>(type_precision);
-                    return value;
-                }
-                DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't 
work with __int128.
+    bool has_round = false;
+    for (int i = 0; i < len; ++i) {
+        const char& c = s[i];
+        if (LIKELY('0' <= c && c <= '9')) {
+            found_value = true;
+            // Ignore digits once the type's precision limit is reached. This 
avoids
+            // overflowing the underlying storage while handling a string like
+            // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for ignored 
digits and
+            // an exponent will be made later.
+            if (LIKELY(type_precision > precision) && !has_round) {
+                value = (value * 10) + (c - '0'); // Benchmarks are faster 
with parenthesis...
                 ++precision;
                 scale += found_dot;
-            } else if (c == '.' && LIKELY(!found_dot)) {
-                found_dot = 1;
-            } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
-                found_exponent = true;
-                exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 
1, result);
-                if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
-                    if (*result == StringParser::PARSE_OVERFLOW && exponent < 
0) {
-                        *result = StringParser::PARSE_UNDERFLOW;
-                    }
-                    return 0;
-                }
-                break;
-            } else {
-                if (value == 0) {
-                    *result = StringParser::PARSE_FAILURE;
-                    return 0;
-                }
-                *result = StringParser::PARSE_SUCCESS;
-                value *= get_scale_multiplier<T>(type_scale - scale);
-
-                return is_negative ? T(-value) : T(value);
-            }
-        }
-    } else {
-        // decimalv3
-        bool has_round = false;
-        for (int i = 0; i < len; ++i) {
-            const char& c = s[i];
-            if (LIKELY('0' <= c && c <= '9')) {
-                found_value = true;
-                // Ignore digits once the type's precision limit is reached. 
This avoids
-                // overflowing the underlying storage while handling a string 
like
-                // 10000000000e-10 into a DECIMAL(1, 0). Adjustments for 
ignored digits and
-                // an exponent will be made later.
-                if (LIKELY(type_precision > precision) && !has_round) {
-                    value = (value * 10) + (c - '0'); // Benchmarks are faster 
with parenthesis...
-                    ++precision;
-                    scale += found_dot;
-                    cur_digit = precision - scale;
-                } else if (!found_dot && max_digit < (precision - scale)) {
-                    *result = StringParser::PARSE_OVERFLOW;
-                    value = is_negative
-                                    ? 
vectorized::min_decimal_value<DecimalType>(type_precision)
+                cur_digit = precision - scale;
+            } else if (!found_dot && max_digit < (precision - scale)) {
+                *result = StringParser::PARSE_OVERFLOW;
+                value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(type_precision)
                                     : 
vectorized::max_decimal_value<DecimalType>(type_precision);
-                    return value;
-                } else if (found_dot && scale >= type_scale && !has_round) {
-                    // make rounding cases
-                    if (c > '4') {
-                        value += 1;
-                    }
-                    has_round = true;
-                    continue;
-                } else if (!found_dot) {
-                    ++cur_digit;
-                }
-                DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't 
work with __int128.
-            } else if (c == '.' && LIKELY(!found_dot)) {
-                found_dot = 1;
-            } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
-                found_exponent = true;
-                exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 
1, result);
-                if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
-                    if (*result == StringParser::PARSE_OVERFLOW && exponent < 
0) {
-                        *result = StringParser::PARSE_UNDERFLOW;
-                    }
-                    return 0;
+                return value;
+            } else if (found_dot && scale >= type_scale && !has_round) {
+                // make rounding cases
+                if (c > '4') {
+                    value += 1;
                 }
-                break;
-            } else {
-                if (value == 0) {
-                    *result = StringParser::PARSE_FAILURE;
-                    return 0;
+                has_round = true;
+                continue;
+            } else if (!found_dot) {
+                ++cur_digit;
+            }
+            DCHECK(value >= 0); // For some reason //DCHECK_GE doesn't work 
with __int128.
+        } else if (c == '.' && LIKELY(!found_dot)) {
+            found_dot = 1;
+        } else if ((c == 'e' || c == 'E') && LIKELY(!found_exponent)) {
+            found_exponent = true;
+            exponent = string_to_int_internal<int8_t>(s + i + 1, len - i - 1, 
result);
+            if (UNLIKELY(*result != StringParser::PARSE_SUCCESS)) {
+                if (*result == StringParser::PARSE_OVERFLOW && exponent < 0) {
+                    *result = StringParser::PARSE_UNDERFLOW;
                 }
-                // here to handle
-                *result = StringParser::PARSE_SUCCESS;
-                if (type_scale >= scale) {
-                    value *= get_scale_multiplier<T>(type_scale - scale);
-                    // here meet non-valid character, should return the value, 
keep going to meet
-                    // the E/e character because we make right user-given 
type_precision
-                    // not max number type_precision
-                    if (!is_numeric_ascii(c)) {
-                        if (cur_digit > type_precision) {
-                            *result = StringParser::PARSE_OVERFLOW;
-                            value = is_negative ? 
vectorized::min_decimal_value<DecimalType>(
-                                                          type_precision)
-                                                : 
vectorized::max_decimal_value<DecimalType>(
-                                                          type_precision);
-                            return value;
-                        }
-                        return is_negative ? T(-value) : T(value);
+                return 0;
+            }
+            break;
+        } else {
+            if (value == 0) {
+                *result = StringParser::PARSE_FAILURE;
+                return 0;
+            }
+            // here to handle
+            *result = StringParser::PARSE_SUCCESS;
+            if (type_scale >= scale) {
+                value *= get_scale_multiplier<T>(type_scale - scale);
+                // here meet non-valid character, should return the value, 
keep going to meet
+                // the E/e character because we make right user-given 
type_precision
+                // not max number type_precision
+                if (!is_numeric_ascii(c)) {
+                    if (cur_digit > type_precision) {
+                        *result = StringParser::PARSE_OVERFLOW;
+                        value = is_negative
+                                        ? 
vectorized::min_decimal_value<DecimalType>(type_precision)
+                                        : 
vectorized::max_decimal_value<DecimalType>(
+                                                  type_precision);
+                        return value;
                     }
+                    return is_negative ? T(-value) : T(value);
                 }
-
-                return is_negative ? T(-value) : T(value);
             }
+
+            return is_negative ? T(-value) : T(value);
         }
     }
 
diff --git a/be/test/vec/data_types/from_string_test.cpp b/be/test/vec/data_types/from_string_test.cpp
index c19a5f1706d..83b65f0fa3a 100644
--- a/be/test/vec/data_types/from_string_test.cpp
+++ b/be/test/vec/data_types/from_string_test.cpp
@@ -103,7 +103,7 @@ TEST(FromStringTest, ScalaWrapperFieldVsDataType) {
                          "12345678901234567.012345677", 
"12345678901234567.012345677",
                          "999999999999999999.999999999"},
                         {"12345678901234567.012345678", 
"123456789012345678.012345670",
-                         "12345678901234567.012345678", "", ""}),
+                         "12345678901234567.012345678", 
"12345678901234567.012345678", ""}),
                 // decimal32 ==>  decimal32(9,2)
                 FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
                                   // (7,2)         (6,3)         (7,3)         
  (8,1)
diff --git a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
index 315ecded490..ca730fe7bc9 100644
--- a/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_csv_test.cpp
@@ -74,21 +74,22 @@ TEST(CsvSerde, ScalaDataTypeSerdeCsvTest) {
                 FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris 
be better"},
                                   {"doris be better"}),
                 // decimal ==> decimalv2(decimal<128>(27,9))
-                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
-                                  {
-                                          // (17, 9)(first 0 will ignore)
-                                          "012345678901234567.012345678",
-                                          // (18, 8) (automatically fill 0 for 
scala)
-                                          "123456789012345678.01234567",
-                                          // (17, 10) (rounding last to make 
it fit)
-                                          "12345678901234567.0123456779",
-                                          // (17, 11) (rounding last to make 
it fit)
-                                          "12345678901234567.01234567791",
-                                          // (19, 8) (wrong)
-                                          "1234567890123456789.01234567",
-                                  },
-                                  {"12345678901234567.012345678", 
"123456789012345678.012345670",
-                                   "12345678901234567.012345678", "", ""}),
+                FieldType_RandStr(
+                        FieldType::OLAP_FIELD_TYPE_DECIMAL,
+                        {
+                                // (17, 9)(first 0 will ignore)
+                                "012345678901234567.012345678",
+                                // (18, 8) (automatically fill 0 for scala)
+                                "123456789012345678.01234567",
+                                // (17, 10) (rounding last to make it fit)
+                                "12345678901234567.0123456779",
+                                // (17, 11) (rounding last to make it fit)
+                                "12345678901234567.01234567791",
+                                // (19, 8) (wrong)
+                                "1234567890123456789.01234567",
+                        },
+                        {"12345678901234567.012345678", 
"123456789012345678.012345670",
+                         "12345678901234567.012345678", 
"12345678901234567.012345678", ""}),
                 // decimal32 ==>  decimal32(9,2)                       (7,2)   
      (6,3)         (7,3)           (8,1)
                 FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
                                   {"1234567.12", "123456.123", "1234567.123", 
"12345679.1"},
diff --git a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
index 7091f3b30fd..2e85c2c3dc1 100644
--- a/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_text_test.cpp
@@ -74,21 +74,22 @@ TEST(TextSerde, ScalaDataTypeSerdeTextTest) {
                 FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_STRING, {"doris 
be better"},
                                   {"doris be better"}),
                 // decimal ==> decimalv2(decimal<128>(27,9))
-                FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL,
-                                  {
-                                          // (17, 9)(first 0 will ignore)
-                                          "012345678901234567.012345678",
-                                          // (18, 8) (automatically fill 0 for 
scala)
-                                          "123456789012345678.01234567",
-                                          // (17, 10) (rounding last to make 
it fit)
-                                          "12345678901234567.0123456779",
-                                          // (17, 11) (rounding last to make 
it fit)
-                                          "12345678901234567.01234567791",
-                                          // (19, 8) (wrong)
-                                          "1234567890123456789.01234567",
-                                  },
-                                  {"12345678901234567.012345678", 
"123456789012345678.012345670",
-                                   "12345678901234567.012345678", "", ""}),
+                FieldType_RandStr(
+                        FieldType::OLAP_FIELD_TYPE_DECIMAL,
+                        {
+                                // (17, 9)(first 0 will ignore)
+                                "012345678901234567.012345678",
+                                // (18, 8) (automatically fill 0 for scala)
+                                "123456789012345678.01234567",
+                                // (17, 10) (rounding last to make it fit)
+                                "12345678901234567.0123456779",
+                                // (17, 11) (rounding last to make it fit)
+                                "12345678901234567.01234567791",
+                                // (19, 8) (wrong)
+                                "1234567890123456789.01234567",
+                        },
+                        {"12345678901234567.012345678", 
"123456789012345678.012345670",
+                         "12345678901234567.012345678", 
"12345678901234567.012345678", ""}),
                 // decimal32 ==>  decimal32(9,2)                       (7,2)   
      (6,3)         (7,3)           (8,1)
                 FieldType_RandStr(FieldType::OLAP_FIELD_TYPE_DECIMAL32,
                                   {"1234567.12", "123456.123", "1234567.123", 
"12345679.1"},
@@ -429,13 +430,13 @@ TEST(TextSerde, ComplexTypeSerdeTextTest) {
                          "[\\1234567890123456789.01234567\\]"},
                         {"[4.000000000, 5.500000000, 6.670000000]",
                          "[12345678901234567.012345678, 
123456789012345678.012345670, "
-                         "12345678901234567.012345678, null, null]",
+                         "12345678901234567.012345678, 
12345678901234567.012345678, null]",
                          "[null, null, null, null, null]", "[null]"},
                         {"[4.000000000, 5.500000000, 6.670000000]",
                          "[12345678901234567.012345678, 
123456789012345678.012345670, "
-                         "12345678901234567.012345678, null, null]",
+                         "12345678901234567.012345678, 
12345678901234567.012345678, null]",
                          "[12345678901234567.012345678, 
123456789012345678.012345670, "
-                         "12345678901234567.012345678, null, null]",
+                         "12345678901234567.012345678, 
12345678901234567.012345678, null]",
                          "[null]"}),
         };
         // array type
diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
index ae9921a0f37..8156a9144aa 100644
--- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
+++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_load.out
@@ -15,11 +15,3 @@
 11.99990
 837.43444
 
--- !decimalv2_insert --
-999999999999999999.999999999   1.000000000
--999999999999999999.999999999  2.000000000
-999999999999999999.999999999   3.000000000
--999999999999999999.999999999  4.000000000
-999999999999999999.999999999   5.000000000
--999999999999999999.999999999  6.000000000
-
diff --git a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
index ecce20f1b22..fdd14e48bad 100644
--- a/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
+++ b/regression-test/data/datatype_p0/decimalv2/test_decimalv2_overflow2.out
@@ -27,10 +27,10 @@
 999999999999999999.999999999
 
 -- !multi_overflow2 --
-999999999999999999.999999999   999999999999999999.999999999000000000
+999999999999999999.999999999   999999999999999999.999999999
 
 -- !multi_overflow3 --
-999999999999999999.999999999   999999999999999999.999999999000000000
+999999999999999999.999999999   999999999999999999.999999999
 
 -- !multi_overflow4 --
 999999999999999999.999999999   1.000000000     999999999999999999.999999999
@@ -39,10 +39,10 @@
 99999999999999999.999999999    0.100000000     999999999999999999.999999990
 
 -- !div_overflow2 --
-999999999999999999.999999990
+999999999999999999.99999999
 
 -- !div_overflow3 --
-99999999999999999.999999999    0.1     999999999999999999.9999999900000
+99999999999999999.999999999    0.1     999999999999999999.999999990
 
 -- !div_overflow4 --
 999999999999999999.999999990
@@ -59,3 +59,6 @@
 -- !mod4 --
 0.099999999
 
+-- !sql --
+2023-12-18T00:00       95357.10
+
diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
index 08027c96d1d..5c065a921a0 100644
--- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
+++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_load.groovy
@@ -84,42 +84,6 @@ suite("test_decimalv2_load", "nonConcurrent") {
         select * from ${tableName2} order by 1;
     """
 
-    sql """
-        drop table if exists test_decimalv2_insert;
-    """
-    sql """
-        CREATE TABLE `test_decimalv2_insert` (
-            `k1` decimalv2(27, 9) null,
-            `k2` decimalv2(27, 9) null
-        )
-        DISTRIBUTED BY HASH(`k1`) BUCKETS 10
-        PROPERTIES (
-        "replication_num" = "1"
-        );
-    """
-    sql "set enable_insert_strict=true;"
-    // overflow, max is inserted
-    sql """
-        insert into test_decimalv2_insert 
values("999999999999999999999999999999",1);
-    """
-    // underflow, min is inserted
-    sql """
-        insert into test_decimalv2_insert 
values("-999999999999999999999999999999",2);
-    """
-    sql """
-        insert into test_decimalv2_insert 
values("999999999999999999.9999999991",3);
-    """
-    sql """
-        insert into test_decimalv2_insert 
values("-999999999999999999.9999999991",4);
-    """
-    sql """
-        insert into test_decimalv2_insert 
values("999999999999999999.9999999995",5);
-    """
-    sql """
-        insert into test_decimalv2_insert 
values("-999999999999999999.9999999995",6);
-    """
-    qt_decimalv2_insert "select * from test_decimalv2_insert order by 2; "
-
     sql """
         admin set frontend config("enable_decimal_conversion" = "true");
     """
diff --git a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
index ad6dea6765c..b183e00243f 100644
--- a/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
+++ b/regression-test/suites/datatype_p0/decimalv2/test_decimalv2_overflow2.groovy
@@ -269,6 +269,24 @@ suite("test_decimalv2_overflow2") {
     """
 
 
+    sql """ drop TABLE if exists test_table """
+    sql """ CREATE TABLE `test_table` (
+            `day_date` datetime NULL COMMENT '',
+            `growth_money` decimalv2(18, 2) NULL COMMENT ''
+            ) ENGINE=OLAP
+            UNIQUE KEY(`day_date`)
+            COMMENT ''
+            DISTRIBUTED BY HASH(`day_date`) BUCKETS 4
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "is_being_synced" = "false",
+            "storage_format" = "V2",
+            "disable_auto_compaction" = "false",
+            "enable_single_replica_compaction" = "false"
+            ); """
+    sql """ insert into test_table values ('2023-12-18', '95357.100000000000000000000000000000000000')"""
+    qt_sql """ select * from test_table """
+    sql """ drop TABLE if exists test_table """
     // TODO
     // decimalv2 +-*/ integer
     // integer +-*/ decimalv2


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-2.1 updated: [fix](decimal) Fix long string casting to decimalv2 (#35121)

Reply via email to