(doris) branch branch-2.0 updated: [fix](decimal) fix undefined behaviour of divide by zero when cast string to decimal (#26792)

kxiao Fri, 10 Nov 2023 23:46:08 -0800

This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git



The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 2a39003b79d [fix](decimal) fix undefined behaviour of divide by zero 
when cast string to decimal (#26792)
2a39003b79d is described below

commit 2a39003b79db6d071723d5b85d65d66d9d23c197
Author: TengJianPing <[email protected]>
AuthorDate: Sat Nov 11 15:45:56 2023 +0800

    [fix](decimal) fix undefined behaviour of divide by zero when cast string 
to decimal (#26792)
---
 be/src/util/string_parser.hpp                      | 45 ++++--------
 be/src/vec/common/int_exp.h                        | 82 ++++++++++++++++++++--
 .../datatype_p0/decimalv3/test_decimalv3_cast.out  | 16 +++++
 .../decimalv3/test_decimalv3_cast.groovy           | 24 +++++++
 4 files changed, 130 insertions(+), 37 deletions(-)

diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index d1726202e46..b928d14057e 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -571,6 +571,9 @@ inline int 
StringParser::StringParseTraits<__int128>::max_ascii_len() {
 template <PrimitiveType P, typename T>
 T StringParser::string_to_decimal(const char* s, int len, int type_precision, 
int type_scale,
                                   ParseResult* result) {
+    static_assert(
+            std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> || 
std::is_same_v<T, __int128>,
+            "Cast string to decimal only support target type int32_t, int64_t 
or __int128.");
     // Special cases:
     //   1) '' == Fail, an empty string fails to parse.
     //   2) '   #   ' == #, leading and trailing white space is ignored.
@@ -670,11 +673,7 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
                     return 0;
                 }
                 *result = StringParser::PARSE_SUCCESS;
-                if constexpr (std::is_same_v<T, vectorized::Int128I>) {
-                    value *= get_scale_multiplier<__int128>(type_scale - 
scale);
-                } else {
-                    value *= get_scale_multiplier<T>(type_scale - scale);
-                }
+                value *= get_scale_multiplier<T>(type_scale - scale);
 
                 return is_negative ? T(-value) : T(value);
             }
@@ -762,11 +761,7 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
         //     scale must be set to 0 and the value set to 100 which means a 
precision of 3.
         precision += exponent - scale;
 
-        if constexpr (std::is_same_v<T, vectorized::Int128I>) {
-            value *= get_scale_multiplier<__int128>(exponent - scale);
-        } else {
-            value *= get_scale_multiplier<T>(exponent - scale);
-        }
+        value *= get_scale_multiplier<T>(exponent - scale);
         scale = 0;
     } else {
         // Ex: 100e-4, the scale must be set to 4 but no adjustment to the 
value is needed,
@@ -795,22 +790,14 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
     } else if (UNLIKELY(scale > type_scale)) {
         *result = StringParser::PARSE_UNDERFLOW;
         int shift = scale - type_scale;
-        if (shift > 0) {
-            T divisor;
-            if constexpr (std::is_same_v<T, vectorized::Int128I>) {
-                divisor = get_scale_multiplier<__int128>(shift);
-            } else {
-                divisor = get_scale_multiplier<T>(shift);
-            }
-            if (LIKELY(divisor > 0)) {
-                T remainder = value % divisor;
-                value /= divisor;
-                if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor 
>> 1)) {
-                    value += 1;
-                }
-            } else {
-                DCHECK(divisor == -1 || divisor == 0); // //DCHECK_EQ doesn't 
work with __int128.
-                value = 0;
+        T divisor = get_scale_multiplier<T>(shift);
+        if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
+            value = 0;
+        } else {
+            T remainder = value % divisor;
+            value /= divisor;
+            if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >> 
1)) {
+                value += 1;
             }
         }
         DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
@@ -819,11 +806,7 @@ T StringParser::string_to_decimal(const char* s, int len, 
int type_precision, in
     }
 
     if (type_scale > scale) {
-        if constexpr (std::is_same_v<T, vectorized::Int128I>) {
-            value *= get_scale_multiplier<__int128>(type_scale - scale);
-        } else {
-            value *= get_scale_multiplier<T>(type_scale - scale);
-        }
+        value *= get_scale_multiplier<T>(type_scale - scale);
     }
 
     return is_negative ? T(-value) : T(value);
diff --git a/be/src/vec/common/int_exp.h b/be/src/vec/common/int_exp.h
index cac7f24f040..753b1d118db 100644
--- a/be/src/vec/common/int_exp.h
+++ b/be/src/vec/common/int_exp.h
@@ -66,16 +66,86 @@ inline uint64_t int_exp10(int x) {
 
 namespace common {
 
-inline constexpr std::int32_t exp10_i32(int x) {
-    return exp_details::get_exp<std::int32_t, 10, 10>(x);
+constexpr inline int exp10_i32(int x) {
+    if (x < 0) return 0;
+    if (x > 9) return std::numeric_limits<int>::max();
+
+    constexpr int values[] = {1,      10,      100,      1000,      10000,
+                              100000, 1000000, 10000000, 100000000, 
1000000000};
+    return values[x];
 }
 
-inline constexpr std::int64_t exp10_i64(int x) {
-    return exp_details::get_exp<std::int64_t, 10, 19>(x);
+constexpr inline int64_t exp10_i64(int x) {
+    if (x < 0) return 0;
+    if (x > 18) return std::numeric_limits<int64_t>::max();
+
+    constexpr int64_t values[] = {1LL,
+                                  10LL,
+                                  100LL,
+                                  1000LL,
+                                  10000LL,
+                                  100000LL,
+                                  1000000LL,
+                                  10000000LL,
+                                  100000000LL,
+                                  1000000000LL,
+                                  10000000000LL,
+                                  100000000000LL,
+                                  1000000000000LL,
+                                  10000000000000LL,
+                                  100000000000000LL,
+                                  1000000000000000LL,
+                                  10000000000000000LL,
+                                  100000000000000000LL,
+                                  1000000000000000000LL};
+    return values[x];
 }
 
-inline constexpr __int128 exp10_i128(int x) {
-    return exp_details::get_exp<__int128, 10, 39>(x);
+constexpr inline __int128 exp10_i128(int x) {
+    if (x < 0) return 0;
+    if (x > 38) return std::numeric_limits<__int128>::max();
+
+    constexpr __int128 values[] = {
+            static_cast<__int128>(1LL),
+            static_cast<__int128>(10LL),
+            static_cast<__int128>(100LL),
+            static_cast<__int128>(1000LL),
+            static_cast<__int128>(10000LL),
+            static_cast<__int128>(100000LL),
+            static_cast<__int128>(1000000LL),
+            static_cast<__int128>(10000000LL),
+            static_cast<__int128>(100000000LL),
+            static_cast<__int128>(1000000000LL),
+            static_cast<__int128>(10000000000LL),
+            static_cast<__int128>(100000000000LL),
+            static_cast<__int128>(1000000000000LL),
+            static_cast<__int128>(10000000000000LL),
+            static_cast<__int128>(100000000000000LL),
+            static_cast<__int128>(1000000000000000LL),
+            static_cast<__int128>(10000000000000000LL),
+            static_cast<__int128>(100000000000000000LL),
+            static_cast<__int128>(1000000000000000000LL),
+            static_cast<__int128>(1000000000000000000LL) * 10LL,
+            static_cast<__int128>(1000000000000000000LL) * 100LL,
+            static_cast<__int128>(1000000000000000000LL) * 1000LL,
+            static_cast<__int128>(1000000000000000000LL) * 10000LL,
+            static_cast<__int128>(1000000000000000000LL) * 100000LL,
+            static_cast<__int128>(1000000000000000000LL) * 1000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 10000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 100000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 1000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 10000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 100000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 1000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 10000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 100000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 1000000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 10000000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 
100000000000000000LL,
+            static_cast<__int128>(1000000000000000000LL) * 
100000000000000000LL * 10LL,
+            static_cast<__int128>(1000000000000000000LL) * 
100000000000000000LL * 100LL,
+            static_cast<__int128>(1000000000000000000LL) * 
100000000000000000LL * 1000LL};
+    return values[x];
 }
 
 } // namespace common
diff --git a/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out 
b/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
new file mode 100644
index 00000000000..ad13bd06b45
--- /dev/null
+++ b/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !cast0 --
+0
+
+-- !cast1 --
+0
+
+-- !cast2 --
+0
+
+-- !cast3 --
+0
+
+-- !cast4 --
+0
+
diff --git 
a/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy 
b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy
new file mode 100644
index 00000000000..368c936009e
--- /dev/null
+++ b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_decimalv3_cast") {
+    qt_cast0 """ select cast('0.00164999999999998' as decimalv3(9,0)); """
+    qt_cast1 """ select cast('0.00164999999999998' as decimalv3(10,0)); """
+    qt_cast2 """ select cast('0.000000000001234567890' as decimalv3(18,0)); """
+    qt_cast3 """ select cast('0.000000000001234567890' as decimalv3(19,0)); """
+    qt_cast4 """ select cast('0.00000000000000000000000000000012345678901' as 
decimalv3(38,0)); """
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(doris) branch branch-2.0 updated: [fix](decimal) fix undefined behaviour of divide by zero when cast string to decimal (#26792)

Reply via email to