This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 2a39003b79d [fix](decimal) fix undefined behaviour of divide by zero
when cast string to decimal (#26792)
2a39003b79d is described below
commit 2a39003b79db6d071723d5b85d65d66d9d23c197
Author: TengJianPing <[email protected]>
AuthorDate: Sat Nov 11 15:45:56 2023 +0800
[fix](decimal) fix undefined behaviour of divide by zero when cast string
to decimal (#26792)
---
be/src/util/string_parser.hpp | 45 ++++--------
be/src/vec/common/int_exp.h | 82 ++++++++++++++++++++--
.../datatype_p0/decimalv3/test_decimalv3_cast.out | 16 +++++
.../decimalv3/test_decimalv3_cast.groovy | 24 +++++++
4 files changed, 130 insertions(+), 37 deletions(-)
diff --git a/be/src/util/string_parser.hpp b/be/src/util/string_parser.hpp
index d1726202e46..b928d14057e 100644
--- a/be/src/util/string_parser.hpp
+++ b/be/src/util/string_parser.hpp
@@ -571,6 +571,9 @@ inline int
StringParser::StringParseTraits<__int128>::max_ascii_len() {
template <PrimitiveType P, typename T>
T StringParser::string_to_decimal(const char* s, int len, int type_precision,
int type_scale,
ParseResult* result) {
+ static_assert(
+ std::is_same_v<T, int32_t> || std::is_same_v<T, int64_t> ||
std::is_same_v<T, __int128>,
+ "Cast string to decimal only support target type int32_t, int64_t
or __int128.");
// Special cases:
// 1) '' == Fail, an empty string fails to parse.
// 2) ' # ' == #, leading and trailing white space is ignored.
@@ -670,11 +673,7 @@ T StringParser::string_to_decimal(const char* s, int len,
int type_precision, in
return 0;
}
*result = StringParser::PARSE_SUCCESS;
- if constexpr (std::is_same_v<T, vectorized::Int128I>) {
- value *= get_scale_multiplier<__int128>(type_scale -
scale);
- } else {
- value *= get_scale_multiplier<T>(type_scale - scale);
- }
+ value *= get_scale_multiplier<T>(type_scale - scale);
return is_negative ? T(-value) : T(value);
}
@@ -762,11 +761,7 @@ T StringParser::string_to_decimal(const char* s, int len,
int type_precision, in
// scale must be set to 0 and the value set to 100 which means a
precision of 3.
precision += exponent - scale;
- if constexpr (std::is_same_v<T, vectorized::Int128I>) {
- value *= get_scale_multiplier<__int128>(exponent - scale);
- } else {
- value *= get_scale_multiplier<T>(exponent - scale);
- }
+ value *= get_scale_multiplier<T>(exponent - scale);
scale = 0;
} else {
// Ex: 100e-4, the scale must be set to 4 but no adjustment to the
value is needed,
@@ -795,22 +790,14 @@ T StringParser::string_to_decimal(const char* s, int len,
int type_precision, in
} else if (UNLIKELY(scale > type_scale)) {
*result = StringParser::PARSE_UNDERFLOW;
int shift = scale - type_scale;
- if (shift > 0) {
- T divisor;
- if constexpr (std::is_same_v<T, vectorized::Int128I>) {
- divisor = get_scale_multiplier<__int128>(shift);
- } else {
- divisor = get_scale_multiplier<T>(shift);
- }
- if (LIKELY(divisor > 0)) {
- T remainder = value % divisor;
- value /= divisor;
- if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor
>> 1)) {
- value += 1;
- }
- } else {
- DCHECK(divisor == -1 || divisor == 0); // //DCHECK_EQ doesn't
work with __int128.
- value = 0;
+ T divisor = get_scale_multiplier<T>(shift);
+ if (UNLIKELY(divisor == std::numeric_limits<T>::max())) {
+ value = 0;
+ } else {
+ T remainder = value % divisor;
+ value /= divisor;
+ if ((remainder > 0 ? T(remainder) : T(-remainder)) >= (divisor >>
1)) {
+ value += 1;
}
}
DCHECK(value >= 0); // //DCHECK_GE doesn't work with __int128.
@@ -819,11 +806,7 @@ T StringParser::string_to_decimal(const char* s, int len,
int type_precision, in
}
if (type_scale > scale) {
- if constexpr (std::is_same_v<T, vectorized::Int128I>) {
- value *= get_scale_multiplier<__int128>(type_scale - scale);
- } else {
- value *= get_scale_multiplier<T>(type_scale - scale);
- }
+ value *= get_scale_multiplier<T>(type_scale - scale);
}
return is_negative ? T(-value) : T(value);
diff --git a/be/src/vec/common/int_exp.h b/be/src/vec/common/int_exp.h
index cac7f24f040..753b1d118db 100644
--- a/be/src/vec/common/int_exp.h
+++ b/be/src/vec/common/int_exp.h
@@ -66,16 +66,86 @@ inline uint64_t int_exp10(int x) {
namespace common {
-inline constexpr std::int32_t exp10_i32(int x) {
- return exp_details::get_exp<std::int32_t, 10, 10>(x);
+constexpr inline int exp10_i32(int x) {
+ if (x < 0) return 0;
+ if (x > 9) return std::numeric_limits<int>::max();
+
+ constexpr int values[] = {1, 10, 100, 1000, 10000,
+ 100000, 1000000, 10000000, 100000000,
1000000000};
+ return values[x];
}
-inline constexpr std::int64_t exp10_i64(int x) {
- return exp_details::get_exp<std::int64_t, 10, 19>(x);
+constexpr inline int64_t exp10_i64(int x) {
+ if (x < 0) return 0;
+ if (x > 18) return std::numeric_limits<int64_t>::max();
+
+ constexpr int64_t values[] = {1LL,
+ 10LL,
+ 100LL,
+ 1000LL,
+ 10000LL,
+ 100000LL,
+ 1000000LL,
+ 10000000LL,
+ 100000000LL,
+ 1000000000LL,
+ 10000000000LL,
+ 100000000000LL,
+ 1000000000000LL,
+ 10000000000000LL,
+ 100000000000000LL,
+ 1000000000000000LL,
+ 10000000000000000LL,
+ 100000000000000000LL,
+ 1000000000000000000LL};
+ return values[x];
}
-inline constexpr __int128 exp10_i128(int x) {
- return exp_details::get_exp<__int128, 10, 39>(x);
+constexpr inline __int128 exp10_i128(int x) {
+ if (x < 0) return 0;
+ if (x > 38) return std::numeric_limits<__int128>::max();
+
+ constexpr __int128 values[] = {
+ static_cast<__int128>(1LL),
+ static_cast<__int128>(10LL),
+ static_cast<__int128>(100LL),
+ static_cast<__int128>(1000LL),
+ static_cast<__int128>(10000LL),
+ static_cast<__int128>(100000LL),
+ static_cast<__int128>(1000000LL),
+ static_cast<__int128>(10000000LL),
+ static_cast<__int128>(100000000LL),
+ static_cast<__int128>(1000000000LL),
+ static_cast<__int128>(10000000000LL),
+ static_cast<__int128>(100000000000LL),
+ static_cast<__int128>(1000000000000LL),
+ static_cast<__int128>(10000000000000LL),
+ static_cast<__int128>(100000000000000LL),
+ static_cast<__int128>(1000000000000000LL),
+ static_cast<__int128>(10000000000000000LL),
+ static_cast<__int128>(100000000000000000LL),
+ static_cast<__int128>(1000000000000000000LL),
+ static_cast<__int128>(1000000000000000000LL) * 10LL,
+ static_cast<__int128>(1000000000000000000LL) * 100LL,
+ static_cast<__int128>(1000000000000000000LL) * 1000LL,
+ static_cast<__int128>(1000000000000000000LL) * 10000LL,
+ static_cast<__int128>(1000000000000000000LL) * 100000LL,
+ static_cast<__int128>(1000000000000000000LL) * 1000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 10000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 100000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 1000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 10000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 100000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 1000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 10000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 100000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 1000000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) * 10000000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) *
100000000000000000LL,
+ static_cast<__int128>(1000000000000000000LL) *
100000000000000000LL * 10LL,
+ static_cast<__int128>(1000000000000000000LL) *
100000000000000000LL * 100LL,
+ static_cast<__int128>(1000000000000000000LL) *
100000000000000000LL * 1000LL};
+ return values[x];
}
} // namespace common
diff --git a/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
b/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
new file mode 100644
index 00000000000..ad13bd06b45
--- /dev/null
+++ b/regression-test/data/datatype_p0/decimalv3/test_decimalv3_cast.out
@@ -0,0 +1,16 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !cast0 --
+0
+
+-- !cast1 --
+0
+
+-- !cast2 --
+0
+
+-- !cast3 --
+0
+
+-- !cast4 --
+0
+
diff --git
a/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy
b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy
new file mode 100644
index 00000000000..368c936009e
--- /dev/null
+++ b/regression-test/suites/datatype_p0/decimalv3/test_decimalv3_cast.groovy
@@ -0,0 +1,24 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_decimalv3_cast") {
+ qt_cast0 """ select cast('0.00164999999999998' as decimalv3(9,0)); """
+ qt_cast1 """ select cast('0.00164999999999998' as decimalv3(10,0)); """
+ qt_cast2 """ select cast('0.000000000001234567890' as decimalv3(18,0)); """
+ qt_cast3 """ select cast('0.000000000001234567890' as decimalv3(19,0)); """
+ qt_cast4 """ select cast('0.00000000000000000000000000000012345678901' as
decimalv3(38,0)); """
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]