This is an automated email from the ASF dual-hosted git repository.
gabriellee pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 9114896178 [DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)
9114896178 is described below
commit 91148961784fef3572e07ebf5a9f05a870fd154c
Author: HappenLee <[email protected]>
AuthorDate: Tue Feb 7 13:28:07 2023 +0800
[DecimalV3](opt) opt the function of decimalv3 to_string logic (#16427)
---
be/src/exec/olap_common.h | 13 ++----
be/src/exprs/runtime_filter.h | 18 +++-----
be/src/vec/core/types.h | 66 +++++++++++++++++++++++++++++
be/src/vec/data_types/data_type_decimal.cpp | 11 ++---
be/src/vec/exprs/vliteral.cpp | 15 ++++---
be/src/vec/functions/function_cast.h | 1 +
be/src/vec/io/io_helper.h | 15 -------
be/src/vec/utils/histogram_helpers.hpp | 27 +++---------
8 files changed, 96 insertions(+), 70 deletions(-)
diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h
index 2980791b63..fde528131e 100644
--- a/be/src/exec/olap_common.h
+++ b/be/src/exec/olap_common.h
@@ -30,6 +30,7 @@
#include "olap/olap_tuple.h"
#include "runtime/primitive_type.h"
#include "runtime/type_limit.h"
+#include "vec/core/types.h"
#include "vec/io/io_helper.h"
#include "vec/runtime/vdatetime_value.h"
@@ -38,17 +39,11 @@ namespace doris {
template <PrimitiveType primitive_type, class T>
std::string cast_to_string(T value, int scale) {
if constexpr (primitive_type == TYPE_DECIMAL32) {
- std::stringstream ss;
- vectorized::write_text<int32_t>((int32_t)value, scale, ss);
- return ss.str();
+ return ((vectorized::Decimal<int32_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL64) {
- std::stringstream ss;
- vectorized::write_text<int64_t>((int64_t)value, scale, ss);
- return ss.str();
+ return ((vectorized::Decimal<int64_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_DECIMAL128I) {
- std::stringstream ss;
- vectorized::write_text<int128_t>((int128_t)value, scale, ss);
- return ss.str();
+ return ((vectorized::Decimal<int128_t>)value).to_string(scale);
} else if constexpr (primitive_type == TYPE_TINYINT) {
return std::to_string(static_cast<int>(value));
} else if constexpr (primitive_type == TYPE_LARGEINT) {
diff --git a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h
index f7a0348d22..c0e2ef25aa 100644
--- a/be/src/exprs/runtime_filter.h
+++ b/be/src/exprs/runtime_filter.h
@@ -453,30 +453,24 @@ Status create_texpr_literal_node(const void* data, TExprNode* node, int precisio
(*node).__set_decimal_literal(decimal_literal);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMALV2, precision, scale));
} else if constexpr (T == TYPE_DECIMAL32) {
- auto origin_value = reinterpret_cast<const int32_t*>(data);
+ auto origin_value = reinterpret_cast<const vectorized::Decimal<int32_t>*>(data);
(*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
TDecimalLiteral decimal_literal;
- std::stringstream ss;
- vectorized::write_text<int32_t>(*origin_value, scale, ss);
- decimal_literal.__set_value(ss.str());
+ decimal_literal.__set_value(origin_value->to_string(scale));
(*node).__set_decimal_literal(decimal_literal);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL32, precision, scale));
} else if constexpr (T == TYPE_DECIMAL64) {
- auto origin_value = reinterpret_cast<const int64_t*>(data);
+ auto origin_value = reinterpret_cast<const vectorized::Decimal<int64_t>*>(data);
(*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
TDecimalLiteral decimal_literal;
- std::stringstream ss;
- vectorized::write_text<int64_t>(*origin_value, scale, ss);
- decimal_literal.__set_value(ss.str());
+ decimal_literal.__set_value(origin_value->to_string(scale));
(*node).__set_decimal_literal(decimal_literal);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL64, precision, scale));
} else if constexpr (T == TYPE_DECIMAL128I) {
- auto origin_value = reinterpret_cast<const int128_t*>(data);
+ auto origin_value = reinterpret_cast<const vectorized::Decimal<int128_t>*>(data);
(*node).__set_node_type(TExprNodeType::DECIMAL_LITERAL);
TDecimalLiteral decimal_literal;
- std::stringstream ss;
- vectorized::write_text<int128_t>(*origin_value, scale, ss);
- decimal_literal.__set_value(ss.str());
+ decimal_literal.__set_value(origin_value->to_string(scale));
(*node).__set_decimal_literal(decimal_literal);
(*node).__set_type(create_type_desc(PrimitiveType::TYPE_DECIMAL128I, precision, scale));
} else if constexpr (T == TYPE_FLOAT) {
diff --git a/be/src/vec/core/types.h b/be/src/vec/core/types.h
index 7f0fefc0ad..d7eac97184 100644
--- a/be/src/vec/core/types.h
+++ b/be/src/vec/core/types.h
@@ -25,7 +25,9 @@
#include <string>
#include <vector>
+#include "common/consts.h"
#include "util/binary_cast.hpp"
+#include "vec/common/int_exp.h"
namespace doris {
@@ -266,6 +268,21 @@ using DateTimeV2 = UInt64;
struct Int128I {};
+template <typename T>
+inline T decimal_scale_multiplier(UInt32 scale);
+template <>
+inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) {
+ return common::exp10_i32(scale);
+}
+template <>
+inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) {
+ return common::exp10_i64(scale);
+}
+template <>
+inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
+ return common::exp10_i128(scale);
+}
+
/// Own FieldType for Decimal.
/// It is only a "storage" for decimal. To perform operations, you also have to provide a scale (number of digits after point).
template <typename T>
@@ -325,6 +342,55 @@ struct Decimal {
return *this;
}
+ std::string to_string(UInt32 scale) const {
+ if (value == std::numeric_limits<T>::min()) {
+ fmt::memory_buffer buffer;
+ fmt::format_to(buffer, "{}", value);
+ std::string res {buffer.data(), buffer.size()};
+ res.insert(res.size() - scale, ".");
+ return res;
+ }
+
+ static constexpr auto precision =
+ std::is_same_v<T, Int32>
+ ? BeConsts::MAX_DECIMAL32_PRECISION
+ : (std::is_same_v<T, Int64> ? BeConsts::MAX_DECIMAL64_PRECISION
+ : BeConsts::MAX_DECIMAL128_PRECISION);
+ bool is_nagetive = value < 0;
+ int max_result_length = precision + (scale > 0) // Add a space for decimal place
+ + (scale == precision) // Add a space for leading 0
+ + (is_nagetive); // Add a space for negative sign
+ std::string str = std::string(max_result_length, '0');
+
+ T abs_value = value;
+ int pos = 0;
+
+ if (is_nagetive) {
+ abs_value = -value;
+ str[pos++] = '-';
+ }
+
+ T whole_part = abs_value;
+ T frac_part;
+ if (scale) {
+ whole_part = abs_value / decimal_scale_multiplier<T>(scale);
+ frac_part = abs_value % decimal_scale_multiplier<T>(scale);
+ }
+ auto end = fmt::format_to(str.data() + pos, "{}", whole_part);
+ pos = end - str.data();
+
+ if (scale) {
+ str[pos++] = '.';
+ for (auto end_pos = pos + scale - 1; end_pos >= pos && frac_part > 0;
+ --end_pos, frac_part /= 10) {
+ str[end_pos] += frac_part % 10;
+ }
+ }
+
+ str.resize(pos + scale);
+ return str;
+ }
+
T value;
};
diff --git a/be/src/vec/data_types/data_type_decimal.cpp b/be/src/vec/data_types/data_type_decimal.cpp
index 5d00e65033..afd0288154 100644
--- a/be/src/vec/data_types/data_type_decimal.cpp
+++ b/be/src/vec/data_types/data_type_decimal.cpp
@@ -47,9 +47,7 @@ template <typename T>
std::string DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num) const {
T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get())
.get_data()[row_num];
- std::ostringstream buf;
- write_text(value, scale, buf);
- return buf.str();
+ return value.to_string(scale);
}
template <typename T>
@@ -57,11 +55,8 @@ void DataTypeDecimal<T>::to_string(const IColumn& column, size_t row_num,
BufferWritable& ostr) const {
// TODO: Reduce the copy in std::string mem to ostr, like DataTypeNumber
if constexpr (!IsDecimalV2<T>) {
- T value = assert_cast<const ColumnType&>(*column.convert_to_full_column_if_const().get())
- .get_data()[row_num];
- std::ostringstream buf;
- write_text(value, scale, buf);
- std::string str = buf.str();
+ T value = assert_cast<const ColumnType&>(column).get_data()[row_num];
+ auto str = value.to_string(scale);
ostr.write(str.data(), str.size());
} else {
DecimalV2Value value = (DecimalV2Value)assert_cast<const ColumnType&>(
diff --git a/be/src/vec/exprs/vliteral.cpp b/be/src/vec/exprs/vliteral.cpp
index 782c8161b2..fe0170399e 100644
--- a/be/src/vec/exprs/vliteral.cpp
+++ b/be/src/vec/exprs/vliteral.cpp
@@ -267,18 +267,21 @@ std::string VLiteral::value() const {
break;
}
case TYPE_DECIMAL32: {
- write_text<int32_t>(*(reinterpret_cast<const int32_t*>(ref.data)), _type.scale,
- out);
+ auto str =
+ reinterpret_cast<const Decimal<int32_t>*>(ref.data)->to_string(_type.scale);
+ out << str;
break;
}
case TYPE_DECIMAL64: {
- write_text<int64_t>(*(reinterpret_cast<const int64_t*>(ref.data)), _type.scale,
- out);
+ auto str =
+ reinterpret_cast<const Decimal<int64_t>*>(ref.data)->to_string(_type.scale);
+ out << str;
break;
}
case TYPE_DECIMAL128I: {
- write_text<int128_t>(*(reinterpret_cast<const int128_t*>(ref.data)), _type.scale,
- out);
+ auto str = reinterpret_cast<const Decimal<int128_t>*>(ref.data)->to_string(
+ _type.scale);
+ out << str;
break;
}
default: {
diff --git a/be/src/vec/functions/function_cast.h b/be/src/vec/functions/function_cast.h
index 59f58be84c..0fecd16f20 100644
--- a/be/src/vec/functions/function_cast.h
+++ b/be/src/vec/functions/function_cast.h
@@ -324,6 +324,7 @@ struct ConvertImplGenericToString {
size_t size = col_from.size();
auto col_to = ColumnString::create();
+ col_to->reserve(size * 2);
VectorBufferWriter write_buffer(*col_to.get());
for (size_t i = 0; i < size; ++i) {
type.to_string(col_from, i, write_buffer);
diff --git a/be/src/vec/io/io_helper.h b/be/src/vec/io/io_helper.h
index 5c848188c5..457ae26d50 100644
--- a/be/src/vec/io/io_helper.h
+++ b/be/src/vec/io/io_helper.h
@@ -42,21 +42,6 @@ static constexpr size_t DEFAULT_MAX_STRING_SIZE = 1073741824; // 1GB
static constexpr size_t DEFAULT_MAX_JSON_SIZE = 1073741824; // 1GB
static constexpr auto WRITE_HELPERS_MAX_INT_WIDTH = 40U;
-template <typename T>
-inline T decimal_scale_multiplier(UInt32 scale);
-template <>
-inline Int32 decimal_scale_multiplier<Int32>(UInt32 scale) {
- return common::exp10_i32(scale);
-}
-template <>
-inline Int64 decimal_scale_multiplier<Int64>(UInt32 scale) {
- return common::exp10_i64(scale);
-}
-template <>
-inline Int128 decimal_scale_multiplier<Int128>(UInt32 scale) {
- return common::exp10_i128(scale);
-}
-
inline std::string int128_to_string(__int128_t value) {
fmt::memory_buffer buffer;
fmt::format_to(buffer, "{}", value);
diff --git a/be/src/vec/utils/histogram_helpers.hpp b/be/src/vec/utils/histogram_helpers.hpp
index 206948d733..b6ab1f5547 100644
--- a/be/src/vec/utils/histogram_helpers.hpp
+++ b/be/src/vec/utils/histogram_helpers.hpp
@@ -163,7 +163,13 @@ bool value_to_bucket(std::vector<Bucket<T>>& buckets, T v, size_t num_per_bucket
template <typename T>
bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_type) {
- fmt::memory_buffer _insert_stmt_buffer;
+ if constexpr (std::is_same_v<T, Decimal32> || std::is_same_v<T, Decimal64> ||
+ std::is_same_v<T, Decimal128> || std::is_same_v<T, Decimal128I>) {
+ auto scale = get_decimal_scale(*data_type);
+ ss << input.to_string(scale);
+ return true;
+ }
+
switch (data_type->get_type_id()) {
case TypeIndex::Int8:
case TypeIndex::UInt8:
@@ -183,25 +189,6 @@ bool value_to_string(std::stringstream& ss, T input, const DataTypePtr& data_typ
ss << std::string(buffer.data(), buffer.size());
break;
}
- case TypeIndex::Decimal32: {
- auto scale = get_decimal_scale(*data_type);
- auto decimal_val = reinterpret_cast<const Decimal32*>(&input);
- write_text(*decimal_val, scale, ss);
- break;
- }
- case TypeIndex::Decimal64: {
- auto scale = get_decimal_scale(*data_type);
- auto decimal_val = reinterpret_cast<const Decimal64*>(&input);
- write_text(*decimal_val, scale, ss);
- break;
- }
- case TypeIndex::Decimal128:
- case TypeIndex::Decimal128I: {
- auto scale = get_decimal_scale(*data_type);
- auto decimal_val = reinterpret_cast<const Decimal128*>(&input);
- write_text(*decimal_val, scale, ss);
- break;
- }
case TypeIndex::Date:
case TypeIndex::DateTime: {
auto* date_int = reinterpret_cast<Int64*>(&input);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]