This is an automated email from the ASF dual-hosted git repository.
dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 3163841a3a [FIX](serde)Fix decimal for arrow serde (#21716)
3163841a3a is described below
commit 3163841a3aad40d9a520d8fecc6a35884fce526a
Author: amory <[email protected]>
AuthorDate: Wed Jul 12 19:15:48 2023 +0800
[FIX](serde)Fix decimal for arrow serde (#21716)
---
.../data_types/serde/data_type_decimal_serde.cpp | 26 ++++----
.../serde/data_type_serde_arrow_test.cpp | 78 +++++++++++++++++++++-
2 files changed, 88 insertions(+), 16 deletions(-)
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 4137dd885c..5e46c996e7 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -80,10 +80,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const
array_builder->type()->name());
continue;
}
- const auto& data_ref = col.get_data_at(i);
- const int32_t* p_value = reinterpret_cast<const
int32_t*>(data_ref.data);
- int64_t high = *p_value > 0 ? 0 : 1UL << 63;
- arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value);
+ Int128 p_value = Int128(col.get_element(i));
+ arrow::Decimal128 value(reinterpret_cast<const
uint8_t*>(&p_value));
checkArrowStatus(builder.Append(value), column.get_name(),
array_builder->type()->name());
}
@@ -96,10 +94,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const
array_builder->type()->name());
continue;
}
- const auto& data_ref = col.get_data_at(i);
- const int64_t* p_value = reinterpret_cast<const
int64_t*>(data_ref.data);
- int64_t high = *p_value > 0 ? 0 : 1UL << 63;
- arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value);
+ Int128 p_value = Int128(col.get_element(i));
+ arrow::Decimal128 value(reinterpret_cast<const
uint8_t*>(&p_value));
checkArrowStatus(builder.Append(value), column.get_name(),
array_builder->type()->name());
}
@@ -112,13 +108,13 @@ template <typename T>
void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
const arrow::Array*
arrow_array, int start,
int end, const
cctz::time_zone& ctz) const {
+ auto concrete_array = down_cast<const arrow::DecimalArray*>(arrow_array);
+ const auto* arrow_decimal_type =
+ static_cast<const arrow::DecimalType*>(arrow_array->type().get());
+ const auto arrow_scale = arrow_decimal_type->scale();
+ auto& column_data = static_cast<ColumnDecimal<T>&>(column).get_data();
if constexpr (std::is_same_v<T, Decimal<Int128>>) {
- auto& column_data =
static_cast<ColumnDecimal<vectorized::Decimal128>&>(column).get_data();
- auto concrete_array = down_cast<const
arrow::DecimalArray*>(arrow_array);
- const auto* arrow_decimal_type =
- static_cast<const
arrow::DecimalType*>(arrow_array->type().get());
// TODO check precision
- const auto arrow_scale = arrow_decimal_type->scale();
for (size_t value_i = start; value_i < end; ++value_i) {
auto value = *reinterpret_cast<const vectorized::Decimal128*>(
concrete_array->Value(value_i));
@@ -140,6 +136,10 @@ void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
}
column_data.emplace_back(value);
}
+ } else if constexpr (std::is_same_v<T, Decimal64> || std::is_same_v<T,
Decimal32>) {
+ for (size_t value_i = start; value_i < end; ++value_i) {
+ column_data.emplace_back(*reinterpret_cast<const
T*>(concrete_array->Value(value_i)));
+ }
} else {
LOG(FATAL) << "Not support read " << column.get_name() << " from
arrow";
}
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index a51cb9bc90..7792d40839 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -51,6 +51,7 @@
#include "util/arrow/row_batch.h"
#include "util/bitmap_value.h"
#include "util/quantile_state.h"
+#include "util/string_parser.hpp"
#include "vec/columns/column.h"
#include "vec/columns/column_array.h"
#include "vec/columns/column_complex.h"
@@ -77,6 +78,7 @@
#include "vec/data_types/data_type_time_v2.h"
#include "vec/runtime/vdatetime_value.h"
#include "vec/utils/arrow_column_to_doris_column.h"
+
namespace doris::vectorized {
template <bool is_scalar>
@@ -84,12 +86,16 @@ void serialize_and_deserialize_arrow_test() {
vectorized::Block block;
std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>>
cols;
if constexpr (is_scalar) {
- cols = {{"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
+ cols = {
+ {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
{"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true},
{"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING,
false},
{"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3,
TYPE_DECIMAL128I, false},
{"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11,
TYPE_DATETIME, false},
- {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN,
false}};
+ {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN,
false},
+ {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5,
TYPE_DECIMAL32, false},
+ {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6,
TYPE_DECIMAL64, false},
+ };
} else {
cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true},
{"m", FieldType::OLAP_FIELD_TYPE_MAP, 8, TYPE_MAP, true},
@@ -153,6 +159,72 @@ void serialize_and_deserialize_arrow_test() {
block.insert(std::move(type_and_name));
}
break;
+ case TYPE_DECIMAL32:
+ type_desc.precision = 9;
+ type_desc.scale = 2;
+ tslot.__set_slotType(type_desc.to_thrift());
+ {
+ vectorized::DataTypePtr decimal_data_type =
+
std::make_shared<DataTypeDecimal<Decimal32>>(type_desc.precision,
+
type_desc.scale);
+ auto decimal_column = decimal_data_type->create_column();
+ auto& data =
((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int32>>*)
+ decimal_column.get())
+ ->get_data();
+ for (int i = 0; i < row_num; ++i) {
+ if (i == 0) {
+ data.push_back(Int32(0));
+ continue;
+ }
+ Int32 val;
+ StringParser::ParseResult result =
StringParser::PARSE_SUCCESS;
+ i % 2 == 0 ? val =
StringParser::string_to_decimal<__int128>(
+ "1234567.56", 11,
type_desc.precision, type_desc.scale,
+ &result)
+ : val =
StringParser::string_to_decimal<__int128>(
+ "-1234567.56", 12,
type_desc.precision, type_desc.scale,
+ &result);
+ EXPECT_TRUE(result == StringParser::PARSE_SUCCESS);
+ data.push_back(val);
+ }
+
+ vectorized::ColumnWithTypeAndName
type_and_name(decimal_column->get_ptr(),
+
decimal_data_type, col_name);
+ block.insert(type_and_name);
+ }
+ break;
+ case TYPE_DECIMAL64:
+ type_desc.precision = 18;
+ type_desc.scale = 6;
+ tslot.__set_slotType(type_desc.to_thrift());
+ {
+ vectorized::DataTypePtr decimal_data_type =
+
std::make_shared<DataTypeDecimal<Decimal64>>(type_desc.precision,
+
type_desc.scale);
+ auto decimal_column = decimal_data_type->create_column();
+ auto& data =
((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int64>>*)
+ decimal_column.get())
+ ->get_data();
+ for (int i = 0; i < row_num; ++i) {
+ if (i == 0) {
+ data.push_back(Int64(0));
+ continue;
+ }
+ Int64 val;
+ StringParser::ParseResult result =
StringParser::PARSE_SUCCESS;
+ std::string decimal_string =
+ i % 2 == 0 ? "-123456789012.123456" :
"123456789012.123456";
+ val = StringParser::string_to_decimal<__int128>(
+ decimal_string.c_str(), decimal_string.size(),
type_desc.precision,
+ type_desc.scale, &result);
+ EXPECT_TRUE(result == StringParser::PARSE_SUCCESS);
+ data.push_back(val);
+ }
+ vectorized::ColumnWithTypeAndName
type_and_name(decimal_column->get_ptr(),
+
decimal_data_type, col_name);
+ block.insert(type_and_name);
+ }
+ break;
case TYPE_DECIMAL128I:
type_desc.precision = 27;
type_desc.scale = 9;
@@ -362,7 +434,7 @@ void serialize_and_deserialize_arrow_test() {
// serialize
std::shared_ptr<arrow::RecordBatch> result;
- std::cout << "block structure: " << block.dump_structure() << std::endl;
+ std::cout << "block data: " << block.dump_data(0, row_num) << std::endl;
std::cout << "_arrow_schema: " << _arrow_schema->ToString(true) <<
std::endl;
convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(),
&result);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]