This is an automated email from the ASF dual-hosted git repository.

dataroaring pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 3163841a3a [FIX](serde)Fix decimal for arrow serde (#21716)
3163841a3a is described below

commit 3163841a3aad40d9a520d8fecc6a35884fce526a
Author: amory <[email protected]>
AuthorDate: Wed Jul 12 19:15:48 2023 +0800

    [FIX](serde)Fix decimal for arrow serde (#21716)
---
 .../data_types/serde/data_type_decimal_serde.cpp   | 26 ++++----
 .../serde/data_type_serde_arrow_test.cpp           | 78 +++++++++++++++++++++-
 2 files changed, 88 insertions(+), 16 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index 4137dd885c..5e46c996e7 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -80,10 +80,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const
                                  array_builder->type()->name());
                 continue;
             }
-            const auto& data_ref = col.get_data_at(i);
-            const int32_t* p_value = reinterpret_cast<const int32_t*>(data_ref.data);
-            int64_t high = *p_value > 0 ? 0 : 1UL << 63;
-            arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value);
+            Int128 p_value = Int128(col.get_element(i));
+            arrow::Decimal128 value(reinterpret_cast<const uint8_t*>(&p_value));
             checkArrowStatus(builder.Append(value), column.get_name(),
                              array_builder->type()->name());
         }
@@ -96,10 +94,8 @@ void DataTypeDecimalSerDe<T>::write_column_to_arrow(const IColumn& column, const
                                  array_builder->type()->name());
                 continue;
             }
-            const auto& data_ref = col.get_data_at(i);
-            const int64_t* p_value = reinterpret_cast<const int64_t*>(data_ref.data);
-            int64_t high = *p_value > 0 ? 0 : 1UL << 63;
-            arrow::Decimal128 value(high, *p_value > 0 ? *p_value : -*p_value);
+            Int128 p_value = Int128(col.get_element(i));
+            arrow::Decimal128 value(reinterpret_cast<const uint8_t*>(&p_value));
             checkArrowStatus(builder.Append(value), column.get_name(),
                              array_builder->type()->name());
         }
@@ -112,13 +108,13 @@ template <typename T>
 void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
                                                      const arrow::Array* arrow_array, int start,
                                                      int end, const cctz::time_zone& ctz) const {
+    auto concrete_array = down_cast<const arrow::DecimalArray*>(arrow_array);
+    const auto* arrow_decimal_type =
+            static_cast<const arrow::DecimalType*>(arrow_array->type().get());
+    const auto arrow_scale = arrow_decimal_type->scale();
+    auto& column_data = static_cast<ColumnDecimal<T>&>(column).get_data();
     if constexpr (std::is_same_v<T, Decimal<Int128>>) {
-        auto& column_data = static_cast<ColumnDecimal<vectorized::Decimal128>&>(column).get_data();
-        auto concrete_array = down_cast<const arrow::DecimalArray*>(arrow_array);
-        const auto* arrow_decimal_type =
-                static_cast<const arrow::DecimalType*>(arrow_array->type().get());
         // TODO check precision
-        const auto arrow_scale = arrow_decimal_type->scale();
         for (size_t value_i = start; value_i < end; ++value_i) {
             auto value = *reinterpret_cast<const vectorized::Decimal128*>(
                     concrete_array->Value(value_i));
@@ -140,6 +136,10 @@ void DataTypeDecimalSerDe<T>::read_column_from_arrow(IColumn& column,
             }
             column_data.emplace_back(value);
         }
+    } else if constexpr (std::is_same_v<T, Decimal64> || std::is_same_v<T, Decimal32>) {
+        for (size_t value_i = start; value_i < end; ++value_i) {
+            column_data.emplace_back(*reinterpret_cast<const T*>(concrete_array->Value(value_i)));
+        }
     } else {
         LOG(FATAL) << "Not support read " << column.get_name() << " from arrow";
     }
diff --git a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
index a51cb9bc90..7792d40839 100644
--- a/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
+++ b/be/test/vec/data_types/serde/data_type_serde_arrow_test.cpp
@@ -51,6 +51,7 @@
 #include "util/arrow/row_batch.h"
 #include "util/bitmap_value.h"
 #include "util/quantile_state.h"
+#include "util/string_parser.hpp"
 #include "vec/columns/column.h"
 #include "vec/columns/column_array.h"
 #include "vec/columns/column_complex.h"
@@ -77,6 +78,7 @@
 #include "vec/data_types/data_type_time_v2.h"
 #include "vec/runtime/vdatetime_value.h"
 #include "vec/utils/arrow_column_to_doris_column.h"
+
 namespace doris::vectorized {
 
 template <bool is_scalar>
@@ -84,12 +86,16 @@ void serialize_and_deserialize_arrow_test() {
     vectorized::Block block;
     std::vector<std::tuple<std::string, FieldType, int, PrimitiveType, bool>> cols;
     if constexpr (is_scalar) {
-        cols = {{"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
+        cols = {
+                {"k1", FieldType::OLAP_FIELD_TYPE_INT, 1, TYPE_INT, false},
                 {"k7", FieldType::OLAP_FIELD_TYPE_INT, 7, TYPE_INT, true},
                 {"k2", FieldType::OLAP_FIELD_TYPE_STRING, 2, TYPE_STRING, false},
                 {"k3", FieldType::OLAP_FIELD_TYPE_DECIMAL128I, 3, TYPE_DECIMAL128I, false},
                 {"k11", FieldType::OLAP_FIELD_TYPE_DATETIME, 11, TYPE_DATETIME, false},
-                {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false}};
+                {"k4", FieldType::OLAP_FIELD_TYPE_BOOL, 4, TYPE_BOOLEAN, false},
+                {"k5", FieldType::OLAP_FIELD_TYPE_DECIMAL32, 5, TYPE_DECIMAL32, false},
+                {"k6", FieldType::OLAP_FIELD_TYPE_DECIMAL64, 6, TYPE_DECIMAL64, false},
+        };
     } else {
         cols = {{"a", FieldType::OLAP_FIELD_TYPE_ARRAY, 6, TYPE_ARRAY, true},
                 {"m", FieldType::OLAP_FIELD_TYPE_MAP, 8, TYPE_MAP, true},
@@ -153,6 +159,72 @@ void serialize_and_deserialize_arrow_test() {
                 block.insert(std::move(type_and_name));
             }
             break;
+        case TYPE_DECIMAL32:
+            type_desc.precision = 9;
+            type_desc.scale = 2;
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                vectorized::DataTypePtr decimal_data_type =
+                        std::make_shared<DataTypeDecimal<Decimal32>>(type_desc.precision,
+                                                                     type_desc.scale);
+                auto decimal_column = decimal_data_type->create_column();
+                auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int32>>*)
+                                      decimal_column.get())
+                                     ->get_data();
+                for (int i = 0; i < row_num; ++i) {
+                    if (i == 0) {
+                        data.push_back(Int32(0));
+                        continue;
+                    }
+                    Int32 val;
+                    StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
+                    i % 2 == 0 ? val = StringParser::string_to_decimal<__int128>(
+                                         "1234567.56", 11, type_desc.precision, type_desc.scale,
+                                         &result)
+                               : val = StringParser::string_to_decimal<__int128>(
+                                         "-1234567.56", 12, type_desc.precision, type_desc.scale,
+                                         &result);
+                    EXPECT_TRUE(result == StringParser::PARSE_SUCCESS);
+                    data.push_back(val);
+                }
+
+                vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(),
+                                                                decimal_data_type, col_name);
+                block.insert(type_and_name);
+            }
+            break;
+        case TYPE_DECIMAL64:
+            type_desc.precision = 18;
+            type_desc.scale = 6;
+            tslot.__set_slotType(type_desc.to_thrift());
+            {
+                vectorized::DataTypePtr decimal_data_type =
+                        std::make_shared<DataTypeDecimal<Decimal64>>(type_desc.precision,
+                                                                     type_desc.scale);
+                auto decimal_column = decimal_data_type->create_column();
+                auto& data = ((vectorized::ColumnDecimal<vectorized::Decimal<vectorized::Int64>>*)
+                                      decimal_column.get())
+                                     ->get_data();
+                for (int i = 0; i < row_num; ++i) {
+                    if (i == 0) {
+                        data.push_back(Int64(0));
+                        continue;
+                    }
+                    Int64 val;
+                    StringParser::ParseResult result = StringParser::PARSE_SUCCESS;
+                    std::string decimal_string =
+                            i % 2 == 0 ? "-123456789012.123456" : "123456789012.123456";
+                    val = StringParser::string_to_decimal<__int128>(
+                            decimal_string.c_str(), decimal_string.size(), type_desc.precision,
+                            type_desc.scale, &result);
+                    EXPECT_TRUE(result == StringParser::PARSE_SUCCESS);
+                    data.push_back(val);
+                }
+                vectorized::ColumnWithTypeAndName type_and_name(decimal_column->get_ptr(),
+                                                                decimal_data_type, col_name);
+                block.insert(type_and_name);
+            }
+            break;
         case TYPE_DECIMAL128I:
             type_desc.precision = 27;
             type_desc.scale = 9;
@@ -362,7 +434,7 @@ void serialize_and_deserialize_arrow_test() {
 
     // serialize
     std::shared_ptr<arrow::RecordBatch> result;
-    std::cout << "block structure: " << block.dump_structure() << std::endl;
+    std::cout << "block data: " << block.dump_data(0, row_num) << std::endl;
     std::cout << "_arrow_schema: " << _arrow_schema->ToString(true) << std::endl;
 
     convert_to_arrow_batch(block, _arrow_schema, arrow::default_memory_pool(), &result);


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to