This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch revert-37530-pick_21_opt_fill_partition
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 5ff2703f3abf1de0fa528a6aef8e62647eb13988
Author: Mingyu Chen <[email protected]>
AuthorDate: Wed Jul 17 17:19:37 2024 +0800

    Revert "[opt](serde)Optimize the filling of fixed values into block columns…"
    
    This reverts commit 6932eef65e54d2ae060b4973da3aec0d919d7154.
---
 .../serde/data_type_datetimev2_serde.cpp           | 21 -----------------
 .../data_types/serde/data_type_datetimev2_serde.h  |  5 -----
 .../data_types/serde/data_type_datev2_serde.cpp    | 21 -----------------
 .../vec/data_types/serde/data_type_datev2_serde.h  |  6 -----
 .../data_types/serde/data_type_decimal_serde.cpp   | 26 ----------------------
 .../vec/data_types/serde/data_type_decimal_serde.h |  6 -----
 .../data_types/serde/data_type_nullable_serde.cpp  | 20 -----------------
 .../data_types/serde/data_type_nullable_serde.h    |  3 ---
 .../data_types/serde/data_type_number_serde.cpp    | 22 ------------------
 .../vec/data_types/serde/data_type_number_serde.h  |  6 -----
 be/src/vec/data_types/serde/data_type_serde.h      | 21 -----------------
 .../vec/data_types/serde/data_type_string_serde.h  | 25 ---------------------
 be/src/vec/exec/format/orc/vorc_reader.cpp         |  9 +++++---
 .../exec/format/parquet/vparquet_group_reader.cpp  |  9 +++++---
 be/src/vec/exec/scan/vfile_scanner.cpp             |  9 +++++---
 15 files changed, 18 insertions(+), 191 deletions(-)

diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
index 850ac5766fc..63a199199a0 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp
@@ -247,25 +247,4 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const 
std::string& timezone,
     return Status::OK();
 }
 
-Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json(
-        IColumn& column, Slice& slice, int rows, int* num_deserialized,
-        const FormatOptions& options) const {
-    Status st = deserialize_one_cell_from_json(column, slice, options);
-    if (!st.ok()) {
-        return st;
-    }
-
-    DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(column, 
rows - 1);
-    *num_deserialized = rows;
-    return Status::OK();
-}
-
-void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& 
column,
-                                                                      int 
times) const {
-    auto& col = static_cast<ColumnVector<UInt64>&>(column);
-    auto sz = col.size();
-    UInt64 val = col.get_element(sz - 1);
-    col.insert_many_vals(val, times);
-}
-
 } // namespace doris::vectorized
diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
index ef4aa6843a0..00b05f5fcd6 100644
--- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.h
@@ -77,11 +77,6 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override;
-    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
-
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp 
b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
index f2d595b87c4..eb9122dd240 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp
@@ -175,26 +175,5 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const 
std::string& timezone, con
     return Status::OK();
 }
 
-Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& 
column, Slice& slice,
-                                                               int rows, int* 
num_deserialized,
-                                                               const 
FormatOptions& options) const {
-    Status st = deserialize_one_cell_from_json(column, slice, options);
-    if (!st.ok()) {
-        return st;
-    }
-    DataTypeDateV2SerDe::insert_column_last_value_multiple_times(column, rows 
- 1);
-    *num_deserialized = rows;
-    return Status::OK();
-}
-
-void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& 
column,
-                                                                  int times) 
const {
-    auto& col = static_cast<ColumnVector<UInt32>&>(column);
-    auto sz = col.size();
-    UInt32 val = col.get_element(sz - 1);
-
-    col.insert_many_vals(val, times);
-}
-
 } // namespace vectorized
 } // namespace doris
diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.h 
b/be/src/vec/data_types/serde/data_type_datev2_serde.h
index 52e4cec364e..9a8b050eeba 100644
--- a/be/src/vec/data_types/serde/data_type_datev2_serde.h
+++ b/be/src/vec/data_types/serde/data_type_datev2_serde.h
@@ -74,12 +74,6 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override;
-
-    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
-
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp 
b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
index e979211d6d7..a59fdedbfe6 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp
@@ -275,32 +275,6 @@ Status DataTypeDecimalSerDe<T>::write_column_to_orc(const 
std::string& timezone,
     }
     return Status::OK();
 }
-template <typename T>
-
-Status DataTypeDecimalSerDe<T>::deserialize_column_from_fixed_json(
-        IColumn& column, Slice& slice, int rows, int* num_deserialized,
-        const FormatOptions& options) const {
-    Status st = deserialize_one_cell_from_json(column, slice, options);
-    if (!st.ok()) {
-        return st;
-    }
-
-    DataTypeDecimalSerDe::insert_column_last_value_multiple_times(column, rows 
- 1);
-    *num_deserialized = rows;
-    return Status::OK();
-}
-
-template <typename T>
-void DataTypeDecimalSerDe<T>::insert_column_last_value_multiple_times(IColumn& 
column,
-                                                                      int 
times) const {
-    auto& col = static_cast<ColumnDecimal<T>&>(column);
-    auto sz = col.size();
-
-    T val = col.get_element(sz - 1);
-    for (int i = 0; i < times; i++) {
-        col.insert_value(val);
-    }
-}
 
 template class DataTypeDecimalSerDe<Decimal32>;
 template class DataTypeDecimalSerDe<Decimal64>;
diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.h 
b/be/src/vec/data_types/serde/data_type_decimal_serde.h
index 484c6686bc5..55e68699f01 100644
--- a/be/src/vec/data_types/serde/data_type_decimal_serde.h
+++ b/be/src/vec/data_types/serde/data_type_decimal_serde.h
@@ -114,12 +114,6 @@ public:
                                int start, int end,
                                std::vector<StringRef>& buffer_list) const 
override;
 
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override;
-
-    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
-
 private:
     template <bool is_binary_format>
     Status _write_column_to_mysql(const IColumn& column, 
MysqlRowBuffer<is_binary_format>& result,
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp 
b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
index 98ff1eb7f81..faa3c8eb1f4 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp
@@ -127,26 +127,6 @@ Status 
DataTypeNullableSerDe::deserialize_column_from_hive_text_vector(
     return Status::OK();
 }
 
-Status DataTypeNullableSerDe::deserialize_column_from_fixed_json(
-        IColumn& column, Slice& slice, int rows, int* num_deserialized,
-        const FormatOptions& options) const {
-    auto& col = static_cast<ColumnNullable&>(column);
-    Status st = deserialize_one_cell_from_json(column, slice, options);
-    if (!st.ok()) {
-        return st;
-    }
-    auto& null_map = col.get_null_map_data();
-    auto& nested_column = col.get_nested_column();
-
-    null_map.resize_fill(
-            rows, null_map.back()); // 
data_type_nullable::insert_column_last_value_multiple_times()
-    if (rows - 1 != 0) {
-        nested_serde->insert_column_last_value_multiple_times(nested_column, 
rows - 1);
-    }
-    *num_deserialized = rows;
-    return Status::OK();
-}
-
 Status DataTypeNullableSerDe::deserialize_one_cell_from_json(IColumn& column, 
Slice& slice,
                                                              const 
FormatOptions& options) const {
     auto& null_column = assert_cast<ColumnNullable&>(column);
diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.h 
b/be/src/vec/data_types/serde/data_type_nullable_serde.h
index 7b4841dcbdf..09d2fbde409 100644
--- a/be/src/vec/data_types/serde/data_type_nullable_serde.h
+++ b/be/src/vec/data_types/serde/data_type_nullable_serde.h
@@ -47,9 +47,6 @@ public:
                                                int* num_deserialized,
                                                const FormatOptions& options) 
const override;
 
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override;
     Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
             int hive_text_complex_type_delimiter_level = 1) const override;
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp 
b/be/src/vec/data_types/serde/data_type_number_serde.cpp
index 299779ea267..0ba338ce399 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.cpp
+++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp
@@ -224,28 +224,6 @@ void 
DataTypeNumberSerDe<T>::read_column_from_arrow(IColumn& column,
     const auto* raw_data = reinterpret_cast<const T*>(buffer->data()) + start;
     col_data.insert(raw_data, raw_data + row_count);
 }
-template <typename T>
-Status DataTypeNumberSerDe<T>::deserialize_column_from_fixed_json(
-        IColumn& column, Slice& slice, int rows, int* num_deserialized,
-        const FormatOptions& options) const {
-    Status st = deserialize_one_cell_from_json(column, slice, options);
-    if (!st.ok()) {
-        return st;
-    }
-
-    DataTypeNumberSerDe::insert_column_last_value_multiple_times(column, rows 
- 1);
-    *num_deserialized = rows;
-    return Status::OK();
-}
-
-template <typename T>
-void DataTypeNumberSerDe<T>::insert_column_last_value_multiple_times(IColumn& 
column,
-                                                                     int 
times) const {
-    auto& col = static_cast<ColumnVector<T>&>(column);
-    auto sz = col.size();
-    T val = col.get_element(sz - 1);
-    col.insert_many_vals(val, times);
-}
 
 template <typename T>
 template <bool is_binary_format>
diff --git a/be/src/vec/data_types/serde/data_type_number_serde.h 
b/be/src/vec/data_types/serde/data_type_number_serde.h
index 18ba2fb26c7..c66bc994605 100644
--- a/be/src/vec/data_types/serde/data_type_number_serde.h
+++ b/be/src/vec/data_types/serde/data_type_number_serde.h
@@ -70,12 +70,6 @@ public:
                                                int* num_deserialized,
                                                const FormatOptions& options) 
const override;
 
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override;
-
-    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override;
-
     Status write_column_to_pb(const IColumn& column, PValues& result, int 
start,
                               int end) const override;
     Status read_column_from_pb(IColumn& column, const PValues& arg) const 
override;
diff --git a/be/src/vec/data_types/serde/data_type_serde.h 
b/be/src/vec/data_types/serde/data_type_serde.h
index 1f6e24aef3f..77663e1d43a 100644
--- a/be/src/vec/data_types/serde/data_type_serde.h
+++ b/be/src/vec/data_types/serde/data_type_serde.h
@@ -234,27 +234,6 @@ public:
     virtual Status deserialize_column_from_json_vector(IColumn& column, 
std::vector<Slice>& slices,
                                                        int* num_deserialized,
                                                        const FormatOptions& 
options) const = 0;
-    // deserialize fixed values.Repeatedly insert the value row times into the 
column.
-    virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& 
slice, int rows,
-                                                      int* num_deserialized,
-                                                      const FormatOptions& 
options) const {
-        Status st = deserialize_one_cell_from_json(column, slice, options);
-        if (!st.ok()) {
-            *num_deserialized = 0;
-            return st;
-        }
-        insert_column_last_value_multiple_times(column, rows - 1);
-        *num_deserialized = rows;
-        return Status::OK();
-    }
-    // Insert the last value to the end of this column multiple times.
-    virtual void insert_column_last_value_multiple_times(IColumn& column, int 
times) const {
-        //If you try to simplify this operation by using 
`column.insert_many_from(column, column.size() - 1, rows - 1);`
-        // you are likely to get incorrect data results.
-        MutableColumnPtr dum_col = column.clone_empty();
-        dum_col->insert_from(column, column.size() - 1);
-        column.insert_many_from(*dum_col.get(), 0, times);
-    }
 
     virtual Status deserialize_one_cell_from_hive_text(
             IColumn& column, Slice& slice, const FormatOptions& options,
diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h 
b/be/src/vec/data_types/serde/data_type_string_serde.h
index 0f0f1d0dfe8..b74b5857086 100644
--- a/be/src/vec/data_types/serde/data_type_string_serde.h
+++ b/be/src/vec/data_types/serde/data_type_string_serde.h
@@ -132,31 +132,6 @@ public:
         }
         return Status::OK();
     }
-
-    Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, 
int rows,
-                                              int* num_deserialized,
-                                              const FormatOptions& options) 
const override {
-        Status st = deserialize_one_cell_from_json(column, slice, options);
-        if (!st.ok()) {
-            return st;
-        }
-
-        
DataTypeStringSerDeBase::insert_column_last_value_multiple_times(column, rows - 
1);
-        *num_deserialized = rows;
-        return Status::OK();
-    }
-
-    void insert_column_last_value_multiple_times(IColumn& column, int times) 
const override {
-        auto& col = static_cast<ColumnString&>(column);
-        auto sz = col.size();
-
-        StringRef ref = col.get_data_at(sz - 1);
-        String str(ref.data, ref.size);
-        std::vector<StringRef> refs(times, {str.data(), str.size()});
-
-        col.insert_many_strings(refs.data(), refs.size());
-    }
-
     Status read_column_from_pb(IColumn& column, const PValues& arg) const 
override {
         auto& column_dest = assert_cast<ColumnType&>(column);
         column_dest.reserve(column_dest.size() + arg.string_value_size());
diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp 
b/be/src/vec/exec/format/orc/vorc_reader.cpp
index 54d94dcecc7..16909f0023a 100644
--- a/be/src/vec/exec/format/orc/vorc_reader.cpp
+++ b/be/src/vec/exec/format/orc/vorc_reader.cpp
@@ -935,10 +935,13 @@ Status OrcReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
+        vector<Slice> slices(rows);
+        for (int i = 0; i < rows; i++) {
+            slices[i] = {value.data(), value.size()};
+        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
-                                                            &num_deserialized,
-                                                            
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
+                                                             
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp 
b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
index 9ec1235be1d..5e824f34817 100644
--- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
+++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp
@@ -631,10 +631,13 @@ Status RowGroupReader::_fill_partition_columns(
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
+        vector<Slice> slices(rows);
+        for (int i = 0; i < rows; i++) {
+            slices[i] = {value.data(), value.size()};
+        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
-                                                            &num_deserialized,
-                                                            
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
+                                                             
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }
diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp 
b/be/src/vec/exec/scan/vfile_scanner.cpp
index 0688f2c0712..944884434f4 100644
--- a/be/src/vec/exec/scan/vfile_scanner.cpp
+++ b/be/src/vec/exec/scan/vfile_scanner.cpp
@@ -499,10 +499,13 @@ Status VFileScanner::_fill_columns_from_path(size_t rows) 
{
         auto& [value, slot_desc] = kv.second;
         auto _text_serde = slot_desc->get_data_type_ptr()->get_serde();
         Slice slice(value.data(), value.size());
+        vector<Slice> slices(rows);
+        for (int i = 0; i < rows; i++) {
+            slices[i] = {value.data(), value.size()};
+        }
         int num_deserialized = 0;
-        if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, 
rows,
-                                                            &num_deserialized,
-                                                            
_text_formatOptions) != Status::OK()) {
+        if (_text_serde->deserialize_column_from_json_vector(*col_ptr, slices, 
&num_deserialized,
+                                                             
_text_formatOptions) != Status::OK()) {
             return Status::InternalError("Failed to fill partition column: 
{}={}",
                                          slot_desc->col_name(), value);
         }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to