This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new 47acf0d8380 branch-4.0: [opt](parquet) opt the performance of dict decoder #59681 (#60049)
47acf0d8380 is described below

commit 47acf0d83805efc990127bd0def9c236cb806ed9
Author: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
AuthorDate: Fri Jan 30 16:41:58 2026 +0800

    branch-4.0: [opt](parquet) opt the performance of dict decoder #59681 (#60049)
    
    Cherry-picked from #59681
    
    Co-authored-by: Mingyu Chen (Rayner) <[email protected]>
---
 .../format/parquet/fix_length_dict_decoder.hpp     | 38 +++++++++++++++++++---
 1 file changed, 33 insertions(+), 5 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
index 56863c1b9e8..818a4dce6f4 100644
--- a/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
+++ b/be/src/vec/exec/format/parquet/fix_length_dict_decoder.hpp
@@ -18,6 +18,7 @@
 #pragma once
 
 #include "util/bit_util.h"
+#include "util/memcpy_inlined.h"
 #include "vec/columns/column_dictionary.h"
 #include "vec/columns/column_nullable.h"
 #include "vec/data_types/data_type_nullable.h"
@@ -118,14 +119,41 @@ protected:
         while (size_t run_length = select_vector.get_next_run<has_filter>(&read_type)) {
             switch (read_type) {
             case ColumnSelectVector::CONTENT: {
-                for (size_t i = 0; i < run_length; ++i) {
-                    if constexpr (PhysicalType == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
+                if constexpr (PhysicalType == tparquet::Type::FIXED_LEN_BYTE_ARRAY) {
+                    // Optimized path: use memcpy_inlined and reduce address calculations
+                    char* dst_ptr = raw_data + data_index;
+                    size_t i = 0;
+                    // Loop unrolling: process 4 elements at a time
+                    for (; i + 4 <= run_length; i += 4) {
+                        auto& slice0 = _dict_items[_indexes[dict_index++]];
+                        doris::memcpy_inlined(dst_ptr, slice0.get_data(), _type_length);
+                        dst_ptr += _type_length;
+
+                        auto& slice1 = _dict_items[_indexes[dict_index++]];
+                        doris::memcpy_inlined(dst_ptr, slice1.get_data(), _type_length);
+                        dst_ptr += _type_length;
+
+                        auto& slice2 = _dict_items[_indexes[dict_index++]];
+                        doris::memcpy_inlined(dst_ptr, slice2.get_data(), _type_length);
+                        dst_ptr += _type_length;
+
+                        auto& slice3 = _dict_items[_indexes[dict_index++]];
+                        doris::memcpy_inlined(dst_ptr, slice3.get_data(), _type_length);
+                        dst_ptr += _type_length;
+                    }
+                    // Process remaining elements
+                    for (; i < run_length; ++i) {
                         auto& slice = _dict_items[_indexes[dict_index++]];
-                        memcpy(raw_data + data_index, slice.get_data(), _type_length);
-                    } else {
+                        doris::memcpy_inlined(dst_ptr, slice.get_data(), _type_length);
+                        dst_ptr += _type_length;
+                    }
+                    data_index = dst_ptr - raw_data;
+                } else {
+                    // Original path for non-FIXED_LEN_BYTE_ARRAY types
+                    for (size_t i = 0; i < run_length; ++i) {
                         *(cppType*)(raw_data + data_index) = _dict_items[_indexes[dict_index++]];
+                        data_index += _type_length;
                     }
-                    data_index += _type_length;
                 }
                 break;
             }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to