This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 2ad0bb4c58c [opt](MergeIO) use equivalent merge size to measure merge effectiveness (#26741) (#26923)
2ad0bb4c58c is described below

commit 2ad0bb4c58cc4c71cfc12d4079aa5467e9117ccf
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Tue Nov 14 23:55:13 2023 +0800

    [opt](MergeIO) use equivalent merge size to measure merge effectiveness (#26741) (#26923)
    
    backport #26741
---
 be/src/io/fs/buffered_reader.cpp |  8 +++++---
 be/src/io/fs/buffered_reader.h   | 11 ++++++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 2a7187cc28f..8bee5f468fa 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -152,7 +152,6 @@ Status MergeRangeFileReader::read_at_impl(size_t offset, Slice result, size_t* b
     }
     content_size = 0;
     hollow_size = 0;
-    double amplified_ratio = config::max_amplified_read_ratio;
     std::vector<std::pair<double, size_t>> ratio_and_size;
     // Calculate the read amplified ratio for each merge operation and the size of the merged data.
     // Find the largest size of the merged data whose amplified ratio is less than config::max_amplified_read_ratio
@@ -168,9 +167,12 @@ Status MergeRangeFileReader::read_at_impl(size_t offset, Slice result, size_t* b
         }
     }
     size_t best_merged_size = 0;
-    for (const std::pair<double, size_t>& rs : ratio_and_size) {
+    for (int i = 0; i < ratio_and_size.size(); ++i) {
+        const std::pair<double, size_t>& rs = ratio_and_size[i];
+        size_t equivalent_size = rs.second / (i + 1);
         if (rs.second > best_merged_size) {
-            if (rs.first < amplified_ratio || rs.second <= MIN_READ_SIZE) {
+            if (rs.first <= _max_amplified_ratio ||
+                (_max_amplified_ratio < 1 && equivalent_size <= 
_equivalent_io_size)) {
                 best_merged_size = rs.second;
             }
         }
diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h
index 84235f0a460..c9b1eb96ecd 100644
--- a/be/src/io/fs/buffered_reader.h
+++ b/be/src/io/fs/buffered_reader.h
@@ -131,8 +131,9 @@ public:
     static constexpr size_t READ_SLICE_SIZE = 8 * 1024 * 1024;      // 8MB
     static constexpr size_t BOX_SIZE = 1 * 1024 * 1024;             // 1MB
     static constexpr size_t SMALL_IO = 2 * 1024 * 1024;             // 2MB
+    static constexpr size_t HDFS_MIN_IO_SIZE = 4 * 1024;            // 4KB
+    static constexpr size_t OSS_MIN_IO_SIZE = 512 * 1024;           // 512KB
     static constexpr size_t NUM_BOX = TOTAL_BUFFER_SIZE / BOX_SIZE; // 128
-    static constexpr size_t MIN_READ_SIZE = 4096;                   // 4KB
 
     MergeRangeFileReader(RuntimeProfile* profile, io::FileReaderSPtr reader,
                          const std::vector<PrefetchRange>& 
random_access_ranges)
@@ -142,6 +143,11 @@ public:
         _range_cached_data.resize(random_access_ranges.size());
         _size = _reader->size();
         _remaining = TOTAL_BUFFER_SIZE;
+        _is_oss = typeid_cast<io::S3FileReader*>(_reader.get()) != nullptr;
+        _max_amplified_ratio = config::max_amplified_read_ratio;
+        // Equivalent min size of each IO that can reach the maximum storage speed limit:
+        // 512KB for oss, 4KB for hdfs
+        _equivalent_io_size = _is_oss ? OSS_MIN_IO_SIZE : HDFS_MIN_IO_SIZE;
         if (_profile != nullptr) {
             const char* random_profile = "MergedSmallIO";
             ADD_TIMER(_profile, random_profile);
@@ -235,6 +241,9 @@ private:
     int16 _last_box_ref = -1;
     uint32 _last_box_usage = 0;
     std::vector<int16> _box_ref;
+    bool _is_oss;
+    double _max_amplified_ratio;
+    size_t _equivalent_io_size;
 
     Statistics _statistics;
 };


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org

Reply via email to