This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 2ad0bb4c58c [opt](MergeIO) use equivalent merge size to measure merge effectiveness (#26741) (#26923)
2ad0bb4c58c is described below

commit 2ad0bb4c58cc4c71cfc12d4079aa5467e9117ccf
Author: Ashin Gau <ashin...@users.noreply.github.com>
AuthorDate: Tue Nov 14 23:55:13 2023 +0800

    [opt](MergeIO) use equivalent merge size to measure merge effectiveness (#26741) (#26923)

    backport #26741
---
 be/src/io/fs/buffered_reader.cpp |  8 +++++---
 be/src/io/fs/buffered_reader.h   | 11 ++++++++++-
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index 2a7187cc28f..8bee5f468fa 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -152,7 +152,6 @@ Status MergeRangeFileReader::read_at_impl(size_t offset, Slice result, size_t* b
  }
  content_size = 0;
  hollow_size = 0;
- double amplified_ratio = config::max_amplified_read_ratio;
  std::vector<std::pair<double, size_t>> ratio_and_size;
  // Calculate the read amplified ratio for each merge operation and the size of the merged data.
  // Find the largest size of the merged data whose amplified ratio is less than config::max_amplified_read_ratio
@@ -168,9 +167,12 @@ Status MergeRangeFileReader::read_at_impl(size_t offset, Slice result, size_t* b
  }
  }
  size_t best_merged_size = 0;
- for (const std::pair<double, size_t>& rs : ratio_and_size) {
+ for (int i = 0; i < ratio_and_size.size(); ++i) {
+ const std::pair<double, size_t>& rs = ratio_and_size[i];
+ size_t equivalent_size = rs.second / (i + 1);
  if (rs.second > best_merged_size) {
- if (rs.first < amplified_ratio || rs.second <= MIN_READ_SIZE) {
+ if (rs.first <= _max_amplified_ratio ||
+ (_max_amplified_ratio < 1 && equivalent_size <= _equivalent_io_size)) {
  best_merged_size = rs.second;
  }
  }
diff --git a/be/src/io/fs/buffered_reader.h b/be/src/io/fs/buffered_reader.h
index 84235f0a460..c9b1eb96ecd 100644
--- a/be/src/io/fs/buffered_reader.h
+++ b/be/src/io/fs/buffered_reader.h
@@ -131,8 +131,9 @@ public:
  static constexpr size_t READ_SLICE_SIZE = 8 * 1024 * 1024; // 8MB
  static constexpr size_t BOX_SIZE = 1 * 1024 * 1024; // 1MB
  static constexpr size_t SMALL_IO = 2 * 1024 * 1024; // 2MB
+ static constexpr size_t HDFS_MIN_IO_SIZE = 4 * 1024; // 4KB
+ static constexpr size_t OSS_MIN_IO_SIZE = 512 * 1024; // 512KB
  static constexpr size_t NUM_BOX = TOTAL_BUFFER_SIZE / BOX_SIZE; // 128
- static constexpr size_t MIN_READ_SIZE = 4096; // 4KB

  MergeRangeFileReader(RuntimeProfile* profile, io::FileReaderSPtr reader,
  const std::vector<PrefetchRange>& random_access_ranges)
@@ -142,6 +143,11 @@ public:
  _range_cached_data.resize(random_access_ranges.size());
  _size = _reader->size();
  _remaining = TOTAL_BUFFER_SIZE;
+ _is_oss = typeid_cast<io::S3FileReader*>(_reader.get()) != nullptr;
+ _max_amplified_ratio = config::max_amplified_read_ratio;
+ // Equivalent min size of each IO that can reach the maximum storage speed limit:
+ // 512KB for oss, 4KB for hdfs
+ _equivalent_io_size = _is_oss ? OSS_MIN_IO_SIZE : HDFS_MIN_IO_SIZE;
  if (_profile != nullptr) {
  const char* random_profile = "MergedSmallIO";
  ADD_TIMER(_profile, random_profile);
@@ -235,6 +241,9 @@ private:
  int16 _last_box_ref = -1;
  uint32 _last_box_usage = 0;
  std::vector<int16> _box_ref;
+ bool _is_oss;
+ double _max_amplified_ratio;
+ size_t _equivalent_io_size;

  Statistics _statistics;
 };

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@doris.apache.org
For additional commands, e-mail: commits-h...@doris.apache.org