This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 105a2ad7761ecff7bc02de8f87397561dcbe0e51
Author: Mingyu Chen <[email protected]>
AuthorDate: Wed Jul 5 16:20:58 2023 +0800

    [fix](s3) fix s3 fs benchmark tool (#21401)
    
    1. Fix a concurrency bug in the s3 fs benchmark tool to avoid crashes when running multi-threaded.
    2. Add a `prefetch_read` operation to test the prefetch reader.
    3. Add the `AWS_EC2_METADATA_DISABLED` env in `start_be.sh` to avoid calling the ec2 metadata service when creating the s3 client.
    4. Add the `AWS_MAX_ATTEMPTS` env in `start_be.sh` to avoid warning logs from the s3 sdk.
---
 be/src/io/file_factory.cpp                     |   3 +-
 be/src/io/file_factory.h                       |   5 +-
 be/src/io/fs/benchmark/base_benchmark.h        |  12 +-
 be/src/io/fs/benchmark/benchmark_factory.hpp   |   2 +
 be/src/io/fs/benchmark/fs_benchmark_tool.cpp   |   8 +-
 be/src/io/fs/benchmark/s3_benchmark.hpp        |  72 ++++++--
 be/src/io/fs/buffered_reader.cpp               |   2 +-
 be/src/util/s3_util.cpp                        |   3 +-
 be/src/util/s3_util.h                          |  11 ++
 be/src/vec/core/block_spill_reader.cpp         |   2 +-
 be/src/vec/exec/format/csv/csv_reader.cpp      |   2 +-
 bin/run-fs-benchmark.sh                        |   2 +
 bin/start_be.sh                                |   2 +
 docs/en/docs/lakehouse/fs_benchmark_tool.md    | 235 +++++++++++++++++++++++++
 docs/zh-CN/docs/lakehouse/fs_benchmark_tool.md | 231 ++++++++++++++++++++++++
 15 files changed, 556 insertions(+), 36 deletions(-)

diff --git a/be/src/io/file_factory.cpp b/be/src/io/file_factory.cpp
index fc91c4c9dd..af32c81982 100644
--- a/be/src/io/file_factory.cpp
+++ b/be/src/io/file_factory.cpp
@@ -108,8 +108,7 @@ Status FileFactory::create_file_writer(TFileType::type type, ExecEnv* env,
     return Status::OK();
 }
 
-Status FileFactory::create_file_reader(RuntimeProfile* profile,
-                                       const FileSystemProperties& system_properties,
+Status FileFactory::create_file_reader(const FileSystemProperties& system_properties,
                                        const FileDescription& file_description,
                                        std::shared_ptr<io::FileSystem>* file_system,
                                        io::FileReaderSPtr* file_reader,
diff --git a/be/src/io/file_factory.h b/be/src/io/file_factory.h
index 5f7360c372..b5cbcdfc7c 100644
--- a/be/src/io/file_factory.h
+++ b/be/src/io/file_factory.h
@@ -70,9 +70,8 @@ public:
 
     /// Create FileReader
     static Status create_file_reader(
-            RuntimeProfile* profile, const FileSystemProperties& system_properties,
-            const FileDescription& file_description, std::shared_ptr<io::FileSystem>* file_system,
-            io::FileReaderSPtr* file_reader,
+            const FileSystemProperties& system_properties, const FileDescription& file_description,
+            std::shared_ptr<io::FileSystem>* file_system, io::FileReaderSPtr* file_reader,
             io::FileReaderOptions reader_options = NO_CACHE_READER_OPTIONS);
 
     // Create FileReader for stream load pipe
diff --git a/be/src/io/fs/benchmark/base_benchmark.h b/be/src/io/fs/benchmark/base_benchmark.h
index 41dae7cea2..6f177482b0 100644
--- a/be/src/io/fs/benchmark/base_benchmark.h
+++ b/be/src/io/fs/benchmark/base_benchmark.h
@@ -106,7 +106,7 @@ public:
     }
 
     Status read(benchmark::State& state, FileReaderSPtr reader) {
-        bm_log("begin to read {}", _name);
+        bm_log("begin to read {}, thread: {}", _name, state.thread_index());
         size_t buffer_size =
                 _conf_map.contains("buffer_size") ? std::stol(_conf_map["buffer_size"]) : 1000000L;
         std::vector<char> buffer;
@@ -150,13 +150,13 @@ public:
         if (status.ok() && reader != nullptr) {
             status = reader->close();
         }
-        bm_log("finish to read {}, size {}, seconds: {}, status: {}", _name, 
read_size,
-               elapsed_seconds.count(), status);
+        bm_log("finish to read {}, thread: {}, size {}, seconds: {}, status: 
{}", _name,
+               state.thread_index(), read_size, elapsed_seconds.count(), 
status);
         return status;
     }
 
     Status write(benchmark::State& state, FileWriter* writer) {
-        bm_log("begin to write {}, size: {}", _name, _file_size);
+        bm_log("begin to write {}, thread: {}, size: {}", _name, 
state.thread_index(), _file_size);
         size_t write_size = _file_size;
         size_t buffer_size =
+                _conf_map.contains("buffer_size") ? std::stol(_conf_map["buffer_size"]) : 1000000L;
@@ -190,8 +190,8 @@ public:
         state.counters["WriteTotal(B)"] = write_size;
         state.counters["WriteTime(S)"] = elapsed_seconds.count();
 
-        bm_log("finish to write {}, size: {}, seconds: {}, status: {}", _name, 
write_size,
-               elapsed_seconds.count(), status);
+        bm_log("finish to write {}, thread: {}, size: {}, seconds: {}, status: 
{}", _name,
+               state.thread_index(), write_size, elapsed_seconds.count(), 
status);
         return status;
     }
 
diff --git a/be/src/io/fs/benchmark/benchmark_factory.hpp b/be/src/io/fs/benchmark/benchmark_factory.hpp
index 0b8af3b96b..8130ffada4 100644
--- a/be/src/io/fs/benchmark/benchmark_factory.hpp
+++ b/be/src/io/fs/benchmark/benchmark_factory.hpp
@@ -44,6 +44,8 @@ Status BenchmarkFactory::getBm(const std::string fs_type, const std::string op_t
             *bm = new S3OpenReadBenchmark(threads, iterations, file_size, conf_map);
         } else if (op_type == "single_read") {
             *bm = new S3SingleReadBenchmark(threads, iterations, file_size, conf_map);
+        } else if (op_type == "prefetch_read") {
+            *bm = new S3PrefetchReadBenchmark(threads, iterations, file_size, conf_map);
         } else if (op_type == "rename") {
             *bm = new S3RenameBenchmark(threads, iterations, file_size, conf_map);
         } else if (op_type == "exists") {
diff --git a/be/src/io/fs/benchmark/fs_benchmark_tool.cpp b/be/src/io/fs/benchmark/fs_benchmark_tool.cpp
index 50085ae1e7..77dee409d1 100644
--- a/be/src/io/fs/benchmark/fs_benchmark_tool.cpp
+++ b/be/src/io/fs/benchmark/fs_benchmark_tool.cpp
@@ -21,6 +21,7 @@
 
 #include "io/fs/benchmark/benchmark_factory.hpp"
 #include "io/fs/s3_file_write_bufferpool.h"
+#include "util/cpu_info.h"
 #include "util/threadpool.h"
 
 DEFINE_string(fs_type, "hdfs", "Supported File System: s3, hdfs");
@@ -109,11 +110,14 @@ int main(int argc, char** argv) {
         return 1;
     }
 
+    doris::CpuInfo::init();
+    int num_cores = doris::CpuInfo::num_cores();
+
     // init s3 write buffer pool
     std::unique_ptr<doris::ThreadPool> buffered_reader_prefetch_thread_pool;
     doris::ThreadPoolBuilder("BufferedReaderPrefetchThreadPool")
-            .set_min_threads(16)
-            .set_max_threads(64)
+            .set_min_threads(num_cores)
+            .set_max_threads(num_cores)
             .build(&buffered_reader_prefetch_thread_pool);
     doris::io::S3FileBufferPool* s3_buffer_pool = doris::io::S3FileBufferPool::GetInstance();
     s3_buffer_pool->init(524288000, 5242880, buffered_reader_prefetch_thread_pool.get());
diff --git a/be/src/io/fs/benchmark/s3_benchmark.hpp b/be/src/io/fs/benchmark/s3_benchmark.hpp
index c2ee8ddd99..f97976e5ba 100644
--- a/be/src/io/fs/benchmark/s3_benchmark.hpp
+++ b/be/src/io/fs/benchmark/s3_benchmark.hpp
@@ -19,6 +19,7 @@
 
 #include "io/file_factory.h"
 #include "io/fs/benchmark/base_benchmark.h"
+#include "io/fs/buffered_reader.h"
 #include "io/fs/file_writer.h"
 #include "io/fs/s3_file_reader.h"
 #include "io/fs/s3_file_system.h"
@@ -35,17 +36,14 @@ public:
             : BaseBenchmark(name, threads, iterations, file_size, conf_map) {}
     virtual ~S3Benchmark() = default;
 
-    Status get_fs(const std::string& path) {
+    Status get_fs(const std::string& path, std::shared_ptr<io::S3FileSystem>* 
fs) {
         S3URI s3_uri(path);
         RETURN_IF_ERROR(s3_uri.parse());
+        S3Conf s3_conf;
         RETURN_IF_ERROR(
-                S3ClientFactory::convert_properties_to_s3_conf(_conf_map, 
s3_uri, &_s3_conf));
-        return io::S3FileSystem::create(std::move(_s3_conf), "", &_fs);
+                S3ClientFactory::convert_properties_to_s3_conf(_conf_map, 
s3_uri, &s3_conf));
+        return io::S3FileSystem::create(std::move(s3_conf), "", fs);
     }
-
-protected:
-    doris::S3Conf _s3_conf;
-    std::shared_ptr<io::S3FileSystem> _fs;
 };
 
 class S3OpenReadBenchmark : public S3Benchmark {
@@ -63,14 +61,14 @@ public:
 
     Status run(benchmark::State& state) override {
         auto file_path = get_file_path(state);
-        RETURN_IF_ERROR(get_fs(file_path));
+        std::shared_ptr<io::S3FileSystem> fs;
+        RETURN_IF_ERROR(get_fs(file_path, &fs));
 
         io::FileReaderSPtr reader;
         io::FileReaderOptions reader_opts = FileFactory::get_reader_options(nullptr);
         RETURN_IF_ERROR(FileFactory::create_s3_reader(
-                _conf_map, file_path, reinterpret_cast<std::shared_ptr<io::FileSystem>*>(&_fs),
+                _conf_map, file_path, reinterpret_cast<std::shared_ptr<io::FileSystem>*>(&fs),
                 &reader, reader_opts));
-
         return read(state, reader);
     }
 };
@@ -92,6 +90,40 @@ public:
     }
 };
 
+// Read a single specified file by prefetch reader
+class S3PrefetchReadBenchmark : public S3Benchmark {
+public:
+    S3PrefetchReadBenchmark(int threads, int iterations, size_t file_size,
+                            const std::map<std::string, std::string>& conf_map)
+            : S3Benchmark("S3PrefetchReadBenchmark", threads, iterations, file_size, conf_map) {}
+    virtual ~S3PrefetchReadBenchmark() = default;
+
+    virtual std::string get_file_path(benchmark::State& state) override {
+        std::string file_path = _conf_map["file_path"];
+        bm_log("file_path: {}", file_path);
+        return file_path;
+    }
+
+    Status run(benchmark::State& state) override {
+        FileSystemProperties fs_props;
+        fs_props.system_type = TFileType::FILE_S3;
+        fs_props.properties = _conf_map;
+
+        FileDescription fd;
+        fd.path = get_file_path(state);
+        fd.start_offset = 0;
+        fd.file_size = _file_size;
+        std::shared_ptr<io::FileSystem> fs;
+        io::FileReaderSPtr reader;
+        io::FileReaderOptions reader_options = FileFactory::get_reader_options(nullptr);
+        IOContext io_ctx;
+        RETURN_IF_ERROR(io::DelegateReader::create_file_reader(
+                nullptr, fs_props, fd, &fs, &reader, 
io::DelegateReader::AccessMode::SEQUENTIAL,
+                reader_options, &io_ctx));
+        return read(state, reader);
+    }
+};
+
 class S3CreateWriteBenchmark : public S3Benchmark {
 public:
     S3CreateWriteBenchmark(int threads, int iterations, size_t file_size,
@@ -104,10 +136,11 @@ public:
         if (_file_size <= 0) {
             _file_size = 10 * 1024 * 1024; // default 10MB
         }
-        RETURN_IF_ERROR(get_fs(file_path));
+        std::shared_ptr<io::S3FileSystem> fs;
+        RETURN_IF_ERROR(get_fs(file_path, &fs));
 
         io::FileWriterPtr writer;
-        RETURN_IF_ERROR(_fs->create_file(file_path, &writer));
+        RETURN_IF_ERROR(fs->create_file(file_path, &writer));
         return write(state, writer.get());
     }
 };
@@ -125,12 +158,13 @@ public:
 
     Status run(benchmark::State& state) override {
         auto file_path = get_file_path(state);
-        RETURN_IF_ERROR(get_fs(file_path));
+        std::shared_ptr<io::S3FileSystem> fs;
+        RETURN_IF_ERROR(get_fs(file_path, &fs));
 
         auto start = std::chrono::high_resolution_clock::now();
         std::vector<FileInfo> files;
         bool exists = true;
-        RETURN_IF_ERROR(_fs->list(file_path, true, &files, &exists));
+        RETURN_IF_ERROR(fs->list(file_path, true, &files, &exists));
         auto end = std::chrono::high_resolution_clock::now();
         auto elapsed_seconds =
                 std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
@@ -168,10 +202,11 @@ public:
     Status run(benchmark::State& state) override {
         auto file_path = get_file_path(state);
         auto new_file_path = file_path + "_new";
-        RETURN_IF_ERROR(get_fs(file_path));
+        std::shared_ptr<io::S3FileSystem> fs;
+        RETURN_IF_ERROR(get_fs(file_path, &fs));
 
         auto start = std::chrono::high_resolution_clock::now();
-        RETURN_IF_ERROR(_fs->rename(file_path, new_file_path));
+        RETURN_IF_ERROR(fs->rename(file_path, new_file_path));
         auto end = std::chrono::high_resolution_clock::now();
         auto elapsed_seconds =
                 std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
@@ -192,11 +227,12 @@ public:
 
     Status run(benchmark::State& state) override {
         auto file_path = get_file_path(state);
-        RETURN_IF_ERROR(get_fs(file_path));
+        std::shared_ptr<io::S3FileSystem> fs;
+        RETURN_IF_ERROR(get_fs(file_path, &fs));
 
         auto start = std::chrono::high_resolution_clock::now();
         bool res = false;
-        RETURN_IF_ERROR(_fs->exists(file_path, &res));
+        RETURN_IF_ERROR(fs->exists(file_path, &res));
         auto end = std::chrono::high_resolution_clock::now();
         auto elapsed_seconds =
                 std::chrono::duration_cast<std::chrono::duration<double>>(end - start);
diff --git a/be/src/io/fs/buffered_reader.cpp b/be/src/io/fs/buffered_reader.cpp
index e6093612ba..d9af61d3e8 100644
--- a/be/src/io/fs/buffered_reader.cpp
+++ b/be/src/io/fs/buffered_reader.cpp
@@ -765,7 +765,7 @@ Status DelegateReader::create_file_reader(RuntimeProfile* profile,
                                           io::FileReaderOptions reader_options,
                                           const IOContext* io_ctx, const PrefetchRange file_range) {
     io::FileReaderSPtr reader;
-    RETURN_IF_ERROR(FileFactory::create_file_reader(profile, system_properties, file_description,
+    RETURN_IF_ERROR(FileFactory::create_file_reader(system_properties, file_description,
                                                     file_system, &reader, reader_options));
     if (reader->size() < IN_MEMORY_FILE_SIZE) {
         *file_reader = std::make_shared<InMemoryFileReader>(reader);
diff --git a/be/src/util/s3_util.cpp b/be/src/util/s3_util.cpp
index 02508904aa..14bc5f5c69 100644
--- a/be/src/util/s3_util.cpp
+++ b/be/src/util/s3_util.cpp
@@ -19,7 +19,6 @@
 
 #include <aws/core/auth/AWSAuthSigner.h>
 #include <aws/core/auth/AWSCredentials.h>
-#include <aws/core/client/ClientConfiguration.h>
 #include <aws/core/utils/logging/LogLevel.h>
 #include <aws/core/utils/logging/LogSystemInterface.h>
 #include <aws/core/utils/memory/stl/AWSStringStream.h>
@@ -141,7 +140,7 @@ std::shared_ptr<Aws::S3::S3Client> S3ClientFactory::create(const S3Conf& s3_conf
     Aws::Auth::AWSCredentials aws_cred(s3_conf.ak, s3_conf.sk);
     DCHECK(!aws_cred.IsExpiredOrEmpty());
 
-    Aws::Client::ClientConfiguration aws_config;
+    Aws::Client::ClientConfiguration aws_config = S3ClientFactory::getClientConfiguration();
     aws_config.endpointOverride = s3_conf.endpoint;
     aws_config.region = s3_conf.region;
     if (s3_conf.max_connections > 0) {
diff --git a/be/src/util/s3_util.h b/be/src/util/s3_util.h
index 6107fae4a7..9611026ecc 100644
--- a/be/src/util/s3_util.h
+++ b/be/src/util/s3_util.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <aws/core/Aws.h>
+#include <aws/core/client/ClientConfiguration.h>
 #include <fmt/format.h>
 #include <stdint.h>
 
@@ -100,6 +101,16 @@ public:
     static Status convert_properties_to_s3_conf(const std::map<std::string, std::string>& prop,
                                                 const S3URI& s3_uri, S3Conf* s3_conf);
 
+    static Aws::Client::ClientConfiguration& getClientConfiguration() {
+        // The default constructor of ClientConfiguration will do some http 
call
+        // such as Aws::Internal::GetEC2MetadataClient and other init 
operation,
+        // which is unnecessary.
+        // So here we use a static instance, and deep copy every time
+        // to avoid unnecessary operations.
+        static Aws::Client::ClientConfiguration instance;
+        return instance;
+    }
+
 private:
     S3ClientFactory();
 
diff --git a/be/src/vec/core/block_spill_reader.cpp b/be/src/vec/core/block_spill_reader.cpp
index 30079360d7..cea20aee0b 100644
--- a/be/src/vec/core/block_spill_reader.cpp
+++ b/be/src/vec/core/block_spill_reader.cpp
@@ -53,7 +53,7 @@ Status BlockSpillReader::open() {
     FileDescription file_description;
     file_description.path = file_path_;
 
-    RETURN_IF_ERROR(FileFactory::create_file_reader(nullptr, system_properties, file_description,
+    RETURN_IF_ERROR(FileFactory::create_file_reader(system_properties, file_description,
                                                     &file_system, &file_reader_));
 
     size_t file_size = file_reader_->size();
diff --git a/be/src/vec/exec/format/csv/csv_reader.cpp b/be/src/vec/exec/format/csv/csv_reader.cpp
index 995825888f..ad34dee80b 100644
--- a/be/src/vec/exec/format/csv/csv_reader.cpp
+++ b/be/src/vec/exec/format/csv/csv_reader.cpp
@@ -661,7 +661,7 @@ Status CsvReader::_prepare_parse(size_t* read_line, bool* is_parse_name) {
     io::FileReaderOptions reader_options = FileFactory::get_reader_options(_state);
     reader_options.modification_time =
             _range.__isset.modification_time ? _range.modification_time : 0;
-    RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _system_properties, _file_description,
+    RETURN_IF_ERROR(FileFactory::create_file_reader(_system_properties, _file_description,
                                                     &_file_system, &_file_reader, reader_options));
     if (_file_reader->size() == 0 && _params.file_type != TFileType::FILE_STREAM &&
         _params.file_type != TFileType::FILE_BROKER) {
diff --git a/bin/run-fs-benchmark.sh b/bin/run-fs-benchmark.sh
index 9eb47d3ee8..a128217fa2 100755
--- a/bin/run-fs-benchmark.sh
+++ b/bin/run-fs-benchmark.sh
@@ -263,6 +263,8 @@ export LIBHDFS_OPTS="${final_java_opt}"
 
 # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
 export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
+export AWS_EC2_METADATA_DISABLED=true
+export AWS_MAX_ATTEMPTS=2
 
 echo "$@"
${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/fs_benchmark_tool" "$@" 2>&1 | tee "${LOG_DIR}/fs_benchmark_tool.log"
diff --git a/bin/start_be.sh b/bin/start_be.sh
index c603239a85..8984986551 100755
--- a/bin/start_be.sh
+++ b/bin/start_be.sh
@@ -302,6 +302,8 @@ export LIBHDFS_OPTS="${final_java_opt}"
 
 # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile
 export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:30000,dirty_decay_ms:30000,oversize_threshold:0,lg_tcache_max:16,prof_prefix:jeprof.out"
+export AWS_EC2_METADATA_DISABLED=true
+export AWS_MAX_ATTEMPTS=2
 
 if [[ "${RUN_DAEMON}" -eq 1 ]]; then
     nohup ${LIMIT:+${LIMIT}} "${DORIS_HOME}/lib/doris_be" "$@" >>"${LOG_DIR}/be.out" 2>&1 </dev/null &
diff --git a/docs/en/docs/lakehouse/fs_benchmark_tool.md b/docs/en/docs/lakehouse/fs_benchmark_tool.md
new file mode 100644
index 0000000000..9e6cca5716
--- /dev/null
+++ b/docs/en/docs/lakehouse/fs_benchmark_tool.md
@@ -0,0 +1,235 @@
+---
+{
+    "title": "File system benchmark tools",
+    "language": "en"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+
+# Introduction
+
+`fs_benchmark_tool` can be used to test the basic service performance, such as read and write performance, of remote storage systems, including HDFS and object storage. It is mainly used to analyze or troubleshoot performance problems of remote storage systems.
+
+# Compile and install
+
+`fs_benchmark_tool` is part of the `BE` code and is not compiled by default. To compile it, execute the following command:
+
+```
+cd doris 
+BUILD_FS_BENCHMARK=ON ./build.sh  --be
+```
+After compilation, the following files will be generated in the `output/be/` directory:
+```
+bin/run-fs-benchmark.sh
+lib/fs_benchmark_tool
+```
+> Note that `fs_benchmark_tool` needs to be run from the BE runtime environment directory, because it depends on BE-related jar packages, environment variables, etc.
+
+# Usage
+
+Command format:
+
+```shell
+sh run-fs-benchmark.sh \
+          --conf=<configuration file> \
+          --fs_type=<file system type> \
+          --operation=<operation on the file system> \
+          --file_size=<file size> \
+          --threads=<number of threads> \
+          --iterations=<number of iterations>
+```
+
+## Parameter description
+
+`--conf` Required parameter
+
+The configuration file for the operation. It is mainly used to provide the connection information of the remote storage system. See the examples below.
+
+To connect to `hdfs`, put the `hdfs-site.xml` and `core-site.xml` files in the `be/conf` directory.
+
+In addition to the connection information, the configuration file supports the following parameters (see the sample conf fragment after this list):
+
+- `file_size`: Specifies the size of the file to read or write.
+
+- `buffer_size`: The block size read by a single read operation.
+
+- `base_dir`: Specifies the base path under which files are read or written.
+
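+For example, a conf file might combine the connection information with these parameters. A sample fragment (the values here are illustrative placeholders, not defaults):
+
+```
+AWS_ACCESS_KEY=ak
+AWS_SECRET_KEY=sk
+AWS_ENDPOINT=cos.ap-beijing.myqcloud.com
+AWS_REGION=ap-beijing
+file_size=10485760
+buffer_size=1048576
+base_dir=s3://bucket-123/benchmark/
+```
+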
+`--fs_type` Required parameter
+
+The type of file system to operate on. Currently supported: `hdfs`, `s3`.
+
+`--operation` Required parameter
+
+Specifies the operation type:
+
+- `create_write`: Each thread creates a file named `test_${current thread number}` in the `base_dir` (set in the conf file) directory and writes `file_size` bytes to it.
+
+- `open_read`: Based on the files created by `create_write`, each thread reads the file named `test_${current thread number}`, with a read size of `file_size`.
+
+- `single_read`: Reads the `file_path` (set in the conf file) file, with a read size of `file_size`.
+
+- `prefetch_read`: Uses the prefetch reader to read the `file_path` (set in the conf file) file, with a read size of `file_size`. Only for the s3 file system. See the example command after this list.
+
+- `exists`: Each thread checks whether a file named `test_${current thread number}` exists.
+
+- `rename`: Based on the files created by `create_write`, each thread renames the file `test_${current thread number}` to `test_${current thread number}_new`.
+
+- `list`: Gets the list of files in the `base_dir` (set in the conf file) directory.
+
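+For example, the new `prefetch_read` operation could be invoked as follows (a sketch assuming an `s3.conf` like the one in the Examples section; the size, thread, and iteration values are illustrative):
+
+```
+sh run-fs-benchmark.sh \
+    --conf=s3.conf \
+    --fs_type=s3 \
+    --operation=prefetch_read \
+    --file_size=1048576 \
+    --threads=1 \
+    --iterations=1
+```
+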
+`--file_size`
+
+The size of the file to operate on, in bytes.
+
+- `create_write`: Default is 10 MB.
+
+- `open_read`: Default is 10 MB.
+
+- `single_read`: Default is 0, which reads the full file.
+
+`--threads`
+
+The number of threads to use. Default is 1.
+
+`--iterations`
+
+The number of iterations (the number of times the benchmark function is executed) per thread. Default is 1.
+
+## Result analysis
+
+Except for the `rename` operation, each operation is repeated three times, and the mean, median, standard deviation, etc. are calculated.
+```
+--------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                      Time             CPU   Iterations UserCounters...
+--------------------------------------------------------------------------------------------------------------------------------
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1             13642 ms         2433 ms            1 OpenReaderTime(S)=4.80734 ReadRate(B/S)=101.104M/s ReadTime(S)=13.642 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              3918 ms         1711 ms            1 OpenReaderTime(S)=22.041u ReadRate(B/S)=352.011M/s ReadTime(S)=3.91824 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              3685 ms         1697 ms            1 OpenReaderTime(S)=35.837u ReadRate(B/S)=374.313M/s ReadTime(S)=3.68479 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_mean         7082 ms         1947 ms            3 OpenReaderTime(S)=1.60247 ReadRate(B/S)=275.809M/s ReadTime(S)=7.08166 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_median       3918 ms         1711 ms            3 OpenReaderTime(S)=35.837u ReadRate(B/S)=352.011M/s ReadTime(S)=3.91824 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_stddev       5683 ms          421 ms            3 OpenReaderTime(S)=2.7755 ReadRate(B/S)=151.709M/s ReadTime(S)=5.68258 ReadTotal(B)=0
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_cv          80.24 %         21.64 %             3 OpenReaderTime(S)=173.20% ReadRate(B/S)=55.01% ReadTime(S)=80.24% ReadTotal(B)=0.00%
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_max         13642 ms         2433 ms            3 OpenReaderTime(S)=4.80734 ReadRate(B/S)=374.313M/s ReadTime(S)=13.642 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_min          3685 ms         1697 ms            3 OpenReaderTime(S)=22.041u ReadRate(B/S)=101.104M/s ReadTime(S)=3.68479 ReadTotal(B)=1.37926G
+```
+
+Focus on the first three lines, which are the results of the three repeated executions. The first run involves operations such as connection initialization, so it takes longer. The latter two runs usually represent normal performance.
+
+Focus on the information in `UserCounters`:
+- `OpenReaderTime`: Time to open the file.
+- `ReadRate`: Read rate. The overall throughput is recorded here. For multi-threaded runs, dividing by the number of threads gives the average rate per thread.
+- `ReadTime`: Read time. The accumulated time across threads is recorded here. Dividing by the number of threads gives the average time per thread.
+- `ReadTotal`: Total amount read. The accumulated value across threads is recorded here. Dividing by the number of threads gives the average amount read per thread.
+- `WriteRate`: Same as `ReadRate`, but for the write rate.
+- `WriteTime`: Same as `ReadTime`, but for write time.
+- `WriteTotal`: Same as `ReadTotal`, but for the total amount written.
+- `ListCost/RenameCost/ExistsCost`: Time taken by a single operation of the corresponding type.
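+
+For example, in the 3-thread HDFS `create_write` run shown in the Examples section, the first row reports `WriteTime(S)=0.387954` accumulated across 3 threads, i.e. roughly 0.13 s of write time per thread on average.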
+
+# Examples
+
+## HDFS
+
+Command:
+```
+sh run-fs-benchmark.sh \
+    --conf=hdfs.conf \
+    --fs_type=hdfs \
+    --operation=create_write  \
+    --file_size=1024000 \
+    --threads=3 \
+    --iterations=5
+```
+Using the `hdfs.conf` configuration file, perform the `create_write` operation on the `hdfs` file system, using 3 threads, writing 1MB per operation, with 5 iterations.
+
+`hdfs.conf` configuration file:
+```
+fs.defaultFS=hdfs://HDFS8000871
+hadoop.username=hadoop
+dfs.nameservices=HDFS8000871
+dfs.ha.namenodes.HDFS8000871=nn1,nn2
+dfs.namenode.rpc-address.HDFS8000871.nn1=102.22.10.56:4007
+dfs.namenode.rpc-address.HDFS8000871.nn2=102.22.10.57:4007
+dfs.client.failover.proxy.provider.HDFS8000871=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
+base_dir=hdfs://HDFS8000871/benchmarks/TestDFSIO/io_data/
+```
+Result:
+```
+---------------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                             Time             CPU   Iterations UserCounters...
+---------------------------------------------------------------------------------------------------------------------------------------
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              61.7 ms         38.7 ms           15 WriteRate(B/S)=3.31902M/s WriteTime(S)=0.387954 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              49.6 ms         3.09 ms           15 WriteRate(B/S)=4.12967M/s WriteTime(S)=0.427992 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              45.2 ms         2.72 ms           15 WriteRate(B/S)=4.53148M/s WriteTime(S)=0.362854 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_mean         52.2 ms         14.8 ms            3 WriteRate(B/S)=3.99339M/s WriteTime(S)=0.392933 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_median       49.6 ms         3.09 ms            3 WriteRate(B/S)=4.12967M/s WriteTime(S)=0.387954 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_stddev       8.55 ms         20.7 ms            3 WriteRate(B/S)=617.61k/s WriteTime(S)=0.0328536 WriteTotal(B)=0
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_cv          16.39 %        139.34 %             3 WriteRate(B/S)=15.47% WriteTime(S)=8.36% WriteTotal(B)=0.00%
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_max          61.7 ms         38.7 ms            3 WriteRate(B/S)=4.53148M/s WriteTime(S)=0.427992 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_min          45.2 ms         2.72 ms            3 WriteRate(B/S)=3.31902M/s WriteTime(S)=0.362854 WriteTotal(B)=3.072M
+Files generated on HDFS:
+[hadoop@172 ~]$ hadoop fs -ls -h /benchmarks/TestDFSIO/io_data/
+Found 3 items
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_0
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_1
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_2
+```
+
+## Object storage
+
+Command:
+```
+sh bin/run-fs-benchmark.sh \
+     --conf=s3.conf \
+     --fs_type=s3 \
+     --operation=single_read \
+     --threads=1 \
+     --iterations=1
+```
+
+Using the `s3.conf` configuration file, perform the `single_read` operation on the `s3` file system, using 1 thread, with 1 iteration.
+
+`s3.conf` configuration file:
+```
+AWS_ACCESS_KEY=ak
+AWS_SECRET_KEY=sk
+AWS_ENDPOINT=cos.ap-beijing.myqcloud.com
+AWS_REGION=ap-beijing
+file_path=s3://bucket-123/test_data/parquet/000016_0
+```
+Result:
+```
+------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                    Time             CPU   Iterations UserCounters...
+------------------------------------------------------------------------------------------------------------------------------
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              7534 ms          140 ms            1 ReadRate(B/S)=11.9109M/s ReadTime(S)=7.53353 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              5988 ms          118 ms            1 ReadRate(B/S)=14.985M/s ReadTime(S)=5.98808 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              6060 ms          124 ms            1 ReadRate(B/S)=14.8081M/s ReadTime(S)=6.05961 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_mean         6527 ms          127 ms            3 ReadRate(B/S)=13.9014M/s ReadTime(S)=6.52707 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_median       6060 ms          124 ms            3 ReadRate(B/S)=14.8081M/s ReadTime(S)=6.05961 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_stddev        872 ms         11.4 ms            3 ReadRate(B/S)=1.72602M/s ReadTime(S)=0.87235 ReadTotal(B)=0
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_cv          13.37 %          8.94 %             3 ReadRate(B/S)=12.42% ReadTime(S)=13.37% ReadTotal(B)=0.00%
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_max          7534 ms          140 ms            3 ReadRate(B/S)=14.985M/s ReadTime(S)=7.53353 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_min          5988 ms          118 ms            3 ReadRate(B/S)=11.9109M/s ReadTime(S)=5.98808 ReadTotal(B)=89.7314M
+``` 
+
diff --git a/docs/zh-CN/docs/lakehouse/fs_benchmark_tool.md b/docs/zh-CN/docs/lakehouse/fs_benchmark_tool.md
new file mode 100644
index 0000000000..d130212fdc
--- /dev/null
+++ b/docs/zh-CN/docs/lakehouse/fs_benchmark_tool.md
@@ -0,0 +1,231 @@
+---
+{
+    "title": "文件系统性能测试工具",
+    "language": "zh-CN"
+}
+---
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+# Introduction
+
+`fs_benchmark_tool` can be used to test the basic service performance, such as read and write performance, of remote storage systems, including HDFS and object storage. It is mainly used to analyze or troubleshoot performance problems of remote storage systems.
+
+# Compile and install
+
+`fs_benchmark_tool` is part of the `BE` code and is not compiled by default. To compile it, execute the following command:
+
+```
+cd doris 
+BUILD_FS_BENCHMARK=ON ./build.sh  --be
+```
+After compilation, the following files will be generated in the `output/be/` directory:
+```
+bin/run-fs-benchmark.sh
+lib/fs_benchmark_tool
+```
+> Note that `fs_benchmark_tool` needs to be run from the BE runtime environment directory, because it depends on BE-related jar packages, environment variables, etc.
+
+# Usage
+
+Command format:
+
+```shell
+sh run-fs-benchmark.sh \
+          --conf=<configuration file> \
+          --fs_type=<file system type> \
+          --operation=<operation on the file system> \
+          --file_size=<file size> \
+          --threads=<number of threads> \
+          --iterations=<number of iterations>
+```
+## Parameter description
+
+`--conf` Required parameter
+
+The configuration file for the operation. It is mainly used to provide the connection information of the remote storage system. See the examples below.
+
+To connect to `hdfs`, put the `hdfs-site.xml` and `core-site.xml` files in the `be/conf` directory.
+
+In addition to the connection information, the configuration file supports the following parameters (see the sample conf fragment after this list):
+- `file_size`: Specifies the size of the file to read or write.
+
+- `buffer_size`: The block size read by a single read operation.
+
+- `base_dir`: Specifies the base path under which files are read or written.
+
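+For example, a conf file might combine the connection information with these parameters. A sample fragment (the values here are illustrative placeholders, not defaults):
+
+```
+AWS_ACCESS_KEY=ak
+AWS_SECRET_KEY=sk
+AWS_ENDPOINT=cos.ap-beijing.myqcloud.com
+AWS_REGION=ap-beijing
+file_size=10485760
+buffer_size=1048576
+base_dir=s3://bucket-123/benchmark/
+```
+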
+`--fs_type` Required parameter
+
+The type of file system to operate on. Currently supported: `hdfs`, `s3`.
+
+`--operation` Required parameter
+
+Specifies the operation type:
+
+- `create_write`: Each thread creates a file named `test_${current thread number}` in the `base_dir` (set in the conf file) directory and writes `file_size` bytes to it.
+
+- `open_read`: Based on the files created by `create_write`, each thread reads the file named `test_${current thread number}`, with a read size of `file_size`.
+
+- `single_read`: Reads the `file_path` (set in the conf file) file, with a read size of `file_size`.
+
+- `prefetch_read`: Uses the prefetch reader to read the `file_path` (set in the conf file) file, with a read size of `file_size`. Only for the s3 file system. See the example command after this list.
+
+- `exists`: Each thread checks whether a file named `test_${current thread number}` exists.
+
+- `rename`: Based on the files created by `create_write`, each thread renames the file `test_${current thread number}` to `test_${current thread number}_new`.
+
+- `list`: Gets the list of files in the `base_dir` (set in the conf file) directory.
+
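+For example, the new `prefetch_read` operation could be invoked as follows (a sketch assuming an `s3.conf` like the one in the Examples section; the size, thread, and iteration values are illustrative):
+
+```
+sh run-fs-benchmark.sh \
+    --conf=s3.conf \
+    --fs_type=s3 \
+    --operation=prefetch_read \
+    --file_size=1048576 \
+    --threads=1 \
+    --iterations=1
+```
+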
+`--file_size`
+
+The size of the file to operate on, in bytes.
+
+- `create_write`: Default is 10MB.
+
+- `open_read`: Default is 10MB.
+
+- `single_read`: Default is 0, which reads the full file.
+
+`--threads`
+
+The number of threads to use. Default is 1.
+
+`--iterations`
+
+The number of iterations (the number of times the benchmark function is executed) per thread. Default is 1.
+
+## Result analysis
+
+Except for the `rename` operation, each operation is repeated three times, and the mean, median, standard deviation, etc. are calculated.
+```
+--------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                      Time             CPU   Iterations UserCounters...
+--------------------------------------------------------------------------------------------------------------------------------
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1             13642 ms         2433 ms            1 OpenReaderTime(S)=4.80734 ReadRate(B/S)=101.104M/s ReadTime(S)=13.642 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              3918 ms         1711 ms            1 OpenReaderTime(S)=22.041u ReadRate(B/S)=352.011M/s ReadTime(S)=3.91824 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              3685 ms         1697 ms            1 OpenReaderTime(S)=35.837u ReadRate(B/S)=374.313M/s ReadTime(S)=3.68479 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_mean         7082 ms         1947 ms            3 OpenReaderTime(S)=1.60247 ReadRate(B/S)=275.809M/s ReadTime(S)=7.08166 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_median       3918 ms         1711 ms            3 OpenReaderTime(S)=35.837u ReadRate(B/S)=352.011M/s ReadTime(S)=3.91824 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_stddev       5683 ms          421 ms            3 OpenReaderTime(S)=2.7755 ReadRate(B/S)=151.709M/s ReadTime(S)=5.68258 ReadTotal(B)=0
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_cv          80.24 %         21.64 %             3 OpenReaderTime(S)=173.20% ReadRate(B/S)=55.01% ReadTime(S)=80.24% ReadTotal(B)=0.00%
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_max         13642 ms         2433 ms            3 OpenReaderTime(S)=4.80734 ReadRate(B/S)=374.313M/s ReadTime(S)=13.642 ReadTotal(B)=1.37926G
+HdfsReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_min          3685 ms         1697 ms            3 OpenReaderTime(S)=22.041u ReadRate(B/S)=101.104M/s ReadTime(S)=3.68479 ReadTotal(B)=1.37926G
+```
+
+Focus on the first three lines, which are the results of the three repeated executions. The first run involves operations such as connection initialization, so it takes longer. The latter two runs usually represent normal performance.
+
+Focus on the information in `UserCounters`:
+- `OpenReaderTime`: Time to open the file.
+- `ReadRate`: Read rate. The overall throughput is recorded here. For multi-threaded runs, dividing by the number of threads gives the average rate per thread.
+- `ReadTime`: Read time. The accumulated time across threads is recorded here. Dividing by the number of threads gives the average time per thread.
+- `ReadTotal`: Total amount read. The accumulated value across threads is recorded here. Dividing by the number of threads gives the average amount read per thread.
+- `WriteRate`: Same as `ReadRate`, but for the write rate.
+- `WriteTime`: Same as `ReadTime`, but for write time.
+- `WriteTotal`: Same as `ReadTotal`, but for the total amount written.
+- `ListCost/RenameCost/ExistsCost`: Time taken by a single operation of the corresponding type.
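+
+For example, in the 3-thread HDFS `create_write` run shown in the Examples section, the first row reports `WriteTime(S)=0.387954` accumulated across 3 threads, i.e. roughly 0.13 s of write time per thread on average.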
+
+# Examples
+
+## HDFS
+
+Command:
+```
+sh run-fs-benchmark.sh \
+    --conf=hdfs.conf \
+    --fs_type=hdfs \
+    --operation=create_write  \
+    --file_size=1024000 \
+    --threads=3 \
+    --iterations=5
+```
+Using the `hdfs.conf` configuration file, perform the `create_write` operation on the `hdfs` file system, using 3 threads, writing 1MB per operation, with 5 iterations.
+
+`hdfs.conf` configuration file:
+```
+fs.defaultFS=hdfs://HDFS8000871
+hadoop.username=hadoop
+dfs.nameservices=HDFS8000871
+dfs.ha.namenodes.HDFS8000871=nn1,nn2
+dfs.namenode.rpc-address.HDFS8000871.nn1=102.22.10.56:4007
+dfs.namenode.rpc-address.HDFS8000871.nn2=102.22.10.57:4007
+dfs.client.failover.proxy.provider.HDFS8000871=org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider
+base_dir=hdfs://HDFS8000871/benchmarks/TestDFSIO/io_data/
+```
+Result:
+```
+---------------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                             Time             CPU   Iterations UserCounters...
+---------------------------------------------------------------------------------------------------------------------------------------
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              61.7 ms         38.7 ms           15 WriteRate(B/S)=3.31902M/s WriteTime(S)=0.387954 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              49.6 ms         3.09 ms           15 WriteRate(B/S)=4.12967M/s WriteTime(S)=0.427992 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3              45.2 ms         2.72 ms           15 WriteRate(B/S)=4.53148M/s WriteTime(S)=0.362854 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_mean         52.2 ms         14.8 ms            3 WriteRate(B/S)=3.99339M/s WriteTime(S)=0.392933 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_median       49.6 ms         3.09 ms            3 WriteRate(B/S)=4.12967M/s WriteTime(S)=0.387954 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_stddev       8.55 ms         20.7 ms            3 WriteRate(B/S)=617.61k/s WriteTime(S)=0.0328536 WriteTotal(B)=0
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_cv          16.39 %        139.34 %             3 WriteRate(B/S)=15.47% WriteTime(S)=8.36% WriteTotal(B)=0.00%
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_max          61.7 ms         38.7 ms            3 WriteRate(B/S)=4.53148M/s WriteTime(S)=0.427992 WriteTotal(B)=3.072M
+HdfsCreateWriteBenchmark/iterations:5/repeats:3/manual_time/threads:3_min          45.2 ms         2.72 ms            3 WriteRate(B/S)=3.31902M/s WriteTime(S)=0.362854 WriteTotal(B)=3.072M
+Files generated on HDFS:
+[hadoop@172 ~]$ hadoop fs -ls -h /benchmarks/TestDFSIO/io_data/
+Found 3 items
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_0
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_1
+-rw-r--r--   3 hadoop supergroup        100 2023-06-27 11:55 /benchmarks/TestDFSIO/io_data/test_2
+```
+
+## Object storage
+
+Command:
+```
+sh bin/run-fs-benchmark.sh \
+     --conf=s3.conf \
+     --fs_type=s3 \
+     --operation=single_read \
+     --threads=1 \
+     --iterations=1
+```
+
+Using the `s3.conf` configuration file, perform the `single_read` operation on the `s3` file system, using 1 thread, with 1 iteration.
+
+`s3.conf` configuration file:
+```
+AWS_ACCESS_KEY=ak
+AWS_SECRET_KEY=sk
+AWS_ENDPOINT=cos.ap-beijing.myqcloud.com
+AWS_REGION=ap-beijing
+file_path=s3://bucket-123/test_data/parquet/000016_0
+```
+Result:
+```
+------------------------------------------------------------------------------------------------------------------------------
+Benchmark                                                                    Time             CPU   Iterations UserCounters...
+------------------------------------------------------------------------------------------------------------------------------
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              7534 ms          140 ms            1 ReadRate(B/S)=11.9109M/s ReadTime(S)=7.53353 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              5988 ms          118 ms            1 ReadRate(B/S)=14.985M/s ReadTime(S)=5.98808 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1              6060 ms          124 ms            1 ReadRate(B/S)=14.8081M/s ReadTime(S)=6.05961 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_mean         6527 ms          127 ms            3 ReadRate(B/S)=13.9014M/s ReadTime(S)=6.52707 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_median       6060 ms          124 ms            3 ReadRate(B/S)=14.8081M/s ReadTime(S)=6.05961 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_stddev        872 ms         11.4 ms            3 ReadRate(B/S)=1.72602M/s ReadTime(S)=0.87235 ReadTotal(B)=0
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_cv          13.37 %          8.94 %             3 ReadRate(B/S)=12.42% ReadTime(S)=13.37% ReadTotal(B)=0.00%
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_max          7534 ms          140 ms            3 ReadRate(B/S)=14.985M/s ReadTime(S)=7.53353 ReadTotal(B)=89.7314M
+S3ReadBenchmark/iterations:1/repeats:3/manual_time/threads:1_min          5988 ms          118 ms            3 ReadRate(B/S)=11.9109M/s ReadTime(S)=5.98808 ReadTotal(B)=89.7314M
+``` 
+

