This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-4.0 by this push:
     new fea57a03ee2 branch-4.0: [refact](inverted index) refact compound idx writer #59219 (#59364)
fea57a03ee2 is described below

commit fea57a03ee2e0fde2936f161706f2c9e3c55c2ba
Author: Jack <[email protected]>
AuthorDate: Fri Dec 26 18:15:51 2025 +0800

    branch-4.0: [refact](inverted index) refact compound idx writer #59219 (#59364)
    
    cherry pick from #59219
---
 .../rowset/segment_v2/index_storage_format_v2.cpp  | 39 +++++++++-------------
 .../rowset/segment_v2/index_storage_format_v2.h    |  7 ++--
 .../segment_v2/inverted_index_fs_directory.cpp     | 20 +++++++++++
 .../segment_v2/inverted_index_fs_directory.h       |  5 +++
 .../segment_v2/inverted_index_file_writer_test.cpp | 23 +++++--------
 5 files changed, 53 insertions(+), 41 deletions(-)

diff --git a/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp 
b/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp
index 2342e15d34b..0b0e81da191 100644
--- a/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp
+++ b/be/src/olap/rowset/segment_v2/index_storage_format_v2.cpp
@@ -39,7 +39,6 @@ IndexStorageFormatV2::IndexStorageFormatV2(IndexFileWriter* 
index_file_writer)
         : IndexStorageFormat(index_file_writer) {}
 
 Status IndexStorageFormatV2::write() {
-    std::unique_ptr<lucene::store::Directory, DirectoryDeleter> out_dir = 
nullptr;
     std::unique_ptr<lucene::store::IndexOutput> compound_file_output = nullptr;
     ErrorContext error_context;
     try {
@@ -48,11 +47,13 @@ Status IndexStorageFormatV2::write() {
         // Prepare file metadata
         auto file_metadata = prepare_file_metadata(current_offset);
 
-        // Create output stream
-        auto result = create_output_stream();
-        out_dir = std::move(result.first);
-        compound_file_output = std::move(result.second);
-        VLOG_DEBUG << fmt::format("Output compound index file to streams: {}", 
out_dir->toString());
+        // Create output stream directly without directory operations.
+        // This is important for cloud storage (like S3) where directory 
operations are not
+        // supported or unnecessary.
+        compound_file_output = create_output_stream();
+        auto index_path = InvertedIndexDescriptor::get_index_file_path_v2(
+                _index_file_writer->_index_path_prefix);
+        VLOG_DEBUG << fmt::format("Output compound index file to: {}", 
index_path);
 
         // Write version and number of indices
         write_version_and_indices_count(compound_file_output.get());
@@ -75,10 +76,7 @@ Status IndexStorageFormatV2::write() {
         error_context.err_msg.append(err.what());
         LOG(ERROR) << error_context.err_msg;
     }
-    FINALLY({
-        FINALLY_CLOSE(compound_file_output);
-        FINALLY_CLOSE(out_dir);
-    })
+    FINALLY({ FINALLY_CLOSE(compound_file_output); })
 
     return Status::OK();
 }
@@ -177,21 +175,16 @@ std::vector<FileMetadata> 
IndexStorageFormatV2::prepare_file_metadata(int64_t& c
     return file_metadata;
 }
 
-std::pair<std::unique_ptr<lucene::store::Directory, DirectoryDeleter>,
-          std::unique_ptr<lucene::store::IndexOutput>>
-IndexStorageFormatV2::create_output_stream() {
-    io::Path index_path {InvertedIndexDescriptor::get_index_file_path_v2(
-            _index_file_writer->_index_path_prefix)};
-
-    auto* out_dir = 
DorisFSDirectoryFactory::getDirectory(_index_file_writer->_fs,
-                                                          
index_path.parent_path().c_str());
-    out_dir->set_file_writer_opts(_index_file_writer->_opts);
-    std::unique_ptr<lucene::store::Directory, DirectoryDeleter> 
out_dir_ptr(out_dir);
-
+std::unique_ptr<lucene::store::IndexOutput> 
IndexStorageFormatV2::create_output_stream() {
+    // For V2 format, we create the output stream directly using the file 
writer,
+    // bypassing the directory layer entirely. This optimization is especially 
important
+    // for cloud storage (like S3) where:
+    // 1. Directory operations (exists, create_directory) are unnecessary 
overhead
+    // 2. S3 doesn't have a real directory concept - directories are just key 
prefixes
+    // 3. The file writer is already created and ready to use
     DCHECK(_index_file_writer->_idx_v2_writer != nullptr)
             << "inverted index file writer v2 is nullptr";
-    auto compound_file_output = 
out_dir->createOutputV2(_index_file_writer->_idx_v2_writer.get());
-    return {std::move(out_dir_ptr), std::move(compound_file_output)};
+    return 
DorisFSDirectory::FSIndexOutputV2::create(_index_file_writer->_idx_v2_writer.get());
 }
 
 void 
IndexStorageFormatV2::write_version_and_indices_count(lucene::store::IndexOutput*
 output) {
diff --git a/be/src/olap/rowset/segment_v2/index_storage_format_v2.h 
b/be/src/olap/rowset/segment_v2/index_storage_format_v2.h
index 9b899afac28..1684855af62 100644
--- a/be/src/olap/rowset/segment_v2/index_storage_format_v2.h
+++ b/be/src/olap/rowset/segment_v2/index_storage_format_v2.h
@@ -43,9 +43,10 @@ public:
 private:
     int64_t header_length();
     std::vector<FileMetadata> prepare_file_metadata(int64_t& current_offset);
-    virtual std::pair<std::unique_ptr<lucene::store::Directory, 
DirectoryDeleter>,
-                      std::unique_ptr<lucene::store::IndexOutput>>
-    create_output_stream();
+    // Creates the output stream for writing the compound file.
+    // For V2 format, we directly create FSIndexOutputV2 using the file writer,
+    // avoiding unnecessary directory operations (important for cloud storage 
like S3).
+    virtual std::unique_ptr<lucene::store::IndexOutput> create_output_stream();
     void write_version_and_indices_count(lucene::store::IndexOutput* output);
     virtual void write_index_headers_and_metadata(lucene::store::IndexOutput* 
output,
                                                   const 
std::vector<FileMetadata>& file_metadata);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
index 936dda66e85..33a0ad801a8 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.cpp
@@ -453,6 +453,26 @@ int64_t DorisFSDirectory::FSIndexOutputV2::length() const {
     return _index_v2_file_writer->bytes_appended();
 }
 
+std::unique_ptr<lucene::store::IndexOutput> 
DorisFSDirectory::FSIndexOutputV2::create(
+        io::FileWriter* file_writer) {
+    auto ret = std::make_unique<FSIndexOutputV2>();
+    ErrorContext error_context;
+    try {
+        ret->init(file_writer);
+    } catch (CLuceneError& err) {
+        error_context.eptr = std::current_exception();
+        error_context.err_msg.append("FSIndexOutputV2::create init error: ");
+        error_context.err_msg.append(err.what());
+        LOG(ERROR) << error_context.err_msg;
+    }
+    FINALLY_EXCEPTION({
+        if (error_context.eptr) {
+            FINALLY_CLOSE(ret);
+        }
+    })
+    return ret;
+}
+
 DorisFSDirectory::DorisFSDirectory() {
     filemode = 0644;
     this->lockFactory = nullptr;
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h 
b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
index ec606e52911..7b1ac0bdf55 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_fs_directory.h
@@ -246,6 +246,11 @@ public:
     ~FSIndexOutputV2() override;
     void close() override;
     int64_t length() const override;
+
+    // Static factory method to create FSIndexOutputV2 directly without 
Directory object.
+    // This is useful for compound file creation where we already have a 
FileWriter
+    // and don't need directory operations (especially for cloud storage like 
S3).
+    static std::unique_ptr<lucene::store::IndexOutput> create(io::FileWriter* 
file_writer);
 };
 
 /**
diff --git a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp 
b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
index 61d7be3099c..5ac8b773aef 100644
--- a/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
+++ b/be/test/olap/rowset/segment_v2/inverted_index_file_writer_test.cpp
@@ -692,9 +692,8 @@ public:
     IndexStorageFormatV2MockCreateOutputStream(IndexFileWriter* 
index_file_writer)
             : IndexStorageFormatV2(index_file_writer) {}
 
-    MOCK_METHOD((std::pair<std::unique_ptr<lucene::store::Directory, 
DirectoryDeleter>,
-                           std::unique_ptr<lucene::store::IndexOutput>>),
-                create_output_stream, (), (override));
+    MOCK_METHOD((std::unique_ptr<lucene::store::IndexOutput>), 
create_output_stream, (),
+                (override));
 };
 
 class IndexFileWriterMockCreateOutputStreamV1 : public IndexFileWriter {
@@ -808,12 +807,9 @@ TEST_F(IndexFileWriterTest, WriteV2OutputTest) {
     EXPECT_CALL(
             
*(IndexStorageFormatV2MockCreateOutputStream*)writer_mock._index_storage_format.get(),
             create_output_stream())
-            .WillOnce(::testing::Invoke(
-                    [&]() -> 
std::pair<std::unique_ptr<lucene::store::Directory, DirectoryDeleter>,
-                                       
std::unique_ptr<lucene::store::IndexOutput>> {
-                        return std::make_pair(std::move(out_dir_ptr),
-                                              std::move(compound_file_output));
-                    }));
+            .WillOnce(::testing::Invoke([&]() -> 
std::unique_ptr<lucene::store::IndexOutput> {
+                return std::move(compound_file_output);
+            }));
 
     int64_t index_id = 1;
     std::string index_suffix = "suffix1";
@@ -871,12 +867,9 @@ TEST_F(IndexFileWriterTest, WriteV2OutputCloseErrorTest) {
     EXPECT_CALL(
             
*(IndexStorageFormatV2MockCreateOutputStream*)writer_mock._index_storage_format.get(),
             create_output_stream())
-            .WillOnce(::testing::Invoke(
-                    [&]() -> 
std::pair<std::unique_ptr<lucene::store::Directory, DirectoryDeleter>,
-                                       
std::unique_ptr<lucene::store::IndexOutput>> {
-                        return std::make_pair(std::move(out_dir_ptr),
-                                              std::move(compound_file_output));
-                    }));
+            .WillOnce(::testing::Invoke([&]() -> 
std::unique_ptr<lucene::store::IndexOutput> {
+                return std::move(compound_file_output);
+            }));
 
     int64_t index_id = 1;
     std::string index_suffix = "suffix1";


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to