This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 10cb92debf5 [Fix](segment compaction) fix error using of inverted 
index file writer in segment compaction (#43114)
10cb92debf5 is described below

commit 10cb92debf53a51ec074c9bfd0a7ef1a2e971946
Author: airborne12 <[email protected]>
AuthorDate: Mon Nov 4 17:31:51 2024 +0800

    [Fix](segment compaction) fix error using of inverted index file writer in 
segment compaction (#43114)
    
    ### What problem does this PR solve?
    <!--
    You need to clearly describe your PR in this part:
    
    1. What problem was fixed (it's best to include specific error reporting
    information). How it was fixed.
    2. Which behaviors were modified. What was the previous behavior, what
    is it now, why was it modified, and what possible impacts might there
    be.
    3. What features were added. Why this function was added.
    4. Which codes were refactored and why this part of the code was
    refactored.
    5. Which functions were optimized and what is the difference before and
    after the optimization.
    
    The description of the PR needs to enable reviewers to quickly and
    clearly understand the logic of the code modification.
    -->
    
    <!--
    If there are related issues, please fill in the issue number.
    - If you want the issue to be closed after the PR is merged, please use
    "close #12345". Otherwise, use "ref #12345"
    -->
    
    <!--
    If this PR is a follow-up to a previous PR, for example, fixing a bug
    that was introduced by a related PR,
    link the PR here
    -->
    Related PR: #41625
    
    Problem Summary:
    Fix a BUAF problem; the stack trace looks like this:
    ```
    stack trace: ***
    0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, 
siginfo_t*, void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/common/signal_handler.h:421
    1# 0x00007F5370ABD520 in /lib/x86_64-linux-gnu/libc.so.6
    2# pthread_kill at ./nptl/pthread_kill.c:89
    3# raise at ../sysdeps/posix/raise.c:27
    4# abort at ./stdlib/abort.c:81
    5# __gnu_cxx::__verbose_terminate_handler() [clone .cold] at 
../../../../libstdc++-v3/libsupc++/vterminate.cc:75
    6# __cxxabiv1::__terminate(void (*)()) at 
../../../../libstdc++-v3/libsupc++/eh_terminate.cc:48
    7# 0x000055D9E015B681 in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    8# 0x000055D9E015B7D4 in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    9# 0x000055D9E015BBC6 in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    10# void fmt::v7::detail::buffer::append(char const*, char const*) in 
/mnt/hdd01/ci/master-deploy/be/lib/doris_be
    11# char const* fmt::v7::detail::parse_replacement_field, char, 
fmt::v7::basic_format_context, char> >&>(char const*, char const*, 
fmt::v7::detail::format_handler, char, fmt::v7::basic_format_context, char> >&) 
in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    12# void fmt::v7::detail::vformat_to(fmt::v7::detail::buffer&, 
fmt::v7::basic_string_view, fmt::v7::basic_format_args::type>, 
fmt::v7::type_identity::type> >, fmt::v7::detail::locale_ref) in 
/mnt/hdd01/ci/master-deploy/be/lib/doris_be
    13# fmt::v7::detail::vformat[abi:cxx11](fmt::v7::basic_string_view, 
fmt::v7::format_args) in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    14# 
doris::segment_v2::InvertedIndexDescriptor::get_temporary_index_path[abi:cxx11](std::basic_string_view
 >, std::basic_string_view >, long, long, std::basic_string_view >) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/inverted_index_desc.cpp:35
    15# doris::segment_v2::InvertedIndexFileWriter::open(doris::TabletIndex 
const*) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/inverted_index_file_writer.cpp:45
    16# 
doris::segment_v2::InvertedIndexColumnWriterImpl<(doris::FieldType)7>::init_bkd_index()
 at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp:146
    17# 
doris::segment_v2::InvertedIndexColumnWriterImpl<(doris::FieldType)7>::init() 
at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp:116
    18# doris::segment_v2::InvertedIndexColumnWriter::create(doris::Field 
const*, std::unique_ptr >, doris::segment_v2::InvertedIndexFileWriter, 
doris::TabletIndex const*) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp:698
    19# doris::segment_v2::ScalarColumnWriter::init() at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/column_writer.cpp:483
    20# doris::segment_v2::SegmentWriter::_create_column_writer(unsigned int, 
doris::TabletColumn const&, std::shared_ptr const&) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/segment_writer.cpp:258
    21# doris::segment_v2::SegmentWriter::_create_writers(std::shared_ptr 
const&, std::vector > const&) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/segment_writer.cpp:307
    22# doris::segment_v2::SegmentWriter::init(std::vector > const&, bool) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segment_v2/segment_writer.cpp:276
    23# doris::SegcompactionWorker::_do_compact_segments(std::shared_ptr, 
std::allocator > > >) in /mnt/hdd01/ci/master-deploy/be/lib/doris_be
    24# doris::SegcompactionWorker::compact_segments(std::shared_ptr, 
std::allocator > > >) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/rowset/segcompaction.cpp:354
    25# doris::StorageEngine::_handle_seg_compaction(std::shared_ptr, 
std::shared_ptr, std::allocator > > >, unsigned long) at 
/home/zcp/repo_center/doris_master/doris/be/src/olap/olap_server.cpp:1121
    26# std::_Function_handler, std::shared_ptr, std::allocator > > 
>)::$_0>::_M_invoke(std::_Any_data const&) at 
/var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:291
    27# doris::ThreadPool::dispatch_thread() in 
/mnt/hdd01/ci/master-deploy/be/lib/doris_be
    28# doris::Thread::supervise_thread(void*) at 
/home/zcp/repo_center/doris_master/doris/be/src/util/thread.cpp:499
    29# start_thread at ./nptl/pthread_create.c:442
    30# 0x00007F5370BA1850 at ../sysdeps/unix/sysv/linux/x86_64/clone3.S:83
    172.20.50.120 last coredump sql: last SQL query not found
    ```
    
    ### Check List (For Committer)
    
    - Test <!-- At least one of them must be included. -->
    
        - [ ] Regression test
        - [x] Unit Test
        - [ ] Manual test (add detailed scripts or steps below)
        - [ ] No need to test or manual test. Explain why:
    - [ ] This is a refactor/code format and no logic has been changed.
            - [ ] Previous test can cover this change.
            - [ ] No code files have been changed.
            - [ ] Other reason <!-- Add your reason?  -->
    
    - Behavior changed:
    
        - [x] No.
        - [ ] Yes. <!-- Explain the behavior change -->
    
    - Does this need documentation?
    
        - [x] No.
    - [ ] Yes. <!-- Add document PR link here. eg:
    https://github.com/apache/doris-website/pull/1214 -->
    
    - Release note
    
        <!-- bugfix, feat, behavior changed need a release note -->
        <!-- Add one line release note for this PR. -->
        None
    
    ### Check List (For Reviewer who merge this PR)
    
    - [ ] Confirm the release note
    - [ ] Confirm test cases
    - [ ] Confirm document
    - [ ] Add branch pick label <!-- Add branch pick label that this PR
    should merge into -->
---
 be/src/olap/rowset/beta_rowset_writer.cpp |  7 +++-
 be/src/olap/rowset/beta_rowset_writer.h   |  4 +-
 be/src/olap/rowset/segcompaction.cpp      |  2 +-
 be/src/olap/rowset/segcompaction.h        |  5 +++
 be/test/olap/segcompaction_test.cpp       | 64 +++++++++++++++++++++++++++++++
 5 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp 
b/be/src/olap/rowset/beta_rowset_writer.cpp
index 548b1950b81..037fae316e9 100644
--- a/be/src/olap/rowset/beta_rowset_writer.cpp
+++ b/be/src/olap/rowset/beta_rowset_writer.cpp
@@ -956,7 +956,7 @@ Status 
BaseBetaRowsetWriter::create_inverted_index_file_writer(
     return Status::OK();
 }
 
-Status BetaRowsetWriter::_create_segment_writer_for_segcompaction(
+Status BetaRowsetWriter::create_segment_writer_for_segcompaction(
         std::unique_ptr<segment_v2::SegmentWriter>* writer, int64_t begin, 
int64_t end) {
     DCHECK(begin >= 0 && end >= 0);
     std::string path = 
BetaRowset::local_segment_path_segcompacted(_context.tablet_path,
@@ -996,6 +996,11 @@ Status 
BetaRowsetWriter::_create_segment_writer_for_segcompaction(
         RETURN_IF_ERROR(_segcompaction_worker->get_file_writer()->close());
     }
     _segcompaction_worker->get_file_writer().reset(file_writer.release());
+    if (auto& idx_file_writer = 
_segcompaction_worker->get_inverted_index_file_writer();
+        idx_file_writer != nullptr) {
+        RETURN_IF_ERROR(idx_file_writer->close());
+    }
+    
_segcompaction_worker->get_inverted_index_file_writer().reset(index_file_writer.release());
     return Status::OK();
 }
 
diff --git a/be/src/olap/rowset/beta_rowset_writer.h 
b/be/src/olap/rowset/beta_rowset_writer.h
index 4539959fab5..d96301af226 100644
--- a/be/src/olap/rowset/beta_rowset_writer.h
+++ b/be/src/olap/rowset/beta_rowset_writer.h
@@ -281,6 +281,8 @@ public:
     Status flush_segment_writer_for_segcompaction(
             std::unique_ptr<segment_v2::SegmentWriter>* writer, uint64_t 
index_size,
             KeyBoundsPB& key_bounds);
+    Status create_segment_writer_for_segcompaction(
+            std::unique_ptr<segment_v2::SegmentWriter>* writer, int64_t begin, 
int64_t end);
 
     bool is_segcompacted() const { return _num_segcompacted > 0; }
 
@@ -291,8 +293,6 @@ private:
     Status _check_segment_number_limit(size_t segnum) override;
     int64_t _num_seg() const override;
     Status _wait_flying_segcompaction();
-    Status _create_segment_writer_for_segcompaction(
-            std::unique_ptr<segment_v2::SegmentWriter>* writer, int64_t begin, 
int64_t end);
     Status _segcompaction_if_necessary();
     Status _segcompaction_rename_last_segments();
     Status _load_noncompacted_segment(segment_v2::SegmentSharedPtr& segment, 
int32_t segment_id);
diff --git a/be/src/olap/rowset/segcompaction.cpp 
b/be/src/olap/rowset/segcompaction.cpp
index e5d043d8a22..f901c786062 100644
--- a/be/src/olap/rowset/segcompaction.cpp
+++ b/be/src/olap/rowset/segcompaction.cpp
@@ -232,7 +232,7 @@ Status 
SegcompactionWorker::_check_correctness(OlapReaderStatistics& reader_stat
 
 Status SegcompactionWorker::_create_segment_writer_for_segcompaction(
         std::unique_ptr<segment_v2::SegmentWriter>* writer, uint32_t begin, 
uint32_t end) {
-    return _writer->_create_segment_writer_for_segcompaction(writer, begin, 
end);
+    return _writer->create_segment_writer_for_segcompaction(writer, begin, 
end);
 }
 
 Status 
SegcompactionWorker::_do_compact_segments(SegCompactionCandidatesSharedPtr 
segments) {
diff --git a/be/src/olap/rowset/segcompaction.h 
b/be/src/olap/rowset/segcompaction.h
index 54c5c3758c2..5ec74c0e660 100644
--- a/be/src/olap/rowset/segcompaction.h
+++ b/be/src/olap/rowset/segcompaction.h
@@ -25,6 +25,7 @@
 #include "olap/merger.h"
 #include "olap/simple_rowid_conversion.h"
 #include "olap/tablet.h"
+#include "segment_v2/inverted_index_file_writer.h"
 #include "segment_v2/segment.h"
 
 namespace doris {
@@ -69,6 +70,9 @@ public:
     DeleteBitmapPtr get_converted_delete_bitmap() { return 
_converted_delete_bitmap; }
 
     io::FileWriterPtr& get_file_writer() { return _file_writer; }
+    InvertedIndexFileWriterPtr& get_inverted_index_file_writer() {
+        return _inverted_index_file_writer;
+    }
 
     // set the cancel flag, tasks already started will not be cancelled.
     bool cancel();
@@ -96,6 +100,7 @@ private:
     // Currently cloud storage engine doesn't need segcompaction
     BetaRowsetWriter* _writer = nullptr;
     io::FileWriterPtr _file_writer;
+    InvertedIndexFileWriterPtr _inverted_index_file_writer = nullptr;
 
     // for unique key mow table
     std::unique_ptr<SimpleRowIdConversion> _rowid_conversion = nullptr;
diff --git a/be/test/olap/segcompaction_test.cpp 
b/be/test/olap/segcompaction_test.cpp
index ba0d23acb02..32d724d246b 100644
--- a/be/test/olap/segcompaction_test.cpp
+++ b/be/test/olap/segcompaction_test.cpp
@@ -34,6 +34,7 @@
 #include "olap/rowset/rowset_reader_context.h"
 #include "olap/rowset/rowset_writer.h"
 #include "olap/rowset/rowset_writer_context.h"
+#include "olap/rowset/segment_v2/segment_writer.h"
 #include "olap/storage_engine.h"
 #include "olap/tablet_meta.h"
 #include "olap/tablet_schema.h"
@@ -178,6 +179,24 @@ protected:
         tablet_schema->init_from_pb(tablet_schema_pb);
     }
 
+    void construct_column(ColumnPB* column_pb, TabletIndexPB* tablet_index, 
int64_t index_id,
+                          const std::string& index_name, int32_t col_unique_id,
+                          const std::string& column_type, const std::string& 
column_name,
+                          bool parser = false) {
+        column_pb->set_unique_id(col_unique_id);
+        column_pb->set_name(column_name);
+        column_pb->set_type(column_type);
+        column_pb->set_is_key(false);
+        column_pb->set_is_nullable(true);
+        tablet_index->set_index_id(index_id);
+        tablet_index->set_index_name(index_name);
+        tablet_index->set_index_type(IndexType::INVERTED);
+        tablet_index->add_col_unique_id(col_unique_id);
+        if (parser) {
+            auto* properties = tablet_index->mutable_properties();
+            (*properties)[INVERTED_INDEX_PARSER_KEY] = 
INVERTED_INDEX_PARSER_UNICODE;
+        }
+    }
     // use different id to avoid conflict
     void create_rowset_writer_context(int64_t id, TabletSchemaSPtr 
tablet_schema,
                                       RowsetWriterContext* 
rowset_writer_context) {
@@ -830,6 +849,51 @@ TEST_F(SegCompactionTest, 
SegCompactionThenReadUniqueTableSmall) {
     }
 }
 
+TEST_F(SegCompactionTest, CreateSegCompactionWriter) {
+    config::enable_segcompaction = true;
+    Status s;
+    TabletSchemaSPtr tablet_schema = std::make_shared<TabletSchema>();
+    TabletSchemaPB schema_pb;
+    schema_pb.set_keys_type(KeysType::DUP_KEYS);
+    
schema_pb.set_inverted_index_storage_format(InvertedIndexStorageFormatPB::V2);
+
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10000, 
"key_index", 0, "INT",
+                     "key");
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10001, 
"v1_index", 1, "STRING",
+                     "v1");
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10002, 
"v2_index", 2, "STRING",
+                     "v2", true);
+    construct_column(schema_pb.add_column(), schema_pb.add_index(), 10003, 
"v3_index", 3, "INT",
+                     "v3");
+
+    tablet_schema.reset(new TabletSchema);
+    tablet_schema->init_from_pb(schema_pb);
+    RowsetSharedPtr rowset;
+    config::segcompaction_candidate_max_rows = 6000; // set threshold above
+    // rows_per_segment
+    config::segcompaction_batch_size = 3;
+    std::vector<uint32_t> segment_num_rows;
+    {
+        RowsetWriterContext writer_context;
+        create_rowset_writer_context(10052, tablet_schema, &writer_context);
+
+        auto res = RowsetFactory::create_rowset_writer(*l_engine, 
writer_context, false);
+        EXPECT_TRUE(res.has_value()) << res.error();
+        auto rowset_writer = std::move(res).value();
+        EXPECT_EQ(Status::OK(), s);
+        auto beta_rowset_writer = 
dynamic_cast<BetaRowsetWriter*>(rowset_writer.get());
+        EXPECT_TRUE(beta_rowset_writer != nullptr);
+        std::unique_ptr<segment_v2::SegmentWriter> writer = nullptr;
+        auto status = 
beta_rowset_writer->create_segment_writer_for_segcompaction(&writer, 0, 1);
+        EXPECT_TRUE(beta_rowset_writer != nullptr);
+        EXPECT_TRUE(status == Status::OK());
+        int64_t inverted_index_file_size = 0;
+        status = writer->close_inverted_index(&inverted_index_file_size);
+        EXPECT_TRUE(status == Status::OK());
+        std::cout << inverted_index_file_size << std::endl;
+    }
+}
+
 TEST_F(SegCompactionTest, SegCompactionThenReadAggTableSmall) {
     config::enable_segcompaction = true;
     Status s;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to