This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new e64c5687f21 [fix](index compaction)support compact multi segments in one index (#28889)
e64c5687f21 is described below

commit e64c5687f21c2c2775d838cbf847c81480d54944
Author: qiye <[email protected]>
AuthorDate: Thu Dec 28 21:33:21 2023 +0800

    [fix](index compaction)support compact multi segments in one index (#28889)
---
 be/src/clucene                                     |   2 +-
 be/src/common/config.cpp                           |   5 +-
 be/src/common/config.h                             |   3 +-
 be/src/olap/compaction.cpp                         |  38 ++-
 .../segment_v2/inverted_index_compaction.cpp       |   1 +
 .../rowset/segment_v2/inverted_index_writer.cpp    |   3 +-
 ..._index_compaction_with_multi_index_segments.out | 214 +++++++++++++++
 ...dex_compaction_with_multi_index_segments.groovy | 287 +++++++++++++++++++++
 8 files changed, 543 insertions(+), 10 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index df3ab39ca63..d05cb8154ef 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit df3ab39ca636e58ed1ee640921444f7ef6d6438d
+Subproject commit d05cb8154ef4368bd40c43c94e8e3c679e13c490
diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index b5a16ef0166..6292b191f65 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -1006,13 +1006,16 @@ DEFINE_String(inverted_index_query_cache_limit, "10%");
 
 // inverted index
 DEFINE_mDouble(inverted_index_ram_buffer_size, "512");
+// -1 means this limit is disabled.
+// Normally this should not be changed; it is mainly useful for testing.
+DEFINE_mInt32(inverted_index_max_buffered_docs, "-1");
 // dict path for chinese analyzer
 DEFINE_String(inverted_index_dict_path, "${DORIS_HOME}/dict");
 DEFINE_Int32(inverted_index_read_buffer_size, "4096");
 // tree depth for bkd index
 DEFINE_Int32(max_depth_in_bkd_tree, "32");
 // index compaction
-DEFINE_Bool(inverted_index_compaction_enable, "false");
+DEFINE_mBool(inverted_index_compaction_enable, "false");
 // index by RAM directory
 DEFINE_mBool(inverted_index_ram_dir_enable, "false");
 // use num_broadcast_buffer blocks as buffer to do broadcast
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 53dc0a2a0d8..e580b38e483 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -1043,13 +1043,14 @@ DECLARE_String(inverted_index_query_cache_limit);
 
 // inverted index
 DECLARE_mDouble(inverted_index_ram_buffer_size);
+DECLARE_mInt32(inverted_index_max_buffered_docs);
 // dict path for chinese analyzer
 DECLARE_String(inverted_index_dict_path);
 DECLARE_Int32(inverted_index_read_buffer_size);
 // tree depth for bkd index
 DECLARE_Int32(max_depth_in_bkd_tree);
 // index compaction
-DECLARE_Bool(inverted_index_compaction_enable);
+DECLARE_mBool(inverted_index_compaction_enable);
 // index by RAM directory
 DECLARE_mBool(inverted_index_ram_dir_enable);
 // use num_broadcast_buffer blocks as buffer to do broadcast
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 4ad78367d28..e3fddb990cd 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -400,6 +400,34 @@ Status Compaction::do_compaction_impl(int64_t permits) {
 
     if (_input_row_num > 0 && stats.rowid_conversion && config::inverted_index_compaction_enable) {
         OlapStopWatch inverted_watch;
+
+        // check rowid_conversion correctness
+        Version version = _tablet->max_version();
+        DeleteBitmap output_rowset_delete_bitmap(_tablet->tablet_id());
+        std::set<RowLocation> missed_rows;
+        std::map<RowsetSharedPtr, std::list<std::pair<RowLocation, RowLocation>>> location_map;
+        // Convert the delete bitmap of the input rowsets to output rowset.
+        std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
+        if (!allow_delete_in_cumu_compaction()) {
+            missed_rows_size = missed_rows.size();
+            if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION &&
+                stats.merged_rows != missed_rows_size) {
+                std::string err_msg = fmt::format(
+                        "cumulative compaction: the merged rows({}) is not equal to missed "
+                        "rows({}) in rowid conversion, tablet_id: {}, table_id:{}",
+                        stats.merged_rows, missed_rows_size, _tablet->tablet_id(),
+                        _tablet->table_id());
+                DCHECK(false) << err_msg;
+                LOG(WARNING) << err_msg;
+            }
+        }
+
+        RETURN_IF_ERROR(_tablet->check_rowid_conversion(_output_rowset, location_map));
+
         // translation vec
         // <<dest_idx_num, dest_docId>>
         // the first level vector: index indicates src segment.
@@ -425,7 +453,7 @@ Status Compaction::do_compaction_impl(int64_t permits) {
             // src index files
             // format: rowsetId_segmentId
             std::vector<std::string> src_index_files(src_segment_num);
-            for (auto m : src_seg_to_id_map) {
+            for (const auto& m : src_seg_to_id_map) {
                 std::pair<RowsetId, uint32_t> p = m.first;
                 src_index_files[m.second] = p.first.to_string() + "_" + std::to_string(p.second);
             }
@@ -676,11 +704,11 @@ Status Compaction::modify_rowsets(const Merger::Statistics* stats) {
         // of incremental data later.
         // TODO(LiaoXin): check if there are duplicate keys
         std::size_t missed_rows_size = 0;
+        _tablet->calc_compaction_output_rowset_delete_bitmap(
+                _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
+                &location_map, _tablet->tablet_meta()->delete_bitmap(),
+                &output_rowset_delete_bitmap);
         if (!allow_delete_in_cumu_compaction()) {
-            _tablet->calc_compaction_output_rowset_delete_bitmap(
-                    _input_rowsets, _rowid_conversion, 0, version.second + 1, &missed_rows,
-                    &location_map, _tablet->tablet_meta()->delete_bitmap(),
-                    &output_rowset_delete_bitmap);
             missed_rows_size = missed_rows.size();
             if (compaction_type() == ReaderType::READER_CUMULATIVE_COMPACTION && stats != nullptr &&
                 stats->merged_rows != missed_rows_size) {
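The two hunks above carry the correctness check: do_compaction_impl now replays the rowid conversion to build the output rowset's delete bitmap before index compaction runs and, when deletes are not allowed in cumulative compaction, cross-checks that the merger's merged_rows equals the number of rows the conversion reports as missed, then validates the recorded locations via check_rowid_conversion; modify_rowsets now computes that delete bitmap unconditionally instead of only inside the allow-delete branch. A minimal, self-contained sketch of the cross-check invariant, using stand-in types rather than the BE's real RowLocation/DeleteBitmap classes:

```cpp
// Illustrative sketch only: the invariant behind the new DCHECK in
// do_compaction_impl. Rows eliminated by the merger (e.g. duplicate keys)
// must reappear as "missed" rows when old row ids are replayed through the
// rowid conversion; any mismatch means the conversion is inconsistent.
#include <cstddef>
#include <iostream>
#include <set>
#include <utility>

// Stand-in for RowLocation: (segment id, row id) within a rowset.
using RowLoc = std::pair<int, int>;

bool rowid_conversion_consistent(std::size_t merged_rows,
                                 const std::set<RowLoc>& missed_rows) {
    return merged_rows == missed_rows.size();
}

int main() {
    std::set<RowLoc> missed = {{0, 3}, {1, 7}};
    std::cout << std::boolalpha
              << rowid_conversion_consistent(2, missed) << '\n';  // true
    std::cout << rowid_conversion_consistent(3, missed) << '\n';  // false: would trip the DCHECK
    return 0;
}
```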
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index 677d359f1a4..f34a36c7bae 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -55,6 +55,7 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
         dest_index_dirs[i] = DorisCompoundDirectoryFactory::getDirectory(fs, path.c_str(), true);
     }
 
+    DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
     index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
                                   dest_segment_num_rows);
 
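The added DCHECK_EQ pins down the contract this fix depends on: compact_column must receive exactly one doc-id translation entry per source index directory, i.e. per source segment; with several index segments per rowset this is where a mismatch would first surface. The translation vector's shape is described by the comments in compaction.cpp above (first level indexed by source segment, second level by source doc id, each entry holding the destination segment id and destination doc id), and the source index files are named rowsetId_segmentId. A small sketch of that shape with made-up placeholder values:

```cpp
// Illustrative sketch of the inputs compact_column expects; the real code
// builds these from the rowid conversion and RowsetId, not from literals.
#include <cassert>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

// trans_vec[src_segment][src_doc_id] = {dest_segment_id, dest_doc_id}
using TransVec = std::vector<std::vector<std::pair<uint32_t, uint32_t>>>;

int main() {
    // Two source segments of five docs each, all remapped into one
    // destination segment.
    TransVec trans_vec(2, std::vector<std::pair<uint32_t, uint32_t>>(5));
    for (uint32_t seg = 0; seg < 2; ++seg) {
        for (uint32_t doc = 0; doc < 5; ++doc) {
            trans_vec[seg][doc] = {0, seg * 5 + doc};
        }
    }

    // Source index files follow the rowsetId_segmentId naming convention
    // ("<rowset_id>" is a placeholder here).
    std::vector<std::string> src_index_files = {"<rowset_id>_0", "<rowset_id>_1"};

    // The invariant asserted by the new DCHECK_EQ: one translation entry
    // per source index directory/segment.
    assert(src_index_files.size() == trans_vec.size());
    return 0;
}
```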
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
index a43049aaa2b..e6e519b9d2c 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
@@ -67,7 +67,6 @@
 
 namespace doris::segment_v2 {
 const int32_t MAX_FIELD_LEN = 0x7FFFFFFFL;
-const int32_t MAX_BUFFER_DOCS = 100000000;
 const int32_t MERGE_FACTOR = 100000000;
 const int32_t MAX_LEAF_COUNT = 1024;
 const float MAXMBSortInHeap = 512.0 * 8;
@@ -196,8 +195,8 @@ public:
         bool close_dir_on_shutdown = true;
         index_writer = std::make_unique<lucene::index::IndexWriter>(
                 _dir.get(), _analyzer.get(), create_index, close_dir_on_shutdown);
-        index_writer->setMaxBufferedDocs(MAX_BUFFER_DOCS);
         index_writer->setRAMBufferSizeMB(config::inverted_index_ram_buffer_size);
+        _index_writer->setMaxBufferedDocs(config::inverted_index_max_buffered_docs);
         index_writer->setMaxFieldLength(MAX_FIELD_LEN);
         index_writer->setMergeFactor(MERGE_FACTOR);
         index_writer->setUseCompoundFile(false);
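The hard-coded MAX_BUFFER_DOCS cap is gone; the writer now takes its max-buffered-docs limit from the new config, which the regression test below flips at runtime. Judging from the -1 default and the comment in config.cpp, a non-positive value leaves flushing to the RAM buffer size alone, while a small positive value (the test sets 5 and loads 10 rows at a time) forces the writer to flush a new inverted index segment every few documents, which appears to be how the test produces several index segments inside a single rowset. A hedged sketch of that effect, not taken from the BE code:

```cpp
// Hedged sketch (not BE code): how a max-buffered-docs cap translates into
// the number of flushed index segments for one load, assuming the cap is the
// only flush trigger that fires. With the regression test's settings
// (10 rows, cap of 5) this gives 2 index segments; with the -1 default the
// cap is treated as disabled and the RAM buffer governs flushing instead.
#include <cstdint>
#include <iostream>

int64_t expected_index_segments(int64_t buffered_rows, int32_t max_buffered_docs) {
    if (max_buffered_docs <= 0) {
        return 1;  // cap disabled in this sketch; RAM-buffer flushes not modeled
    }
    return (buffered_rows + max_buffered_docs - 1) / max_buffered_docs;  // ceiling division
}

int main() {
    std::cout << expected_index_segments(10, 5) << '\n';   // 2
    std::cout << expected_index_segments(10, -1) << '\n';  // 1
    return 0;
}
```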
diff --git a/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
new file mode 100644
index 00000000000..57ad3c1080c
--- /dev/null
+++ b/regression-test/data/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.out
@@ -0,0 +1,214 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       8       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       9       I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+2018-02-21T12:00       10      I'm using the builds
+
+-- !sql --
+
+-- !sql --
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       1       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       2       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       3       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       4       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       5       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       6       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+2018-02-21T12:00       7       I'm using the builds
+
diff --git a/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
new file mode 100644
index 00000000000..765ee55515a
--- /dev/null
+++ b/regression-test/suites/inverted_index_p0/index_compaction/test_index_compaction_with_multi_index_segments.groovy
@@ -0,0 +1,287 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+import org.codehaus.groovy.runtime.IOGroovyMethods
+
+suite("test_index_compaction_with_multi_index_segments", "p0") {
+    def tableName = "test_index_compaction_with_multi_index_segments"
+  
+    def set_be_config = { key, value ->
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+
+        for (String backend_id: backendId_to_backendIP.keySet()) {
+            def (code, out, err) = update_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), key, value)
+            logger.info("update config: code=" + code + ", out=" + out + ", err=" + err)
+        }
+    }
+
+    boolean disableAutoCompaction = true
+    boolean invertedIndexCompactionEnable = false
+    int invertedIndexMaxBufferedDocs = -1;
+    boolean has_update_be_config = false
+
+    try {
+        String backend_id;
+        def backendId_to_backendIP = [:]
+        def backendId_to_backendHttpPort = [:]
+        getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort);
+
+        backend_id = backendId_to_backendIP.keySet()[0]
+        def (code, out, err) = show_be_config(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id))
+
+        logger.info("Show config: code=" + code + ", out=" + out + ", err=" + err)
+        assertEquals(code, 0)
+        def configList = parseJson(out.trim())
+        assert configList instanceof List
+
+        for (Object ele in (List) configList) {
+            assert ele instanceof List<String>
+            if (((List<String>) ele)[0] == "inverted_index_compaction_enable") {
+                invertedIndexCompactionEnable = Boolean.parseBoolean(((List<String>) ele)[2])
+                logger.info("inverted_index_compaction_enable: ${((List<String>) ele)[2]}")
+            }
+            if (((List<String>) ele)[0] == "inverted_index_max_buffered_docs") {
+                invertedIndexMaxBufferedDocs = Integer.parseInt(((List<String>) ele)[2])
+                logger.info("inverted_index_max_buffered_docs: ${((List<String>) ele)[2]}")
+            }
+        }
+        set_be_config.call("inverted_index_compaction_enable", "true")
+        set_be_config.call("inverted_index_max_buffered_docs", "5")
+        has_update_be_config = true
+
+        sql """ DROP TABLE IF EXISTS ${tableName}; """
+        sql """
+            CREATE TABLE ${tableName} (
+                `file_time` DATETIME NOT NULL,
+                `comment_id` int(11)  NULL,
+                `body` TEXT NULL DEFAULT "",
+                INDEX idx_comment_id (`comment_id`) USING INVERTED COMMENT '''',
+                INDEX idx_body (`body`) USING INVERTED PROPERTIES("parser" = "unicode") COMMENT ''''
+            ) ENGINE=OLAP
+            DUPLICATE KEY(`file_time`)
+            COMMENT 'OLAP'
+            DISTRIBUTED BY RANDOM BUCKETS 1
+            PROPERTIES (
+            "replication_allocation" = "tag.location.default: 1",
+            "disable_auto_compaction" = "true"
+            );
+        """
+
+        // insert 10 rows
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """
+        // insert another 10 rows
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """
+
+        //TabletId,ReplicaId,BackendId,SchemaHash,Version,LstSuccessVersion,LstFailedVersion,LstFailedTime,LocalDataSize,RemoteDataSize,RowCount,State,LstConsistencyCheckTime,CheckVersion,VersionCount,PathHash,MetaUrl,CompactionStatus
+        String[][] tablets = sql """ show tablets from ${tableName}; """
+
+        def replicaNum = get_table_replica_num(tableName)
+        logger.info("get table replica num: " + replicaNum)
+        // before full compaction, there are 3 rowsets.
+        int rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 3 * replicaNum)
+
+        // trigger full compactions for all tablets in ${tableName}
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            backend_id = tablet[2]
+            times = 1
+
+            do{
+                (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+                ++times
+                sleep(2000)
+            } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10)
+
+            def compactJson = parseJson(out.trim())
+            if (compactJson.status.toLowerCase() == "fail") {
+                assertEquals(disableAutoCompaction, false)
+                logger.info("Compaction was done automatically!")
+            }
+            if (disableAutoCompaction) {
+                assertEquals("success", compactJson.status.toLowerCase())
+            }
+        }
+
+        // wait for full compaction done
+        for (String[] tablet in tablets) {
+            boolean running = true
+            do {
+                Thread.sleep(1000)
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+        }
+
+        // after full compaction, there is only 1 rowset.
+        
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 1 * replicaNum)
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """
+
+        // insert 10 rows, again
+        sql """ INSERT INTO ${tableName} VALUES ("2018-02-21 12:00:00", 1, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 2, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 3, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 4, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 5, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 6, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 7, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 8, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 9, "I\'m using the builds"),
+                                                ("2018-02-21 12:00:00", 10, "I\'m using the builds"); """
+
+        tablets = sql """ show tablets from ${tableName}; """
+
+        replicaNum = get_table_replica_num(tableName)
+        logger.info("get table replica num: " + replicaNum)
+        // before full compaction, there are 2 rowsets.
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 2 * replicaNum)
+
+        // trigger full compactions for all tablets in ${tableName}
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            backend_id = tablet[2]
+            times = 1
+
+            do{
+                (code, out, err) = be_run_full_compaction(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err)
+                ++times
+                sleep(2000)
+            } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10)
+
+            def compactJson = parseJson(out.trim())
+            if (compactJson.status.toLowerCase() == "fail") {
+                assertEquals(disableAutoCompaction, false)
+                logger.info("Compaction was done automatically!")
+            }
+            if (disableAutoCompaction) {
+                assertEquals("success", compactJson.status.toLowerCase())
+            }
+        }
+
+        // wait for full compaction done
+        for (String[] tablet in tablets) {
+            boolean running = true
+            do {
+                Thread.sleep(1000)
+                String tablet_id = tablet[0]
+                backend_id = tablet[2]
+                (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id)
+                logger.info("Get compaction status: code=" + code + ", out=" + out + ", err=" + err)
+                assertEquals(code, 0)
+                def compactionStatus = parseJson(out.trim())
+                assertEquals("success", compactionStatus.status.toLowerCase())
+                running = compactionStatus.run_status
+            } while (running)
+        }
+
+        // after full compaction, there is only 1 rowset.
+        
+        rowsetCount = 0
+        for (String[] tablet in tablets) {
+            String tablet_id = tablet[0]
+            def compactionStatusUrlIndex = 18
+            (code, out, err) = curl("GET", tablet[compactionStatusUrlIndex])
+            logger.info("Show tablets status: code=" + code + ", out=" + out + 
", err=" + err)
+            assertEquals(code, 0)
+            def tabletJson = parseJson(out.trim())
+            assert tabletJson.rowsets instanceof List
+            rowsetCount +=((List<String>) tabletJson.rowsets).size()
+        }
+        assert (rowsetCount == 1 * replicaNum)
+
+        qt_sql """ select * from ${tableName} order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "using" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where body match "the" order by file_time, comment_id, body """
+        qt_sql """ select * from ${tableName} where comment_id < 8 order by file_time, comment_id, body """
+
+    } finally {
+        if (has_update_be_config) {
+            set_be_config.call("inverted_index_compaction_enable", invertedIndexCompactionEnable.toString())
+            set_be_config.call("inverted_index_max_buffered_docs", invertedIndexMaxBufferedDocs.toString())
+        }
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
