This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 4779d144466 [opt](chinese) opt skip Chinese lowercase index compaction logic and test case. (#32451)
4779d144466 is described below

commit 4779d14446670779ceebe077e9b0b00f72c4e3cd
Author: zzzxl <[email protected]>
AuthorDate: Thu Mar 21 11:09:53 2024 +0800

    [opt](chinese) opt skip Chinese lowercase index compaction logic and test case. (#32451)
---
 be/src/clucene                                     |  2 +-
 be/src/olap/compaction.cpp                         | 59 ++++++++---------
 be/src/olap/inverted_index_parser.h                |  5 ++
 .../segment_v2/inverted_index_compaction.cpp       |  6 +-
 .../rowset/segment_v2/inverted_index_compaction.h  |  2 +-
 be/src/olap/tablet_schema.cpp                      |  2 +
 .../test_index_lowercase_fault_injection.out       | 13 ++++
 .../test_index_lowercase_fault_injection.groovy    | 76 ++++++++++++++++++++++
 8 files changed, 128 insertions(+), 37 deletions(-)

diff --git a/be/src/clucene b/be/src/clucene
index fe7ecdb2d62..e9c7f1f9a4a 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit fe7ecdb2d6214e69caf68eba744d3b5221716119
+Subproject commit e9c7f1f9a4a324d418eab978fa7ccbcf0878f60c
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 40bf05ef08f..36a2ba7f506 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -533,8 +533,25 @@ Status Compaction::do_compaction_impl(int64_t permits) {
                     [&src_segment_num, &dest_segment_num, &index_writer_path, 
&src_index_files,
                      &dest_index_files, &fs, &tablet_path, &trans_vec, 
&dest_segment_num_rows,
                      &status, &src_rowset_ids, this](int32_t column_uniq_id) {
+                        auto error_handler = [this](int64_t index_id, int64_t 
column_uniq_id) {
+                            LOG(WARNING) << "failed to do index compaction"
+                                         << ". tablet=" << _tablet->tablet_id()
+                                         << ". column uniq id=" << 
column_uniq_id
+                                         << ". index_id=" << index_id;
+                            for (auto& rowset : _input_rowsets) {
+                                
rowset->set_skip_index_compaction(column_uniq_id);
+                                LOG(INFO) << "mark skipping inverted index 
compaction next time"
+                                          << ". tablet=" << 
_tablet->tablet_id()
+                                          << ", rowset=" << rowset->rowset_id()
+                                          << ", column uniq id=" << 
column_uniq_id
+                                          << ", index_id=" << index_id;
+                            }
+                        };
+
+                        auto index_id =
+                                
_cur_tablet_schema->get_inverted_index(column_uniq_id)->index_id();
+
                         // if index properties are different, index compaction 
maybe needs to be skipped.
-                        bool maybe_skip = false;
                         std::optional<std::map<std::string, std::string>> 
first_properties;
                         for (const auto& rowset_id : src_rowset_ids) {
                             auto rowset_ptr = _tablet->get_rowset(rowset_id);
@@ -545,50 +562,28 @@ Status Compaction::do_compaction_impl(int64_t permits) {
                                 first_properties = properties;
                             } else {
                                 if (properties != first_properties.value()) {
-                                    LOG(WARNING) << "if index properties are 
different, index "
-                                                    "compaction needs to be 
skipped.";
-                                    maybe_skip = true;
-                                    break;
+                                    error_handler(index_id, column_uniq_id);
+                                    status = 
Status::Error<INVERTED_INDEX_COMPACTION_ERROR>(
+                                            "if index properties are 
different, index compaction "
+                                            "needs to be "
+                                            "skipped.");
+                                    return;
                                 }
                             }
                         }
 
-                        auto index_id =
-                                
_cur_tablet_schema->get_inverted_index(column_uniq_id)->index_id();
                         try {
                             auto st = compact_column(index_id, 
src_segment_num, dest_segment_num,
                                                      src_index_files, 
dest_index_files, fs,
                                                      index_writer_path, 
tablet_path, trans_vec,
-                                                     dest_segment_num_rows, 
maybe_skip);
+                                                     dest_segment_num_rows);
                             if (!st.ok()) {
-                                LOG(WARNING) << "failed to do index compaction"
-                                             << ". tablet=" << 
_tablet->full_name()
-                                             << ". column uniq id=" << 
column_uniq_id
-                                             << ". index_id=" << index_id;
-                                for (auto& rowset : _input_rowsets) {
-                                    
rowset->set_skip_index_compaction(column_uniq_id);
-                                    LOG(INFO) << "mark skipping inverted index 
compaction next time"
-                                              << ". tablet=" << 
_tablet->full_name()
-                                              << ", rowset=" << 
rowset->rowset_id()
-                                              << ", column uniq id=" << 
column_uniq_id
-                                              << ", index_id=" << index_id;
-                                }
+                                error_handler(index_id, column_uniq_id);
                                 status = 
Status::Error<ErrorCode::INVERTED_INDEX_COMPACTION_ERROR>(
                                         st.msg());
                             }
                         } catch (CLuceneError& e) {
-                            LOG(WARNING) << "failed to do index compaction"
-                                         << ". tablet=" << _tablet->full_name()
-                                         << ", column uniq id=" << 
column_uniq_id
-                                         << ", index_id=" << index_id;
-                            for (auto& rowset : _input_rowsets) {
-                                
rowset->set_skip_index_compaction(column_uniq_id);
-                                LOG(INFO) << "mark skipping inverted index 
compaction next time"
-                                          << ". tablet=" << 
_tablet->full_name()
-                                          << ", rowset=" << rowset->rowset_id()
-                                          << ", column uniq id=" << 
column_uniq_id
-                                          << ", index_id=" << index_id;
-                            }
+                            error_handler(index_id, column_uniq_id);
                             status = 
Status::Error<ErrorCode::INVERTED_INDEX_COMPACTION_ERROR>(
                                     e.what());
                         }
diff --git a/be/src/olap/inverted_index_parser.h b/be/src/olap/inverted_index_parser.h
index 1a16d9ad97d..8d79f7bbbd9 100644
--- a/be/src/olap/inverted_index_parser.h
+++ b/be/src/olap/inverted_index_parser.h
@@ -21,6 +21,8 @@
 #include <memory>
 #include <string>
 
+#include "util/debug_points.h"
+
 namespace lucene {
 namespace analysis {
 class Analyzer;
@@ -98,6 +100,9 @@ std::string get_parser_lowercase_from_properties(
     if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) != 
properties.end()) {
         return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
     } else {
+        
DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties",
+                        { return ""; })
+
         if constexpr (ReturnTrue) {
             return INVERTED_INDEX_PARSER_TRUE;
         } else {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index ff076e84397..b04edd6eb83 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -29,7 +29,7 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
                       std::vector<std::string> dest_index_files, const 
io::FileSystemSPtr& fs,
                       std::string index_writer_path, std::string tablet_path,
                       std::vector<std::vector<std::pair<uint32_t, uint32_t>>> 
trans_vec,
-                      std::vector<uint32_t> dest_segment_num_rows, bool 
maybe_skip) {
+                      std::vector<uint32_t> dest_segment_num_rows) {
     DBUG_EXECUTE_IF("index_compaction_compact_column_throw_error", {
         if (index_id % 2 == 0) {
             _CLTHROWA(CL_ERR_IO, "debug point: test throw error in index 
compaction");
@@ -68,8 +68,8 @@ Status compact_column(int32_t index_id, int src_segment_num, 
int dest_segment_nu
     }
 
     DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
-    index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec, 
dest_segment_num_rows,
-                                  maybe_skip);
+    index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
+                                  dest_segment_num_rows);
 
     index_writer->close();
     _CLDELETE(index_writer);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.h b/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
index 7d6ad1c2d48..f615192b199 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
@@ -30,6 +30,6 @@ Status compact_column(int32_t index_id, int src_segment_num, int dest_segment_nu
                       std::vector<std::string> dest_index_files, const 
io::FileSystemSPtr& fs,
                       std::string index_writer_path, std::string tablet_path,
                       std::vector<std::vector<std::pair<uint32_t, uint32_t>>> 
trans_vec,
-                      std::vector<uint32_t> dest_segment_num_rows, bool 
maybe_skip);
+                      std::vector<uint32_t> dest_segment_num_rows);
 } // namespace segment_v2
 } // namespace doris
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index e6e3dde7778..9bd451dd2a9 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -617,6 +617,8 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
         (*index->mutable_properties())[kv.first] = kv.second;
     }
 
+    DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })
+
     // lowercase by default
     if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
         (*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
diff --git a/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out b/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out
new file mode 100644
index 00000000000..196077986ec
--- /dev/null
+++ b/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !sql --
+5
+
+-- !sql --
+0
+
+-- !sql --
+8
+
+-- !sql --
+3
+
diff --git a/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy
new file mode 100644
index 00000000000..0f522652bb4
--- /dev/null
+++ b/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_lowercase_fault_injection") {
+    // define a sql table
+    def testTable = "httplogs_lowercase"
+
+    def create_httplogs_unique_table = {testTablex ->
+      // multi-line sql
+      def result = sql """
+        CREATE TABLE ${testTablex} (
+          `@timestamp` int(11) NULL COMMENT "",
+          `clientip` string NULL COMMENT "",
+          `request` string NULL COMMENT "",
+          `status` string NULL COMMENT "",
+          `size` string NULL COMMENT "",
+          INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" = 
"chinese", "support_phrase" = "true") COMMENT ''
+          ) ENGINE=OLAP
+          DUPLICATE KEY(`@timestamp`)
+          COMMENT "OLAP"
+          DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+          PROPERTIES (
+          "replication_allocation" = "tag.location.default: 1"
+        );
+      """
+    }
+
+    try {
+      sql "DROP TABLE IF EXISTS ${testTable}"
+      create_httplogs_unique_table.call(testTable)
+
+      try {
+        
GetDebugPoint().enableDebugPointForAllBEs("inverted_index_parser.get_parser_lowercase_from_properties")
+        GetDebugPoint().enableDebugPointForAllBEs("tablet_schema.to_schema_pb")
+
+        sql """ INSERT INTO ${testTable} VALUES (893964617, '40.135.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 24736); """
+        sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+        sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+        sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+        sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+
+        sql 'sync'
+      } finally {
+        
GetDebugPoint().disableDebugPointForAllBEs("inverted_index_parser.get_parser_lowercase_from_properties")
+        
GetDebugPoint().disableDebugPointForAllBEs("tablet_schema.to_schema_pb")
+      }
+
+      qt_sql """ select count() from ${testTable} where (request match 
'HTTP');  """
+      qt_sql """ select count() from ${testTable} where (request match 
'http');  """
+
+      sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+      sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+      sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET 
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+
+      sql 'sync'
+
+      qt_sql """ select count() from ${testTable} where (request match 
'HTTP');  """
+      qt_sql """ select count() from ${testTable} where (request match 
'http');  """
+    } finally {
+    }
+}
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to