This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 4779d144466 [opt](chinese) opt skip Chinese lowercase index compaction
logic and test case. (#32451)
4779d144466 is described below
commit 4779d14446670779ceebe077e9b0b00f72c4e3cd
Author: zzzxl <[email protected]>
AuthorDate: Thu Mar 21 11:09:53 2024 +0800
[opt](chinese) opt skip Chinese lowercase index compaction logic and test
case. (#32451)
---
be/src/clucene | 2 +-
be/src/olap/compaction.cpp | 59 ++++++++---------
be/src/olap/inverted_index_parser.h | 5 ++
.../segment_v2/inverted_index_compaction.cpp | 6 +-
.../rowset/segment_v2/inverted_index_compaction.h | 2 +-
be/src/olap/tablet_schema.cpp | 2 +
.../test_index_lowercase_fault_injection.out | 13 ++++
.../test_index_lowercase_fault_injection.groovy | 76 ++++++++++++++++++++++
8 files changed, 128 insertions(+), 37 deletions(-)
diff --git a/be/src/clucene b/be/src/clucene
index fe7ecdb2d62..e9c7f1f9a4a 160000
--- a/be/src/clucene
+++ b/be/src/clucene
@@ -1 +1 @@
-Subproject commit fe7ecdb2d6214e69caf68eba744d3b5221716119
+Subproject commit e9c7f1f9a4a324d418eab978fa7ccbcf0878f60c
diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp
index 40bf05ef08f..36a2ba7f506 100644
--- a/be/src/olap/compaction.cpp
+++ b/be/src/olap/compaction.cpp
@@ -533,8 +533,25 @@ Status Compaction::do_compaction_impl(int64_t permits) {
[&src_segment_num, &dest_segment_num, &index_writer_path,
&src_index_files,
&dest_index_files, &fs, &tablet_path, &trans_vec,
&dest_segment_num_rows,
&status, &src_rowset_ids, this](int32_t column_uniq_id) {
+ auto error_handler = [this](int64_t index_id, int64_t
column_uniq_id) {
+ LOG(WARNING) << "failed to do index compaction"
+ << ". tablet=" << _tablet->tablet_id()
+ << ". column uniq id=" <<
column_uniq_id
+ << ". index_id=" << index_id;
+ for (auto& rowset : _input_rowsets) {
+
rowset->set_skip_index_compaction(column_uniq_id);
+ LOG(INFO) << "mark skipping inverted index
compaction next time"
+ << ". tablet=" <<
_tablet->tablet_id()
+ << ", rowset=" << rowset->rowset_id()
+ << ", column uniq id=" <<
column_uniq_id
+ << ", index_id=" << index_id;
+ }
+ };
+
+ auto index_id =
+
_cur_tablet_schema->get_inverted_index(column_uniq_id)->index_id();
+
// if index properties are different, index compaction
maybe needs to be skipped.
- bool maybe_skip = false;
std::optional<std::map<std::string, std::string>>
first_properties;
for (const auto& rowset_id : src_rowset_ids) {
auto rowset_ptr = _tablet->get_rowset(rowset_id);
@@ -545,50 +562,28 @@ Status Compaction::do_compaction_impl(int64_t permits) {
first_properties = properties;
} else {
if (properties != first_properties.value()) {
- LOG(WARNING) << "if index properties are
different, index "
- "compaction needs to be
skipped.";
- maybe_skip = true;
- break;
+ error_handler(index_id, column_uniq_id);
+ status =
Status::Error<INVERTED_INDEX_COMPACTION_ERROR>(
+ "if index properties are
different, index compaction "
+ "needs to be "
+ "skipped.");
+ return;
}
}
}
- auto index_id =
-
_cur_tablet_schema->get_inverted_index(column_uniq_id)->index_id();
try {
auto st = compact_column(index_id,
src_segment_num, dest_segment_num,
src_index_files,
dest_index_files, fs,
index_writer_path,
tablet_path, trans_vec,
- dest_segment_num_rows,
maybe_skip);
+ dest_segment_num_rows);
if (!st.ok()) {
- LOG(WARNING) << "failed to do index compaction"
- << ". tablet=" <<
_tablet->full_name()
- << ". column uniq id=" <<
column_uniq_id
- << ". index_id=" << index_id;
- for (auto& rowset : _input_rowsets) {
-
rowset->set_skip_index_compaction(column_uniq_id);
- LOG(INFO) << "mark skipping inverted index
compaction next time"
- << ". tablet=" <<
_tablet->full_name()
- << ", rowset=" <<
rowset->rowset_id()
- << ", column uniq id=" <<
column_uniq_id
- << ", index_id=" << index_id;
- }
+ error_handler(index_id, column_uniq_id);
status =
Status::Error<ErrorCode::INVERTED_INDEX_COMPACTION_ERROR>(
st.msg());
}
} catch (CLuceneError& e) {
- LOG(WARNING) << "failed to do index compaction"
- << ". tablet=" << _tablet->full_name()
- << ", column uniq id=" <<
column_uniq_id
- << ", index_id=" << index_id;
- for (auto& rowset : _input_rowsets) {
-
rowset->set_skip_index_compaction(column_uniq_id);
- LOG(INFO) << "mark skipping inverted index
compaction next time"
- << ". tablet=" <<
_tablet->full_name()
- << ", rowset=" << rowset->rowset_id()
- << ", column uniq id=" <<
column_uniq_id
- << ", index_id=" << index_id;
- }
+ error_handler(index_id, column_uniq_id);
status =
Status::Error<ErrorCode::INVERTED_INDEX_COMPACTION_ERROR>(
e.what());
}
diff --git a/be/src/olap/inverted_index_parser.h
b/be/src/olap/inverted_index_parser.h
index 1a16d9ad97d..8d79f7bbbd9 100644
--- a/be/src/olap/inverted_index_parser.h
+++ b/be/src/olap/inverted_index_parser.h
@@ -21,6 +21,8 @@
#include <memory>
#include <string>
+#include "util/debug_points.h"
+
namespace lucene {
namespace analysis {
class Analyzer;
@@ -98,6 +100,9 @@ std::string get_parser_lowercase_from_properties(
if (properties.find(INVERTED_INDEX_PARSER_LOWERCASE_KEY) !=
properties.end()) {
return properties.at(INVERTED_INDEX_PARSER_LOWERCASE_KEY);
} else {
+
DBUG_EXECUTE_IF("inverted_index_parser.get_parser_lowercase_from_properties",
+ { return ""; })
+
if constexpr (ReturnTrue) {
return INVERTED_INDEX_PARSER_TRUE;
} else {
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
index ff076e84397..b04edd6eb83 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp
@@ -29,7 +29,7 @@ Status compact_column(int32_t index_id, int src_segment_num,
int dest_segment_nu
std::vector<std::string> dest_index_files, const
io::FileSystemSPtr& fs,
std::string index_writer_path, std::string tablet_path,
std::vector<std::vector<std::pair<uint32_t, uint32_t>>>
trans_vec,
- std::vector<uint32_t> dest_segment_num_rows, bool
maybe_skip) {
+ std::vector<uint32_t> dest_segment_num_rows) {
DBUG_EXECUTE_IF("index_compaction_compact_column_throw_error", {
if (index_id % 2 == 0) {
_CLTHROWA(CL_ERR_IO, "debug point: test throw error in index
compaction");
@@ -68,8 +68,8 @@ Status compact_column(int32_t index_id, int src_segment_num,
int dest_segment_nu
}
DCHECK_EQ(src_index_dirs.size(), trans_vec.size());
- index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
dest_segment_num_rows,
- maybe_skip);
+ index_writer->indexCompaction(src_index_dirs, dest_index_dirs, trans_vec,
+ dest_segment_num_rows);
index_writer->close();
_CLDELETE(index_writer);
diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
b/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
index 7d6ad1c2d48..f615192b199 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.h
@@ -30,6 +30,6 @@ Status compact_column(int32_t index_id, int src_segment_num,
int dest_segment_nu
std::vector<std::string> dest_index_files, const
io::FileSystemSPtr& fs,
std::string index_writer_path, std::string tablet_path,
std::vector<std::vector<std::pair<uint32_t, uint32_t>>>
trans_vec,
- std::vector<uint32_t> dest_segment_num_rows, bool
maybe_skip);
+ std::vector<uint32_t> dest_segment_num_rows);
} // namespace segment_v2
} // namespace doris
diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp
index e6e3dde7778..9bd451dd2a9 100644
--- a/be/src/olap/tablet_schema.cpp
+++ b/be/src/olap/tablet_schema.cpp
@@ -617,6 +617,8 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const {
(*index->mutable_properties())[kv.first] = kv.second;
}
+ DBUG_EXECUTE_IF("tablet_schema.to_schema_pb", { return; })
+
// lowercase by default
if (!_properties.contains(INVERTED_INDEX_PARSER_LOWERCASE_KEY)) {
(*index->mutable_properties())[INVERTED_INDEX_PARSER_LOWERCASE_KEY] =
diff --git
a/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out
b/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out
new file mode 100644
index 00000000000..196077986ec
--- /dev/null
+++
b/regression-test/data/fault_injection_p0/test_index_lowercase_fault_injection.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !sql --
+5
+
+-- !sql --
+0
+
+-- !sql --
+8
+
+-- !sql --
+3
+
diff --git
a/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy
b/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy
new file mode 100644
index 00000000000..0f522652bb4
--- /dev/null
+++
b/regression-test/suites/fault_injection_p0/test_index_lowercase_fault_injection.groovy
@@ -0,0 +1,76 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+
+suite("test_index_lowercase_fault_injection") {
+ // define a sql table
+ def testTable = "httplogs_lowercase"
+
+ def create_httplogs_unique_table = {testTablex ->
+ // multi-line sql
+ def result = sql """
+ CREATE TABLE ${testTablex} (
+ `@timestamp` int(11) NULL COMMENT "",
+ `clientip` string NULL COMMENT "",
+ `request` string NULL COMMENT "",
+ `status` string NULL COMMENT "",
+ `size` string NULL COMMENT "",
+ INDEX request_idx (`request`) USING INVERTED PROPERTIES("parser" =
"chinese", "support_phrase" = "true") COMMENT ''
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`@timestamp`)
+ COMMENT "OLAP"
+ DISTRIBUTED BY HASH(`@timestamp`) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ }
+
+ try {
+ sql "DROP TABLE IF EXISTS ${testTable}"
+ create_httplogs_unique_table.call(testTable)
+
+ try {
+
GetDebugPoint().enableDebugPointForAllBEs("inverted_index_parser.get_parser_lowercase_from_properties")
+ GetDebugPoint().enableDebugPointForAllBEs("tablet_schema.to_schema_pb")
+
+ sql """ INSERT INTO ${testTable} VALUES (893964617, '40.135.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 24736); """
+ sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+ sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+ sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+ sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+
+ sql 'sync'
+ } finally {
+
GetDebugPoint().disableDebugPointForAllBEs("inverted_index_parser.get_parser_lowercase_from_properties")
+
GetDebugPoint().disableDebugPointForAllBEs("tablet_schema.to_schema_pb")
+ }
+
+ qt_sql """ select count() from ${testTable} where (request match
'HTTP'); """
+ qt_sql """ select count() from ${testTable} where (request match
'http'); """
+
+ sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+ sql """ INSERT INTO ${testTable} VALUES (893964672, '26.1.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 304, 0); """
+ sql """ INSERT INTO ${testTable} VALUES (893964653, '232.0.0.0', 'GET
/images/hm_bg.jpg HTTP/1.0', 200, 3781); """
+
+ sql 'sync'
+
+ qt_sql """ select count() from ${testTable} where (request match
'HTTP'); """
+ qt_sql """ select count() from ${testTable} where (request match
'http'); """
+ } finally {
+ }
+}
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]