This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new 8cb5aa64f4f [test](inverted index) add an Inverted Index Testing Switch (#38077) (#38947)
8cb5aa64f4f is described below
commit 8cb5aa64f4f46608ea3d535c3be2b1615556e47d
Author: zzzxl <[email protected]>
AuthorDate: Wed Aug 7 11:25:36 2024 +0800
[test](inverted index) add an Inverted Index Testing Switch (#38077) (#38947)
https://github.com/apache/doris/pull/38077
---
be/src/olap/rowset/segment_v2/segment_iterator.cpp | 4 +-
be/src/vec/functions/match.cpp | 60 ++++++++++++----------
be/src/vec/functions/match.h | 45 ++++++++--------
.../java/org/apache/doris/qe/SessionVariable.java | 21 ++++++++
gensrc/thrift/PaloInternalService.thrift | 3 ++
5 files changed, 84 insertions(+), 49 deletions(-)
diff --git a/be/src/olap/rowset/segment_v2/segment_iterator.cpp b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
index 746fc73f9ac..90653e1d577 100644
--- a/be/src/olap/rowset/segment_v2/segment_iterator.cpp
+++ b/be/src/olap/rowset/segment_v2/segment_iterator.cpp
@@ -998,7 +998,9 @@ Status
SegmentIterator::_apply_index_except_leafnode_of_andnode() {
}
bool SegmentIterator::_downgrade_without_index(Status res, bool
need_remaining) {
- if (res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND ||
+ bool is_fallback =
+
_opts.runtime_state->query_options().enable_fallback_on_missing_inverted_index;
+ if ((res.code() == ErrorCode::INVERTED_INDEX_FILE_NOT_FOUND &&
is_fallback) ||
res.code() == ErrorCode::INVERTED_INDEX_BYPASS ||
res.code() == ErrorCode::INVERTED_INDEX_EVALUATE_SKIPPED ||
(res.code() == ErrorCode::INVERTED_INDEX_NO_TERMS && need_remaining)) {
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index 95e973ae612..b1eb4fc358d 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -95,7 +95,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext*
context, Block& block,
// set default value to 0, and match functions only need to set 1/true
vec_res.resize_fill(input_rows_count);
RETURN_IF_ERROR(execute_match(
- column_name, match_query_str, input_rows_count, values,
inverted_index_ctx,
+ context, column_name, match_query_str, input_rows_count,
values, inverted_index_ctx,
(array_col ? &(array_col->get_offsets()) : nullptr), vec_res));
block.replace_by_position(result, std::move(res));
} else {
@@ -116,6 +116,10 @@ inline doris::segment_v2::InvertedIndexQueryType
FunctionMatchBase::get_query_ty
return doris::segment_v2::InvertedIndexQueryType::MATCH_ALL_QUERY;
} else if (fn_name == MATCH_PHRASE_FUNCTION) {
return doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_QUERY;
+ } else if (fn_name == MATCH_PHRASE_PREFIX_FUNCTION) {
+ return
doris::segment_v2::InvertedIndexQueryType::MATCH_PHRASE_PREFIX_QUERY;
+ } else if (fn_name == MATCH_PHRASE_REGEXP_FUNCTION) {
+ return doris::segment_v2::InvertedIndexQueryType::MATCH_REGEXP_QUERY;
}
return doris::segment_v2::InvertedIndexQueryType::UNKNOWN_QUERY;
}
@@ -151,16 +155,27 @@ inline std::vector<std::string>
FunctionMatchBase::analyse_data_token(
return data_tokens;
}
-Status FunctionMatchAny::execute_match(const std::string& column_name,
+Status FunctionMatchBase::check(FunctionContext* context, const std::string&
function_name) const {
+ if
(!context->state()->query_options().enable_match_without_inverted_index) {
+ return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
+ "{} not support execute_match", function_name);
+ }
+
+ DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
+ return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
+ "{} not support execute_match", function_name);
+ });
+
+ return Status::OK();
+}
+
+Status FunctionMatchAny::execute_match(FunctionContext* context, const
std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count,
const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64*
array_offsets,
ColumnUInt8::Container& result) const {
- DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
- return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "FunctionMatchAny not support execute_match");
- })
+ RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type =
doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@@ -201,16 +216,13 @@ Status FunctionMatchAny::execute_match(const std::string&
column_name,
return Status::OK();
}
-Status FunctionMatchAll::execute_match(const std::string& column_name,
+Status FunctionMatchAll::execute_match(FunctionContext* context, const
std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count,
const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64*
array_offsets,
ColumnUInt8::Container& result) const {
- DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
- return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "FunctionMatchAll not support execute_match");
- })
+ RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type =
doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@@ -257,16 +269,13 @@ Status FunctionMatchAll::execute_match(const std::string&
column_name,
return Status::OK();
}
-Status FunctionMatchPhrase::execute_match(const std::string& column_name,
+Status FunctionMatchPhrase::execute_match(FunctionContext* context, const
std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count, const
ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64*
array_offsets,
ColumnUInt8::Container& result)
const {
- DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
- return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "FunctionMatchPhrase not support execute_match");
- })
+ RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type =
doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@@ -330,13 +339,11 @@ Status FunctionMatchPhrase::execute_match(const
std::string& column_name,
}
Status FunctionMatchPhrasePrefix::execute_match(
- const std::string& column_name, const std::string& match_query_str,
size_t input_rows_count,
- const ColumnString* string_col, InvertedIndexCtx* inverted_index_ctx,
- const ColumnArray::Offsets64* array_offsets, ColumnUInt8::Container&
result) const {
- DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
- return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "FunctionMatchPhrasePrefix not support execute_match");
- })
+ FunctionContext* context, const std::string& column_name,
+ const std::string& match_query_str, size_t input_rows_count, const
ColumnString* string_col,
+ InvertedIndexCtx* inverted_index_ctx, const ColumnArray::Offsets64*
array_offsets,
+ ColumnUInt8::Container& result) const {
+ RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type =
doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
@@ -400,16 +407,13 @@ Status FunctionMatchPhrasePrefix::execute_match(
return Status::OK();
}
-Status FunctionMatchRegexp::execute_match(const std::string& column_name,
+Status FunctionMatchRegexp::execute_match(FunctionContext* context, const
std::string& column_name,
const std::string& match_query_str,
size_t input_rows_count, const
ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64*
array_offsets,
ColumnUInt8::Container& result)
const {
- DBUG_EXECUTE_IF("match.invert_index_not_support_execute_match", {
- return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
- "FunctionMatchRegexp not support execute_match");
- })
+ RETURN_IF_ERROR(check(context, name));
doris::InvertedIndexParserType parser_type =
doris::InvertedIndexParserType::PARSER_UNKNOWN;
if (inverted_index_ctx) {
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index aaa7d206c03..1265980987c 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -53,6 +53,8 @@ namespace doris::vectorized {
const std::string MATCH_ANY_FUNCTION = "match_any";
const std::string MATCH_ALL_FUNCTION = "match_all";
const std::string MATCH_PHRASE_FUNCTION = "match_phrase";
+const std::string MATCH_PHRASE_PREFIX_FUNCTION = "match_phrase_prefix";
+const std::string MATCH_PHRASE_REGEXP_FUNCTION = "match_regexp";
class FunctionMatchBase : public IFunction {
public:
@@ -70,8 +72,9 @@ public:
Status execute_impl(FunctionContext* context, Block& block, const
ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override;
- virtual Status execute_match(const std::string& column_name, const
std::string& match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
+ virtual Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col,
InvertedIndexCtx* inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const = 0;
@@ -84,6 +87,8 @@ public:
int32_t current_block_row_idx,
const ColumnArray::Offsets64*
array_offsets,
int32_t&
current_src_array_offset) const;
+
+ Status check(FunctionContext* context, const std::string& function_name)
const;
};
class FunctionMatchAny : public FunctionMatchBase {
@@ -93,9 +98,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@@ -107,9 +112,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@@ -121,9 +126,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@@ -135,9 +140,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@@ -149,9 +154,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override;
};
@@ -163,9 +168,9 @@ public:
String get_name() const override { return name; }
- Status execute_match(const std::string& column_name, const std::string&
match_query_str,
- size_t input_rows_count, const ColumnString*
string_col,
- InvertedIndexCtx* inverted_index_ctx,
+ Status execute_match(FunctionContext* context, const std::string&
column_name,
+ const std::string& match_query_str, size_t
input_rows_count,
+ const ColumnString* string_col, InvertedIndexCtx*
inverted_index_ctx,
const ColumnArray::Offsets64* array_offsets,
ColumnUInt8::Container& result) const override {
return Status::Error<ErrorCode::INVERTED_INDEX_NOT_SUPPORTED>(
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 77ea6838470..e1149a30f1d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -615,6 +615,9 @@ public class SessionVariable implements Serializable,
Writable {
public static final String DISABLE_EMPTY_PARTITION_PRUNE =
"disable_empty_partition_prune";
// CLOUD_VARIABLES_BEGIN
+ public static final String ENABLE_MATCH_WITHOUT_INVERTED_INDEX =
"enable_match_without_inverted_index";
+ public static final String ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX =
"enable_fallback_on_missing_inverted_index";
+
/**
* If set false, user couldn't submit analyze SQL and FE won't allocate
any related resources.
*/
@@ -1989,6 +1992,20 @@ public class SessionVariable implements Serializable,
Writable {
})
public boolean enableESParallelScroll = true;
+ @VariableMgr.VarAttr(name = ENABLE_MATCH_WITHOUT_INVERTED_INDEX,
description = {
+ "开启无索引match查询功能,建议正式环境保持开启",
+ "Enable no-index match query functionality."
+ + " it is recommended to keep this enabled in the production
environment."
+ })
+ public boolean enableMatchWithoutInvertedIndex = true;
+
+ @VariableMgr.VarAttr(name = ENABLE_FALLBACK_ON_MISSING_INVERTED_INDEX,
description = {
+ "开启后在没有找到索引的情况下直接查询报错,建议正式环境保持开启",
+ "After enabling, it will directly query and report an error if no
index is found."
+ + " It is recommended to keep this enabled in the production
environment."
+ })
+ public boolean enableFallbackOnMissingInvertedIndex = true;
+
public void setEnableEsParallelScroll(boolean enableESParallelScroll) {
this.enableESParallelScroll = enableESParallelScroll;
}
@@ -3472,6 +3489,10 @@ public class SessionVariable implements Serializable,
Writable {
tResult.setSerdeDialect(getSerdeDialect());
tResult.setHiveOrcUseColumnNames(hiveOrcUseColumnNames);
tResult.setHiveParquetUseColumnNames(hiveParquetUseColumnNames);
+
+
tResult.setEnableMatchWithoutInvertedIndex(enableMatchWithoutInvertedIndex);
+
tResult.setEnableFallbackOnMissingInvertedIndex(enableFallbackOnMissingInvertedIndex);
+
tResult.setKeepCarriageReturn(keepCarriageReturn);
return tResult;
}
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index 41d113497d3..9c80041f2ad 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -317,6 +317,9 @@ struct TQueryOptions {
123: optional bool hive_parquet_use_column_names = true;
124: optional bool hive_orc_use_column_names = true;
+ 125: optional bool enable_match_without_inverted_index = true;
+ 126: optional bool enable_fallback_on_missing_inverted_index = true;
+
// For cloud, to control if the content would be written into file cache
1000: optional bool disable_file_cache = false
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]