This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.1 by this push:
new ff52e73a07b [Fix](inverted index) fix match null for inverted index
#41746 (#41787)
ff52e73a07b is described below
commit ff52e73a07b3e9f4b95ce34d996f462c3a13b174
Author: airborne12 <[email protected]>
AuthorDate: Mon Oct 14 14:45:36 2024 +0800
[Fix](inverted index) fix match null for inverted index #41746 (#41787)
cherry pick from #41746
---
be/src/vec/exprs/vmatch_predicate.cpp | 16 +++---------
be/src/vec/functions/match.cpp | 21 +++++----------
be/src/vec/functions/match.h | 2 --
.../data/inverted_index_p0/test_null_index.out | 4 ++-
.../inverted_index_p0/test_null_index.groovy | 30 ++++++++++++++++++++--
5 files changed, 40 insertions(+), 33 deletions(-)
diff --git a/be/src/vec/exprs/vmatch_predicate.cpp
b/be/src/vec/exprs/vmatch_predicate.cpp
index 9dea7ca870c..699181e433d 100644
--- a/be/src/vec/exprs/vmatch_predicate.cpp
+++ b/be/src/vec/exprs/vmatch_predicate.cpp
@@ -84,14 +84,9 @@ Status VMatchPredicate::prepare(RuntimeState* state, const
RowDescriptor& desc,
argument_template.emplace_back(nullptr, child->data_type(),
child->expr_name());
child_expr_name.emplace_back(child->expr_name());
}
- // result column always not null
- if (_data_type->is_nullable()) {
- _function = SimpleFunctionFactory::instance().get_function(
- _fn.name.function_name, argument_template,
remove_nullable(_data_type));
- } else {
- _function =
SimpleFunctionFactory::instance().get_function(_fn.name.function_name,
-
argument_template, _data_type);
- }
+
+ _function =
SimpleFunctionFactory::instance().get_function(_fn.name.function_name,
+
argument_template, _data_type);
if (_function == nullptr) {
std::string type_str;
for (auto arg : argument_template) {
@@ -174,11 +169,6 @@ Status VMatchPredicate::execute(VExprContext* context,
Block* block, int* result
RETURN_IF_ERROR(_function->execute(context->fn_context(_fn_context_index),
*block, arguments,
num_columns_without_result,
block->rows(), false));
*result_column_id = num_columns_without_result;
- if (_data_type->is_nullable()) {
- auto nested =
block->get_by_position(num_columns_without_result).column;
- auto nullable = ColumnNullable::create(nested,
ColumnUInt8::create(block->rows(), 0));
- block->replace_by_position(num_columns_without_result, nullable);
- }
return Status::OK();
}
diff --git a/be/src/vec/functions/match.cpp b/be/src/vec/functions/match.cpp
index b70a605e962..71d6fd8ec6e 100644
--- a/be/src/vec/functions/match.cpp
+++ b/be/src/vec/functions/match.cpp
@@ -51,6 +51,10 @@ Status FunctionMatchBase::evaluate_inverted_index(
std::shared_ptr<roaring::Roaring> roaring =
std::make_shared<roaring::Roaring>();
Field param_value;
arguments[0].column->get(0, param_value);
+ if (param_value.is_null()) {
+ // if query value is null, skip evaluate inverted index
+ return Status::OK();
+ }
auto param_type = arguments[0].type->get_type_as_type_descriptor().type;
if (!is_string_type(param_type)) {
return Status::Error<ErrorCode::INVERTED_INDEX_INVALID_PARAMETERS>(
@@ -102,12 +106,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext*
context, Block& block,
const auto* values = check_and_get_column<ColumnString>(source_col.get());
const ColumnArray* array_col = nullptr;
if (source_col->is_column_array()) {
- if (source_col->is_nullable()) {
- auto* nullable =
check_and_get_column<ColumnNullable>(source_col.get());
- array_col =
check_and_get_column<ColumnArray>(*nullable->get_nested_column_ptr());
- } else {
- array_col = check_and_get_column<ColumnArray>(source_col.get());
- }
+ array_col = check_and_get_column<ColumnArray>(source_col.get());
if (array_col && !array_col->get_data().is_column_string()) {
return Status::NotSupported(
fmt::format("unsupported nested array of type {} for
function {}",
@@ -127,15 +126,7 @@ Status FunctionMatchBase::execute_impl(FunctionContext*
context, Block& block,
values =
check_and_get_column<ColumnString>(*(array_col->get_data_ptr()));
}
} else if (auto* nullable =
check_and_get_column<ColumnNullable>(source_col.get())) {
- // match null
- if (type_ptr->is_nullable()) {
- if (column_ptr->only_null()) {
- block.get_by_position(result).column =
nullable->get_null_map_column_ptr();
- return Status::OK();
- }
- } else {
- values =
check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
- }
+ values =
check_and_get_column<ColumnString>(*nullable->get_nested_column_ptr());
}
if (!values) {
diff --git a/be/src/vec/functions/match.h b/be/src/vec/functions/match.h
index 3026e4a06cf..d64fc6de686 100644
--- a/be/src/vec/functions/match.h
+++ b/be/src/vec/functions/match.h
@@ -59,8 +59,6 @@ const std::string MATCH_PHRASE_EDGE_FUNCTION =
"match_phrase_edge";
class FunctionMatchBase : public IFunction {
public:
- bool use_default_implementation_for_nulls() const override { return false;
}
-
size_t get_number_of_arguments() const override { return 2; }
String get_name() const override { return "match"; }
diff --git a/regression-test/data/inverted_index_p0/test_null_index.out
b/regression-test/data/inverted_index_p0/test_null_index.out
index e37dc33ca03..d0405083b99 100644
--- a/regression-test/data/inverted_index_p0/test_null_index.out
+++ b/regression-test/data/inverted_index_p0/test_null_index.out
@@ -2,6 +2,8 @@
-- !sql --
-- !sql --
-1 a \N [null] [1]
-- !sql --
+
+-- !sql --
+
diff --git a/regression-test/suites/inverted_index_p0/test_null_index.groovy
b/regression-test/suites/inverted_index_p0/test_null_index.groovy
index 61b613470ee..a11e3dde7c4 100644
--- a/regression-test/suites/inverted_index_p0/test_null_index.groovy
+++ b/regression-test/suites/inverted_index_p0/test_null_index.groovy
@@ -34,8 +34,8 @@ suite("test_null_index", "p0"){
`id` int(11) NOT NULL,
`str` string NOT NULL,
`str_null` string NULL,
- `value` array<text> NOT NULL,
- `value_int` array<int> NOT NULL
+ `value` array<text> NOT NULL,
+ `value_int` array<int> NOT NULL
) ENGINE=OLAP
DUPLICATE KEY(`id`)
COMMENT 'OLAP'
@@ -48,4 +48,30 @@ suite("test_null_index", "p0"){
sql "INSERT INTO $indexTblName VALUES (1, 'a', null, [null], [1]), (2,
'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);"
qt_sql "SELECT * FROM $indexTblName WHERE str match null order by id;"
qt_sql "SELECT * FROM $indexTblName WHERE str_null match null order by id;"
+
+ def indexTblName2 = "with_index_test"
+
+ sql "DROP TABLE IF EXISTS ${indexTblName2}"
+ // create 1 replica table
+ sql """
+ CREATE TABLE IF NOT EXISTS ${indexTblName2}(
+ `id` int(11) NOT NULL,
+ `str` string NOT NULL,
+ `str_null` string NULL,
+ `value` array<text> NOT NULL,
+ `value_int` array<int> NOT NULL,
+ INDEX str_idx(`str`) USING INVERTED,
+ INDEX str_null_idx(`str_null`) USING INVERTED
+ ) ENGINE=OLAP
+ DUPLICATE KEY(`id`)
+ COMMENT 'OLAP'
+ DISTRIBUTED BY HASH(`id`) BUCKETS 1
+ PROPERTIES(
+ "replication_allocation" = "tag.location.default: 1"
+ );
+ """
+ sql """ set enable_common_expr_pushdown = true """
+ sql "INSERT INTO $indexTblName2 VALUES (1, 'a', null, [null], [1]), (2,
'b', 'b', ['b'], [2]), (3, 'c', 'c', ['c'], [3]);"
+ qt_sql "SELECT * FROM $indexTblName2 WHERE str match null order by id;"
+ qt_sql "SELECT * FROM $indexTblName2 WHERE str_null match null order by
id;"
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]