This is an automated email from the ASF dual-hosted git repository.
yiguolei pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 21d740b0bcc branch-4.0: [fix](search) Replace ExcludeScorer with
null-bitmap-aware version for MUST_NOT to handle NULL rows #61200 (#61322)
21d740b0bcc is described below
commit 21d740b0bcca88ddb5155a11d63357da1e1f391e
Author: Jack <[email protected]>
AuthorDate: Sat Mar 14 01:20:32 2026 +0800
branch-4.0: [fix](search) Replace ExcludeScorer with null-bitmap-aware
version for MUST_NOT to handle NULL rows #61200 (#61322)
Cherry-pick of #61200 to branch-4.0.
### What problem does this PR solve?
Issue Number: close #DORIS-24681
`search('NOT msg:omega')` incorrectly includes NULL rows in the result
set, while `NOT search('msg:omega')` correctly excludes them.
**Root cause:** `ExcludeScorer` did not implement `has_null_bitmap()` /
`get_null_bitmap()`, so it inherited the `Scorer` base class defaults that
always return `false` / `nullptr`. As a result, NULL documents were
treated as TRUE (matching) rather than NULL, violating SQL three-valued
logic, where `NOT(NULL) = NULL`.
**Fix:** Enhance `ExcludeScorer` with null bitmap support:
- Accept a pre-collected `exclude_null` bitmap (read from index at O(1)
cost)
- Check it during `advance()`/`seek()` before the original lazy
`is_within()` exclusion
- Implement `has_null_bitmap()`/`get_null_bitmap()` to propagate null
info
- Plumb `binding_keys` from `function_search.cpp` through
OccurBooleanQueryBuilder → OccurBooleanQuery → OccurBooleanWeight for
proper null bitmap resolution
### Release note
Fix search('NOT field:value') incorrectly including NULL rows; MUST_NOT
clauses now use a null-bitmap-aware ExcludeScorer.
### Check List (For Author)
- Test
- [x] Regression test
- [x] Unit Test
- [x] Manual test
### Check List (For the reviewer who merges this PR)
- [ ] Confirm the release note
- [ ] Confirm test cases
- [ ] Confirm document
- [ ] Add branch pick label
---
.../query_v2/boolean_query/boolean_query_builder.h | 10 +-
.../query_v2/boolean_query/occur_boolean_query.h | 12 +-
.../boolean_query/occur_boolean_weight.cpp | 70 +++--
.../query_v2/boolean_query/occur_boolean_weight.h | 13 +-
.../inverted_index/query_v2/exclude_scorer.cpp | 53 +++-
.../inverted_index/query_v2/exclude_scorer.h | 14 +-
be/src/vec/functions/function_search.cpp | 2 +-
.../query_v2/intersection_scorer_test.cpp | 317 +++++++++++++++++++--
.../query_v2/occur_boolean_query_test.cpp | 24 +-
.../data/search/test_search_not_null_bitmap.out | 55 ++++
.../search/test_search_not_null_bitmap.groovy | 222 +++++++++++++++
11 files changed, 726 insertions(+), 66 deletions(-)
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query_builder.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query_builder.h
index 3cbcca58080..626e9ac5b85 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query_builder.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/boolean_query_builder.h
@@ -31,20 +31,26 @@ public:
OccurBooleanQueryBuilder() = default;
~OccurBooleanQueryBuilder() = default;
- void add(const QueryPtr& query, Occur occur) {
_sub_queries.emplace_back(occur, query); }
+ void add(const QueryPtr& query, Occur occur, std::string binding_key = {})
{
+ _sub_queries.emplace_back(occur, query);
+ _binding_keys.emplace_back(std::move(binding_key));
+ }
void set_minimum_number_should_match(size_t value) {
_minimum_number_should_match = value; }
QueryPtr build() {
if (_minimum_number_should_match.has_value()) {
return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
+
std::move(_binding_keys),
_minimum_number_should_match.value());
}
- return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries));
+ return std::make_shared<OccurBooleanQuery>(std::move(_sub_queries),
+ std::move(_binding_keys));
}
private:
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
+ std::vector<std::string> _binding_keys;
std::optional<size_t> _minimum_number_should_match;
};
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_query.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_query.h
index 018e2d831c7..27381ef0d1c 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_query.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_query.h
@@ -29,13 +29,16 @@ using OccurBooleanQueryPtr =
std::shared_ptr<OccurBooleanQuery>;
class OccurBooleanQuery : public Query {
public:
- explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses)
+ explicit OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
+ std::vector<std::string> binding_keys = {})
: _sub_queries(std::move(clauses)),
+ _binding_keys(std::move(binding_keys)),
_minimum_number_should_match(compute_default_minimum_should_match(_sub_queries))
{}
OccurBooleanQuery(std::vector<std::pair<Occur, QueryPtr>> clauses,
- size_t minimum_number_should_match)
+ std::vector<std::string> binding_keys, size_t
minimum_number_should_match)
: _sub_queries(std::move(clauses)),
+ _binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match) {}
~OccurBooleanQuery() override = default;
@@ -47,8 +50,8 @@ public:
sub_weights.emplace_back(occur, query->weight(enable_scoring));
}
return std::make_shared<OccurBooleanWeight<SumCombinerPtr>>(
- std::move(sub_weights), _minimum_number_should_match,
enable_scoring,
- std::make_shared<SumCombiner>());
+ std::move(sub_weights), std::move(_binding_keys),
_minimum_number_should_match,
+ enable_scoring, std::make_shared<SumCombiner>());
}
const std::vector<std::pair<Occur, QueryPtr>>& clauses() const { return
_sub_queries; }
@@ -69,6 +72,7 @@ private:
}
std::vector<std::pair<Occur, QueryPtr>> _sub_queries;
+ std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 0;
};
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
index 9b828708798..d2318165671 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.cpp
@@ -21,6 +21,7 @@
#include "olap/rowset/segment_v2/inverted_index/query_v2/disjunction_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection.h"
+#include "olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/reqopt_scorer.h"
#include
"olap/rowset/segment_v2/inverted_index/query_v2/union/buffered_union.h"
@@ -28,12 +29,19 @@ namespace doris::segment_v2::inverted_index::query_v2 {
template <typename ScoreCombinerPtrT>
OccurBooleanWeight<ScoreCombinerPtrT>::OccurBooleanWeight(
- std::vector<std::pair<Occur, WeightPtr>> sub_weights, size_t
minimum_number_should_match,
- bool enable_scoring, ScoreCombinerPtrT score_combiner)
+ std::vector<std::pair<Occur, WeightPtr>> sub_weights,
std::vector<std::string> binding_keys,
+ size_t minimum_number_should_match, bool enable_scoring,
ScoreCombinerPtrT score_combiner)
: _sub_weights(std::move(sub_weights)),
+ _binding_keys(std::move(binding_keys)),
_minimum_number_should_match(minimum_number_should_match),
_enable_scoring(enable_scoring),
- _score_combiner(std::move(score_combiner)) {}
+ _score_combiner(std::move(score_combiner)) {
+ DCHECK(_binding_keys.empty() || _binding_keys.size() ==
_sub_weights.size())
+ << "binding_keys size (" << _binding_keys.size() << ") must match
sub_weights size ("
+ << _sub_weights.size() << ") when non-empty";
+ // Ensure binding_keys has the same size as sub_weights (pads with empty
strings if needed).
+ _binding_keys.resize(_sub_weights.size());
+}
template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::scorer(const
QueryExecutionContext& context) {
@@ -62,8 +70,10 @@ template <typename ScoreCombinerPtrT>
std::unordered_map<Occur, std::vector<ScorerPtr>>
OccurBooleanWeight<ScoreCombinerPtrT>::per_occur_scorers(const
QueryExecutionContext& context) {
std::unordered_map<Occur, std::vector<ScorerPtr>> result;
- for (const auto& [occur, weight] : _sub_weights) {
- auto sub_scorer = weight->scorer(context);
+ for (size_t i = 0; i < _sub_weights.size(); ++i) {
+ const auto& [occur, weight] = _sub_weights[i];
+ const auto& binding_key = _binding_keys[i];
+ auto sub_scorer = weight->scorer(context, binding_key);
if (sub_scorer) {
result[occur].push_back(std::move(sub_scorer));
}
@@ -122,17 +132,6 @@ std::optional<CombinationMethod>
OccurBooleanWeight<ScoreCombinerPtrT>::build_sh
}
}
-template <typename ScoreCombinerPtrT>
-ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
- std::vector<ScorerPtr> must_not_scorers) {
- if (must_not_scorers.empty()) {
- return nullptr;
- }
- auto do_nothing = std::make_shared<DoNothingCombiner>();
- auto specialized_scorer = scorer_union(std::move(must_not_scorers),
do_nothing);
- return into_box_scorer(std::move(specialized_scorer), do_nothing);
-}
-
template <typename ScoreCombinerPtrT>
ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::effective_must_scorer(
std::vector<ScorerPtr> must_scorers, size_t must_num_all_scorers) {
@@ -242,13 +241,24 @@ SpecializedScorer
OccurBooleanWeight<ScoreCombinerPtrT>::complex_scorer(
return std::make_shared<EmptyScorer>();
}
- ScorerPtr exclude_opt = build_exclude_opt(std::move(must_not_scorers));
+ // Collect null bitmaps from MUST_NOT scorers (read from index, no
iteration needed)
+ // and union the scorers into one for lazy exclusion.
+ roaring::Roaring exclude_null;
+ ScorerPtr exclude_opt =
+ build_exclude_opt(std::move(must_not_scorers),
context.null_resolver, exclude_null);
+
SpecializedScorer positive_opt =
build_positive_opt(*should_opt, std::move(must_scorers), combiner,
must_special_counts,
should_special_counts);
+ // Use null-bitmap-aware ExcludeScorer for MUST_NOT clauses.
+ // ExcludeScorer keeps lazy TRUE exclusion via seek-based iteration and
adds
+ // O(1) null bitmap checks so that NOT(NULL) = NULL (SQL three-valued
logic).
+ // Documents where the excluded field is NULL are placed in the null bitmap
+ // rather than being incorrectly included in the true result set.
if (exclude_opt) {
ScorerPtr positive_boxed = into_box_scorer(std::move(positive_opt),
combiner);
- return make_exclude(std::move(positive_boxed), std::move(exclude_opt));
+ return make_exclude(std::move(positive_boxed), std::move(exclude_opt),
+ std::move(exclude_null), context.null_resolver);
}
return positive_opt;
}
@@ -321,6 +331,30 @@ ScorerPtr
OccurBooleanWeight<ScoreCombinerPtrT>::into_box_scorer(SpecializedScor
std::move(specialized));
}
+template <typename ScoreCombinerPtrT>
+ScorerPtr OccurBooleanWeight<ScoreCombinerPtrT>::build_exclude_opt(
+ std::vector<ScorerPtr> must_not_scorers, const NullBitmapResolver*
resolver,
+ roaring::Roaring& exclude_null_out) {
+ if (must_not_scorers.empty()) {
+ return nullptr;
+ }
+
+ // Collect null bitmaps before union (read from index, no iteration
needed).
+ for (auto& s : must_not_scorers) {
+ if (resolver != nullptr && s && s->has_null_bitmap(resolver)) {
+ const auto* nb = s->get_null_bitmap(resolver);
+ if (nb != nullptr) {
+ exclude_null_out |= *nb;
+ }
+ }
+ }
+
+ // Union all MUST_NOT scorers into one for lazy seek-based exclusion.
+ auto do_nothing = std::make_shared<DoNothingCombiner>();
+ auto specialized = scorer_union(std::move(must_not_scorers), do_nothing);
+ return into_box_scorer(std::move(specialized), do_nothing);
+}
+
template class OccurBooleanWeight<SumCombinerPtr>;
template class OccurBooleanWeight<DoNothingCombinerPtr>;
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
index 0daff6a1117..70c43f25a50 100644
---
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
+++
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur_boolean_weight.h
@@ -17,6 +17,8 @@
#pragma once
+#include <roaring/roaring.hh>
+
#include "olap/rowset/segment_v2/inverted_index/query_v2/boolean_query/occur.h"
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
#include
"olap/rowset/segment_v2/inverted_index/query_v2/term_query/term_scorer.h"
@@ -44,8 +46,8 @@ template <typename ScoreCombinerPtrT>
class OccurBooleanWeight : public Weight {
public:
OccurBooleanWeight(std::vector<std::pair<Occur, WeightPtr>> sub_weights,
- size_t minimum_number_should_match, bool enable_scoring,
- ScoreCombinerPtrT score_combiner);
+ std::vector<std::string> binding_keys, size_t
minimum_number_should_match,
+ bool enable_scoring, ScoreCombinerPtrT score_combiner);
~OccurBooleanWeight() override = default;
ScorerPtr scorer(const QueryExecutionContext& context) override;
@@ -62,8 +64,6 @@ private:
std::optional<CombinationMethod> build_should_opt(std::vector<ScorerPtr>&
must_scorers,
std::vector<ScorerPtr>
should_scorers,
CombinerT combiner,
size_t num_all_scorers);
- ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers);
-
ScorerPtr effective_must_scorer(std::vector<ScorerPtr> must_scorers,
size_t must_num_all_scorers);
@@ -87,7 +87,12 @@ private:
template <typename CombinerT>
ScorerPtr into_box_scorer(SpecializedScorer&& specialized, CombinerT
combiner);
+ ScorerPtr build_exclude_opt(std::vector<ScorerPtr> must_not_scorers,
+ const NullBitmapResolver* resolver,
+ roaring::Roaring& exclude_null_out);
+
std::vector<std::pair<Occur, WeightPtr>> _sub_weights;
+ std::vector<std::string> _binding_keys;
size_t _minimum_number_should_match = 1;
bool _enable_scoring = false;
ScoreCombinerPtrT _score_combiner;
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.cpp
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.cpp
index 09d7b28c410..025596f7767 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.cpp
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.cpp
@@ -21,11 +21,29 @@ namespace doris::segment_v2::inverted_index::query_v2 {
template <typename TDocSet, typename TDocSetExclude>
Exclude<TDocSet, TDocSetExclude>::Exclude(TDocSet underlying_docset,
- TDocSetExclude excluding_docset)
+ TDocSetExclude excluding_docset,
+ roaring::Roaring exclude_null,
+ const NullBitmapResolver* resolver)
: _underlying_docset(std::move(underlying_docset)),
- _excluding_docset(std::move(excluding_docset)) {
+ _excluding_docset(std::move(excluding_docset)),
+ _exclude_null(std::move(exclude_null)) {
+ // Inherit the include scorer's null bitmap (e.g. from parent AND
operations).
+ if (resolver != nullptr && _underlying_docset->has_null_bitmap(resolver)) {
+ const auto* nb = _underlying_docset->get_null_bitmap(resolver);
+ if (nb != nullptr) {
+ _null_bitmap |= *nb;
+ }
+ }
+
while (_underlying_docset->doc() != TERMINATED) {
uint32_t target = _underlying_docset->doc();
+ // O(1) null bitmap check (pre-computed from index, cheap).
+ if (!_exclude_null.isEmpty() && _exclude_null.contains(target)) {
+ _null_bitmap.add(target);
+ _underlying_docset->advance();
+ continue;
+ }
+ // Original lazy seek (unchanged).
if (!is_within(_excluding_docset, target)) {
break;
}
@@ -40,6 +58,12 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::advance() {
if (candidate == TERMINATED) {
return TERMINATED;
}
+ // O(1) null bitmap check (pre-computed from index, cheap).
+ if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
+ _null_bitmap.add(candidate);
+ continue;
+ }
+ // Original lazy seek (unchanged).
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
@@ -52,6 +76,11 @@ uint32_t Exclude<TDocSet, TDocSetExclude>::seek(uint32_t
target) {
if (candidate == TERMINATED) {
return TERMINATED;
}
+ // O(1) null bitmap check (pre-computed from index, cheap).
+ if (!_exclude_null.isEmpty() && _exclude_null.contains(candidate)) {
+ _null_bitmap.add(candidate);
+ return advance();
+ }
if (!is_within(_excluding_docset, candidate)) {
return candidate;
}
@@ -76,11 +105,23 @@ float Exclude<TDocSet, TDocSetExclude>::score() {
return 0.0F;
}
-ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding) {
- return std::make_shared<Exclude<ScorerPtr,
ScorerPtr>>(std::move(underlying),
-
std::move(excluding));
+template <typename TDocSet, typename TDocSetExclude>
+bool Exclude<TDocSet, TDocSetExclude>::has_null_bitmap(const
NullBitmapResolver* /*resolver*/) {
+ return !_null_bitmap.isEmpty();
+}
+
+template <typename TDocSet, typename TDocSetExclude>
+const roaring::Roaring* Exclude<TDocSet, TDocSetExclude>::get_null_bitmap(
+ const NullBitmapResolver* /*resolver*/) {
+ return _null_bitmap.isEmpty() ? nullptr : &_null_bitmap;
+}
+
+ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding,
roaring::Roaring exclude_null,
+ const NullBitmapResolver* resolver) {
+ return std::make_shared<Exclude<ScorerPtr, ScorerPtr>>(
+ std::move(underlying), std::move(excluding),
std::move(exclude_null), resolver);
}
template class Exclude<ScorerPtr, ScorerPtr>;
-} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
+} // namespace doris::segment_v2::inverted_index::query_v2
diff --git
a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h
b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h
index a1f0523467d..bc4cb19c7b6 100644
--- a/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h
+++ b/be/src/olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h
@@ -17,6 +17,8 @@
#pragma once
+#include <roaring/roaring.hh>
+
#include "olap/rowset/segment_v2/inverted_index/query_v2/scorer.h"
namespace doris::segment_v2::inverted_index::query_v2 {
@@ -29,7 +31,8 @@ inline bool is_within(TDocSetExclude& docset, uint32_t doc) {
template <typename TDocSet, typename TDocSetExclude>
class Exclude final : public Scorer {
public:
- Exclude(TDocSet underlying_docset, TDocSetExclude excluding_docset);
+ Exclude(TDocSet underlying_docset, TDocSetExclude excluding_docset,
+ roaring::Roaring exclude_null = {}, const NullBitmapResolver*
resolver = nullptr);
~Exclude() override = default;
uint32_t advance() override;
@@ -38,13 +41,20 @@ public:
uint32_t size_hint() const override;
float score() override;
+ bool has_null_bitmap(const NullBitmapResolver* resolver = nullptr)
override;
+ const roaring::Roaring* get_null_bitmap(const NullBitmapResolver* resolver
= nullptr) override;
+
private:
TDocSet _underlying_docset;
TDocSetExclude _excluding_docset;
+ roaring::Roaring _exclude_null;
+ roaring::Roaring _null_bitmap;
};
using ExcludeScorerPtr = std::shared_ptr<Exclude<ScorerPtr, ScorerPtr>>;
-ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding);
+ScorerPtr make_exclude(ScorerPtr underlying, ScorerPtr excluding,
+ roaring::Roaring exclude_null = {},
+ const NullBitmapResolver* resolver = nullptr);
} // namespace doris::segment_v2::inverted_index::query_v2
\ No newline at end of file
diff --git a/be/src/vec/functions/function_search.cpp
b/be/src/vec/functions/function_search.cpp
index 9bf1e1e8e54..5e4ea1002bf 100644
--- a/be/src/vec/functions/function_search.cpp
+++ b/be/src/vec/functions/function_search.cpp
@@ -641,7 +641,7 @@ Status FunctionSearch::build_query_recursive(const
TSearchClause& clause,
occur = map_thrift_occur(child_clause.occur);
}
- builder->add(child_query, occur);
+ builder->add(child_query, occur, std::move(child_binding_key));
}
}
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer_test.cpp
index 81b8d63e3c9..6ff36eedaad 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/intersection_scorer_test.cpp
@@ -26,9 +26,11 @@
#include <utility>
#include <vector>
+#include "olap/rowset/segment_v2/inverted_index/query_v2/exclude_scorer.h"
+
namespace doris {
-using segment_v2::inverted_index::query_v2::AndNotScorer;
+using segment_v2::inverted_index::query_v2::make_exclude;
using segment_v2::inverted_index::query_v2::NullBitmapResolver;
using segment_v2::inverted_index::query_v2::Scorer;
using segment_v2::inverted_index::query_v2::ScorerPtr;
@@ -211,43 +213,324 @@ TEST_F(IntersectionScorerTest, NullBitmapPropagation) {
EXPECT_FALSE(null_bitmap->contains(6));
}
-TEST_F(IntersectionScorerTest, AndNotScorerRespectsTrueAndNullExcludes) {
+// --- ExcludeScorer with null bitmap tests ---
+// These tests verify that the enhanced ExcludeScorer correctly implements
+// SQL three-valued logic: NOT(NULL) = NULL, keeping lazy seek-based
+// exclusion while adding O(1) null bitmap awareness.
+
+TEST_F(IntersectionScorerTest, ExcludeScorerRespectsTrueAndNullExcludes) {
DummyResolver resolver;
+ // Include docs: {2, 4, 6}
auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {2, 4,
6},
std::vector<float> {0.5F,
1.5F, 2.5F});
- auto exclude_true =
+ // Exclude scorer: TRUE docs {4} (lazy seek-based exclusion)
+ auto exclude =
std::make_shared<VectorScorer>(std::vector<uint32_t> {4},
std::vector<float> {0.0F});
- auto exclude_null = std::make_shared<VectorScorer>(
- std::vector<uint32_t> {}, std::vector<float> {},
std::vector<uint32_t> {6});
+ // Pre-collected null bitmap from exclude scorers: {6}
+ roaring::Roaring exclude_null;
+ exclude_null.add(6);
- std::vector<ScorerPtr> excludes {exclude_true, exclude_null};
- auto and_not = std::make_shared<AndNotScorer>(include,
std::move(excludes), &resolver);
- ASSERT_NE(nullptr, and_not);
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+ ASSERT_NE(nullptr, result);
std::vector<uint32_t> docs;
std::vector<float> scores;
- while (and_not->doc() != TERMINATED) {
- docs.push_back(and_not->doc());
- scores.push_back(and_not->score());
- if (and_not->advance() == TERMINATED) {
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ scores.push_back(result->score());
+ if (result->advance() == TERMINATED) {
break;
}
}
+ // Doc 4 is TRUE-excluded, doc 6 is NULL-excluded
std::vector<uint32_t> expected_docs {2};
std::vector<float> expected_scores {0.5F};
EXPECT_EQ(expected_docs, docs);
EXPECT_EQ(expected_scores, scores);
- EXPECT_TRUE(and_not->has_null_bitmap());
- const auto* null_bitmap = and_not->get_null_bitmap();
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
ASSERT_NE(nullptr, null_bitmap);
EXPECT_TRUE(null_bitmap->contains(6));
EXPECT_FALSE(null_bitmap->contains(4));
- EXPECT_EQ(TERMINATED, and_not->advance());
- EXPECT_EQ(TERMINATED, and_not->doc());
- EXPECT_EQ(include->size_hint(), and_not->size_hint());
+ EXPECT_EQ(TERMINATED, result->advance());
+ EXPECT_EQ(TERMINATED, result->doc());
+ EXPECT_EQ(include->size_hint(), result->size_hint());
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerAllExcludesAreNull) {
+ // When all exclude docs are NULL (not TRUE), those docs should appear
+ // in the null bitmap rather than being excluded from the result set.
+ DummyResolver resolver;
+ // Include docs: 1, 2, 3, 4, 5
+ auto include =
+ std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2, 3, 4,
5},
+ std::vector<float> {1.0F, 2.0F,
3.0F, 4.0F, 5.0F});
+ // No TRUE exclude docs
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {},
std::vector<float> {});
+ // NULL exclude: {2, 4}
+ roaring::Roaring exclude_null;
+ exclude_null.add(2);
+ exclude_null.add(4);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+ ASSERT_NE(nullptr, result);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ // Docs 2 and 4 are null-excluded (go to null bitmap), rest pass through
+ std::vector<uint32_t> expected_docs {1, 3, 5};
+ EXPECT_EQ(expected_docs, docs);
+
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(2));
+ EXPECT_TRUE(null_bitmap->contains(4));
+ EXPECT_FALSE(null_bitmap->contains(1));
+ EXPECT_FALSE(null_bitmap->contains(3));
+ EXPECT_FALSE(null_bitmap->contains(5));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerNoResolver) {
+ // Without a resolver, null bitmaps from include are not inherited,
+ // but pre-collected exclude_null is still effective.
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2,
3},
+ std::vector<float> {1.0F,
2.0F, 3.0F});
+ // TRUE exclude: {2}
+ auto exclude =
+ std::make_shared<VectorScorer>(std::vector<uint32_t> {2},
std::vector<float> {0.0F});
+ // NULL exclude: {3} (pre-collected, works even without resolver)
+ roaring::Roaring exclude_null;
+ exclude_null.add(3);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
nullptr);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ // Doc 2 is TRUE-excluded, doc 3 is NULL-excluded
+ std::vector<uint32_t> expected_docs {1};
+ EXPECT_EQ(expected_docs, docs);
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(3));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerEmptyExcludes) {
+ // No excludes: all include docs should pass through with no null bitmap.
+ DummyResolver resolver;
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 5,
10},
+ std::vector<float> {1.0F,
2.0F, 3.0F});
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {},
std::vector<float> {});
+ roaring::Roaring exclude_null;
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ std::vector<uint32_t> expected_docs {1, 5, 10};
+ EXPECT_EQ(expected_docs, docs);
+ EXPECT_FALSE(result->has_null_bitmap());
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerMultipleExcludesWithNulls) {
+ // Simulates multiple exclude scorers that were unioned.
+ // TRUE docs {3, 6} from union, NULL docs {5, 7} from pre-collected null
bitmaps.
+ DummyResolver resolver;
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2,
3, 4, 5, 6, 7, 8},
+ std::vector<float> {1, 2, 3,
4, 5, 6, 7, 8});
+ // Unioned TRUE exclude: {3, 6}
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {3, 6},
+ std::vector<float> {0.0F,
0.0F});
+ // Pre-collected NULL exclude: {5, 7}
+ roaring::Roaring exclude_null;
+ exclude_null.add(5);
+ exclude_null.add(7);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ // TRUE-excluded: 3, 6
+ // NULL-excluded: 5, 7 (go to null bitmap)
+ // Remaining: 1, 2, 4, 8
+ std::vector<uint32_t> expected_docs {1, 2, 4, 8};
+ EXPECT_EQ(expected_docs, docs);
+
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(5));
+ EXPECT_TRUE(null_bitmap->contains(7));
+ EXPECT_FALSE(null_bitmap->contains(3));
+ EXPECT_FALSE(null_bitmap->contains(6));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerTrueOverridesNull) {
+ DummyResolver resolver;
+ auto include =
+ std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2, 3, 4,
5},
+ std::vector<float> {1.0F, 2.0F,
3.0F, 4.0F, 5.0F});
+ // Unioned TRUE exclude: {3, 4}
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {3, 4},
+ std::vector<float> {0.0F,
0.0F});
+ // Pre-collected NULL exclude: {3, 4} (union of both excluders' null
bitmaps)
+ roaring::Roaring exclude_null;
+ exclude_null.add(3);
+ exclude_null.add(4);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ // Docs 3 and 4 are NULL-excluded (exclude_null check comes first)
+ std::vector<uint32_t> expected_docs {1, 2, 5};
+ EXPECT_EQ(expected_docs, docs);
+
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(3));
+ EXPECT_TRUE(null_bitmap->contains(4));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerSeekWithNullExclusion) {
+ // Test seek operations when exclude docs have null entries.
+ DummyResolver resolver;
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 5,
10, 15, 20, 25, 30},
+ std::vector<float> {1, 2, 3,
4, 5, 6, 7});
+ // TRUE exclude: {5, 20}
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {5,
20},
+ std::vector<float> {0.0F,
0.0F});
+ // NULL exclude: {10, 25}
+ roaring::Roaring exclude_null;
+ exclude_null.add(10);
+ exclude_null.add(25);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ // First doc should be 1 (not excluded)
+ EXPECT_EQ(1u, result->doc());
+
+ // Seek to 10 → doc 10 is null-excluded, should skip to 15
+ EXPECT_EQ(15u, result->seek(10));
+
+ // Seek to 20 → doc 20 is TRUE-excluded, should skip to 30
+ // (25 is null-excluded)
+ EXPECT_EQ(30u, result->seek(20));
+
+ EXPECT_EQ(TERMINATED, result->advance());
+
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(10));
+ EXPECT_TRUE(null_bitmap->contains(25));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerIncludeHasNullBitmap) {
+ // When the include scorer has a null bitmap, it should be inherited.
+ DummyResolver resolver;
+ // Include scorer has null docs {8}
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 3,
5, 7},
+ std::vector<float> {1.0F,
2.0F, 3.0F, 4.0F},
+ std::vector<uint32_t> {8});
+ // TRUE exclude: {3}
+ auto exclude =
+ std::make_shared<VectorScorer>(std::vector<uint32_t> {3},
std::vector<float> {0.0F});
+ // NULL exclude: {5}
+ roaring::Roaring exclude_null;
+ exclude_null.add(5);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ // 3 is TRUE-excluded, 5 is NULL-excluded
+ std::vector<uint32_t> expected_docs {1, 7};
+ EXPECT_EQ(expected_docs, docs);
+
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ // Include's null doc 8 should be inherited
+ EXPECT_TRUE(null_bitmap->contains(8));
+ // Exclude's null doc 5 should also be in null bitmap
+ EXPECT_TRUE(null_bitmap->contains(5));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerAllDocsExcluded) {
+ // All include docs are either TRUE-excluded or NULL-excluded.
+ DummyResolver resolver;
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2,
3},
+ std::vector<float> {1.0F,
2.0F, 3.0F});
+ // TRUE exclude: {1, 3}
+ auto exclude = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 3},
+ std::vector<float> {0.0F,
0.0F});
+ // NULL exclude: {2}
+ roaring::Roaring exclude_null;
+ exclude_null.add(2);
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null),
&resolver);
+
+ EXPECT_EQ(TERMINATED, result->doc());
+
+ // Doc 2 should be in null bitmap (NULL-excluded but no TRUE match)
+ EXPECT_TRUE(result->has_null_bitmap());
+ const auto* null_bitmap = result->get_null_bitmap();
+ ASSERT_NE(nullptr, null_bitmap);
+ EXPECT_TRUE(null_bitmap->contains(2));
+ EXPECT_FALSE(null_bitmap->contains(1));
+ EXPECT_FALSE(null_bitmap->contains(3));
+}
+
+TEST_F(IntersectionScorerTest, ExcludeScorerNoNullBitmapWhenEmpty) {
+ // When exclude_null is empty and include has no null bitmap,
+ // ExcludeScorer should behave exactly like the original (no null
awareness).
+ auto include = std::make_shared<VectorScorer>(std::vector<uint32_t> {1, 2,
3, 4, 5},
+ std::vector<float> {1, 2, 3,
4, 5});
+ auto exclude =
+ std::make_shared<VectorScorer>(std::vector<uint32_t> {3},
std::vector<float> {0.0F});
+ roaring::Roaring exclude_null;
+
+ auto result = make_exclude(include, exclude, std::move(exclude_null));
+
+ std::vector<uint32_t> docs;
+ while (result->doc() != TERMINATED) {
+ docs.push_back(result->doc());
+ result->advance();
+ }
+
+ std::vector<uint32_t> expected_docs {1, 2, 4, 5};
+ EXPECT_EQ(expected_docs, docs);
+ EXPECT_FALSE(result->has_null_bitmap());
}
} // namespace doris
diff --git
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
index 7d885ecce19..69322ffa5e1 100644
---
a/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
+++
b/be/test/olap/rowset/segment_v2/inverted_index/query_v2/occur_boolean_query_test.cpp
@@ -713,7 +713,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchExceedsShouldClausesReturnsEmpty
clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs1));
clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs2));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
@@ -727,7 +727,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchExceedsShouldClausesReturnsEmpty
clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
@@ -743,7 +743,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchExceedsShouldClausesReturnsEmpty
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs1));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs2));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -760,7 +760,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchExceedsShouldClausesReturnsEmpty
clauses.emplace_back(Occur::MUST,
std::make_shared<MockQuery>(must_docs));
clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
@@ -777,7 +777,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchZeroWithNoShouldClausesReturnsIg
clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs1));
clauses.emplace_back(Occur::MUST, std::make_shared<MockQuery>(must_docs2));
- OccurBooleanQuery query(std::move(clauses), 0);
+ OccurBooleanQuery query(std::move(clauses), {}, 0);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -796,7 +796,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchEqualsNumShouldWithMustClause) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -819,7 +819,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchEqualsNumShouldWithMultipleMustC
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should3_docs));
- OccurBooleanQuery query(std::move(clauses), 3);
+ OccurBooleanQuery query(std::move(clauses), {}, 3);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -838,7 +838,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchEqualsNumShouldOnlyShould) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should3_docs));
- OccurBooleanQuery query(std::move(clauses), 3);
+ OccurBooleanQuery query(std::move(clauses), {}, 3);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -859,7 +859,7 @@ TEST_F(OccurBooleanQueryTest,
MinimumShouldMatchEqualsNumShouldWithMustNot) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
clauses.emplace_back(Occur::MUST_NOT,
std::make_shared<MockQuery>(must_not_docs));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -971,7 +971,7 @@ TEST_F(OccurBooleanQueryTest,
AllQueryWithMustAndShouldMinMatch) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
@@ -990,7 +990,7 @@ TEST_F(OccurBooleanQueryTest,
ScoringWithMinimumShouldMatchEqualsNumShould) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should1_docs, 2.0F));
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should2_docs, 3.0F));
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(true);
auto scorer = weight->scorer(_ctx);
@@ -1016,7 +1016,7 @@ TEST_F(OccurBooleanQueryTest,
ShouldOnlyWithAllQueryMinShouldMatch) {
clauses.emplace_back(Occur::SHOULD,
std::make_shared<MockQuery>(should_docs));
clauses.emplace_back(Occur::SHOULD, std::make_shared<AllQuery>());
- OccurBooleanQuery query(std::move(clauses), 2);
+ OccurBooleanQuery query(std::move(clauses), {}, 2);
auto weight = query.weight(false);
auto scorer = weight->scorer(_ctx);
auto result = collect_docs(scorer);
diff --git a/regression-test/data/search/test_search_not_null_bitmap.out
b/regression-test/data/search/test_search_not_null_bitmap.out
new file mode 100644
index 00000000000..0693c570f03
--- /dev/null
+++ b/regression-test/data/search/test_search_not_null_bitmap.out
@@ -0,0 +1,55 @@
+-- This file is automatically generated. You should know what you did if you
want to edit this
+-- !not_internal_ids --
+3
+4
+
+-- !not_external_ids --
+3
+4
+
+-- !not_internal_count --
+2
+
+-- !not_external_count --
+2
+
+-- !all_null_internal --
+0
+
+-- !all_null_external --
+0
+
+-- !mixed_not_title_search --
+5
+6
+
+-- !mixed_not_title_external --
+5
+6
+
+-- !mixed_not_content_search --
+2
+5
+6
+
+-- !mixed_not_content_external --
+2
+5
+6
+
+-- !mixed_complex_search --
+1
+3
+6
+
+-- !mixed_complex_external --
+1
+3
+6
+
+-- !multi_must_not_search --
+6
+
+-- !multi_must_not_external --
+6
+
diff --git a/regression-test/suites/search/test_search_not_null_bitmap.groovy
b/regression-test/suites/search/test_search_not_null_bitmap.groovy
new file mode 100644
index 00000000000..e15c705bc7b
--- /dev/null
+++ b/regression-test/suites/search/test_search_not_null_bitmap.groovy
@@ -0,0 +1,222 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_search_not_null_bitmap", "p0") {
+ // Regression test for DORIS-24681:
+ // search('NOT field:value') was incorrectly including NULL rows because
+ // ExcludeScorer did not handle null bitmaps. The fix enhances
ExcludeScorer
+ // with null bitmap awareness while keeping lazy seek-based exclusion.
+ // ExcludeScorer now implements SQL three-valued logic: NOT(NULL) = NULL,
+ // so NULL rows are excluded from the result set.
+
+ def tableName = "search_not_null_bitmap"
+
+ sql """ set enable_common_expr_pushdown = true """ // session-level setting; every query below also repeats it as a SET_VAR hint
+
+ sql "DROP TABLE IF EXISTS ${tableName}"
+
+ sql """
+ CREATE TABLE ${tableName} (
+ id INT,
+ msg TEXT,
+ INDEX idx_msg (msg) USING INVERTED PROPERTIES("parser" = "unicode")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ sql """
+ INSERT INTO ${tableName} VALUES
+ (1, NULL),
+ (2, 'omega alpha'),
+ (3, 'hello world'),
+ (4, 'alpha beta')
+ """
+
+ Thread.sleep(5000) // NOTE(review): fixed 5s pause after the INSERT - presumably waits for the rows to become queryable; confirm it is needed, a deterministic sync would be less flaky
+
+ // ---------------------------------------------------------------
+ // Core bug: search('NOT msg:omega') must NOT include NULL rows
+ // ---------------------------------------------------------------
+
+ // Internal NOT via search DSL - must match external NOT (expected ids: 3, 4)
+ qt_not_internal_ids """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${tableName}
+ WHERE search('NOT msg:omega')
+ ORDER BY id
+ """
+
+ // External NOT via SQL NOT operator (this always worked correctly)
+ qt_not_external_ids """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${tableName}
+ WHERE NOT search('msg:omega')
+ ORDER BY id
+ """
+
+ // Count must match between internal and external NOT (expected count: 2)
+ qt_not_internal_count """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM
${tableName}
+ WHERE search('NOT msg:omega')
+ """
+
+ qt_not_external_count """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM
${tableName}
+ WHERE NOT search('msg:omega')
+ """
+
+ // ---------------------------------------------------------------
+ // Test with all rows NULL for the searched field
+ // ---------------------------------------------------------------
+
+ def allNullTable = "search_not_all_null"
+
+ sql "DROP TABLE IF EXISTS ${allNullTable}"
+
+ sql """
+ CREATE TABLE ${allNullTable} (
+ id INT,
+ msg TEXT,
+ INDEX idx_msg (msg) USING INVERTED PROPERTIES("parser" = "unicode")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ sql """
+ INSERT INTO ${allNullTable} VALUES
+ (1, NULL),
+ (2, NULL),
+ (3, NULL)
+ """
+
+ Thread.sleep(5000) // NOTE(review): same fixed 5s pause as after the first INSERT - confirm it is needed
+
+ // All NULL rows should be excluded by NOT query (expected count: 0)
+ qt_all_null_internal """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM
${allNullTable}
+ WHERE search('NOT msg:anything')
+ """
+
+ qt_all_null_external """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ count(*) FROM
${allNullTable}
+ WHERE NOT search('msg:anything')
+ """
+
+ // ---------------------------------------------------------------
+ // Test with mixed NULL and matching values (multi-field)
+ // ---------------------------------------------------------------
+
+ def mixedTable = "search_not_mixed_null"
+
+ sql "DROP TABLE IF EXISTS ${mixedTable}"
+
+ // Use unicode parser to avoid stemming ambiguity in test expectations
+ sql """
+ CREATE TABLE ${mixedTable} (
+ id INT,
+ title VARCHAR(255),
+ content TEXT,
+ INDEX idx_title (title) USING INVERTED PROPERTIES("parser" =
"unicode"),
+ INDEX idx_content (content) USING INVERTED PROPERTIES("parser" =
"unicode")
+ ) ENGINE=OLAP
+ DUPLICATE KEY(id)
+ DISTRIBUTED BY HASH(id) BUCKETS 1
+ PROPERTIES (
+ "replication_allocation" = "tag.location.default: 1"
+ )
+ """
+
+ sql """
+ INSERT INTO ${mixedTable} VALUES
+ (1, 'hello world', 'good morning'),
+ (2, NULL, 'good afternoon'),
+ (3, 'hello earth', NULL),
+ (4, NULL, NULL),
+ (5, 'goodbye world', 'good evening'),
+ (6, 'test title', 'good night')
+ """
+
+ Thread.sleep(5000) // NOTE(review): same fixed 5s pause as after the earlier INSERTs - confirm it is needed
+
+ // NOT on title field: NULL title rows (id=2,4) should be excluded (expected ids: 5, 6)
+ qt_mixed_not_title_search """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE search('NOT title:hello')
+ ORDER BY id
+ """
+
+ qt_mixed_not_title_external """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE NOT search('title:hello')
+ ORDER BY id
+ """
+
+ // NOT on content field: NULL content rows (id=3,4) should be excluded (expected ids: 2, 5, 6)
+ qt_mixed_not_content_search """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE search('NOT content:morning')
+ ORDER BY id
+ """
+
+ qt_mixed_not_content_external """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE NOT search('content:morning')
+ ORDER BY id
+ """
+
+ // Complex: (title:hello OR content:good) AND NOT title:goodbye (expected ids: 1, 3, 6)
+ qt_mixed_complex_search """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE search('(title:hello OR content:good) AND NOT title:goodbye')
+ ORDER BY id
+ """
+
+ qt_mixed_complex_external """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE (search('title:hello') OR search('content:good'))
+ AND NOT search('title:goodbye')
+ ORDER BY id
+ """
+
+ // ---------------------------------------------------------------
+ // Test with multiple MUST_NOT clauses (expected id: 6)
+ // ---------------------------------------------------------------
+
+ qt_multi_must_not_search """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE search('NOT title:hello AND NOT title:goodbye')
+ ORDER BY id
+ """
+
+ qt_multi_must_not_external """
+ SELECT /*+SET_VAR(enable_common_expr_pushdown=true) */ id FROM
${mixedTable}
+ WHERE NOT search('title:hello')
+ AND NOT search('title:goodbye')
+ ORDER BY id
+ """
+
+ // Cleanup: drop all three tables created by this suite
+ sql "DROP TABLE IF EXISTS ${tableName}"
+ sql "DROP TABLE IF EXISTS ${allNullTable}"
+ sql "DROP TABLE IF EXISTS ${mixedTable}"
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]