This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 6d4e218a2e3 [fix](search) inject MATCH_ALL_DOCS for multi-MUST_NOT queries in lucene mode (#60891)
6d4e218a2e3 is described below

commit 6d4e218a2e3a86051c2645b4774b358a572c80af
Author: Jack <[email protected]>
AuthorDate: Sun Mar 1 11:07:47 2026 +0800

    [fix](search) inject MATCH_ALL_DOCS for multi-MUST_NOT queries in lucene mode (#60891)
    
    ### What problem does this PR solve?
    
    Related PR: #60814
    
    Problem Summary:
    In search() lucene mode, when all terms in a boolean query are MUST_NOT
    (e.g., `NOT a AND NOT b` or `NOT a NOT b` with default_operator=AND),
    the query incorrectly returns no documents instead of returning all
    documents EXCEPT those matching the negated terms.
    
    Root cause: Lucene's BooleanQuery with only MUST_NOT clauses matches
    nothing (by design). ES handles this by injecting a MatchAllDocsQuery
    with SHOULD occur. Doris only handled the single-term MUST_NOT case
    but not multi-term all-MUST_NOT queries.
    
    Fix: After `applyLuceneBooleanLogic()`, detect if ALL terms are MUST_NOT
    and inject `MATCH_ALL_DOCS(SHOULD)` with `minimum_should_match=1`.
---
 .../functions/scalar/SearchDslParser.java          | 17 +++++++
 .../functions/scalar/SearchDslParserTest.java      | 54 ++++++++++++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 86e850cd7ad..3f5072b46b6 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -2164,6 +2164,23 @@ MATCH_ALL_DOCS, // Matches all documents (used for pure NOT query rewriting)
             // Apply Lucene boolean logic
             applyLuceneBooleanLogic(terms);
 
+            // Check if ALL terms are MUST_NOT (pure negation query).
+            // In Lucene, a BooleanQuery with only MUST_NOT clauses matches nothing,
+            // so we inject a MATCH_ALL_DOCS(SHOULD) node to ensure proper semantics:
+            // match all docs EXCEPT those matching any MUST_NOT term.
+            boolean allMustNot = terms.stream().allMatch(t -> t.occur == 
QsOccur.MUST_NOT);
+            if (allMustNot) {
+                QsNode matchAllNode = new QsNode(QsClauseType.MATCH_ALL_DOCS, 
(List<QsNode>) null);
+                matchAllNode.setOccur(QsOccur.SHOULD);
+                List<QsNode> children = new ArrayList<>();
+                children.add(matchAllNode);
+                for (TermWithOccur term : terms) {
+                    term.node.setOccur(term.occur);
+                    children.add(term.node);
+                }
+                return new QsNode(QsClauseType.OCCUR_BOOLEAN, children, 1);
+            }
+
             // Determine minimum_should_match
             // Only use explicit option at top level; nested clauses use default logic
             Integer minShouldMatch = (nestingLevel == 0) ? 
options.getMinimumShouldMatch() : null;
diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 417f73eb4ed..359f3d86f8a 100644
--- a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -781,6 +781,60 @@ public class SearchDslParserTest {
         Assertions.assertEquals(QsOccur.MUST_NOT, termNode.getOccur());
     }
 
+    @Test
+    public void testLuceneModeMultipleNotTermsInjectMatchAllDocs() {
+        // Test: "NOT a AND NOT b" should inject MATCH_ALL_DOCS(SHOULD) when ALL terms are MUST_NOT
+        String dsl = "NOT field:a AND NOT field:b";
+        String options = "{\"mode\":\"lucene\"}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        // 3 children: MATCH_ALL_DOCS(SHOULD) + MUST_NOT(a) + MUST_NOT(b)
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+        Assertions.assertEquals(Integer.valueOf(1), 
plan.getRoot().getMinimumShouldMatch());
+
+        QsNode matchAllNode = plan.getRoot().getChildren().get(0);
+        Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS, 
matchAllNode.getType());
+        Assertions.assertEquals(QsOccur.SHOULD, matchAllNode.getOccur());
+
+        for (int i = 1; i < plan.getRoot().getChildren().size(); i++) {
+            Assertions.assertEquals(QsOccur.MUST_NOT, 
plan.getRoot().getChildren().get(i).getOccur());
+        }
+    }
+
+    @Test
+    public void testLuceneModeMultipleNotImplicitConjunction() {
+        // Test: "NOT a NOT b" with default_operator=and
+        String dsl = "NOT field:a NOT field:b";
+        String options = "{\"mode\":\"lucene\",\"default_operator\":\"and\"}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+        QsNode matchAllNode = plan.getRoot().getChildren().get(0);
+        Assertions.assertEquals(QsClauseType.MATCH_ALL_DOCS, 
matchAllNode.getType());
+        Assertions.assertEquals(QsOccur.SHOULD, matchAllNode.getOccur());
+    }
+
+    @Test
+    public void testLuceneModeNotAllMustNotNoInjection() {
+        // Test: "NOT a AND b" - mixed, should NOT inject MATCH_ALL_DOCS
+        String dsl = "NOT field:a AND field:b";
+        String options = "{\"mode\":\"lucene\"}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(2, plan.getRoot().getChildren().size());
+
+        boolean hasMatchAll = plan.getRoot().getChildren().stream()
+                .anyMatch(c -> c.getType() == QsClauseType.MATCH_ALL_DOCS);
+        Assertions.assertFalse(hasMatchAll, "Mixed MUST/MUST_NOT should not 
inject MATCH_ALL_DOCS");
+    }
+
     @Test
     public void testLuceneModeMinimumShouldMatchExplicit() {
         // Test: explicit minimum_should_match=1 keeps SHOULD clauses


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to