This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new a4091ad5c65 [fix](search) fix MATCH_ALL_DOCS losing occur attribute in multi-field expansion (#60873)
a4091ad5c65 is described below

commit a4091ad5c65721ad1fb04f2e848f5aeb109f4a3e
Author: Jack <[email protected]>
AuthorDate: Fri Feb 27 23:05:14 2026 +0800

    [fix](search) fix MATCH_ALL_DOCS losing occur attribute in multi-field expansion (#60873)
    
    ### What problem does this PR solve?
    
    Issue Number: close #DORIS-24561
    
    Problem Summary:
    
    In lucene mode with multi-field queries (e.g., `best_fields` with
    `fields: ["title", "content"]`), the query `"Lauren Boebert" OR *`
    returns results inconsistent with Elasticsearch.
    
    **Root cause:** During multi-field expansion (`expandNodeCrossFields`,
    `deepCopyWithField`, `setFieldOnLeaves`), `MATCH_ALL_DOCS` nodes are
    recreated without preserving the `occur` attribute (e.g., `SHOULD`). The
    BE treats a missing (`null`) `occur` as `MUST`, which changes the query
    semantics:
    
    - **Expected (ES behavior):** `SHOULD(phrase) OR SHOULD(match_all)` =
    all documents
    - **Actual (bug):** `SHOULD(phrase) AND MUST(match_all)` = only
    phrase-matching documents
    
    **ES explain** for `"Lauren Boebert" OR *`:
    ```
    (title:"lauren boebert" | content:"lauren boebert") (ConstantScore(FieldExistsQuery [field=content]) | ConstantScore(FieldExistsQuery [field=title]))
    ```
    Returns 1,000,000 docs (all). Doris was returning only phrase-matching
    docs.
    
    **Fix:** Preserve the `occur` attribute when creating new
    `MATCH_ALL_DOCS` nodes in all three multi-field expansion methods.
---
 .../functions/scalar/SearchDslParser.java          | 21 +++++++--
 .../functions/scalar/SearchDslParserTest.java      | 55 ++++++++++++++++++++++
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 2dc5c52fb1e..09d7af2bca5 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -1517,7 +1517,14 @@ public class SearchDslParser {
         private static QsNode expandNodeCrossFields(QsNode node, List<String> 
fields, boolean luceneMode) {
             // MATCH_ALL_DOCS matches all documents regardless of field - 
don't expand
             if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
-                return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) 
null);
+                QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS, 
(List<QsNode>) null);
+                // Preserve occur attribute (e.g., SHOULD from "X OR *" 
queries)
+                // Without this, occur defaults to null which BE interprets as 
MUST,
+                // changing "X OR *" from matching all docs to matching only X.
+                if (node.getOccur() != null) {
+                    result.setOccur(node.getOccur());
+                }
+                return result;
             }
 
             // Check if this is a leaf node (no children)
@@ -1598,7 +1605,11 @@ public class SearchDslParser {
         private static QsNode deepCopyWithField(QsNode node, String field, 
List<String> fields) {
             // MATCH_ALL_DOCS matches all documents regardless of field - 
don't set field
             if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
-                return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) 
null);
+                QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS, 
(List<QsNode>) null);
+                if (node.getOccur() != null) {
+                    result.setOccur(node.getOccur());
+                }
+                return result;
             }
             if (isLeafNode(node)) {
                 // If the user explicitly wrote "field:term" syntax, preserve 
original field
@@ -1645,7 +1656,11 @@ public class SearchDslParser {
         private static QsNode setFieldOnLeaves(QsNode node, String field, 
List<String> fields) {
             // MATCH_ALL_DOCS matches all documents regardless of field - 
don't set field
             if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
-                return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>) 
null);
+                QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS, 
(List<QsNode>) null);
+                if (node.getOccur() != null) {
+                    result.setOccur(node.getOccur());
+                }
+                return result;
             }
             if (isLeafNode(node)) {
                 // If the user explicitly wrote "field:term" syntax, preserve 
original field
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 8e562401e14..72fdaa4250c 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -2473,4 +2473,59 @@ public class SearchDslParserTest {
         Assertions.assertEquals(1, plan.getFieldBindings().size());
         Assertions.assertEquals("title", 
plan.getFieldBindings().get(0).getFieldName());
     }
+
+    @Test
+    public void testMultiFieldMatchAllDocsPreservesOccurInOrQuery() {
+        // Test: '"Lauren Boebert" OR *' with multi-field + lucene mode + 
best_fields
+        // Bug: expandCrossFields was dropping the SHOULD occur on 
MATCH_ALL_DOCS nodes,
+        // causing BE to default to MUST, which changed the semantics from
+        // "phrase OR match_all" (= all docs) to "phrase AND match_all" (= 
only phrase matches).
+        String dsl = "\"Lauren Boebert\" OR *";
+        String options = 
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+                + 
"\"default_operator\":\"AND\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+        Assertions.assertNotNull(plan);
+
+        // Root should be OCCUR_BOOLEAN
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+
+        // Find the MATCH_ALL_DOCS child - it MUST have occur=SHOULD
+        boolean foundMatchAllWithShould = false;
+        for (QsNode child : plan.getRoot().getChildren()) {
+            if (child.getType() == QsClauseType.MATCH_ALL_DOCS) {
+                Assertions.assertEquals(QsOccur.SHOULD, child.getOccur(),
+                        "MATCH_ALL_DOCS must preserve SHOULD occur after 
multi-field expansion");
+                foundMatchAllWithShould = true;
+            }
+        }
+        Assertions.assertTrue(foundMatchAllWithShould,
+                "Should contain MATCH_ALL_DOCS node with SHOULD occur");
+    }
+
+    @Test
+    public void testMultiFieldMatchAllDocsPreservesOccurWithAndOperator() {
+        // Test: 'Dollar AND *' with multi-field + lucene mode
+        // MATCH_ALL_DOCS should have occur=MUST (from AND operator)
+        String dsl = "Dollar AND *";
+        String options = 
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+                + 
"\"default_operator\":\"OR\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+        Assertions.assertNotNull(plan);
+
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+
+        // Find the MATCH_ALL_DOCS child - it MUST have occur=MUST (from AND 
operator)
+        boolean foundMatchAllWithMust = false;
+        for (QsNode child : plan.getRoot().getChildren()) {
+            if (child.getType() == QsClauseType.MATCH_ALL_DOCS) {
+                Assertions.assertEquals(QsOccur.MUST, child.getOccur(),
+                        "MATCH_ALL_DOCS must preserve MUST occur after 
multi-field expansion");
+                foundMatchAllWithMust = true;
+            }
+        }
+        Assertions.assertTrue(foundMatchAllWithMust,
+                "Should contain MATCH_ALL_DOCS node with MUST occur");
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to