This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new a4091ad5c65 [fix](search) fix MATCH_ALL_DOCS losing occur attribute in
multi-field expansion (#60873)
a4091ad5c65 is described below
commit a4091ad5c65721ad1fb04f2e848f5aeb109f4a3e
Author: Jack <[email protected]>
AuthorDate: Fri Feb 27 23:05:14 2026 +0800
[fix](search) fix MATCH_ALL_DOCS losing occur attribute in multi-field
expansion (#60873)
### What problem does this PR solve?
Issue Number: close #DORIS-24561
Problem Summary:
In lucene mode with multi-field queries (e.g., `best_fields` with
`fields: ["title", "content"]`), the query `"Lauren Boebert" OR *`
returns results inconsistent with Elasticsearch.
**Root cause:** During multi-field expansion (`expandNodeCrossFields`,
`deepCopyWithField`, `setFieldOnLeaves`), `MATCH_ALL_DOCS` nodes are
recreated without preserving the `occur` attribute (e.g., `SHOULD`). The
BE treats a missing (`null`) `occur` as `MUST`, which changes the query semantics:
- **Expected (ES behavior):** `SHOULD(phrase) OR SHOULD(match_all)` =
all documents
- **Actual (bug):** `SHOULD(phrase) AND MUST(match_all)` = only
phrase-matching documents
**ES explain** for `"Lauren Boebert" OR *`:
```
(title:"lauren boebert" | content:"lauren boebert")
(ConstantScore(FieldExistsQuery [field=content]) |
ConstantScore(FieldExistsQuery [field=title]))
```
In ES this query returns all 1,000,000 docs, whereas Doris was returning
only the phrase-matching docs.
**Fix:** Preserve the `occur` attribute when creating new
`MATCH_ALL_DOCS` nodes in all three multi-field expansion methods.
---
.../functions/scalar/SearchDslParser.java | 21 +++++++--
.../functions/scalar/SearchDslParserTest.java | 55 ++++++++++++++++++++++
2 files changed, 73 insertions(+), 3 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index 2dc5c52fb1e..09d7af2bca5 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -1517,7 +1517,14 @@ public class SearchDslParser {
private static QsNode expandNodeCrossFields(QsNode node, List<String>
fields, boolean luceneMode) {
// MATCH_ALL_DOCS matches all documents regardless of field -
don't expand
if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
- return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS,
(List<QsNode>) null);
+ // Preserve occur attribute (e.g., SHOULD from "X OR *"
queries)
+ // Without this, occur defaults to null which BE interprets as
MUST,
+ // changing "X OR *" from matching all docs to matching only X.
+ if (node.getOccur() != null) {
+ result.setOccur(node.getOccur());
+ }
+ return result;
}
// Check if this is a leaf node (no children)
@@ -1598,7 +1605,11 @@ public class SearchDslParser {
private static QsNode deepCopyWithField(QsNode node, String field,
List<String> fields) {
// MATCH_ALL_DOCS matches all documents regardless of field -
don't set field
if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
- return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS,
(List<QsNode>) null);
+ if (node.getOccur() != null) {
+ result.setOccur(node.getOccur());
+ }
+ return result;
}
if (isLeafNode(node)) {
// If the user explicitly wrote "field:term" syntax, preserve
original field
@@ -1645,7 +1656,11 @@ public class SearchDslParser {
private static QsNode setFieldOnLeaves(QsNode node, String field,
List<String> fields) {
// MATCH_ALL_DOCS matches all documents regardless of field -
don't set field
if (node.getType() == QsClauseType.MATCH_ALL_DOCS) {
- return new QsNode(QsClauseType.MATCH_ALL_DOCS, (List<QsNode>)
null);
+ QsNode result = new QsNode(QsClauseType.MATCH_ALL_DOCS,
(List<QsNode>) null);
+ if (node.getOccur() != null) {
+ result.setOccur(node.getOccur());
+ }
+ return result;
}
if (isLeafNode(node)) {
// If the user explicitly wrote "field:term" syntax, preserve
original field
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 8e562401e14..72fdaa4250c 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -2473,4 +2473,59 @@ public class SearchDslParserTest {
Assertions.assertEquals(1, plan.getFieldBindings().size());
Assertions.assertEquals("title",
plan.getFieldBindings().get(0).getFieldName());
}
+
+ @Test
+ public void testMultiFieldMatchAllDocsPreservesOccurInOrQuery() {
+ // Test: '"Lauren Boebert" OR *' with multi-field + lucene mode +
best_fields
+ // Bug: expandCrossFields was dropping the SHOULD occur on
MATCH_ALL_DOCS nodes,
+ // causing BE to default to MUST, which changed the semantics from
+ // "phrase OR match_all" (= all docs) to "phrase AND match_all" (=
only phrase matches).
+ String dsl = "\"Lauren Boebert\" OR *";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+ +
"\"default_operator\":\"AND\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+ Assertions.assertNotNull(plan);
+
+ // Root should be OCCUR_BOOLEAN
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+
+ // Find the MATCH_ALL_DOCS child - it MUST have occur=SHOULD
+ boolean foundMatchAllWithShould = false;
+ for (QsNode child : plan.getRoot().getChildren()) {
+ if (child.getType() == QsClauseType.MATCH_ALL_DOCS) {
+ Assertions.assertEquals(QsOccur.SHOULD, child.getOccur(),
+ "MATCH_ALL_DOCS must preserve SHOULD occur after
multi-field expansion");
+ foundMatchAllWithShould = true;
+ }
+ }
+ Assertions.assertTrue(foundMatchAllWithShould,
+ "Should contain MATCH_ALL_DOCS node with SHOULD occur");
+ }
+
+ @Test
+ public void testMultiFieldMatchAllDocsPreservesOccurWithAndOperator() {
+ // Test: 'Dollar AND *' with multi-field + lucene mode
+ // MATCH_ALL_DOCS should have occur=MUST (from AND operator)
+ String dsl = "Dollar AND *";
+ String options =
"{\"fields\":[\"title\",\"content\"],\"type\":\"best_fields\","
+ +
"\"default_operator\":\"OR\",\"mode\":\"lucene\",\"minimum_should_match\":0}";
+
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+ Assertions.assertNotNull(plan);
+
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+
+ // Find the MATCH_ALL_DOCS child - it MUST have occur=MUST (from AND
operator)
+ boolean foundMatchAllWithMust = false;
+ for (QsNode child : plan.getRoot().getChildren()) {
+ if (child.getType() == QsClauseType.MATCH_ALL_DOCS) {
+ Assertions.assertEquals(QsOccur.MUST, child.getOccur(),
+ "MATCH_ALL_DOCS must preserve MUST occur after
multi-field expansion");
+ foundMatchAllWithMust = true;
+ }
+ }
+ Assertions.assertTrue(foundMatchAllWithMust,
+ "Should contain MATCH_ALL_DOCS node with MUST occur");
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]