This is an automated email from the ASF dual-hosted git repository.
airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new 1cde086c55d [fix](search) Fix implicit conjunction incorrectly
modifying preceding term in lucene mode (#60814)
1cde086c55d is described below
commit 1cde086c55df903c8f7a083074dad67cf353dd37
Author: Jack <[email protected]>
AuthorDate: Wed Feb 25 18:02:11 2026 +0800
[fix](search) Fix implicit conjunction incorrectly modifying preceding term
in lucene mode (#60814)
### What problem does this PR solve?
Issue Number: close #DORIS-24545
Problem Summary:
In the `search()` function's Lucene mode, queries that mix explicit and
implicit operators produce different results from Elasticsearch. For
example:
- Query: `"Sumer" OR Ptolemaic\ dynasty Limonene` with
`default_operator=AND`
- ES result: 1 row
- Doris result: 0 rows (before fix)
**Root cause:** In Lucene's `QueryParserBase.addClause()`, only explicit
`CONJ_AND`/`CONJ_OR` modify the preceding term's occur. Implicit
conjunction (`CONJ_NONE`, i.e., space-separated terms without an
explicit operator) only affects the **current** term via
`default_operator`, without modifying the preceding term.
The FE `SearchDslParser.hasExplicitAndBefore()` incorrectly returned
`true` (based on `default_operator`) when no explicit AND token was
found. This caused implicit conjunction to be treated identically to
explicit AND, making it modify the preceding term's occur — diverging
from Lucene/ES semantics.
**Example of the bug:**
For `a OR b c` with `default_operator=AND`:
- Before fix: `SHOULD(a) MUST(b) MUST(c)` — wrong: the implicit conjunction
(the space before `c`) incorrectly upgraded `b` from SHOULD to MUST
- After fix: `SHOULD(a) SHOULD(b) MUST(c)` — correct, matches ES
behavior. Only `c` gets MUST (from default_operator), `b` retains SHOULD
(from the preceding OR)
**Fix:** `hasExplicitAndBefore()` now returns `false` when no explicit
AND token is found, regardless of `default_operator`. Only explicit AND
tokens trigger the "introduced by AND" logic that modifies preceding
terms.
---
.../functions/scalar/SearchDslParser.java | 30 +++---
.../functions/scalar/SearchDslParserTest.java | 108 +++++++++++++++++++++
2 files changed, 124 insertions(+), 14 deletions(-)
diff --git
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index bf0bc8f6168..2dc5c52fb1e 100644
---
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -2143,16 +2143,13 @@ public class SearchDslParser {
QsOccur defaultOccur, boolean introducedByOr) {
List<SearchParser.NotClauseContext> notClauses = ctx.notClause();
- // Determine how to handle implicit operators
- String defaultOperator = options.getDefaultOperator();
- boolean useAndForImplicit =
"AND".equalsIgnoreCase(defaultOperator);
-
for (int i = 0; i < notClauses.size(); i++) {
boolean introducedByAnd;
if (i > 0) {
- // Check if there's an explicit AND before this notClause
- // by walking ctx.children and finding the token
immediately before this notClause
- introducedByAnd = hasExplicitAndBefore(ctx,
notClauses.get(i), useAndForImplicit);
+ // Check if there's an explicit AND token before this
notClause.
+ // Implicit conjunction (no AND token) returns false -
only explicit AND
+ // should trigger the "introduced by AND" logic that
modifies preceding terms.
+ introducedByAnd = hasExplicitAndBefore(ctx,
notClauses.get(i));
} else {
introducedByAnd = false;
}
@@ -2166,13 +2163,18 @@ public class SearchDslParser {
/**
* Check if there's an explicit AND token before the target notClause.
* Walks ctx.children to find the position of target and checks the
preceding token.
+ *
+ * IMPORTANT: Returns false for implicit conjunction (no explicit AND
token).
+ * In Lucene's QueryParserBase.addClause(), only explicit CONJ_AND
modifies the
+ * preceding term. CONJ_NONE (implicit conjunction) only affects the
current term's
+ * occur via the default_operator, without modifying the preceding
term.
+ *
* @param ctx The AndClauseContext containing the children
* @param target The target NotClauseContext to check
- * @param implicitDefault Value to return if no explicit AND (use
default_operator)
- * @return true if explicit AND before target, implicitDefault if no
explicit AND
+ * @return true only if there's an explicit AND token before target
*/
private boolean hasExplicitAndBefore(SearchParser.AndClauseContext ctx,
- SearchParser.NotClauseContext target, boolean implicitDefault)
{
+ SearchParser.NotClauseContext target) {
for (int j = 0; j < ctx.getChildCount(); j++) {
if (ctx.getChild(j) == target) {
// Found the target - check if the preceding sibling is an
AND token
@@ -2181,12 +2183,12 @@ public class SearchDslParser {
(org.antlr.v4.runtime.tree.TerminalNode)
ctx.getChild(j - 1);
return terminal.getSymbol().getType() ==
SearchParser.AND;
}
- // No explicit AND before this term - use default
- return implicitDefault;
+ // No explicit AND before this term
+ return false;
}
}
- // Target not found (should not happen) - use default
- return implicitDefault;
+ // Target not found (should not happen)
+ return false;
}
private void collectTermsFromNotClause(SearchParser.NotClauseContext
ctx, List<TermWithOccur> terms,
diff --git
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 01bbcf8d925..8e562401e14 100644
---
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -839,6 +839,114 @@ public class SearchDslParserTest {
Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
}
+ // ============ Tests for Implicit Conjunction (CONJ_NONE) ============
+
+ @Test
+ public void testLuceneModeImplicitConjunctionAndOperator() {
+ // Test: "a OR b c" with default_operator=AND
+ // In Lucene, implicit conjunction (CONJ_NONE) does NOT modify the
preceding term.
+ // Only explicit AND/OR conjunctions modify the preceding term.
+ // a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+ // c(CONJ_NONE)→MUST (no modification to prev b)
+ // Result: [SHOULD(a), SHOULD(b), MUST(c)]
+ // This matches ES query_string: "a OR b c" with default_operator=AND
+ String dsl = "field:a OR field:b field:c";
+ String options =
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode nodeA = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals("a", nodeA.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeA.getOccur());
+
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("b", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
+ Assertions.assertEquals("c", nodeC.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeC.getOccur());
+ }
+
+ @Test
+ public void testLuceneModeImplicitConjunctionNotAndOperator() {
+ // Test: "a OR b NOT c" with default_operator=AND
+ // In Lucene, implicit NOT conjunction (CONJ_NONE + MOD_NOT) does NOT
modify preceding term.
+ // a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+ // NOT c(CONJ_NONE, MOD_NOT)→MUST_NOT (no modification to prev b)
+ // Result: [SHOULD(a), SHOULD(b), MUST_NOT(c)]
+ // This matches ES query_string: "a OR b NOT c" with
default_operator=AND
+ String dsl = "field:a OR field:b NOT field:c";
+ String options =
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode nodeA = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals("a", nodeA.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeA.getOccur());
+
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("b", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
+ Assertions.assertEquals("c", nodeC.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT,
nodeC.getOccur());
+ }
+
+ @Test
+ public void testLuceneModeImplicitConjunctionOrOperator() {
+ // Test: "a OR b c" with default_operator=OR
+ // With OR_OPERATOR, implicit conjunction gives SHOULD to current term.
+ // a(CONJ_NONE)→SHOULD, b(CONJ_OR)→SHOULD, c(CONJ_NONE)→SHOULD
+ // Result: [SHOULD(a), SHOULD(b), SHOULD(c)]
+ String dsl = "field:a OR field:b field:c";
+ String options = "{\"mode\":\"lucene\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ for (QsNode child : plan.getRoot().getChildren()) {
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
child.getOccur());
+ }
+ }
+
+ @Test
+ public void testLuceneModeExplicitAndStillModifiesPrev() {
+ // Test: "a OR b AND c" with default_operator=AND
+ // Explicit AND SHOULD modify the preceding term, unlike implicit
conjunction.
+ // a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+ // c(CONJ_AND)→prev(b) MUST, c MUST
+ // Result: [SHOULD(a), MUST(b), MUST(c)]
+ String dsl = "field:a OR field:b AND field:c";
+ String options =
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+ QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+ Assertions.assertNotNull(plan);
+ Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN,
plan.getRoot().getType());
+ Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+ QsNode nodeA = plan.getRoot().getChildren().get(0);
+ Assertions.assertEquals("a", nodeA.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD,
nodeA.getOccur());
+
+ QsNode nodeB = plan.getRoot().getChildren().get(1);
+ Assertions.assertEquals("b", nodeB.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeB.getOccur());
+
+ QsNode nodeC = plan.getRoot().getChildren().get(2);
+ Assertions.assertEquals("c", nodeC.getValue());
+ Assertions.assertEquals(SearchDslParser.QsOccur.MUST,
nodeC.getOccur());
+ }
+
// ============ Tests for Escape Handling ============
@Test
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]