This is an automated email from the ASF dual-hosted git repository.

airborne pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 1cde086c55d [fix](search) Fix implicit conjunction incorrectly modifying preceding term in lucene mode (#60814)
1cde086c55d is described below

commit 1cde086c55df903c8f7a083074dad67cf353dd37
Author: Jack <[email protected]>
AuthorDate: Wed Feb 25 18:02:11 2026 +0800

    [fix](search) Fix implicit conjunction incorrectly modifying preceding term in lucene mode (#60814)
    
    ### What problem does this PR solve?
    
    Issue Number: close #DORIS-24545
    
    Problem Summary:
    
    In the `search()` function's lucene mode, queries that mix explicit and
    implicit operators produce results that differ from Elasticsearch. For
    example:
    
    - Query: `"Sumer" OR Ptolemaic\ dynasty Limonene` with
    `default_operator=AND`
    - ES result: 1 row
    - Doris result: 0 rows (before fix)
    
    **Root cause:** In Lucene's `QueryParserBase.addClause()`, only explicit
    `CONJ_AND`/`CONJ_OR` modify the preceding term's occur. Implicit
    conjunction (`CONJ_NONE`, i.e., space-separated terms without an
    explicit operator) only affects the **current** term via
    `default_operator`, without modifying the preceding term.
    
    The FE `SearchDslParser.hasExplicitAndBefore()` fell back to a value
    derived from `default_operator` when no explicit AND token was found,
    so it incorrectly returned `true` whenever `default_operator=AND`. This
    caused implicit conjunction to be treated identically to explicit AND,
    making it modify the preceding term's occur — diverging from Lucene/ES
    semantics.
    
    **Example of the bug:**
    
    For `a OR b c` with `default_operator=AND`:
    - Before fix: `SHOULD(a) MUST(b) MUST(c)` — wrong, implicit space before
    `c` incorrectly upgraded `b` from SHOULD to MUST
    - After fix: `SHOULD(a) SHOULD(b) MUST(c)` — correct, matches ES
    behavior. Only `c` gets MUST (from default_operator), `b` retains SHOULD
    (from the preceding OR)
    
    **Fix:** `hasExplicitAndBefore()` now returns `false` when no explicit
    AND token is found, regardless of `default_operator`. Only explicit AND
    tokens trigger the "introduced by AND" logic that modifies preceding
    terms.
---
 .../functions/scalar/SearchDslParser.java          |  30 +++---
 .../functions/scalar/SearchDslParserTest.java      | 108 +++++++++++++++++++++
 2 files changed, 124 insertions(+), 14 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
index bf0bc8f6168..2dc5c52fb1e 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParser.java
@@ -2143,16 +2143,13 @@ public class SearchDslParser {
                 QsOccur defaultOccur, boolean introducedByOr) {
             List<SearchParser.NotClauseContext> notClauses = ctx.notClause();
 
-            // Determine how to handle implicit operators
-            String defaultOperator = options.getDefaultOperator();
-            boolean useAndForImplicit = 
"AND".equalsIgnoreCase(defaultOperator);
-
             for (int i = 0; i < notClauses.size(); i++) {
                 boolean introducedByAnd;
                 if (i > 0) {
-                    // Check if there's an explicit AND before this notClause
-                    // by walking ctx.children and finding the token 
immediately before this notClause
-                    introducedByAnd = hasExplicitAndBefore(ctx, 
notClauses.get(i), useAndForImplicit);
+                    // Check if there's an explicit AND token before this 
notClause.
+                    // Implicit conjunction (no AND token) returns false - 
only explicit AND
+                    // should trigger the "introduced by AND" logic that 
modifies preceding terms.
+                    introducedByAnd = hasExplicitAndBefore(ctx, 
notClauses.get(i));
                 } else {
                     introducedByAnd = false;
                 }
@@ -2166,13 +2163,18 @@ public class SearchDslParser {
         /**
          * Check if there's an explicit AND token before the target notClause.
          * Walks ctx.children to find the position of target and checks the 
preceding token.
+         *
+         * IMPORTANT: Returns false for implicit conjunction (no explicit AND 
token).
+         * In Lucene's QueryParserBase.addClause(), only explicit CONJ_AND 
modifies the
+         * preceding term. CONJ_NONE (implicit conjunction) only affects the 
current term's
+         * occur via the default_operator, without modifying the preceding 
term.
+         *
          * @param ctx The AndClauseContext containing the children
          * @param target The target NotClauseContext to check
-         * @param implicitDefault Value to return if no explicit AND (use 
default_operator)
-         * @return true if explicit AND before target, implicitDefault if no 
explicit AND
+         * @return true only if there's an explicit AND token before target
          */
         private boolean hasExplicitAndBefore(SearchParser.AndClauseContext ctx,
-                SearchParser.NotClauseContext target, boolean implicitDefault) 
{
+                SearchParser.NotClauseContext target) {
             for (int j = 0; j < ctx.getChildCount(); j++) {
                 if (ctx.getChild(j) == target) {
                     // Found the target - check if the preceding sibling is an 
AND token
@@ -2181,12 +2183,12 @@ public class SearchDslParser {
                                 (org.antlr.v4.runtime.tree.TerminalNode) 
ctx.getChild(j - 1);
                         return terminal.getSymbol().getType() == 
SearchParser.AND;
                     }
-                    // No explicit AND before this term - use default
-                    return implicitDefault;
+                    // No explicit AND before this term
+                    return false;
                 }
             }
-            // Target not found (should not happen) - use default
-            return implicitDefault;
+            // Target not found (should not happen)
+            return false;
         }
 
         private void collectTermsFromNotClause(SearchParser.NotClauseContext 
ctx, List<TermWithOccur> terms,
diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
index 01bbcf8d925..8e562401e14 100644
--- 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
+++ 
b/fe/fe-core/src/test/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SearchDslParserTest.java
@@ -839,6 +839,114 @@ public class SearchDslParserTest {
         Assertions.assertEquals(QsClauseType.AND, plan.getRoot().getType());
     }
 
+    // ============ Tests for Implicit Conjunction (CONJ_NONE) ============
+
+    @Test
+    public void testLuceneModeImplicitConjunctionAndOperator() {
+        // Test: "a OR b c" with default_operator=AND
+        // In Lucene, implicit conjunction (CONJ_NONE) does NOT modify the 
preceding term.
+        // Only explicit AND/OR conjunctions modify the preceding term.
+        //   a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+        //   c(CONJ_NONE)→MUST (no modification to prev b)
+        //   Result: [SHOULD(a), SHOULD(b), MUST(c)]
+        // This matches ES query_string: "a OR b c" with default_operator=AND
+        String dsl = "field:a OR field:b field:c";
+        String options = 
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+        QsNode nodeA = plan.getRoot().getChildren().get(0);
+        Assertions.assertEquals("a", nodeA.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
nodeA.getOccur());
+
+        QsNode nodeB = plan.getRoot().getChildren().get(1);
+        Assertions.assertEquals("b", nodeB.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
nodeB.getOccur());
+
+        QsNode nodeC = plan.getRoot().getChildren().get(2);
+        Assertions.assertEquals("c", nodeC.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.MUST, 
nodeC.getOccur());
+    }
+
+    @Test
+    public void testLuceneModeImplicitConjunctionNotAndOperator() {
+        // Test: "a OR b NOT c" with default_operator=AND
+        // In Lucene, implicit NOT conjunction (CONJ_NONE + MOD_NOT) does NOT 
modify preceding term.
+        //   a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+        //   NOT c(CONJ_NONE, MOD_NOT)→MUST_NOT (no modification to prev b)
+        //   Result: [SHOULD(a), SHOULD(b), MUST_NOT(c)]
+        // This matches ES query_string: "a OR b NOT c" with 
default_operator=AND
+        String dsl = "field:a OR field:b NOT field:c";
+        String options = 
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+        QsNode nodeA = plan.getRoot().getChildren().get(0);
+        Assertions.assertEquals("a", nodeA.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
nodeA.getOccur());
+
+        QsNode nodeB = plan.getRoot().getChildren().get(1);
+        Assertions.assertEquals("b", nodeB.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
nodeB.getOccur());
+
+        QsNode nodeC = plan.getRoot().getChildren().get(2);
+        Assertions.assertEquals("c", nodeC.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.MUST_NOT, 
nodeC.getOccur());
+    }
+
+    @Test
+    public void testLuceneModeImplicitConjunctionOrOperator() {
+        // Test: "a OR b c" with default_operator=OR
+        // With OR_OPERATOR, implicit conjunction gives SHOULD to current term.
+        //   a(CONJ_NONE)→SHOULD, b(CONJ_OR)→SHOULD, c(CONJ_NONE)→SHOULD
+        //   Result: [SHOULD(a), SHOULD(b), SHOULD(c)]
+        String dsl = "field:a OR field:b field:c";
+        String options = "{\"mode\":\"lucene\",\"minimum_should_match\":0}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+        for (QsNode child : plan.getRoot().getChildren()) {
+            Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
child.getOccur());
+        }
+    }
+
+    @Test
+    public void testLuceneModeExplicitAndStillModifiesPrev() {
+        // Test: "a OR b AND c" with default_operator=AND
+        // Explicit AND SHOULD modify the preceding term, unlike implicit 
conjunction.
+        //   a(CONJ_NONE)→MUST, b(CONJ_OR)→prev(a) SHOULD, b SHOULD,
+        //   c(CONJ_AND)→prev(b) MUST, c MUST
+        //   Result: [SHOULD(a), MUST(b), MUST(c)]
+        String dsl = "field:a OR field:b AND field:c";
+        String options = 
"{\"mode\":\"lucene\",\"default_operator\":\"AND\",\"minimum_should_match\":0}";
+        QsPlan plan = SearchDslParser.parseDsl(dsl, options);
+
+        Assertions.assertNotNull(plan);
+        Assertions.assertEquals(QsClauseType.OCCUR_BOOLEAN, 
plan.getRoot().getType());
+        Assertions.assertEquals(3, plan.getRoot().getChildren().size());
+
+        QsNode nodeA = plan.getRoot().getChildren().get(0);
+        Assertions.assertEquals("a", nodeA.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.SHOULD, 
nodeA.getOccur());
+
+        QsNode nodeB = plan.getRoot().getChildren().get(1);
+        Assertions.assertEquals("b", nodeB.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.MUST, 
nodeB.getOccur());
+
+        QsNode nodeC = plan.getRoot().getChildren().get(2);
+        Assertions.assertEquals("c", nodeC.getValue());
+        Assertions.assertEquals(SearchDslParser.QsOccur.MUST, 
nodeC.getOccur());
+    }
+
     // ============ Tests for Escape Handling ============
 
     @Test


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to