This is an automated email from the ASF dual-hosted git repository. dsmiley pushed a commit to branch branch_9x in repository https://gitbox.apache.org/repos/asf/solr.git
commit 61ca4b3e492efca28ca8d0f0cc45e9fb4a5b9d70 Author: Andy Webb <[email protected]> AuthorDate: Sat Oct 25 16:19:59 2025 +0100 SOLR-17959: Add alwaysStopwords option to edismax (#3802) --- changelog/unreleased/SOLR-17959-alwaysStopwords.yml | 9 +++++++++ .../org/apache/solr/search/ExtendedDismaxQParser.java | 17 +++++++++++++---- .../apache/solr/search/TestExtendedDismaxParser.java | 6 ++++++ .../modules/indexing-guide/pages/filters.adoc | 2 ++ .../modules/query-guide/pages/edismax-query-parser.adoc | 6 +++++- 5 files changed, 35 insertions(+), 5 deletions(-) diff --git a/changelog/unreleased/SOLR-17959-alwaysStopwords.yml b/changelog/unreleased/SOLR-17959-alwaysStopwords.yml new file mode 100644 index 00000000000..274ee48261e --- /dev/null +++ b/changelog/unreleased/SOLR-17959-alwaysStopwords.yml @@ -0,0 +1,9 @@ +title: Add alwaysStopwords option to edismax so its "all stopwords" behaviour can be controlled +type: changed +authors: + - name: Andy Webb +links: + - name: SOLR-17959 + url: https://issues.apache.org/jira/browse/SOLR-17959 +issues: + - 17959 diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index 3281b5073ef..b4d4d945002 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -97,6 +97,9 @@ public class ExtendedDismaxQParser extends QParser { /** If set to true, stopwords are removed from the query. 
*/ public static String STOPWORDS = "stopwords"; + + /** If set to true, the stopword filter applies even if all terms are stopwords */ + public static String ALWAYS_STOPWORDS = "alwaysStopwords"; } private ExtendedDismaxConfiguration config; @@ -416,7 +419,7 @@ public class ExtendedDismaxQParser extends QParser { query = up.parse(mainUserQuery); if (shouldRemoveStopFilter(config, query)) { - // if the query was all stop words, remove none of them + // if the query was all stopwords, remove none of them (unless alwaysStopwords is set) up.setRemoveStopFilter(true); query = up.parse(mainUserQuery); } @@ -425,6 +428,8 @@ public class ExtendedDismaxQParser extends QParser { up.exceptions = false; } + // query may have become empty if it only contained tokenising characters or due to + // stopword removal if alwaysStopwords is set if (query == null) { return null; } @@ -447,11 +452,11 @@ public class ExtendedDismaxQParser extends QParser { /** * Determines if query should be re-parsed removing the stop filter. * - * @return true if there are stopwords configured and the parsed query was empty false in any - * other case. + * @return true if there are stopwords configured, the alwaysStopwords option hasn't been set and + * the parsed query was empty - return false in any other case. 
*/ protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config, Query query) { - return config.stopwords && isEmpty(query); + return config.stopwords && !config.alwaysStopwords && isEmpty(query); } private String escapeUserQuery(List<Clause> clauses) { @@ -1706,6 +1711,8 @@ public class ExtendedDismaxQParser extends QParser { protected boolean stopwords; + protected boolean alwaysStopwords; + protected boolean mmAutoRelax; protected String altQ; @@ -1756,6 +1763,8 @@ public class ExtendedDismaxQParser extends QParser { stopwords = solrParams.getBool(DMP.STOPWORDS, true); + alwaysStopwords = solrParams.getBool(DMP.ALWAYS_STOPWORDS, false); + mmAutoRelax = solrParams.getBool(DMP.MM_AUTORELAX, false); altQ = solrParams.get(DisMaxParams.ALTQ); diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index a0a1815d4dc..467d3efe101 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -378,6 +378,12 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { "q", "the big"), oner); + // test for ignoring stopwords when all query terms are stopwords + assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the"), oner); + + // test for not ignoring stopwords when all query terms are stopwords and alwaysStopwords is set + assertQ(req("defType", "edismax", "qf", "text_sw", "q", "the", "alwaysStopwords", "true"), nor); + // searching for a literal colon value when clearly not used for a field assertQ( "expected doc is missing (using standard)", diff --git a/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc b/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc index ba90cb3c725..be8febb91a6 100644 --- a/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc +++ 
b/solr/solr-ref-guide/modules/indexing-guide/pages/filters.adoc @@ -2941,6 +2941,8 @@ Spanish stemmer, Spanish words: This filter discards, or _stops_ analysis of, tokens that are on the given stop words list. A standard stop words list is included in the Solr `conf` directory, named `stopwords.txt`, which is appropriate for typical English language text. +Note that the xref:query-guide:edismax-query-parser.adoc[eDisMax] query parser disables the stop filter if all query terms are stop words unless its `alwaysStopwords` option is enabled. + *Factory class:* `solr.StopFilterFactory` *Arguments:* diff --git a/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc b/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc index 201507c4df1..b8cd45db46f 100644 --- a/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc +++ b/solr/solr-ref-guide/modules/query-guide/pages/edismax-query-parser.adoc @@ -27,7 +27,7 @@ In addition to supporting all the DisMax query parser parameters, Extended DisMa * includes improved smart partial escaping in the case of syntax errors; fielded queries, +/-, and phrase queries are still supported in this mode. * improves proximity boosting by using word shingles; you do not need the query to match all words in the document before proximity boosting is applied. * includes advanced stopword handling: stopwords are not required in the mandatory part of the query but are still used in the proximity boosting part. -If a query consists of all stopwords, such as "to be or not to be", then all words are required. +If a query consists of all stopwords, such as "to be or not to be", then all words are required. (This feature may be disabled - see `alwaysStopwords` below.) 
* includes improved boost function: in Extended DisMax, the `boost` function is a multiplier xref:dismax-query-parser.adoc#bq-bf-shortcomings[rather than an addend], improving your boost results; the additive boost functions of DisMax (`bf` and `bq`) are also supported. * supports pure negative nested queries: queries such as `+foo (-foo)` will match all documents. * lets you specify which fields the end user is allowed to query, and to disallow direct fielded searches. @@ -109,6 +109,10 @@ If not specified, `ps` is used. A Boolean parameter indicating if the `StopFilterFactory` configured in the query analyzer should be respected when parsing the query. If this is set to `false`, then the `StopFilterFactory` in the query analyzer is ignored. +`alwaysStopwords`:: +A Boolean parameter indicating that the `StopFilterFactory` configured in the query analyzer should always be respected, even if all query terms are stopwords. +This defaults to `false`, meaning that if a query consists of all stopwords, such as "to be or not to be", then all words are required. + `uf`:: Specifies which schema fields the end user is allowed to explicitly query and to toggle whether embedded Solr queries are supported. This parameter supports wildcards.
