This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new e3e99d81799 SOLR-17714: Add a FuzzyQParser for all FuzzyQuery
customizations (#3281)
e3e99d81799 is described below
commit e3e99d81799fdce7ee18ef091d85529956783ff2
Author: Houston Putman <[email protected]>
AuthorDate: Wed Mar 26 12:48:14 2025 -0500
SOLR-17714: Add a FuzzyQParser for all FuzzyQuery customizations (#3281)
---
solr/CHANGES.txt | 2 +
.../apache/solr/parser/SolrQueryParserBase.java | 3 -
.../org/apache/solr/search/FuzzyQParserPlugin.java | 99 ++++++++++++++++++++++
.../java/org/apache/solr/search/QParserPlugin.java | 1 +
.../org/apache/solr/search/QueryEqualityTest.java | 22 +++++
.../modules/query-guide/pages/other-parsers.adoc | 67 +++++++++++++++
.../query-guide/pages/standard-query-parser.adoc | 2 +
7 files changed, 193 insertions(+), 3 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fe0ebea90d9..e1fd5d1eebb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -179,6 +179,8 @@ New Features
* SOLR-17632: Added update request processor to encode text to vector at
indexing time through external LLM services. (Alessandro Benedetti)
+* SOLR-17714: Added a FuzzyQParser to enable all FuzzyQuery customizations.
(Houston Putman, Siju Varghese)
+
Improvements
---------------------
* SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments"
APIs, which can be used to fetch detailed information about
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index 4160fd7a648..568856fcfe2 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -1033,13 +1033,10 @@ public abstract class SolrQueryParserBase extends
QueryBuilder {
}
protected String analyzeIfMultitermTermText(String field, String part,
FieldType fieldType) {
-
if (part == null
|| !(fieldType instanceof TextField)
|| ((TextField) fieldType).getMultiTermAnalyzer() == null) return part;
- SchemaField sf = schema.getFieldOrNull((field));
- if (sf == null || !(fieldType instanceof TextField)) return part;
BytesRef out =
TextField.analyzeMultiTerm(field, part, ((TextField)
fieldType).getMultiTermAnalyzer());
return out == null ? part : out.utf8ToString();
diff --git a/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java
b/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java
new file mode 100644
index 00000000000..fa7816fb80b
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.surround.parser.QueryParser;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.TextField;
+
+/**
+ * Plugin for parsing and creating Lucene {@link FuzzyQuery}s with all
customizations available. All
+ * custom options, <code>maxEdits</code>, <code>prefixLength</code>,
<code>maxExpansions</code>, and
+ * <code>transpositions</code> are optional and use the {@link FuzzyQuery}
defaults if not provided.
+ *
+ * <p>Example: <code>{!fuzzy f=myfield maxEdits=1 prefixLength=3
maxExpansions=2}foobar</code>
+ *
+ * @see QueryParser
+ * @since 9.9
+ */
+public class FuzzyQParserPlugin extends QParserPlugin {
+ public static final String NAME = "fuzzy";
+
+ @Override
+ public QParser createParser(
+ String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest
req) {
+ return new FuzzyQParser(qstr, localParams, params, req);
+ }
+
+ static class FuzzyQParser extends QParser {
+ static final String MAX_EDITS_PARAM = "maxEdits";
+ static final String PREFIX_LENGTH_PARAM = "prefixLength";
+ static final String MAX_EXPANSIONS_PARAM = "maxExpansions";
+ static final String TRANSPOSITIONS_PARAM = "transpositions";
+
+ public FuzzyQParser(
+ String qstr, SolrParams localParams, SolrParams params,
SolrQueryRequest req) {
+ super(qstr, localParams, params, req);
+ }
+
+ @Override
+ public Query parse() throws SyntaxError {
+ String termStr = getParam(QueryParsing.V);
+ String field = getParam(QueryParsing.F);
+ termStr = analyzeIfMultitermTermText(field, termStr);
+ Term t = new Term(field, termStr);
+
+ String maxEditsRaw = getParam(MAX_EDITS_PARAM);
+ int maxEdits =
+ (maxEditsRaw != null) ? Integer.parseInt(maxEditsRaw) :
FuzzyQuery.defaultMaxEdits;
+ String prefixLengthRaw = getParam(PREFIX_LENGTH_PARAM);
+ int prefixLength =
+ (prefixLengthRaw != null)
+ ? Integer.parseInt(prefixLengthRaw)
+ : FuzzyQuery.defaultPrefixLength;
+ String maxExpansionsRaw = getParam(MAX_EXPANSIONS_PARAM);
+ int maxExpansions =
+ (maxExpansionsRaw != null)
+ ? Integer.parseInt(maxExpansionsRaw)
+ : FuzzyQuery.defaultMaxExpansions;
+ String transpositionsRaw = getParam(TRANSPOSITIONS_PARAM);
+ boolean transpositions =
+ (transpositionsRaw != null)
+ ? Boolean.parseBoolean(transpositionsRaw)
+ : FuzzyQuery.defaultTranspositions;
+
+ return new FuzzyQuery(t, maxEdits, prefixLength, maxExpansions,
transpositions);
+ }
+
+ protected String analyzeIfMultitermTermText(String field, String part) {
+ FieldType fieldType = req.getSchema().getFieldTypeNoEx(field);
+ if (part == null
+ || !(fieldType instanceof TextField)
+ || ((TextField) fieldType).getMultiTermAnalyzer() == null) return
part;
+
+ BytesRef out =
+ TextField.analyzeMultiTerm(field, part, ((TextField)
fieldType).getMultiTermAnalyzer());
+ return out == null ? part : out.utf8ToString();
+ }
+ }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index f87b35b55dc..4917a083d60 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -91,6 +91,7 @@ public abstract class QParserPlugin implements
NamedListInitializedPlugin, SolrI
map.put(RankQParserPlugin.NAME, new RankQParserPlugin());
map.put(KnnQParserPlugin.NAME, new KnnQParserPlugin());
map.put(VectorSimilarityQParserPlugin.NAME, new
VectorSimilarityQParserPlugin());
+ map.put(FuzzyQParserPlugin.NAME, new FuzzyQParserPlugin());
standardPlugins = Collections.unmodifiableMap(map);
}
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 130cf3f9469..8df761740ae 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -20,6 +20,7 @@ import java.util.Arrays;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
+import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.tests.search.QueryUtils;
import org.apache.solr.SolrTestCaseJ4;
@@ -138,6 +139,27 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
}
}
+ public void testQueryFuzzy() throws Exception {
+ try (SolrQueryRequest req = req("myField", "foo_s")) {
+ assertQueryEquals("fuzzy", req, "{!fuzzy f=$myField}asdf", "{!fuzzy
f=foo_s}asdf");
+ assertQueryEquals("fuzzy", req, "{!fuzzy f=$myField}asdf", "{!fuzzy
f=foo_s v=asdf}");
+ FuzzyQuery q =
+ (FuzzyQuery)
+ assertQueryEqualsAndReturn("fuzzy", req, "{!fuzzy f=$myField
prefixLength=10}asdf");
+ assertEquals(10, q.getPrefixLength());
+ q =
+ (FuzzyQuery)
+ assertQueryEqualsAndReturn("fuzzy", req, "{!fuzzy f=$myField
maxEdits=1}asdf");
+ assertEquals(FuzzyQuery.defaultPrefixLength, q.getPrefixLength());
+ assertEquals(1, q.getMaxEdits());
+ q =
+ (FuzzyQuery)
+ assertQueryEqualsAndReturn(
+ "fuzzy", req, "{!fuzzy f=$myField maxExpansions=4
transpositions=false}asdf");
+ assertFalse(q.getTranspositions());
+ }
+ }
+
public void testQueryBoost() throws Exception {
SolrQueryRequest req = req("df", "foo_s", "myBoost", "sum(3,foo_i)");
try {
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
index abc99fbcf1b..245f905d72a 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
@@ -381,6 +381,73 @@ In the second example, we're doing a sum calculation, and
then defining only val
For more information about range queries over functions, see Yonik Seeley's
introductory blog post
https://lucidworks.com/2009/07/06/ranges-over-functions-in-solr-14/[Ranges over
Functions in Solr 1.4].
+== Fuzzy Query Parser
+
+The `FuzzyQParserPlugin` extends the `QParserPlugin` and creates a fuzzy query for a
term.
+This query parser gives additional options on top of the standard
xref:standard-query-parser.adoc#fuzzy-searches[`field~2` fuzzy query notation].
+
+For more information about fuzzy queries and their options, please refer to
Lucene's
{lucene-javadocs}/core/org/apache/lucene/search/FuzzyQuery.html[`FuzzyQuery`
documentation].
+
+*Parameters*
+
+`f`::
++
+[%autowidth,frame=none]
+|===
+|Required |Default: none
+|===
++
+The field to run the fuzzy query over.
+
+`maxEdits`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `2`
+|===
++
+The maximum number of edits allowed in a matching term.
+This option must be between `0` and `2`, inclusive.
+
+`prefixLength`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `0`
+|===
++
+Length of the common (non-fuzzy) prefix.
+
+`maxExpansions`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `50`
+|===
++
+The maximum number of terms to match.
+
+`transpositions`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `true`
+|===
++
+`true` if transpositions should be treated as a primitive edit operation. If
this is `false`, comparisons will implement the classic Levenshtein algorithm.
+
+*Examples*
+
+[source,text]
+----
+{!fuzzy f=myfield}term
+----
+
+[source,text]
+----
+q={!fuzzy f=myfield maxEdits=1 prefixLength=3 maxExpansions=1}foobar
+----
+
== Graph Query Parser
The `graph` query parser does a breadth first, cyclic aware, graph traversal
of all documents that are "reachable" from a starting set of root documents
identified by a wrapped query.
diff --git
a/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
b/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
index 56629af9a43..c59b9d802ba 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
@@ -154,6 +154,8 @@ This will match terms like roams & foam - but not foams
since it has an edit dis
IMPORTANT: In many cases, stemming (reducing terms to a common stem) can
produce similar effects to fuzzy searches and wildcard searches.
+If more advanced fuzzy search options are required, such as `prefixLength` or
`maxExpansions`, these can be enabled via the
xref:other-parsers.adoc#fuzzy-query-parser[fuzzy query parser].
+
=== Proximity Searches
A proximity search looks for terms that are within a specific distance from
one another.