This is an automated email from the ASF dual-hosted git repository.

houston pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/main by this push:
     new e3e99d81799 SOLR-17714: Add a FuzzyQParser for all FuzzyQuery 
customizations (#3281)
e3e99d81799 is described below

commit e3e99d81799fdce7ee18ef091d85529956783ff2
Author: Houston Putman <[email protected]>
AuthorDate: Wed Mar 26 12:48:14 2025 -0500

    SOLR-17714: Add a FuzzyQParser for all FuzzyQuery customizations (#3281)
---
 solr/CHANGES.txt                                   |  2 +
 .../apache/solr/parser/SolrQueryParserBase.java    |  3 -
 .../org/apache/solr/search/FuzzyQParserPlugin.java | 99 ++++++++++++++++++++++
 .../java/org/apache/solr/search/QParserPlugin.java |  1 +
 .../org/apache/solr/search/QueryEqualityTest.java  | 22 +++++
 .../modules/query-guide/pages/other-parsers.adoc   | 67 +++++++++++++++
 .../query-guide/pages/standard-query-parser.adoc   |  2 +
 7 files changed, 193 insertions(+), 3 deletions(-)

diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index fe0ebea90d9..e1fd5d1eebb 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -179,6 +179,8 @@ New Features
 
 * SOLR-17632: Added update request processor to encode text to vector at 
indexing time through external LLM services. (Alessandro Benedetti)
 
+* SOLR-17714: Added a FuzzyQParser to enable all FuzzyQuery customizations. 
(Houston Putman, Siju Varghese)
+
 Improvements
 ---------------------
 * SOLR-15751: The v2 API now has parity with the v1 "COLSTATUS" and "segments" 
APIs, which can be used to fetch detailed information about 
diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java 
b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
index 4160fd7a648..568856fcfe2 100644
--- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
+++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java
@@ -1033,13 +1033,10 @@ public abstract class SolrQueryParserBase extends 
QueryBuilder {
   }
 
   protected String analyzeIfMultitermTermText(String field, String part, 
FieldType fieldType) {
-
     if (part == null
         || !(fieldType instanceof TextField)
         || ((TextField) fieldType).getMultiTermAnalyzer() == null) return part;
 
-    SchemaField sf = schema.getFieldOrNull((field));
-    if (sf == null || !(fieldType instanceof TextField)) return part;
     BytesRef out =
         TextField.analyzeMultiTerm(field, part, ((TextField) 
fieldType).getMultiTermAnalyzer());
     return out == null ? part : out.utf8ToString();
diff --git a/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java 
b/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java
new file mode 100644
index 00000000000..fa7816fb80b
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/FuzzyQParserPlugin.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryparser.surround.parser.QueryParser;
+import org.apache.lucene.search.FuzzyQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.BytesRef;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.schema.TextField;
+
+/**
+ * Plugin for parsing and creating Lucene {@link FuzzyQuery}s with all 
customizations available. All
+ * custom options, <code>maxEdits</code>, <code>prefixLength</code>, 
<code>maxExpansions</code>, and
+ * <code>transpositions</code> are optional and use the {@link FuzzyQuery} 
defaults if not provided.
+ *
+ * <p>Example: <code>{!fuzzy f=myfield maxEdits=1 prefixLength=3 
maxExpansions=2}foobar</code>
+ *
+ * @see QueryParser
+ * @since 9.9
+ */
+public class FuzzyQParserPlugin extends QParserPlugin {
+  public static final String NAME = "fuzzy";
+
+  @Override
+  public QParser createParser(
+      String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest 
req) {
+    return new FuzzyQParser(qstr, localParams, params, req);
+  }
+
+  static class FuzzyQParser extends QParser {
+    static final String MAX_EDITS_PARAM = "maxEdits";
+    static final String PREFIX_LENGTH_PARAM = "prefixLength";
+    static final String MAX_EXPANSIONS_PARAM = "maxExpansions";
+    static final String TRANSPOSITIONS_PARAM = "transpositions";
+
+    public FuzzyQParser(
+        String qstr, SolrParams localParams, SolrParams params, 
SolrQueryRequest req) {
+      super(qstr, localParams, params, req);
+    }
+
+    @Override
+    public Query parse() throws SyntaxError {
+      String termStr = getParam(QueryParsing.V);
+      String field = getParam(QueryParsing.F);
+      termStr = analyzeIfMultitermTermText(field, termStr);
+      Term t = new Term(field, termStr);
+
+      String maxEditsRaw = getParam(MAX_EDITS_PARAM);
+      int maxEdits =
+          (maxEditsRaw != null) ? Integer.parseInt(maxEditsRaw) : 
FuzzyQuery.defaultMaxEdits;
+      String prefixLengthRaw = getParam(PREFIX_LENGTH_PARAM);
+      int prefixLength =
+          (prefixLengthRaw != null)
+              ? Integer.parseInt(prefixLengthRaw)
+              : FuzzyQuery.defaultPrefixLength;
+      String maxExpansionsRaw = getParam(MAX_EXPANSIONS_PARAM);
+      int maxExpansions =
+          (maxExpansionsRaw != null)
+              ? Integer.parseInt(maxExpansionsRaw)
+              : FuzzyQuery.defaultMaxExpansions;
+      String transpositionsRaw = getParam(TRANSPOSITIONS_PARAM);
+      boolean transpositions =
+          (transpositionsRaw != null)
+              ? Boolean.parseBoolean(transpositionsRaw)
+              : FuzzyQuery.defaultTranspositions;
+
+      return new FuzzyQuery(t, maxEdits, prefixLength, maxExpansions, 
transpositions);
+    }
+
+    protected String analyzeIfMultitermTermText(String field, String part) {
+      FieldType fieldType = req.getSchema().getFieldTypeNoEx(field);
+      if (part == null
+          || !(fieldType instanceof TextField)
+          || ((TextField) fieldType).getMultiTermAnalyzer() == null) return 
part;
+
+      BytesRef out =
+          TextField.analyzeMultiTerm(field, part, ((TextField) 
fieldType).getMultiTermAnalyzer());
+      return out == null ? part : out.utf8ToString();
+    }
+  }
+}
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java 
b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index f87b35b55dc..4917a083d60 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -91,6 +91,7 @@ public abstract class QParserPlugin implements 
NamedListInitializedPlugin, SolrI
     map.put(RankQParserPlugin.NAME, new RankQParserPlugin());
     map.put(KnnQParserPlugin.NAME, new KnnQParserPlugin());
     map.put(VectorSimilarityQParserPlugin.NAME, new 
VectorSimilarityQParserPlugin());
+    map.put(FuzzyQParserPlugin.NAME, new FuzzyQParserPlugin());
 
     standardPlugins = Collections.unmodifiableMap(map);
   }
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java 
b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index 130cf3f9469..8df761740ae 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -20,6 +20,7 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import org.apache.lucene.search.FuzzyQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.tests.search.QueryUtils;
 import org.apache.solr.SolrTestCaseJ4;
@@ -138,6 +139,27 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
     }
   }
 
+  public void testQueryFuzzy() throws Exception {
+    try (SolrQueryRequest req = req("myField", "foo_s")) {
+      assertQueryEquals("fuzzy", req, "{!fuzzy f=$myField}asdf", "{!fuzzy 
f=foo_s}asdf");
+      assertQueryEquals("fuzzy", req, "{!fuzzy f=$myField}asdf", "{!fuzzy 
f=foo_s v=asdf}");
+      FuzzyQuery q =
+          (FuzzyQuery)
+              assertQueryEqualsAndReturn("fuzzy", req, "{!fuzzy f=$myField 
prefixLength=10}asdf");
+      assertEquals(10, q.getPrefixLength());
+      q =
+          (FuzzyQuery)
+              assertQueryEqualsAndReturn("fuzzy", req, "{!fuzzy f=$myField 
maxEdits=1}asdf");
+      assertEquals(FuzzyQuery.defaultPrefixLength, q.getPrefixLength());
+      assertEquals(1, q.getMaxEdits());
+      q =
+          (FuzzyQuery)
+              assertQueryEqualsAndReturn(
+                  "fuzzy", req, "{!fuzzy f=$myField maxExpansions=4 
transpositions=false}asdf");
+      assertFalse(q.getTranspositions());
+    }
+  }
+
   public void testQueryBoost() throws Exception {
     SolrQueryRequest req = req("df", "foo_s", "myBoost", "sum(3,foo_i)");
     try {
diff --git a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc 
b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
index abc99fbcf1b..245f905d72a 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/other-parsers.adoc
@@ -381,6 +381,73 @@ In the second example, we're doing a sum calculation, and 
then defining only val
 
 For more information about range queries over functions, see Yonik Seeley's 
introductory blog post 
https://lucidworks.com/2009/07/06/ranges-over-functions-in-solr-14/[Ranges over 
Functions in Solr 1.4].
 
+== Fuzzy Query Parser
+
+The `fuzzy` query parser, implemented by `FuzzyQParserPlugin` (which extends `QParserPlugin`), creates a fuzzy query for a single term.
+This query parser gives additional options on top of the standard 
xref:standard-query-parser.adoc#fuzzy-searches[`field~2` fuzzy query notation].
+
+For more information about fuzzy queries and their options, please refer to 
Lucene's 
{lucene-javadocs}/core/org/apache/lucene/search/FuzzyQuery.html[`FuzzyQuery` 
documentation].
+
+*Parameters*
+
+`f`::
++
+[%autowidth,frame=none]
+|===
+|Required |Default: none
+|===
++
+The field to run the fuzzy query over.
+
+`maxEdits`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `2`
+|===
++
+The maximum number of edits allowed in a matching term.
+This option must be between `0` and `2`.
+
+`prefixLength`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `0`
+|===
++
+Length of the common (non-fuzzy) prefix.
+
+`maxExpansions`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `50`
+|===
++
+The maximum number of terms to match.
+
+`transpositions`::
++
+[%autowidth,frame=none]
+|===
+|Optional |Default: `true`
+|===
++
+`true` if transpositions should be treated as a primitive edit operation. If 
this is `false`, comparisons will implement the classic Levenshtein algorithm.
+
+*Examples*
+
+[source,text]
+----
+{!fuzzy f=myfield}term
+----
+
+[source,text]
+----
+q={!fuzzy f=myfield maxEdits=1 prefixLength=3 maxExpansions=1}foobar
+----
+
 == Graph Query Parser
 
 The `graph` query parser does a breadth first, cyclic aware, graph traversal 
of all documents that are "reachable" from a starting set of root documents 
identified by a wrapped query.
diff --git 
a/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc 
b/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
index 56629af9a43..c59b9d802ba 100644
--- a/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
+++ b/solr/solr-ref-guide/modules/query-guide/pages/standard-query-parser.adoc
@@ -154,6 +154,8 @@ This will match terms like roams & foam - but not foams 
since it has an edit dis
 
 IMPORTANT: In many cases, stemming (reducing terms to a common stem) can 
produce similar effects to fuzzy searches and wildcard searches.
 
+If more advanced fuzzy search options are required, such as `prefixLength` or 
`maxExpansions`, these can be enabled via the 
xref:other-parsers.adoc#fuzzy-query-parser[fuzzy query parser].
+
 === Proximity Searches
 
 A proximity search looks for terms that are within a specific distance from 
one another.

Reply via email to