Author: klaas
Date: Mon Nov 5 11:39:14 2007
New Revision: 592129
URL: http://svn.apache.org/viewvc?rev=592129&view=rev
Log:
SOLR-395: spell checker upgrade
Added:
lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java
(with props)
lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java
(with props)
lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml
(with props)
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
(with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=592129&r1=592128&r2=592129&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Mon Nov 5 11:39:14 2007
@@ -136,12 +136,18 @@
to the detailed field information from the solrj client API.
(Grant Ingersoll via ehatcher)
-26. SOLR-334L Pluggable query parsers. Allows specification of query
+26. SOLR-334: Pluggable query parsers. Allows specification of query
type and arguments as a prefix on a query string. (yonik)
-27. SOLR-351L External Value Source. An external file may be used
+27. SOLR-351: External Value Source. An external file may be used
to specify the values of a field, currently usable as
a ValueSource in a FunctionQuery. (yonik)
+
+28. SOLR-395: Many new features for the spell checker implementation, including
+ an extended response mode with much richer output, multi-word spell checking,
+ and a bevy of new and renamed options (see the wiki).
+ (Mike Krimerman, Scott Taber via klaas).
+
Changes in runtime behavior
Modified:
lucene/solr/trunk/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java?rev=592129&r1=592128&r2=592129&view=diff
==============================================================================
---
lucene/solr/trunk/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
(original)
+++
lucene/solr/trunk/src/java/org/apache/solr/handler/SpellCheckerRequestHandler.java
Mon Nov 5 11:39:14 2007
@@ -18,6 +18,7 @@
package org.apache.solr.handler;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
@@ -30,7 +31,9 @@
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.core.SolrCore;
+import org.apache.solr.util.HiFrequencyDictionary;
import java.io.File;
import java.io.IOException;
@@ -42,6 +45,141 @@
* Takes a string (e.g. a query string) as the value of the "q" parameter
* and looks up alternative spelling suggestions in the spellchecker.
* The spellchecker used by this handler is the Lucene contrib SpellChecker.
+ *
+<style>
+pre.code
+{
+ border: 1pt solid #AEBDCC;
+ background-color: #F3F5F7;
+ padding: 5pt;
+ font-family: courier, monospace;
+ white-space: pre;
+ // begin css 3 or browser specific rules - do not remove!
+ //see: http://forums.techguy.org/archive/index.php/t-249849.html
+ white-space: pre-wrap;
+ word-wrap: break-word;
+ white-space: -moz-pre-wrap;
+ white-space: -pre-wrap;
+ white-space: -o-pre-wrap;
+ // end css 3 or browser specific rules
+}
+
+</style>
+ *
+ * <p>The results identify the original words by echoing them as an entry with
+ * the name "words" and the original word value. They
+ * also indicate whether the requested "words" are contained in the index through
+ * the "exist" true/false value. Examples of these output
+ * parameters in the standard output format are as follows:</p>
+ * <pre class="code">
+<str name="words">facial</str>
+<str name="exist">true</str> </pre>
+ *
+ * <p>If a query string parameter of "extendedResults" is used, then each word within the
+ * "q" parameter (separated by a space or +) will
+ * be run through the spell checker and will be wrapped in a
+ * NamedList. Each word will then get its own set of results: frequency and
+ * suggestions.</p>
+ *
+ * <p>Examples of the use of the standard output (XML) without and with the
+ * use of the "extendedResults" parameter are as follows.</p>
+ *
+ * <p> The following URL
+ * examples were configured with the solr.SpellCheckerRequestHandler
+ * named as "/spellchecker".</p>
+ *
+ * <p>Without the use of "extendedResults" and one word
+ * spelled correctly: facial </p>
+ * <pre
class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&q=facial</pre>
+ * <pre class="code">
+<?xml version="1.0" encoding="UTF-8"?>
+<response>
+
+<lst name="responseHeader">
+ <int name="status">0</int>
+ <int name="QTime">6</int>
+</lst>
+<str name="words">facial</str>
+<str name="exist">true</str>
+<arr name="suggestions">
+ <str>faciale</str>
+ <str>faucial</str>
+ <str>fascial</str>
+ <str>facing</str>
+ <str>faciei</str>
+ <str>facialis</str>
+ <str>social</str>
+ <str>facile</str>
+ <str>spacial</str>
+ <str>glacial</str>
+ <str>marcial</str>
+ <str>facies</str>
+ <str>facio</str>
+</arr>
+</response> </pre>
+ *
+ * <p>Without the use of "extendedResults" and two words,
+ * one spelled correctly and one misspelled: facial salophosphoprotein </p>
+ * <pre
class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&q=facial+salophosphoprotein</pre>
+ * <pre class="code">
+<?xml version="1.0" encoding="UTF-8"?>
+<response>
+
+<lst name="responseHeader">
+ <int name="status">0</int>
+ <int name="QTime">18</int>
+</lst>
+<str name="words">facial salophosphoprotein</str>
+<str name="exist">false</str>
+<arr name="suggestions">
+ <str>sialophosphoprotein</str>
+</arr>
+</response> </pre>
+ *
+ *
+ * <p>With the use of "extendedResults" and two words,
+ * one spelled correctly and one misspelled: facial salophosphoprotein </p>
+ * <pre
class="code">http://.../spellchecker?indent=on&onlyMorePopular=true&accuracy=.6&suggestionCount=20&extendedResults=true&q=facial+salophosphoprotein</pre>
+ * <pre class="code">
+<?xml version="1.0" encoding="UTF-8"?>
+<response>
+
+<lst name="responseHeader">
+ <int name="status">0</int>
+ <int name="QTime">23</int>
+</lst>
+<lst name="result">
+ <lst name="facial">
+ <int name="frequency">1</int>
+ <lst name="suggestions">
+ <lst name="faciale"><int
name="frequency">1</int></lst>
+ <lst name="faucial"><int
name="frequency">1</int></lst>
+ <lst name="fascial"><int
name="frequency">1</int></lst>
+ <lst name="facing"><int
name="frequency">1</int></lst>
+ <lst name="faciei"><int
name="frequency">1</int></lst>
+ <lst name="facialis"><int
name="frequency">1</int></lst>
+ <lst name="social"><int
name="frequency">1</int></lst>
+ <lst name="facile"><int
name="frequency">1</int></lst>
+ <lst name="spacial"><int
name="frequency">1</int></lst>
+ <lst name="glacial"><int
name="frequency">1</int></lst>
+ <lst name="marcial"><int
name="frequency">1</int></lst>
+ <lst name="facies"><int
name="frequency">1</int></lst>
+ <lst name="facio"><int
name="frequency">1</int></lst>
+ </lst>
+ </lst>
+ <lst name="salophosphoprotein">
+ <int name="frequency">0</int>
+ <lst name="suggestions">
+ <lst name="sialophosphoprotein"><int
name="frequency">1</int></lst>
+ <lst name="phosphoprotein"><int
name="frequency">1</int></lst>
+ <lst name="phosphoproteins"><int
name="frequency">1</int></lst>
+ <lst name="alphalipoprotein"><int
name="frequency">1</int></lst>
+ </lst>
+ </lst>
+</lst>
+</response> </pre>
+
+ *
* @see <a href="http://wiki.apache.org/jakarta-lucene/SpellChecker">The
Lucene Spellchecker documentation</a>
*
*/
@@ -64,22 +202,37 @@
* return only the words more frequent than this.
*
*/
- private boolean onlyMorePopular = false;
private Directory spellcheckerIndexDir = new RAMDirectory();
private String dirDescription = "(ramdir)";
private String termSourceField;
+
+ private static final String PREFIX = "sp.";
+ private static final String QUERY_PREFIX = PREFIX + "query.";
+ private static final String DICTIONARY_PREFIX = PREFIX + "dictionary.";
+
+ private static final String SOURCE_FIELD = DICTIONARY_PREFIX +
"termSourceField";
+ private static final String INDEX_DIR = DICTIONARY_PREFIX + "indexDir";
+ private static final String THRESHOLD = DICTIONARY_PREFIX + "threshold";
+
+ private static final String ACCURACY = QUERY_PREFIX + "accuracy";
+ private static final String SUGGESTIONS = QUERY_PREFIX + "suggestionCount";
+ private static final String POPULAR = QUERY_PREFIX + "onlyMorePopular";
+ private static final String EXTENDED = QUERY_PREFIX + "extendedResults";
+
private static final float DEFAULT_ACCURACY = 0.5f;
- private static final int DEFAULT_NUM_SUGGESTIONS = 1;
+ private static final int DEFAULT_SUGGESTION_COUNT = 1;
private static final boolean DEFAULT_MORE_POPULAR = false;
-
+ private static final boolean DEFAULT_EXTENDED_RESULTS = false;
+ private static final float DEFAULT_DICTIONARY_THRESHOLD = 0.0f;
+
public void init(NamedList args) {
super.init(args);
SolrParams p = SolrParams.toSolrParams(args);
- termSourceField = p.get("termSourceField");
+ termSourceField = p.get(SOURCE_FIELD, p.get("termSourceField"));
try {
- String dir = p.get("spellcheckerIndexDir");
+ String dir = p.get(INDEX_DIR, p.get("spellcheckerIndexDir"));
if (null != dir) {
File f = new File(dir);
if ( ! f.isAbsolute() ) {
@@ -97,6 +250,10 @@
}
}
+ /**
+ * Processes the following query string parameters: q, extendedResults, cmd rebuild,
+ * cmd reopen, accuracy, suggestionCount, restrictToField, and onlyMorePopular.
+ */
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
throws Exception {
SolrParams p = req.getParams();
@@ -115,47 +272,90 @@
}
}
+ // empty query string
+ if (null == words || "".equals(words.trim())) {
+ return;
+ }
+
IndexReader indexReader = null;
String suggestionField = null;
Float accuracy;
int numSug;
+ boolean onlyMorePopular;
+ boolean extendedResults;
try {
- accuracy = p.getFloat("accuracy", DEFAULT_ACCURACY);
+ accuracy = p.getFloat(ACCURACY, p.getFloat("accuracy",
DEFAULT_ACCURACY));
spellChecker.setAccuracy(accuracy);
} catch (NumberFormatException e) {
throw new RuntimeException("Accuracy must be a valid positive float", e);
}
try {
- numSug = p.getInt("suggestionCount", DEFAULT_NUM_SUGGESTIONS);
+ numSug = p.getInt(SUGGESTIONS, p.getInt("suggestionCount",
DEFAULT_SUGGESTION_COUNT));
} catch (NumberFormatException e) {
throw new RuntimeException("Spelling suggestion count must be a valid
positive integer", e);
}
try {
- onlyMorePopular = p.getBool("onlyMorePopular", DEFAULT_MORE_POPULAR);
- } catch (NumberFormatException e) {
+ onlyMorePopular = p.getBool(POPULAR, DEFAULT_MORE_POPULAR);
+ } catch (SolrException e) {
throw new RuntimeException("'Only more popular' must be a valid
boolean", e);
}
+ try {
+ extendedResults = p.getBool(EXTENDED, DEFAULT_EXTENDED_RESULTS);
+ } catch (SolrException e) {
+ throw new RuntimeException("'Extended results' must be a valid boolean",
e);
+ }
- // when searching for more popular, a non null index-reader and
+ // when searching for more popular, a non null index-reader and
// restricted-field are required
- if (onlyMorePopular) {
+ if (onlyMorePopular || extendedResults) {
indexReader = req.getSearcher().getReader();
suggestionField = termSourceField;
}
+ if (extendedResults) {
- if (null != words && !"".equals(words.trim())) {
+ SimpleOrderedMap<Object> results = new SimpleOrderedMap<Object>();
+ String[] wordz = words.split(" ");
+ for (String word : wordz)
+ {
+ SimpleOrderedMap<Object> nl = new SimpleOrderedMap<Object>();
+ nl.add("frequency", indexReader.docFreq(new Term(suggestionField,
word)));
+ String[] suggestions =
+ spellChecker.suggestSimilar(word, numSug,
+ indexReader, suggestionField, onlyMorePopular);
+
+ // suggestion array
+ NamedList<Object> sa = new NamedList<Object>();
+ for (int i=0; i<suggestions.length; i++) {
+ // suggestion item
+ SimpleOrderedMap<Object> si = new SimpleOrderedMap<Object>();
+ si.add("frequency", indexReader.docFreq(new Term(termSourceField,
suggestions[i])));
+ sa.add(suggestions[i], si);
+ }
+ nl.add("suggestions", sa);
+ results.add(word, nl);
+ }
+ rsp.add( "result", results );
+
+ } else {
+ rsp.add("words", words);
+ if (spellChecker.exist(words)) {
+ rsp.add("exist","true");
+ } else {
+ rsp.add("exist","false");
+ }
String[] suggestions =
spellChecker.suggestSimilar(words, numSug,
indexReader, suggestionField,
onlyMorePopular);
-
+
rsp.add("suggestions", Arrays.asList(suggestions));
}
}
/** Rebuilds the SpellChecker index using values from the <code>termSourceField</code> from the
 * index pointed to by the current {@link IndexSearcher}.
+ * Any word appearing in fewer than the threshold fraction of documents will not be added to the spellcheck index.
*/
private void rebuild(SolrQueryRequest req) throws IOException, SolrException
{
if (null == termSourceField) {
@@ -163,8 +363,15 @@
(SolrException.ErrorCode.SERVER_ERROR, "can't rebuild spellchecker
index without termSourceField configured");
}
+ Float threshold;
+ try {
+ threshold = req.getParams().getFloat("sp.dictionary.threshold",
DEFAULT_DICTIONARY_THRESHOLD);
+ } catch (NumberFormatException e) {
+ throw new RuntimeException("Threshold must be a valid positive float",
e);
+ }
+
IndexReader indexReader = req.getSearcher().getReader();
- Dictionary dictionary = new LuceneDictionary(indexReader, termSourceField);
+ Dictionary dictionary = new HiFrequencyDictionary(indexReader,
termSourceField, threshold);
spellChecker.clearIndex();
spellChecker.indexDictionary(dictionary);
reopen();
Added:
lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java?rev=592129&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java
(added)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java
Mon Nov 5 11:39:14 2007
@@ -0,0 +1,140 @@
+package org.apache.solr.util;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.TermEnum;
+import org.apache.lucene.search.spell.Dictionary;
+
+/**
+ * Hi Frequency Dictionary: terms taken from the given field
+ * of a Lucene index, which appear in a number of documents
+ * above a given threshold.
+ *
+ * When using IndexReader.terms(Term) the code must not call next() on TermEnum
+ * as the first call to TermEnum, see: http://issues.apache.org/jira/browse/LUCENE-6
+ *
+ * Threshold is a value in [0..1] representing the minimum
+ * fraction of the total number of documents in which a term must appear.
+ *
+ * @author Mike Krimerman
+ *
+ * Based on LuceneDictionary, by
+ * @author Nicolas Maisonneuve
+ * @author Christian Mallwitz
+ */
+public class HiFrequencyDictionary implements Dictionary {
+ private IndexReader reader;
+ private String field;
+ private float thresh;
+
+ public HiFrequencyDictionary(IndexReader reader, String field, float thresh)
{
+ this.reader = reader;
+ this.field = field.intern();
+ this.thresh = thresh;
+ }
+
+ public final Iterator getWordsIterator() {
+ return new HiFrequencyIterator();
+ }
+
+
+ final class HiFrequencyIterator implements Iterator {
+ private TermEnum termEnum;
+ private Term actualTerm;
+ private boolean hasNextCalled;
+ private int minNumDocs;
+
+ HiFrequencyIterator() {
+ try {
+ termEnum = reader.terms(new Term(field, ""));
+ minNumDocs = (int)(thresh * (float)reader.numDocs());
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private boolean isFrequent(Term term) {
+ try {
+ return reader.docFreq(term) >= minNumDocs;
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ public Object next() {
+ if (!hasNextCalled) {
+ hasNext();
+ }
+ hasNextCalled = false;
+
+ try {
+ termEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ return (actualTerm != null) ? actualTerm.text() : null;
+ }
+
+ public boolean hasNext() {
+ if (hasNextCalled) {
+ return actualTerm != null;
+ }
+ hasNextCalled = true;
+
+ do {
+ actualTerm = termEnum.term();
+
+ // if there are no words return false
+ if (actualTerm == null) {
+ return false;
+ }
+
+ String currentField = actualTerm.field();
+
+ // if the next word doesn't have the same field return false
+ if (currentField != field) {
+ actualTerm = null;
+ return false;
+ }
+
+ // got a valid term, does it pass the threshold?
+ if (isFrequent(actualTerm)) {
+ return true;
+ }
+
+ // term not up to threshold
+ try {
+ termEnum.next();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+
+ } while (true);
+ }
+
+ public void remove() {
+ throw new UnsupportedOperationException();
+ }
+ }
+}
Propchange:
lucene/solr/trunk/src/java/org/apache/solr/util/HiFrequencyDictionary.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java?rev=592129&view=auto
==============================================================================
---
lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java
(added)
+++
lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java
Mon Nov 5 11:39:14 2007
@@ -0,0 +1,473 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.handler;
+
+import org.apache.solr.util.AbstractSolrTestCase;
+
+/**
+ * This is a test case to test the SpellCheckerRequestHandler class.
+ * It tests:
+ * <ul>
+ * <li>The generation of the spell checker's list with 10 words</li>
+ * <li>The identification of the word that was being spell checked</li>
+ * <li>The confirmation of whether or not the word exists in the index</li>
+ * <li>The suggested list for correctly and incorrectly spelled words</li>
+ * <li>The suggestions for both correct and incorrect words</li>
+ * <li>The limitation on the number of suggestions with the
+ * suggestionCount parameter</li>
+ * <li>The usage of the extendedResults parameter (multi-word checking)</li>
+ * </ul>
+ *
+ * Notes/Concerns about this Test Case:
+ * <ul>
+ * <li>This is my first test case for a Solr Handler. As such I am not
+ * familiar with the AbstractSolrTestCase, and I am not
+ * 100% sure these test cases will work the same way for each person
+ * who runs them (see next note).</li>
+ * <li>The order of the arrays (arr) may not be consistent on other
+ * systems or across different runs; as such these test cases may fail.</li>
+ * <li>Note: I changed //arr/str[1][.='cart'] to //arr/str[.='cart'] and it
+ * appears to work.</li>
+ * <li>The two notations appear to successfully test for the same thing:
+ * "//lst[@name='result']/lst[@name='word']/str[@name='words'][.='cat']"
+ * and "//str[@name='words'][.='cat']" which I would think // would indicate
+ * a root node.</li>
+ * </ul>
+ */
+public class SpellCheckerRequestHandlerTest
+ extends AbstractSolrTestCase
+{
+
+ @Override
+ public String getSchemaFile() { return "solr/conf/schema-spellchecker.xml";
}
+
+ @Override
+ public String getSolrConfigFile() { return
"solr/conf/solrconfig-spellchecker.xml"; }
+
+ @Override
+ public void setUp() throws Exception {
+ super.setUp();
+
+
+ }
+
+ private void buildSpellCheckIndex()
+ {
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertU("Add some words to the Spell Check Index:",
+ adoc("id", "100",
+ "spell", "solr"));
+ assertU(adoc("id", "101",
+ "spell", "cat"));
+ assertU(adoc("id", "102",
+ "spell", "cart"));
+ assertU(adoc("id", "103",
+ "spell", "carp"));
+ assertU(adoc("id", "104",
+ "spell", "cant"));
+ assertU(adoc("id", "105",
+ "spell", "catnip"));
+ assertU(adoc("id", "106",
+ "spell", "cattails"));
+ assertU(adoc("id", "107",
+ "spell", "cod"));
+ assertU(adoc("id", "108",
+ "spell", "corn"));
+ assertU(adoc("id", "109",
+ "spell", "cot"));
+
+ assertU(commit());
+ assertU(optimize());
+
+ lrf.args.put("cmd","rebuild");
+ assertQ("Need to first build the index:",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'cmdExecuted'][.='rebuild']"
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ // ,"//[EMAIL PROTECTED]'suggestions'][.='']"
+ );
+ lrf.args.clear();
+
+ }
+
+ /**
+ * Test for correct spelling of a single word at various accuracy levels
+ * to see how the suggestions vary.
+ */
+ public void testSpellCheck_01_correctWords() {
+
+ buildSpellCheckIndex();
+
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+
+ lrf.args.put("sp.query.accuracy",".9");
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+ lrf.args.put("sp.query.accuracy",".4");
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ ,"//arr/str[.='cot']"
+ ,"//arr/str[.='cart']"
+// ,"//arr/str[1][.='cot']"
+// ,"//arr/str[2][.='cart']"
+ );
+
+ lrf.args.put("sp.query.accuracy",".0");
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ ,"//arr/str[.='cart']"
+ ,"//arr/str[.='cot']"
+ ,"//arr/str[.='carp']"
+ ,"//arr/str[.='cod']"
+ ,"//arr/str[.='corn']"
+ );
+ }
+
+ /**
+ * Test for incorrect spelling of a single word at various accuracy levels
+ * and suggestion counts to see how the suggestions vary.
+ */
+ public void testSpellCheck_02_incorrectWords() {
+
+ buildSpellCheckIndex();
+
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertQ("Confirm the index is still valid",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'words'][.='coat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='false']"
+ ,"//[EMAIL PROTECTED]'suggestions'][.='']"
+ );
+
+
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'words'][.='coat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='false']"
+ ,"//arr/str[.='cot']"
+ ,"//arr/str[.='cat']"
+ ,"//arr/str[.='corn']"
+ ,"//arr/str[.='cart']"
+ ,"//arr/str[.='cod']"
+ ,"//arr/str[.='solr']"
+ ,"//arr/str[.='carp']"
+ );
+
+ lrf.args.put("sp.query.suggestionCount", "2");
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'words'][.='coat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='false']"
+ ,"//arr/str[.='cot']"
+ ,"//arr/str[.='cat']"
+ );
+ }
+
+ /**
+ * Test for correct spelling of one or several words with extended results enabled,
+ * at various accuracy levels and suggestion counts, to see how the suggestions vary.
+ */
+ public void testSpellCheck_03_multiWords_correctWords() {
+
+ buildSpellCheckIndex();
+
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertQ("Confirm the index is still valid",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+
+ // Enable extended results (multi-word) formatting:
+ lrf.args.put("sp.query.extendedResults", "true");
+
+
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'cat']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions' and
count(lst)=0]"
+ );
+
+
+ // Please note that the following produces the following XML structure.
+ // <response>
+ // <responseHeader>
+ // <status>0</status><QTime>0</QTime>
+ // </responseHeader>
+ // <lst name="result">
+ // <lst name="cat">
+ // <int name="frequency">1</int>
+ // <lst name="suggestions">
+ // <lst name="cart"><int name="frequency">1</int></lst>
+ // <lst name="cot"><int name="frequency">1</int></lst>
+ // <lst name="cod"><int name="frequency">1</int></lst>
+ // <lst name="carp"><int name="frequency">1</int></lst>
+ // </lst>
+ // </lst>
+ // </lst>
+ // </response>
+
+
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'cat']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cart']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cot']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cod']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'carp']/[EMAIL PROTECTED]'frequency'][.>0]"
+ );
+
+ lrf.args.put("sp.query.suggestionCount", "2");
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'cat']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cart']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cot']"
+ );
+
+ /* The following is the generated XML response for the next query with
three words:
+ <response>
+ <responseHeader><status>0</status><QTime>0</QTime></responseHeader>
+ <lst name="result">
+ <lst name="cat">
+ <int name="frequency">1</int>
+ <lst name="suggestions">
+ <lst name="cart"><int name="frequency">1</int></lst>
+ <lst name="cot"><int name="frequency">1</int></lst>
+ </lst>
+ </lst>
+ <lst name="card">
+ <int name="frequency">1</int>
+ <lst name="suggestions">
+ <lst name="carp"><int name="frequency">1</int></lst>
+ <lst name="cat"><int name="frequency">1</int></lst>
+ </lst>
+ </lst>
+ <lst name="carp">
+ <int name="frequency">1</int>
+ <lst name="suggestions">
+ <lst name="cart"><int name="frequency">1</int></lst>
+ <lst name="corn"><int name="frequency">1</int></lst>
+ </lst>
+ </lst>
+ </lst>
+ </response>
+ */
+
+ lrf.args.put("sp.query.suggestionCount", "2");
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("cat cart carp")
+ ,"//[EMAIL PROTECTED]'cat']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cart']"
+ ,"//[EMAIL PROTECTED]'cat']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cot']"
+
+ ,"//[EMAIL PROTECTED]'cart']"
+ ,"//[EMAIL PROTECTED]'cart']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'cart']/lst/lst[1]"
+ ,"//[EMAIL PROTECTED]'cart']/lst/lst[2]"
+
+ ,"//[EMAIL PROTECTED]'carp']"
+ ,"//[EMAIL PROTECTED]'carp']/[EMAIL PROTECTED]'frequency'][.>0]"
+ ,"//[EMAIL PROTECTED]'carp']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'cart']"
+ ,"//[EMAIL PROTECTED]'carp']/[EMAIL PROTECTED]'suggestions']/[EMAIL
PROTECTED]'corn']"
+
+ );
+
+ }
+
+ /**
+ * Test for incorrect spelling of one or several words with extended results enabled,
+ * at various accuracy levels and suggestion counts, to see how the suggestions vary.
+ */
+ public void testSpellCheck_04_multiWords_incorrectWords() {
+
+ buildSpellCheckIndex();
+
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertQ("Confirm the index is still valid",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+
+ // Enable extended results (multi-word) formatting:
+ lrf.args.put("sp.query.extendedResults", "true");
+
+
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'coat']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL PROTECTED]'suggestions' and
count(lst)=0]"
+ );
+
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'coat']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'cot']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'cat']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'corn']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'cart']"
+ );
+
+ lrf.args.put("sp.query.suggestionCount", "2");
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("coat")
+ ,"//[EMAIL PROTECTED]'coat']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'cot']"
+ ,"//[EMAIL PROTECTED]'coat']/[EMAIL
PROTECTED]'suggestions']/[EMAIL PROTECTED]'cat']"
+ );
+
+
+
+ lrf.args.put("sp.query.suggestionCount", "2");
+ lrf.args.put("sp.query.accuracy",".2");
+ assertQ("Failed to spell check",
+ req("cet cert corp")
+ ,"//[EMAIL PROTECTED]'cet']"
+ ,"//[EMAIL PROTECTED]'cet']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'cet']/[EMAIL PROTECTED]'suggestions']/lst[1]"
+ ,"//[EMAIL PROTECTED]'cet']/[EMAIL PROTECTED]'suggestions']/lst[2]"
+
+ ,"//[EMAIL PROTECTED]'cert']"
+ ,"//[EMAIL PROTECTED]'cert']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'cert']/[EMAIL PROTECTED]'suggestions']/lst[1]"
+ ,"//[EMAIL PROTECTED]'cert']/[EMAIL PROTECTED]'suggestions']/lst[2]"
+
+ ,"//[EMAIL PROTECTED]'corp']"
+ ,"//[EMAIL PROTECTED]'corp']/[EMAIL PROTECTED]'frequency'][.=0]"
+ ,"//[EMAIL PROTECTED]'corp']/[EMAIL PROTECTED]'suggestions']/lst[1]"
+ ,"//[EMAIL PROTECTED]'corp']/[EMAIL PROTECTED]'suggestions']/lst[2]"
+
+ );
+
+ }
+
+ public void testSpellCheck_05_buildDictionary() {
+ lrf = h.getRequestFactory("spellchecker", 0, 20 );
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertU("Add some words to the Spell Check Index:",
+ adoc("id", "100",
+ "spell", "solr cat cart"));
+ assertU(adoc("id", "101",
+ "spell", "cat cart"));
+ assertU(adoc("id", "102",
+ "spell", "cat cart"));
+ assertU(adoc("id", "103",
+ "spell", "cat cart carp"));
+ assertU(adoc("id", "104",
+ "spell", "cat car cant"));
+ assertU(adoc("id", "105",
+ "spell", "cat catnip"));
+ assertU(adoc("id", "106",
+ "spell", "cat cattails"));
+ assertU(adoc("id", "107",
+ "spell", "cat cod"));
+ assertU(adoc("id", "108",
+ "spell", "cat corn"));
+ assertU(adoc("id", "109",
+ "spell", "cat cot"));
+ assertU(commit());
+ assertU(optimize());
+
+ lrf.args.put("sp.dictionary.threshold", "0.20");
+ lrf.args.put("cmd","rebuild");
+ assertQ("Need to first build the index:",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'cmdExecuted'][.='rebuild']"
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+ lrf.args.clear();
+ lrf.args.put("version","2.0");
+ lrf.args.put("sp.query.accuracy",".9");
+
+ assertQ("Confirm index contains only words above threshold",
+ req("cat")
+ ,"//[EMAIL PROTECTED]'words'][.='cat']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+ assertQ("Confirm index contains only words above threshold",
+ req("cart")
+ ,"//[EMAIL PROTECTED]'words'][.='cart']"
+ ,"//[EMAIL PROTECTED]'exist'][.='true']"
+ );
+
+ assertQ("Confirm index contains only words above threshold",
+ req("cod")
+ ,"//[EMAIL PROTECTED]'words'][.='cod']"
+ ,"//[EMAIL PROTECTED]'exist'][.='false']"
+ );
+
+ assertQ("Confirm index contains only words above threshold",
+ req("corn")
+ ,"//[EMAIL PROTECTED]'words'][.='corn']"
+ ,"//[EMAIL PROTECTED]'exist'][.='false']"
+ );
+
+ lrf.args.clear();
+ }
+}
Propchange:
lucene/solr/trunk/src/test/org/apache/solr/handler/SpellCheckerRequestHandlerTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml?rev=592129&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml
(added)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml Mon
Nov 5 11:39:14 2007
@@ -0,0 +1,83 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ For more information on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+-->
+
+<schema name="Solr SpellCheck Test" version="1.1">
+ <!-- attribute "name" is the name of this schema and is only used for
display purposes.
+ Applications should change this to reflect the nature of the search
collection.
+ version="1.1" is Solr's version number for the schema syntax and
semantics. It should
+ not normally be changed by applications.
+ 1.0: multiValued attribute did not exist, all fields are multiValued by
nature
+ 1.1: multiValued attribute introduced, false by default -->
+
+ <types>
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true"
omitNorms="true"/>
+
+ <fieldtype name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ <fieldType name="spellText" class="solr.TextField"
positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ </types>
+
+
+ <fields>
+ <field name="id" type="string" indexed="true" stored="true"/>
+ <field name="spell" type="spellText" indexed="true" stored="true" />
+ <field name="text" type="text" indexed="true" stored="false"
multiValued="true"/>
+ </fields>
+
+ <!-- field to use to determine and enforce document uniqueness. -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>text</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="OR"/>
+
+</schema>
Propchange:
lucene/solr/trunk/src/test/test-files/solr/conf/schema-spellchecker.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added:
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml?rev=592129&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
(added)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
Mon Nov 5 11:39:14 2007
@@ -0,0 +1,103 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+ <indexDefaults>
+ <useCompoundFile>false</useCompoundFile>
+ <mergeFactor>10</mergeFactor>
+ <maxBufferedDocs>1000</maxBufferedDocs>
+ <maxMergeDocs>2147483647</maxMergeDocs>
+ <maxFieldLength>10000</maxFieldLength>
+ <writeLockTimeout>1000</writeLockTimeout>
+ <commitLockTimeout>10000</commitLockTimeout>
+ </indexDefaults>
+
+ <mainIndex>
+ <useCompoundFile>false</useCompoundFile>
+ <mergeFactor>10</mergeFactor>
+ <maxBufferedDocs>1000</maxBufferedDocs>
+ <maxMergeDocs>2147483647</maxMergeDocs>
+ <maxFieldLength>10000</maxFieldLength>
+ <unlockOnStartup>true</unlockOnStartup>
+ </mainIndex>
+
+
+ <updateHandler class="solr.DirectUpdateHandler2">
+ <commitIntervalLowerBound>0</commitIntervalLowerBound>
+ </updateHandler>
+
+
+ <query>
+ <maxBooleanClauses>1024</maxBooleanClauses>
+ <useFilterForSortedQuery>true</useFilterForSortedQuery>
+ <queryResultWindowSize>10</queryResultWindowSize>
+ <HashDocSet maxSize="3000" loadFactor="0.75"/>
+ <boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
+ </query>
+
+
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler" />
+ <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
+
+
+ <!-- SpellCheckerRequestHandler takes in a word (or several words) as the
+ value of the "q" parameter and returns a list of alternative spelling
+ suggestions. If invoked with a ...&cmd=rebuild, it will rebuild the
+ spellchecker index.
+ -->
+ <requestHandler name="spellchecker" class="solr.SpellCheckerRequestHandler"
startup="lazy">
+ <!-- default values for query parameters -->
+ <lst name="defaults">
+ <int name="sp.query.suggestionCount">20</int>
+ <float name="sp.query.accuracy">0.60</float>
+ </lst>
+
+ <!-- Main init params for handler -->
+
+ <!-- The directory where your SpellChecker Index should live. -->
+ <!-- May be absolute, or relative to the Solr "dataDir" directory. -->
+ <!-- If this option is not specified, a RAM directory will be used -->
+ <str name="sp.dictionary.spellcheckerIndexDir">spell</str>
+
+ <!-- the field in your schema that you want to be able to build -->
+ <!-- your spell index on. This should be a field that uses a very -->
+ <!-- simple FieldType without a lot of Analysis (e.g. string) -->
+ <str name="sp.dictionary.termSourceField">spell</str>
+
+ <!-- threshold for word to make it into the dictionary -->
+ <!-- a word should appear at minimum in the specified percent of
documents -->
+ <str name="sp.dictionary.threshold">0.0</str>
+
+ </requestHandler>
+
+
+
+ <queryResponseWriter name="standard"
class="org.apache.solr.request.XMLResponseWriter"/>
+ <queryResponseWriter name="useless"
class="org.apache.solr.OutputWriterTest$UselessOutputWriter"/>
+ <queryResponseWriter name="xslt"
class="org.apache.solr.request.XSLTResponseWriter"/>
+ <queryResponseWriter name="json"
class="org.apache.solr.request.JSONResponseWriter"/>
+
+
+ <!-- config for the admin interface -->
+ <admin>
+ <defaultQuery>solr</defaultQuery>
+ <gettableFiles>solrconfig.xml schema.xml admin-extra.html</gettableFiles>
+ </admin>
+
+</config>
Propchange:
lucene/solr/trunk/src/test/test-files/solr/conf/solrconfig-spellchecker.xml
------------------------------------------------------------------------------
svn:eol-style = native