Author: tommaso
Date: Wed Feb 4 12:38:00 2015
New Revision: 1657163
URL: http://svn.apache.org/r1657163
Log:
OAK-2457 - suggestor support in lucene index (rep:suggest)
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java
(with props)
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
(with props)
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java
(with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
Wed Feb 4 12:38:00 2015
@@ -118,6 +118,17 @@ public final class FieldFactory {
return new IntField(FieldNames.PATH_DEPTH, PathUtils.getDepth(path),
NO);
}
+ /**
+ * Creates the single suggest field of a document from one or more values.
+ * The values are concatenated in the given order, separated by a line feed
+ * (a separator is only emitted once the buffer is non-empty), and the result
+ * is wrapped in an {@link OakTextField} named {@link FieldNames#SUGGEST}.
+ *
+ * @param values the texts to merge into one suggest field
+ * @return the field holding the merged text
+ */
+ public static Field newSuggestField(String... values) {
+ StringBuilder merged = new StringBuilder();
+ for (int i = 0; i < values.length; i++) {
+ if (merged.length() != 0) {
+ merged.append('\n');
+ }
+ merged.append(values[i]);
+ }
+ String suggestText = merged.toString();
+ return new OakTextField(FieldNames.SUGGEST, suggestText, true);
+ }
+
/**
* Date values are saved with sec resolution
* @param date jcr data string
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldNames.java
Wed Feb 4 12:38:00 2015
@@ -26,6 +26,7 @@ import java.util.Set;
*/
public final class FieldNames {
+
/**
* Private constructor.
*/
@@ -54,6 +55,16 @@ public final class FieldNames {
public static final String FULLTEXT = ":fulltext";
/**
+ * Name of the field that contains the suggest index.
+ */
+ public static final String SUGGEST = ":suggest";
+
+ /**
+ * Name of the field that contains the spellcheck index.
+ */
+ public static final String SPELLCHECK = ":spellcheck";
+
+ /**
* Prefix for all field names that are fulltext indexed by property name.
*/
public static final String ANALYZED_FIELD_PREFIX = "full:";
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Wed Feb 4 12:38:00 2015
@@ -50,6 +50,7 @@ import java.util.concurrent.atomic.Atomi
import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
@@ -59,6 +60,7 @@ import org.apache.jackrabbit.oak.plugins
import
org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
@@ -102,6 +104,7 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
@@ -264,7 +267,7 @@ public class LuceneIndex implements Adva
public Cursor query(final IndexPlan plan, NodeState rootState) {
final Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
- Set<String> relPaths = getRelativePaths(ft);
+ final Set<String> relPaths = getRelativePaths(ft);
if (relPaths.size() > 1) {
return new MultiLuceneIndex(filter, rootState, relPaths).query();
}
@@ -360,14 +363,24 @@ public class LuceneIndex implements Adva
}
lastDocToRecord = doc;
}
- } else if (luceneRequestFacade.getLuceneRequest()
instanceof SuggestWord[]) {
- SuggestWord[] intent = (SuggestWord[])
luceneRequestFacade.getLuceneRequest();
- Collection<String> suggestedWords = new
ArrayList<String>(intent.length);
- for (SuggestWord suggestWord : intent) {
+ } else if (luceneRequestFacade.getLuceneRequest()
instanceof SpellcheckHelper.SpellcheckQuery) {
+ SpellcheckHelper.SpellcheckQuery spellcheckQuery =
(SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
+ SuggestWord[] suggestWords =
SpellcheckHelper.getSpellcheck(spellcheckQuery);
+ Collection<String> suggestedWords = new
ArrayList<String>(suggestWords.length);
+ for (SuggestWord suggestWord : suggestWords) {
suggestedWords.add(suggestWord.string);
}
queue.add(new LuceneResultRow(suggestedWords));
noDocs = true;
+ } else if (luceneRequestFacade.getLuceneRequest()
instanceof SuggestHelper.SuggestQuery) {
+ SuggestHelper.SuggestQuery suggestQuery =
(SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
+ List<Lookup.LookupResult> lookupResults =
SuggestHelper.getSuggestions(suggestQuery);
+ Collection<String> suggestedWords = new
ArrayList<String>(lookupResults.size());
+ for (Lookup.LookupResult suggestWord : lookupResults) {
+ suggestedWords.add("{term=" + suggestWord.key +
",weight=" + suggestWord.value + "}");
+ }
+ queue.add(new LuceneResultRow(suggestedWords));
+ noDocs = true;
}
} catch (IOException e) {
LOG.warn("query via {} failed.", LuceneIndex.this, e);
@@ -481,10 +494,14 @@ public class LuceneIndex implements Adva
if (query.startsWith("spellcheck?")) {
String spellcheckQueryString = query.replace("spellcheck?",
"");
if (reader != null) {
- return new
LuceneRequestFacade<SuggestWord[]>(SpellcheckHelper.getSpellcheck(spellcheckQueryString,
reader));
+ return new
LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString,
reader));
}
- }
- else {
+ } else if (query.startsWith("suggest?")) {
+ String suggestQueryString = query.replace("suggest?", "");
+ if (reader != null) {
+ return new
LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString,
reader));
+ }
+ } else {
try {
qs.add(queryParser.parse(query));
} catch (ParseException e) {
@@ -1050,8 +1067,8 @@ public class LuceneIndex implements Adva
if (QueryImpl.JCR_SCORE.equals(columnName)) {
return PropertyValues.newDouble(currentRow.score);
}
- if (QueryImpl.REP_SPELLCHECK.equals(columnName)) {
- return
PropertyValues.newString(currentRow.suggestWords);
+ if (QueryImpl.REP_SPELLCHECK.equals(columnName) ||
QueryImpl.REP_SUGGEST.equals(columnName)) {
+ return
PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
return pathRow.getValue(columnName);
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexConstants.java
Wed Feb 4 12:38:00 2015
@@ -226,4 +226,14 @@ public interface LuceneIndexConstants {
* By default, no more than 10,000 terms will be indexed for a field.
*/
String MAX_FIELD_LENGTH = "maxFieldLength";
+
+ /**
+ * Whether to use this property's values for suggestions.
+ */
+ String PROP_USE_IN_SUGGEST = "useInSuggest";
+
+ /**
+ * Whether to use this property's values for spellchecking.
+ */
+ String PROP_USE_IN_SPELLCHECK = "useInSpellcheck";
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexEditor.java
Wed Feb 4 12:38:00 2015
@@ -62,6 +62,7 @@ import org.apache.lucene.document.LongFi
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.util.BytesRef;
@@ -329,8 +330,21 @@ public class LuceneIndexEditor implement
document.add(newDepthField(path));
}
+ // because of LUCENE-5833 we have to merge the suggest fields into a single one:
+ // every suggest field is folded into one newline-separated field that is added
+ // once, after all the other fields
+ Field suggestField = null;
for (Field f : fields) {
- document.add(f);
+ // compare the whole name: endsWith() would also capture any field whose
+ // name is merely a suffix of FieldNames.SUGGEST (e.g. "suggest" or "")
+ if (FieldNames.SUGGEST.equals(f.name())) {
+ if (suggestField == null) {
+ suggestField = f;
+ } else {
+ suggestField = FieldFactory.newSuggestField(suggestField.stringValue(), f.stringValue());
+ }
+ } else {
+ document.add(f);
+ }
+ }
+ if (suggestField != null) {
+ document.add(suggestField);
}
//TODO Boost at document level
@@ -365,6 +379,14 @@ public class LuceneIndexEditor implement
fields.add(newPropertyField(analyzedPropName, value,
!pd.skipTokenization(pname), pd.stored));
}
+ if (pd.useInSuggest) {
+ fields.add(newPropertyField(FieldNames.SUGGEST, value,
true, true));
+ }
+
+ if (pd.useInSpellcheck) {
+ fields.add(newPropertyField(FieldNames.SPELLCHECK,
value, true, true));
+ }
+
if (pd.nodeScopeIndex) {
Field field = newFulltextField(value);
field.setBoost(pd.boost);
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
Wed Feb 4 12:38:00 2015
@@ -34,6 +34,7 @@ import javax.annotation.Nullable;
import javax.jcr.PropertyType;
import com.google.common.collect.AbstractIterator;
+import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
@@ -45,6 +46,7 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexPlanner.PlanResult;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.SpellcheckHelper;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
@@ -88,6 +90,8 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spell.SuggestWord;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -332,13 +336,23 @@ public class LucenePropertyIndex impleme
}
lastDocToRecord = doc;
}
- } else if (luceneRequestFacade.getLuceneRequest()
instanceof SuggestWord[]) {
- SuggestWord[] suggestWords = (SuggestWord[])
luceneRequestFacade.getLuceneRequest();
- String[] suggestedWordsStrings = new
String[suggestWords.length];
- for (int i = 0; i < suggestWords.length; i++) {
- suggestedWordsStrings[i] = suggestWords[i].string;
+ } else if (luceneRequestFacade.getLuceneRequest()
instanceof SpellcheckHelper.SpellcheckQuery) {
+ SpellcheckHelper.SpellcheckQuery spellcheckQuery =
(SpellcheckHelper.SpellcheckQuery) luceneRequestFacade.getLuceneRequest();
+ SuggestWord[] suggestWords =
SpellcheckHelper.getSpellcheck(spellcheckQuery);
+ Collection<String> suggestedWords = new
ArrayList<String>(suggestWords.length);
+ for (SuggestWord suggestWord : suggestWords) {
+ suggestedWords.add(suggestWord.string);
}
- queue.add(new LuceneResultRow(suggestedWordsStrings));
+ queue.add(new LuceneResultRow(suggestedWords));
+ noDocs = true;
+ } else if (luceneRequestFacade.getLuceneRequest()
instanceof SuggestHelper.SuggestQuery) {
+ SuggestHelper.SuggestQuery suggestQuery =
(SuggestHelper.SuggestQuery) luceneRequestFacade.getLuceneRequest();
+ List<Lookup.LookupResult> lookupResults =
SuggestHelper.getSuggestions(suggestQuery);
+ Collection<String> suggestedWords = new
ArrayList<String>(lookupResults.size());
+ for (Lookup.LookupResult suggestWord : lookupResults) {
+ suggestedWords.add("{term=" + suggestWord.key +
",weight=" + suggestWord.value + "}");
+ }
+ queue.add(new LuceneResultRow(suggestedWords));
noDocs = true;
}
} catch (IOException e) {
@@ -478,10 +492,15 @@ public class LucenePropertyIndex impleme
} else if (query.startsWith("spellcheck?")) {
String spellcheckQueryString = query.replace("spellcheck?",
"");
if (reader != null) {
- return new
LuceneRequestFacade<SuggestWord[]>(SpellcheckHelper.getSpellcheck(spellcheckQueryString,
reader));
+ return new
LuceneRequestFacade<SpellcheckHelper.SpellcheckQuery>(SpellcheckHelper.getSpellcheckQuery(spellcheckQueryString,
reader));
}
- }
- else {
+ } else if (query.startsWith("suggest?")) {
+ String suggestQueryString = query.replace("suggest?", "");
+ if (reader != null) {
+ return new
LuceneRequestFacade<SuggestHelper.SuggestQuery>(SuggestHelper.getSuggestQuery(suggestQueryString,
+ reader));
+ }
+ } else {
try {
qs.add(queryParser.parse(query));
} catch (ParseException e) {
@@ -834,7 +853,7 @@ public class LucenePropertyIndex impleme
// (a "non-local return")
final AtomicReference<Query> result = new AtomicReference<Query>();
ft.accept(new FullTextVisitor() {
-
+
@Override
public boolean visit(FullTextContains contains) {
visitTerm(contains.getPropertyName(), contains.getRawText(),
null, false);
@@ -880,7 +899,7 @@ public class LucenePropertyIndex impleme
public boolean visit(FullTextTerm term) {
return visitTerm(term.getPropertyName(), term.getText(),
term.getBoost(), term.isNot());
}
-
+
private boolean visitTerm(String propertyName, String text, String
boost, boolean not) {
String p = getLuceneFieldName(propertyName, pr);
Query q = tokenToQuery(text, p, analyzer);
@@ -1002,15 +1021,15 @@ public class LucenePropertyIndex impleme
static class LuceneResultRow {
final String path;
final double score;
- final String[] suggestWords;
+ final Iterable<String> suggestWords;
LuceneResultRow(String path, double score) {
this.path = path;
this.score = score;
- this.suggestWords = new String[0];
+ this.suggestWords = Collections.emptySet();
}
- LuceneResultRow(String[] suggestWords) {
+ LuceneResultRow(Iterable<String> suggestWords) {
this.path = "/";
this.score = 1.0d;
this.suggestWords = suggestWords;
@@ -1021,17 +1040,17 @@ public class LucenePropertyIndex impleme
return String.format("%s (%1.2f)", path, score);
}
}
-
+
/**
* A cursor over Lucene results. The result includes the path,
* and the jcr:score pseudo-property as returned by Lucene.
*/
static class LucenePathCursor implements Cursor {
-
+
private final Cursor pathCursor;
private final String pathPrefix;
LuceneResultRow currentRow;
-
+
LucenePathCursor(final Iterator<LuceneResultRow> it, final IndexPlan
plan, QueryEngineSettings settings) {
pathPrefix = plan.getPathPrefix();
Iterator<String> pathIterator = new Iterator<String>() {
@@ -1043,7 +1062,7 @@ public class LucenePropertyIndex impleme
@Override
public String next() {
- currentRow = it.next();
+ currentRow = it.next();
return currentRow.path;
}
@@ -1051,11 +1070,11 @@ public class LucenePropertyIndex impleme
public void remove() {
it.remove();
}
-
+
};
pathCursor = new PathCursor(pathIterator, true, settings);
}
-
+
@Override
public boolean hasNext() {
@@ -1088,12 +1107,12 @@ public class LucenePropertyIndex impleme
if (QueryImpl.JCR_SCORE.equals(columnName)) {
return PropertyValues.newDouble(currentRow.score);
}
- if (QueryImpl.REP_SPELLCHECK.equals(columnName)) {
- return
PropertyValues.newString(Arrays.toString(currentRow.suggestWords));
+ if (QueryImpl.REP_SPELLCHECK.equals(columnName) ||
QueryImpl.REP_SUGGEST.equals(columnName)) {
+ return
PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
return pathRow.getValue(columnName);
}
-
+
};
}
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/PropertyDefinition.java
Wed Feb 4 12:38:00 2015
@@ -70,6 +70,10 @@ class PropertyDefinition {
final int includedPropertyTypes;
+ boolean useInSuggest;
+
+ boolean useInSpellcheck;
+
public PropertyDefinition(IndexingRule idxDefn, String name, NodeState
defn) {
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
this.name = getName(defn, name);
@@ -90,6 +94,8 @@ class PropertyDefinition {
//TODO Add test case for above cases
this.propertyType = getPropertyType(idxDefn, name, defn);
+ this.useInSuggest = getOptionalValue(defn,
LuceneIndexConstants.PROP_USE_IN_SUGGEST, false);
+ this.useInSpellcheck = getOptionalValue(defn,
LuceneIndexConstants.PROP_USE_IN_SPELLCHECK, false);
}
/**
@@ -149,6 +155,8 @@ class PropertyDefinition {
", propertyIndex=" + propertyIndex +
", analyzed=" + analyzed +
", ordered=" + ordered +
+ ", useInSuggest=" + useInSuggest+
+ ", useInSpellcheck=" + useInSpellcheck+
'}';
}
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java?rev=1657163&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java
Wed Feb 4 12:38:00 2015
@@ -0,0 +1,42 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.io.Reader;
+
+import org.apache.lucene.analysis.util.CharTokenizer;
+import org.apache.lucene.util.Version;
+
+/**
+ * A {@link org.apache.lucene.analysis.util.CharTokenizer} that treats every
+ * character other than the line feed (<code>\n</code>) as a token character,
+ * so tokens are divided at <code>\n</code>.
+ * <p>
+ * This is a workaround: it should be deprecated/removed and no longer used in
+ * {@link org.apache.jackrabbit.oak.plugins.index.lucene.util.SuggestHelper}
+ * (and the related 'suggest fields merging code' removed from
+ * {@link org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexEditor})
+ * if / once the LUCENE-5833 fix gets included in the Lucene version we ship.
+ */
+public class CRTokenizer extends CharTokenizer {
+
+ /** The character at which the input is divided into tokens. */
+ private static final int LINE_FEED = '\n';
+
+ public CRTokenizer(Version matchVersion, Reader input) {
+ super(matchVersion, input);
+ }
+
+ @Override
+ protected boolean isTokenChar(int c) {
+ return LINE_FEED != c;
+ }
+}
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/CRTokenizer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SpellcheckHelper.java
Wed Feb 4 12:38:00 2015
@@ -29,28 +29,59 @@ import org.apache.lucene.search.spell.Su
* under the hood.
*/
public class SpellcheckHelper {
- public static SuggestWord[] getSpellcheck(String spellcheckQueryString,
IndexReader reader) {
- DirectSpellChecker spellChecker = new DirectSpellChecker();
+ /**
+ * Executes a previously parsed spellcheck query. A new
+ * {@link DirectSpellChecker} is created for each call and asked for
+ * suggestions similar to the query's term, using the query's count and
+ * index reader.
+ *
+ * @param spellcheckQuery the query to run
+ * @return the suggestions returned by the spell checker
+ * @throws RuntimeException wrapping any exception raised while spellchecking
+ */
+ public static SuggestWord[] getSpellcheck(SpellcheckQuery spellcheckQuery)
{
try {
- String text = null;
- for (String param : spellcheckQueryString.split("&")) {
- String[] keyValuePair = param.split("=");
- if (keyValuePair.length != 2 || keyValuePair[0] == null ||
keyValuePair[1] == null) {
- throw new RuntimeException("Unparsable native Lucene
Spellcheck query: " + spellcheckQueryString);
- } else {
- if ("term".equals(keyValuePair[0])) {
- text = keyValuePair[1];
- }
- }
- }
- if (text != null) {
- return spellChecker.suggestSimilar(new
Term(FieldNames.FULLTEXT, text), 10, reader);
+ DirectSpellChecker spellChecker = new DirectSpellChecker();
+ return spellChecker.suggestSimilar(spellcheckQuery.getTerm(),
spellcheckQuery.getCount(), spellcheckQuery.getReader());
+ } catch (Exception e) {
+ throw new RuntimeException("could not handle Spellcheck query " +
spellcheckQuery, e);
+ }
+ }
+
+ /**
+ * Parses the part of a native query that follows <code>spellcheck?</code>.
+ * The string is a list of <code>key=value</code> pairs separated by
+ * <code>&amp;</code>; only the <code>term</code> key is used.
+ *
+ * @param spellcheckQueryString the <code>key=value</code> pairs to parse
+ * @param reader the index reader the query will be run against
+ * @return a query for the given term on the {@link FieldNames#SPELLCHECK}
+ * field asking for 10 suggestions, or <code>null</code> if no
+ * <code>term</code> parameter was given
+ * @throws RuntimeException if a parameter is not a single <code>key=value</code> pair
+ */
+ public static SpellcheckQuery getSpellcheckQuery(String
spellcheckQueryString, IndexReader reader) {
+ String text = null;
+ for (String param : spellcheckQueryString.split("&")) {
+ String[] keyValuePair = param.split("=");
+ if (keyValuePair.length != 2 || keyValuePair[0] == null ||
keyValuePair[1] == null) {
+ throw new RuntimeException("Unparsable native Lucene
Spellcheck query: " + spellcheckQueryString);
} else {
- return new SuggestWord[0];
+ if ("term".equals(keyValuePair[0])) {
+ text = keyValuePair[1];
+ }
}
+ }
+ if (text == null) {
+ // no "term" parameter: a Term cannot be built from a null text, so
+ // signal "nothing to spellcheck" the same way SuggestHelper#getSuggestQuery does
+ return null;
+ }
+ return new SpellcheckHelper.SpellcheckQuery(new
Term(FieldNames.SPELLCHECK, text), 10, reader);
+ }
- } catch (Exception e) {
- throw new RuntimeException("could not handle Spellcheck query " +
spellcheckQueryString);
+ /**
+ * Immutable holder for the parameters of a spellcheck request: the term to
+ * check, the number of suggestions to ask for and the index reader to use.
+ */
+ public static class SpellcheckQuery {
+ private final Term term;
+ private final int count;
+ private final IndexReader reader;
+
+ public SpellcheckQuery(Term term, int count, IndexReader reader) {
+ this.term = term;
+ this.count = count;
+ this.reader = reader;
+ }
+
+ public Term getTerm() {
+ return term;
+ }
+
+ public int getCount() {
+ return count;
+ }
+
+ public IndexReader getReader() {
+ return reader;
+ }
+
+ // the reader is deliberately left out of the textual form
+ @Override
+ public String toString() {
+ return "SpellcheckQuery{" +
+ "term=" + term +
+ ", count=" + count +
+ '}';
}
}
}
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java?rev=1657163&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
Wed Feb 4 12:38:00 2015
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.io.Reader;
+import java.util.Collections;
+import java.util.List;
+
+import org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.suggest.DocumentDictionary;
+import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.search.suggest.analyzing.FreeTextSuggester;
+import org.apache.lucene.util.Version;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Helper class for getting suggest results for a given term, calling a
+ * {@link org.apache.lucene.search.suggest.Lookup} implementation under the hood.
+ * <p>
+ * {@link #getSuggestQuery(String, IndexReader)} parses a list of
+ * <code>key=value</code> pairs separated by <code>&amp;</code> and uses only the
+ * <code>term</code> key. It returns <code>null</code> when no <code>term</code>
+ * is present. Otherwise it returns a {@link SuggestQuery} whose dictionary reads
+ * the {@link FieldNames#SUGGEST} field of the given reader, with
+ * {@link FieldNames#PATH_DEPTH} passed as the third <code>DocumentDictionary</code>
+ * argument (the weight field, per the Lucene API). A parameter that does not
+ * split into exactly two parts on <code>=</code> raises a
+ * <code>RuntimeException</code>, which the surrounding catch block wraps in a
+ * second <code>RuntimeException</code> ("could not build SuggestQuery ...").
+ * <p>
+ * {@link #getSuggestions(SuggestQuery)} creates a new
+ * <code>FreeTextSuggester</code> on every call and builds it from the query's
+ * dictionary. A failure while building is only logged at warn level. If the
+ * suggester then reports a count greater than zero, up to 10 lookup results for
+ * the query text are returned; otherwise the result is an empty list. A failure
+ * during the lookup is rethrown as a <code>RuntimeException</code>.
+ * <p>
+ * The shared analyzer tokenizes with {@link CRTokenizer}, created with
+ * <code>Version.LUCENE_47</code>. {@link SuggestQuery} is a plain holder for the
+ * dictionary, the text and the analyzer; its <code>toString()</code> prints the
+ * dictionary and the text only.
+ */
+public class SuggestHelper {
+
+ private static final Logger log =
LoggerFactory.getLogger(SuggestHelper.class);
+
+ private static final Analyzer analyzer = new Analyzer() {
+ @Override
+ protected Analyzer.TokenStreamComponents createComponents(String
fieldName, Reader reader) {
+ return new Analyzer.TokenStreamComponents(new
CRTokenizer(Version.LUCENE_47, reader));
+ }
+ };
+
+ public static List<Lookup.LookupResult> getSuggestions(SuggestQuery
suggestQuery) {
+ FreeTextSuggester suggester = new FreeTextSuggester(analyzer);
+ try {
+ DocumentDictionary dictionary = suggestQuery.getDictionary();
+ suggester.build(dictionary); // TODO : it should be possible to
avoid rebuilding the index every time
+ } catch (Exception e) {
+ log.warn("could not build suggester from the passed dictionary ",
e);
+ }
+ try {
+ long count = suggester.getCount();
+ if (count > 0) {
+ return suggester.lookup(suggestQuery.getText(), false, 10);
+ } else {
+ return Collections.emptyList();
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("could not handle Suggest query " +
suggestQuery, e);
+ }
+ }
+
+ public static SuggestQuery getSuggestQuery(String suggestQueryString,
IndexReader reader) {
+ try {
+ String text = null;
+ for (String param : suggestQueryString.split("&")) {
+ String[] keyValuePair = param.split("=");
+ if (keyValuePair.length != 2 || keyValuePair[0] == null ||
keyValuePair[1] == null) {
+ throw new RuntimeException("Unparsable native Lucene
Suggest query: " + suggestQueryString);
+ } else {
+ if ("term".equals(keyValuePair[0])) {
+ text = keyValuePair[1];
+ }
+ }
+ }
+ if (text != null) {
+ return new SuggestQuery(new DocumentDictionary(reader,
FieldNames.SUGGEST, FieldNames.PATH_DEPTH), text, analyzer);
+ } else {
+ return null;
+ }
+
+ } catch (Exception e) {
+ throw new RuntimeException("could not build SuggestQuery " +
suggestQueryString, e);
+ }
+ }
+
+ public static class SuggestQuery {
+
+ private final DocumentDictionary dictionary;
+ private final String text;
+ private final Analyzer analyzer;
+
+ public SuggestQuery(DocumentDictionary dictionary, String text,
Analyzer analyzer) {
+ this.dictionary = dictionary;
+ this.text = text;
+ this.analyzer = analyzer;
+ }
+
+ public DocumentDictionary getDictionary() {
+ return dictionary;
+ }
+
+ public String getText() {
+ return text;
+ }
+
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ @Override
+ public String toString() {
+ return "SuggestQuery{" +
+ "dictionary=" + dictionary +
+ ", text='" + text + '\'' +
+ '}';
+ }
+ }
+}
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/SuggestHelper.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/LuceneOakRepositoryStub.java
Wed Feb 4 12:38:00 2015
@@ -100,6 +100,8 @@ public class LuceneOakRepositoryStub ext
.setProperty(LuceneIndexConstants.PROP_NODE_SCOPE_INDEX,
true)
.setProperty(LuceneIndexConstants.PROP_USE_IN_EXCERPT,
true)
.setProperty(LuceneIndexConstants.PROP_PROPERTY_INDEX,
true)
+ .setProperty(LuceneIndexConstants.PROP_USE_IN_SPELLCHECK,
true)
+ .setProperty(LuceneIndexConstants.PROP_USE_IN_SUGGEST,
true)
.setProperty(LuceneIndexConstants.PROP_NAME,
LuceneIndexConstants.REGEX_ALL_PROPS)
.setProperty(LuceneIndexConstants.PROP_IS_REGEX, true);
}
Added:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java?rev=1657163&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java
Wed Feb 4 12:38:00 2015
@@ -0,0 +1,100 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.jackrabbit.oak.jcr.query;
+
+import javax.jcr.Node;
+import javax.jcr.RepositoryException;
+import javax.jcr.Session;
+import javax.jcr.query.Query;
+import javax.jcr.query.QueryManager;
+import javax.jcr.query.QueryResult;
+import javax.jcr.query.Row;
+import javax.jcr.query.RowIterator;
+
+import org.apache.jackrabbit.core.query.AbstractQueryTest;
+
+/**
+ * Tests the suggest support.
+ */
+public class SuggestTest extends AbstractQueryTest {
+
+    /** Name of the result column the suggestions are read from. */
+    private static final String SUGGEST_COLUMN = "rep:suggest()";
+
+    /** jcr:title written to the first fixture node. */
+    private static final String TITLE_1 =
+            "in 2015 my fox is red, like mike's fox and john's fox";
+
+    /** jcr:title written to the second fixture node. */
+    private static final String TITLE_2 = "in 2015 a red fox is still a fox";
+
+    /**
+     * Runs a suggest query through {@link Query#SQL} for the prefix
+     * "in 201" and expects both fixture titles among the suggestions.
+     */
+    public void testSuggestSql() throws Exception {
+        createTitledNodes();
+
+        String sql = "SELECT [rep:suggest()] FROM nt:base"
+                + " WHERE [jcr:path] = '/' AND SUGGEST('in 201')";
+        assertBothTitlesSuggested(runSuggest(sql, Query.SQL));
+    }
+
+    /**
+     * Same expectation as {@link #testSuggestSql()}, with the query
+     * expressed through {@link Query#XPATH}.
+     */
+    public void testSuggestXPath() throws Exception {
+        createTitledNodes();
+
+        String xpath = "/jcr:root[rep:suggest('in 201')]/(rep:suggest())";
+        assertBothTitlesSuggested(runSuggest(xpath, Query.XPATH));
+    }
+
+    /**
+     * A term that matches neither fixture title must produce the empty
+     * list "[]" rather than a null or missing value.
+     */
+    public void testNoSuggestions() throws Exception {
+        createTitledNodes();
+
+        String sql = "SELECT [rep:suggest()] FROM nt:base"
+                + " WHERE [jcr:path] = '/' AND SUGGEST('blablabla')";
+        String result = runSuggest(sql, Query.SQL);
+        assertNotNull(result);
+        assertEquals("[]", result);
+    }
+
+    /**
+     * Adds "node1" and "node2" below the test root, sets their jcr:title
+     * to {@link #TITLE_1} and {@link #TITLE_2}, and saves the session.
+     */
+    private void createTitledNodes() throws RepositoryException {
+        Session session = superuser;
+        Node n1 = testRootNode.addNode("node1");
+        n1.setProperty("jcr:title", TITLE_1);
+        Node n2 = testRootNode.addNode("node2");
+        n2.setProperty("jcr:title", TITLE_2);
+        session.save();
+    }
+
+    /**
+     * Executes the statement with the superuser session and returns the
+     * concatenated values of the suggest column.
+     *
+     * @param statement query text
+     * @param language  one of the {@link Query} language constants
+     */
+    private String runSuggest(String statement, String language) throws RepositoryException {
+        QueryManager qm = superuser.getWorkspace().getQueryManager();
+        Query q = qm.createQuery(statement, language);
+        return getResult(q.execute(), SUGGEST_COLUMN);
+    }
+
+    /**
+     * Asserts that the result contains an entry for each fixture title.
+     * The leading "[" on the first pattern pins {@link #TITLE_2} to the
+     * first position of the returned list.
+     */
+    private static void assertBothTitlesSuggested(String result) {
+        assertNotNull(result);
+        assertTrue("unexpected suggestions: " + result,
+                result.contains("[{term=" + TITLE_2 + ",weight="));
+        assertTrue("unexpected suggestions: " + result,
+                result.contains("{term=" + TITLE_1 + ",weight="));
+    }
+
+    /**
+     * Joins the string value of one column over all rows of a result.
+     *
+     * @param result       query result to read
+     * @param propertyName column whose value is read from each row
+     * @return the values in row order, separated by ", "; empty if the
+     *         result has no rows
+     */
+    static String getResult(QueryResult result, String propertyName) throws RepositoryException {
+        StringBuilder buff = new StringBuilder();
+        RowIterator it = result.getRows();
+        while (it.hasNext()) {
+            if (buff.length() > 0) {
+                buff.append(", ");
+            }
+            Row row = it.nextRow();
+            buff.append(row.getValue(propertyName).getString());
+        }
+        return buff.toString();
+    }
+
+}
\ No newline at end of file
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/SuggestTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java?rev=1657163&r1=1657162&r2=1657163&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/TestUtil.java
Wed Feb 4 12:38:00 2015
@@ -61,6 +61,7 @@ public class TestUtil {
prop.setProperty(LuceneIndexConstants.PROP_NODE_SCOPE_INDEX, true);
prop.setProperty(LuceneIndexConstants.PROP_ANALYZED, true);
prop.setProperty(LuceneIndexConstants.PROP_USE_IN_EXCERPT, true);
+ prop.setProperty(LuceneIndexConstants.PROP_USE_IN_SPELLCHECK, true);
return prop;
}