Author: tommaso
Date: Thu Nov 12 14:53:54 2015
New Revision: 1714061
URL: http://svn.apache.org/viewvc?rev=1714061&view=rev
Log:
OAK-3580 - implementing excerpt via indexes, using default Lucene / Solr highlighter
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
jackrabbit/oak/trunk/oak-lucene/pom.xml
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
jackrabbit/oak/trunk/oak-solr-core/pom.xml
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ResultRowImpl.java
Thu Nov 12 14:53:54 2015
@@ -35,18 +35,18 @@ public class ResultRowImpl implements Re
private final Query query;
private final Tree[] trees;
-
+
/**
* The column values.
*/
private final PropertyValue[] values;
-
+
/**
* Whether the value at the given index is used for comparing rows (used
* within hashCode and equals). If null, all columns are distinct.
*/
private final boolean[] distinctValues;
-
+
/**
* The values used for ordering.
*/
@@ -59,7 +59,7 @@ public class ResultRowImpl implements Re
this.distinctValues = distinctValues;
this.orderValues = orderValues;
}
-
+
PropertyValue[] getOrderValues() {
return orderValues;
}
@@ -107,15 +107,22 @@ public class ResultRowImpl implements Re
}
// OAK-318:
// somebody might call rep:excerpt(text)
- // even thought the query doesn't contain that column
+ // even though the query doesn't contain that column
if (columnName.startsWith(QueryImpl.REP_EXCERPT)) {
- // missing excerpt, generate a default value
- String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
- query, true);
- if (ex != null) {
- return PropertyValues.newString(ex);
+ int columnIndex = query.getColumnIndex(QueryImpl.REP_EXCERPT);
+ if (columnIndex >= 0 && QueryImpl.REP_EXCERPT.equals(columnName) || SimpleExcerptProvider.REP_EXCERPT_FN.
+ equals(columnName)) {
+ return SimpleExcerptProvider.getExcerpt(values[columnIndex]);
+ // TODO : make it possible to extract property level excerpts, e.g. rep:excerpt(text) from indexes
+ } else {
+ // missing excerpt, generate a default value
+ String ex = SimpleExcerptProvider.getExcerpt(getPath(), columnName,
+ query, true);
+ if (ex != null) {
+ return PropertyValues.newString(ex);
+ }
+ return PropertyValues.newString(getPath());
}
- return PropertyValues.newString(getPath());
}
throw new IllegalArgumentException("Column not found: " + columnName);
}
@@ -146,7 +153,7 @@ public class ResultRowImpl implements Re
}
return buff.toString();
}
-
+
@Override
public int hashCode() {
@@ -155,7 +162,7 @@ public class ResultRowImpl implements Re
result = 31 * result + hashCodeOfValues();
return result;
}
-
+
private int hashCodeOfValues() {
int result = 1;
for (int i = 0; i < values.length; i++) {
@@ -249,4 +256,4 @@ public class ResultRowImpl implements Re
}
-}
+}
\ No newline at end of file
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/ast/NotImpl.java
Thu Nov 12 14:53:54 2015
@@ -18,7 +18,6 @@
*/
package org.apache.jackrabbit.oak.query.ast;
-import static com.google.common.collect.Lists.newArrayList;
import static
org.apache.jackrabbit.oak.query.ast.AstElementFactory.copyElementAndCheckReference;
import java.util.Collections;
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/query/fulltext/SimpleExcerptProvider.java
Thu Nov 12 14:53:54 2015
@@ -16,13 +16,16 @@
*/
package org.apache.jackrabbit.oak.query.fulltext;
-import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
-
import java.util.BitSet;
import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
import java.util.Set;
+import com.google.common.base.Splitter;
+import com.google.common.collect.ImmutableSet;
import org.apache.jackrabbit.oak.api.PropertyState;
+import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
@@ -33,20 +36,24 @@ import org.apache.jackrabbit.oak.query.a
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
import org.apache.jackrabbit.oak.query.ast.OrImpl;
+import org.apache.jackrabbit.oak.spi.query.PropertyValues;
-import com.google.common.collect.ImmutableSet;
+import static com.google.common.collect.Maps.newHashMap;
+import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;
/**
* This class can extract excerpts from node.
*/
public class SimpleExcerptProvider {
- private static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+ public static final String REP_EXCERPT_FN = "rep:excerpt(.)";
+ public static final String EXCERPT_END = "</span></div>";
+ public static final String EXCERPT_BEGIN = "<div><span>";
private static int maxFragmentSize = 150;
public static String getExcerpt(String path, String columnName,
- Query query, boolean highlight) {
+ Query query, boolean highlight) {
if (path == null) {
return null;
}
@@ -72,7 +79,7 @@ public class SimpleExcerptProvider {
for (PropertyState p : t.getProperties()) {
if (p.getType().tag() == Type.STRING.tag()
&& (columnName == null || columnName.equalsIgnoreCase(p
- .getName()))) {
+ .getName()))) {
text.append(separator);
separator = " ";
for (String v : p.getValue(Type.STRINGS)) {
@@ -82,8 +89,7 @@ public class SimpleExcerptProvider {
}
Set<String> searchToken = extractFulltext(query);
if (highlight && searchToken != null) {
- String h = highlight(text, searchToken);
- return h;
+ return highlight(text, searchToken);
}
return noHighlight(text);
}
@@ -140,32 +146,32 @@ public class SimpleExcerptProvider {
Set<String> out = new HashSet<String>();
StringBuilder token = new StringBuilder();
boolean quote = false;
- for (int i = 0; i < in.length();) {
+ for (int i = 0; i < in.length(); ) {
final int c = in.codePointAt(i);
int length = Character.charCount(c);
switch (c) {
- case ' ':
- if (quote) {
- token.append(' ');
- } else if (token.length() > 0) {
- out.add(token.toString());
- token = new StringBuilder();
- }
- break;
- case '"':
- case '\'':
- if (quote) {
- quote = false;
- if (token.length() > 0) {
+ case ' ':
+ if (quote) {
+ token.append(' ');
+ } else if (token.length() > 0) {
out.add(token.toString());
token = new StringBuilder();
}
- } else {
- quote = true;
- }
- break;
- default:
- token.append(new String(Character.toChars(c)));
+ break;
+ case '"':
+ case '\'':
+ if (quote) {
+ quote = false;
+ if (token.length() > 0) {
+ out.add(token.toString());
+ token = new StringBuilder();
+ }
+ } else {
+ quote = true;
+ }
+ break;
+ default:
+ token.append(new String(Character.toChars(c)));
}
i += length;
}
@@ -198,7 +204,7 @@ public class SimpleExcerptProvider {
for (String token : tokens) {
highlight(escaped, highlight, token);
}
- StringBuilder excerpt = new StringBuilder("<div><span>");
+ StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
boolean strong = false;
for (int i = 0; i < escaped.length(); i++) {
if (highlight.get(i) && !strong) {
@@ -213,10 +219,10 @@ public class SimpleExcerptProvider {
if (strong) {
excerpt.append("</strong>");
}
- excerpt.append("</span></div>");
+ excerpt.append(EXCERPT_END);
return excerpt.toString();
}
-
+
private static void highlight(String text, BitSet highlightBits, String
token) {
boolean isLike = false;
if (token.endsWith("*")) {
@@ -247,5 +253,14 @@ public class SimpleExcerptProvider {
}
}
}
-
+
+ public static PropertyValue getExcerpt(PropertyValue value) {
+ Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings();
+ StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN);
+ for (String v : listSplitter.splitToList(value.toString())) {
+ excerpt.append(v);
+ }
+ excerpt.append(EXCERPT_END);
+ return PropertyValues.newString(excerpt.toString());
+ }
}
Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/pom.xml?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-lucene/pom.xml Thu Nov 12 14:53:54 2015
@@ -42,6 +42,9 @@
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment
<!-- OAK-318 -->
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
<!-- OAK-318 -->
org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase
<!-- OAK-318 -->
+ org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase
<!-- OAK-3580 -->
+ org.apache.jackrabbit.core.query.ExcerptTest#testHighlightJa
<!-- OAK-3580 -->
+
org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights
<!-- OAK-3580 -->
org.apache.jackrabbit.core.query.QueryResultTest#testSkip
<!-- OAK-484 -->
org.apache.jackrabbit.core.query.DerefTest#testDeref
<!-- OAK-321 -->
org.apache.jackrabbit.core.query.DerefTest#testDerefInPredicate
<!-- OAK-321 -->
@@ -210,6 +213,11 @@
<version>${lucene.version}</version>
<scope>provided</scope>
</dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-highlighter</artifactId>
+ <version>${lucene.version}</version>
+ </dependency>
<!-- Logging -->
<dependency>
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Thu Nov 12 14:53:54 2015
@@ -73,12 +73,14 @@ import org.apache.jackrabbit.oak.spi.que
import
org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvanceFulltextQueryIndex;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@@ -98,6 +100,12 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.Version;
@@ -172,6 +180,9 @@ public class LuceneIndex implements Adva
private final NodeAggregator aggregator;
+ private final Highlighter highlighter = new Highlighter(new
SimpleHTMLFormatter("<strong>", "</strong>"),
+ new SimpleHTMLEncoder(), null);
+
public LuceneIndex(IndexTracker tracker, NodeAggregator aggregator) {
this.tracker = tracker;
this.aggregator = aggregator;
@@ -298,7 +309,7 @@ public class LuceneIndex implements Adva
return endOfData();
}
- private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher
searcher) throws IOException {
+ private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher
searcher, String excerpt) throws IOException {
IndexReader reader = searcher.getIndexReader();
PathStoredFieldVisitor visitor = new PathStoredFieldVisitor();
reader.document(doc.doc, visitor);
@@ -323,7 +334,7 @@ public class LuceneIndex implements Adva
seenPaths.add(path);
}
- return new LuceneResultRow(path, doc.score);
+ return new LuceneResultRow(path, doc.score, excerpt);
}
return null;
}
@@ -363,8 +374,14 @@ public class LuceneIndex implements Adva
LOG.debug("... took {} ms", time);
nextBatchSize = (int) Math.min(nextBatchSize * 2L,
100000);
+ boolean addExcerpt = filter.getQueryStatement() !=
null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
for (ScoreDoc doc : docs.scoreDocs) {
- LuceneResultRow row = convertToRow(doc,
searcher);
+ String excerpt = null;
+ if (addExcerpt) {
+ excerpt = getExcerpt(indexNode, searcher,
query, doc);
+ }
+
+ LuceneResultRow row = convertToRow(doc,
searcher, excerpt);
if (row != null) {
queue.add(row);
}
@@ -476,6 +493,35 @@ public class LuceneIndex implements Adva
return new LucenePathCursor(itr, settings, sizeEstimator);
}
+ private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException {
+ StringBuilder excerpt = new StringBuilder();
+ QueryScorer scorer = new QueryScorer(query);
+ scorer.setExpandMultiTermQuery(true);
+ highlighter.setFragmentScorer(scorer);
+
+ for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields())
+ if (!FieldNames.SUGGEST.equals(field.name())) {
+ try {
+ Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
+ TokenStream tokenStream = analyzer.tokenStream(field.name(), field.stringValue());
+ tokenStream.reset();
+ CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream);
+ TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 2);
+ if (textFragments != null && textFragments.length > 0) {
+ for (TextFragment fragment : textFragments) {
+ if (excerpt.length() > 0) {
+ excerpt.append("...");
+ }
+ excerpt.append(fragment.toString());
+ }
+ }
+ } catch (InvalidTokenOffsetsException e) {
+ LOG.error("higlighting failed", e);
+ }
+ }
+ return excerpt.toString();
+ }
+
protected static IndexPlan.Builder planBuilder(Filter filter){
return new IndexPlan.Builder()
.setCostPerExecution(0) // we're local. Low-cost
@@ -1039,11 +1085,13 @@ public class LuceneIndex implements Adva
final double score;
final Iterable<String> suggestWords;
final boolean isVirtual;
+ final String excerpt;
- LuceneResultRow(String path, double score) {
+ LuceneResultRow(String path, double score, String excerpt) {
this.isVirtual = false;
this.path = path;
this.score = score;
+ this.excerpt = excerpt;
this.suggestWords = Collections.emptySet();
}
@@ -1052,6 +1100,7 @@ public class LuceneIndex implements Adva
this.path = "/";
this.score = 1.0d;
this.suggestWords = suggestWords;
+ this.excerpt = null;
}
@Override
@@ -1130,6 +1179,9 @@ public class LuceneIndex implements Adva
if (QueryImpl.REP_SPELLCHECK.equals(columnName) ||
QueryImpl.REP_SUGGEST.equals(columnName)) {
return
PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
+ if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+ return PropertyValues.newString(currentRow.excerpt);
+ }
return pathRow.getValue(columnName);
}
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
Thu Nov 12 14:53:54 2015
@@ -18,6 +18,10 @@
*/
package org.apache.jackrabbit.oak.plugins.index.lucene;
+import javax.annotation.CheckForNull;
+import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
+import javax.jcr.PropertyType;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@@ -28,17 +32,11 @@ import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
-import javax.annotation.CheckForNull;
-import javax.annotation.Nonnull;
-import javax.annotation.Nullable;
-import javax.jcr.PropertyType;
-
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Queues;
import com.google.common.collect.Sets;
-
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Result.SizePrecision;
import org.apache.jackrabbit.oak.api.Type;
@@ -70,10 +68,13 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.jackrabbit.oak.util.PerfLogger;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CachingTokenFilter;
+import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
@@ -98,6 +99,12 @@ import org.apache.lucene.search.TermRang
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.TextFragment;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.util.Version;
@@ -114,6 +121,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.PATH;
+import static
org.apache.jackrabbit.oak.plugins.index.lucene.FieldNames.SUGGEST;
import static
org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.NATIVE_SORT_ORDER;
import static
org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexConstants.VERSION;
import static
org.apache.jackrabbit.oak.plugins.index.lucene.TermFactory.newAncestorTerm;
@@ -121,9 +129,7 @@ import static org.apache.jackrabbit.oak.
import static org.apache.jackrabbit.oak.query.QueryImpl.JCR_PATH;
import static
org.apache.jackrabbit.oak.spi.query.QueryIndex.AdvancedQueryIndex;
import static org.apache.jackrabbit.oak.spi.query.QueryIndex.NativeQueryIndex;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST;
-import static org.apache.lucene.search.BooleanClause.Occur.MUST_NOT;
-import static org.apache.lucene.search.BooleanClause.Occur.SHOULD;
+import static org.apache.lucene.search.BooleanClause.Occur.*;
/**
* Provides a QueryIndex that does lookups against a Lucene-based index
@@ -166,7 +172,7 @@ import static org.apache.lucene.search.B
*/
public class LucenePropertyIndex implements AdvancedQueryIndex, QueryIndex,
NativeQueryIndex,
AdvanceFulltextQueryIndex {
-
+
private static double MIN_COST = 2.1;
private static final Logger LOG = LoggerFactory
@@ -185,6 +191,9 @@ public class LucenePropertyIndex impleme
private final ScorerProviderFactory scorerProviderFactory;
+ private final Highlighter highlighter = new Highlighter(new
SimpleHTMLFormatter("<strong>", "</strong>"),
+ new SimpleHTMLEncoder(), null);
+
public LucenePropertyIndex(IndexTracker tracker) {
this.tracker = tracker;
this.scorerProviderFactory = ScorerProviderFactory.DEFAULT;
@@ -253,7 +262,7 @@ public class LucenePropertyIndex impleme
.append(path)
.append(") ");
sb.append(getLuceneRequest(plan, null));
- if(plan.getSortOrder() != null && !plan.getSortOrder().isEmpty()){
+ if (plan.getSortOrder() != null && !plan.getSortOrder().isEmpty())
{
sb.append(" ordering:").append(plan.getSortOrder());
}
if (ft != null) {
@@ -292,7 +301,7 @@ public class LucenePropertyIndex impleme
return endOfData();
}
- private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher
searcher) throws IOException {
+ private LuceneResultRow convertToRow(ScoreDoc doc, IndexSearcher
searcher, String excerpt) throws IOException {
IndexReader reader = searcher.getIndexReader();
//TODO Look into usage of field cache for retrieving the path
//instead of reading via reader if no of docs in index are
limited
@@ -307,13 +316,13 @@ public class LucenePropertyIndex impleme
String originalPath = path;
path = pr.transformPath(path);
- if (path == null){
+ if (path == null) {
LOG.trace("Ignoring path {} : Transformation
returned null", originalPath);
return null;
}
// avoid duplicate entries
- if (seenPaths.contains(path)){
+ if (seenPaths.contains(path)) {
LOG.trace("Ignoring path {} : Duplicate post
transformation", originalPath);
return null;
}
@@ -321,7 +330,7 @@ public class LucenePropertyIndex impleme
}
LOG.trace("Matched path {}", path);
- return new LuceneResultRow(path, doc.score);
+ return new LuceneResultRow(path, doc.score, excerpt);
}
return null;
}
@@ -338,7 +347,7 @@ public class LucenePropertyIndex impleme
ScoreDoc lastDocToRecord = null;
- IndexNode indexNode = acquireIndexNode(plan);
+ final IndexNode indexNode = acquireIndexNode(plan);
checkState(indexNode != null);
try {
IndexSearcher searcher = indexNode.getSearcher();
@@ -375,8 +384,14 @@ public class LucenePropertyIndex impleme
PERF_LOGGER.end(start, -1, "{} ...",
docs.scoreDocs.length);
nextBatchSize = (int) Math.min(nextBatchSize * 2L,
100000);
+ boolean addExcerpt = filter.getQueryStatement() !=
null && filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT);
for (ScoreDoc doc : docs.scoreDocs) {
- LuceneResultRow row = convertToRow(doc,
searcher);
+ String excerpt = null;
+ if (addExcerpt) {
+ excerpt = getExcerpt(indexNode, searcher,
query, doc);
+ }
+
+ LuceneResultRow row = convertToRow(doc,
searcher, excerpt);
if (row != null) {
queue.add(row);
}
@@ -454,7 +469,7 @@ public class LucenePropertyIndex impleme
private void checkForIndexVersionChange(IndexSearcher searcher) {
long currentVersion = getVersion(searcher);
- if (currentVersion != lastSearchIndexerVersion && lastDoc !=
null){
+ if (currentVersion != lastSearchIndexerVersion && lastDoc !=
null) {
lastDoc = null;
LOG.debug("Change in index version detected {} => {}.
Query would be performed without " +
"offset", currentVersion,
lastSearchIndexerVersion);
@@ -474,7 +489,7 @@ public class LucenePropertyIndex impleme
Query query = (Query)
luceneRequestFacade.getLuceneRequest();
TotalHitCountCollector collector = new
TotalHitCountCollector();
searcher.search(query, collector);
- int totalHits = collector.getTotalHits();
+ int totalHits = collector.getTotalHits();
LOG.debug("Estimated size for query {} is {}", query,
totalHits);
return totalHits;
}
@@ -490,6 +505,35 @@ public class LucenePropertyIndex impleme
return new LucenePathCursor(itr, plan, settings, sizeEstimator);
}
+ private String getExcerpt(IndexNode indexNode, IndexSearcher searcher, Query query, ScoreDoc doc) throws IOException {
+ StringBuilder excerpt = new StringBuilder();
+ QueryScorer scorer = new QueryScorer(query);
+ scorer.setExpandMultiTermQuery(true);
+ highlighter.setFragmentScorer(scorer);
+
+ for (IndexableField field : searcher.getIndexReader().document(doc.doc).getFields())
+ if (!SUGGEST.equals(field.name())) {
+ try {
+ Analyzer analyzer = indexNode.getDefinition().getAnalyzer();
+ TokenStream tokenStream = analyzer.tokenStream(field.name(), field.stringValue());
+ tokenStream.reset();
+ CachingTokenFilter cachingTokenFilter = new CachingTokenFilter(tokenStream);
+ TextFragment[] textFragments = highlighter.getBestTextFragments(cachingTokenFilter, field.stringValue(), true, 2);
+ if (textFragments != null && textFragments.length > 0) {
+ for (TextFragment fragment : textFragments) {
+ if (excerpt.length() > 0) {
+ excerpt.append("...");
+ }
+ excerpt.append(fragment.toString());
+ }
+ }
+ } catch (InvalidTokenOffsetsException e) {
+ LOG.error("higlighting failed", e);
+ }
+ }
+ return excerpt.toString();
+ }
+
@Override
public NodeAggregator getNodeAggregator() {
return null;
@@ -502,7 +546,7 @@ public class LucenePropertyIndex impleme
*
* @return true if the term is related to node
*/
- public static boolean isNodePath(String fulltextTermPath){
+ public static boolean isNodePath(String fulltextTermPath) {
return fulltextTermPath.endsWith("/*");
}
@@ -564,7 +608,7 @@ public class LucenePropertyIndex impleme
}
}
- private static String getIndexName(IndexPlan plan){
+ private static String getIndexName(IndexPlan plan) {
return PathUtils.getName(getPlanResult(plan).indexPath);
}
@@ -650,7 +694,7 @@ public class LucenePropertyIndex impleme
}
if (qs.size() == 0) {
- if (reader == null){
+ if (reader == null) {
//When called in planning mode then some queries like
rep:similar
//cannot create query as reader is not provided. In such case
we
//just return match all queries
@@ -670,7 +714,7 @@ public class LucenePropertyIndex impleme
/**
* Perform additional wraps on the list of queries to allow, for example,
the NOT CONTAINS to
* play properly when sent to lucene.
- *
+ *
* @param qs the list of queries. Cannot be null.
* @return
*/
@@ -704,7 +748,7 @@ public class LucenePropertyIndex impleme
}
if (!unwrapped) {
- bq.add(q, MUST);
+ bq.add(q, MUST);
}
}
return new LuceneRequestFacade<Query>(bq);
@@ -712,7 +756,7 @@ public class LucenePropertyIndex impleme
/**
* unwraps any NOT clauses from the provided boolean query into another
boolean query.
- *
+ *
* @param input the query to be analysed for the existence of NOT clauses.
Cannot be null.
* @param output the query where the unwrapped NOTs will be saved into.
Cannot be null.
* @return true if there where at least one unwrapped NOT. false otherwise.
@@ -727,23 +771,23 @@ public class LucenePropertyIndex impleme
unwrapped = true;
}
}
-
+
return unwrapped;
}
-
+
private CustomScoreQuery getCustomScoreQuery(IndexPlan plan, Query
subQuery) {
PlanResult planResult = getPlanResult(plan);
IndexDefinition idxDef = planResult.indexDefinition;
String providerName = idxDef.getScorerProviderName();
if (scorerProviderFactory != null && providerName != null) {
- return scorerProviderFactory.getScorerProvider(providerName)
- .createCustomScoreQuery(subQuery);
+ return scorerProviderFactory.getScorerProvider(providerName)
+ .createCustomScoreQuery(subQuery);
}
return null;
}
private static void addNonFullTextConstraints(List<Query> qs,
- IndexPlan plan, IndexReader reader) {
+ IndexPlan plan, IndexReader
reader) {
Filter filter = plan.getFilter();
PlanResult planResult = getPlanResult(plan);
IndexDefinition defn = planResult.indexDefinition;
@@ -753,37 +797,37 @@ public class LucenePropertyIndex impleme
String path = getPathRestriction(plan);
switch (filter.getPathRestriction()) {
- case ALL_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- if ("/".equals(path)) {
- break;
- }
- qs.add(new TermQuery(newAncestorTerm(path)));
- }
- break;
- case DIRECT_CHILDREN:
- if (defn.evaluatePathRestrictions()) {
- BooleanQuery bq = new BooleanQuery();
- bq.add(new BooleanClause(new TermQuery(newAncestorTerm(path)),
BooleanClause.Occur.MUST));
- bq.add(new BooleanClause(newDepthQuery(path),
BooleanClause.Occur.MUST));
- qs.add(bq);
- }
- break;
- case EXACT:
- qs.add(new TermQuery(newPathTerm(path)));
- break;
- case PARENT:
- if (denotesRoot(path)) {
- // there's no parent of the root node
- // we add a path that can not possibly occur because there
- // is no way to say "match no documents" in Lucene
- qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
- } else {
- qs.add(new TermQuery(newPathTerm(getParentPath(path))));
- }
- break;
- case NO_RESTRICTION:
- break;
+ case ALL_CHILDREN:
+ if (defn.evaluatePathRestrictions()) {
+ if ("/".equals(path)) {
+ break;
+ }
+ qs.add(new TermQuery(newAncestorTerm(path)));
+ }
+ break;
+ case DIRECT_CHILDREN:
+ if (defn.evaluatePathRestrictions()) {
+ BooleanQuery bq = new BooleanQuery();
+ bq.add(new BooleanClause(new
TermQuery(newAncestorTerm(path)), BooleanClause.Occur.MUST));
+ bq.add(new BooleanClause(newDepthQuery(path),
BooleanClause.Occur.MUST));
+ qs.add(bq);
+ }
+ break;
+ case EXACT:
+ qs.add(new TermQuery(newPathTerm(path)));
+ break;
+ case PARENT:
+ if (denotesRoot(path)) {
+ // there's no parent of the root node
+ // we add a path that can not possibly occur because there
+ // is no way to say "match no documents" in Lucene
+ qs.add(new TermQuery(new Term(FieldNames.PATH, "///")));
+ } else {
+ qs.add(new TermQuery(newPathTerm(getParentPath(path))));
+ }
+ break;
+ case NO_RESTRICTION:
+ break;
}
for (PropertyRestriction pr : filter.getPropertyRestrictions()) {
@@ -817,7 +861,7 @@ public class LucenePropertyIndex impleme
continue;
}
}
-
+
PropertyDefinition pd = planResult.getPropDefn(pr);
if (pd == null) {
continue;
@@ -839,14 +883,14 @@ public class LucenePropertyIndex impleme
typeFromRestriction = pr.first.getType().tag();
} else if (pr.last != null && pr.last.getType() != Type.UNDEFINED)
{
typeFromRestriction = pr.last.getType().tag();
- } else if (pr.list != null && !pr.list.isEmpty()){
+ } else if (pr.list != null && !pr.list.isEmpty()) {
typeFromRestriction = pr.list.get(0).getType().tag();
}
}
return getPropertyType(defn, pr.propertyName, typeFromRestriction);
}
- private static int getPropertyType(PropertyDefinition defn, String name,
int defaultVal){
+ private static int getPropertyType(PropertyDefinition defn, String name,
int defaultVal) {
if (defn.isTypeDefined()) {
return defn.getType();
}
@@ -887,13 +931,13 @@ public class LucenePropertyIndex impleme
PropertyDefinition defn) {
int propType = determinePropertyType(defn, pr);
- if (pr.isNullRestriction()){
+ if (pr.isNullRestriction()) {
return new TermQuery(new Term(FieldNames.NULL_PROPS, defn.name));
}
//If notNullCheckEnabled explicitly enabled use the simple TermQuery
//otherwise later fallback to range query
- if (pr.isNotNullRestriction() && defn.notNullCheckEnabled){
+ if (pr.isNotNullRestriction() && defn.notNullCheckEnabled) {
return new TermQuery(new Term(FieldNames.NOT_NULL_PROPS,
defn.name));
}
@@ -1019,12 +1063,12 @@ public class LucenePropertyIndex impleme
}
}
}
- throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn );
+ throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn);
}
- static long getVersion(IndexSearcher indexSearcher){
+ static long getVersion(IndexSearcher indexSearcher) {
IndexReader reader = indexSearcher.getIndexReader();
- if (reader instanceof DirectoryReader){
+ if (reader instanceof DirectoryReader) {
return ((DirectoryReader) reader).getVersion();
}
return -1;
@@ -1042,11 +1086,11 @@ public class LucenePropertyIndex impleme
return createLikeQuery(FieldNames.NODE_NAME, first);
}
- throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported "+pr);
+ throw new IllegalStateException("For nodeName queries only EQUALS and LIKE are supported " + pr);
}
private static void addReferenceConstraint(String uuid, List<Query> qs,
- IndexReader reader) {
+ IndexReader reader) {
if (reader == null) {
// getPlan call
qs.add(new TermQuery(new Term("*", uuid)));
@@ -1120,7 +1164,7 @@ public class LucenePropertyIndex impleme
if (x instanceof BooleanQuery) {
BooleanQuery bq = (BooleanQuery) x;
if ((bq.getClauses().length == 1) &&
- (bq.getClauses()[0].getOccur() ==
BooleanClause.Occur.MUST_NOT)) {
+ (bq.getClauses()[0].getOccur() ==
BooleanClause.Occur.MUST_NOT)) {
hasMustNot = true;
q.add(bq.getClauses()[0]);
}
@@ -1141,7 +1185,7 @@ public class LucenePropertyIndex impleme
private boolean visitTerm(String propertyName, String text, String
boost, boolean not) {
String p = getLuceneFieldName(propertyName, pr);
- Query q = tokenToQuery(text, p, pr.indexingRule, analyzer);
+ Query q = tokenToQuery(text, p, pr.indexingRule, analyzer);
if (q == null) {
return false;
}
@@ -1162,12 +1206,12 @@ public class LucenePropertyIndex impleme
}
static String getLuceneFieldName(@Nullable String p, PlanResult pr) {
- if (p == null){
+ if (p == null) {
return FieldNames.FULLTEXT;
}
- if (isNodePath(p)){
- if (pr.isPathTransformed()){
+ if (isNodePath(p)) {
+ if (pr.isPathTransformed()) {
p = PathUtils.getName(p);
} else {
//Get rid of /* as aggregated fulltext field name is the
@@ -1175,13 +1219,13 @@ public class LucenePropertyIndex impleme
p =
FieldNames.createFulltextFieldName(PathUtils.getParentPath(p));
}
} else {
- if (pr.isPathTransformed()){
+ if (pr.isPathTransformed()) {
p = PathUtils.getName(p);
}
p = FieldNames.createAnalyzedFieldName(p);
}
- if ("*".equals(p)){
+ if ("*".equals(p)) {
p = FieldNames.FULLTEXT;
}
return p;
@@ -1226,7 +1270,7 @@ public class LucenePropertyIndex impleme
/**
* Following logic is taken from
org.apache.jackrabbit.core.query.lucene.JackrabbitQueryParser#parse(java.lang.String)
*/
- private static String rewriteQueryText(String textsearch){
+ private static String rewriteQueryText(String textsearch) {
// replace escaped ' with just '
StringBuilder rewritten = new StringBuilder();
// the default lucene query parser recognizes 'AND' and 'NOT' as
@@ -1281,8 +1325,10 @@ public class LucenePropertyIndex impleme
final double score;
final Iterable<String> suggestWords;
final boolean isVirutal;
+ final String excerpt;
- LuceneResultRow(String path, double score) {
+ LuceneResultRow(String path, double score, String excerpt) {
+ this.excerpt = excerpt;
this.isVirutal = false;
this.path = path;
this.score = score;
@@ -1294,6 +1340,7 @@ public class LucenePropertyIndex impleme
this.path = "/";
this.score = 1.0d;
this.suggestWords = suggestWords;
+ this.excerpt = null;
}
@Override
@@ -1379,6 +1426,9 @@ public class LucenePropertyIndex impleme
if (QueryImpl.REP_SPELLCHECK.equals(columnName) ||
QueryImpl.REP_SUGGEST.equals(columnName)) {
return
PropertyValues.newString(Iterables.toString(currentRow.suggestWords));
}
+ if (QueryImpl.REP_EXCERPT.equals(columnName)) {
+ return PropertyValues.newString(currentRow.excerpt);
+ }
return pathRow.getValue(columnName);
}
Modified: jackrabbit/oak/trunk/oak-solr-core/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/pom.xml?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-solr-core/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-solr-core/pom.xml Thu Nov 12 14:53:54 2015
@@ -58,6 +58,8 @@
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragment
<!-- OAK-318 -->
org.apache.jackrabbit.core.query.ExcerptTest#testPunctuationStartsFragmentEndsWithDots
<!-- OAK-318 -->
org.apache.jackrabbit.core.query.ExcerptTest#testPreferPhrase
<!-- OAK-318 -->
+ org.apache.jackrabbit.core.query.ExcerptTest#testQuotedPhrase
<!-- OAK-3580 -->
+
org.apache.jackrabbit.core.query.ExcerptTest#testEncodeIllegalCharsHighlights
<!-- OAK-3580 -->
</known.issues>
</properties>
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
(original)
+++
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/FilterQueryParser.java
Thu Nov 12 14:53:54 2015
@@ -20,6 +20,7 @@ import java.util.Collection;
import java.util.List;
import
org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration;
+import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
import org.apache.jackrabbit.oak.query.fulltext.FullTextContains;
import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
@@ -217,6 +218,19 @@ class FilterQueryParser {
solrQuery.addFilterQuery(ptQueryBuilder.toString());
}
+ if (filter.getQueryStatement() != null &&
filter.getQueryStatement().contains(QueryImpl.REP_EXCERPT)) {
+ if (!solrQuery.getHighlight()) {
+ // enable highlighting
+ solrQuery.setHighlight(true);
+ // defaults
+ solrQuery.set("hl.fl", "*");
+ solrQuery.set("hl.encoder", "html");
+ solrQuery.set("hl.mergeContiguous", true);
+ solrQuery.setHighlightSimplePre("<strong>");
+ solrQuery.setHighlightSimplePost("</strong>");
+ }
+ }
+
if (configuration.useForPathRestrictions()) {
Filter.PathRestriction pathRestriction =
filter.getPathRestriction();
if (pathRestriction != null) {
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-solr-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/solr/query/SolrQueryIndex.java
Thu Nov 12 14:53:54 2015
@@ -16,6 +16,7 @@
*/
package org.apache.jackrabbit.oak.plugins.index.solr.query;
+import javax.annotation.CheckForNull;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
@@ -25,7 +26,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import javax.annotation.CheckForNull;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.Iterables;
@@ -62,9 +62,7 @@ import org.apache.solr.common.util.Simpl
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getAncestorPath;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getDepth;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
+import static org.apache.jackrabbit.oak.commons.PathUtils.*;
/**
* A Solr based {@link QueryIndex}
@@ -137,11 +135,11 @@ public class SolrQueryIndex implements F
}
// property restriction OR native language property restriction defined AND property restriction handled
- if (filter.getPropertyRestrictions() != null
+ if (filter.getPropertyRestrictions() != null
&& filter.getPropertyRestrictions().size() > 0
- && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null
+ && (filter.getPropertyRestriction(NATIVE_SOLR_QUERY) != null
|| filter.getPropertyRestriction(NATIVE_LUCENE_QUERY) != null
- || configuration.useForPropertyRestrictions())
+ || configuration.useForPropertyRestrictions())
&& !hasIgnoredProperties(filter.getPropertyRestrictions(),
configuration)) {
match++;
}
@@ -163,7 +161,6 @@ public class SolrQueryIndex implements F
}
-
return match;
}
@@ -324,7 +321,21 @@ public class SolrQueryIndex implements F
onRetrievedDocs(filter, docs);
+ Map<String, Map<String, List<String>>> highlighting =
queryResponse.getHighlighting();
for (SolrDocument doc : docs) {
+ // handle highlight
+ if (highlighting != null) {
+ Object pathObject =
doc.getFieldValue(configuration.getPathField());
+ if (pathObject != null &&
highlighting.get(String.valueOf(pathObject)) != null) {
+ Map<String, List<String>> value =
highlighting.get(String.valueOf(pathObject));
+ for (Map.Entry<String, List<String>> entry
: value.entrySet()) {
+ // all highlighted values end up in 'rep:excerpt', regardless of field match
+ for (String v : entry.getValue()) {
+
doc.addField(QueryImpl.REP_EXCERPT, v);
+ }
+ }
+ }
+ }
SolrResultRow row = convertToRow(doc);
if (row != null) {
queue.add(row);
@@ -441,7 +452,7 @@ public class SolrQueryIndex implements F
(!configuration.useForPropertyRestrictions() // Solr index not used for properties
|| (configuration.getUsedProperties().size() > 0 && !configuration.getUsedProperties().contains(propertyName)) // not explicitly contained in the used properties
|| propertyName.contains("/") // no child-level property restrictions
- || "rep:excerpt".equals(propertyName) // rep:excerpt is handled by the query engine
+ || "rep:excerpt".equals(propertyName) // rep:excerpt is not handled at the property level
||
QueryConstants.RESTRICTION_LOCAL_NAME.equals(propertyName)
||
configuration.getIgnoredProperties().contains(propertyName));
}
@@ -571,7 +582,23 @@ public class SolrQueryIndex implements F
return PropertyValues.newDouble(currentRow.score);
}
Collection<Object> fieldValues =
currentRow.doc.getFieldValues(columnName);
- return
PropertyValues.newString(Iterables.toString(fieldValues != null ? fieldValues :
Collections.emptyList()));
+ String value;
+ if (fieldValues != null && fieldValues.size() > 0) {
+ if (fieldValues.size() > 1) {
+ value = Iterables.toString(fieldValues);
+ } else {
+ Object fieldValue =
currentRow.doc.getFieldValue(columnName);
+ if (fieldValue != null) {
+ value = fieldValue.toString();
+ } else {
+ value = null;
+ }
+ }
+ } else {
+ value = Iterables.toString(Collections.emptyList());
+ }
+
+ return PropertyValues.newString(value);
}
};
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
(original)
+++
jackrabbit/oak/trunk/oak-solr-core/src/main/resources/solr/oak/conf/solrconfig.xml
Thu Nov 12 14:53:54 2015
@@ -1218,6 +1218,114 @@
</arr>
</requestHandler>
+ <!-- Highlighting Component
+
+ http://wiki.apache.org/solr/HighlightingParameters
+ -->
+ <searchComponent class="solr.HighlightComponent" name="highlight">
+ <highlighting>
+ <!-- Configure the standard fragmenter -->
+ <!-- This could most likely be commented out in the "default" case -->
+ <fragmenter name="gap"
+ default="true"
+ class="solr.highlight.GapFragmenter">
+ <lst name="defaults">
+ <int name="hl.fragsize">100</int>
+ </lst>
+ </fragmenter>
+
+ <!-- A regular-expression-based fragmenter
+ (for sentence extraction)
+ -->
+ <fragmenter name="regex"
+ class="solr.highlight.RegexFragmenter">
+ <lst name="defaults">
+ <!-- slightly smaller fragsizes work better because of slop -->
+ <int name="hl.fragsize">70</int>
+ <!-- allow 50% slop on fragment sizes -->
+ <float name="hl.regex.slop">0.5</float>
+ <!-- a basic sentence pattern -->
+ <str name="hl.regex.pattern">[-\w
+ ,/\n\"']{20,200}
+ </str>
+ </lst>
+ </fragmenter>
+
+ <!-- Configure the standard formatter -->
+ <formatter name="html"
+ default="true"
+ class="solr.highlight.HtmlFormatter">
+ <lst name="defaults">
+ <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+ <str name="hl.simple.post"><![CDATA[</em>]]></str>
+ </lst>
+ </formatter>
+
+ <!-- Configure the standard encoder -->
+ <encoder name="html"
+ class="solr.highlight.HtmlEncoder"/>
+
+ <!-- Configure the standard fragListBuilder -->
+ <fragListBuilder name="simple"
+ class="solr.highlight.SimpleFragListBuilder"/>
+
+ <!-- Configure the single fragListBuilder -->
+ <fragListBuilder name="single"
+ class="solr.highlight.SingleFragListBuilder"/>
+
+ <!-- Configure the weighted fragListBuilder -->
+ <fragListBuilder name="weighted"
+ default="true"
+ class="solr.highlight.WeightedFragListBuilder"/>
+
+ <!-- default tag FragmentsBuilder -->
+ <fragmentsBuilder name="default"
+ default="true"
+
class="solr.highlight.ScoreOrderFragmentsBuilder">
+ <!--
+ <lst name="defaults">
+ <str name="hl.multiValuedSeparatorChar">/</str>
+ </lst>
+ -->
+ </fragmentsBuilder>
+
+ <!-- multi-colored tag FragmentsBuilder -->
+ <fragmentsBuilder name="colored"
+
class="solr.highlight.ScoreOrderFragmentsBuilder">
+ <lst name="defaults">
+ <str name="hl.tag.pre"><![CDATA[
+ <b style="background:yellow">,<b style="background:lawgreen">,
+ <b style="background:aquamarine">,<b
style="background:magenta">,
+ <b style="background:palegreen">,<b style="background:coral">,
+ <b style="background:wheat">,<b style="background:khaki">,
+ <b style="background:lime">,<b
style="background:deepskyblue">]]></str>
+ <str name="hl.tag.post"><![CDATA[</b>]]></str>
+ </lst>
+ </fragmentsBuilder>
+
+ <boundaryScanner name="default"
+ default="true"
+ class="solr.highlight.SimpleBoundaryScanner">
+ <lst name="defaults">
+ <str name="hl.bs.maxScan">10</str>
+ <str name="hl.bs.chars">.,!? &#9;&#10;&#13;</str>
+ </lst>
+ </boundaryScanner>
+
+ <boundaryScanner name="breakIterator"
+
class="solr.highlight.BreakIteratorBoundaryScanner">
+ <lst name="defaults">
+ <!-- type should be one of CHARACTER, WORD(default), LINE and SENTENCE -->
+ <str name="hl.bs.type">WORD</str>
+ <!-- language and country are used when constructing Locale object. -->
+ <!-- And the Locale object will be used when getting instance of BreakIterator -->
+ <str name="hl.bs.language">en</str>
+ <str name="hl.bs.country">US</str>
+ </lst>
+ </boundaryScanner>
+ </highlighting>
+ </searchComponent>
+
<!-- Update Processors
Chains of Update Processor Factories for dealing with Update
Modified:
jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java?rev=1714061&r1=1714060&r2=1714061&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
(original)
+++
jackrabbit/oak/trunk/oak-solr-core/src/test/java/org/apache/jackrabbit/oak/jcr/query/SpellcheckTest.java
Thu Nov 12 14:53:54 2015
@@ -83,7 +83,7 @@ public class SpellcheckTest extends Abst
Query q = qm.createQuery(xpath, Query.XPATH);
String result = getResult(q.execute(), "rep:spellcheck()");
assertNotNull(result);
- assertEquals("[voting in ontario]", result);
+ assertEquals("voting in ontario", result);
}
static String getResult(QueryResult result, String propertyName) throws
RepositoryException {