Author: tommaso
Date: Tue Mar 12 13:09:46 2019
New Revision: 1855317
URL: http://svn.apache.org/viewvc?rev=1855317&view=rev
Log:
OAK-8118 - index selected properties to enhance fv simsearch results (branch
1.10)
Modified:
jackrabbit/oak/branches/1.10/ (props changed)
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Propchange: jackrabbit/oak/branches/1.10/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Tue Mar 12 13:09:46 2019
@@ -1,3 +1,3 @@
/jackrabbit/oak/branches/1.0:1665962
-/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1855221
+/jackrabbit/oak/trunk:1850874,1850882,1851236,1851253,1851451,1851533-1851535,1851619,1852052,1852084,1852120,1852451,1852492-1852493,1852528,1852582,1852584,1852601,1852920,1853141,1853229,1853393,1853429,1853433,1853441,1853866,1853868,1853870,1853893,1853969,1853997,1854034,1854044,1854058,1854113,1854373,1854377,1854380,1854385,1854401,1854403,1854455,1854461-1854462,1854466,1854468,1854515,1854533,1854701,1854827,1854848,1854859,1854930,1854990,1855221
/jackrabbit/trunk:1345480
Modified:
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
Tue Mar 12 13:09:46 2019
@@ -35,14 +35,7 @@ import org.apache.jackrabbit.oak.plugins
import
org.apache.jackrabbit.oak.plugins.index.search.spi.binary.FulltextBinaryTextExtractor;
import
org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextDocumentMaker;
import org.apache.jackrabbit.oak.spi.state.NodeState;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoubleDocValuesField;
-import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedDocValuesField;
-import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.*;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.index.IndexableField;
@@ -316,6 +309,12 @@ public class LuceneDocumentMaker extends
}
@Override
+ protected boolean indexSimilarityTag(Document doc, PropertyState property)
{
+ doc.add(new TextField(FieldNames.SIMILARITY_TAGS,
property.getValue(Type.STRING), Field.Store.YES));
+ return true;
+ }
+
+ @Override
protected void indexSimilarityStrings(Document doc, PropertyDefinition pd,
String value) throws IOException {
for (Field f : FieldFactory.newSimilarityFields(pd.name, value)) {
doc.add(f);
Modified:
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
Tue Mar 12 13:09:46 2019
@@ -84,7 +84,7 @@ public class SimSearchUtils {
}
return doubles;
}
-
+
private static Collection<BytesRef> getTokens(Analyzer analyzer, String
field, String sampleTextString) throws IOException {
Collection<BytesRef> tokens = new LinkedList<>();
TokenStream ts = analyzer.tokenStream(field, sampleTextString);
@@ -156,9 +156,18 @@ public class SimSearchUtils {
log.trace("generating sim query on field {} and
text {}", similarityFieldName, fvString);
Query simQuery =
SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
booleanQuery.add(new BooleanClause(simQuery,
SHOULD));
+ String[] binaryTags =
doc.getValues(FieldNames.SIMILARITY_TAGS);
+ if (binaryTags != null && binaryTags.length > 0) {
+ BooleanQuery tagQuery = new BooleanQuery();
+ for (String brt : binaryTags) {
+ tagQuery.add(new BooleanClause(new
TermQuery(new Term(FieldNames.SIMILARITY_TAGS, brt)), SHOULD));
+ }
+ tagQuery.setBoost(0.5f);
+ booleanQuery.add(tagQuery, SHOULD);
+ }
log.trace("similarity query generated for {}",
pd.name);
} else {
- log.warn("could not create query for similarity
field {}", fvString);
+ log.warn("could not create query for similarity
field {}", similarityFieldName);
}
}
}
Modified:
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FieldNames.java
Tue Mar 12 13:09:46 2019
@@ -65,6 +65,11 @@ public final class FieldNames {
private static final String SIMILARITY_BINARY_PREFIX = "simbin:";
/**
+ * Name of the field that contains the similarity search tags.
+ */
+ public static final String SIMILARITY_TAGS = "simtags";
+
+ /**
* Name of the field that contains the suggest index.
*/
public static final String SUGGEST = ":suggest";
Modified:
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/FulltextIndexConstants.java
Tue Mar 12 13:09:46 2019
@@ -260,6 +260,11 @@ public interface FulltextIndexConstants
String PROP_SIMILARITY_RERANK = "similarityRerank";
/**
+ * whether property values should be indexed as tags to boost similarity
search results
+ */
+ String PROP_SIMILARITY_TAGS = "similarityTags";
+
+ /**
* Property definition config indicating that null check support should be
* enabled for this property
*/
Modified:
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/PropertyDefinition.java
Tue Mar 12 13:09:46 2019
@@ -128,6 +128,7 @@ public class PropertyDefinition {
public final boolean useInSimilarity;
public final boolean similarityRerank;
+ public final boolean similarityTags;
public PropertyDefinition(IndexingRule idxDefn, String nodeName, NodeState
defn) {
this.isRegexp = getOptionalValue(defn, PROP_IS_REGEX, false);
@@ -159,6 +160,7 @@ public class PropertyDefinition {
this.useInSpellcheck = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_USE_IN_SPELLCHECK, false);
this.useInSimilarity = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_USE_IN_SIMILARITY, false);
this.similarityRerank = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_SIMILARITY_RERANK, true);
+ this.similarityTags = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_SIMILARITY_TAGS, false);
this.nullCheckEnabled = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_NULL_CHECK_ENABLED, false);
this.notNullCheckEnabled = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_NOT_NULL_CHECK_ENABLED, false);
this.excludeFromAggregate = getOptionalValueIfIndexed(defn,
FulltextIndexConstants.PROP_EXCLUDE_FROM_AGGREGATE, false);
Modified:
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1855317&r1=1855316&r2=1855317&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
(original)
+++
jackrabbit/oak/branches/1.10/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Tue Mar 12 13:09:46 2019
@@ -265,12 +265,17 @@ public abstract class FulltextDocumentMa
if (pd.facet && isFacetingEnabled()) {
dirty |= indexFacets(doc, property, pname, pd);
}
+ if (pd.similarityTags) {
+ dirty |= indexSimilarityTag(doc, property);
+ }
}
return dirty;
}
+ protected abstract boolean indexSimilarityTag(D doc, PropertyState
property);
+
protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition
pd, Blob blob) throws IOException;
protected abstract void indexSimilarityStrings(D doc, PropertyDefinition
pd, String value) throws IOException;