Author: tommaso
Date: Thu Nov 22 15:46:44 2018
New Revision: 1847178

URL: http://svn.apache.org/viewvc?rev=1847178&view=rev
Log:
OAK-7916 - make the feature vector (fv) similarity search query less selective 
and more sensitive to hash and similarity

Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/FeaturePositionTokenFilter.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.java
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
    
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzerTest.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/FieldFactory.java
 Thu Nov 22 15:46:44 2018
@@ -29,11 +29,7 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import 
org.apache.jackrabbit.oak.plugins.index.search.spi.binary.BlobByteSource;
 import org.apache.jackrabbit.util.ISO8601;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.FieldType;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.*;
 
 import static org.apache.lucene.document.Field.Store.NO;
 import static org.apache.lucene.document.Field.Store.YES;
@@ -109,19 +105,34 @@ public final class FieldFactory {
     public static Collection<Field> newSimilarityFields(String name, Blob 
value) throws IOException {
         Collection<Field> fields = new ArrayList<>(1);
         byte[] bytes = new BlobByteSource(value).read();
-//        fields.add(newBinarySimilarityField(name, bytes));
         fields.add(newSimilarityField(name, bytes));
         return fields;
     }
 
+    public static Collection<Field> newBinSimilarityFields(String name, Blob 
value) throws IOException {
+        Collection<Field> fields = new ArrayList<>(1);
+        byte[] bytes = new BlobByteSource(value).read();
+        fields.add(newBinarySimilarityField(name, bytes));
+        return fields;
+    }
+
     public static Collection<Field> newSimilarityFields(String name, String 
value) {
         Collection<Field> fields = new ArrayList<>(1);
-//        byte[] bytes = SimSearchUtils.toByteArray(value);
-//        fields.add(newBinarySimilarityField(name, bytes));
         fields.add(newSimilarityField(name, value));
         return fields;
     }
 
+    public static Collection<Field> newBinSimilarityFields(String name, String 
value) {
+        Collection<Field> fields = new ArrayList<>(1);
+        byte[] bytes = SimSearchUtils.toByteArray(value);
+        fields.add(newBinarySimilarityField(name, bytes));
+        return fields;
+    }
+
+    private static StoredField newBinarySimilarityField(String name, byte[] 
bytes) {
+        return new StoredField(FieldNames.createBinSimilarityFieldName(name), 
bytes);
+    }
+
     private static Field newSimilarityField(String name, byte[] bytes) {
         return newSimilarityField(name, SimSearchUtils.toDoubleString(bytes));
     }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneDocumentMaker.java
 Thu Nov 22 15:46:44 2018
@@ -320,6 +320,11 @@ public class LuceneDocumentMaker extends
         for (Field f : FieldFactory.newSimilarityFields(pd.name, value)) {
             doc.add(f);
         }
+        if (pd.similarityRerank) {
+            for (Field f : FieldFactory.newBinSimilarityFields(pd.name, 
value)) {
+                doc.add(f);
+            }
+        }
     }
 
     @Override
@@ -327,5 +332,10 @@ public class LuceneDocumentMaker extends
         for (Field f : FieldFactory.newSimilarityFields(pd.name, blob)) {
             doc.add(f);
         }
+        if (pd.similarityRerank) {
+            for (Field f : FieldFactory.newBinSimilarityFields(pd.name, blob)) 
{
+                doc.add(f);
+            }
+        }
     }
 }
\ No newline at end of file

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndex.java
 Thu Nov 22 15:46:44 2018
@@ -120,6 +120,7 @@ import org.apache.lucene.search.highligh
 import org.apache.lucene.search.postingshighlight.PostingsHighlighter;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.suggest.Lookup;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Version;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
@@ -128,6 +129,7 @@ import org.slf4j.LoggerFactory;
 
 import static com.google.common.base.Preconditions.checkNotNull;
 import static com.google.common.base.Preconditions.checkState;
+import static com.google.common.base.Predicates.in;
 import static com.google.common.base.Predicates.notNull;
 import static com.google.common.collect.Lists.newArrayListWithCapacity;
 import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
@@ -379,6 +381,37 @@ public class LucenePropertyIndex extends
                                 mergedFieldInfos = 
MultiFields.getMergedFieldInfos(searcher.getIndexReader());
                             }
 
+                            boolean earlyStop = false;
+                            if (docs.scoreDocs.length > 1) {
+                                // reranking step for fv sim search
+                                PropertyRestriction pr = null;
+                                LuceneIndexDefinition defn = 
indexNode.getDefinition();
+                                if (defn.hasFunctionDefined()) {
+                                    pr = 
filter.getPropertyRestriction(defn.getFunctionName());
+                                }
+                                if (pr != null) {
+                                    String queryString = 
String.valueOf(pr.first.getValue(pr.first.getType()));
+                                    if (queryString.startsWith("mlt?")) {
+                                        List<PropertyDefinition> sp = new 
LinkedList<>();
+                                        for (IndexingRule r : 
defn.getDefinedRules()) {
+                                            List<PropertyDefinition> 
similarityProperties = r.getSimilarityProperties();
+                                            for (PropertyDefinition pd : 
similarityProperties) {
+                                                if (pd.similarityRerank) {
+                                                    sp.add(pd);
+                                                }
+                                            }
+                                        }
+                                        if (!sp.isEmpty()) {
+                                            long fvs = PERF_LOGGER.start();
+                                            
SimSearchUtils.bruteForceFVRerank(sp, docs, indexSearcher);
+                                            PERF_LOGGER.end(fvs, -1, "fv 
reranking done");
+                                            LOG.info("reranking done");
+                                            earlyStop = true;
+                                        }
+                                    }
+                                }
+                            }
+
                             for (ScoreDoc doc : docs.scoreDocs) {
                                 Map<String, String> excerpts = null;
                                 if (addExcerpt) {
@@ -397,6 +430,10 @@ public class LucenePropertyIndex extends
                                 lastDocToRecord = doc;
                             }
 
+                            if (earlyStop) {
+                                noDocs = true;
+                                break;
+                            }
                             if (queue.isEmpty() && docs.scoreDocs.length > 0) {
                                 //queue is still empty but more results can be 
fetched
                                 //from Lucene so still continue

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/IndexDefinitionBuilder.java
 Thu Nov 22 15:46:44 2018
@@ -327,6 +327,12 @@ public final class IndexDefinitionBuilde
             return this;
         }
 
+        public PropertyRule useInSimilarity(boolean rerank) {
+            propTree.setProperty(LuceneIndexConstants.PROP_USE_IN_SIMILARITY, 
true);
+            
propTree.setProperty(FulltextIndexConstants.PROP_SIMILARITY_RERANK, rerank);
+            return this;
+        }
+
         public PropertyRule type(String type){
             //This would throw an IAE if type is invalid
             PropertyType.valueFromName(type);

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/FeaturePositionTokenFilter.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/FeaturePositionTokenFilter.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/FeaturePositionTokenFilter.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/FeaturePositionTokenFilter.java
 Thu Nov 22 15:46:44 2018
@@ -55,4 +55,4 @@ final class FeaturePositionTokenFilter e
     tokenCount = 0;
   }
 
-  }
\ No newline at end of file
+}
\ No newline at end of file

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzer.java
 Thu Nov 22 15:46:44 2018
@@ -30,7 +30,7 @@ import org.apache.lucene.util.Version;
  */
 public class LSHAnalyzer extends Analyzer {
 
-  private static final int DEFAULT_SHINGLE_SIZE = 4;
+  private static final int DEFAULT_SHINGLE_SIZE = 5;
 
   private final int min;
   private final int max;

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/SimSearchUtils.java
 Thu Nov 22 15:46:44 2018
@@ -39,6 +39,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.BytesRef;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -80,32 +81,25 @@ public class SimSearchUtils {
         return doubles;
     }
 
-    private static Collection<String> getTokens(Analyzer analyzer, String 
field, String sampleTextString) throws IOException {
-        Collection<String> tokens = new LinkedList<>();
+    private static Collection<BytesRef> getTokens(Analyzer analyzer, String 
field, String sampleTextString) throws IOException {
+        Collection<BytesRef> tokens = new LinkedList<>();
         TokenStream ts = analyzer.tokenStream(field, sampleTextString);
-        ts.reset();
         ts.addAttribute(CharTermAttribute.class);
+        ts.reset();
         while (ts.incrementToken()) {
             CharTermAttribute charTermAttribute = 
ts.getAttribute(CharTermAttribute.class);
             String token = new String(charTermAttribute.buffer(), 0, 
charTermAttribute.length());
-            tokens.add(token);
+            tokens.add(new BytesRef(token));
         }
         ts.end();
         ts.close();
         return tokens;
     }
 
-    static BooleanQuery getSimQuery(Analyzer analyzer, String fieldName, 
String text) throws IOException {
-        Collection<String> tokens = getTokens(analyzer, fieldName, text);
-        BooleanQuery booleanQuery = new BooleanQuery(true);
-        booleanQuery.setMinimumNumberShouldMatch(3);
-        for (String token : tokens) {
-            booleanQuery.add(new ConstantScoreQuery(new TermQuery(new 
Term(fieldName, token))), BooleanClause.Occur.SHOULD);
-        }
-        return booleanQuery;
+    static Query getSimQuery(Analyzer analyzer, String fieldName, String text) 
throws IOException {
+        return createLSHQuery(fieldName, getTokens(analyzer, fieldName, text), 
1f,1f);
     }
 
-
     public static byte[] toByteArray(List<Double> values) {
         int blockSize = Double.SIZE / Byte.SIZE;
         byte[] bytes = new byte[values.size() * blockSize];
@@ -156,7 +150,7 @@ public class SimSearchUtils {
                         String fvString = doc.get(similarityFieldName);
                         if (fvString != null && fvString.trim().length() > 0) {
                             log.trace("generating sim query on field {} and 
text {}", similarityFieldName, fvString);
-                            BooleanQuery simQuery = 
SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
+                            Query simQuery = 
SimSearchUtils.getSimQuery(analyzer, similarityFieldName, fvString);
                             booleanQuery.add(new BooleanClause(simQuery, 
SHOULD));
                             log.trace("similarity query generated for {}", 
pd.name);
                         }
@@ -174,4 +168,63 @@ public class SimSearchUtils {
         }
     }
 
+    private static Query createLSHQuery(String field, Collection<BytesRef> 
minhashes,
+                                        float similarity, float 
expectedTruePositive) {
+        int bandSize = 1;
+        if (expectedTruePositive < 1) {
+            bandSize = computeBandSize(minhashes.size(), similarity, 
expectedTruePositive);
+        }
+
+        BooleanQuery builder = new BooleanQuery();
+        BooleanQuery childBuilder = new BooleanQuery();
+        int rowInBand = 0;
+        for (BytesRef minHash : minhashes) {
+            TermQuery tq = new TermQuery(new Term(field, minHash));
+            if (bandSize == 1) {
+                builder.add(new ConstantScoreQuery(tq), 
BooleanClause.Occur.SHOULD);
+            } else {
+                childBuilder.add(new ConstantScoreQuery(tq), 
BooleanClause.Occur.MUST);
+                rowInBand++;
+                if (rowInBand == bandSize) {
+                    builder.add(new ConstantScoreQuery(childBuilder),
+                            BooleanClause.Occur.SHOULD);
+                    childBuilder = new BooleanQuery();
+                    rowInBand = 0;
+                }
+            }
+        }
+        // Avoid a dubious narrow band, wrap around and pad with the start
+        if (childBuilder.clauses().size() > 0) {
+            for (BytesRef token : minhashes) {
+                TermQuery tq = new TermQuery(new Term(field, 
token.toString()));
+                childBuilder.add(new ConstantScoreQuery(tq), 
BooleanClause.Occur.MUST);
+                rowInBand++;
+                if (rowInBand == bandSize) {
+                    builder.add(new ConstantScoreQuery(childBuilder),
+                            BooleanClause.Occur.SHOULD);
+                    break;
+                }
+            }
+        }
+
+        if (expectedTruePositive >= 1.0 && similarity < 1) {
+            builder.setMinimumNumberShouldMatch((int) 
(Math.ceil(minhashes.size() * similarity)));
+        }
+        log.trace("similarity query with bands : {}, minShouldMatch : {}, no. 
of clauses : {}", bandSize,
+                builder.getMinimumNumberShouldMatch(), 
builder.clauses().size());
+        return builder;
+
+    }
+
+    private static int computeBandSize(int numHash, double similarity, double 
expectedTruePositive) {
+        for (int bands = 1; bands <= numHash; bands++) {
+            int rowsInBand = numHash / bands;
+            double truePositive = 1 - Math.pow(1 - Math.pow(similarity, 
rowsInBand), bands);
+            if (truePositive > expectedTruePositive) {
+                return rowsInBand;
+            }
+        }
+        return 1;
+    }
+
 }

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java
 Thu Nov 22 15:46:44 2018
@@ -2988,6 +2988,7 @@ public class LucenePropertyIndexTest ext
             String name = split[0];
             Tree child = test.addChild(name);
             child.setProperty("fv", blob, Type.BINARY);
+            children.add(child.getPath());
         }
         root.commit();
 
@@ -3006,7 +3007,6 @@ public class LucenePropertyIndexTest ext
             baseline.clear();
             baseline.addAll(current);
         }
-
     }
 
     @Test
@@ -3017,6 +3017,106 @@ public class LucenePropertyIndexTest ext
 
         Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
         idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            int i1 = line.indexOf(',');
+            String name = line.substring(0, i1);
+            String value = line.substring(i1 + 1);
+            Tree child = test.addChild(name);
+            child.setProperty("fv", value, Type.STRING);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();
+        for (String similarPath : children) {
+            String query = "select [jcr:path] from [nt:base] where similar(., 
'" + similarPath + "')";
+
+            Iterator<String> result = executeQuery(query, 
"JCR-SQL2").iterator();
+            List<String> current = new LinkedList<>();
+            while (result.hasNext()) {
+                String next = result.next();
+                current.add(next);
+            }
+            assertNotEquals(baseline, current);
+            baseline.clear();
+            baseline.addAll(current);
+        }
+    }
+
+    @Test
+    public void testRepSimilarWithBinaryFeatureVectorsAndRerank() throws 
Exception {
+
+        IndexDefinitionBuilder idxb = new IndexDefinitionBuilder().noAsync();
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity(true).nodeScopeIndex().propertyIndex();
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            String[] split = line.split(",");
+            List<Double> values = new LinkedList<>();
+            int i = 0;
+            for (String s : split) {
+                if (i > 0) {
+                    values.add(Double.parseDouble(s));
+                }
+                i++;
+            }
+
+            byte[] bytes = SimSearchUtils.toByteArray(values);
+            List<Double> actual = SimSearchUtils.toDoubles(bytes);
+            assertEquals(values, actual);
+
+            Blob blob = root.createBlob(new ByteArrayInputStream(bytes));
+            String name = split[0];
+            Tree child = test.addChild(name);
+            child.setProperty("fv", blob, Type.BINARY);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();
+        for (String similarPath : children) {
+            String query = "select [jcr:path] from [nt:base] where similar(., 
'" + similarPath + "')";
+
+            Iterator<String> result = executeQuery(query, 
"JCR-SQL2").iterator();
+            List<String> current = new LinkedList<>();
+            while (result.hasNext()) {
+                String next = result.next();
+                current.add(next);
+            }
+            assertNotEquals(baseline, current);
+            baseline.clear();
+            baseline.addAll(current);
+        }
+    }
+
+    @Test
+    public void testRepSimilarWithStringFeatureVectorsAndRerank() throws 
Exception {
+
+        IndexDefinitionBuilder idxb = new IndexDefinitionBuilder().noAsync();
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity(true).nodeScopeIndex().propertyIndex();
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
         root.commit();
 
 

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzerTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzerTest.java?rev=1847178&r1=1847177&r2=1847178&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzerTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/fv/LSHAnalyzerTest.java
 Thu Nov 22 15:46:44 2018
@@ -80,7 +80,6 @@ public class LSHAnalyzerTest {
       IndexWriter writer = new IndexWriter(directory, new 
IndexWriterConfig(Version.LUCENE_47, analyzer));
       DirectoryReader reader = null;
       try {
-
           List<Double> values = new LinkedList<>();
           values.add(0.1d);
           values.add(0.3d);


Reply via email to