fabriziofortino commented on code in PR #589:
URL: https://github.com/apache/jackrabbit-oak/pull/589#discussion_r893311381


##########
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java:
##########
@@ -3032,6 +3089,50 @@ public void testRepSimilarWithStringFeatureVectors() 
throws Exception {
         }
     }
 
+    @Test
+    public void testRepSimilarWithStringFeatureVectorsWithDisabledStrings() 
throws Exception {
+
+        IndexDefinitionBuilder idxb = new 
LuceneIndexDefinitionBuilder().noAsync().indexSimilarityStrings(false);
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            int i1 = line.indexOf(',');
+            String name = line.substring(0, i1);
+            String value = line.substring(i1 + 1);
+            Tree child = test.addChild(name);
+            child.setProperty("fv", value, Type.STRING);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();

Review Comment:
   remove `baseline`



##########
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java:
##########
@@ -2988,6 +2988,63 @@ public void testRepSimilarWithBinaryFeatureVectors() 
throws Exception {
         }
     }
 
+    @Test
+    public void testRepSimilarWithBinaryFeatureVectorsWithDisabledBinary() 
throws Exception {
+
+        IndexDefinitionBuilder idxb = new 
LuceneIndexDefinitionBuilder().noAsync().indexSimilarityBinaries(false);
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            String[] split = line.split(",");
+            List<Double> values = new LinkedList<>();
+            int i = 0;
+            for (String s : split) {
+                if (i > 0) {
+                    values.add(Double.parseDouble(s));
+                }
+                i++;
+            }
+
+            byte[] bytes = SimSearchUtils.toByteArray(values);
+            List<Double> actual = SimSearchUtils.toDoubles(bytes);
+            assertEquals(values, actual);
+
+            Blob blob = root.createBlob(new ByteArrayInputStream(bytes));
+            String name = split[0];
+            Tree child = test.addChild(name);
+            child.setProperty("fv", blob, Type.BINARY);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();

Review Comment:
   `baseline` can be removed. It's updated but never queried



##########
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java:
##########
@@ -2988,6 +2988,63 @@ public void testRepSimilarWithBinaryFeatureVectors() 
throws Exception {
         }
     }
 
+    @Test
+    public void testRepSimilarWithBinaryFeatureVectorsWithDisabledBinary() 
throws Exception {
+
+        IndexDefinitionBuilder idxb = new 
LuceneIndexDefinitionBuilder().noAsync().indexSimilarityBinaries(false);
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            String[] split = line.split(",");
+            List<Double> values = new LinkedList<>();
+            int i = 0;
+            for (String s : split) {
+                if (i > 0) {
+                    values.add(Double.parseDouble(s));
+                }
+                i++;
+            }
+
+            byte[] bytes = SimSearchUtils.toByteArray(values);
+            List<Double> actual = SimSearchUtils.toDoubles(bytes);
+            assertEquals(values, actual);
+
+            Blob blob = root.createBlob(new ByteArrayInputStream(bytes));
+            String name = split[0];
+            Tree child = test.addChild(name);
+            child.setProperty("fv", blob, Type.BINARY);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();
+        for (String similarPath : children) {
+            String query = "select [jcr:path] from [nt:base] where similar(., 
'" + similarPath + "')";
+
+            Iterator<String> result = executeQuery(query, 
"JCR-SQL2").iterator();
+            List<String> current = new LinkedList<>();
+            while (result.hasNext()) {
+                String next = result.next();
+                current.add(next);
+            }
+            assertTrue("binary data for similarity should not be indexed" 
,current.size() == 0);

Review Comment:
   this can be improved
   ```suggestion
               assertEquals("binary data for similarity should not be indexed", 
0, current.size());
   ```



##########
oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java:
##########
@@ -488,6 +491,8 @@ protected IndexDefinition(NodeState root, NodeState defn, 
IndexFormatVersion ver
                     defn.getProperty(TYPE_PROPERTY_NAME) != null &&
                             
DYNAMIC_BOOST_LITE.contains(defn.getProperty(TYPE_PROPERTY_NAME).getValue(Type.STRING))
             );
+            this.indexSimilarityBinaries = getOptionalValue(defn, 
getIndexSimilarityBinariesDefinitionKey(), true);
+            this.indexSimilarityStrings = getOptionalValue(defn, 
getIndexSimilarityStringsDefinitionKey(), true);

Review Comment:
   instead of doing this, we could use the same strategy used for 
`dynamicBoostLite` (see line above) with a single string property like 
`disableIndexSimilarityBinary` (similar for strings). This property can have 
one or more index types. Examples:
   
   ```
   disableIndexSimilarityBinary=lucene
   disableIndexSimilarityBinary=elasticsearch
   disableIndexSimilarityBinary=lucene,elasticsearch
   ```
   
   In this way we could have the same functionality for the different types 
without the need to introduce a specific key.



##########
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LucenePropertyIndexTest.java:
##########
@@ -3032,6 +3089,50 @@ public void testRepSimilarWithStringFeatureVectors() 
throws Exception {
         }
     }
 
+    @Test
+    public void testRepSimilarWithStringFeatureVectorsWithDisabledStrings() 
throws Exception {
+
+        IndexDefinitionBuilder idxb = new 
LuceneIndexDefinitionBuilder().noAsync().indexSimilarityStrings(false);
+        
idxb.indexRule("nt:base").property("fv").useInSimilarity().nodeScopeIndex().propertyIndex();
+
+        Tree idx = root.getTree("/").getChild("oak:index").addChild("test1");
+        idxb.build(idx);
+        root.commit();
+
+        Tree test = root.getTree("/").addChild("test");
+
+        URI uri = 
getClass().getResource("/org/apache/jackrabbit/oak/query/fvs.csv").toURI();
+        File file = new File(uri);
+
+        Collection<String> children = new LinkedList<>();
+
+        for (String line : IOUtils.readLines(new FileInputStream(file), 
Charset.defaultCharset())) {
+            int i1 = line.indexOf(',');
+            String name = line.substring(0, i1);
+            String value = line.substring(i1 + 1);
+            Tree child = test.addChild(name);
+            child.setProperty("fv", value, Type.STRING);
+            children.add(child.getPath());
+        }
+        root.commit();
+
+        // check that similarity changes across different feature vectors
+        List<String> baseline = new LinkedList<>();
+        for (String similarPath : children) {
+            String query = "select [jcr:path] from [nt:base] where similar(., 
'" + similarPath + "')";
+
+            Iterator<String> result = executeQuery(query, 
"JCR-SQL2").iterator();
+            List<String> current = new LinkedList<>();
+            while (result.hasNext()) {
+                String next = result.next();
+                current.add(next);
+            }
+            assertTrue("String data for similarity should not be 
indexed",current.size() == 0);

Review Comment:
   ```suggestion
               assertEquals("String data for similarity should not be indexed", 
0, current.size());
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to