nfsantos commented on code in PR #860:
URL: https://github.com/apache/jackrabbit-oak/pull/860#discussion_r1126414469


##########
oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java:
##########
@@ -151,52 +171,253 @@ public void pathTransformationsWithPathRestrictions() 
throws Exception {
 
         assertEventually(() -> {
             // ALL CHILDREN
-            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/a", "/test/c/d"));
-            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
-            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c","/test/b","/test/c/d/j:c",
+            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, asList("/test/a", "/test/c/d"));
+            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
+            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
asList("/test/a/j:c","/test/b","/test/c/d/j:c",
                     "/test/e/temp:c", "/test/f/d/temp:c","/test/g/e/temp:c"));
 
             // DIRECT CHILDREN
-            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/e"));
-            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/b"));
+            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/e"));
+            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
singletonList("/test/b"));
 
             // EXACT
-            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c"));
-            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c"));
+            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/c"));
+            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
singletonList("/test/a/j:c"));
 
             // PARENT
-
             assertQuery("select a.[jcr:path] as [jcr:path] from [nt:base] as a 
\n" +
                     "  inner join [nt:base] as b on ischildnode(b, a)\n" +
                     "  where isdescendantnode(a, '/tmp') \n" +
                     "  and b.[analyzed_field] = 'bar'\n" +
-                    "  and a.[abc] is not null ", SQL2, 
Arrays.asList("/tmp/a", "/tmp/c/d"));
+                    "  and a.[abc] is not null ", SQL2, asList("/tmp/a", 
"/tmp/c/d"));
         });
     }
 
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClass() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        root.commit();
+
+        // standard english analyzer stems verbs (jumping -> jump)
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, 'jump')", singletonList("/test")));

Review Comment:
   As before, I suggest adding another property that will not be matched, and 
also a query that returns no results, for instance, searching for 'jum' or 
'jumpingjack'.



##########
oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java:
##########
@@ -40,72 +53,76 @@ protected void assertEventually(Runnable r) {
 
     @Test
     public void defaultAnalyzer() throws Exception {
-        Tree test = setup();
+        setup();
 
+        Tree test = root.getTree("/").addChild("test");
         test.addChild("a").setProperty("analyzed_field", "sun.jpg");
         root.commit();

Review Comment:
   Suggestion: add an additional property to the tree that does not match the 
query condition, to test that the code returns only the expected property. 
Without it, this test would pass even if the query engine returned all 
properties, regardless of whether they match the query condition. 
   
   The same suggestion applies to the remainder of the tests in this file.



##########
oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java:
##########
@@ -151,52 +171,253 @@ public void pathTransformationsWithPathRestrictions() 
throws Exception {
 
         assertEventually(() -> {
             // ALL CHILDREN
-            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/a", "/test/c/d"));
-            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
-            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c","/test/b","/test/c/d/j:c",
+            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, asList("/test/a", "/test/c/d"));
+            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
+            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
asList("/test/a/j:c","/test/b","/test/c/d/j:c",
                     "/test/e/temp:c", "/test/f/d/temp:c","/test/g/e/temp:c"));
 
             // DIRECT CHILDREN
-            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/e"));
-            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/b"));
+            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/e"));
+            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
singletonList("/test/b"));
 
             // EXACT
-            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c"));
-            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c"));
+            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/c"));
+            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
singletonList("/test/a/j:c"));
 
             // PARENT
-
             assertQuery("select a.[jcr:path] as [jcr:path] from [nt:base] as a 
\n" +
                     "  inner join [nt:base] as b on ischildnode(b, a)\n" +
                     "  where isdescendantnode(a, '/tmp') \n" +
                     "  and b.[analyzed_field] = 'bar'\n" +
-                    "  and a.[abc] is not null ", SQL2, 
Arrays.asList("/tmp/a", "/tmp/c/d"));
+                    "  and a.[abc] is not null ", SQL2, asList("/tmp/a", 
"/tmp/c/d"));
         });
     }
 
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClass() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");

Review Comment:
   Is there a test for error handling when the analyzer class does not exist?



##########
oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexCommonTest.java:
##########
@@ -67,4 +72,27 @@ protected List<String> getExpectedLogMessage() {
         expectedLogList.add(log2);
         return expectedLogList;
     }
+
+    @Test
+    /*
+     * analyzers by name are not possible in lucene, this test can run on 
elastic only
+     */
+    public void fulltextSearchWithBuiltInAnalyzerName() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_NAME, "german");

Review Comment:
   How does Lucene handle the case where an analyzer is specified by name? 
Should we have a test for that error case?



##########
oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java:
##########
@@ -151,52 +171,253 @@ public void pathTransformationsWithPathRestrictions() 
throws Exception {
 
         assertEventually(() -> {
             // ALL CHILDREN
-            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/a", "/test/c/d"));
-            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
-            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c","/test/b","/test/c/d/j:c",
+            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, asList("/test/a", "/test/c/d"));
+            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
+            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
asList("/test/a/j:c","/test/b","/test/c/d/j:c",
                     "/test/e/temp:c", "/test/f/d/temp:c","/test/g/e/temp:c"));
 
             // DIRECT CHILDREN
-            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/e"));
-            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/b"));
+            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/e"));
+            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
singletonList("/test/b"));
 
             // EXACT
-            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c"));
-            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c"));
+            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/c"));
+            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
singletonList("/test/a/j:c"));
 
             // PARENT
-
             assertQuery("select a.[jcr:path] as [jcr:path] from [nt:base] as a 
\n" +
                     "  inner join [nt:base] as b on ischildnode(b, a)\n" +
                     "  where isdescendantnode(a, '/tmp') \n" +
                     "  and b.[analyzed_field] = 'bar'\n" +
-                    "  and a.[abc] is not null ", SQL2, 
Arrays.asList("/tmp/a", "/tmp/c/d"));
+                    "  and a.[abc] is not null ", SQL2, asList("/tmp/a", 
"/tmp/c/d"));
         });
     }
 
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClass() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        root.commit();
+
+        // standard english analyzer stems verbs (jumping -> jump)
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, 'jump')", singletonList("/test")));
+    }
+
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClassAndConfigurationParams() 
throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+            anl.setProperty("luceneMatchVersion", "LUCENE_47");
+            
anl.addChild("stopwords").addChild(JCR_CONTENT).setProperty(JCR_DATA, "dog");
+        });
 
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "dog and cat");
+        root.commit();
+
+        // standard english analyzer stems verbs (jumping -> jump)
+        assertEventually(() -> {
+            assertQuery("select * from [nt:base] where CONTAINS(*, 'dog')", 
emptyList());
+            assertQuery("select * from [nt:base] where CONTAINS(*, 'cat')", 
singletonList("/test"));
+        });
+    }
+
+    @Test
+    public void fulltextSearchWithCustomComposedFilters() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "whitespace");
+
+            Tree stopFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Stop");
+            stopFilter.setProperty("words", "stop1.txt, stop2.txt");
+            stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "foo");
+            stopFilter.addChild("stop2.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "and");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        root.commit();
+
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, 'fox foo jumping')", singletonList("/test")));
+    }
+
+    @Test
+    public void fulltextSearchWithCustomComposedAnalyzer() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+
+            Tree charFilters = 
anl.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS);
+            charFilters.addChild("HTMLStrip");
+            Tree mappingFilter = charFilters.addChild("Mapping");
+            mappingFilter.setProperty("mapping", "mappings.txt");
+            
mappingFilter.addChild("mappings.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, 
getHinduArabicMapping());
+
+            Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS);
+            filters.addChild("LowerCase");
+            Tree stopFilter = filters.addChild("Stop");
+            stopFilter.setProperty("words", "stop1.txt, stop2.txt");
+            stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "my");
+            stopFilter.addChild("stop2.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "is");
+            filters.addChild("PorterStem");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "My license plate is ٢٥٠١٥");
+        root.commit();
+
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, '25015')", singletonList("/test")));
+    }
+
+    protected String getHinduArabicMapping() {
+        // Hindu-Arabic numerals conversion from
+        // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-mapping-charfilter.html
+        return "\"٠\" => \"0\"\n\"١\" => \"1\"\n\"٢\" => \"2\"\n\"٣\" => 
\"3\"\n\"٤\" => \"4\"\n" +
+                "\"٥\" => \"5\"\n\"٦\" => \"6\"\n\"٧\" => \"7\"\n\"٨\" => 
\"8\"\n\"٩\" => \"9\"";
+    }
+
+    //OAK-4805
+    @Test
+    public void badIndexDefinitionShouldLetQEWork() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            //This would allow index def to get committed. Else bad index def 
can't be created.
+            idx.setProperty(IndexConstants.ASYNC_PROPERTY_NAME, "async");
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+            Tree synFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym");
+            synFilter.setProperty("synonyms", "syn.txt");
+            // Don't add syn.txt to make analyzer (and hence index def) invalid
+            // 
synFilter.addChild("syn.txt").addChild(JCR_CONTENT).setProperty(JCR_DATA, 
"blah, foo, bar");
+        });
+
+        //Using this version of executeQuery as we don't want a result row 
quoting the exception
+        assertEventually(() -> {
+            try {
+                executeQuery("SELECT * FROM [nt:base] where a='b'", SQL2, 
QueryEngine.NO_BINDINGS);
+            } catch (ParseException e) {
+                throw new RuntimeException(e);
+            }
+        });
+    }
+
+    @Test
+    public void testSynonyms() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+            Tree synFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym");
+            synFilter.setProperty("synonyms", "syn.txt");
+            synFilter.addChild("syn.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "plane, airplane, 
aircraft\nflies=>scars");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        Tree testNodeChild = test.addChild("node");
+        testNodeChild.setProperty("foo", "an aircraft flies");
+        root.commit();
+
+        assertEventually(() -> {
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'plane')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'airplane')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'aircraft')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'scars')", 
singletonList("/test/node"));
+        });
+    }
+
+    //OAK-4516
+    @Test
+    public void wildcardQueryToLookupUnanalyzedText() throws Exception {
+        Tree index = setup(builder -> {
+            builder.indexRule("nt:base").property("propa").analyzed();
+            builder.indexRule("nt:base").property("propb").nodeScopeIndex();
+        }, idx -> 
idx.addChild(ANALYZERS).setProperty(FulltextIndexConstants.INDEX_ORIGINAL_TERM, 
true),
+                "propa", "propb");
+
+        Tree rootTree = root.getTree("/");
+        Tree node1Tree = rootTree.addChild("node1");
+        node1Tree.setProperty("propa", "abcdef");
+        node1Tree.setProperty("propb", "abcdef");
+        Tree node2Tree = rootTree.addChild("node2");
+        node2Tree.setProperty("propa", "abc_def");
+        node2Tree.setProperty("propb", "abc_def");
+        root.commit();
+
+        String fullIndexName = indexOptions.getIndexType() + ":" + 
index.getName();
+
+        assertEventually(() -> {
+            //normal query still works
+            String query = "select [jcr:path] from [nt:base] where 
contains('propa', 'abc*')";
+            String explanation = explain(query);
+            assertThat(explanation, containsString(fullIndexName));
+            assertQuery(query, asList("/node1", "/node2"));

Review Comment:
   All the usages of `asList` and `singletonList` can now be replaced by 
`List.of(...)`



##########
oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextIndexCommonTest.java:
##########
@@ -67,4 +72,27 @@ protected List<String> getExpectedLogMessage() {
         expectedLogList.add(log2);
         return expectedLogList;
     }
+
+    @Test
+    /*
+     * analyzers by name are not possible in lucene, this test can run on 
elastic only
+     */
+    public void fulltextSearchWithBuiltInAnalyzerName() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_NAME, "german");

Review Comment:
   I think we should have some more tests for error handling, for instance, if 
the name given here is not valid.



##########
oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java:
##########
@@ -151,52 +171,253 @@ public void pathTransformationsWithPathRestrictions() 
throws Exception {
 
         assertEventually(() -> {
             // ALL CHILDREN
-            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/a", "/test/c/d"));
-            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
-            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c","/test/b","/test/c/d/j:c",
+            assertQuery("/jcr:root/test//*[j:c/analyzed_field = 'bar']", 
XPATH, asList("/test/a", "/test/c/d"));
+            assertQuery("/jcr:root/test//*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/c/d", "/test/e", "/test/f/d", "/test/g/e"));
+            assertQuery("/jcr:root/test//*[d/*/analyzed_field = 'bar']", 
XPATH, asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test//*[analyzed_field = 'bar']", XPATH, 
asList("/test/a/j:c","/test/b","/test/c/d/j:c",
                     "/test/e/temp:c", "/test/f/d/temp:c","/test/g/e/temp:c"));
 
             // DIRECT CHILDREN
-            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a", "/test/e"));
-            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c", "/test/f"));
-            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/b"));
+            assertQuery("/jcr:root/test/*[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/*[*/analyzed_field = 'bar']", XPATH, 
asList("/test/a", "/test/e"));
+            assertQuery("/jcr:root/test/*[d/*/analyzed_field = 'bar']", XPATH, 
asList("/test/c", "/test/f"));
+            assertQuery("/jcr:root/test/*[analyzed_field = 'bar']", XPATH, 
singletonList("/test/b"));
 
             // EXACT
-            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a"));
-            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/c"));
-            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
Arrays.asList("/test/a/j:c"));
+            assertQuery("/jcr:root/test/a[j:c/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/a[*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/a"));
+            assertQuery("/jcr:root/test/c[d/*/analyzed_field = 'bar']", XPATH, 
singletonList("/test/c"));
+            assertQuery("/jcr:root/test/a/j:c[analyzed_field = 'bar']", XPATH, 
singletonList("/test/a/j:c"));
 
             // PARENT
-
             assertQuery("select a.[jcr:path] as [jcr:path] from [nt:base] as a 
\n" +
                     "  inner join [nt:base] as b on ischildnode(b, a)\n" +
                     "  where isdescendantnode(a, '/tmp') \n" +
                     "  and b.[analyzed_field] = 'bar'\n" +
-                    "  and a.[abc] is not null ", SQL2, 
Arrays.asList("/tmp/a", "/tmp/c/d"));
+                    "  and a.[abc] is not null ", SQL2, asList("/tmp/a", 
"/tmp/c/d"));
         });
     }
 
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClass() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        root.commit();
+
+        // standard english analyzer stems verbs (jumping -> jump)
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, 'jump')", singletonList("/test")));
+    }
+
+    @Test
+    public void fulltextSearchWithBuiltInAnalyzerClassAndConfigurationParams() 
throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            anl.setProperty(FulltextIndexConstants.ANL_CLASS, 
"org.apache.lucene.analysis.en.EnglishAnalyzer");
+            anl.setProperty("luceneMatchVersion", "LUCENE_47");
+            
anl.addChild("stopwords").addChild(JCR_CONTENT).setProperty(JCR_DATA, "dog");
+        });
 
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "dog and cat");
+        root.commit();
+
+        // standard english analyzer stems verbs (jumping -> jump)
+        assertEventually(() -> {
+            assertQuery("select * from [nt:base] where CONTAINS(*, 'dog')", 
emptyList());
+            assertQuery("select * from [nt:base] where CONTAINS(*, 'cat')", 
singletonList("/test"));
+        });
+    }
+
+    @Test
+    public void fulltextSearchWithCustomComposedFilters() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "whitespace");
+
+            Tree stopFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Stop");
+            stopFilter.setProperty("words", "stop1.txt, stop2.txt");
+            stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "foo");
+            stopFilter.addChild("stop2.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "and");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "fox jumping");
+        root.commit();
+
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, 'fox foo jumping')", singletonList("/test")));
+    }
+
+    @Test
+    public void fulltextSearchWithCustomComposedAnalyzer() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+
+            Tree charFilters = 
anl.addChild(FulltextIndexConstants.ANL_CHAR_FILTERS);
+            charFilters.addChild("HTMLStrip");
+            Tree mappingFilter = charFilters.addChild("Mapping");
+            mappingFilter.setProperty("mapping", "mappings.txt");
+            
mappingFilter.addChild("mappings.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, 
getHinduArabicMapping());
+
+            Tree filters = anl.addChild(FulltextIndexConstants.ANL_FILTERS);
+            filters.addChild("LowerCase");
+            Tree stopFilter = filters.addChild("Stop");
+            stopFilter.setProperty("words", "stop1.txt, stop2.txt");
+            stopFilter.addChild("stop1.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "my");
+            stopFilter.addChild("stop2.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "is");
+            filters.addChild("PorterStem");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        test.setProperty("foo", "My license plate is ٢٥٠١٥");
+        root.commit();
+
+        assertEventually(() -> assertQuery("select * from [nt:base] where 
CONTAINS(*, '25015')", singletonList("/test")));
+    }
+
+    protected String getHinduArabicMapping() {
+        // Hindu-Arabic numerals conversion from
+        // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-mapping-charfilter.html
+        return "\"٠\" => \"0\"\n\"١\" => \"1\"\n\"٢\" => \"2\"\n\"٣\" => 
\"3\"\n\"٤\" => \"4\"\n" +
+                "\"٥\" => \"5\"\n\"٦\" => \"6\"\n\"٧\" => \"7\"\n\"٨\" => 
\"8\"\n\"٩\" => \"9\"";
+    }
+
+    //OAK-4805
+    @Test
+    public void badIndexDefinitionShouldLetQEWork() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            //This would allow index def to get committed. Else bad index def 
can't be created.
+            idx.setProperty(IndexConstants.ASYNC_PROPERTY_NAME, "async");
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+            Tree synFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym");
+            synFilter.setProperty("synonyms", "syn.txt");
+            // Don't add syn.txt to make analyzer (and hence index def) invalid
+            // 
synFilter.addChild("syn.txt").addChild(JCR_CONTENT).setProperty(JCR_DATA, 
"blah, foo, bar");
+        });
+
+        //Using this version of executeQuery as we don't want a result row 
quoting the exception
+        assertEventually(() -> {
+            try {
+                executeQuery("SELECT * FROM [nt:base] where a='b'", SQL2, 
QueryEngine.NO_BINDINGS);
+            } catch (ParseException e) {
+                throw new RuntimeException(e);
+            }
+        });
+    }
+
+    @Test
+    public void testSynonyms() throws Exception {
+        setup(singletonList("foo"), idx -> {
+            Tree anl = 
idx.addChild(FulltextIndexConstants.ANALYZERS).addChild(FulltextIndexConstants.ANL_DEFAULT);
+            
anl.addChild(FulltextIndexConstants.ANL_TOKENIZER).setProperty(FulltextIndexConstants.ANL_NAME,
 "Standard");
+            Tree synFilter = 
anl.addChild(FulltextIndexConstants.ANL_FILTERS).addChild("Synonym");
+            synFilter.setProperty("synonyms", "syn.txt");
+            synFilter.addChild("syn.txt").addChild(JcrConstants.JCR_CONTENT)
+                    .setProperty(JcrConstants.JCR_DATA, "plane, airplane, 
aircraft\nflies=>scars");
+        });
+
+        Tree test = root.getTree("/").addChild("test");
+        Tree testNodeChild = test.addChild("node");
+        testNodeChild.setProperty("foo", "an aircraft flies");
+        root.commit();
+
+        assertEventually(() -> {
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'plane')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'airplane')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'aircraft')", 
singletonList("/test/node"));
+            assertQuery("select * from [nt:base] where 
ISDESCENDANTNODE('/test') and CONTAINS(*, 'scars')", 
singletonList("/test/node"));
+        });
+    }
+
+    //OAK-4516
+    @Test
+    public void wildcardQueryToLookupUnanalyzedText() throws Exception {
+        Tree index = setup(builder -> {
+            builder.indexRule("nt:base").property("propa").analyzed();
+            builder.indexRule("nt:base").property("propb").nodeScopeIndex();
+        }, idx -> 
idx.addChild(ANALYZERS).setProperty(FulltextIndexConstants.INDEX_ORIGINAL_TERM, 
true),
+                "propa", "propb");

Review Comment:
   Not aligned. I suggest using IntelliJ's autoformat.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to