fabriziofortino commented on a change in pull request #489:
URL: https://github.com/apache/jackrabbit-oak/pull/489#discussion_r803563539



##########
File path: 
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
##########
@@ -147,6 +146,14 @@ public void testQueryFullTextWithTitle() throws Exception {
         // wildcard works
         assertQuery("//element(*, dam:Asset)[jcr:contains(., 'title*')]", 
XPATH,
                 Arrays.asList("/test/asset1", "/test/asset2", "/test/asset3"));
+

Review comment:
       not sure if the whole `testQueryFullTextWithTitle` should stay in this 
class since it does not test dynamic boost.

##########
File path: 
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
##########
@@ -779,41 +779,13 @@ private static QueryBuilder referenceConstraint(String 
uuid) {
     }
 
     private static QueryBuilder fullTextQuery(String text, String fieldName, 
PlanResult pr) {
-        // default match query are executed in OR, we need to use AND instead 
to avoid that
-        // every document having at least one term in the `text` will match. 
If there are multiple
-        // contains clause they will go to different match queries and will be 
executed in OR
-        if (FieldNames.FULLTEXT.equals(fieldName) && 
!pr.indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
-            MultiMatchQueryBuilder multiMatchQuery = multiMatchQuery(text)
-                    .operator(Operator.AND)
-                    .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
-            pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd -> 
multiMatchQuery.field(pd.name, pd.boost));
-            // Add the query for actual fulltext field also. That query would 
not be boosted
-            // and could contain other parts like renditions, node name, etc
-            return multiMatchQuery.field(fieldName);
-        } else {
-            // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-query-string-query.html
-            // simpleQueryStringQuery does not support leading wildcards 
whereas it's supported by default in queryStringQuery
-            // Not using queryStringQuery by default , since some functional 
cases break.
-            // simpleQueryStringQuery is less Strict, for instance searches 
for terms starting with / work, whereas
-            // with queryStringQuery, they throw an Exception (which 
ultimately results in an empty result set in oak),
-            // so using simpleQueryStringQuery by default would break certain 
functional cases.
-            // So only support this in case any term in the text String 
actually starts with *
-            // For example *hello or Hello *world
-            String[] textTerms = text.split(" ");
-            boolean allowLeadingWildCard = false;
-            for(String textTerm : textTerms) {
-                if (textTerm.startsWith("*")) {
-                    allowLeadingWildCard = true;
-                    break;
-                }
-            }
-
-            if (allowLeadingWildCard) {
-                return 
queryStringQuery(text).field(fieldName).defaultOperator(Operator.AND);
-            } else {
-                return 
simpleQueryStringQuery(text).analyzeWildcard(true).field(fieldName).defaultOperator(Operator.AND);
-            }
+        LOG.debug("fullTextQuery for text: '{}', fieldName: '{}'", text, 
fieldName);
+        QueryStringQueryBuilder queryStringQueryBuilder = 
queryStringQuery(text).defaultOperator(Operator.AND)
+                .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);

Review comment:
       I think I have discovered the reason some tests in Elastic were not 
successful. In Lucene, we rewrite the query escaping some reserved chars 
(including `/`, `|`, `&`). Without this Lucene queries would be exactly the 
same as Elastic query_string queries.
   
   If we use the same rewrite function (the modifier needs to be widened from 
`protected` to `public`) we get the same results. Changes in 
`oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexQueryCommonTest.java
 ` can be reverted.
   
   ```suggestion
           QueryStringQueryBuilder queryStringQueryBuilder = 
queryStringQuery(FulltextIndex.rewriteQueryText(text))
                   .defaultOperator(Operator.AND)
                   .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
   ```

##########
File path: 
oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
##########
@@ -264,16 +259,104 @@ public void fulltextWithModifiedNodeScopeIndex() throws 
Exception {
         test.addChild("a").setProperty("analyzed_field", "sun.jpg");
         root.commit();
 
-        assertEventually(() ->
-                assertQuery("//*[jcr:contains(@analyzed_field, 'SUN.JPG')] ", 
XPATH, Collections.singletonList("/test/a")));
+        assertEventually(() -> assertQuery("//*[jcr:contains(@analyzed_field, 
'SUN.JPG')] ", XPATH, Collections.singletonList("/test/a")));
 
         // add nodeScopeIndex at a later stage
-        index.getChild("indexRules").getChild("nt:base").getChild("properties")
-                
.getChild("analyzed_field").setProperty(FulltextIndexConstants.PROP_NODE_SCOPE_INDEX,
 true);
+        index.getChild("indexRules")
+                .getChild("nt:base")
+                .getChild("properties")
+                .getChild("analyzed_field")
+                .setProperty(FulltextIndexConstants.PROP_NODE_SCOPE_INDEX, 
true);
+        root.commit();
+
+        assertEventually(() -> assertQuery("//*[jcr:contains(., 'jpg')] ", 
XPATH, Collections.singletonList("/test/a")));
+    }
+
+    @Test
+    public void nodeScopeFulltextQuery() throws CommitFailedException {
+        IndexDefinitionBuilder builder = createIndex("a", "b").async("async");
+        builder.indexRule("nt:base").property("a").nodeScopeIndex();
+        builder.indexRule("nt:base").property("b").nodeScopeIndex();
+
+        setIndex(UUID.randomUUID().toString(), builder);
+        root.commit();
+
+        prepareTestContent();
+
+        assertEventually(() -> {
+            // case insensitive
+            assertQuery("//*[jcr:contains(., 'WORLD')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // wild card
+            assertQuery("//*[jcr:contains(., 'Hell*')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'He*o')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., '*llo')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., '?orld')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'wo?ld')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'worl?')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // space explained as AND
+            assertQuery("//*[jcr:contains(., 'hello world')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // exclude
+            assertQuery("//*[jcr:contains(., 'hello -world')] ", XPATH, 
Arrays.asList("/test/nodea"));
+
+            // explicit OR
+            assertQuery("//*[jcr:contains(., 'ocean OR world')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+        });
+    }
+
+    @Test
+    public void analyzedNodeScopeFulltextQuery() throws CommitFailedException {
+        IndexDefinitionBuilder builder = createIndex("a", "b").async("async");
+        builder.indexRule("nt:base").property("a").nodeScopeIndex().analyzed();
+        builder.indexRule("nt:base").property("b").nodeScopeIndex().analyzed();
+
+        setIndex(UUID.randomUUID().toString(), builder);
         root.commit();
 
-        assertEventually(() ->
-                assertQuery("//*[jcr:contains(., 'jpg')] ", XPATH, 
Collections.singletonList("/test/a")));
+        prepareTestContent();
+
+        assertEventually(() -> {
+            // case insensitive
+            assertQuery("//*[jcr:contains(., 'WORLD')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // wild card
+            assertQuery("//*[jcr:contains(., 'Hell*')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'He*o')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., '*llo')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., '?orld')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'wo?ld')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+            assertQuery("//*[jcr:contains(., 'worl?')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // space explained as AND
+            assertQuery("//*[jcr:contains(., 'hello world')] ", XPATH, 
Arrays.asList("/test/nodeb", "/test/nodec"));
+
+            // exclude
+            assertQuery("//*[jcr:contains(., 'hello -world')] ", XPATH, 
Arrays.asList("/test/nodea"));
+
+            // explicit OR
+            assertQuery("//*[jcr:contains(., 'ocean OR world')] ", XPATH, 
Arrays.asList("/test/nodea", "/test/nodeb", "/test/nodec"));
+
+            // analyzed field
+            assertQuery("//*[jcr:contains(., 'soccer')] ", XPATH, 
Arrays.asList("/test/nodeb"));

Review comment:
       can we add a few queries using the lucene syntax on a single analyzed 
query? eg `//*[jcr:contains(a, 'Hell*')]`

##########
File path: 
oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/dynamicBoost/DynamicBoostTest.java
##########
@@ -224,7 +231,12 @@ public void testQueryDynamicBoostLiteWildcard() throws 
Exception {
         createAssetsIndexAndProperties(true, true);
         prepareTestAssets();
 
-        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'blu*')", SQL2, Arrays.asList("/test/asset3"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'blu?')", SQL2, Arrays.asList("/test/asset3"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'bl?e')", SQL2, Arrays.asList("/test/asset3"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'?lue')", SQL2, Arrays.asList("/test/asset3"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'coff*')", SQL2, Arrays.asList("/test/asset2"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'co*ee')", SQL2, Arrays.asList("/test/asset2"));
+        assertQuery("select [jcr:path] from [dam:Asset] where contains(*, 
'*ffee')", SQL2, Arrays.asList("/test/asset2"));

Review comment:
       nice test set 👍 




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to