This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 8a80267ebf OAK-11624: make ES queries more lenient when values cannot be converted (#2200)
8a80267ebf is described below

commit 8a80267ebf05e53ebbd2fdcae775a8ec4a9a66b7
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Thu Mar 27 07:08:42 2025 +0100

    OAK-11624: make ES queries more lenient when values cannot be converted (#2200)
    
    * OAK-11624: make ES queries more lenient when values cannot be converted
    
    * OAK-11624:fix failing tests
    
    * Update oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
    
    Co-authored-by: Thomas Mueller <[email protected]>
    
    ---------
    
    Co-authored-by: Thomas Mueller <[email protected]>
---
 .../index/elastic/index/ElasticDocumentMaker.java  |  9 +++++--
 .../index/elastic/query/ElasticRequestHandler.java |  3 ++-
 .../index/elastic/ElasticDynamicBoostTest.java     |  2 +-
 .../index/elastic/ElasticIndexAggregationTest.java | 14 +++++-----
 .../oak/plugins/index/FullTextIndexCommonTest.java | 31 ++++++++++++++++++++++
 5 files changed, 48 insertions(+), 11 deletions(-)

diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
index 59c84f9b01..a6297d4730 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
@@ -145,11 +145,16 @@ public class ElasticDocumentMaker extends 
FulltextDocumentMaker<ElasticDocument>
 
     /**
      * We store the value in :fulltext only when the {@link 
PropertyDefinition} has a regular expression (that means we
-     * were not able to create a ft property at mapping time) or the property 
is not analyzed.
+     * were not able to create a ft property at mapping time) or the property is not analyzed or the type could be ignored
+     * in case it is malformed (e.g. a date that cannot be parsed).
      */
     @Override
     protected boolean isFulltextValuePersistedAtNode(PropertyDefinition pd) {
-        return pd.isRegexp || !pd.analyzed;
+        return pd.isRegexp || !pd.analyzed
+                || pd.getType() == Type.DATE.tag()
+                || pd.getType() == Type.BOOLEAN.tag()
+                || pd.getType() == Type.LONG.tag()
+                || pd.getType() == Type.DOUBLE.tag();
     }
 
     /**
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 713d7d31f5..dbae459210 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -902,7 +902,8 @@ public class ElasticRequestHandler {
                 .query(rewriteQueryText(text))
                 .defaultOperator(Operator.And)
                 .type(TextQueryType.CrossFields)
-                .tieBreaker(0.5d);
+                .tieBreaker(0.5d)
+                .lenient(true);
         if (FieldNames.FULLTEXT.equals(fieldName)) {
             for (PropertyDefinition pd : 
pr.indexingRule.getNodeScopeAnalyzedProps()) {
                 qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" + 
pd.boost);
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
index 6fc11c6658..28cc5e999d 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
@@ -49,7 +49,7 @@ public class ElasticDynamicBoostTest extends 
DynamicBoostCommonTest {
     protected String getTestQueryDynamicBoostBasicExplained() {
         return "{\"_source\":{\"includes\":[\":path\"]}," +
                 
"\"query\":{\"bool\":{\"must\":[{\"bool\":{\"must\":[{\"query_string\":{\"default_operator\":\"and\","
 +
-                
"\"fields\":[\"title^1.0\",\":dynamic-boost-ft^1.0E-4\",\":fulltext\"],\"query\":\"plant\",\"tie_breaker\":0.5,\"type\":\"cross_fields\"}}],"
 +
+                
"\"fields\":[\"title^1.0\",\":dynamic-boost-ft^1.0E-4\",\":fulltext\"],\"lenient\":true,\"query\":\"plant\",\"tie_breaker\":0.5,\"type\":\"cross_fields\"}}],"
 +
                 
"\"should\":[{\"nested\":{\"path\":\"predictedTagsDynamicBoost\",\"query\":{\"function_score\":{\"boost\":9.999999747378752E-5,"
 +
                 
"\"functions\":[{\"field_value_factor\":{\"field\":\"predictedTagsDynamicBoost.boost\"}}],"
 +
                 
"\"query\":{\"match\":{\"predictedTagsDynamicBoost.value\":{\"query\":\"plant\"}}}}},\"score_mode\":\"avg\"}}]}}]}},"
 +
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
index 204c14f488..27b3cd54fe 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
@@ -82,37 +82,37 @@ public class ElasticIndexAggregationTest extends 
IndexAggregationCommonTest {
         assertEventually(() -> {
             String matchContentAll = "//element(*, nt:folder)[(jcr:contains(., 
'dog'))]";
             assertThat(explainXPath(matchContentAll), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
             assertQuery(matchContentAll, "xpath", 
List.of("/content/myFolder"));
 
             String matchContentSimple = "//element(*, 
nt:folder)[(jcr:contains(myFile, 'dog'))]";
             assertThat(explainXPath(matchContentSimple), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
             assertQuery(matchContentSimple, "xpath", 
List.of("/content/myFolder"));
 
             String matchContent = " //element(*, 
nt:folder)[(jcr:contains(myFile, 'dog') or jcr:contains(myFile/@jcr:title, 
'invalid') or jcr:contains(myFile/@jcr:description, 'invalid'))]";
             assertThat(explainXPath(matchContent), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
             assertQuery(matchContent, "xpath", List.of("/content/myFolder"));
 
             String matchTitle = " //element(*, 
nt:folder)[(jcr:contains(myFile, 'invalid') or jcr:contains(myFile/@jcr:title, 
'title') or jcr:contains(myFile/@jcr:description, 'invalid'))]";
             assertThat(explainXPath(matchTitle), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
             assertQuery(matchTitle, "xpath", List.of("/content/myFolder"));
 
             String matchDesc = " //element(*, nt:folder)[(jcr:contains(myFile, 
'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or 
jcr:contains(myFile/@jcr:description, 'description'))]";
             assertThat(explainXPath(matchDesc), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
             assertQuery(matchDesc, "xpath", List.of("/content/myFolder"));
 
             String matchNone = " //element(*, nt:folder)[(jcr:contains(myFile, 
'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or 
jcr:contains(myFile/@jcr:description, 'invalid'))]";
             assertThat(explainXPath(matchNone), containsString(
-                    "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+                    
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
             assertQuery(matchNone, "xpath", List.of());
 
             String matchOnlyTitleOr = " //element(*, 
nt:folder)[(jcr:contains(myFile/@jcr:title, 'title') or 
jcr:contains(myFile/@jcr:title, 'unknown') )]";
             assertThat(explainXPath(matchOnlyTitleOr), containsString(
-                    "\"fields\":[\"jcr:title\"],\"query\":\"title\""));
+                    
"\"fields\":[\"jcr:title\"],\"lenient\":true,\"query\":\"title\""));
             assertQuery(matchOnlyTitleOr, "xpath", 
List.of("/content/myFolder"));
         });
     }
diff --git 
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
 
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
index 859acaf435..edc24bcf24 100644
--- 
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
+++ 
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
@@ -325,6 +325,37 @@ public abstract class FullTextIndexCommonTest extends 
AbstractQueryTest {
                 assertQuery("//*[jcr:contains(., 'jpg')]", XPATH, 
List.of("/test/a")));
     }
 
+    @Test
+    public void fulltextWithMalformedFields() throws Exception {
+        setup(builder -> {
+            
builder.indexRule("nt:base").property("string_field").type("String").analyzed().nodeScopeIndex();
+            
builder.indexRule("nt:base").property("date_field").type("Date").analyzed().nodeScopeIndex();
+            
builder.indexRule("nt:base").property("long_field").type("Long").analyzed().nodeScopeIndex();
+            
builder.indexRule("nt:base").property("double_field").type("Double").analyzed().nodeScopeIndex();
+            
builder.indexRule("nt:base").property("bool_field").type("Boolean").analyzed().nodeScopeIndex();
+        }, idx -> {
+        }, "string_field", "date_field", "long_field", "double_field", 
"bool_field");
+
+        //add content
+        Tree test = root.getTree("/").addChild("test");
+        test.addChild("a").setProperty("string_field", "foo");
+        test.addChild("b").setProperty("date_field", "2025-bar");
+        test.addChild("c").setProperty("long_field", "123-bar");
+        test.addChild("d").setProperty("double_field", "456.78-bar");
+        test.addChild("e").setProperty("bool_field", "true-bar");
+
+        root.commit();
+
+        assertEventually(() -> {
+                    assertQuery("//*[jcr:contains(., 'foo')]", XPATH, 
List.of("/test/a"));
+                    assertQuery("//*[jcr:contains(., '2025')]", XPATH, 
List.of("/test/b"));
+                    assertQuery("//*[jcr:contains(., '123')]", XPATH, 
List.of("/test/c"));
+                    assertQuery("//*[jcr:contains(., '456.78')]", XPATH, 
List.of("/test/d"));
+                    assertQuery("//*[jcr:contains(., 'true')]", XPATH, 
List.of("/test/e"));
+                }
+        );
+    }
+
     protected void assertEventually(Runnable r) {
         TestUtil.assertEventually(r,
                 ((repositoryOptionsUtil.isAsync() ? 
repositoryOptionsUtil.defaultAsyncIndexingTimeInSeconds : 0) + 3000) * 5);

Reply via email to