This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 8a80267ebf OAK-11624: make ES queries more lenient when values cannot
be converted (#2200)
8a80267ebf is described below
commit 8a80267ebf05e53ebbd2fdcae775a8ec4a9a66b7
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Thu Mar 27 07:08:42 2025 +0100
OAK-11624: make ES queries more lenient when values cannot be converted
(#2200)
* OAK-11624: make ES queries more lenient when values cannot be converted
* OAK-11624: fix failing tests
* Update
oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
Co-authored-by: Thomas Mueller <[email protected]>
---------
Co-authored-by: Thomas Mueller <[email protected]>
---
.../index/elastic/index/ElasticDocumentMaker.java | 9 +++++--
.../index/elastic/query/ElasticRequestHandler.java | 3 ++-
.../index/elastic/ElasticDynamicBoostTest.java | 2 +-
.../index/elastic/ElasticIndexAggregationTest.java | 14 +++++-----
.../oak/plugins/index/FullTextIndexCommonTest.java | 31 ++++++++++++++++++++++
5 files changed, 48 insertions(+), 11 deletions(-)
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
index 59c84f9b01..a6297d4730 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
@@ -145,11 +145,16 @@ public class ElasticDocumentMaker extends
FulltextDocumentMaker<ElasticDocument>
/**
* We store the value in :fulltext only when the {@link
PropertyDefinition} has a regular expression (that means we
- * were not able to create a ft property at mapping time) or the property
is not analyzed.
+ * were not able to create a ft property at mapping time) or the property
is not analyzed or the property has a type whose value could be ignored
+ * in case it is malformed (e.g. a date that cannot be parsed).
*/
@Override
protected boolean isFulltextValuePersistedAtNode(PropertyDefinition pd) {
- return pd.isRegexp || !pd.analyzed;
+ return pd.isRegexp || !pd.analyzed
+ || pd.getType() == Type.DATE.tag()
+ || pd.getType() == Type.BOOLEAN.tag()
+ || pd.getType() == Type.LONG.tag()
+ || pd.getType() == Type.DOUBLE.tag();
}
/**
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 713d7d31f5..dbae459210 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -902,7 +902,8 @@ public class ElasticRequestHandler {
.query(rewriteQueryText(text))
.defaultOperator(Operator.And)
.type(TextQueryType.CrossFields)
- .tieBreaker(0.5d);
+ .tieBreaker(0.5d)
+ .lenient(true);
if (FieldNames.FULLTEXT.equals(fieldName)) {
for (PropertyDefinition pd :
pr.indexingRule.getNodeScopeAnalyzedProps()) {
qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" +
pd.boost);
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
index 6fc11c6658..28cc5e999d 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
@@ -49,7 +49,7 @@ public class ElasticDynamicBoostTest extends
DynamicBoostCommonTest {
protected String getTestQueryDynamicBoostBasicExplained() {
return "{\"_source\":{\"includes\":[\":path\"]}," +
"\"query\":{\"bool\":{\"must\":[{\"bool\":{\"must\":[{\"query_string\":{\"default_operator\":\"and\","
+
-
"\"fields\":[\"title^1.0\",\":dynamic-boost-ft^1.0E-4\",\":fulltext\"],\"query\":\"plant\",\"tie_breaker\":0.5,\"type\":\"cross_fields\"}}],"
+
+
"\"fields\":[\"title^1.0\",\":dynamic-boost-ft^1.0E-4\",\":fulltext\"],\"lenient\":true,\"query\":\"plant\",\"tie_breaker\":0.5,\"type\":\"cross_fields\"}}],"
+
"\"should\":[{\"nested\":{\"path\":\"predictedTagsDynamicBoost\",\"query\":{\"function_score\":{\"boost\":9.999999747378752E-5,"
+
"\"functions\":[{\"field_value_factor\":{\"field\":\"predictedTagsDynamicBoost.boost\"}}],"
+
"\"query\":{\"match\":{\"predictedTagsDynamicBoost.value\":{\"query\":\"plant\"}}}}},\"score_mode\":\"avg\"}}]}}]}},"
+
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
index 204c14f488..27b3cd54fe 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java
@@ -82,37 +82,37 @@ public class ElasticIndexAggregationTest extends
IndexAggregationCommonTest {
assertEventually(() -> {
String matchContentAll = "//element(*, nt:folder)[(jcr:contains(.,
'dog'))]";
assertThat(explainXPath(matchContentAll), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
assertQuery(matchContentAll, "xpath",
List.of("/content/myFolder"));
String matchContentSimple = "//element(*,
nt:folder)[(jcr:contains(myFile, 'dog'))]";
assertThat(explainXPath(matchContentSimple), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
assertQuery(matchContentSimple, "xpath",
List.of("/content/myFolder"));
String matchContent = " //element(*,
nt:folder)[(jcr:contains(myFile, 'dog') or jcr:contains(myFile/@jcr:title,
'invalid') or jcr:contains(myFile/@jcr:description, 'invalid'))]";
assertThat(explainXPath(matchContent), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"dog\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"dog\""));
assertQuery(matchContent, "xpath", List.of("/content/myFolder"));
String matchTitle = " //element(*,
nt:folder)[(jcr:contains(myFile, 'invalid') or jcr:contains(myFile/@jcr:title,
'title') or jcr:contains(myFile/@jcr:description, 'invalid'))]";
assertThat(explainXPath(matchTitle), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
assertQuery(matchTitle, "xpath", List.of("/content/myFolder"));
String matchDesc = " //element(*, nt:folder)[(jcr:contains(myFile,
'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or
jcr:contains(myFile/@jcr:description, 'description'))]";
assertThat(explainXPath(matchDesc), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
assertQuery(matchDesc, "xpath", List.of("/content/myFolder"));
String matchNone = " //element(*, nt:folder)[(jcr:contains(myFile,
'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or
jcr:contains(myFile/@jcr:description, 'invalid'))]";
assertThat(explainXPath(matchNone), containsString(
- "\"fields\":[\":fulltext\"],\"query\":\"invalid\""));
+
"\"fields\":[\":fulltext\"],\"lenient\":true,\"query\":\"invalid\""));
assertQuery(matchNone, "xpath", List.of());
String matchOnlyTitleOr = " //element(*,
nt:folder)[(jcr:contains(myFile/@jcr:title, 'title') or
jcr:contains(myFile/@jcr:title, 'unknown') )]";
assertThat(explainXPath(matchOnlyTitleOr), containsString(
- "\"fields\":[\"jcr:title\"],\"query\":\"title\""));
+
"\"fields\":[\"jcr:title\"],\"lenient\":true,\"query\":\"title\""));
assertQuery(matchOnlyTitleOr, "xpath",
List.of("/content/myFolder"));
});
}
diff --git
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
index 859acaf435..edc24bcf24 100644
---
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
+++
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/FullTextIndexCommonTest.java
@@ -325,6 +325,37 @@ public abstract class FullTextIndexCommonTest extends
AbstractQueryTest {
assertQuery("//*[jcr:contains(., 'jpg')]", XPATH,
List.of("/test/a")));
}
+ @Test
+ public void fulltextWithMalformedFields() throws Exception {
+ setup(builder -> {
+
builder.indexRule("nt:base").property("string_field").type("String").analyzed().nodeScopeIndex();
+
builder.indexRule("nt:base").property("date_field").type("Date").analyzed().nodeScopeIndex();
+
builder.indexRule("nt:base").property("long_field").type("Long").analyzed().nodeScopeIndex();
+
builder.indexRule("nt:base").property("double_field").type("Double").analyzed().nodeScopeIndex();
+
builder.indexRule("nt:base").property("bool_field").type("Boolean").analyzed().nodeScopeIndex();
+ }, idx -> {
+ }, "string_field", "date_field", "long_field", "double_field",
"bool_field");
+
+ //add content
+ Tree test = root.getTree("/").addChild("test");
+ test.addChild("a").setProperty("string_field", "foo");
+ test.addChild("b").setProperty("date_field", "2025-bar");
+ test.addChild("c").setProperty("long_field", "123-bar");
+ test.addChild("d").setProperty("double_field", "456.78-bar");
+ test.addChild("e").setProperty("bool_field", "true-bar");
+
+ root.commit();
+
+ assertEventually(() -> {
+ assertQuery("//*[jcr:contains(., 'foo')]", XPATH,
List.of("/test/a"));
+ assertQuery("//*[jcr:contains(., '2025')]", XPATH,
List.of("/test/b"));
+ assertQuery("//*[jcr:contains(., '123')]", XPATH,
List.of("/test/c"));
+ assertQuery("//*[jcr:contains(., '456.78')]", XPATH,
List.of("/test/d"));
+ assertQuery("//*[jcr:contains(., 'true')]", XPATH,
List.of("/test/e"));
+ }
+ );
+ }
+
protected void assertEventually(Runnable r) {
TestUtil.assertEventually(r,
((repositoryOptionsUtil.isAsync() ?
repositoryOptionsUtil.defaultAsyncIndexingTimeInSeconds : 0) + 3000) * 5);