This is an automated email from the ASF dual-hosted git repository. fortino pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push: new 673572f946 OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values (#1949) 673572f946 is described below commit 673572f9467272a603fe618d212b19fefcbe2ed7 Author: Fabrizio Fortino <fabrizio.fort...@gmail.com> AuthorDate: Wed Jan 8 17:11:06 2025 +0100 OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values (#1949) * OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values * OAK-11352 (minor) make useInFullTextQuery final * OAK-11352 (test) fix dynamicBoostNotIncludedInFullText --- oak-doc/src/site/markdown/query/elastic.md | 16 ++++++++-- .../index/elastic/ElasticIndexDefinition.java | 5 ++-- .../index/elastic/ElasticPropertyDefinition.java | 11 +++++++ .../index/elastic/query/ElasticRequestHandler.java | 14 +++++---- .../index/elastic/ElasticDynamicBoostTest.java | 35 ++++++++++++++++++++++ .../oak/plugins/index/DynamicBoostCommonTest.java | 5 ++++ 6 files changed, 76 insertions(+), 10 deletions(-) diff --git a/oak-doc/src/site/markdown/query/elastic.md b/oak-doc/src/site/markdown/query/elastic.md index 163a20d6b3..a0fd952eab 100644 --- a/oak-doc/src/site/markdown/query/elastic.md +++ b/oak-doc/src/site/markdown/query/elastic.md @@ -48,9 +48,19 @@ however there are differences: * `useInExcerpt` does not support regexp relative properties. * For property definitions, `sync` and `unique` are ignored. Synchronous indexing, and enforcing uniqueness constraints is not currently supported in elastic indexes. -* The behavior for `dynamicBoost` is slightly different: - For Lucene indexes, boosting is done in indexing, while for Elastic it is done at query time. -* The behavior for `suggest` is slightly different: +* The behavior of `dynamicBoost` differs slightly between Lucene and Elasticsearch: + - **Lucene**: Boosting is applied at indexing time. + - **Elasticsearch**: Boosting is applied at query time. + +Full-text queries automatically use dynamically boosted values to match relevant results, but this behavior may not always be desirable. +To use these values exclusively for influencing relevance without affecting matching, configure the property definition as follows: +```json +{ + "dynamicBoost": true, + "useInFullTextQuery": false +} +``` +* The behavior of `suggest` is slightly different: For Lucene indexes, the suggestor is updated every 10 minutes by default and the frequency can be changed by `suggestUpdateFrequencyMinutes` property in suggestion node under the index definition node. In Elastic indexes, there is no such delay and thus no need for the above config property. This is an improvement in ES over lucene. diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java index 0eefd1695d..edc912867b 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java @@ -187,7 +187,7 @@ public class ElasticIndexDefinition extends IndexDefinition { public final InferenceDefinition inferenceDefinition; private final Map<String, List<PropertyDefinition>> propertiesByName; - private final List<PropertyDefinition> dynamicBoostProperties; + private final List<ElasticPropertyDefinition> dynamicBoostProperties; private final List<PropertyDefinition> similarityProperties; private final List<PropertyDefinition> similarityTagsProperties; private final String[] similarityTagsFields; @@ -238,6 +238,7 @@ public class ElasticIndexDefinition extends IndexDefinition { .stream() .flatMap(IndexingRule::getNamePatternsProperties) .filter(pd -> pd.dynamicBoost) + .map(pd -> (ElasticPropertyDefinition) pd) .collect(Collectors.toList()); this.similarityProperties = getDefinedRules() @@ -278,7 +279,7 @@ public class ElasticIndexDefinition extends IndexDefinition { return propertiesByName; } - public List<PropertyDefinition> getDynamicBoostProperties() { + public List<ElasticPropertyDefinition> getDynamicBoostProperties() { return dynamicBoostProperties; } diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java index a5d24076b0..f38191b307 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java @@ -34,6 +34,12 @@ public class ElasticPropertyDefinition extends PropertyDefinition { private static final int DEFAULT_CANDIDATES = 500; private KnnSearchParameters knnSearchParameters; + /** + * Whether to use dynamic boosted values in full text queries, default is true + */ + private static final String PROP_USE_IN_FULL_TEXT_QUERY = "useInFullTextQuery"; + private final boolean useInFullTextQuery; + public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, String nodeName, NodeState defn) { super(idxDefn, nodeName, defn); if (this.useInSimilarity) { @@ -43,12 +49,17 @@ public class ElasticPropertyDefinition extends PropertyDefinition { getOptionalValue(defn, PROP_K, DEFAULT_K), getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES)); } + this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, PROP_USE_IN_FULL_TEXT_QUERY, true); } public KnnSearchParameters getKnnSearchParameters() { return knnSearchParameters; } + public boolean useInFullTextQuery() { + return useInFullTextQuery; + } + /** * Class for defining parameters of approximate knn search on dense_vector fields * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a> and diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java index 047c7fb1e1..06c57eeb1e 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java @@ -556,13 +556,17 @@ public class ElasticRequestHandler { QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, ElasticIndexDefinition.DYNAMIC_PROPERTIES + ".value", pr, false); bqBuilder.must(m -> m.nested(nf -> nf.path(ElasticIndexDefinition.DYNAMIC_PROPERTIES).query(Query.of(q -> q.queryString(qsqBuilder.build()))))); } else { - boolean dbEnabled = !elasticIndexDefinition.getDynamicBoostProperties().isEmpty(); + // TODO: we include dynamic boosted values in the full-text query if there is at least one dynamic property with useInFullTextQuery set to true + // This might not be ideal when there are multiple dynamic properties with different useInFullTextQuery settings (very unlikely) + // A better approach would be to include the values at index time (requires a refactoring of the DocumentMaker to access the ElasticIndexDefinition) + boolean includeDynamicBoostedValues = !elasticIndexDefinition.getDynamicBoostProperties().isEmpty() && + elasticIndexDefinition.getDynamicBoostProperties().stream().anyMatch(ElasticPropertyDefinition::useInFullTextQuery); // Experimental support for inference queries if (elasticIndexDefinition.inferenceDefinition != null && elasticIndexDefinition.inferenceDefinition.queries != null) { - bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, text, pr, dbEnabled))); + bqBuilder.must(m -> m.bool(b -> inference(b, propertyName, text, pr, includeDynamicBoostedValues))); } else { - QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, getElasticFieldName(propertyName), pr, dbEnabled); + QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, getElasticFieldName(propertyName), pr, includeDynamicBoostedValues); bqBuilder.must(m -> m.queryString(qsqBuilder.build())); } } @@ -878,7 +882,7 @@ public class ElasticRequestHandler { return Query.of(q -> q.multiMatch(m -> m.fields(uuid))); } - private static QueryStringQuery.Builder fullTextQuery(String text, String fieldName, PlanResult pr, boolean dynamicBoostEnabled) { + private static QueryStringQuery.Builder fullTextQuery(String text, String fieldName, PlanResult pr, boolean includeDynamicBoostedValues) { LOG.debug("fullTextQuery for text: '{}', fieldName: '{}'", text, fieldName); QueryStringQuery.Builder qsqBuilder = new QueryStringQuery.Builder() .query(FulltextIndex.rewriteQueryText(text)) @@ -890,7 +894,7 @@ public class ElasticRequestHandler { qsqBuilder.fields(pd.name + "^" + pd.boost); } // dynamic boost is included only for :fulltext field - if (dynamicBoostEnabled) { + if (includeDynamicBoostedValues) { qsqBuilder.fields(ElasticIndexDefinition.DYNAMIC_BOOST_FULLTEXT + "^" + DYNAMIC_BOOST_WEIGHT); } } diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java index d996da9da0..6fc11c6658 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java @@ -84,4 +84,39 @@ public class ElasticDynamicBoostTest extends DynamicBoostCommonTest { List.of("/test/asset2", "/test/asset1")); }); } + + @Test + public void dynamicBoostNotIncludedInFullText() throws Exception { + createAssetsIndexAndProperties(false, false, false); + + Tree testParent = createNodeWithType(root.getTree("/"), "test", JcrConstants.NT_UNSTRUCTURED, ""); + + Tree predicted1 = createAssetNodeWithPredicted(testParent, "asset1", "flower with a lot of red and a bit of blue"); + createPredictedTag(predicted1, "fooTag", 100.0); + createPredictedTag(predicted1, "barTag", 1.0); + createPredictedTag(predicted1, "red", 9.0); + createPredictedTag(predicted1, "blue", 1.0); + + Tree predicted2 = createAssetNodeWithPredicted(testParent, "asset2", "flower with a lot of blue and a bit of red"); + createPredictedTag(predicted2, "fooTag", 1.0); + createPredictedTag(predicted2, "barTag", 100.0); + createPredictedTag(predicted2, "red", 1.0); + createPredictedTag(predicted2, "blue", 9.0); + + Tree predicted3 = createAssetNodeWithPredicted(testParent, "asset3", "this is a not matching asset"); + createPredictedTag(predicted3, "fooTag", 1.0); + createPredictedTag(predicted3, "barTag", 1.0); + + root.commit(); + + assertEventually(() -> { + // with this test we are checking that the dynamic boost is not included in the fulltext search + assertQuery("//element(*, dam:Asset)[jcr:contains(., 'fooTag')]", XPATH, List.of()); + assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'flower OR fooTag')", + List.of("/test/asset1", "/test/asset2")); + assertOrderedQuery("select [jcr:path] from [dam:Asset] where contains(*, 'flower OR barTag')", + List.of("/test/asset2", "/test/asset1")); + }); + + } } diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java index 9fb4cc6053..d71426d9af 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java @@ -230,6 +230,10 @@ public abstract class DynamicBoostCommonTest extends AbstractQueryTest { } protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags) throws Exception { + createAssetsIndexAndProperties(lite, similarityTags, true); + } + + protected void createAssetsIndexAndProperties(boolean lite, boolean similarityTags, boolean useInFullTextQuery) throws Exception { NodeTypeRegistry.register(root, new ByteArrayInputStream(ASSET_NODE_TYPE.getBytes()), "test nodeType"); Tree indexRuleProps = createIndex("dam:Asset", lite); @@ -237,6 +241,7 @@ public abstract class DynamicBoostCommonTest extends AbstractQueryTest { predictedTagsDynamicBoost.setProperty("name", "jcr:content/metadata/predictedTags/.*"); predictedTagsDynamicBoost.setProperty("isRegexp", true); predictedTagsDynamicBoost.setProperty("dynamicBoost", true); + predictedTagsDynamicBoost.setProperty("useInFullTextQuery", useInFullTextQuery); if (similarityTags) { Tree predictedTags = createNodeWithType(indexRuleProps, "predictedTags", JcrConstants.NT_UNSTRUCTURED, "");