This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 673572f946 OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values (#1949)
673572f946 is described below

commit 673572f9467272a603fe618d212b19fefcbe2ed7
Author: Fabrizio Fortino <fabrizio.fort...@gmail.com>
AuthorDate: Wed Jan 8 17:11:06 2025 +0100

    OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic boosted values (#1949)
    
    * OAK-11352 (oak-search-elastic) add useInFullTextQuery flag for dynamic 
boosted values
    
    * OAK-11352 (minor) make useInFullTextQuery final
    
    * OAK-11352 (test) fix dynamicBoostNotIncludedInFullText
---
 oak-doc/src/site/markdown/query/elastic.md         | 16 ++++++++--
 .../index/elastic/ElasticIndexDefinition.java      |  5 ++--
 .../index/elastic/ElasticPropertyDefinition.java   | 11 +++++++
 .../index/elastic/query/ElasticRequestHandler.java | 14 +++++----
 .../index/elastic/ElasticDynamicBoostTest.java     | 35 ++++++++++++++++++++++
 .../oak/plugins/index/DynamicBoostCommonTest.java  |  5 ++++
 6 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/oak-doc/src/site/markdown/query/elastic.md 
b/oak-doc/src/site/markdown/query/elastic.md
index 163a20d6b3..a0fd952eab 100644
--- a/oak-doc/src/site/markdown/query/elastic.md
+++ b/oak-doc/src/site/markdown/query/elastic.md
@@ -48,9 +48,19 @@ however there are differences:
 * `useInExcerpt` does not support regexp relative properties.
 * For property definitions, `sync` and `unique` are ignored.
   Synchronous indexing, and enforcing uniqueness constraints is not currently 
supported in elastic indexes.
-* The behavior for `dynamicBoost` is slightly different: 
-  For Lucene indexes, boosting is done in indexing, while for Elastic it is 
done at query time.
-* The behavior for `suggest` is slightly different:
+* The behavior of `dynamicBoost` differs slightly between Lucene and 
Elasticsearch:  
+  - **Lucene**: Boosting is applied at indexing time.  
+  - **Elasticsearch**: Boosting is applied at query time.  
+
+Full-text queries automatically use dynamically boosted values to match 
relevant results, but this behavior may not always be desirable.
+To use these values exclusively for influencing relevance without affecting 
matching, configure the property definition as follows:
+```json
+{
+  "dynamicBoost": true,
+  "useInFullTextQuery": false
+}
+```
+* The behavior of `suggest` is slightly different:
   For Lucene indexes, the suggestor is updated every 10 minutes by default and 
the frequency
   can be changed by `suggestUpdateFrequencyMinutes` property in suggestion 
node under the index definition node.
   In Elastic indexes, there is no such delay and thus no need for the above 
config property. This is an improvement in ES over lucene.
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
index 0eefd1695d..edc912867b 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
@@ -187,7 +187,7 @@ public class ElasticIndexDefinition extends IndexDefinition 
{
     public final InferenceDefinition inferenceDefinition;
 
     private final Map<String, List<PropertyDefinition>> propertiesByName;
-    private final List<PropertyDefinition> dynamicBoostProperties;
+    private final List<ElasticPropertyDefinition> dynamicBoostProperties;
     private final List<PropertyDefinition> similarityProperties;
     private final List<PropertyDefinition> similarityTagsProperties;
     private final String[] similarityTagsFields;
@@ -238,6 +238,7 @@ public class ElasticIndexDefinition extends IndexDefinition 
{
                 .stream()
                 .flatMap(IndexingRule::getNamePatternsProperties)
                 .filter(pd -> pd.dynamicBoost)
+                .map(pd -> (ElasticPropertyDefinition) pd)
                 .collect(Collectors.toList());
 
         this.similarityProperties = getDefinedRules()
@@ -278,7 +279,7 @@ public class ElasticIndexDefinition extends IndexDefinition 
{
         return propertiesByName;
     }
 
-    public List<PropertyDefinition> getDynamicBoostProperties() {
+    public List<ElasticPropertyDefinition> getDynamicBoostProperties() {
         return dynamicBoostProperties;
     }
 
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
index a5d24076b0..f38191b307 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
@@ -34,6 +34,12 @@ public class ElasticPropertyDefinition extends 
PropertyDefinition {
   private static final int DEFAULT_CANDIDATES = 500;
   private KnnSearchParameters knnSearchParameters;
 
+  /**
+   * Whether to use dynamic boosted values in full text queries, default is 
true
+   */
+  private static final String PROP_USE_IN_FULL_TEXT_QUERY = 
"useInFullTextQuery";
+  private final boolean useInFullTextQuery;
+
   public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, 
String nodeName, NodeState defn) {
     super(idxDefn, nodeName, defn);
     if (this.useInSimilarity) {
@@ -43,12 +49,17 @@ public class ElasticPropertyDefinition extends 
PropertyDefinition {
           getOptionalValue(defn, PROP_K, DEFAULT_K),
           getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES));
     }
+    this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, 
PROP_USE_IN_FULL_TEXT_QUERY, true);
   }
 
   public KnnSearchParameters getKnnSearchParameters() {
     return knnSearchParameters;
   }
 
+    public boolean useInFullTextQuery() {
+        return useInFullTextQuery;
+    }
+
   /**
    * Class for defining parameters of approximate knn search on dense_vector 
fields
    * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a> and
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 047c7fb1e1..06c57eeb1e 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -556,13 +556,17 @@ public class ElasticRequestHandler {
                     QueryStringQuery.Builder qsqBuilder = fullTextQuery(text, 
ElasticIndexDefinition.DYNAMIC_PROPERTIES + ".value", pr, false);
                     bqBuilder.must(m -> m.nested(nf -> 
nf.path(ElasticIndexDefinition.DYNAMIC_PROPERTIES).query(Query.of(q -> 
q.queryString(qsqBuilder.build())))));
                 } else {
-                    boolean dbEnabled = 
!elasticIndexDefinition.getDynamicBoostProperties().isEmpty();
+                    // TODO: we include dynamic boosted values in the 
full-text query if there is at least one dynamic property with 
useInFullTextQuery set to true
+                    // This might not be ideal when there are multiple dynamic 
properties with different useInFullTextQuery settings (very unlikely)
+                    // A better approach would be to include the values at 
index time (requires a refactoring of the DocumentMaker to access the 
ElasticIndexDefinition)
+                    boolean includeDynamicBoostedValues = 
!elasticIndexDefinition.getDynamicBoostProperties().isEmpty() &&
+                            
elasticIndexDefinition.getDynamicBoostProperties().stream().anyMatch(ElasticPropertyDefinition::useInFullTextQuery);
 
                     // Experimental support for inference queries
                     if (elasticIndexDefinition.inferenceDefinition != null && 
elasticIndexDefinition.inferenceDefinition.queries != null) {
-                        bqBuilder.must(m -> m.bool(b -> inference(b, 
propertyName, text, pr, dbEnabled)));
+                        bqBuilder.must(m -> m.bool(b -> inference(b, 
propertyName, text, pr, includeDynamicBoostedValues)));
                     } else {
-                        QueryStringQuery.Builder qsqBuilder = 
fullTextQuery(text, getElasticFieldName(propertyName), pr, dbEnabled);
+                        QueryStringQuery.Builder qsqBuilder = 
fullTextQuery(text, getElasticFieldName(propertyName), pr, 
includeDynamicBoostedValues);
                         bqBuilder.must(m -> m.queryString(qsqBuilder.build()));
                     }
                 }
@@ -878,7 +882,7 @@ public class ElasticRequestHandler {
         return Query.of(q -> q.multiMatch(m -> m.fields(uuid)));
     }
 
-    private static QueryStringQuery.Builder fullTextQuery(String text, String 
fieldName, PlanResult pr, boolean dynamicBoostEnabled) {
+    private static QueryStringQuery.Builder fullTextQuery(String text, String 
fieldName, PlanResult pr, boolean includeDynamicBoostedValues) {
         LOG.debug("fullTextQuery for text: '{}', fieldName: '{}'", text, 
fieldName);
         QueryStringQuery.Builder qsqBuilder = new QueryStringQuery.Builder()
                 .query(FulltextIndex.rewriteQueryText(text))
@@ -890,7 +894,7 @@ public class ElasticRequestHandler {
                 qsqBuilder.fields(pd.name + "^" + pd.boost);
             }
             // dynamic boost is included only for :fulltext field
-            if (dynamicBoostEnabled) {
+            if (includeDynamicBoostedValues) {
                 
qsqBuilder.fields(ElasticIndexDefinition.DYNAMIC_BOOST_FULLTEXT + "^" + 
DYNAMIC_BOOST_WEIGHT);
             }
         }
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
index d996da9da0..6fc11c6658 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticDynamicBoostTest.java
@@ -84,4 +84,39 @@ public class ElasticDynamicBoostTest extends 
DynamicBoostCommonTest {
                     List.of("/test/asset2", "/test/asset1"));
         });
     }
+
+    @Test
+    public void dynamicBoostNotIncludedInFullText() throws Exception {
+        createAssetsIndexAndProperties(false, false, false);
+
+        Tree testParent = createNodeWithType(root.getTree("/"), "test", 
JcrConstants.NT_UNSTRUCTURED, "");
+
+        Tree predicted1 = createAssetNodeWithPredicted(testParent, "asset1", 
"flower with a lot of red and a bit of blue");
+        createPredictedTag(predicted1, "fooTag", 100.0);
+        createPredictedTag(predicted1, "barTag", 1.0);
+        createPredictedTag(predicted1, "red", 9.0);
+        createPredictedTag(predicted1, "blue", 1.0);
+
+        Tree predicted2 = createAssetNodeWithPredicted(testParent, "asset2", 
"flower with a lot of blue and a bit of red");
+        createPredictedTag(predicted2, "fooTag", 1.0);
+        createPredictedTag(predicted2, "barTag", 100.0);
+        createPredictedTag(predicted2, "red", 1.0);
+        createPredictedTag(predicted2, "blue", 9.0);
+
+        Tree predicted3 = createAssetNodeWithPredicted(testParent, "asset3", 
"this is a not matching asset");
+        createPredictedTag(predicted3, "fooTag", 1.0);
+        createPredictedTag(predicted3, "barTag", 1.0);
+
+        root.commit();
+
+        assertEventually(() -> {
+            // with this test we are checking that the dynamic boost is not 
included in the fulltext search
+            assertQuery("//element(*, dam:Asset)[jcr:contains(., 'fooTag')]", 
XPATH, List.of());
+            assertOrderedQuery("select [jcr:path] from [dam:Asset] where 
contains(*, 'flower OR fooTag')",
+                    List.of("/test/asset1", "/test/asset2"));
+            assertOrderedQuery("select [jcr:path] from [dam:Asset] where 
contains(*, 'flower OR barTag')",
+                    List.of("/test/asset2", "/test/asset1"));
+        });
+
+    }
 }
diff --git 
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java
 
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java
index 9fb4cc6053..d71426d9af 100644
--- 
a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java
+++ 
b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/DynamicBoostCommonTest.java
@@ -230,6 +230,10 @@ public abstract class DynamicBoostCommonTest extends 
AbstractQueryTest {
     }
 
     protected void createAssetsIndexAndProperties(boolean lite, boolean 
similarityTags) throws Exception {
+        createAssetsIndexAndProperties(lite, similarityTags, true);
+    }
+
+    protected void createAssetsIndexAndProperties(boolean lite, boolean 
similarityTags, boolean useInFullTextQuery) throws Exception {
         NodeTypeRegistry.register(root, new 
ByteArrayInputStream(ASSET_NODE_TYPE.getBytes()), "test nodeType");
         Tree indexRuleProps = createIndex("dam:Asset", lite);
 
@@ -237,6 +241,7 @@ public abstract class DynamicBoostCommonTest extends 
AbstractQueryTest {
         predictedTagsDynamicBoost.setProperty("name", 
"jcr:content/metadata/predictedTags/.*");
         predictedTagsDynamicBoost.setProperty("isRegexp", true);
         predictedTagsDynamicBoost.setProperty("dynamicBoost", true);
+        predictedTagsDynamicBoost.setProperty("useInFullTextQuery", 
useInFullTextQuery);
 
         if (similarityTags) {
             Tree predictedTags = createNodeWithType(indexRuleProps, 
"predictedTags", JcrConstants.NT_UNSTRUCTURED, "");

Reply via email to