This is an automated email from the ASF dual-hosted git repository.

mkataria pushed a commit to branch OAK-11757
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git

commit f9ea15e3037c207318b999e1e3b3d5d7809c2c59
Author: Mohit Kataria <tiho...@gmail.com>
AuthorDate: Thu Jun 12 10:32:26 2025 +0530

    OAK-11757: Implement similarityThreshold for inference
---
 .../index/elastic/query/ElasticRequestHandler.java |  1 +
 .../inference/ElasticInferenceUsingConfigTest.java | 63 +++++++++++++++++++++-
 2 files changed, 63 insertions(+), 1 deletion(-)

diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 2411079170..1c632c46d5 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -694,6 +694,7 @@ public class ElasticRequestHandler {
                         knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES 
+ "." + inferenceModelConfigName + "." + InferenceConstants.VECTOR);
                         
knnQueryBuilder.numCandidates(inferenceModelConfig.getNumCandidates());
                         knnQueryBuilder.queryVector(embeddings);
+                        knnQueryBuilder.similarity((float) 
inferenceModelConfig.getSimilarityThreshold());
                         // filters in knn are only applicable if filters are 
defined in knn query itself.
                         // the filters outside knn query are applicable as 
post filters which can lead to missing results.
                         if (planResult.evaluateNonFullTextConstraints()) {
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
index 4904ca137c..99cacec41c 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
@@ -883,7 +883,7 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
 
         // Create inference configuration
         createInferenceConfig(jcrIndexName, true, defaultEnricherConfig, 
inferenceModelConfigName,
-            inferenceModelName, inferenceServiceUrl, 0.8, 1L, true, true);
+            inferenceModelName, inferenceServiceUrl, 0.7, 1L, true, true);
         setupEnricherStatus(defaultEnricherStatusMapping, 
defaultEnricherStatusData);
 
         // Create index definition with searchable properties
@@ -989,4 +989,65 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
             assertEquals("/content/filterPath/ml", results.get(0));
         });
     }
+
+    @Test
+    public void testSimilarityThresholdInKnnQuery() throws Exception {
+        String inferenceConfigInQuery = "?{}?";
+        String jcrIndexName = UUID.randomUUID().toString();
+        String inferenceServiceUrl = "http://localhost:" + wireMock.port() + 
"/v1/embeddings";
+        String inferenceModelConfigName = "ada-test-model";
+        String inferenceModelName = "text-embedding-ada-002";
+
+        // Create inference config
+        Double initialSimilarityThreshold = 0.2;
+        createInferenceConfig(jcrIndexName, true, defaultEnricherConfig, 
inferenceModelConfigName,
+            inferenceModelName, inferenceServiceUrl, 
initialSimilarityThreshold, 1L, true, true);
+        setupEnricherStatus(defaultEnricherStatusMapping, 
defaultEnricherStatusData);
+        // Create index definition with multiple properties
+        IndexDefinitionBuilder builder = createIndexDefinition("title", 
"description", "updatedBy");
+        Tree index = setIndex(jcrIndexName, builder);
+        root.commit();
+
+        // Add test content
+        addTestContent();
+
+        // Let the index catch up
+        assertEventually(() -> assertEquals(7, countDocuments(index)));
+
+        // Enrich documents with embeddings
+        setupEmbeddingsForContent(index, inferenceModelConfigName, 
inferenceModelName);
+
+        // Setup wiremock stubs for inference service
+        setupMockInferenceService(inferenceModelConfigName, jcrIndexName);
+
+        String searchQuery = "technological advancements in electric vehicles";
+        String queryPath = "select [jcr:path] from [nt:base] where 
ISDESCENDANTNODE('/content') and contains(*, '"
+            + inferenceConfigInQuery + searchQuery + "')";
+        LOG.info("Running initial query with similarity threshold {}: {}", 
initialSimilarityThreshold, queryPath);
+        assertEventually(() -> {
+            List<String> results = executeQuery(queryPath, SQL2, true, true);
+            LOG.info("Query with similarity threshold {} returned {} results: 
{}",
+                initialSimilarityThreshold, results.size(), results);
+            assertEquals(5, results.size());
+        });
+
+        // update similarity threshold
+        double newThreshold = 0.8;
+        LOG.info("Updating similarity threshold from {} to {}", 
initialSimilarityThreshold, newThreshold);
+        // using same parameters as above apart from similarityThreshold,
+        // effectively updating similarityThreshold value.
+        createInferenceConfig(jcrIndexName, true, defaultEnricherConfig, 
inferenceModelConfigName,
+            inferenceModelName, inferenceServiceUrl, newThreshold, 1L, true, 
true);
+        InferenceConfig.reInitialize();
+
+        // With higher threshold number of documents should decrease
+        LOG.info("Running query with updated similarity threshold {}: {}", 
newThreshold, queryPath);
+        assertEventually(() -> {
+            List<String> results = executeQuery(queryPath, SQL2, true, true);
+            LOG.info("Query with similarity threshold {} returned {} results: 
{}",
+                newThreshold, results.size(), results);
+            assertEquals(1, results.size());
+        });
+
+    }
 }

Reply via email to