This is an automated email from the ASF dual-hosted git repository.

fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git


The following commit(s) were added to refs/heads/trunk by this push:
     new 3e514c073b OAK-11998: bump ES to 8.19.5, fix k for knn queries (#2597)
3e514c073b is described below

commit 3e514c073b5d17f811668aa6f950c99036582402
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Wed Oct 29 17:33:35 2025 +0100

    OAK-11998: bump ES to 8.19.5, fix k for knn queries (#2597)
    
    * OAK-11998: bump ES to 8.19.5, fix k for knn queries
    
    * OAK-11998: fix test
    
    * OAK-11998: add comment
    
    * OAK-11998: fix lucene version
---
 oak-search-elastic/pom.xml                         |  9 +---
 .../index/elastic/query/ElasticRequestHandler.java |  5 +++
 .../inference/ElasticInferenceUsingConfigTest.java | 50 +++++++++++++++++++---
 3 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/oak-search-elastic/pom.xml b/oak-search-elastic/pom.xml
index 29c266fe84..ec2d14bc03 100644
--- a/oak-search-elastic/pom.xml
+++ b/oak-search-elastic/pom.xml
@@ -33,8 +33,8 @@
   <description>Oak Elasticsearch integration subproject</description>
 
   <properties>
-    
<elasticsearch.java.client.version>8.18.2</elasticsearch.java.client.version>
-    <lucene.version>9.12.1</lucene.version>
+    
<elasticsearch.java.client.version>8.19.5</elasticsearch.java.client.version>
+    <lucene.version>9.12.2</lucene.version>
   </properties>
 
   <build>
@@ -139,11 +139,6 @@
       <artifactId>jackson-databind</artifactId>
       <version>${jackson.version}</version>
     </dependency>
-    <dependency>
-      <groupId>org.apache.lucene</groupId>
-      <artifactId>lucene-core</artifactId>
-      <version>${lucene.version}</version>
-    </dependency>
     <dependency>
       <groupId>org.apache.lucene</groupId>
       <artifactId>lucene-analysis-common</artifactId>
diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 18cdf42b3f..055eb02271 100644
--- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++ 
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -700,6 +700,11 @@ public class ElasticRequestHandler {
                         KnnQuery.Builder knnQueryBuilder = new 
KnnQuery.Builder();
                         knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES 
+ "." + inferenceModelConfigName + "." + InferenceConstants.VECTOR);
                         
knnQueryBuilder.numCandidates(inferenceModelConfig.getNumCandidates());
+                        // The behavior of knn queries has changed in ES 
8.18+. k is the number of nearest neighbors to return from each shard.
+                        // When not specified, it defaults to the size of the 
overall search request, which by default is 10.
+                        // To maintain previous behavior, we explicitly set k 
to numCandidates.
+                        // see 
https://github.com/elastic/elasticsearch/pull/118774
+                        
knnQueryBuilder.k(inferenceModelConfig.getNumCandidates());
                         knnQueryBuilder.queryVector(embeddings);
                         knnQueryBuilder.similarity((float) 
inferenceModelConfig.getSimilarityThreshold());
                         // filters in knn are only applicable if filters are 
defined in knn query itself.
diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
index 99cacec41c..13b0c3f1b5 100644
--- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
+++ 
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
@@ -87,7 +87,7 @@ import static org.junit.Assert.assertTrue;
 public class ElasticInferenceUsingConfigTest extends ElasticAbstractQueryTest {
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticInferenceUsingConfigTest.class);
-    private static ObjectMapper MAPPER = new JsonMapper();
+    private static final ObjectMapper MAPPER = new JsonMapper();
 
     private ScheduledExecutorService executorService;
     private StatisticsProvider statisticsProvider;
@@ -242,6 +242,45 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
         System.clearProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY);
     }
 
+    @Test
+    public void knnWithSmallFetchSize() throws Exception {
+        String jcrIndexName = UUID.randomUUID().toString();
+        String inferenceServiceUrl = "http://localhost:" + wireMock.port() + 
"/v1/embeddings";
+        String inferenceModelConfigName = "small-fetch-size";
+        String inferenceModelName = "text-embedding-ada-002";
+
+        // Create inference config
+        createInferenceConfig(jcrIndexName, true, defaultEnricherConfig, 
inferenceModelConfigName,
+                inferenceModelName, inferenceServiceUrl, 0.0, 1L, true, true);
+        setupEnricherStatus(defaultEnricherStatusMapping, 
defaultEnricherStatusData);
+        // Create index definition with multiple properties
+        IndexDefinitionBuilder builder = createIndexDefinition("title", 
"description", "updatedBy");
+        // set small query fetch sizes to test this behavioral change 
https://github.com/elastic/elasticsearch/pull/118774
+        builder.getBuilderTree().setProperty("queryFetchSizes", List.of(1L, 
10L), Type.LONGS);
+        Tree index = setIndex(jcrIndexName, builder);
+        root.commit();
+
+        // Add test content
+        addTestContent();
+
+        // Let the index catch up
+        assertEventually(() -> assertEquals(7, countDocuments(index)));
+
+        // Enrich documents with embeddings
+        setupEmbeddingsForContent(index, inferenceModelConfigName, 
inferenceModelName);
+
+        // Setup wiremock stubs for inference service
+        setupMockInferenceService(inferenceModelConfigName, jcrIndexName);
+
+        // Test with inference config
+        String queryPath = "select [jcr:path] from [nt:base] where 
ISDESCENDANTNODE('/content') and contains(*, '"
+                + "?{}?a beginner guide to data manipulation in python')";
+        assertEventually(() -> {
+            List<String> results = executeQuery(queryPath, SQL2, true, true);
+            assertEquals(5, results.size());
+        });
+    }
+
     private void enableExperimentalInferenceCompatibility() {
         System.setProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY, 
"true");
     }
@@ -423,7 +462,7 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
                 Map<String, Collection<Double>> map = MAPPER.readValue(json, 
Map.class);
                 ObjectNode updateDoc = MAPPER.createObjectNode();
                 List<Float> embeddings = map.get("embedding").stream()
-                    .map(d -> ((Double) d).floatValue())
+                    .map(Double::floatValue)
                     .collect(Collectors.toList());
 
                 VectorDocument vectorDocument = new VectorDocument(
@@ -435,8 +474,7 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
                 ObjectNode vectorSpacesNode = 
updateDoc.putObject(InferenceConstants.VECTOR_SPACES);
                 ArrayNode inferenceModelConfigNode = 
vectorSpacesNode.putArray(inferenceModelConfigName);
                 inferenceModelConfigNode.addPOJO(vectorDocument);
-                Map<String, Object> enricherStatusConfig = new HashMap<>();
-                
InferenceConfig.getInstance().getEnricherStatus().entrySet().forEach(k -> 
enricherStatusConfig.put(k.getKey(), k.getValue()));
+                Map<String, Object> enricherStatusConfig = new 
HashMap<>(InferenceConfig.getInstance().getEnricherStatus());
                 enricherStatusConfig.put("status", "COMPLETED");
                 updateDoc.putPOJO(InferenceConstants.ENRICH_NODE, 
enricherStatusConfig);
                 updateDocument(index, path, updateDoc);
@@ -542,7 +580,7 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
             
assertNotNull(carsDocUpdated.get(InferenceConstants.VECTOR_SPACES));
             try {
                 TreeNode tree = 
MAPPER.readTree(carsDocUpdated.get(InferenceConstants.ENRICH_NODE).traverse());
-                assertEquals(((TextNode) tree.get("status")).asText(), 
(String) InferenceConfig.getInstance().getEnricherStatus().get("status"));
+                assertEquals(((TextNode) tree.get("status")).asText(), 
InferenceConfig.getInstance().getEnricherStatus().get("status"));
             } catch (IOException e) {
                 throw new RuntimeException(e);
             }
@@ -923,7 +961,7 @@ public class ElasticInferenceUsingConfigTest extends 
ElasticAbstractQueryTest {
         ObjectMapper mapper = new JsonMapper();
         Map<String, Collection<Double>> embeddingsMap = 
mapper.readValue(jsonUrl, Map.class);
         List<Float> embeddings = embeddingsMap.get("embedding").stream()
-            .map(d -> ((Double) d).floatValue())
+            .map(Double::floatValue)
             .collect(Collectors.toList());
 
         // Create a large number of car-related documents

Reply via email to