This is an automated email from the ASF dual-hosted git repository.
fortino pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 3e514c073b OAK-11998: bump ES to 8.19.5, fix k for knn queries (#2597)
3e514c073b is described below
commit 3e514c073b5d17f811668aa6f950c99036582402
Author: Fabrizio Fortino <[email protected]>
AuthorDate: Wed Oct 29 17:33:35 2025 +0100
OAK-11998: bump ES to 8.19.5, fix k for knn queries (#2597)
* OAK-11998: bump ES to 8.19.5, fix k for knn queries
* OAK-11998: fix test
* OAK-11998: add comment
* OAK-11998: fix lucene version
---
oak-search-elastic/pom.xml | 9 +---
.../index/elastic/query/ElasticRequestHandler.java | 5 +++
.../inference/ElasticInferenceUsingConfigTest.java | 50 +++++++++++++++++++---
3 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/oak-search-elastic/pom.xml b/oak-search-elastic/pom.xml
index 29c266fe84..ec2d14bc03 100644
--- a/oak-search-elastic/pom.xml
+++ b/oak-search-elastic/pom.xml
@@ -33,8 +33,8 @@
<description>Oak Elasticsearch integration subproject</description>
<properties>
-
<elasticsearch.java.client.version>8.18.2</elasticsearch.java.client.version>
- <lucene.version>9.12.1</lucene.version>
+
<elasticsearch.java.client.version>8.19.5</elasticsearch.java.client.version>
+ <lucene.version>9.12.2</lucene.version>
</properties>
<build>
@@ -139,11 +139,6 @@
<artifactId>jackson-databind</artifactId>
<version>${jackson.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-core</artifactId>
- <version>${lucene.version}</version>
- </dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-common</artifactId>
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index 18cdf42b3f..055eb02271 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -700,6 +700,11 @@ public class ElasticRequestHandler {
KnnQuery.Builder knnQueryBuilder = new
KnnQuery.Builder();
knnQueryBuilder.field(InferenceConstants.VECTOR_SPACES
+ "." + inferenceModelConfigName + "." + InferenceConstants.VECTOR);
knnQueryBuilder.numCandidates(inferenceModelConfig.getNumCandidates());
+ // The behavior of knn queries has changed in ES 8.18+. k is the number of nearest neighbors to return from each shard.
+ // When not specified, it defaults to the size of the overall search request, which by default is 10.
+ // To maintain previous behavior, we explicitly set k to numCandidates.
+ // see https://github.com/elastic/elasticsearch/pull/118774
+
knnQueryBuilder.k(inferenceModelConfig.getNumCandidates());
knnQueryBuilder.queryVector(embeddings);
knnQueryBuilder.similarity((float)
inferenceModelConfig.getSimilarityThreshold());
// filters in knn are only applicable if filters are
defined in knn query itself.
diff --git
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
index 99cacec41c..13b0c3f1b5 100644
---
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
+++
b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/inference/ElasticInferenceUsingConfigTest.java
@@ -87,7 +87,7 @@ import static org.junit.Assert.assertTrue;
public class ElasticInferenceUsingConfigTest extends ElasticAbstractQueryTest {
private static final Logger LOG =
LoggerFactory.getLogger(ElasticInferenceUsingConfigTest.class);
- private static ObjectMapper MAPPER = new JsonMapper();
+ private static final ObjectMapper MAPPER = new JsonMapper();
private ScheduledExecutorService executorService;
private StatisticsProvider statisticsProvider;
@@ -242,6 +242,45 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
System.clearProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY);
}
+ @Test
+ public void knnWithSmallFetchSize() throws Exception {
+ String jcrIndexName = UUID.randomUUID().toString();
+ String inferenceServiceUrl = "http://localhost:" + wireMock.port() +
"/v1/embeddings";
+ String inferenceModelConfigName = "small-fetch-size";
+ String inferenceModelName = "text-embedding-ada-002";
+
+ // Create inference config
+ createInferenceConfig(jcrIndexName, true, defaultEnricherConfig,
inferenceModelConfigName,
+ inferenceModelName, inferenceServiceUrl, 0.0, 1L, true, true);
+ setupEnricherStatus(defaultEnricherStatusMapping,
defaultEnricherStatusData);
+ // Create index definition with multiple properties
+ IndexDefinitionBuilder builder = createIndexDefinition("title",
"description", "updatedBy");
+ // set small query fetch sizes to test this behavioral change https://github.com/elastic/elasticsearch/pull/118774
+ builder.getBuilderTree().setProperty("queryFetchSizes", List.of(1L,
10L), Type.LONGS);
+ Tree index = setIndex(jcrIndexName, builder);
+ root.commit();
+
+ // Add test content
+ addTestContent();
+
+ // Let the index catch up
+ assertEventually(() -> assertEquals(7, countDocuments(index)));
+
+ // Enrich documents with embeddings
+ setupEmbeddingsForContent(index, inferenceModelConfigName,
inferenceModelName);
+
+ // Setup wiremock stubs for inference service
+ setupMockInferenceService(inferenceModelConfigName, jcrIndexName);
+
+ // Test with inference config
+ String queryPath = "select [jcr:path] from [nt:base] where
ISDESCENDANTNODE('/content') and contains(*, '"
+ + "?{}?a beginner guide to data manipulation in python')";
+ assertEventually(() -> {
+ List<String> results = executeQuery(queryPath, SQL2, true, true);
+ assertEquals(5, results.size());
+ });
+ }
+
private void enableExperimentalInferenceCompatibility() {
System.setProperty(VectorQuery.EXPERIMENTAL_COMPATIBILITY_MODE_KEY,
"true");
}
@@ -423,7 +462,7 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
Map<String, Collection<Double>> map = MAPPER.readValue(json,
Map.class);
ObjectNode updateDoc = MAPPER.createObjectNode();
List<Float> embeddings = map.get("embedding").stream()
- .map(d -> ((Double) d).floatValue())
+ .map(Double::floatValue)
.collect(Collectors.toList());
VectorDocument vectorDocument = new VectorDocument(
@@ -435,8 +474,7 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
ObjectNode vectorSpacesNode =
updateDoc.putObject(InferenceConstants.VECTOR_SPACES);
ArrayNode inferenceModelConfigNode =
vectorSpacesNode.putArray(inferenceModelConfigName);
inferenceModelConfigNode.addPOJO(vectorDocument);
- Map<String, Object> enricherStatusConfig = new HashMap<>();
-
InferenceConfig.getInstance().getEnricherStatus().entrySet().forEach(k ->
enricherStatusConfig.put(k.getKey(), k.getValue()));
+ Map<String, Object> enricherStatusConfig = new
HashMap<>(InferenceConfig.getInstance().getEnricherStatus());
enricherStatusConfig.put("status", "COMPLETED");
updateDoc.putPOJO(InferenceConstants.ENRICH_NODE,
enricherStatusConfig);
updateDocument(index, path, updateDoc);
@@ -542,7 +580,7 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
assertNotNull(carsDocUpdated.get(InferenceConstants.VECTOR_SPACES));
try {
TreeNode tree =
MAPPER.readTree(carsDocUpdated.get(InferenceConstants.ENRICH_NODE).traverse());
- assertEquals(((TextNode) tree.get("status")).asText(),
(String) InferenceConfig.getInstance().getEnricherStatus().get("status"));
+ assertEquals(((TextNode) tree.get("status")).asText(),
InferenceConfig.getInstance().getEnricherStatus().get("status"));
} catch (IOException e) {
throw new RuntimeException(e);
}
@@ -923,7 +961,7 @@ public class ElasticInferenceUsingConfigTest extends
ElasticAbstractQueryTest {
ObjectMapper mapper = new JsonMapper();
Map<String, Collection<Double>> embeddingsMap =
mapper.readValue(jsonUrl, Map.class);
List<Float> embeddings = embeddingsMap.get("embedding").stream()
- .map(d -> ((Double) d).floatValue())
+ .map(Double::floatValue)
.collect(Collectors.toList());
// Create a large number of car-related documents