This is an automated email from the ASF dual-hosted git repository.
thomasm pushed a commit to branch trunk
in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
The following commit(s) were added to refs/heads/trunk by this push:
new 07335f52a4 OAK-11504 Elasticsearch: support flattened fields -
indentation changes only (#2104)
07335f52a4 is described below
commit 07335f52a4e61565a5efc7f4213485bd6fbfbbb0
Author: Thomas Mueller <[email protected]>
AuthorDate: Fri Feb 21 10:35:05 2025 +0100
OAK-11504 Elasticsearch: support flattened fields - indentation changes
only (#2104)
---
.../index/elastic/ElasticIndexImporter.java | 1 -
.../index/elastic/ElasticPropertyDefinition.java | 159 +++++++++++----------
.../index/elastic/query/ElasticRequestHandler.java | 90 ++++++------
.../elastic/query/ElasticSuggestIterator.java | 1 -
.../query/async/facets/ElasticFacetProvider.java | 1 -
5 files changed, 126 insertions(+), 126 deletions(-)
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java
index 433f50bbd5..ef325b49ef 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java
@@ -20,7 +20,6 @@ package org.apache.jackrabbit.oak.plugins.index.elastic;
import org.apache.jackrabbit.oak.api.CommitFailedException;
import org.apache.jackrabbit.oak.plugins.index.importer.IndexImporterProvider;
-import org.apache.jackrabbit.oak.plugins.index.search.ReindexOperations;
import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.slf4j.Logger;
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
index f38191b307..b6298903bc 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java
@@ -24,96 +24,99 @@ import org.apache.jackrabbit.oak.spi.state.NodeState;
public class ElasticPropertyDefinition extends PropertyDefinition {
- public static final String DEFAULT_SIMILARITY_METRIC = "l2_norm";
- static final String PROP_SIMILARITY_METRIC = "similarityMetric";
- private static final String PROP_SIMILARITY = "similarity";
- private static final String PROP_K = "k";
- private static final String PROP_CANDIDATES = "candidates";
- private static final float DEFAULT_SIMILARITY = 0.95f;
- private static final int DEFAULT_K = 10;
- private static final int DEFAULT_CANDIDATES = 500;
- private KnnSearchParameters knnSearchParameters;
-
- /**
- * Whether to use dynamic boosted values in full text queries, default is
true
- */
- private static final String PROP_USE_IN_FULL_TEXT_QUERY =
"useInFullTextQuery";
- private final boolean useInFullTextQuery;
-
- public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn,
String nodeName, NodeState defn) {
- super(idxDefn, nodeName, defn);
- if (this.useInSimilarity) {
- knnSearchParameters = new KnnSearchParameters(
- getOptionalValue(defn, PROP_SIMILARITY_METRIC,
DEFAULT_SIMILARITY_METRIC),
- getOptionalValue(defn, PROP_SIMILARITY, DEFAULT_SIMILARITY),
- getOptionalValue(defn, PROP_K, DEFAULT_K),
- getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES));
+ public static final String DEFAULT_SIMILARITY_METRIC = "l2_norm";
+ static final String PROP_SIMILARITY_METRIC = "similarityMetric";
+ private static final String PROP_SIMILARITY = "similarity";
+ private static final String PROP_K = "k";
+ private static final String PROP_CANDIDATES = "candidates";
+ private static final float DEFAULT_SIMILARITY = 0.95f;
+ private static final int DEFAULT_K = 10;
+ private static final int DEFAULT_CANDIDATES = 500;
+ private KnnSearchParameters knnSearchParameters;
+
+ /**
+ * Whether to use dynamic boosted values in full text queries, default is
true
+ */
+ private static final String PROP_USE_IN_FULL_TEXT_QUERY =
"useInFullTextQuery";
+ private final boolean useInFullTextQuery;
+
+ public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn,
String nodeName, NodeState defn) {
+ super(idxDefn, nodeName, defn);
+ if (this.useInSimilarity) {
+ knnSearchParameters = new KnnSearchParameters(
+ getOptionalValue(defn, PROP_SIMILARITY_METRIC,
DEFAULT_SIMILARITY_METRIC),
+ getOptionalValue(defn, PROP_SIMILARITY,
DEFAULT_SIMILARITY),
+ getOptionalValue(defn, PROP_K, DEFAULT_K),
+ getOptionalValue(defn, PROP_CANDIDATES,
DEFAULT_CANDIDATES));
+ }
+ this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn,
PROP_USE_IN_FULL_TEXT_QUERY, true);
}
- this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn,
PROP_USE_IN_FULL_TEXT_QUERY, true);
- }
- public KnnSearchParameters getKnnSearchParameters() {
- return knnSearchParameters;
- }
+ public KnnSearchParameters getKnnSearchParameters() {
+ return knnSearchParameters;
+ }
public boolean useInFullTextQuery() {
return useInFullTextQuery;
}
- /**
- * Class for defining parameters of approximate knn search on dense_vector
fields
- * <a
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a>
and
- * <a
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html">...</a>
- */
- public static class KnnSearchParameters {
-
- public KnnSearchParameters(String similarityMetric, float similarity, int
k, int candidates) {
- this.similarityMetric = similarityMetric;
- this.similarity = similarity;
- this.k = k;
- this.candidates = candidates;
- }
-
- /**
- * Similarity metric used to compare query and document vectors. Possible
values are l2_norm (default), cosine,
- * dot_product, max_inner_product
- */
- private final String similarityMetric;
- /**
- * Minimum similarity for the document vector to be considered as a match.
Required when cosine, dot_product
- * or max_inner_product is set as similarityMetric
- */
- private final float similarity;
/**
- * Number of nearest neighbours to return. Must be <= candidates
- * vector added as a field
+ * Class for defining parameters of approximate knn search on dense_vector
fields
+ * <a
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a>
and
+ * <a
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html">...</a>
*/
- private final int k;
+ public static class KnnSearchParameters {
- /**
- * Take the top vectors with the most matching hashes and compute their
exact similarity to the query vector. The
- * candidates parameter controls the number of exact similarity
computations. Specifically, we compute exact
- * similarity for the top candidates candidate vectors in each segment. As
a reminder, each Elasticsearch index has
- * >= 1 shards, and each shard has >= 1 segments. That means if you set
"candidates": 200 for an index with 2
- * shards, each with 3 segments, then you’ll compute the exact similarity
for 2 * 3 * 200 = 1200 vectors. candidates
- * must be set to a number greater or equal to the number of Elasticsearch
results you want to get. Higher values
- * generally mean higher recall and higher latency.
- */
- private final int candidates;
+ public KnnSearchParameters(String similarityMetric, float similarity,
int k, int candidates) {
+ this.similarityMetric = similarityMetric;
+ this.similarity = similarity;
+ this.k = k;
+ this.candidates = candidates;
+ }
- public String getSimilarityMetric() {
- return similarityMetric;
- }
- public float getSimilarity() {
- return similarity;
- }
+ /**
+ * Similarity metric used to compare query and document vectors.
Possible values are l2_norm (default), cosine,
+ * dot_product, max_inner_product
+ */
+ private final String similarityMetric;
- public int getK() {
- return k;
- }
+ /**
+ * Minimum similarity for the document vector to be considered as a
match. Required when cosine, dot_product
+ * or max_inner_product is set as similarityMetric
+ */
+ private final float similarity;
+
+ /**
+ * Number of nearest neighbours to return. Must be <= candidates
+ * vector added as a field
+ */
+ private final int k;
+
+ /**
+ * Take the top vectors with the most matching hashes and compute
their exact similarity to the query vector. The
+ * candidates parameter controls the number of exact similarity
computations. Specifically, we compute exact
+ * similarity for the top candidates candidate vectors in each
segment. As a reminder, each Elasticsearch index has
+ * >= 1 shards, and each shard has >= 1 segments. That means if you
set "candidates": 200 for an index with 2
+ * shards, each with 3 segments, then you’ll compute the exact
similarity for 2 * 3 * 200 = 1200 vectors. candidates
+ * must be set to a number greater or equal to the number of
Elasticsearch results you want to get. Higher values
+ * generally mean higher recall and higher latency.
+ */
+ private final int candidates;
+
+ public String getSimilarityMetric() {
+ return similarityMetric;
+ }
+
+ public float getSimilarity() {
+ return similarity;
+ }
+
+ public int getK() {
+ return k;
+ }
- public int getCandidates() {
- return candidates;
+ public int getCandidates() {
+ return candidates;
+ }
}
- }
}
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
index c7d8c55b6b..8b11566216 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
@@ -193,10 +193,10 @@ public class ElasticRequestHandler {
bqb.must(m -> m.moreLikeThis(mltQuery(mltParams)));
}
} else {
- similarityQuery(queryNodePath, sp).ifPresent(similarityQuery
->
- bqb.filter(fb -> fb.exists(ef ->
ef.field(similarityQuery.field())))
- .should(s -> s.knn(similarityQuery))
- );
+ similarityQuery(queryNodePath,
sp).ifPresent(similarityQuery ->
+ bqb.filter(fb -> fb.exists(ef ->
ef.field(similarityQuery.field())))
+ .should(s -> s.knn(similarityQuery))
+ );
}
// Add should clause to improve relevance using similarity
tags only when similarity is
@@ -228,48 +228,48 @@ public class ElasticRequestHandler {
return bqb;
}
- public Optional<KnnQuery> similarityQuery(@NotNull String text,
List<PropertyDefinition> sp) {
- if (!sp.isEmpty()) {
- LOG.debug("generating similarity query for {}", text);
- NodeState targetNodeState = rootState;
- for (String token : PathUtils.elements(text)) {
- targetNodeState = targetNodeState.getChildNode(token);
- }
- if (!targetNodeState.exists()) {
- throw new IllegalArgumentException("Could not find node " + text);
- }
- for (PropertyDefinition propertyDefinition : sp) {
- ElasticPropertyDefinition pd = (ElasticPropertyDefinition)
propertyDefinition;
- String propertyPath = PathUtils.getParentPath(pd.name);
- String propertyName = PathUtils.getName(pd.name);
- NodeState tempState = targetNodeState;
- for (String token : PathUtils.elements(propertyPath)) {
- if (token.isEmpty()) {
- break;
- }
- tempState = tempState.getChildNode(token);
- }
- PropertyState ps = tempState.getProperty(propertyName);
- Blob property = ps != null ? ps.getValue(Type.BINARY) : null;
- if (property == null) {
- LOG.warn("Couldn't find property {} on {}", pd.name, text);
- continue;
- }
- byte[] bytes;
- try {
- bytes = property.getNewStream().readAllBytes();
- } catch (IOException e) {
- LOG.error("Error reading bytes from property {} on {}", pd.name,
text, e);
- continue;
- }
+ public Optional<KnnQuery> similarityQuery(@NotNull String text,
List<PropertyDefinition> sp) {
+ if (!sp.isEmpty()) {
+ LOG.debug("generating similarity query for {}", text);
+ NodeState targetNodeState = rootState;
+ for (String token : PathUtils.elements(text)) {
+ targetNodeState = targetNodeState.getChildNode(token);
+ }
+ if (!targetNodeState.exists()) {
+ throw new IllegalArgumentException("Could not find node " +
text);
+ }
+ for (PropertyDefinition propertyDefinition : sp) {
+ ElasticPropertyDefinition pd = (ElasticPropertyDefinition)
propertyDefinition;
+ String propertyPath = PathUtils.getParentPath(pd.name);
+ String propertyName = PathUtils.getName(pd.name);
+ NodeState tempState = targetNodeState;
+ for (String token : PathUtils.elements(propertyPath)) {
+ if (token.isEmpty()) {
+ break;
+ }
+ tempState = tempState.getChildNode(token);
+ }
+ PropertyState ps = tempState.getProperty(propertyName);
+ Blob property = ps != null ? ps.getValue(Type.BINARY) : null;
+ if (property == null) {
+ LOG.warn("Couldn't find property {} on {}", pd.name, text);
+ continue;
+ }
+ byte[] bytes;
+ try {
+ bytes = property.getNewStream().readAllBytes();
+ } catch (IOException e) {
+ LOG.error("Error reading bytes from property {} on {}",
pd.name, text, e);
+ continue;
+ }
- String similarityPropFieldName =
FieldNames.createSimilarityFieldName(pd.name);
- KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName,
bytes, pd).build();
- return Optional.of(knnQuery);
- }
+ String similarityPropFieldName =
FieldNames.createSimilarityFieldName(pd.name);
+ KnnQuery knnQuery =
baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build();
+ return Optional.of(knnQuery);
+ }
+ }
+ return Optional.empty();
}
- return Optional.empty();
- }
@NotNull
private KnnQuery.Builder baseKnnQueryBuilder(String
similarityPropFieldName, byte[] bytes, ElasticPropertyDefinition pd) {
@@ -934,7 +934,7 @@ public class ElasticRequestHandler {
return like(propertyName, pr.first.getValue(Type.STRING));
}
- //TODO Confirm that all other types can be treated as string
+ // TODO Confirm that all other types can be treated as string
in = newPropertyRestrictionQuery(field, pr, value ->
value.getValue(Type.STRING));
}
}
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java
index 1562e9e5a8..907c010a18 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java
@@ -25,7 +25,6 @@ import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import co.elastic.clients.elasticsearch._types.query_dsl.Query;
import co.elastic.clients.elasticsearch.core.SearchRequest;
import co.elastic.clients.elasticsearch.core.SearchResponse;
diff --git
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java
index 8b3058d4d7..53d056dec9 100644
---
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java
+++
b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java
@@ -20,7 +20,6 @@ import
org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import
org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler;
import
org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler;
-import
org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener;
import
org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;