This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11504-i in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit 95e31532cc5b58056bfe5606c2f8a37df3b584e7 Author: Thomas Mueller <[email protected]> AuthorDate: Fri Feb 21 09:49:31 2025 +0100 OAK-11504 Elasticsearch: support flattened fields - indentation changes only --- .../index/elastic/ElasticIndexImporter.java | 1 - .../index/elastic/ElasticPropertyDefinition.java | 159 +++++++++++---------- .../index/elastic/query/ElasticRequestHandler.java | 90 ++++++------ .../elastic/query/ElasticSuggestIterator.java | 1 - .../query/async/facets/ElasticFacetProvider.java | 1 - 5 files changed, 126 insertions(+), 126 deletions(-) diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java index 433f50bbd5..ef325b49ef 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexImporter.java @@ -20,7 +20,6 @@ package org.apache.jackrabbit.oak.plugins.index.elastic; import org.apache.jackrabbit.oak.api.CommitFailedException; import org.apache.jackrabbit.oak.plugins.index.importer.IndexImporterProvider; -import org.apache.jackrabbit.oak.plugins.index.search.ReindexOperations; import org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; import org.slf4j.Logger; diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java index f38191b307..b6298903bc 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyDefinition.java @@ -24,96 +24,99 @@ import org.apache.jackrabbit.oak.spi.state.NodeState; public class ElasticPropertyDefinition extends PropertyDefinition { - public static final String DEFAULT_SIMILARITY_METRIC = "l2_norm"; - static final String PROP_SIMILARITY_METRIC = "similarityMetric"; - private static final String PROP_SIMILARITY = "similarity"; - private static final String PROP_K = "k"; - private static final String PROP_CANDIDATES = "candidates"; - private static final float DEFAULT_SIMILARITY = 0.95f; - private static final int DEFAULT_K = 10; - private static final int DEFAULT_CANDIDATES = 500; - private KnnSearchParameters knnSearchParameters; - - /** - * Whether to use dynamic boosted values in full text queries, default is true - */ - private static final String PROP_USE_IN_FULL_TEXT_QUERY = "useInFullTextQuery"; - private final boolean useInFullTextQuery; - - public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, String nodeName, NodeState defn) { - super(idxDefn, nodeName, defn); - if (this.useInSimilarity) { - knnSearchParameters = new KnnSearchParameters( - getOptionalValue(defn, PROP_SIMILARITY_METRIC, DEFAULT_SIMILARITY_METRIC), - getOptionalValue(defn, PROP_SIMILARITY, DEFAULT_SIMILARITY), - getOptionalValue(defn, PROP_K, DEFAULT_K), - getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES)); + public static final String DEFAULT_SIMILARITY_METRIC = "l2_norm"; + static final String PROP_SIMILARITY_METRIC = "similarityMetric"; + private static final String PROP_SIMILARITY = "similarity"; + private static final String PROP_K = "k"; + private static final String PROP_CANDIDATES = "candidates"; + private static final float DEFAULT_SIMILARITY = 0.95f; + private static final int DEFAULT_K = 10; + private static final int DEFAULT_CANDIDATES = 500; + private KnnSearchParameters knnSearchParameters; + + /** + * Whether to use dynamic boosted values in full text queries, default is true + */ + private static final String PROP_USE_IN_FULL_TEXT_QUERY = "useInFullTextQuery"; + private final boolean useInFullTextQuery; + + public ElasticPropertyDefinition(IndexDefinition.IndexingRule idxDefn, String nodeName, NodeState defn) { + super(idxDefn, nodeName, defn); + if (this.useInSimilarity) { + knnSearchParameters = new KnnSearchParameters( + getOptionalValue(defn, PROP_SIMILARITY_METRIC, DEFAULT_SIMILARITY_METRIC), + getOptionalValue(defn, PROP_SIMILARITY, DEFAULT_SIMILARITY), + getOptionalValue(defn, PROP_K, DEFAULT_K), + getOptionalValue(defn, PROP_CANDIDATES, DEFAULT_CANDIDATES)); + } + this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, PROP_USE_IN_FULL_TEXT_QUERY, true); } - this.useInFullTextQuery = this.dynamicBoost && getOptionalValue(defn, PROP_USE_IN_FULL_TEXT_QUERY, true); - } - public KnnSearchParameters getKnnSearchParameters() { - return knnSearchParameters; - } + public KnnSearchParameters getKnnSearchParameters() { + return knnSearchParameters; + } public boolean useInFullTextQuery() { return useInFullTextQuery; } - /** - * Class for defining parameters of approximate knn search on dense_vector fields - * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a> and - * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html">...</a> - */ - public static class KnnSearchParameters { - - public KnnSearchParameters(String similarityMetric, float similarity, int k, int candidates) { - this.similarityMetric = similarityMetric; - this.similarity = similarity; - this.k = k; - this.candidates = candidates; - } - - /** - * Similarity metric used to compare query and document vectors. Possible values are l2_norm (default), cosine, - * dot_product, max_inner_product - */ - private final String similarityMetric; - /** - * Minimum similarity for the document vector to be considered as a match. Required when cosine, dot_product - * or max_inner_product is set as similarityMetric - */ - private final float similarity; /** - * Number of nearest neighbours to return. Must be <= candidates - * vector added as a field + * Class for defining parameters of approximate knn search on dense_vector fields + * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html">...</a> and + * <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/knn-search.html">...</a> */ - private final int k; + public static class KnnSearchParameters { - /** - * Take the top vectors with the most matching hashes and compute their exact similarity to the query vector. The - * candidates parameter controls the number of exact similarity computations. Specifically, we compute exact - * similarity for the top candidates candidate vectors in each segment. As a reminder, each Elasticsearch index has - * >= 1 shards, and each shard has >= 1 segments. That means if you set "candidates": 200 for an index with 2 - * shards, each with 3 segments, then you’ll compute the exact similarity for 2 * 3 * 200 = 1200 vectors. candidates - * must be set to a number greater or equal to the number of Elasticsearch results you want to get. Higher values - * generally mean higher recall and higher latency. - */ - private final int candidates; + public KnnSearchParameters(String similarityMetric, float similarity, int k, int candidates) { + this.similarityMetric = similarityMetric; + this.similarity = similarity; + this.k = k; + this.candidates = candidates; + } - public String getSimilarityMetric() { - return similarityMetric; - } - public float getSimilarity() { - return similarity; - } + /** + * Similarity metric used to compare query and document vectors. Possible values are l2_norm (default), cosine, + * dot_product, max_inner_product + */ + private final String similarityMetric; - public int getK() { - return k; - } + /** + * Minimum similarity for the document vector to be considered as a match. Required when cosine, dot_product + * or max_inner_product is set as similarityMetric + */ + private final float similarity; + + /** + * Number of nearest neighbours to return. Must be <= candidates + * vector added as a field + */ + private final int k; + + /** + * Take the top vectors with the most matching hashes and compute their exact similarity to the query vector. The + * candidates parameter controls the number of exact similarity computations. Specifically, we compute exact + * similarity for the top candidates candidate vectors in each segment. As a reminder, each Elasticsearch index has + * >= 1 shards, and each shard has >= 1 segments. That means if you set "candidates": 200 for an index with 2 + * shards, each with 3 segments, then you’ll compute the exact similarity for 2 * 3 * 200 = 1200 vectors. candidates + * must be set to a number greater or equal to the number of Elasticsearch results you want to get. Higher values + * generally mean higher recall and higher latency. + */ + private final int candidates; + + public String getSimilarityMetric() { + return similarityMetric; + } + + public float getSimilarity() { + return similarity; + } + + public int getK() { + return k; + } - public int getCandidates() { - return candidates; + public int getCandidates() { + return candidates; + } } - } } diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java index c7d8c55b6b..8b11566216 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java @@ -193,10 +193,10 @@ public class ElasticRequestHandler { bqb.must(m -> m.moreLikeThis(mltQuery(mltParams))); } } else { - similarityQuery(queryNodePath, sp).ifPresent(similarityQuery -> - bqb.filter(fb -> fb.exists(ef -> ef.field(similarityQuery.field()))) - .should(s -> s.knn(similarityQuery)) - ); + similarityQuery(queryNodePath, sp).ifPresent(similarityQuery -> + bqb.filter(fb -> fb.exists(ef -> ef.field(similarityQuery.field()))) + .should(s -> s.knn(similarityQuery)) + ); } // Add should clause to improve relevance using similarity tags only when similarity is @@ -228,48 +228,48 @@ public class ElasticRequestHandler { return bqb; } - public Optional<KnnQuery> similarityQuery(@NotNull String text, List<PropertyDefinition> sp) { - if (!sp.isEmpty()) { - LOG.debug("generating similarity query for {}", text); - NodeState targetNodeState = rootState; - for (String token : PathUtils.elements(text)) { - targetNodeState = targetNodeState.getChildNode(token); - } - if (!targetNodeState.exists()) { - throw new IllegalArgumentException("Could not find node " + text); - } - for (PropertyDefinition propertyDefinition : sp) { - ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition; - String propertyPath = PathUtils.getParentPath(pd.name); - String propertyName = PathUtils.getName(pd.name); - NodeState tempState = targetNodeState; - for (String token : PathUtils.elements(propertyPath)) { - if (token.isEmpty()) { - break; - } - tempState = tempState.getChildNode(token); - } - PropertyState ps = tempState.getProperty(propertyName); - Blob property = ps != null ? ps.getValue(Type.BINARY) : null; - if (property == null) { - LOG.warn("Couldn't find property {} on {}", pd.name, text); - continue; - } - byte[] bytes; - try { - bytes = property.getNewStream().readAllBytes(); - } catch (IOException e) { - LOG.error("Error reading bytes from property {} on {}", pd.name, text, e); - continue; - } + public Optional<KnnQuery> similarityQuery(@NotNull String text, List<PropertyDefinition> sp) { + if (!sp.isEmpty()) { + LOG.debug("generating similarity query for {}", text); + NodeState targetNodeState = rootState; + for (String token : PathUtils.elements(text)) { + targetNodeState = targetNodeState.getChildNode(token); + } + if (!targetNodeState.exists()) { + throw new IllegalArgumentException("Could not find node " + text); + } + for (PropertyDefinition propertyDefinition : sp) { + ElasticPropertyDefinition pd = (ElasticPropertyDefinition) propertyDefinition; + String propertyPath = PathUtils.getParentPath(pd.name); + String propertyName = PathUtils.getName(pd.name); + NodeState tempState = targetNodeState; + for (String token : PathUtils.elements(propertyPath)) { + if (token.isEmpty()) { + break; + } + tempState = tempState.getChildNode(token); + } + PropertyState ps = tempState.getProperty(propertyName); + Blob property = ps != null ? ps.getValue(Type.BINARY) : null; + if (property == null) { + LOG.warn("Couldn't find property {} on {}", pd.name, text); + continue; + } + byte[] bytes; + try { + bytes = property.getNewStream().readAllBytes(); + } catch (IOException e) { + LOG.error("Error reading bytes from property {} on {}", pd.name, text, e); + continue; + } - String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name); - KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build(); - return Optional.of(knnQuery); - } + String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name); + KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build(); + return Optional.of(knnQuery); + } + } + return Optional.empty(); } - return Optional.empty(); - } @NotNull private KnnQuery.Builder baseKnnQueryBuilder(String similarityPropFieldName, byte[] bytes, ElasticPropertyDefinition pd) { @@ -934,7 +934,7 @@ public class ElasticRequestHandler { return like(propertyName, pr.first.getValue(Type.STRING)); } - //TODO Confirm that all other types can be treated as string + // TODO Confirm that all other types can be treated as string in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.STRING)); } } diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java index 1562e9e5a8..907c010a18 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSuggestIterator.java @@ -25,7 +25,6 @@ import org.jetbrains.annotations.NotNull; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import co.elastic.clients.elasticsearch._types.query_dsl.Query; import co.elastic.clients.elasticsearch.core.SearchRequest; import co.elastic.clients.elasticsearch.core.SearchResponse; diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java index 8b3058d4d7..53d056dec9 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticFacetProvider.java @@ -20,7 +20,6 @@ import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler; -import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
