This is an automated email from the ASF dual-hosted git repository. thomasm pushed a commit to branch OAK-11555 in repository https://gitbox.apache.org/repos/asf/jackrabbit-oak.git
commit b58c345273edf3c4300a49245ef9996f633e9d57 Author: Thomas Mueller <[email protected]> AuthorDate: Thu Mar 6 18:01:27 2025 +0100 OAK-11555 Elastic: support dot in property and function names --- .../index/elastic/ElasticIndexDefinition.java | 11 ++- .../index/elastic/ElasticIndexStatistics.java | 5 +- .../index/elastic/index/ElasticDocument.java | 8 +-- .../index/elastic/index/ElasticDocumentMaker.java | 14 ++-- .../index/elastic/index/ElasticIndexHelper.java | 20 ++++-- .../index/elastic/query/ElasticRequestHandler.java | 23 ++++-- .../facets/ElasticSecureFacetAsyncProvider.java | 10 ++- .../ElasticStatisticalFacetAsyncProvider.java | 11 ++- .../index/elastic/util/ElasticIndexUtils.java | 32 +++++++-- .../plugins/index/elastic/ElasticContentTest.java | 9 +-- .../index/elastic/ElasticIndexAggregationTest.java | 82 ++++++++++++++++++++++ .../index/elastic/ElasticPropertyIndexTest.java | 59 +++++++++++++++- .../index/elastic/ElasticSimilarQueryTest.java | 8 ++- .../elastic/index/ElasticIndexHelperTest.java | 7 +- .../index/elastic/util/ElasticIndexUtilsTest.java | 59 ++++++++++++++++ .../oak/plugins/index/IndexPlannerCommonTest.java | 2 +- 16 files changed, 309 insertions(+), 51 deletions(-) diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java index 9d69551d1c..f341686445 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java @@ -31,6 +31,7 @@ import java.util.stream.Stream; import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.collections.StreamUtils; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import 
org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; @@ -324,13 +325,17 @@ public class ElasticIndexDefinition extends IndexDefinition { ElasticPropertyDefinition pd = getMatchingRegexPropertyDefinition(propertyName); if (pd != null) { if (pd.isFlattened()) { - return FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName + "." + propertyName; + String fieldName = ElasticIndexUtils.fieldName(propertyName); + String flattenedFieldName = FieldNames.FLATTENED_FIELD_PREFIX + + ElasticIndexUtils.fieldName(pd.nodeName) + "." + fieldName; + return flattenedFieldName; } } - return propertyName + ".keyword"; + String fieldName = ElasticIndexUtils.fieldName(propertyName); + return fieldName + ".keyword"; } - String field = propertyName; + String field = ElasticIndexUtils.fieldName(propertyName); // it's ok to look at the first property since we are sure they all have the same type int type = propertyDefinitions.get(0).getType(); if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) { diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java index ed52c9b57d..4d64d77ac5 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexStatistics.java @@ -25,6 +25,8 @@ import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import co.elastic.clients.elasticsearch._types.query_dsl.Query; + +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics; import 
org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.Nullable; @@ -111,8 +113,9 @@ public class ElasticIndexStatistics implements IndexStatistics { */ @Override public int getDocCountFor(String field) { + String elasticField = ElasticIndexUtils.fieldName(field); return countCache.getUnchecked( - new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), field, null) + new StatsRequestDescriptor(elasticConnection, indexDefinition.getIndexAlias(), elasticField, null) ); } diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java index 3c7dc6f4f3..2f1ee7e26e 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocument.java @@ -147,9 +147,9 @@ public class ElasticDocument { properties.put(fieldName, finalValue); } - void addSimilarityField(String name, Blob value) throws IOException { + void addSimilarityField(String fieldName, Blob value) throws IOException { byte[] bytes = value.getNewStream().readAllBytes(); - addProperty(FieldNames.createSimilarityFieldName(name), toFloats(bytes)); + addProperty(FieldNames.createSimilarityFieldName(fieldName), toFloats(bytes)); } void indexAncestors(String path) { @@ -160,8 +160,8 @@ public class ElasticDocument { addProperty(FieldNames.PATH_DEPTH, depth); } - void addDynamicBoostField(String propName, String value, double boost) { - addProperty(propName, + void addDynamicBoostField(String fieldName, String value, double boost) { + addProperty(fieldName, Map.of( ElasticIndexHelper.DYNAMIC_BOOST_NESTED_VALUE, value, ElasticIndexHelper.DYNAMIC_BOOST_NESTED_BOOST, boost diff --git 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java index 5316cf1127..59c84f9b01 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java @@ -24,6 +24,7 @@ import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.log.LogSilencer; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.Aggregate; import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; @@ -165,16 +166,17 @@ public class ElasticDocumentMaker extends FulltextDocumentMaker<ElasticDocument> } @Override - protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String pname, PropertyDefinition pd, int i) { + protected void indexTypedProperty(ElasticDocument doc, PropertyState property, String propertyName, PropertyDefinition pd, int i) { // Get the Type tag from the defined index definition here - and not from the actual persisted property state - this way in case // If the actual property value is different from the property type defined in the index definition/mapping - this will try to convert the property if possible, // otherwise will log a warning and not try and add the property to index. 
If we try and index incompatible data types (like String to Date), // we would get an exception while indexing the node on elastic search and other properties for the node will also don't get indexed. (See OAK-9665). - String fieldName = pname; + String fieldName = ElasticIndexUtils.fieldName(propertyName); if (pd.isRegexp) { ElasticPropertyDefinition epd = (ElasticPropertyDefinition) pd; if (epd.isFlattened()) { - fieldName = FieldNames.FLATTENED_FIELD_PREFIX + epd.nodeName + "." + pname; + fieldName = FieldNames.FLATTENED_FIELD_PREFIX + + ElasticIndexUtils.fieldName(epd.nodeName) + "." + fieldName; } } int tag = pd.getType(); @@ -197,7 +199,7 @@ public class ElasticDocumentMaker extends FulltextDocumentMaker<ElasticDocument> if (!LOG_SILENCER.silence(LOG_KEY_COULD_NOT_CONVERT_PROPERTY)) { LOG.warn( "[{}] Ignoring property. Could not convert property {} (field {}) of type {} to type {} for path {}. Error: {}", - getIndexName(), pname, fieldName, + getIndexName(), propertyName, fieldName, Type.fromTag(property.getType().tag(), false), Type.fromTag(tag, false), path, e.toString()); } @@ -252,7 +254,7 @@ public class ElasticDocumentMaker extends FulltextDocumentMaker<ElasticDocument> if (pd.getSimilaritySearchDenseVectorSize() == blob.length() / BLOB_LENGTH_DIVISOR) { // see https://www.elastic.co/blog/text-similarity-search-with-vectors-in-elasticsearch // see https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html - doc.addSimilarityField(pd.name, blob); + doc.addSimilarityField(ElasticIndexUtils.fieldName(pd.name), blob); } else { if (!LOG_SILENCER.silence(LOG_KEY_SIMILARITY_BINARIES_WRONG_DIMENSION)) { LOG.warn("[{}] Ignoring binary property {} for path {}. 
Expected dimension is {} but got {}", @@ -275,7 +277,7 @@ public class ElasticDocumentMaker extends FulltextDocumentMaker<ElasticDocument> @Override protected boolean indexDynamicBoost(ElasticDocument doc, String parent, String nodeName, String token, double boost) { if (!token.isEmpty()) { - doc.addDynamicBoostField(nodeName, token, boost); + doc.addDynamicBoostField(ElasticIndexUtils.fieldName(nodeName), token, boost); return true; } return false; diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java index f42432ea0c..cd9ac9f2b3 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java @@ -22,6 +22,7 @@ import co.elastic.clients.json.JsonData; import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticPropertyDefinition; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.IndexingRule; import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition; @@ -149,7 +150,8 @@ class ElasticIndexHelper { builder.meta("inference", JsonData.of(inferenceDefinition)); if (inferenceDefinition.properties != null) { - inferenceDefinition.properties.forEach(p -> builder.properties(p.name, + inferenceDefinition.properties.forEach(p -> builder.properties( + ElasticIndexUtils.fieldName(p.name), b -> b.object(bo -> bo .properties("value", pb -> pb.denseVector(dv -> dv.index(true) @@ -243,13 +245,15 @@ class ElasticIndexHelper { if 
(epd.isFlattened()) { Property.Builder pBuilder = new Property.Builder(); pBuilder.flattened(b2 -> b2.index(true)); - builder.properties(FieldNames.FLATTENED_FIELD_PREFIX + pd.nodeName, pBuilder.build()); + builder.properties(FieldNames.FLATTENED_FIELD_PREFIX + + ElasticIndexUtils.fieldName(pd.nodeName), pBuilder.build()); } } } for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) { - final String name = entry.getKey(); - final List<PropertyDefinition> propertyDefinitions = entry.getValue(); + String propertyName = entry.getKey(); + String fieldName = ElasticIndexUtils.fieldName(propertyName); + List<PropertyDefinition> propertyDefinitions = entry.getValue(); Type<?> type = null; for (PropertyDefinition pd : propertyDefinitions) { type = Type.fromTag(pd.getType(), false); @@ -280,10 +284,10 @@ class ElasticIndexHelper { pBuilder.keyword(b1 -> b1.ignoreAbove(256)); } } - builder.properties(name, pBuilder.build()); + builder.properties(fieldName, pBuilder.build()); for (PropertyDefinition pd : indexDefinition.getDynamicBoostProperties()) { - builder.properties(pd.nodeName, + builder.properties(ElasticIndexUtils.fieldName(pd.nodeName), b1 -> b1.nested( b2 -> b2.properties(DYNAMIC_BOOST_NESTED_VALUE, b3 -> b3.text( @@ -305,7 +309,9 @@ class ElasticIndexHelper { .similarity(DEFAULT_SIMILARITY_METRIC) .build(); - builder.properties(FieldNames.createSimilarityFieldName(pd.name), b1 -> b1.denseVector(denseVectorProperty)); + builder.properties(FieldNames.createSimilarityFieldName( + ElasticIndexUtils.fieldName(pd.name)), + b1 -> b1.denseVector(denseVectorProperty)); } builder.properties(ElasticIndexDefinition.SIMILARITY_TAGS, diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java index 330aecc167..eab982ffdd 100644 --- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java @@ -263,7 +263,8 @@ public class ElasticRequestHandler { continue; } - String similarityPropFieldName = FieldNames.createSimilarityFieldName(pd.name); + String similarityPropFieldName = FieldNames.createSimilarityFieldName( + ElasticIndexUtils.fieldName(pd.name)); KnnQuery knnQuery = baseKnnQueryBuilder(similarityPropFieldName, bytes, pd).build(); return Optional.of(knnQuery); } @@ -649,11 +650,14 @@ public class ElasticRequestHandler { private Stream<NestedQuery> dynamicScoreQueries(String text) { return elasticIndexDefinition.getDynamicBoostProperties().stream().map(pd -> NestedQuery.of(n -> n - .path(pd.nodeName) + .path(ElasticIndexUtils.fieldName(pd.nodeName)) .query(q -> q.functionScore(s -> s .boost(DYNAMIC_BOOST_WEIGHT) - .query(fq -> fq.match(m -> m.field(pd.nodeName + ".value").query(FieldValue.of(text)))) - .functions(f -> f.fieldValueFactor(fv -> fv.field(pd.nodeName + ".boost"))))) + .query(fq -> fq.match(m -> m.field( + ElasticIndexUtils.fieldName(pd.nodeName) + ".value"). 
+ query(FieldValue.of(text)))) + .functions(f -> f.fieldValueFactor(fv -> fv.field( + ElasticIndexUtils.fieldName(pd.nodeName) + ".boost"))))) .scoreMode(ChildScoreMode.Avg)) ); } @@ -889,8 +893,8 @@ public class ElasticRequestHandler { .type(TextQueryType.CrossFields) .tieBreaker(0.5d); if (FieldNames.FULLTEXT.equals(fieldName)) { - for(PropertyDefinition pd: pr.indexingRule.getNodeScopeAnalyzedProps()) { - qsqBuilder.fields(pd.name + "^" + pd.boost); + for (PropertyDefinition pd : pr.indexingRule.getNodeScopeAnalyzedProps()) { + qsqBuilder.fields(ElasticIndexUtils.fieldName(pd.name) + "^" + pd.boost); } // dynamic boost is included only for :fulltext field if (includeDynamicBoostedValues) { @@ -951,6 +955,11 @@ public class ElasticRequestHandler { if (planResult.isPathTransformed()) { propertyName = PathUtils.getName(propertyName); } - return propertyName; + if ("*".equals(propertyName)) { + // elasticsearch does support the pseudo-field "*" meaning all fields, + // but (arguably) what we really want is the field ":fulltext". 
+ return FieldNames.FULLTEXT; + } + return ElasticIndexUtils.fieldName(propertyName); } } diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java index dec588c62d..2ddb63d3b7 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticSecureFacetAsyncProvider.java @@ -22,6 +22,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler; import org.apache.jackrabbit.oak.plugins.index.elastic.query.async.ElasticResponseListener; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -44,6 +45,7 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe private static final Logger LOG = LoggerFactory.getLogger(ElasticSecureFacetAsyncProvider.class); private final Set<String> facetFields; + private final Set<String> elasticFieldNames; private final Map<String, Map<String, Integer>> accessibleFacetCounts = new ConcurrentHashMap<>(); private final ElasticResponseHandler elasticResponseHandler; private final Predicate<String> isAccessible; @@ -58,11 +60,14 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe this.elasticResponseHandler = elasticResponseHandler; this.isAccessible = isAccessible; this.facetFields = 
elasticRequestHandler.facetFields().collect(Collectors.toSet()); + this.elasticFieldNames = elasticRequestHandler.facetFields(). + map(p -> ElasticIndexUtils.fieldName(p)). + collect(Collectors.toSet()); } @Override public Set<String> sourceFields() { - return facetFields; + return elasticFieldNames; } @Override @@ -75,7 +80,8 @@ class ElasticSecureFacetAsyncProvider implements ElasticFacetProvider, ElasticRe final String path = elasticResponseHandler.getPath(searchHit); if (path != null && isAccessible.test(path)) { for (String field: facetFields) { - JsonNode value = searchHit.source().get(field); + String elasticField = ElasticIndexUtils.fieldName(field); + JsonNode value = searchHit.source().get(elasticField); if (value != null) { accessibleFacetCounts.compute(field, (column, facetValues) -> { if (facetValues == null) { diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java index f8adf3fab2..f946a847de 100644 --- a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/async/facets/ElasticStatisticalFacetAsyncProvider.java @@ -23,6 +23,7 @@ import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticConnection; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticRequestHandler; import org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticResponseHandler; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import 
org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex; import org.slf4j.Logger; @@ -74,11 +75,15 @@ public class ElasticStatisticalFacetAsyncProvider implements ElasticFacetProvide this.elasticResponseHandler = elasticResponseHandler; this.isAccessible = isAccessible; - this.facetFields = elasticRequestHandler.facetFields().collect(Collectors.toSet()); + Set<String> elasticFieldNames = elasticRequestHandler.facetFields(). + map(p -> ElasticIndexUtils.fieldName(p)). + collect(Collectors.toSet()); + this.facetFields = elasticRequestHandler.facetFields(). + collect(Collectors.toSet()); SearchRequest searchRequest = SearchRequest.of(srb -> srb.index(indexDefinition.getIndexAlias()) .trackTotalHits(thb -> thb.enabled(true)) - .source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(facetFields))))) + .source(SourceConfig.of(scf -> scf.filter(ff -> ff.includes(FieldNames.PATH).includes(new ArrayList<>(elasticFieldNames))))) .query(Query.of(qb -> qb.bool(elasticRequestHandler.baseQueryBuilder().build()))) .aggregations(elasticRequestHandler.aggregations()) .size(sampleSize) @@ -135,7 +140,7 @@ public class ElasticStatisticalFacetAsyncProvider implements ElasticFacetProvide final String path = elasticResponseHandler.getPath(searchHit); if (path != null && isAccessible.test(path)) { for (String field : facetFields) { - JsonNode value = searchHit.source().get(field); + JsonNode value = searchHit.source().get(ElasticIndexUtils.fieldName(field)); if (value != null) { accessibleFacetCounts.compute(field, (column, facetValues) -> { if (facetValues == null) { diff --git a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java index 5332b1c245..89dabd5022 100644 --- 
a/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java +++ b/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtils.java @@ -31,6 +31,31 @@ public class ElasticIndexUtils { private static final Logger LOG = LoggerFactory.getLogger(ElasticIndexUtils.class); + /** + * Convert a JCR property name to an Elasticsearch field name. + * Notice that "|" is not allowed in JCR names. + * + * "." is converted to "|dot|" + * "|" is converted to "||" + * + * @param propertyName the property name + * @return the field name + */ + public static String fieldName(String propertyName) { + String fieldName = propertyName; + // 99% property names don't contain a pipe or dot, + // so for performance reasons use indexOf + int slashIndex = fieldName.indexOf('|'); + if (slashIndex >= 0) { + fieldName = fieldName.replaceAll("\\|", "\\|\\|"); + } + int dotIndex = fieldName.indexOf('.'); + if (dotIndex >= 0) { + fieldName = fieldName.replaceAll("\\.", "|dot|"); + } + return fieldName; + } + /** * Transforms a path into an _id compatible with Elasticsearch specification. The path cannot be larger than 512 * bytes. For performance reasons paths that are already compatible are returned untouched. 
Otherwise, SHA-256 @@ -58,7 +83,7 @@ public class ElasticIndexUtils { * @return list of floats */ public static List<Float> toFloats(byte[] array) { - int blockSize = Float.SIZE / Byte.SIZE; + int blockSize = Float.BYTES; ByteBuffer wrap = ByteBuffer.wrap(array); if (array.length % blockSize != 0) { LOG.warn("Unexpected byte array length {}", array.length); @@ -78,10 +103,9 @@ public class ElasticIndexUtils { * @return byte array */ public static byte[] toByteArray(List<Float> values) { - int blockSize = Float.SIZE / Byte.SIZE; - byte[] bytes = new byte[values.size() * blockSize]; + byte[] bytes = new byte[values.size() * Float.BYTES]; ByteBuffer wrap = ByteBuffer.wrap(bytes); - for (int i = 0, j = 0; i < values.size(); i++, j += blockSize) { + for (int i = 0; i < values.size(); i++) { wrap.putFloat(values.get(i)); } return bytes; diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java index aaa1569093..775339aa24 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticContentTest.java @@ -19,6 +19,7 @@ package org.apache.jackrabbit.oak.plugins.index.elastic; import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; import org.apache.jackrabbit.oak.stats.StatisticsProvider; import org.junit.Ignore; @@ -285,12 +286,12 @@ public class ElasticContentTest extends ElasticAbstractQueryTest { assertEventually(() -> { ObjectNode indexed1 = getDocument(index, "/content/indexed1"); - 
assertThat(indexed1.get("a").asText(), equalTo("foo")); + assertThat(indexed1.get(ElasticIndexUtils.fieldName("a")).asText(), equalTo("foo")); ObjectNode indexed2 = getDocument(index, "/content/indexed2"); - assertThat(indexed2.get("a").size(), equalTo(2)); - assertThat(indexed2.get("a").get(0).asText(), equalTo("foo")); - assertThat(indexed2.get("a").get(1).asText(), equalTo("bar")); + assertThat(indexed2.get(ElasticIndexUtils.fieldName("a")).size(), equalTo(2)); + assertThat(indexed2.get(ElasticIndexUtils.fieldName("a")).get(0).asText(), equalTo("foo")); + assertThat(indexed2.get(ElasticIndexUtils.fieldName("a")).get(1).asText(), equalTo("bar")); }); } diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java index 174c62c017..204c14f488 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationTest.java @@ -16,12 +16,26 @@ */ package org.apache.jackrabbit.oak.plugins.index.elastic; +import static org.apache.jackrabbit.JcrConstants.JCR_CONTENT; +import static org.apache.jackrabbit.JcrConstants.JCR_DATA; +import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; +import static org.apache.jackrabbit.JcrConstants.NT_FILE; +import static org.apache.jackrabbit.JcrConstants.NT_FOLDER; +import static org.apache.jackrabbit.oak.plugins.memory.BinaryPropertyState.binaryProperty; + +import java.util.Calendar; +import java.util.List; + import org.apache.jackrabbit.oak.api.CommitFailedException; import org.apache.jackrabbit.oak.api.ContentRepository; +import org.apache.jackrabbit.oak.api.Tree; +import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.plugins.index.IndexAggregationCommonTest; 
import org.junit.ClassRule; import org.junit.Ignore; import org.junit.Test; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; public class ElasticIndexAggregationTest extends IndexAggregationCommonTest { @@ -44,4 +58,72 @@ public class ElasticIndexAggregationTest extends IndexAggregationCommonTest { public void oak3371AggregateV1() throws CommitFailedException { super.oak3371AggregateV1(); } + + @Test + public void testChildNodeWithOrCompositePlan() throws Exception { + Tree content = root.getTree("/").addChild("content"); + Tree folder = content.addChild("myFolder"); + folder.setProperty(JCR_PRIMARYTYPE, NT_FOLDER, Type.NAME); + Tree file = folder.addChild("myFile"); + file.setProperty(JCR_PRIMARYTYPE, NT_FILE, Type.NAME); + file.setProperty("jcr:title", "title"); + file.setProperty("jcr:description", "description"); + + Tree resource = file.addChild(JCR_CONTENT); + resource.setProperty(JCR_PRIMARYTYPE, "nt:resource", Type.NAME); + resource.setProperty("jcr:lastModified", Calendar.getInstance()); + resource.setProperty("jcr:encoding", "UTF-8"); + resource.setProperty("jcr:mimeType", "text/plain"); + resource.setProperty(binaryProperty(JCR_DATA, + "the quick brown fox jumps over the lazy dog.")); + + root.commit(); + + assertEventually(() -> { + String matchContentAll = "//element(*, nt:folder)[(jcr:contains(., 'dog'))]"; + assertThat(explainXPath(matchContentAll), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"dog\"")); + assertQuery(matchContentAll, "xpath", List.of("/content/myFolder")); + + String matchContentSimple = "//element(*, nt:folder)[(jcr:contains(myFile, 'dog'))]"; + assertThat(explainXPath(matchContentSimple), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"dog\"")); + assertQuery(matchContentSimple, "xpath", List.of("/content/myFolder")); + + String matchContent = " //element(*, nt:folder)[(jcr:contains(myFile, 'dog') or jcr:contains(myFile/@jcr:title, 'invalid') or 
jcr:contains(myFile/@jcr:description, 'invalid'))]"; + assertThat(explainXPath(matchContent), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"dog\"")); + assertQuery(matchContent, "xpath", List.of("/content/myFolder")); + + String matchTitle = " //element(*, nt:folder)[(jcr:contains(myFile, 'invalid') or jcr:contains(myFile/@jcr:title, 'title') or jcr:contains(myFile/@jcr:description, 'invalid'))]"; + assertThat(explainXPath(matchTitle), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"invalid\"")); + assertQuery(matchTitle, "xpath", List.of("/content/myFolder")); + + String matchDesc = " //element(*, nt:folder)[(jcr:contains(myFile, 'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or jcr:contains(myFile/@jcr:description, 'description'))]"; + assertThat(explainXPath(matchDesc), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"invalid\"")); + assertQuery(matchDesc, "xpath", List.of("/content/myFolder")); + + String matchNone = " //element(*, nt:folder)[(jcr:contains(myFile, 'invalid') or jcr:contains(myFile/@jcr:title, 'invalid') or jcr:contains(myFile/@jcr:description, 'invalid'))]"; + assertThat(explainXPath(matchNone), containsString( + "\"fields\":[\":fulltext\"],\"query\":\"invalid\"")); + assertQuery(matchNone, "xpath", List.of()); + + String matchOnlyTitleOr = " //element(*, nt:folder)[(jcr:contains(myFile/@jcr:title, 'title') or jcr:contains(myFile/@jcr:title, 'unknown') )]"; + assertThat(explainXPath(matchOnlyTitleOr), containsString( + "\"fields\":[\"jcr:title\"],\"query\":\"title\"")); + assertQuery(matchOnlyTitleOr, "xpath", List.of("/content/myFolder")); + }); + } + + protected String explainXPath(String query) { + return explain(query, XPATH); + } + + protected String explain(String query, String language) { + String explain = "explain " + query; + return executeQuery(explain, language).get(0); + } + } diff --git 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java index 0e930f5c47..b5b0b9e44c 100644 --- a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticPropertyIndexTest.java @@ -22,7 +22,9 @@ import org.apache.jackrabbit.oak.api.CommitFailedException; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.commons.junit.LogCustomizer; import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder; +import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; +import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder.PropertyRule; import org.junit.Assert; import org.junit.Test; @@ -177,16 +179,18 @@ public class ElasticPropertyIndexTest extends ElasticAbstractQueryTest { // OAK-11530 @Test - public void propertyWithDot() throws Exception { + public void propertyWithDotPrefix() throws Exception { IndexDefinitionBuilder builder = createIndex(); builder.includedPaths("/test") .indexRule("nt:base") - .property("test", "./test"); + .property("foo", "foo").propertyIndex() + .property("test", "./test").propertyIndex(); setIndex("test1", builder); root.commit(); //add content - root.getTree("/").addChild("test").setProperty("test", "1"); + root.getTree("/").addChild("test") + .setProperty("test", "1"); root.commit(); String query = "select [jcr:path] from [nt:base] " + @@ -196,6 +200,55 @@ public class ElasticPropertyIndexTest extends ElasticAbstractQueryTest { String explanation = explain(query); assertThat(explanation, containsString("no-index")); }); + + String queryFoo = "select 
[jcr:path] from [nt:base] " + + "where foo = '1'"; + assertEventually(() -> { + String explanation = explain(queryFoo); + assertThat(explanation, containsString("/oak:index/test1")); + assertThat(explanation, containsString("{\"term\":{\"foo\":{\"value\":\"1\"")); + assertQuery(query, List.of()); + }); + } + + @Test + public void propertyWithDot() throws Exception { + IndexDefinitionBuilder builder = createIndex(); + builder.includedPaths("/test") + .indexRule("nt:base") + .property("firstName", "first.name").propertyIndex() + .property("lowerFirstName", "first.name"); + PropertyRule lowerFirstName = builder.indexRule("nt:base").property("lowerFirstName"); + lowerFirstName.getBuilderTree().setProperty( + FulltextIndexConstants.PROP_FUNCTION, "lower([first.name])"); + setIndex("test1", builder); + root.commit(); + + //add content + root.getTree("/").addChild("test").setProperty("first.name", "Antonio"); + root.commit(); + + String query = "select [jcr:path] from [nt:base] " + + "where [first.name] = 'Antonio'"; + + assertEventually(() -> { + String explanation = explain(query); + assertThat(explanation, containsString("/oak:index/test1")); + assertThat(explanation, containsString( + "{\"term\":{\"first|dot|name\":{\"value\":\"Antonio\"")); + assertQuery(query, List.of("/test")); + }); + + String lowerQuery = "select [jcr:path] from [nt:base] " + + "where lower([first.name]) = 'antonio'"; + + assertEventually(() -> { + String explanation = explain(lowerQuery); + assertThat(explanation, containsString("/oak:index/test1")); + assertThat(explanation, containsString( + "{\"term\":{\"function*lower*@first|dot|name\":{\"value\":\"antonio\"")); + assertQuery(lowerQuery, List.of("/test")); + }); } @Test diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java index 6195152a2e..575b55d2f0 100644 --- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSimilarQueryTest.java @@ -25,6 +25,7 @@ import org.apache.commons.io.IOUtils; import org.apache.jackrabbit.oak.api.Blob; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; @@ -174,10 +175,11 @@ public class ElasticSimilarQueryTest extends ElasticAbstractQueryTest { @Test public void vectorSimilarityIndexConfiguration() throws Exception { final String indexName = "test1"; - final String fieldName1 = "fv1"; + String propertyName = "fv1"; + final String fieldName1 = ElasticIndexUtils.fieldName(propertyName); final String similarityFieldName1 = FieldNames.createSimilarityFieldName(fieldName1); - IndexDefinitionBuilder builder = createIndex(fieldName1); - Tree tree = builder.indexRule("nt:base").property(fieldName1).useInSimilarity(true).nodeScopeIndex() + IndexDefinitionBuilder builder = createIndex(propertyName); + Tree tree = builder.indexRule("nt:base").property(propertyName).useInSimilarity(true).nodeScopeIndex() .similaritySearchDenseVectorSize(2048).getBuilderTree(); tree.setProperty(ElasticPropertyDefinition.PROP_SIMILARITY_METRIC, "cosine"); diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java index 4873aa6e9e..9b7372967e 100644 --- 
a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java @@ -28,6 +28,7 @@ import co.elastic.clients.elasticsearch.indices.IndexSettingsAnalysis; import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder; +import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexUtils; import org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; import org.apache.jackrabbit.oak.spi.state.NodeState; @@ -71,7 +72,7 @@ public class ElasticIndexHelperTest { TypeMapping fooPropertyMappings = request.mappings(); assertThat(fooPropertyMappings, notNullValue()); - Property fooProperty = fooPropertyMappings.properties().get("foo"); + Property fooProperty = fooPropertyMappings.properties().get(ElasticIndexUtils.fieldName("foo")); assertThat(fooProperty, is(notNullValue())); assertThat(fooProperty._kind(), is(Property.Kind.Text)); TextProperty fooTextProperty = fooProperty.text(); @@ -151,7 +152,7 @@ public class ElasticIndexHelperTest { TypeMapping fooMappings = request.mappings(); assertThat(fooMappings, notNullValue()); - Property fooProperty = fooMappings.properties().get("foo"); + Property fooProperty = fooMappings.properties().get(ElasticIndexUtils.fieldName("foo")); assertThat(fooProperty, is(notNullValue())); TextProperty textProperty = fooProperty.text(); assertThat(textProperty.analyzer(), is("oak_analyzer")); @@ -160,7 +161,7 @@ public class ElasticIndexHelperTest { TypeMapping barMappings = request.mappings(); assertThat(barMappings, notNullValue()); - Property barProperty = barMappings.properties().get("bar"); + Property barProperty = 
barMappings.properties().get(ElasticIndexUtils.fieldName("bar")); assertThat(barProperty._kind(), is(Property.Kind.Keyword)); } diff --git a/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtilsTest.java b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtilsTest.java new file mode 100644 index 0000000000..76f621019d --- /dev/null +++ b/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticIndexUtilsTest.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.elastic.util; + +import static org.junit.Assert.assertEquals; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.List; + +import org.junit.Test; + +public class ElasticIndexUtilsTest { + + @Test + public void fieldName() { + assertEquals("a", ElasticIndexUtils.fieldName("a")); + assertEquals("first|dot|name", ElasticIndexUtils.fieldName("first.name")); + assertEquals("first||name", ElasticIndexUtils.fieldName("first|name")); + } + + @Test + public void idFromPath() { + assertEquals("/content", ElasticIndexUtils.idFromPath("/content")); + assertEquals("%40%0Bz%DF%B4%22%29%EF%BF%BD%EF%BF%BD%3Cfh%EF%BF%BD%27%EF%BF%BD%7E%EF%BF%BDM%EF%BF%BD%EF%BF%BD%EF%BF%BD%22I%EF%BF%BD%7C%EF%BF%BDGn%0A+%25", + URLEncoder.encode(ElasticIndexUtils.idFromPath("/content".repeat(100)),StandardCharsets.UTF_8)); + } + + @Test + public void toByteArray() { + assertEquals("[1.0, 0.1]", + ElasticIndexUtils.toFloats( + ElasticIndexUtils.toByteArray(List.of(1.0f, 0.1f))).toString()); + assertEquals("[-0.0, 0.0]", + ElasticIndexUtils.toFloats( + ElasticIndexUtils.toByteArray(List.of(-0.0f, 0.0f))).toString()); + assertEquals("[Infinity, -Infinity]", + ElasticIndexUtils.toFloats( + ElasticIndexUtils.toByteArray(List.of(Float.POSITIVE_INFINITY, Float.NEGATIVE_INFINITY))).toString()); + assertEquals("[NaN, 3.4028235E38]", + ElasticIndexUtils.toFloats( + ElasticIndexUtils.toByteArray(List.of(Float.NaN, Float.MAX_VALUE))).toString()); + } + +} diff --git a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexPlannerCommonTest.java b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexPlannerCommonTest.java index e8a557e923..881610482f 100644 --- a/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexPlannerCommonTest.java +++ b/oak-search/src/test/java/org/apache/jackrabbit/oak/plugins/index/IndexPlannerCommonTest.java @@ -1508,7 +1508,7 @@ public abstract 
class IndexPlannerCommonTest { } private static String generateRandomIndexName(String prefix) { - return prefix + RandomStringUtils.random(5, true, false); + return prefix + RandomStringUtils.insecure().next(5, true, false); } /**
