Author: ngupta Date: Thu May 21 08:56:02 2020 New Revision: 1877992 URL: http://svn.apache.org/viewvc?rev=1877992&view=rev Log: OAK-9061|oak-search-elastic: mapping consistent with index definition(committing patch by Fabrizio)
Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java (with props) jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java (with props) jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelperTest.java (with props) Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFacetTest.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFullTextAsyncTest.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriterTest.java jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original) +++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu May 21 08:56:02 2020 @@ -239,7 +239,7 @@ <dependency> 
<groupId>org.hamcrest</groupId> <artifactId>hamcrest-core</artifactId> - <version>1.3</version> + <version>2.2</version> <scope>test</scope> </dependency> <dependency> Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java Thu May 21 08:56:02 2020 @@ -18,11 +18,17 @@ */ package org.apache.jackrabbit.oak.plugins.index.elasticsearch; +import org.apache.jackrabbit.oak.api.Type; import org.apache.jackrabbit.oak.commons.PathUtils; import org.apache.jackrabbit.oak.plugins.index.IndexConstants; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition; +import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition; import org.apache.jackrabbit.oak.spi.state.NodeState; +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.function.Function; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.StreamSupport; @@ -56,6 +62,16 @@ public class ElasticsearchIndexDefinitio .map(Object::toString) .collect(Collectors.joining(""))); + private static final Function<Integer, Boolean> isAnalyzable; + + static { + int[] NOT_ANALYZED_TYPES = new int[] { + Type.BINARY.tag(), Type.LONG.tag(), Type.DOUBLE.tag(), Type.DECIMAL.tag(), Type.DATE.tag(), Type.BOOLEAN.tag() + }; + Arrays.sort(NOT_ANALYZED_TYPES); 
// need for binary search + isAnalyzable = type -> Arrays.binarySearch(NOT_ANALYZED_TYPES, type) < 0; + } + private final String remoteIndexName; public final int bulkActions; @@ -66,6 +82,8 @@ public class ElasticsearchIndexDefinitio private final String indexPrefix; private final String remoteAlias; + private final Map<String, List<PropertyDefinition>> propertiesByName; + public ElasticsearchIndexDefinition(NodeState root, NodeState defn, String indexPath, String indexPrefix) { super(root, getIndexDefinitionState(defn), determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath); boolean isReindex = defn.getBoolean(IndexConstants.REINDEX_PROPERTY_NAME); @@ -78,6 +96,12 @@ public class ElasticsearchIndexDefinitio this.bulkFlushIntervalMs = getOptionalValue(defn, BULK_FLUSH_INTERVAL_MS, BULK_FLUSH_INTERVAL_MS_DEFAULT); this.bulkRetries = getOptionalValue(defn, BULK_RETRIES, BULK_RETRIES_DEFAULT); this.bulkRetriesBackoff = getOptionalValue(defn, BULK_RETRIES_BACKOFF, BULK_RETRIES_BACKOFF_DEFAULT); + + this.propertiesByName = getDefinedRules() + .stream() + .flatMap(rule -> StreamSupport.stream(rule.getProperties().spliterator(), false)) + .filter(pd -> pd.index) // keep only properties that can be indexed + .collect(Collectors.groupingBy(pd -> pd.name)); } /** @@ -99,6 +123,35 @@ public class ElasticsearchIndexDefinitio return remoteIndexName; } + public Map<String, List<PropertyDefinition>> getPropertiesByName() { + return propertiesByName; + } + + /** + * Returns the keyword field name mapped in Elasticsearch for the specified property name. 
+ * @param propertyName the property name in the index rules + * @return the field name identifier in Elasticsearch + * @throws IllegalArgumentException if the specified name is not part of this {@code ElasticsearchIndexDefinition} + */ + public String getElasticKeyword(String propertyName) { + List<PropertyDefinition> propertyDefinitions = propertiesByName.get(propertyName); + if (propertyDefinitions == null) { + throw new IllegalArgumentException(propertyName + " is not part of this ElasticsearchIndexDefinition"); + } + + String field = propertyName; + // it's ok to look at the first property since we are sure they all have the same type + int type = propertyDefinitions.get(0).getType(); + if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) { + field += ".keyword"; + } + return field; + } + + public boolean isAnalyzed(List<PropertyDefinition> propertyDefinitions) { + return propertyDefinitions.stream().anyMatch(pd -> pd.analyzed || pd.fulltextEnabled()); + } + private String setupAlias() { // TODO: implement advanced remote index name strategy that takes into account multiple tenants and re-index process return getESSafeIndexName(indexPrefix + "." 
+ getIndexPath()); Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java Thu May 21 08:56:02 2020 @@ -20,17 +20,17 @@ package org.apache.jackrabbit.oak.plugin import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexNode; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher; +import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration; import org.apache.jackrabbit.oak.spi.query.Filter; import org.apache.jackrabbit.oak.spi.query.QueryIndex; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.search.SearchHit; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Map; public class ElasticFacetHelper { @@ -58,32 +58,28 @@ public class ElasticFacetHelper { return elasticsearchFacets; } - public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) throws UnsupportedEncodingException { + public static List<String> getAccessibleDocIds(SearchHit[] searchHits, Filter filter) { List<String> accessibleDocs = new LinkedList<>(); for (SearchHit searchHit : searchHits) { - 
String id = searchHit.getId(); - String path = idToPath(id); + final Map<String, Object> sourceMap = searchHit.getSourceAsMap(); + String path = (String) sourceMap.get(FieldNames.PATH); if (filter.isAccessible(path)) { - accessibleDocs.add(id); + accessibleDocs.add(path); } } return accessibleDocs; } - public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) throws UnsupportedEncodingException { + public static int getAccessibleDocCount(Iterator<SearchHit> searchHitIterator, Filter filter) { int count = 0; while (searchHitIterator.hasNext()) { SearchHit searchHit = searchHitIterator.next(); - String id = searchHit.getId(); - String path = idToPath(id); + final Map<String, Object> sourceMap = searchHit.getSourceAsMap(); + String path = (String) sourceMap.get(FieldNames.PATH); if (filter.isAccessible(path)) { count++; } } return count; } - - public static String idToPath(String id) throws UnsupportedEncodingException { - return URLDecoder.decode(id, "UTF-8"); - } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java Thu May 21 08:56:02 2020 @@ -16,6 +16,7 @@ */ package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex; import org.apache.jackrabbit.oak.spi.query.QueryIndex; @@ -49,7 +50,8 @@ public interface ElasticsearchFacets { * @return A map with facetName as key and List of facets in descending order of facetCount. * @throws IOException */ - Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException; + Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition, + int numberOfFacets) throws IOException; /** * We can retrieve Aggregation in a single call to elastic search while querying. Which can then be passed Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java Thu May 21 08:56:02 2020 @@ -16,6 +16,7 @@ */ package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel; import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil; @@ -51,12 +52,13 @@ public class InsecureElasticSearchFacets } @Override - public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException { + public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition, + int numberOfFacets) throws IOException { if (elasticsearchAggregationData != null && numberOfFacets <= elasticsearchAggregationData.getNumberOfFacets()) { return changeToFacetList(elasticsearchAggregationData.getAggregations().getAsMap(), numberOfFacets); } LOG.warn("Facet data is being retrieved by again calling Elasticsearch"); - List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, numberOfFacets); + List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(plan, indexDefinition, numberOfFacets); ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder() .withQuery(query) .withAggregation(aggregationBuilders) Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java Thu May 21 08:56:02 2020 @@ -16,6 +16,7 @@ */ 
package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil; @@ -50,7 +51,8 @@ public class SecureElasticSearchFacets e for docs. */ @Override - public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException { + public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition, + int numberOfFacets) throws IOException { Map<String, Map<String, Long>> secureFacetCount = new HashMap<>(); Filter filter = getPlan().getFilter(); boolean doFetch = true; @@ -70,7 +72,8 @@ public class SecureElasticSearchFacets e List<String> accessibleDocs = ElasticFacetHelper.getAccessibleDocIds(searchHits, filter); if (accessibleDocs.isEmpty()) continue; QueryBuilder queryWithAccessibleDocIds = QueryBuilders.termsQuery("_id", accessibleDocs); - Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds, accessibleDocs.size()); + Map<String, Aggregation> accessibleDocsAggregation = getAggregationForDocIds(queryWithAccessibleDocIds, + accessibleDocs.size(), indexDefinition); collateAggregations(secureFacetCount, accessibleDocsAggregation); } @@ -115,15 +118,15 @@ public class SecureElasticSearchFacets e } } - private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int facetCount) throws IOException { - List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), facetCount); + private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder queryWithAccessibleDocIds, int 
facetCount, + ElasticsearchIndexDefinition indexDefinition) throws IOException { + List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), indexDefinition, facetCount); ElasticsearchSearcherModel idBasedelasticsearchSearcherModelWithAggregation = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder() .withQuery(queryWithAccessibleDocIds) .withAggregation(aggregationBuilders) .build(); SearchResponse facetDocs = getSearcher().search(idBasedelasticsearchSearcherModelWithAggregation); - Map<String, Aggregation> aggregationMap = facetDocs.getAggregations().asMap(); - return aggregationMap; + return facetDocs.getAggregations().asMap(); } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java Thu May 21 08:56:02 2020 @@ -17,6 +17,7 @@ package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets; import com.google.common.collect.AbstractIterator; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel; import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants; @@ -52,7 +53,9 @@ public class StatisticalElasticSearchFac this.secureFacetConfiguration = secureFacetConfiguration; } - public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int numberOfFacets) throws IOException { + @Override + public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition, + int numberOfFacets) throws IOException { Map<String, List<FulltextIndex.Facet>> result = new HashMap<>(); Map<String, List<FulltextIndex.Facet>> topChildren; Filter filter = getPlan().getFilter(); @@ -61,7 +64,7 @@ public class StatisticalElasticSearchFac ElasticsearchAggregationData aggregationData = getElasticsearchAggregationData(); if (aggregationData == null || aggregationData.getNumberOfFacets() < numberOfFacets) { LOG.warn("Facets and Totalhit count are being retrieved by calling Elasticsearch"); - topChildren = super.getElasticSearchFacets(numberOfFacets); + topChildren = super.getElasticSearchFacets(indexDefinition, numberOfFacets); ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder() .withQuery(getQuery()) .withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE) @@ -79,7 +82,8 @@ public class StatisticalElasticSearchFac // instead of statistical count. 
<OAK-8138> if (hitCount < sampleSize) { LOG.debug("SampleSize: {} is greater than hitcount: {}, Getting secure facet count", sampleSize, hitCount); - return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(numberOfFacets); + return new SecureElasticSearchFacets(getSearcher(), getQuery(), getPlan()).getElasticSearchFacets(indexDefinition, + numberOfFacets); } long randomSeed = secureFacetConfiguration.getRandomSeed(); Iterator<SearchHit> docIterator = getMatchingDocIterator(getSearcher(), getQuery()); Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java Thu May 21 08:56:02 2020 @@ -25,8 +25,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URLEncoder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -35,10 +33,7 @@ import java.util.Map; class ElasticsearchDocument { private static final Logger LOG = LoggerFactory.getLogger(ElasticsearchDocument.class); - // id should only be useful for logging (at least as of now) private final String path; - - private final String id; private final List<String> fulltext; private final List<String> suggest; private final List<String> notNullProps; @@ -47,13 +42,6 @@ class 
ElasticsearchDocument { ElasticsearchDocument(String path) { this.path = path; - String id = null; - try { - id = pathToId(path); - } catch (UnsupportedEncodingException e) { - LOG.warn("Couldn't encode {} as ES id", path); - } - this.id = id; this.fulltext = new ArrayList<>(); this.suggest = new ArrayList<>(); this.notNullProps = new ArrayList<>(); @@ -93,24 +81,20 @@ class ElasticsearchDocument { String parPath = PathUtils.getParentPath(path); int depth = PathUtils.getDepth(path); - // TODO: remember that mapping must be configured with - // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html addProperty(FieldNames.ANCESTORS, parPath); addProperty(FieldNames.PATH_DEPTH, depth); } - String getId() { - return id; - } - public String build() { - String ret = null; + String ret; try { XContentBuilder builder = XContentFactory.jsonBuilder(); builder.startObject(); - if (fulltext.size() > 0) { - builder.field(FieldNames.FULLTEXT, fulltext); - } + { + builder.field(FieldNames.PATH, path); + if (fulltext.size() > 0) { + builder.field(FieldNames.FULLTEXT, fulltext); + } if (suggest.size() > 0) { builder.startObject(FieldNames.SUGGEST).field("input", suggest).endObject(); } @@ -120,17 +104,18 @@ class ElasticsearchDocument { if (nullProps.size() > 0) { builder.field(FieldNames.NULL_PROPS, nullProps); } - for (Map.Entry<String, Object> prop : properties.entrySet()) { - builder.field(prop.getKey(), prop.getValue()); + for (Map.Entry<String, Object> prop : properties.entrySet()) { + builder.field(prop.getKey(), prop.getValue()); + } } builder.endObject(); ret = Strings.toString(builder); } catch (IOException e) { - LOG.error("Error serializing document - id: {}, properties: {}, fulltext: {}, suggest: {}, " + + LOG.error("Error serializing document - path: {}, properties: {}, fulltext: {}, suggest: {}, " + "notNullProps: {}, nullProps: {}", - path, properties, fulltext, suggest, notNullProps, nullProps, - e); + path, 
properties, fulltext, suggest, notNullProps, nullProps, e); + ret = null; } return ret; @@ -140,8 +125,4 @@ class ElasticsearchDocument { public String toString() { return build(); } - - public static String pathToId(String path) throws UnsupportedEncodingException { - return URLEncoder.encode(path, "UTF-8"); - } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java Thu May 21 08:56:02 2020 @@ -48,10 +48,9 @@ class ElasticsearchDocumentMaker extends } @Override - protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) throws IOException { - if (doc.getId() == null) { - throw new IOException("Couldn't generate id for doc - (More details during initDoc)" + doc); - } + protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, boolean dirty, boolean facet) { + // evaluate path restrictions is enabled by default in elastic. Always index ancestors + doc.indexAncestors(path); return doc; } @@ -134,10 +133,13 @@ class ElasticsearchDocumentMaker extends doc.addProperty(pname, f); } + /** + * Empty method implementation. 
Ancestors are always indexed + * + * @see ElasticsearchDocumentMaker#finalizeDoc + */ @Override - protected void indexAncestors(ElasticsearchDocument doc, String path) { - doc.indexAncestors(path); - } + protected void indexAncestors(ElasticsearchDocument doc, String path) { /* empty */ } @Override protected void indexNotNullProperty(ElasticsearchDocument doc, PropertyDefinition pd) { Added: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java?rev=1877992&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java (added) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java Thu May 21 08:56:02 2020 @@ -0,0 +1,143 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.elasticsearch.index; + +import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; +import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; +import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition; +import org.elasticsearch.client.indices.CreateIndexRequest; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * Provides utility functions around Elasticsearch indexing + */ +class ElasticsearchIndexHelper { + + public static CreateIndexRequest createIndexRequest(ElasticsearchIndexDefinition indexDefinition) throws IOException { + final CreateIndexRequest request = new CreateIndexRequest(indexDefinition.getRemoteIndexName()); + + // provision settings + // https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html + request.settings(Settings.builder() + .put("analysis.analyzer.ancestor_analyzer.type", "custom") + .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy")); + + // provision mappings + final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder(); + mappingBuilder.startObject(); + { + mappingBuilder.startObject("properties"); + { + mapInternalProperties(mappingBuilder); + mapIndexRules(indexDefinition, mappingBuilder); + } + mappingBuilder.endObject(); + } + mappingBuilder.endObject(); + request.mapping(mappingBuilder); + + return request; + } + + private static void mapInternalProperties(XContentBuilder mappingBuilder) throws IOException { + mappingBuilder.startObject(FieldNames.PATH) + .field("type", "keyword") + .endObject(); + mappingBuilder.startObject(FieldNames.ANCESTORS) + .field("type", 
"text") + .field("analyzer", "ancestor_analyzer") + .field("search_analyzer", "keyword") + .field("search_quote_analyzer", "keyword") + .endObject(); + mappingBuilder.startObject(FieldNames.PATH_DEPTH) + .field("type", "integer") + .endObject(); + // TODO: the mapping below is for features currently not supported. These need to be reviewed + // when the specific features will be implemented +// mappingBuilder.startObject(FieldNames.SUGGEST) +// .field("type", "completion") +// .endObject(); +// mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS) +// .field("type", "keyword") +// .endObject(); +// mappingBuilder.startObject(FieldNames.NULL_PROPS) +// .field("type", "keyword") +// .endObject(); + } + + private static void mapIndexRules(ElasticsearchIndexDefinition indexDefinition, XContentBuilder mappingBuilder) throws IOException { + // we need to check if in the defined rules there are properties with the same name and different types + final List<Map.Entry<String, List<PropertyDefinition>>> multiTypesFields = indexDefinition.getPropertiesByName() + .entrySet() + .stream() + .filter(e -> e.getValue().size() > 1) + .filter(e -> e.getValue().stream().map(PropertyDefinition::getType).distinct().count() > 1) + .collect(Collectors.toList()); + + if (!multiTypesFields.isEmpty()) { + String fields = multiTypesFields.stream().map(Map.Entry::getKey).collect(Collectors.joining(", ", "[", "]")); + throw new IllegalStateException(indexDefinition.getIndexPath() + " has properties with the same name and " + + "different types " + fields); + } + + for (Map.Entry<String, List<PropertyDefinition>> entry : indexDefinition.getPropertiesByName().entrySet()) { + final String name = entry.getKey(); + final List<PropertyDefinition> propertyDefinitions = entry.getValue(); + + Type<?> type = Type.fromTag(propertyDefinitions.get(0).getType(), false); + mappingBuilder.startObject(name); + { + // https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html + if 
(Type.BINARY.equals(type)) { + mappingBuilder.field("type", "binary"); + } else if (Type.LONG.equals(type)) { + mappingBuilder.field("type", "long"); + } else if (Type.DOUBLE.equals(type) || Type.DECIMAL.equals(type)) { + mappingBuilder.field("type", "double"); + } else if (Type.DATE.equals(type)) { + mappingBuilder.field("type", "date"); + } else if (Type.BOOLEAN.equals(type)) { + mappingBuilder.field("type", "boolean"); + } else { + if (indexDefinition.isAnalyzed(propertyDefinitions)) { + mappingBuilder.field("type", "text"); + // always add keyword for sorting / faceting as sub-field + mappingBuilder.startObject("fields"); + { + mappingBuilder.startObject("keyword") + .field("type", "keyword") + .endObject(); + } + mappingBuilder.endObject(); + } else { + // always add keyword for sorting / faceting + mappingBuilder.field("type", "keyword"); + } + } + } + mappingBuilder.endObject(); + } + } +} Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java Thu May 21 08:56:02 2020 @@ -18,8 +18,8 @@ package 
org.apache.jackrabbit.oak.plugin import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; -import org.apache.jackrabbit.oak.plugins.index.search.FieldNames; import org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriter; +import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest; import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest; @@ -37,13 +37,11 @@ import org.elasticsearch.client.IndicesC import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.indices.CreateIndexRequest; import org.elasticsearch.client.indices.CreateIndexResponse; +import org.elasticsearch.client.indices.GetIndexRequest; import org.elasticsearch.cluster.metadata.AliasMetaData; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.common.unit.TimeValue; -import org.elasticsearch.common.xcontent.XContentBuilder; -import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.jetbrains.annotations.NotNull; import org.jetbrains.annotations.TestOnly; @@ -51,12 +49,17 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.Phaser; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.Map; -import java.util.Optional; import java.util.Set; import java.util.stream.Collectors; -import static 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchDocument.pathToId; import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; @@ -66,8 +69,19 @@ class ElasticsearchIndexWriter implement private final ElasticsearchConnection elasticsearchConnection; private final ElasticsearchIndexDefinition indexDefinition; + /** + * Coordinates communication between bulk processes. It has a main controller registered at creation time and + * de-registered on {@link ElasticsearchIndexWriter#close(long)}. Each bulk request register a new party in + * this Phaser in {@link OakBulkProcessorListener#beforeBulk(long, BulkRequest)} and de-register itself when + * the request returns. + */ + private final Phaser phaser = new Phaser(1); // register main controller + /** + * Key-value structure to keep the history of bulk requests. Keys are the bulk execution ids, the boolean + * value is {@code true} when at least an update is performed, otherwise {@code false}. 
+ */ + private final ConcurrentHashMap<Long, Boolean> updatesMap = new ConcurrentHashMap<>(); private final BulkProcessor bulkProcessor; - private Optional<Boolean> indexUpdated = Optional.empty(); ElasticsearchIndexWriter(@NotNull ElasticsearchConnection elasticsearchConnection, @NotNull ElasticsearchIndexDefinition indexDefinition) { @@ -99,70 +113,76 @@ class ElasticsearchIndexWriter implement } @Override - public void updateDocument(String path, ElasticsearchDocument doc) throws IOException { + public void updateDocument(String path, ElasticsearchDocument doc) { IndexRequest request = new IndexRequest(indexDefinition.getRemoteIndexAlias()) - .id(pathToId(path)) + .id(idFromPath(path)) .source(doc.build(), XContentType.JSON); bulkProcessor.add(request); } @Override - public void deleteDocuments(String path) throws IOException { + public void deleteDocuments(String path) { DeleteRequest request = new DeleteRequest(indexDefinition.getRemoteIndexAlias()) - .id(pathToId(path)); + .id(idFromPath(path)); bulkProcessor.add(request); } @Override - public boolean close(long timestamp) throws IOException { + public boolean close(long timestamp) { LOG.trace("Calling close on bulk processor {}", bulkProcessor); bulkProcessor.close(); LOG.trace("Bulk Processor {} closed", bulkProcessor); - // bulkProcessor.close() calls the OakBulkProcessorListener.beforeBulk in a blocking manner - // indexUpdated would be unset there if it was false till now (not even a single update succeeded) - // in this case wait for sometime for the last OakBulkProcessorListener.afterBulk to be called - // where indexUpdated can possibly be set to true, return false in case of timeout. 
- // We don't wait in case indexUpdated is already set (This would be if any of the previous flushes for this processor - // were successful i.e index was updated at least once) - final long start = System.currentTimeMillis(); - long timeoutMillis = indexDefinition.bulkFlushIntervalMs * 5 ; - while (!indexUpdated.isPresent()) { - long lastAttempt = System.currentTimeMillis(); - long elapsedTime = lastAttempt - start; - if (elapsedTime > timeoutMillis) { - // indexUpdate was not set till now, return false - LOG.trace("Timed out waiting for the bulk processor response. Returning indexUpdated = false"); - return false; - } else { - try { - LOG.trace("Waiting for afterBulk response..."); - Thread.sleep(100); - } catch (InterruptedException ex) { - // - } - } + // de-register main controller + final int phase = phaser.arriveAndDeregister(); + + try { + phaser.awaitAdvanceInterruptibly(phase, indexDefinition.bulkFlushIntervalMs * 5, TimeUnit.MILLISECONDS); + } catch (InterruptedException | TimeoutException e) { + LOG.error("Error waiting for bulk requests to return", e); + } + + if (LOG.isTraceEnabled()) { + LOG.trace("Bulk identifier -> update status = {}", updatesMap); } - LOG.trace("Returning indexUpdated = {}", indexUpdated.get()); - return indexUpdated.get(); + return updatesMap.containsValue(Boolean.TRUE); } - // TODO: we need to check if the index already exists and in that case we have to figure out if there are - // "breaking changes" in the index definition protected void provisionIndex() throws IOException { + // check if index already exists + boolean exists = elasticsearchConnection.getClient().indices().exists( + new GetIndexRequest(indexDefinition.getRemoteIndexName()), RequestOptions.DEFAULT + ); + if (exists) { + LOG.info("Index {} already exists. 
Skip index provision", indexDefinition.getRemoteIndexName()); + return; + } - IndicesClient indicesClient = elasticsearchConnection.getClient().indices(); + final IndicesClient indicesClient = elasticsearchConnection.getClient().indices(); final String indexName = indexDefinition.getRemoteIndexName(); - CreateIndexRequest createIndexRequest = constructCreateIndexRequest(indexName); - String requestMsg = Strings.toString(createIndexRequest.toXContent(jsonBuilder(), EMPTY_PARAMS)); - CreateIndexResponse response = indicesClient.create(createIndexRequest, RequestOptions.DEFAULT); - checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName); - - LOG.info("Updated settings for index {} = {}. Response acknowledged: {}", - indexDefinition.getRemoteIndexAlias(), requestMsg, response.isAcknowledged()); - + // create the new index + final CreateIndexRequest request = ElasticsearchIndexHelper.createIndexRequest(indexDefinition); + try { + if (LOG.isDebugEnabled()) { + final String requestMsg = Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS)); + LOG.debug("Creating Index with request {}", requestMsg); + } + CreateIndexResponse response = indicesClient.create(request, RequestOptions.DEFAULT); + LOG.info("Updated settings for index {}. Response acknowledged: {}", + indexDefinition.getRemoteIndexName(), response.isAcknowledged()); + checkResponseAcknowledgement(response, "Create index call not acknowledged for index " + indexName); + } catch (ElasticsearchStatusException ese) { + // We already check index existence as first thing in this method, if we get here it means we have got into + // a conflict (eg: multiple cluster nodes provision concurrently). 
+ // Elasticsearch does not have a CREATE IF NOT EXIST, need to inspect exception + // https://github.com/elastic/elasticsearch/issues/19862 + if (ese.status().getStatus() == 400 && ese.getDetailedMessage().contains("resource_already_exists_exception")) { + LOG.warn("Index {} already exists. Ignoring error", indexName); + } else throw ese; + } + // update the mapping GetAliasesRequest getAliasesRequest = new GetAliasesRequest(indexDefinition.getRemoteIndexAlias()); GetAliasesResponse aliasesResponse = indicesClient.getAlias(getAliasesRequest, RequestOptions.DEFAULT); Map<String, Set<AliasMetaData>> aliases = aliasesResponse.getAliases(); @@ -180,6 +200,8 @@ class ElasticsearchIndexWriter implement + indexDefinition.getRemoteIndexAlias()); LOG.info("Updated alias {} to index {}. Response acknowledged: {}", indexDefinition.getRemoteIndexAlias(), indexName, updateAliasResponse.isAcknowledged()); + + // once the alias has been updated, we can safely remove the old index deleteOldIndices(indicesClient, aliases.keySet()); } @@ -201,57 +223,15 @@ class ElasticsearchIndexWriter implement LOG.info("Deleted indices {}. 
Response acknowledged: {}", indices.toString(), deleteIndexResponse.isAcknowledged()); } - private CreateIndexRequest constructCreateIndexRequest(String indexName) throws IOException { - CreateIndexRequest request = new CreateIndexRequest(indexName); - - // provision settings - request.settings(Settings.builder() - .put("analysis.analyzer.ancestor_analyzer.type", "custom") - .put("analysis.analyzer.ancestor_analyzer.tokenizer", "path_hierarchy")); - - // provision mappings - XContentBuilder mappingBuilder = XContentFactory.jsonBuilder(); - mappingBuilder.startObject(); - { - mappingBuilder.startObject("properties"); - { - mappingBuilder.startObject(FieldNames.ANCESTORS) - .field("type", "text") - .field("analyzer", "ancestor_analyzer") - .field("search_analyzer", "keyword") - .field("search_quote_analyzer", "keyword") - .endObject(); - mappingBuilder.startObject(FieldNames.PATH_DEPTH) - .field("type", "integer") - .endObject(); - mappingBuilder.startObject(FieldNames.SUGGEST) - .field("type", "completion") - .endObject(); - mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS) - .field("type", "keyword") - .endObject(); - mappingBuilder.startObject(FieldNames.NULL_PROPS) - .field("type", "keyword") - .endObject(); - } - mappingBuilder.endObject(); - } - mappingBuilder.endObject(); - request.mapping(mappingBuilder); - - return request; - } - private class OakBulkProcessorListener implements BulkProcessor.Listener { @Override public void beforeBulk(long executionId, BulkRequest bulkRequest) { - if (indexUpdated.isPresent() && !indexUpdated.get()) { - // Reset the state only if it's false - // If it's true that means index was updated at least once by this processor - // and we can return true for indexUpdate. 
- indexUpdated = Optional.empty(); - } + // register new bulk party + phaser.register(); + // init update status + updatesMap.put(executionId, Boolean.FALSE); + LOG.info("Sending bulk with id {} -> {}", executionId, bulkRequest.getDescription()); if (LOG.isTraceEnabled()) { LOG.trace("Bulk Requests: \n{}", bulkRequest.requests() @@ -279,29 +259,41 @@ class ElasticsearchIndexWriter implement LOG.error("Bulk item with id {} failed", failure.getId(), failure.getCause()); } else { // Set indexUpdated to true even if 1 item was updated successfully - indexUpdated = Optional.of(true); + updatesMap.put(executionId, Boolean.TRUE); } } - // Only set indexUpdated to false if it's unset - // If set and true, that means index was updated at least once by this processor. - // If set and false, no need to do anything - if (!indexUpdated.isPresent()) { - indexUpdated = Optional.of(false); - } } else { - indexUpdated = Optional.of(true); + updatesMap.put(executionId, Boolean.TRUE); } + phaser.arriveAndDeregister(); } @Override public void afterBulk(long executionId, BulkRequest bulkRequest, Throwable throwable) { - // Only set indexUpdated to false if it's unset - // If set and true, that means index was updated at least once by this processor. - // If set and false, no need to do anything - if (!indexUpdated.isPresent()) { - indexUpdated = Optional.of(false); - } LOG.error("Bulk with id {} threw an error", executionId, throwable); + phaser.arriveAndDeregister(); + } + } + + /** + * Transforms a path into an _id compatible with Elasticsearch specification. The path cannot be larger than 512 + * bytes. For performance reasons paths that are already compatible are returned untouched. Otherwise, SHA-256 + * algorithm is used to return a transformed path (32 bytes max). 
+ * + * @param path the document path + * @return the Elasticsearch compatible path + * @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html"> + * Mapping _id field</a> + */ + private static String idFromPath(@NotNull String path) { + byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8); + if (pathBytes.length > 512) { + try { + return new String(MessageDigest.getInstance("SHA-256").digest(pathBytes)); + } catch (NoSuchAlgorithmException e) { + throw new IllegalStateException(e); + } } + return path; } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java Thu May 21 08:56:02 2020 @@ -90,7 +90,9 @@ class ElasticsearchIndex extends Fulltex @Override protected String getFulltextRequestString(IndexPlan plan, IndexNode indexNode) { - return Strings.toString(ElasticsearchResultRowIterator.getESQuery(plan, getPlanResult(plan))); + return Strings.toString(new ElasticsearchResultRowIterator(plan.getFilter(), getPlanResult(plan), plan, + acquireIndexNode(plan), FulltextIndex::shouldInclude, getEstimator(plan.getPlanName())) + .getESQuery(plan, getPlanResult(plan))); } @Override Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java Thu May 21 08:56:02 2020 @@ -53,8 +53,6 @@ import org.slf4j.LoggerFactory; import javax.jcr.PropertyType; import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.net.URLDecoder; import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Deque; @@ -68,7 +66,6 @@ import java.util.stream.StreamSupport; import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES; import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE; -import static org.apache.jackrabbit.oak.api.Type.STRING; import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot; import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath; import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.TermQueryBuilderFactory.newAncestorQuery; @@ -158,9 +155,10 @@ class ElasticsearchResultRowIterator imp try { ElasticsearchSearcher searcher = getCurrentSearcher(indexNode); QueryBuilder query = getESQuery(plan, planResult); - int numberOfFacets = indexNode.getDefinition().getNumberOfTopFacets(); + ElasticsearchIndexDefinition indexDefinition = indexNode.getDefinition(); + int numberOfFacets = indexDefinition.getNumberOfTopFacets(); List<TermsAggregationBuilder> aggregationBuilders = ElasticsearchAggregationBuilderUtil - .getAggregators(plan, numberOfFacets); + .getAggregators(plan, indexDefinition, numberOfFacets); 
ElasticsearchSearcherModel elasticsearchSearcherModel = new ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder() .withQuery(query) @@ -236,10 +234,9 @@ class ElasticsearchResultRowIterator imp return new ElasticsearchSearcher(indexNode); } - private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit, - ElasticsearchFacetProvider elasticsearchFacetProvider) throws IOException { - String id = hit.getId(); - String path = idToPath(id); + private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit, ElasticsearchFacetProvider elasticsearchFacetProvider) { + final Map<String, Object> sourceMap = hit.getSourceAsMap(); + String path = (String) sourceMap.get(FieldNames.PATH); if (path != null) { if ("".equals(path)) { path = "/"; @@ -276,7 +273,7 @@ class ElasticsearchResultRowIterator imp * @param planResult * @return the Lucene query */ - static QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) { + public QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) { List<QueryBuilder> qs = new ArrayList<>(); Filter filter = plan.getFilter(); FullTextExpression ft = filter.getFullTextConstraint(); @@ -504,13 +501,12 @@ class ElasticsearchResultRowIterator imp return unwrapped; } - private static void addNonFullTextConstraints(List<QueryBuilder> qs, + private void addNonFullTextConstraints(List<QueryBuilder> qs, IndexPlan plan, PlanResult planResult) { final BiPredicate<Iterable<String>, String> any = (iterable, value) -> StreamSupport.stream(iterable.spliterator(), false).anyMatch(value::equals); Filter filter = plan.getFilter(); - IndexDefinition defn = planResult.indexDefinition; if (!filter.matchesAllTypes()) { addNodeTypeConstraints(planResult.indexingRule, qs, filter); } @@ -518,20 +514,15 @@ class ElasticsearchResultRowIterator imp String path = FulltextIndex.getPathRestriction(plan); switch (filter.getPathRestriction()) { case ALL_CHILDREN: - if (defn.evaluatePathRestrictions()) { - if ("/".equals(path)) { - break; - } + if 
(!"/".equals(path)) { qs.add(newAncestorQuery(path)); } break; case DIRECT_CHILDREN: - if (defn.evaluatePathRestrictions()) { - BoolQueryBuilder bq = boolQuery(); - bq.must(newAncestorQuery(path)); - bq.must(newDepthQuery(path, planResult)); - qs.add(bq); - } + BoolQueryBuilder bq = boolQuery(); + bq.must(newAncestorQuery(path)); + bq.must(newDepthQuery(path, planResult)); + qs.add(bq); break; case EXACT: // For transformed paths, we can only add path restriction if absolute path to property can be @@ -588,7 +579,7 @@ class ElasticsearchResultRowIterator imp if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { - String first = pr.first.getValue(STRING); + String first = pr.first.getValue(Type.STRING); first = first.replace("\\", ""); if (JCR_PATH.equals(name)) { qs.add(newPathQuery(first)); @@ -637,7 +628,7 @@ class ElasticsearchResultRowIterator imp } private static QueryBuilder createNodeNameQuery(Filter.PropertyRestriction pr) { - String first = pr.first != null ? pr.first.getValue(STRING) : null; + String first = pr.first != null ? 
pr.first.getValue(Type.STRING) : null; if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding && pr.lastIncluding) { // [property]=[value] @@ -685,7 +676,7 @@ class ElasticsearchResultRowIterator imp } @Nullable - private static QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr, + private QueryBuilder createQuery(String propertyName, Filter.PropertyRestriction pr, PropertyDefinition defn) { int propType = FulltextIndex.determinePropertyType(defn, pr); @@ -699,31 +690,29 @@ class ElasticsearchResultRowIterator imp return newNotNullPropQuery(defn.name); } + final String field = indexNode.getDefinition().getElasticKeyword(propertyName); + QueryBuilder in; switch (propType) { case PropertyType.DATE: { - in = newPropertyRestrictionQuery(propertyName, false, pr, - value -> parse(value.getValue(Type.DATE)).getTime()); + in = newPropertyRestrictionQuery(field, pr, value -> parse(value.getValue(Type.DATE)).getTime()); break; } case PropertyType.DOUBLE: { - in = newPropertyRestrictionQuery(propertyName, false, pr, - value -> value.getValue(Type.DOUBLE)); + in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.DOUBLE)); break; } case PropertyType.LONG: { - in = newPropertyRestrictionQuery(propertyName, false, pr, - value -> value.getValue(Type.LONG)); + in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.LONG)); break; } default: { if (pr.isLike) { - return createLikeQuery(propertyName, pr.first.getValue(STRING)); + return createLikeQuery(propertyName, pr.first.getValue(Type.STRING)); } //TODO Confirm that all other types can be treated as string - in = newPropertyRestrictionQuery(propertyName, true, pr, - value -> value.getValue(Type.STRING)); + in = newPropertyRestrictionQuery(field, pr, value -> value.getValue(Type.STRING)); } } @@ -734,10 +723,6 @@ class ElasticsearchResultRowIterator imp throw new IllegalStateException("PropertyRestriction not handled " + pr + " for index " + defn); } - 
private static String idToPath(String id) throws UnsupportedEncodingException { - return URLDecoder.decode(id, "UTF-8"); - } - class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider { private ElasticsearchFacets elasticsearchFacets; private Map<String, List<FulltextIndex.Facet>> cachedResults = new HashMap<>(); @@ -750,7 +735,7 @@ class ElasticsearchResultRowIterator imp public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String columnName) throws IOException { String facetProp = FulltextIndex.parseFacetField(columnName); if (cachedResults.get(facetProp) == null) { - cachedResults = elasticsearchFacets.getElasticSearchFacets(numberOfFacets); + cachedResults = elasticsearchFacets.getElasticSearchFacets(indexNode.getDefinition(), numberOfFacets); } return cachedResults.get(facetProp); } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java Thu May 21 08:56:02 2020 @@ -47,8 +47,7 @@ public class ElasticsearchSearcher { public SearchResponse search(QueryBuilder query, int batchSize) throws IOException { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() .query(query) - .fetchSource(false) - .storedField(FieldNames.PATH) + .fetchSource(FieldNames.PATH, null) .size(batchSize); SearchRequest request = new 
SearchRequest(indexNode.getDefinition().getRemoteIndexAlias()) Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java Thu May 21 08:56:02 2020 @@ -17,6 +17,7 @@ package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util; import org.apache.jackrabbit.oak.api.Type; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex; import org.apache.jackrabbit.oak.spi.query.Filter; import org.apache.jackrabbit.oak.spi.query.QueryConstants; @@ -33,7 +34,7 @@ public final class ElasticsearchAggregat private ElasticsearchAggregationBuilderUtil() { } - public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, int numberOfFacets) { + public static List<TermsAggregationBuilder> getAggregators(QueryIndex.IndexPlan plan, ElasticsearchIndexDefinition indexDefinition, int numberOfFacets) { List<TermsAggregationBuilder> termsAggregationBuilders = new LinkedList<>(); Collection<Filter.PropertyRestriction> propertyRestrictions = plan.getFilter().getPropertyRestrictions(); for (Filter.PropertyRestriction propertyRestriction : propertyRestrictions) { @@ -41,13 +42,14 @@ public final class ElasticsearchAggregat if 
(QueryConstants.REP_FACET.equals(name)) { String value = propertyRestriction.first.getValue(Type.STRING); String facetProp = FulltextIndex.parseFacetField(value); - termsAggregationBuilders.add(AggregationBuilders.terms(facetProp).field(keywordFieldName(facetProp)).size(numberOfFacets)); + termsAggregationBuilders.add( + AggregationBuilders + .terms(facetProp) + .field(indexDefinition.getElasticKeyword(facetProp)) + .size(numberOfFacets) + ); } } return termsAggregationBuilders; } - - private static String keywordFieldName(String propName) { - return propName + "." + "keyword"; - } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java Thu May 21 08:56:02 2020 @@ -25,8 +25,7 @@ public class SearchSourceBuilderUtil { public static SearchSourceBuilder createSearchSourceBuilder(ElasticsearchSearcherModel elasticsearchSearcherModel) { SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder() .query(elasticsearchSearcherModel.getQueryBuilder()) - .fetchSource(elasticsearchSearcherModel.fetchSource()) - .storedField(elasticsearchSearcherModel.getStoredField()) + .fetchSource(elasticsearchSearcherModel.getStoredField(), null) .size(elasticsearchSearcherModel.getBatchSize()) .from(elasticsearchSearcherModel.getFrom()); Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java Thu May 21 08:56:02 2020 @@ -67,11 +67,11 @@ public class TermQueryBuilderFactory { } public static PrefixQueryBuilder newPrefixQuery(String field, @NotNull String value) { - return prefixQuery(keywordFieldName(field), value); + return prefixQuery(field, value); } public static WildcardQueryBuilder newWildcardQuery(String field, @NotNull String value) { - return wildcardQuery(keywordFieldName(field), value); + return wildcardQuery(field, value); } public static TermQueryBuilder newPathQuery(String path) { @@ -96,11 +96,11 @@ public class TermQueryBuilderFactory { } public static TermQueryBuilder newNodeTypeQuery(String type) { - return termQuery(keywordFieldName(JCR_PRIMARYTYPE), type); + return termQuery(JCR_PRIMARYTYPE, type); } public static TermQueryBuilder newMixinTypeQuery(String type) { - return termQuery(keywordFieldName(JCR_MIXINTYPES), type); + return termQuery(JCR_MIXINTYPES, type); } public static TermQueryBuilder newNotNullPropQuery(String propName) { @@ -126,12 +126,9 @@ public class TermQueryBuilderFactory { return bq; } - public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName, boolean isString, + public static <R> QueryBuilder newPropertyRestrictionQuery(String propertyName, 
Filter.PropertyRestriction pr, Function<PropertyValue, R> propToObj) { - if (isString) { - propertyName = keywordFieldName(propertyName); - } R first = pr.first != null ? propToObj.apply(pr.first) : null; R last = pr.last != null ? propToObj.apply(pr.last) : null; @@ -166,9 +163,4 @@ public class TermQueryBuilderFactory { } return path; } - - // As per https://www.elastic.co/blog/strings-are-dead-long-live-strings - private static String keywordFieldName(String propName) { - return propName + "." + "keyword"; - } } \ No newline at end of file Modified: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java?rev=1877992&r1=1877991&r2=1877992&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java Thu May 21 08:56:02 2020 @@ -20,7 +20,7 @@ import org.apache.commons.io.FileUtils; import org.apache.jackrabbit.oak.InitialContent; import org.apache.jackrabbit.oak.Oak; import org.apache.jackrabbit.oak.api.ContentRepository; -import org.apache.jackrabbit.oak.commons.PerfLogger; +import org.apache.jackrabbit.oak.api.Tree; import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate; import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider; import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler; @@ -37,6 +37,9 @@ import org.apache.jackrabbit.oak.query.A import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider; import 
org.apache.jackrabbit.oak.spi.state.NodeBuilder; import org.apache.jackrabbit.oak.spi.state.NodeStore; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.core.CountRequest; +import org.elasticsearch.client.indices.GetIndexRequest; import org.jetbrains.annotations.NotNull; import org.junit.After; import org.junit.ClassRule; @@ -44,7 +47,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; -import java.net.URI; import java.util.concurrent.TimeUnit; import static com.google.common.collect.Lists.newArrayList; @@ -56,10 +58,6 @@ public abstract class ElasticsearchAbstr protected static final Logger LOG = LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class); - protected static final PerfLogger PERF_LOGGER = - new PerfLogger(LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class.getName() + ".perf")); - - // Set this connection string as // <scheme>://<hostname>:<port>?key_id=<>,key_secret=<> // key_id and key_secret are optional in case the ES server @@ -72,11 +70,9 @@ public abstract class ElasticsearchAbstr // This can be used by the extending classes to trigger the async index update as per need (not having to wait for async indexing cycle) protected AsyncIndexUpdate asyncIndexUpdate; protected long INDEX_CORRUPT_INTERVAL_IN_MILLIS = 100; - protected ElasticsearchIndexEditorProvider editorProvider; protected NodeStore nodeStore; protected int DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS = 5; - @ClassRule public static ElasticsearchConnectionRule elasticRule = new ElasticsearchConnectionRule(elasticConnectionString); @@ -131,12 +127,11 @@ public abstract class ElasticsearchAbstr } protected Oak addAsyncIndexingLanesToOak(Oak oak) { - // Override this in extending clases to configure different + // Override this in extending classes to configure different // indexing lanes with different time limits. 
return oak.withAsyncIndexing("async", DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS); } - @Override protected ContentRepository createRepository() { @@ -156,7 +151,6 @@ public abstract class ElasticsearchAbstr trackingCorruptIndexHandler.setCorruptInterval(INDEX_CORRUPT_INTERVAL_IN_MILLIS, TimeUnit.MILLISECONDS); asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler); - Oak oak = new Oak(nodeStore) .with(getInitialContent()) .with(new OpenSecurityProvider()) @@ -171,7 +165,6 @@ public abstract class ElasticsearchAbstr return oak.createContentRepository(); } - protected static void assertEventually(Runnable r) { ElasticsearchTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 5); } @@ -188,8 +181,8 @@ public abstract class ElasticsearchAbstr return builder; } - protected void setIndex(String idxName, IndexDefinitionBuilder builder) { - builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName)); + protected Tree setIndex(String idxName, IndexDefinitionBuilder builder) { + return builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName)); } protected String explain(String query) { @@ -206,4 +199,36 @@ public abstract class ElasticsearchAbstr setTraversalEnabled(false); } + // Utility methods accessing directly Elasticsearch + + protected boolean exists(Tree index) { + ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index); + + try { + return esConnection.getClient().indices() + .exists(new GetIndexRequest(esIdxDef.getRemoteIndexAlias()), RequestOptions.DEFAULT); + } catch (IOException e) { + throw new IllegalStateException(e); + } + } + + protected long countDocuments(Tree index) { + ElasticsearchIndexDefinition esIdxDef = getElasticsearchIndexDefinition(index); + + CountRequest request = new CountRequest(esIdxDef.getRemoteIndexAlias()); + try { + return esConnection.getClient().count(request, RequestOptions.DEFAULT).getCount(); + } catch (IOException e) { + throw new 
IllegalStateException(e); + } + } + + private ElasticsearchIndexDefinition getElasticsearchIndexDefinition(Tree index) { + return new ElasticsearchIndexDefinition( + nodeStore.getRoot(), + nodeStore.getRoot().getChildNode(INDEX_DEFINITIONS_NAME).getChildNode(index.getName()), + index.getPath(), + esConnection.getIndexPrefix()); + } + } Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java?rev=1877992&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java (added) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java Thu May 21 08:56:02 2020 @@ -0,0 +1,123 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.jackrabbit.oak.plugins.index.elasticsearch; + +import org.apache.jackrabbit.oak.api.Tree; +import org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder; +import org.junit.Ignore; +import org.junit.Test; + +import java.util.Random; +import java.util.UUID; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.junit.Assert.assertTrue; + +public class ElasticsearchContentTest extends ElasticsearchAbstractQueryTest { + + @Test + public void indexWithAnalyzedProperty() throws Exception { + IndexDefinitionBuilder builder = createIndex("a").noAsync(); + builder.indexRule("nt:base").property("a").analyzed(); + String testId = UUID.randomUUID().toString(); + Tree index = setIndex(testId, builder); + root.commit(); + + assertTrue(exists(index)); + assertThat(0L, equalTo(countDocuments(index))); + + Tree content = root.getTree("/").addChild(testId); + content.addChild("indexed").setProperty("a", "foo"); + content.addChild("not-indexed").setProperty("b", "foo"); + root.commit(); + + assertEventually(() -> assertThat(countDocuments(index), equalTo(1L))); + + content.getChild("indexed").remove(); + root.commit(); + + assertEventually(() -> assertThat(countDocuments(index), equalTo(0L))); + + // TODO: should the index be deleted when the definition gets removed? 
+ //index.remove(); + //root.commit(); + + //assertFalse(exists(index)); + } + + @Test + @Ignore("this test fails because of a bug with nodeScopeIndex (every node gets indexed in an empty doc)") + public void indexWithAnalyzedNodeScopeIndexProperty() throws Exception { + IndexDefinitionBuilder builder = createIndex("a").noAsync(); + builder.indexRule("nt:base").property("a").analyzed().nodeScopeIndex(); + String testId = UUID.randomUUID().toString(); + Tree index = setIndex(testId, builder); + root.commit(); + + assertThat(0L, equalTo(countDocuments(index))); + + Tree content = root.getTree("/").addChild(testId); + content.addChild("indexed").setProperty("a", "foo"); + content.addChild("not-indexed").setProperty("b", "foo"); + root.commit(); + + assertEventually(() -> assertThat(countDocuments(index), equalTo(1L))); + } + + @Test + public void indexContentWithLongPath() throws Exception { + IndexDefinitionBuilder builder = createIndex("a").noAsync(); + builder.indexRule("nt:base").property("a").analyzed(); + String testId = UUID.randomUUID().toString(); + Tree index = setIndex(testId, builder); + root.commit(); + + assertTrue(exists(index)); + assertThat(0L, equalTo(countDocuments(index))); + + int leftLimit = 48; // char '0' + int rightLimit = 122; // char 'z' + int targetStringLength = 1024; + final Random random = new Random(42); + + String generatedPath = random.ints(leftLimit, rightLimit + 1) + .limit(targetStringLength) + .collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append) + .toString(); + + Tree content = root.getTree("/").addChild(testId); + content.addChild(generatedPath).setProperty("a", "foo"); + root.commit(); + + assertEventually(() -> assertThat(countDocuments(index), equalTo(1L))); + } + + @Test + public void defineIndexTwice() throws Exception { + IndexDefinitionBuilder builder = createIndex("a").noAsync(); + String testId = UUID.randomUUID().toString(); + Tree index = setIndex(testId, builder); + root.commit(); +
+ assertTrue(exists(index)); + + builder = createIndex("a").noAsync(); + index = setIndex(testId, builder); + root.commit(); + } +} Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java ------------------------------------------------------------------------------ svn:eol-style = native