Author: ngupta
Date: Thu May 21 08:56:02 2020
New Revision: 1877992

URL: http://svn.apache.org/viewvc?rev=1877992&view=rev
Log:
OAK-9061|oak-search-elastic: mapping consistent with index 
definition (committing patch by Fabrizio)

Added:
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
   (with props)
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
   (with props)
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelperTest.java
   (with props)
Modified:
    jackrabbit/oak/trunk/oak-search-elastic/pom.xml
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFacetTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchFullTextAsyncTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriterTest.java
    
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/IndexDefinition.java

Modified: jackrabbit/oak/trunk/oak-search-elastic/pom.xml
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/pom.xml?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-search-elastic/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-search-elastic/pom.xml Thu May 21 08:56:02 2020
@@ -239,7 +239,7 @@
     <dependency>
       <groupId>org.hamcrest</groupId>
       <artifactId>hamcrest-core</artifactId>
-      <version>1.3</version>
+      <version>2.2</version>
       <scope>test</scope>
     </dependency>
     <dependency>

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchIndexDefinition.java
 Thu May 21 08:56:02 2020
@@ -18,11 +18,17 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
 
+import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.index.IndexConstants;
 import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.apache.jackrabbit.oak.spi.state.NodeState;
 
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Function;
 import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
@@ -56,6 +62,16 @@ public class ElasticsearchIndexDefinitio
             .map(Object::toString)
             .collect(Collectors.joining("")));
 
+    private static final Function<Integer, Boolean> isAnalyzable;
+
+    static {
+        int[] NOT_ANALYZED_TYPES = new int[] {
+                Type.BINARY.tag(), Type.LONG.tag(), Type.DOUBLE.tag(), 
Type.DECIMAL.tag(), Type.DATE.tag(), Type.BOOLEAN.tag()
+        };
+        Arrays.sort(NOT_ANALYZED_TYPES); // needed for binary search
+        isAnalyzable = type -> Arrays.binarySearch(NOT_ANALYZED_TYPES, type) < 
0;
+    }
+
     private final String remoteIndexName;
 
     public final int bulkActions;
@@ -66,6 +82,8 @@ public class ElasticsearchIndexDefinitio
     private final String indexPrefix;
     private final String remoteAlias;
 
+    private final Map<String, List<PropertyDefinition>> propertiesByName;
+
     public ElasticsearchIndexDefinition(NodeState root, NodeState defn, String 
indexPath, String indexPrefix) {
         super(root, getIndexDefinitionState(defn), 
determineIndexFormatVersion(defn), determineUniqueId(defn), indexPath);
         boolean isReindex = 
defn.getBoolean(IndexConstants.REINDEX_PROPERTY_NAME);
@@ -78,6 +96,12 @@ public class ElasticsearchIndexDefinitio
         this.bulkFlushIntervalMs = getOptionalValue(defn, 
BULK_FLUSH_INTERVAL_MS, BULK_FLUSH_INTERVAL_MS_DEFAULT);
         this.bulkRetries = getOptionalValue(defn, BULK_RETRIES, 
BULK_RETRIES_DEFAULT);
         this.bulkRetriesBackoff = getOptionalValue(defn, BULK_RETRIES_BACKOFF, 
BULK_RETRIES_BACKOFF_DEFAULT);
+
+        this.propertiesByName = getDefinedRules()
+                .stream()
+                .flatMap(rule -> 
StreamSupport.stream(rule.getProperties().spliterator(), false))
+                .filter(pd -> pd.index) // keep only properties that can be 
indexed
+                .collect(Collectors.groupingBy(pd -> pd.name));
     }
 
     /**
@@ -99,6 +123,35 @@ public class ElasticsearchIndexDefinitio
         return remoteIndexName;
     }
 
+    public Map<String, List<PropertyDefinition>> getPropertiesByName() {
+        return propertiesByName;
+    }
+
+    /**
+     * Returns the keyword field name mapped in Elasticsearch for the 
specified property name.
+     * @param propertyName the property name in the index rules
+     * @return the field name identifier in Elasticsearch
+     * @throws IllegalArgumentException if the specified name is not part of 
this {@code ElasticsearchIndexDefinition}
+     */
+    public String getElasticKeyword(String propertyName) {
+        List<PropertyDefinition> propertyDefinitions = 
propertiesByName.get(propertyName);
+        if (propertyDefinitions == null) {
+            throw new IllegalArgumentException(propertyName + " is not part of 
this ElasticsearchIndexDefinition");
+        }
+
+        String field = propertyName;
+        // it's ok to look at the first property since we are sure they all 
have the same type
+        int type = propertyDefinitions.get(0).getType();
+        if (isAnalyzable.apply(type) && isAnalyzed(propertyDefinitions)) {
+            field += ".keyword";
+        }
+        return field;
+    }
+
+    public boolean isAnalyzed(List<PropertyDefinition> propertyDefinitions) {
+        return propertyDefinitions.stream().anyMatch(pd -> pd.analyzed || 
pd.fulltextEnabled());
+    }
+
     private String setupAlias() {
         // TODO: implement advanced remote index name strategy that takes into 
account multiple tenants and re-index process
         return getESSafeIndexName(indexPrefix + "." + getIndexPath());

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticFacetHelper.java
 Thu May 21 08:56:02 2020
@@ -20,17 +20,17 @@ package org.apache.jackrabbit.oak.plugin
 
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchIndexNode;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import 
org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition.SecureFacetConfiguration;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
 import org.elasticsearch.index.query.QueryBuilder;
 import org.elasticsearch.search.SearchHit;
 
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Map;
 
 public class ElasticFacetHelper {
 
@@ -58,32 +58,28 @@ public class ElasticFacetHelper {
         return elasticsearchFacets;
     }
 
-    public static List<String> getAccessibleDocIds(SearchHit[] searchHits, 
Filter filter) throws UnsupportedEncodingException {
+    public static List<String> getAccessibleDocIds(SearchHit[] searchHits, 
Filter filter) {
         List<String> accessibleDocs = new LinkedList<>();
         for (SearchHit searchHit : searchHits) {
-            String id = searchHit.getId();
-            String path = idToPath(id);
+            final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+            String path = (String) sourceMap.get(FieldNames.PATH);
             if (filter.isAccessible(path)) {
-                accessibleDocs.add(id);
+                accessibleDocs.add(path);
             }
         }
         return accessibleDocs;
     }
 
-    public static int getAccessibleDocCount(Iterator<SearchHit> 
searchHitIterator, Filter filter) throws UnsupportedEncodingException {
+    public static int getAccessibleDocCount(Iterator<SearchHit> 
searchHitIterator, Filter filter) {
         int count = 0;
         while (searchHitIterator.hasNext()) {
             SearchHit searchHit = searchHitIterator.next();
-            String id = searchHit.getId();
-            String path = idToPath(id);
+            final Map<String, Object> sourceMap = searchHit.getSourceAsMap();
+            String path = (String) sourceMap.get(FieldNames.PATH);
             if (filter.isAccessible(path)) {
                 count++;
             }
         }
         return count;
     }
-
-    public static String idToPath(String id) throws 
UnsupportedEncodingException {
-        return URLDecoder.decode(id, "UTF-8");
-    }
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/ElasticsearchFacets.java
 Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
 import org.apache.jackrabbit.oak.spi.query.QueryIndex;
@@ -49,7 +50,8 @@ public interface ElasticsearchFacets {
      * @return A map with facetName as key and List of facets in descending 
order of facetCount.
      * @throws IOException
      */
-    Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int 
numberOfFacets) throws IOException;
+    Map<String, List<FulltextIndex.Facet>> 
getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                  int 
numberOfFacets) throws IOException;
 
     /**
      * We can retrieve Aggregation in a single call to elastic search while 
querying. Which can then be passed

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/InsecureElasticSearchFacets.java
 Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -51,12 +52,13 @@ public class InsecureElasticSearchFacets
     }
 
     @Override
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int 
numberOfFacets) throws IOException {
+    public Map<String, List<FulltextIndex.Facet>> 
getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int 
numberOfFacets) throws IOException {
         if (elasticsearchAggregationData != null && numberOfFacets <= 
elasticsearchAggregationData.getNumberOfFacets()) {
             return 
changeToFacetList(elasticsearchAggregationData.getAggregations().getAsMap(), 
numberOfFacets);
         }
         LOG.warn("Facet data is being retrieved by again calling 
Elasticsearch");
-        List<TermsAggregationBuilder> aggregationBuilders = 
ElasticsearchAggregationBuilderUtil.getAggregators(plan, numberOfFacets);
+        List<TermsAggregationBuilder> aggregationBuilders = 
ElasticsearchAggregationBuilderUtil.getAggregators(plan, indexDefinition, 
numberOfFacets);
         ElasticsearchSearcherModel elasticsearchSearcherModel = new 
ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                 .withQuery(query)
                 .withAggregation(aggregationBuilders)

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/SecureElasticSearchFacets.java
 Thu May 21 08:56:02 2020
@@ -16,6 +16,7 @@
  */
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchAggregationBuilderUtil;
@@ -50,7 +51,8 @@ public class SecureElasticSearchFacets e
     for docs.
      */
     @Override
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int 
numberOfFacets) throws IOException {
+    public Map<String, List<FulltextIndex.Facet>> 
getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int 
numberOfFacets) throws IOException {
         Map<String, Map<String, Long>> secureFacetCount = new HashMap<>();
         Filter filter = getPlan().getFilter();
         boolean doFetch = true;
@@ -70,7 +72,8 @@ public class SecureElasticSearchFacets e
             List<String> accessibleDocs = 
ElasticFacetHelper.getAccessibleDocIds(searchHits, filter);
             if (accessibleDocs.isEmpty()) continue;
             QueryBuilder queryWithAccessibleDocIds = 
QueryBuilders.termsQuery("_id", accessibleDocs);
-            Map<String, Aggregation> accessibleDocsAggregation = 
getAggregationForDocIds(queryWithAccessibleDocIds, accessibleDocs.size());
+            Map<String, Aggregation> accessibleDocsAggregation = 
getAggregationForDocIds(queryWithAccessibleDocIds,
+                    accessibleDocs.size(), indexDefinition);
             collateAggregations(secureFacetCount, accessibleDocsAggregation);
         }
 
@@ -115,15 +118,15 @@ public class SecureElasticSearchFacets e
         }
     }
 
-    private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder 
queryWithAccessibleDocIds, int facetCount) throws IOException {
-        List<TermsAggregationBuilder> aggregationBuilders = 
ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), facetCount);
+    private Map<String, Aggregation> getAggregationForDocIds(QueryBuilder 
queryWithAccessibleDocIds, int facetCount,
+                                                             
ElasticsearchIndexDefinition indexDefinition) throws IOException {
+        List<TermsAggregationBuilder> aggregationBuilders = 
ElasticsearchAggregationBuilderUtil.getAggregators(getPlan(), indexDefinition, 
facetCount);
         ElasticsearchSearcherModel 
idBasedelasticsearchSearcherModelWithAggregation = new 
ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                 .withQuery(queryWithAccessibleDocIds)
                 .withAggregation(aggregationBuilders)
                 .build();
 
         SearchResponse facetDocs = 
getSearcher().search(idBasedelasticsearchSearcherModelWithAggregation);
-        Map<String, Aggregation> aggregationMap = 
facetDocs.getAggregations().asMap();
-        return aggregationMap;
+        return facetDocs.getAggregations().asMap();
     }
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/facets/StatisticalElasticSearchFacets.java
 Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.facets;
 
 import com.google.common.collect.AbstractIterator;
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcher;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.query.ElasticsearchSearcherModel;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.ElasticsearchConstants;
@@ -52,7 +53,9 @@ public class StatisticalElasticSearchFac
         this.secureFacetConfiguration = secureFacetConfiguration;
     }
 
-    public Map<String, List<FulltextIndex.Facet>> getElasticSearchFacets(int 
numberOfFacets) throws IOException {
+    @Override
+    public Map<String, List<FulltextIndex.Facet>> 
getElasticSearchFacets(ElasticsearchIndexDefinition indexDefinition,
+                                                                         int 
numberOfFacets) throws IOException {
         Map<String, List<FulltextIndex.Facet>> result = new HashMap<>();
         Map<String, List<FulltextIndex.Facet>> topChildren;
         Filter filter = getPlan().getFilter();
@@ -61,7 +64,7 @@ public class StatisticalElasticSearchFac
         ElasticsearchAggregationData aggregationData = 
getElasticsearchAggregationData();
         if (aggregationData == null || aggregationData.getNumberOfFacets() < 
numberOfFacets) {
             LOG.warn("Facets and Totalhit count are being retrieved by calling 
Elasticsearch");
-            topChildren = super.getElasticSearchFacets(numberOfFacets);
+            topChildren = super.getElasticSearchFacets(indexDefinition, 
numberOfFacets);
             ElasticsearchSearcherModel elasticsearchSearcherModel = new 
ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                     .withQuery(getQuery())
                     
.withBatchSize(ElasticsearchConstants.ELASTICSEARCH_QUERY_BATCH_SIZE)
@@ -79,7 +82,8 @@ public class StatisticalElasticSearchFac
         // instead of statistical count. <OAK-8138>
         if (hitCount < sampleSize) {
             LOG.debug("SampleSize: {} is greater than hitcount: {}, Getting 
secure facet count", sampleSize, hitCount);
-            return new SecureElasticSearchFacets(getSearcher(), getQuery(), 
getPlan()).getElasticSearchFacets(numberOfFacets);
+            return new SecureElasticSearchFacets(getSearcher(), getQuery(), 
getPlan()).getElasticSearchFacets(indexDefinition,
+                    numberOfFacets);
         }
         long randomSeed = secureFacetConfiguration.getRandomSeed();
         Iterator<SearchHit> docIterator = 
getMatchingDocIterator(getSearcher(), getQuery());

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocument.java
 Thu May 21 08:56:02 2020
@@ -25,8 +25,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLEncoder;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
@@ -35,10 +33,7 @@ import java.util.Map;
 class ElasticsearchDocument {
     private static final Logger LOG = 
LoggerFactory.getLogger(ElasticsearchDocument.class);
 
-    // id should only be useful for logging (at least as of now)
     private final String path;
-
-    private final String id;
     private final List<String> fulltext;
     private final List<String> suggest;
     private final List<String> notNullProps;
@@ -47,13 +42,6 @@ class ElasticsearchDocument {
 
     ElasticsearchDocument(String path) {
         this.path = path;
-        String id = null;
-        try {
-            id = pathToId(path);
-        } catch (UnsupportedEncodingException e) {
-            LOG.warn("Couldn't encode {} as ES id", path);
-        }
-        this.id = id;
         this.fulltext = new ArrayList<>();
         this.suggest = new ArrayList<>();
         this.notNullProps = new ArrayList<>();
@@ -93,24 +81,20 @@ class ElasticsearchDocument {
         String parPath = PathUtils.getParentPath(path);
         int depth = PathUtils.getDepth(path);
 
-        // TODO: remember that mapping must be configured with
-        // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
         addProperty(FieldNames.ANCESTORS, parPath);
         addProperty(FieldNames.PATH_DEPTH, depth);
     }
 
-    String getId() {
-        return id;
-    }
-
     public String build() {
-        String ret = null;
+        String ret;
         try {
             XContentBuilder builder = XContentFactory.jsonBuilder();
             builder.startObject();
-            if (fulltext.size() > 0) {
-                builder.field(FieldNames.FULLTEXT, fulltext);
-            }
+            {
+                builder.field(FieldNames.PATH, path);
+                if (fulltext.size() > 0) {
+                    builder.field(FieldNames.FULLTEXT, fulltext);
+                }
             if (suggest.size() > 0) {
                 builder.startObject(FieldNames.SUGGEST).field("input", 
suggest).endObject();
             }
@@ -120,17 +104,18 @@ class ElasticsearchDocument {
             if (nullProps.size() > 0) {
                 builder.field(FieldNames.NULL_PROPS, nullProps);
             }
-            for (Map.Entry<String, Object> prop : properties.entrySet()) {
-                builder.field(prop.getKey(), prop.getValue());
+                for (Map.Entry<String, Object> prop : properties.entrySet()) {
+                    builder.field(prop.getKey(), prop.getValue());
+                }
             }
             builder.endObject();
 
             ret = Strings.toString(builder);
         } catch (IOException e) {
-            LOG.error("Error serializing document - id: {}, properties: {}, 
fulltext: {}, suggest: {}, " +
+            LOG.error("Error serializing document - path: {}, properties: {}, 
fulltext: {}, suggest: {}, " +
                             "notNullProps: {}, nullProps: {}",
-                    path, properties, fulltext, suggest, notNullProps, 
nullProps,
-                    e);
+                    path, properties, fulltext, suggest, notNullProps, 
nullProps, e);
+            ret = null;
         }
 
         return ret;
@@ -140,8 +125,4 @@ class ElasticsearchDocument {
     public String toString() {
         return build();
     }
-
-    public static String pathToId(String path) throws 
UnsupportedEncodingException {
-        return URLEncoder.encode(path, "UTF-8");
-    }
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchDocumentMaker.java
 Thu May 21 08:56:02 2020
@@ -48,10 +48,9 @@ class ElasticsearchDocumentMaker extends
     }
 
     @Override
-    protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, 
boolean dirty, boolean facet) throws IOException {
-        if (doc.getId() == null) {
-            throw new IOException("Couldn't generate id for doc - (More 
details during initDoc)" + doc);
-        }
+    protected ElasticsearchDocument finalizeDoc(ElasticsearchDocument doc, 
boolean dirty, boolean facet) {
+        // path restrictions are evaluated by default in elastic, so ancestors are always indexed
+        doc.indexAncestors(path);
         return doc;
     }
 
@@ -134,10 +133,13 @@ class ElasticsearchDocumentMaker extends
         doc.addProperty(pname, f);
     }
 
+    /**
+     * Intentionally empty implementation: ancestors are always indexed.
+     *
+     * @see ElasticsearchDocumentMaker#finalizeDoc
+     */
     @Override
-    protected void indexAncestors(ElasticsearchDocument doc, String path) {
-        doc.indexAncestors(path);
-    }
+    protected void indexAncestors(ElasticsearchDocument doc, String path) { /* 
empty */ }
 
     @Override
     protected void indexNotNullProperty(ElasticsearchDocument doc, 
PropertyDefinition pd) {

Added: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java?rev=1877992&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
 Thu May 21 08:56:02 2020
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch.index;
+
+import org.apache.jackrabbit.oak.api.Type;
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
+import org.elasticsearch.client.indices.CreateIndexRequest;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.common.xcontent.XContentFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+/**
+ * Provides utility functions around Elasticsearch indexing
+ */
+class ElasticsearchIndexHelper {
+
+    public static CreateIndexRequest 
createIndexRequest(ElasticsearchIndexDefinition indexDefinition) throws 
IOException {
+        final CreateIndexRequest request = new 
CreateIndexRequest(indexDefinition.getRemoteIndexName());
+
+        // provision settings
+        // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
+        request.settings(Settings.builder()
+                .put("analysis.analyzer.ancestor_analyzer.type", "custom")
+                .put("analysis.analyzer.ancestor_analyzer.tokenizer", 
"path_hierarchy"));
+
+        // provision mappings
+        final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
+        mappingBuilder.startObject();
+        {
+            mappingBuilder.startObject("properties");
+            {
+                mapInternalProperties(mappingBuilder);
+                mapIndexRules(indexDefinition, mappingBuilder);
+            }
+            mappingBuilder.endObject();
+        }
+        mappingBuilder.endObject();
+        request.mapping(mappingBuilder);
+
+        return request;
+    }
+
+    private static void mapInternalProperties(XContentBuilder mappingBuilder) 
throws IOException {
+        mappingBuilder.startObject(FieldNames.PATH)
+                .field("type", "keyword")
+                .endObject();
+        mappingBuilder.startObject(FieldNames.ANCESTORS)
+                .field("type", "text")
+                .field("analyzer", "ancestor_analyzer")
+                .field("search_analyzer", "keyword")
+                .field("search_quote_analyzer", "keyword")
+                .endObject();
+        mappingBuilder.startObject(FieldNames.PATH_DEPTH)
+                .field("type", "integer")
+                .endObject();
+        // TODO: the mappings below are for features currently not supported. 
These need to be reviewed
+        // when the specific features are implemented
+//                mappingBuilder.startObject(FieldNames.SUGGEST)
+//                        .field("type", "completion")
+//                        .endObject();
+//                mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
+//                        .field("type", "keyword")
+//                        .endObject();
+//                mappingBuilder.startObject(FieldNames.NULL_PROPS)
+//                        .field("type", "keyword")
+//                        .endObject();
+    }
+
+    private static void mapIndexRules(ElasticsearchIndexDefinition 
indexDefinition, XContentBuilder mappingBuilder) throws IOException {
+        // we need to check if in the defined rules there are properties with 
the same name and different types
+        final List<Map.Entry<String, List<PropertyDefinition>>> 
multiTypesFields = indexDefinition.getPropertiesByName()
+                .entrySet()
+                .stream()
+                .filter(e -> e.getValue().size() > 1)
+                .filter(e -> 
e.getValue().stream().map(PropertyDefinition::getType).distinct().count() > 1)
+                .collect(Collectors.toList());
+
+        if (!multiTypesFields.isEmpty()) {
+            String fields = 
multiTypesFields.stream().map(Map.Entry::getKey).collect(Collectors.joining(", 
", "[", "]"));
+            throw new IllegalStateException(indexDefinition.getIndexPath() + " 
has properties with the same name and " +
+                    "different types " + fields);
+        }
+
+        for (Map.Entry<String, List<PropertyDefinition>> entry : 
indexDefinition.getPropertiesByName().entrySet()) {
+            final String name = entry.getKey();
+            final List<PropertyDefinition> propertyDefinitions = 
entry.getValue();
+
+            Type<?> type = Type.fromTag(propertyDefinitions.get(0).getType(), 
false);
+            mappingBuilder.startObject(name);
+            {
+                // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-types.html
+                if (Type.BINARY.equals(type)) {
+                    mappingBuilder.field("type", "binary");
+                } else if (Type.LONG.equals(type)) {
+                    mappingBuilder.field("type", "long");
+                } else if (Type.DOUBLE.equals(type) || 
Type.DECIMAL.equals(type)) {
+                    mappingBuilder.field("type", "double");
+                } else if (Type.DATE.equals(type)) {
+                    mappingBuilder.field("type", "date");
+                } else if (Type.BOOLEAN.equals(type)) {
+                    mappingBuilder.field("type", "boolean");
+                } else {
+                    if (indexDefinition.isAnalyzed(propertyDefinitions)) {
+                        mappingBuilder.field("type", "text");
+                        // always add keyword for sorting / faceting as 
sub-field
+                        mappingBuilder.startObject("fields");
+                        {
+                            mappingBuilder.startObject("keyword")
+                                    .field("type", "keyword")
+                                    .endObject();
+                        }
+                        mappingBuilder.endObject();
+                    } else {
+                        // always add keyword for sorting / faceting
+                        mappingBuilder.field("type", "keyword");
+                    }
+                }
+            }
+            mappingBuilder.endObject();
+        }
+    }
+}

Propchange: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexHelper.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/index/ElasticsearchIndexWriter.java
 Thu May 21 08:56:02 2020
@@ -18,8 +18,8 @@ package org.apache.jackrabbit.oak.plugin
 
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection;
 import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
-import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import 
org.apache.jackrabbit.oak.plugins.index.search.spi.editor.FulltextIndexWriter;
+import org.elasticsearch.ElasticsearchStatusException;
 import org.elasticsearch.action.DocWriteRequest;
 import org.elasticsearch.action.admin.indices.alias.IndicesAliasesRequest;
 import org.elasticsearch.action.admin.indices.alias.get.GetAliasesRequest;
@@ -37,13 +37,11 @@ import org.elasticsearch.client.IndicesC
 import org.elasticsearch.client.RequestOptions;
 import org.elasticsearch.client.indices.CreateIndexRequest;
 import org.elasticsearch.client.indices.CreateIndexResponse;
+import org.elasticsearch.client.indices.GetIndexRequest;
 import org.elasticsearch.cluster.metadata.AliasMetaData;
 import org.elasticsearch.common.Strings;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentType;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.TestOnly;
@@ -51,12 +49,17 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.Phaser;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
 import java.util.Map;
-import java.util.Optional;
 import java.util.Set;
 import java.util.stream.Collectors;
 
-import static 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.index.ElasticsearchDocument.pathToId;
 import static org.elasticsearch.common.xcontent.ToXContent.EMPTY_PARAMS;
 import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
 
@@ -66,8 +69,19 @@ class ElasticsearchIndexWriter implement
     private final ElasticsearchConnection elasticsearchConnection;
     private final ElasticsearchIndexDefinition indexDefinition;
 
+    /**
+     * Coordinates communication between bulk processes. It has a main 
controller registered at creation time and
+     * de-registered on {@link ElasticsearchIndexWriter#close(long)}. Each 
bulk request registers a new party in
+     * this Phaser in {@link OakBulkProcessorListener#beforeBulk(long, 
BulkRequest)} and de-registers itself when
+     * the request returns.
+     */
+    private final Phaser phaser = new Phaser(1); // register main controller
+    /**
+     * Key-value structure to keep the history of bulk requests. Keys are the 
bulk execution ids, the boolean
+     * value is {@code true} when at least one update is performed, otherwise 
{@code false}.
+     */
+    private final ConcurrentHashMap<Long, Boolean> updatesMap = new 
ConcurrentHashMap<>();
     private final BulkProcessor bulkProcessor;
-    private Optional<Boolean> indexUpdated = Optional.empty();
 
     ElasticsearchIndexWriter(@NotNull ElasticsearchConnection 
elasticsearchConnection,
                                        @NotNull ElasticsearchIndexDefinition 
indexDefinition) {
@@ -99,70 +113,76 @@ class ElasticsearchIndexWriter implement
     }
 
     @Override
-    public void updateDocument(String path, ElasticsearchDocument doc) throws 
IOException {
+    public void updateDocument(String path, ElasticsearchDocument doc) {
         IndexRequest request = new 
IndexRequest(indexDefinition.getRemoteIndexAlias())
-                .id(pathToId(path))
+                .id(idFromPath(path))
                 .source(doc.build(), XContentType.JSON);
         bulkProcessor.add(request);
     }
 
     @Override
-    public void deleteDocuments(String path) throws IOException {
+    public void deleteDocuments(String path) {
         DeleteRequest request = new 
DeleteRequest(indexDefinition.getRemoteIndexAlias())
-                .id(pathToId(path));
+                .id(idFromPath(path));
         bulkProcessor.add(request);
     }
 
     @Override
-    public boolean close(long timestamp) throws IOException {
+    public boolean close(long timestamp) {
         LOG.trace("Calling close on bulk processor {}", bulkProcessor);
         bulkProcessor.close();
         LOG.trace("Bulk Processor {} closed", bulkProcessor);
 
-        // bulkProcessor.close() calls the OakBulkProcessorListener.beforeBulk 
in a blocking manner
-        // indexUpdated would be unset there if it was false till now (not 
even a single update succeeded)
-        // in this case wait for sometime for the last 
OakBulkProcessorListener.afterBulk to be called
-        // where indexUpdated can possibly be set to true, return false in 
case of timeout.
-        // We don't wait in case indexUpdated is already set (This would be if 
any of the previous flushes for this processor
-        // were successful i.e index was updated at least once)
-        final long start = System.currentTimeMillis();
-        long timeoutMillis = indexDefinition.bulkFlushIntervalMs * 5 ;
-        while (!indexUpdated.isPresent()) {
-            long lastAttempt = System.currentTimeMillis();
-            long elapsedTime = lastAttempt - start;
-            if (elapsedTime > timeoutMillis) {
-                // indexUpdate was not set till now, return false
-                LOG.trace("Timed out waiting for the bulk processor response. 
Returning indexUpdated = false");
-                return false;
-            } else {
-                try {
-                    LOG.trace("Waiting for afterBulk response...");
-                    Thread.sleep(100);
-                } catch (InterruptedException ex) {
-                    //
-                }
-            }
+        // de-register main controller
+        final int phase = phaser.arriveAndDeregister();
+
+        try {
+            phaser.awaitAdvanceInterruptibly(phase, 
indexDefinition.bulkFlushIntervalMs * 5, TimeUnit.MILLISECONDS);
+        } catch (InterruptedException | TimeoutException e) {
+            LOG.error("Error waiting for bulk requests to return", e);
+        }
+
+        if (LOG.isTraceEnabled()) {
+            LOG.trace("Bulk identifier -> update status = {}", updatesMap);
         }
-        LOG.trace("Returning indexUpdated = {}", indexUpdated.get());
-        return indexUpdated.get();
+        return updatesMap.containsValue(Boolean.TRUE);
     }
 
-    // TODO: we need to check if the index already exists and in that case we 
have to figure out if there are
-    // "breaking changes" in the index definition
     protected void provisionIndex() throws IOException {
+        // check if index already exists
+        boolean exists = elasticsearchConnection.getClient().indices().exists(
+                new GetIndexRequest(indexDefinition.getRemoteIndexName()), 
RequestOptions.DEFAULT
+        );
+        if (exists) {
+            LOG.info("Index {} already exists. Skip index provision", 
indexDefinition.getRemoteIndexName());
+            return;
+        }
 
-        IndicesClient indicesClient = 
elasticsearchConnection.getClient().indices();
+        final IndicesClient indicesClient = 
elasticsearchConnection.getClient().indices();
         final String indexName = indexDefinition.getRemoteIndexName();
 
-        CreateIndexRequest createIndexRequest = 
constructCreateIndexRequest(indexName);
-        String requestMsg = 
Strings.toString(createIndexRequest.toXContent(jsonBuilder(), EMPTY_PARAMS));
-        CreateIndexResponse response = 
indicesClient.create(createIndexRequest, RequestOptions.DEFAULT);
-        checkResponseAcknowledgement(response, "Create index call not 
acknowledged for index " + indexName);
-
-        LOG.info("Updated settings for index {} = {}. Response acknowledged: 
{}",
-                indexDefinition.getRemoteIndexAlias(), requestMsg, 
response.isAcknowledged());
-
+        // create the new index
+        final CreateIndexRequest request = 
ElasticsearchIndexHelper.createIndexRequest(indexDefinition);
+        try {
+            if (LOG.isDebugEnabled()) {
+                final String requestMsg = 
Strings.toString(request.toXContent(jsonBuilder(), EMPTY_PARAMS));
+                LOG.debug("Creating Index with request {}", requestMsg);
+            }
+            CreateIndexResponse response = indicesClient.create(request, 
RequestOptions.DEFAULT);
+            LOG.info("Updated settings for index {}. Response acknowledged: 
{}",
+                    indexDefinition.getRemoteIndexName(), 
response.isAcknowledged());
+            checkResponseAcknowledgement(response, "Create index call not 
acknowledged for index " + indexName);
+        } catch (ElasticsearchStatusException ese) {
+            // We already check index existence as first thing in this method, 
if we get here it means we have got into
+            // a conflict (eg: multiple cluster nodes provision concurrently).
+            // Elasticsearch does not have a CREATE IF NOT EXIST, need to 
inspect exception
+            // https://github.com/elastic/elasticsearch/issues/19862
+            if (ese.status().getStatus() == 400 && 
ese.getDetailedMessage().contains("resource_already_exists_exception")) {
+                LOG.warn("Index {} already exists. Ignoring error", indexName);
+            } else throw ese;
+        }
 
+        // update the alias
         GetAliasesRequest getAliasesRequest = new 
GetAliasesRequest(indexDefinition.getRemoteIndexAlias());
         GetAliasesResponse aliasesResponse = 
indicesClient.getAlias(getAliasesRequest, RequestOptions.DEFAULT);
         Map<String, Set<AliasMetaData>> aliases = aliasesResponse.getAliases();
@@ -180,6 +200,8 @@ class ElasticsearchIndexWriter implement
                 + indexDefinition.getRemoteIndexAlias());
         LOG.info("Updated alias {} to index {}. Response acknowledged: {}", 
indexDefinition.getRemoteIndexAlias(),
                 indexName, updateAliasResponse.isAcknowledged());
+
+        // once the alias has been updated, we can safely remove the old index
         deleteOldIndices(indicesClient, aliases.keySet());
     }
 
@@ -201,57 +223,15 @@ class ElasticsearchIndexWriter implement
         LOG.info("Deleted indices {}. Response acknowledged: {}", 
indices.toString(), deleteIndexResponse.isAcknowledged());
     }
 
-    private CreateIndexRequest constructCreateIndexRequest(String indexName) 
throws IOException {
-        CreateIndexRequest request = new CreateIndexRequest(indexName);
-
-        // provision settings
-        request.settings(Settings.builder()
-                .put("analysis.analyzer.ancestor_analyzer.type", "custom")
-                .put("analysis.analyzer.ancestor_analyzer.tokenizer", 
"path_hierarchy"));
-
-        // provision mappings
-        XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
-        mappingBuilder.startObject();
-        {
-            mappingBuilder.startObject("properties");
-            {
-                mappingBuilder.startObject(FieldNames.ANCESTORS)
-                        .field("type", "text")
-                        .field("analyzer", "ancestor_analyzer")
-                        .field("search_analyzer", "keyword")
-                        .field("search_quote_analyzer", "keyword")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.PATH_DEPTH)
-                        .field("type", "integer")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.SUGGEST)
-                        .field("type", "completion")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.NOT_NULL_PROPS)
-                        .field("type", "keyword")
-                        .endObject();
-                mappingBuilder.startObject(FieldNames.NULL_PROPS)
-                        .field("type", "keyword")
-                        .endObject();
-            }
-            mappingBuilder.endObject();
-        }
-        mappingBuilder.endObject();
-        request.mapping(mappingBuilder);
-
-        return request;
-    }
-
     private class OakBulkProcessorListener implements BulkProcessor.Listener {
 
         @Override
         public void beforeBulk(long executionId, BulkRequest bulkRequest) {
-            if (indexUpdated.isPresent() && !indexUpdated.get()) {
-                // Reset the state only if it's false
-                // If it's true that means index was updated at least once by 
this processor
-                // and we can return true for indexUpdate.
-                indexUpdated = Optional.empty();
-            }
+            // register new bulk party
+            phaser.register();
+            // init update status
+            updatesMap.put(executionId, Boolean.FALSE);
+
             LOG.info("Sending bulk with id {} -> {}", executionId, 
bulkRequest.getDescription());
             if (LOG.isTraceEnabled()) {
                 LOG.trace("Bulk Requests: \n{}", bulkRequest.requests()
@@ -279,29 +259,41 @@ class ElasticsearchIndexWriter implement
                         LOG.error("Bulk item with id {} failed", 
failure.getId(), failure.getCause());
                     } else {
                         // Set indexUpdated to true even if 1 item was updated 
successfully
-                        indexUpdated = Optional.of(true);
+                        updatesMap.put(executionId, Boolean.TRUE);
                     }
                 }
-                // Only set indexUpdated to false if it's unset
-                // If set and true, that means index was updated at least once 
by this processor.
-                // If set and false, no need to do anything
-                if (!indexUpdated.isPresent()) {
-                    indexUpdated = Optional.of(false);
-                }
             } else {
-                indexUpdated = Optional.of(true);
+                updatesMap.put(executionId, Boolean.TRUE);
             }
+            phaser.arriveAndDeregister();
         }
 
         @Override
         public void afterBulk(long executionId, BulkRequest bulkRequest, 
Throwable throwable) {
-            // Only set indexUpdated to false if it's unset
-            // If set and true, that means index was updated at least once by 
this processor.
-            // If set and false, no need to do anything
-            if (!indexUpdated.isPresent()) {
-                indexUpdated = Optional.of(false);
-            }
             LOG.error("Bulk with id {} threw an error", executionId, 
throwable);
+            phaser.arriveAndDeregister();
+        }
+    }
+
+    /**
+     * Transforms a path into an _id compatible with Elasticsearch 
specification. The _id cannot be larger than 512
+     * bytes. For performance reasons paths that are already compatible are 
returned untouched. Otherwise, SHA-256
+     * algorithm is used to return a transformed path (32 bytes max).
+     *
+     * @param path the document path
+     * @return the Elasticsearch compatible path
+     * @see <a 
href="https://www.elastic.co/guide/en/elasticsearch/reference/current/mapping-id-field.html">
+     *     Mapping _id field</a>
+     */
+    private static String idFromPath(@NotNull String path) {
+        byte[] pathBytes = path.getBytes(StandardCharsets.UTF_8);
+        if (pathBytes.length > 512) {
+            try {
+                return new 
String(MessageDigest.getInstance("SHA-256").digest(pathBytes));
+            } catch (NoSuchAlgorithmException e) {
+                throw new IllegalStateException(e);
+            }
         }
+        return path;
     }
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndex.java
 Thu May 21 08:56:02 2020
@@ -90,7 +90,9 @@ class ElasticsearchIndex extends Fulltex
 
     @Override
     protected String getFulltextRequestString(IndexPlan plan, IndexNode 
indexNode) {
-        return 
Strings.toString(ElasticsearchResultRowIterator.getESQuery(plan, 
getPlanResult(plan)));
+        return Strings.toString(new 
ElasticsearchResultRowIterator(plan.getFilter(), getPlanResult(plan), plan,
+                acquireIndexNode(plan), FulltextIndex::shouldInclude, 
getEstimator(plan.getPlanName()))
+                .getESQuery(plan, getPlanResult(plan)));
     }
 
     @Override

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchResultRowIterator.java
 Thu May 21 08:56:02 2020
@@ -53,8 +53,6 @@ import org.slf4j.LoggerFactory;
 
 import javax.jcr.PropertyType;
 import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
 import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Deque;
@@ -68,7 +66,6 @@ import java.util.stream.StreamSupport;
 
 import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
 import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
-import static org.apache.jackrabbit.oak.api.Type.STRING;
 import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
 import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
 import static 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.util.TermQueryBuilderFactory.newAncestorQuery;
@@ -158,9 +155,10 @@ class ElasticsearchResultRowIterator imp
         try {
             ElasticsearchSearcher searcher = getCurrentSearcher(indexNode);
             QueryBuilder query = getESQuery(plan, planResult);
-            int numberOfFacets = 
indexNode.getDefinition().getNumberOfTopFacets();
+            ElasticsearchIndexDefinition indexDefinition = 
indexNode.getDefinition();
+            int numberOfFacets = indexDefinition.getNumberOfTopFacets();
             List<TermsAggregationBuilder> aggregationBuilders = 
ElasticsearchAggregationBuilderUtil
-                    .getAggregators(plan, numberOfFacets);
+                    .getAggregators(plan, indexDefinition, numberOfFacets);
 
             ElasticsearchSearcherModel elasticsearchSearcherModel = new 
ElasticsearchSearcherModel.ElasticsearchSearcherModelBuilder()
                     .withQuery(query)
@@ -236,10 +234,9 @@ class ElasticsearchResultRowIterator imp
         return new ElasticsearchSearcher(indexNode);
     }
 
-    private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
-                                                         
ElasticsearchFacetProvider elasticsearchFacetProvider) throws IOException {
-        String id = hit.getId();
-        String path = idToPath(id);
+    private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit, 
ElasticsearchFacetProvider elasticsearchFacetProvider) {
+        final Map<String, Object> sourceMap = hit.getSourceAsMap();
+        String path = (String) sourceMap.get(FieldNames.PATH);
         if (path != null) {
             if ("".equals(path)) {
                 path = "/";
@@ -276,7 +273,7 @@ class ElasticsearchResultRowIterator imp
      * @param planResult
      * @return the Lucene query
      */
-    static QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
+    public QueryBuilder getESQuery(IndexPlan plan, PlanResult planResult) {
         List<QueryBuilder> qs = new ArrayList<>();
         Filter filter = plan.getFilter();
         FullTextExpression ft = filter.getFullTextConstraint();
@@ -504,13 +501,12 @@ class ElasticsearchResultRowIterator imp
         return unwrapped;
     }
 
-    private static void addNonFullTextConstraints(List<QueryBuilder> qs,
+    private void addNonFullTextConstraints(List<QueryBuilder> qs,
                                                   IndexPlan plan, PlanResult 
planResult) {
         final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
                 StreamSupport.stream(iterable.spliterator(), 
false).anyMatch(value::equals);
 
         Filter filter = plan.getFilter();
-        IndexDefinition defn = planResult.indexDefinition;
         if (!filter.matchesAllTypes()) {
             addNodeTypeConstraints(planResult.indexingRule, qs, filter);
         }
@@ -518,20 +514,15 @@ class ElasticsearchResultRowIterator imp
         String path = FulltextIndex.getPathRestriction(plan);
         switch (filter.getPathRestriction()) {
             case ALL_CHILDREN:
-                if (defn.evaluatePathRestrictions()) {
-                    if ("/".equals(path)) {
-                        break;
-                    }
+                if (!"/".equals(path)) {
                     qs.add(newAncestorQuery(path));
                 }
                 break;
             case DIRECT_CHILDREN:
-                if (defn.evaluatePathRestrictions()) {
-                    BoolQueryBuilder bq = boolQuery();
-                    bq.must(newAncestorQuery(path));
-                    bq.must(newDepthQuery(path, planResult));
-                    qs.add(bq);
-                }
+                BoolQueryBuilder bq = boolQuery();
+                bq.must(newAncestorQuery(path));
+                bq.must(newDepthQuery(path, planResult));
+                qs.add(bq);
                 break;
             case EXACT:
                 // For transformed paths, we can only add path restriction if 
absolute path to property can be
@@ -588,7 +579,7 @@ class ElasticsearchResultRowIterator imp
 
             if (pr.first != null && pr.first.equals(pr.last) && 
pr.firstIncluding
                     && pr.lastIncluding) {
-                String first = pr.first.getValue(STRING);
+                String first = pr.first.getValue(Type.STRING);
                 first = first.replace("\\", "");
                 if (JCR_PATH.equals(name)) {
                     qs.add(newPathQuery(first));
@@ -637,7 +628,7 @@ class ElasticsearchResultRowIterator imp
     }
 
     private static QueryBuilder createNodeNameQuery(Filter.PropertyRestriction 
pr) {
-        String first = pr.first != null ? pr.first.getValue(STRING) : null;
+        String first = pr.first != null ? pr.first.getValue(Type.STRING) : 
null;
         if (pr.first != null && pr.first.equals(pr.last) && pr.firstIncluding
                 && pr.lastIncluding) {
             // [property]=[value]
@@ -685,7 +676,7 @@ class ElasticsearchResultRowIterator imp
     }
 
     @Nullable
-    private static QueryBuilder createQuery(String propertyName, 
Filter.PropertyRestriction pr,
+    private QueryBuilder createQuery(String propertyName, 
Filter.PropertyRestriction pr,
                                             PropertyDefinition defn) {
         int propType = FulltextIndex.determinePropertyType(defn, pr);
 
@@ -699,31 +690,29 @@ class ElasticsearchResultRowIterator imp
             return newNotNullPropQuery(defn.name);
         }
 
+        final String field = 
indexNode.getDefinition().getElasticKeyword(propertyName);
+
         QueryBuilder in;
         switch (propType) {
             case PropertyType.DATE: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> parse(value.getValue(Type.DATE)).getTime());
+                in = newPropertyRestrictionQuery(field, pr, value -> 
parse(value.getValue(Type.DATE)).getTime());
                 break;
             }
             case PropertyType.DOUBLE: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> value.getValue(Type.DOUBLE));
+                in = newPropertyRestrictionQuery(field, pr, value -> 
value.getValue(Type.DOUBLE));
                 break;
             }
             case PropertyType.LONG: {
-                in = newPropertyRestrictionQuery(propertyName, false, pr,
-                        value -> value.getValue(Type.LONG));
+                in = newPropertyRestrictionQuery(field, pr, value -> 
value.getValue(Type.LONG));
                 break;
             }
             default: {
                 if (pr.isLike) {
-                    return createLikeQuery(propertyName, 
pr.first.getValue(STRING));
+                    return createLikeQuery(propertyName, 
pr.first.getValue(Type.STRING));
                 }
 
                 //TODO Confirm that all other types can be treated as string
-                in = newPropertyRestrictionQuery(propertyName, true, pr,
-                        value -> value.getValue(Type.STRING));
+                in = newPropertyRestrictionQuery(field, pr, value -> 
value.getValue(Type.STRING));
             }
         }
 
@@ -734,10 +723,6 @@ class ElasticsearchResultRowIterator imp
         throw new IllegalStateException("PropertyRestriction not handled " + 
pr + " for index " + defn);
     }
 
-    private static String idToPath(String id) throws 
UnsupportedEncodingException {
-        return URLDecoder.decode(id, "UTF-8");
-    }
-
     class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider {
         private ElasticsearchFacets elasticsearchFacets;
         private Map<String, List<FulltextIndex.Facet>> cachedResults = new 
HashMap<>();
@@ -750,7 +735,7 @@ class ElasticsearchResultRowIterator imp
         public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String 
columnName) throws IOException {
             String facetProp = FulltextIndex.parseFacetField(columnName);
             if (cachedResults.get(facetProp) == null) {
-                cachedResults = 
elasticsearchFacets.getElasticSearchFacets(numberOfFacets);
+                cachedResults = 
elasticsearchFacets.getElasticSearchFacets(indexNode.getDefinition(), 
numberOfFacets);
             }
             return cachedResults.get(facetProp);
         }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchSearcher.java
 Thu May 21 08:56:02 2020
@@ -47,8 +47,7 @@ public class ElasticsearchSearcher {
     public SearchResponse search(QueryBuilder query, int batchSize) throws 
IOException {
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                 .query(query)
-                .fetchSource(false)
-                .storedField(FieldNames.PATH)
+                .fetchSource(FieldNames.PATH, null)
                 .size(batchSize);
 
         SearchRequest request = new 
SearchRequest(indexNode.getDefinition().getRemoteIndexAlias())

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/ElasticsearchAggregationBuilderUtil.java
 Thu May 21 08:56:02 2020
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.elasticsearch.util;
 
 import org.apache.jackrabbit.oak.api.Type;
+import 
org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition;
 import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
 import org.apache.jackrabbit.oak.spi.query.Filter;
 import org.apache.jackrabbit.oak.spi.query.QueryConstants;
@@ -33,7 +34,7 @@ public final class ElasticsearchAggregat
     private ElasticsearchAggregationBuilderUtil() {
     }
 
-    public static List<TermsAggregationBuilder> 
getAggregators(QueryIndex.IndexPlan plan, int numberOfFacets) {
+    public static List<TermsAggregationBuilder> 
getAggregators(QueryIndex.IndexPlan plan, ElasticsearchIndexDefinition 
indexDefinition, int numberOfFacets) {
         List<TermsAggregationBuilder> termsAggregationBuilders = new 
LinkedList<>();
         Collection<Filter.PropertyRestriction> propertyRestrictions = 
plan.getFilter().getPropertyRestrictions();
         for (Filter.PropertyRestriction propertyRestriction : 
propertyRestrictions) {
@@ -41,13 +42,14 @@ public final class ElasticsearchAggregat
             if (QueryConstants.REP_FACET.equals(name)) {
                 String value = propertyRestriction.first.getValue(Type.STRING);
                 String facetProp = FulltextIndex.parseFacetField(value);
-                
termsAggregationBuilders.add(AggregationBuilders.terms(facetProp).field(keywordFieldName(facetProp)).size(numberOfFacets));
+                termsAggregationBuilders.add(
+                        AggregationBuilders
+                                .terms(facetProp)
+                                
.field(indexDefinition.getElasticKeyword(facetProp))
+                                .size(numberOfFacets)
+                );
             }
         }
         return termsAggregationBuilders;
     }
-
-    private static String keywordFieldName(String propName) {
-        return propName + "." + "keyword";
-    }
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/SearchSourceBuilderUtil.java
 Thu May 21 08:56:02 2020
@@ -25,8 +25,7 @@ public class SearchSourceBuilderUtil {
     public static SearchSourceBuilder 
createSearchSourceBuilder(ElasticsearchSearcherModel 
elasticsearchSearcherModel) {
         SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
                 .query(elasticsearchSearcherModel.getQueryBuilder())
-                .fetchSource(elasticsearchSearcherModel.fetchSource())
-                .storedField(elasticsearchSearcherModel.getStoredField())
+                .fetchSource(elasticsearchSearcherModel.getStoredField(), null)
                 .size(elasticsearchSearcherModel.getBatchSize())
                 .from(elasticsearchSearcherModel.getFrom());
 

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/util/TermQueryBuilderFactory.java
 Thu May 21 08:56:02 2020
@@ -67,11 +67,11 @@ public class TermQueryBuilderFactory {
     }
 
     public static PrefixQueryBuilder newPrefixQuery(String field, @NotNull 
String value) {
-        return prefixQuery(keywordFieldName(field), value);
+        return prefixQuery(field, value);
     }
 
     public static WildcardQueryBuilder newWildcardQuery(String field, @NotNull 
String value) {
-        return wildcardQuery(keywordFieldName(field), value);
+        return wildcardQuery(field, value);
     }
 
     public static TermQueryBuilder newPathQuery(String path) {
@@ -96,11 +96,11 @@ public class TermQueryBuilderFactory {
     }
 
     public static TermQueryBuilder newNodeTypeQuery(String type) {
-        return termQuery(keywordFieldName(JCR_PRIMARYTYPE), type);
+        return termQuery(JCR_PRIMARYTYPE, type);
     }
 
     public static TermQueryBuilder newMixinTypeQuery(String type) {
-        return termQuery(keywordFieldName(JCR_MIXINTYPES), type);
+        return termQuery(JCR_MIXINTYPES, type);
     }
 
     public static TermQueryBuilder newNotNullPropQuery(String propName) {
@@ -126,12 +126,9 @@ public class TermQueryBuilderFactory {
         return bq;
     }
 
-    public static <R> QueryBuilder newPropertyRestrictionQuery(String 
propertyName, boolean isString,
+    public static <R> QueryBuilder newPropertyRestrictionQuery(String 
propertyName,
                                                                
Filter.PropertyRestriction pr,
                                                                
Function<PropertyValue, R> propToObj) {
-        if (isString) {
-            propertyName = keywordFieldName(propertyName);
-        }
 
         R first = pr.first != null ? propToObj.apply(pr.first) : null;
         R last = pr.last != null ? propToObj.apply(pr.last) : null;
@@ -166,9 +163,4 @@ public class TermQueryBuilderFactory {
         }
         return path;
     }
-
-    // As per https://www.elastic.co/blog/strings-are-dead-long-live-strings
-    private static String keywordFieldName(String propName) {
-        return propName + "." + "keyword";
-    }
 }
\ No newline at end of file

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java?rev=1877992&r1=1877991&r2=1877992&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchAbstractQueryTest.java
 Thu May 21 08:56:02 2020
@@ -20,7 +20,7 @@ import org.apache.commons.io.FileUtils;
 import org.apache.jackrabbit.oak.InitialContent;
 import org.apache.jackrabbit.oak.Oak;
 import org.apache.jackrabbit.oak.api.ContentRepository;
-import org.apache.jackrabbit.oak.commons.PerfLogger;
+import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.plugins.index.AsyncIndexUpdate;
 import org.apache.jackrabbit.oak.plugins.index.IndexEditorProvider;
 import org.apache.jackrabbit.oak.plugins.index.TrackingCorruptIndexHandler;
@@ -37,6 +37,9 @@ import org.apache.jackrabbit.oak.query.A
 import org.apache.jackrabbit.oak.spi.security.OpenSecurityProvider;
 import org.apache.jackrabbit.oak.spi.state.NodeBuilder;
 import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.client.RequestOptions;
+import org.elasticsearch.client.core.CountRequest;
+import org.elasticsearch.client.indices.GetIndexRequest;
 import org.jetbrains.annotations.NotNull;
 import org.junit.After;
 import org.junit.ClassRule;
@@ -44,7 +47,6 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import java.io.IOException;
-import java.net.URI;
 import java.util.concurrent.TimeUnit;
 
 import static com.google.common.collect.Lists.newArrayList;
@@ -56,10 +58,6 @@ public abstract class ElasticsearchAbstr
 
     protected static final Logger LOG = 
LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class);
 
-    protected static final PerfLogger PERF_LOGGER =
-            new 
PerfLogger(LoggerFactory.getLogger(ElasticsearchAbstractQueryTest.class.getName()
 + ".perf"));
-
-
     // Set this connection string as
     // <scheme>://<hostname>:<port>?key_id=<>,key_secret=<>
     // key_id and key_secret are optional in case the ES server
@@ -72,11 +70,9 @@ public abstract class ElasticsearchAbstr
     // This can be used by the extending classes to trigger the async index 
update as per need (not having to wait for async indexing cycle)
     protected AsyncIndexUpdate asyncIndexUpdate;
     protected long INDEX_CORRUPT_INTERVAL_IN_MILLIS = 100;
-    protected ElasticsearchIndexEditorProvider editorProvider;
     protected NodeStore nodeStore;
     protected int DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS = 5;
 
-
     @ClassRule
     public static ElasticsearchConnectionRule elasticRule = new 
ElasticsearchConnectionRule(elasticConnectionString);
 
@@ -131,12 +127,11 @@ public abstract class ElasticsearchAbstr
     }
 
     protected Oak addAsyncIndexingLanesToOak(Oak oak) {
-        // Override this in extending clases to configure different
+        // Override this in extending classes to configure different
         // indexing lanes with different time limits.
         return oak.withAsyncIndexing("async", 
DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS);
     }
 
-
     @Override
     protected ContentRepository createRepository() {
 
@@ -156,7 +151,6 @@ public abstract class ElasticsearchAbstr
         
trackingCorruptIndexHandler.setCorruptInterval(INDEX_CORRUPT_INTERVAL_IN_MILLIS,
 TimeUnit.MILLISECONDS);
         asyncIndexUpdate.setCorruptIndexHandler(trackingCorruptIndexHandler);
 
-
         Oak oak = new Oak(nodeStore)
                 .with(getInitialContent())
                 .with(new OpenSecurityProvider())
@@ -171,7 +165,6 @@ public abstract class ElasticsearchAbstr
         return oak.createContentRepository();
     }
 
-
     protected static void assertEventually(Runnable r) {
         ElasticsearchTestUtils.assertEventually(r, 
BULK_FLUSH_INTERVAL_MS_DEFAULT * 5);
     }
@@ -188,8 +181,8 @@ public abstract class ElasticsearchAbstr
         return builder;
     }
 
-    protected void setIndex(String idxName, IndexDefinitionBuilder builder) {
-        
builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
+    protected Tree setIndex(String idxName, IndexDefinitionBuilder builder) {
+        return 
builder.build(root.getTree("/").addChild(INDEX_DEFINITIONS_NAME).addChild(idxName));
     }
 
     protected String explain(String query) {
@@ -206,4 +199,36 @@ public abstract class ElasticsearchAbstr
         setTraversalEnabled(false);
     }
 
+    // Utility methods that access Elasticsearch directly
+
+    protected boolean exists(Tree index) {
+        ElasticsearchIndexDefinition esIdxDef = 
getElasticsearchIndexDefinition(index);
+
+        try {
+            return esConnection.getClient().indices()
+                    .exists(new 
GetIndexRequest(esIdxDef.getRemoteIndexAlias()), RequestOptions.DEFAULT);
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    protected long countDocuments(Tree index) {
+        ElasticsearchIndexDefinition esIdxDef = 
getElasticsearchIndexDefinition(index);
+
+        CountRequest request = new 
CountRequest(esIdxDef.getRemoteIndexAlias());
+        try {
+            return esConnection.getClient().count(request, 
RequestOptions.DEFAULT).getCount();
+        } catch (IOException e) {
+            throw new IllegalStateException(e);
+        }
+    }
+
+    private ElasticsearchIndexDefinition getElasticsearchIndexDefinition(Tree 
index) {
+        return new ElasticsearchIndexDefinition(
+                nodeStore.getRoot(),
+                
nodeStore.getRoot().getChildNode(INDEX_DEFINITIONS_NAME).getChildNode(index.getName()),
+                index.getPath(),
+                esConnection.getIndexPrefix());
+    }
+
 }

Added: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java?rev=1877992&view=auto
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
 (added)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
 Thu May 21 08:56:02 2020
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elasticsearch;
+
+import org.apache.jackrabbit.oak.api.Tree;
+import 
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
+import org.junit.Ignore;
+import org.junit.Test;
+
+import java.util.Random;
+import java.util.UUID;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.junit.Assert.assertTrue;
+
+public class ElasticsearchContentTest extends ElasticsearchAbstractQueryTest {
+
+    @Test
+    public void indexWithAnalyzedProperty() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild("indexed").setProperty("a", "foo");
+        content.addChild("not-indexed").setProperty("b", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+
+        content.getChild("indexed").remove();
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(0L)));
+
+        // TODO: should the index be deleted when the definition gets removed?
+        //index.remove();
+        //root.commit();
+
+        //assertFalse(exists(index));
+    }
+
+    @Test
+    @Ignore("this test fails because of a bug with nodeScopeIndex (every node 
gets indexed in an empty doc)")
+    public void indexWithAnalyzedNodeScopeIndexProperty() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed().nodeScopeIndex();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild("indexed").setProperty("a", "foo");
+        content.addChild("not-indexed").setProperty("b", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+    }
+
+    @Test
+    public void indexContentWithLongPath() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        builder.indexRule("nt:base").property("a").analyzed();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+        assertThat(0L, equalTo(countDocuments(index)));
+
+        int leftLimit = 48; // char '0'
+        int rightLimit = 122; // char 'z'
+        int targetStringLength = 1024;
+        final Random random = new Random(42);
+
+        String generatedPath = random.ints(leftLimit, rightLimit + 1)
+                .limit(targetStringLength)
+                .collect(StringBuilder::new, StringBuilder::appendCodePoint, 
StringBuilder::append)
+                .toString();
+
+        Tree content = root.getTree("/").addChild(testId);
+        content.addChild(generatedPath).setProperty("a", "foo");
+        root.commit();
+
+        assertEventually(() -> assertThat(countDocuments(index), equalTo(1L)));
+    }
+
+    @Test
+    public void defineIndexTwice() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("a").noAsync();
+        String testId = UUID.randomUUID().toString();
+        Tree index = setIndex(testId, builder);
+        root.commit();
+
+        assertTrue(exists(index));
+
+        builder = createIndex("a").noAsync();
+        index = setIndex(testId, builder);
+        root.commit();
+    }
+}

Propchange: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchContentTest.java
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to