Author: fortino
Date: Tue Jun 16 13:01:50 2020
New Revision: 1878887

URL: http://svn.apache.org/viewvc?rev=1878887&view=rev
Log:
OAK-9111: oak default analyzer for elastic

Modified:
    
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java

Modified: 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
 Tue Jun 16 13:01:50 2020
@@ -51,7 +51,7 @@ public class OakAnalyzer extends Analyze
      * @param matchVersion Lucene version to match See {@link #matchVersion 
above}
      * @param indexOriginalTerm flag to setup analyzer such that
      *                              {@link 
WordDelimiterFilter#PRESERVE_ORIGINAL}
-     *                              is set to oonfigure word delimeter
+     *                              is set to configure word delimiter
      */
     public OakAnalyzer(Version matchVersion, boolean indexOriginalTerm) {
         this.matchVersion = matchVersion;

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
 Tue Jun 16 13:01:50 2020
@@ -52,6 +52,16 @@ public class ElasticIndexDefinition exte
     public static final String BULK_RETRIES_BACKOFF = "bulkRetriesBackoff";
     public static final long BULK_RETRIES_BACKOFF_DEFAULT = 200;
 
+    /**
+     * Node name under which various analyzers are configured
+     */
+    private static final String ANALYZERS = "analyzers";
+
+    /**
+     * Boolean property indicating if in-built analyzer should preserve 
original term
+     */
+    private static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
+
     private static final Function<Integer, Boolean> isAnalyzable;
 
     static {
@@ -143,6 +153,14 @@ public class ElasticIndexDefinition exte
     }
 
     /**
+     * Returns {@code true} if original terms need to be preserved at indexing 
analysis phase
+     */
+    public boolean indexOriginalTerms() {
+        NodeState analyzersTree = definition.getChildNode(ANALYZERS);
+        return getOptionalValue(analyzersTree, INDEX_ORIGINAL_TERM, false);
+    }
+
+    /**
      * Class to help with {@link ElasticIndexDefinition} creation.
      * The built object represents the index definition only without the node 
structure.
      */

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
 Tue Jun 16 13:01:50 2020
@@ -21,7 +21,6 @@ import org.apache.jackrabbit.oak.plugins
 import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
 import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
 import org.elasticsearch.client.indices.CreateIndexRequest;
-import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 
@@ -39,10 +38,7 @@ class ElasticIndexHelper {
         final CreateIndexRequest request = new 
CreateIndexRequest(indexDefinition.getRemoteIndexName());
 
         // provision settings
-        // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
-        request.settings(Settings.builder()
-                .put("analysis.analyzer.ancestor_analyzer.type", "custom")
-                .put("analysis.analyzer.ancestor_analyzer.tokenizer", 
"path_hierarchy"));
+        request.settings(loadSettings(indexDefinition));
 
         // provision mappings
         final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
@@ -61,6 +57,51 @@ class ElasticIndexHelper {
         return request;
     }
 
+    private static XContentBuilder loadSettings(ElasticIndexDefinition 
indexDefinition) throws IOException {
+        final XContentBuilder settingsBuilder = XContentFactory.jsonBuilder();
+        settingsBuilder.startObject();
+        {
+            settingsBuilder.startObject("analysis");
+            {
+                settingsBuilder.startObject("filter");
+                {
+                    
settingsBuilder.startObject("oak_word_delimiter_graph_filter");
+                    {
+                        settingsBuilder.field("type", "word_delimiter_graph");
+                        settingsBuilder.field("generate_word_parts", true);
+                        settingsBuilder.field("stem_english_possessive", true);
+                        settingsBuilder.field("generate_number_parts", true);
+                        settingsBuilder.field("preserve_original", 
indexDefinition.indexOriginalTerms());
+                    }
+                    settingsBuilder.endObject();
+                }
+                settingsBuilder.endObject();
+
+                settingsBuilder.startObject("analyzer");
+                {
+                    settingsBuilder.startObject("oak_analyzer");
+                    {
+                        settingsBuilder.field("type", "custom");
+                        settingsBuilder.field("tokenizer", "standard");
+                        settingsBuilder.field("filter", new 
String[]{"lowercase", "oak_word_delimiter_graph_filter"});
+                    }
+                    settingsBuilder.endObject();
+                    // 
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
+                    settingsBuilder.startObject("ancestor_analyzer");
+                    {
+                        settingsBuilder.field("type", "custom");
+                        settingsBuilder.field("tokenizer", "path_hierarchy");
+                    }
+                    settingsBuilder.endObject();
+                }
+                settingsBuilder.endObject();
+            }
+            settingsBuilder.endObject();
+        }
+        settingsBuilder.endObject();
+        return settingsBuilder;
+    }
+
     private static void mapInternalProperties(XContentBuilder mappingBuilder) 
throws IOException {
         mappingBuilder.startObject(FieldNames.PATH)
                 .field("type", "keyword")
@@ -123,6 +164,7 @@ class ElasticIndexHelper {
                 } else {
                     if (indexDefinition.isAnalyzed(propertyDefinitions)) {
                         mappingBuilder.field("type", "text");
+                        mappingBuilder.field("analyzer", "oak_analyzer");
                         // always add keyword for sorting / faceting as 
sub-field
                         mappingBuilder.startObject("fields");
                         {

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
 Tue Jun 16 13:01:50 2020
@@ -165,8 +165,9 @@ public abstract class ElasticAbstractQue
         return oak.createContentRepository();
     }
 
-    protected static void assertEventually(Runnable r) {
-        ElasticTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 
5);
+    protected void assertEventually(Runnable r) {
+        ElasticTestUtils.assertEventually(r,
+                ((useAsyncIndexing() ? DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS 
: 0) + BULK_FLUSH_INTERVAL_MS_DEFAULT) * 5);
     }
 
     protected IndexDefinitionBuilder createIndex(String... propNames) {

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
 Tue Jun 16 13:01:50 2020
@@ -21,6 +21,8 @@ import org.apache.jackrabbit.oak.plugins
 import org.junit.Test;
 
 import java.util.Arrays;
+import java.util.Collections;
+import java.util.UUID;
 
 import static org.hamcrest.CoreMatchers.containsString;
 import static org.hamcrest.MatcherAssert.assertThat;
@@ -49,12 +51,6 @@ public class ElasticFullTextAsyncTest ex
         test.addChild("c").setProperty("propa", "Hello everyone. This is an 
elastic test");
         test.addChild("d").setProperty("propa", "howdy! hello again");
         root.commit();
-               // Wait for DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS
-               // This is needed in addition to assertEventually to make the 
-               // test reliable, otherwise they seem to fail sometimes even
-               // with assertEventually wait in place, due to minor delay in 
async 
-               // cycle exec.
-        Thread.sleep(DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS * 1000);
                
         String query = "//*[jcr:contains(@propa, 'Hello')] ";
 
@@ -64,4 +60,25 @@ public class ElasticFullTextAsyncTest ex
         });
     }
 
+    @Test
+    public void testDefaultAnalyzer() throws Exception {
+        IndexDefinitionBuilder builder = createIndex("analyzed_field");
+        builder.async("async");
+        builder.indexRule("nt:base").property("analyzed_field").analyzed();
+
+        setIndex(UUID.randomUUID().toString(), builder);
+        root.commit();
+
+        //add content
+        Tree test = root.getTree("/").addChild("test");
+
+        test.addChild("a").setProperty("analyzed_field", "sun.jpg");
+        root.commit();
+
+        assertEventually(() -> {
+            assertQuery("//*[jcr:contains(@analyzed_field, 'Sun')] ", XPATH, 
Collections.singletonList("/test/a"));
+            assertQuery("//*[jcr:contains(@analyzed_field, 'jpg')] ", XPATH, 
Collections.singletonList("/test/a"));
+        });
+    }
+
 }

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
 Tue Jun 16 13:01:50 2020
@@ -17,6 +17,7 @@
 package org.apache.jackrabbit.oak.plugins.index.elastic.index;
 
 import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.jackrabbit.oak.api.Tree;
 import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
 import 
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
 import 
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
@@ -28,6 +29,7 @@ import java.io.IOException;
 import java.util.Map;
 
 import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.nullValue;
 import static org.hamcrest.MatcherAssert.assertThat;
 
 public class ElasticIndexHelperTest {
@@ -70,4 +72,46 @@ public class ElasticIndexHelperTest {
         ElasticIndexHelper.createIndexRequest(definition);
     }
 
+    @Test
+    public void oakAnalyzer() throws IOException {
+        IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+        IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
+        indexRule.property("foo").type("String").analyzed();
+        indexRule.property("bar").type("String");
+
+        NodeState nodeState = builder.build();
+
+        ElasticIndexDefinition definition =
+                new ElasticIndexDefinition(nodeState, nodeState, "path", 
"prefix");
+
+        CreateIndexRequest request = 
ElasticIndexHelper.createIndexRequest(definition);
+
+        
assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"),
 is("false"));
+
+        ObjectMapper mapper = new ObjectMapper();
+        Map<String, Object> jsonMappings = 
mapper.readValue(request.mappings().streamInput(), Map.class);
+        Map fooMapping = (Map) ((Map) 
jsonMappings.get("properties")).get("foo");
+        assertThat(fooMapping.get("analyzer"), is("oak_analyzer"));
+        Map barMapping = (Map) ((Map) 
jsonMappings.get("properties")).get("bar");
+        assertThat(barMapping.get("analyzer"), nullValue());
+    }
+
+    @Test
+    public void oakAnalyzerWithOriginalTerm() throws IOException {
+        IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+        IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
+        indexRule.property("foo").type("String").analyzed();
+        Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+        analyzer.setProperty("indexOriginalTerm", "true");
+
+        NodeState nodeState = builder.build();
+
+        ElasticIndexDefinition definition =
+                new ElasticIndexDefinition(nodeState, nodeState, "path", 
"prefix");
+
+        CreateIndexRequest request = 
ElasticIndexHelper.createIndexRequest(definition);
+
+        
assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"),
 is("true"));
+    }
+
 }


Reply via email to