Author: fortino
Date: Tue Jun 16 13:01:50 2020
New Revision: 1878887
URL: http://svn.apache.org/viewvc?rev=1878887&view=rev
Log:
OAK-9111: oak default analyzer for elastic
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/OakAnalyzer.java
Tue Jun 16 13:01:50 2020
@@ -51,7 +51,7 @@ public class OakAnalyzer extends Analyze
* @param matchVersion Lucene version to match See {@link #matchVersion
above}
* @param indexOriginalTerm flag to setup analyzer such that
* {@link
WordDelimiterFilter#PRESERVE_ORIGINAL}
- * is set to oonfigure word delimeter
+ * is set to configure word delimiter
*/
public OakAnalyzer(Version matchVersion, boolean indexOriginalTerm) {
this.matchVersion = matchVersion;
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
Tue Jun 16 13:01:50 2020
@@ -52,6 +52,16 @@ public class ElasticIndexDefinition exte
public static final String BULK_RETRIES_BACKOFF = "bulkRetriesBackoff";
public static final long BULK_RETRIES_BACKOFF_DEFAULT = 200;
+ /**
+ * Node name under which various analyzers are configured
+ */
+ private static final String ANALYZERS = "analyzers";
+
+ /**
+ * Boolean property indicating if in-built analyzer should preserve
original term
+ */
+ private static final String INDEX_ORIGINAL_TERM = "indexOriginalTerm";
+
private static final Function<Integer, Boolean> isAnalyzable;
static {
@@ -143,6 +153,14 @@ public class ElasticIndexDefinition exte
}
/**
+ * Returns {@code true} if original terms need to be preserved at indexing
analysis phase
+ */
+ public boolean indexOriginalTerms() {
+ NodeState analyzersTree = definition.getChildNode(ANALYZERS); // the "analyzers" child node of the index definition
+ return getOptionalValue(analyzersTree, INDEX_ORIGINAL_TERM, false); // reads "indexOriginalTerm"; false is passed as the fallback (presumably used when the property is absent - confirm in getOptionalValue)
+ }
+
+ /**
* Class to help with {@link ElasticIndexDefinition} creation.
* The built object represents the index definition only without the node
structure.
*/
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
Tue Jun 16 13:01:50 2020
@@ -21,7 +21,6 @@ import org.apache.jackrabbit.oak.plugins
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
import org.elasticsearch.client.indices.CreateIndexRequest;
-import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
@@ -39,10 +38,7 @@ class ElasticIndexHelper {
final CreateIndexRequest request = new
CreateIndexRequest(indexDefinition.getRemoteIndexName());
// provision settings
- //
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
- request.settings(Settings.builder()
- .put("analysis.analyzer.ancestor_analyzer.type", "custom")
- .put("analysis.analyzer.ancestor_analyzer.tokenizer",
"path_hierarchy"));
+ request.settings(loadSettings(indexDefinition));
// provision mappings
final XContentBuilder mappingBuilder = XContentFactory.jsonBuilder();
@@ -61,6 +57,51 @@ class ElasticIndexHelper {
return request;
}
+ private static XContentBuilder loadSettings(ElasticIndexDefinition
indexDefinition) throws IOException { // builds the JSON index settings: one custom token filter plus two custom analyzers under "analysis"
+ final XContentBuilder settingsBuilder = XContentFactory.jsonBuilder();
+ settingsBuilder.startObject();
+ {
+ settingsBuilder.startObject("analysis");
+ {
+ settingsBuilder.startObject("filter");
+ {
+
settingsBuilder.startObject("oak_word_delimiter_graph_filter"); // filter name referenced by oak_analyzer further down
+ {
+ settingsBuilder.field("type", "word_delimiter_graph");
+ settingsBuilder.field("generate_word_parts", true);
+ settingsBuilder.field("stem_english_possessive", true);
+ settingsBuilder.field("generate_number_parts", true);
+ settingsBuilder.field("preserve_original",
indexDefinition.indexOriginalTerms()); // only setting taken from the index definition; the others are fixed
+ }
+ settingsBuilder.endObject();
+ }
+ settingsBuilder.endObject();
+
+ settingsBuilder.startObject("analyzer");
+ {
+ settingsBuilder.startObject("oak_analyzer"); // analyzer name that the property mappings refer to for analyzed text fields
+ {
+ settingsBuilder.field("type", "custom");
+ settingsBuilder.field("tokenizer", "standard");
+ settingsBuilder.field("filter", new
String[]{"lowercase", "oak_word_delimiter_graph_filter"}); // filters listed in this order: lowercase, then the word-delimiter filter defined above
+ }
+ settingsBuilder.endObject();
+ //
https://www.elastic.co/guide/en/elasticsearch/reference/current/analysis-pathhierarchy-tokenizer.html
+ settingsBuilder.startObject("ancestor_analyzer"); // custom analyzer using the path_hierarchy tokenizer documented at the link above
+ {
+ settingsBuilder.field("type", "custom");
+ settingsBuilder.field("tokenizer", "path_hierarchy");
+ }
+ settingsBuilder.endObject();
+ }
+ settingsBuilder.endObject();
+ }
+ settingsBuilder.endObject();
+ }
+ settingsBuilder.endObject();
+ return settingsBuilder; // returned with the root object already closed
+ }
+
private static void mapInternalProperties(XContentBuilder mappingBuilder)
throws IOException {
mappingBuilder.startObject(FieldNames.PATH)
.field("type", "keyword")
@@ -123,6 +164,7 @@ class ElasticIndexHelper {
} else {
if (indexDefinition.isAnalyzed(propertyDefinitions)) {
mappingBuilder.field("type", "text");
+ mappingBuilder.field("analyzer", "oak_analyzer");
// always add keyword for sorting / faceting as
sub-field
mappingBuilder.startObject("fields");
{
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticAbstractQueryTest.java
Tue Jun 16 13:01:50 2020
@@ -165,8 +165,9 @@ public abstract class ElasticAbstractQue
return oak.createContentRepository();
}
- protected static void assertEventually(Runnable r) {
- ElasticTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT *
5);
+ protected void assertEventually(Runnable r) { // delegates to ElasticTestUtils.assertEventually with a wait budget that depends on the indexing mode
+ ElasticTestUtils.assertEventually(r, // budget = 5 x (async indexing cycle, only when async indexing is used, + bulk flush interval); the seconds constant is scaled by 1000 so both addends are milliseconds
+ ((useAsyncIndexing() ? DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS * 1000
: 0) + BULK_FLUSH_INTERVAL_MS_DEFAULT) * 5);
}
protected IndexDefinitionBuilder createIndex(String... propNames) {
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticFullTextAsyncTest.java
Tue Jun 16 13:01:50 2020
@@ -21,6 +21,8 @@ import org.apache.jackrabbit.oak.plugins
import org.junit.Test;
import java.util.Arrays;
+import java.util.Collections;
+import java.util.UUID;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.MatcherAssert.assertThat;
@@ -49,12 +51,6 @@ public class ElasticFullTextAsyncTest ex
test.addChild("c").setProperty("propa", "Hello everyone. This is an
elastic test");
test.addChild("d").setProperty("propa", "howdy! hello again");
root.commit();
- // Wait for DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS
- // This is needed in addition to assertEventually to make the
- // test reliable, otherwise they seem to fail sometimes even
- // with assertEventually wait in place, due to minor delay in
async
- // cycle exec.
- Thread.sleep(DEFAULT_ASYNC_INDEXING_TIME_IN_SECONDS * 1000);
String query = "//*[jcr:contains(@propa, 'Hello')] ";
@@ -64,4 +60,25 @@ public class ElasticFullTextAsyncTest ex
});
}
+ @Test
+ public void testDefaultAnalyzer() throws Exception { // indexes "sun.jpg" in an analyzed property and expects both of its parts to be findable via jcr:contains
+ IndexDefinitionBuilder builder = createIndex("analyzed_field");
+ builder.async("async");
+ builder.indexRule("nt:base").property("analyzed_field").analyzed();
+
+ setIndex(UUID.randomUUID().toString(), builder); // random index name for this test
+ root.commit();
+
+ //add content
+ Tree test = root.getTree("/").addChild("test");
+
+ test.addChild("a").setProperty("analyzed_field", "sun.jpg");
+ root.commit();
+
+ assertEventually(() -> { // the index is async, so the queries are checked through assertEventually rather than once
+ assertQuery("//*[jcr:contains(@analyzed_field, 'Sun')] ", XPATH,
Collections.singletonList("/test/a")); // query term is capitalised while the stored value is lower case
+ assertQuery("//*[jcr:contains(@analyzed_field, 'jpg')] ", XPATH,
Collections.singletonList("/test/a")); // the part after the dot must match on its own
+ });
+ }
+
}
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java?rev=1878887&r1=1878886&r2=1878887&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelperTest.java
Tue Jun 16 13:01:50 2020
@@ -17,6 +17,7 @@
package org.apache.jackrabbit.oak.plugins.index.elastic.index;
import com.fasterxml.jackson.databind.ObjectMapper;
+import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
import
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
@@ -28,6 +29,7 @@ import java.io.IOException;
import java.util.Map;
import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.CoreMatchers.nullValue;
import static org.hamcrest.MatcherAssert.assertThat;
public class ElasticIndexHelperTest {
@@ -70,4 +72,46 @@ public class ElasticIndexHelperTest {
ElasticIndexHelper.createIndexRequest(definition);
}
+ @Test
+ public void oakAnalyzer() throws IOException { // checks the generated settings and mappings when no "analyzers" node is configured
+ IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+ IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
+ indexRule.property("foo").type("String").analyzed(); // foo is marked analyzed
+ indexRule.property("bar").type("String"); // bar is not marked analyzed
+
+ NodeState nodeState = builder.build();
+
+ ElasticIndexDefinition definition =
+ new ElasticIndexDefinition(nodeState, nodeState, "path",
"prefix");
+
+ CreateIndexRequest request =
ElasticIndexHelper.createIndexRequest(definition);
+
+
assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"),
is("false")); // the setting is compared as the string "false"
+
+ ObjectMapper mapper = new ObjectMapper();
+ Map<String, Object> jsonMappings =
mapper.readValue(request.mappings().streamInput(), Map.class);
+ Map fooMapping = (Map) ((Map)
jsonMappings.get("properties")).get("foo");
+ assertThat(fooMapping.get("analyzer"), is("oak_analyzer")); // the analyzed property is mapped with oak_analyzer
+ Map barMapping = (Map) ((Map)
jsonMappings.get("properties")).get("bar");
+ assertThat(barMapping.get("analyzer"), nullValue()); // the other property carries no analyzer entry
+ }
+
+ @Test
+ public void oakAnalyzerWithOriginalTerm() throws IOException { // checks that analyzers/indexOriginalTerm is carried into the filter's preserve_original setting
+ IndexDefinitionBuilder builder = new ElasticIndexDefinitionBuilder();
+ IndexDefinitionBuilder.IndexRule indexRule = builder.indexRule("type");
+ indexRule.property("foo").type("String").analyzed();
+ Tree analyzer = builder.getBuilderTree().addChild("analyzers");
+ analyzer.setProperty("indexOriginalTerm", "true"); // note: the value is set as the string "true", not a boolean
+
+ NodeState nodeState = builder.build();
+
+ ElasticIndexDefinition definition =
+ new ElasticIndexDefinition(nodeState, nodeState, "path",
"prefix");
+
+ CreateIndexRequest request =
ElasticIndexHelper.createIndexRequest(definition);
+
+
assertThat(request.settings().get("analysis.filter.oak_word_delimiter_graph_filter.preserve_original"),
is("true")); // the setting is compared as the string "true"
+ }
+
}