Author: mkataria
Date: Thu Jun 18 06:56:36 2020
New Revision: 1878954
URL: http://svn.apache.org/viewvc?rev=1878954&view=rev
Log:
OAK-9106: Support spellchecking in Oak ES
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java
(with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java
(with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java
(with props)
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java
(with props)
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java
(with props)
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticIndex.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResultRowIterator.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcher.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcherModel.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticConstants.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/SearchSourceBuilderUtil.java
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexDefinition.java
Thu Jun 18 06:56:36 2020
@@ -152,6 +152,15 @@ public class ElasticIndexDefinition exte
return propertyDefinitions.stream().anyMatch(pd -> pd.analyzed ||
pd.fulltextEnabled());
}
+ @Override
+ protected String getDefaultFunctionName() {
+ /*
+ The returned name is unrelated to the Lucene index implementation. While parsing queries,
+ spellcheck queries are handled via a PropertyRestriction that has native*lucene as its key.
+ */
+ return "lucene";
+ }
+
/**
* Returns {@code true} if original terms need to be preserved at indexing
analysis phase
*/
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
Thu Jun 18 06:56:36 2020
@@ -98,6 +98,9 @@ class ElasticIndexHelper {
}
settingsBuilder.endObject();
}
+ if (indexDefinition.isSpellcheckEnabled()) {
+ createSpellcheckMapping(indexDefinition, settingsBuilder);
+ }
settingsBuilder.endObject();
return settingsBuilder;
}
@@ -171,6 +174,11 @@ class ElasticIndexHelper {
mappingBuilder.startObject("keyword")
.field("type", "keyword")
.endObject();
+ if (indexDefinition.isSpellcheckEnabled()) {
+ mappingBuilder.startObject("trigram")
+ .field("type",
"text").field("analyzer", "trigram")
+ .endObject();
+ }
}
mappingBuilder.endObject();
} else {
@@ -182,4 +190,34 @@ class ElasticIndexHelper {
mappingBuilder.endObject();
}
}
+
+ private static void createSpellcheckMapping(ElasticIndexDefinition
indexDefinition, XContentBuilder settingsBuilder) throws IOException { // writes the "index" > "analysis" settings for spellcheck; indexDefinition is not read in this body
+ settingsBuilder.startObject("index");
+ {
+ settingsBuilder.startObject("analysis");
+ {
+ settingsBuilder.startObject("analyzer");
+ {
+ settingsBuilder.startObject("trigram") // custom analyzer: standard tokenizer followed by the "lowercase" and "shingle" filters
+ .field("type", "custom")
+ .field("tokenizer", "standard")
+ .array("filter", "lowercase", "shingle")
+ .endObject();
+ }
+ settingsBuilder.endObject(); // closes "analyzer"
+
+ settingsBuilder.startObject("filter");
+ {
+ settingsBuilder.startObject("shingle") // filter entry of type shingle with sizes 2 to 3; presumably the one the trigram analyzer refers to - confirm
+ .field("type", "shingle")
+ .field("min_shingle_size", 2)
+ .field("max_shingle_size", 3)
+ .endObject();
+ }
+ settingsBuilder.endObject(); // closes "filter"
+ }
+ settingsBuilder.endObject(); // closes "analysis"
+ }
+ settingsBuilder.endObject(); // closes "index"
+ }
}
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticIndex.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticIndex.java
Thu Jun 18 06:56:36 2020
@@ -28,6 +28,7 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.query.Filter;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.elasticsearch.common.Strings;
+import org.elasticsearch.index.query.QueryBuilder;
import org.jetbrains.annotations.NotNull;
import java.util.Iterator;
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java?rev=1878954&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java
(added)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java
Thu Jun 18 06:56:36 2020
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.query;
+
+import org.elasticsearch.search.SearchHit;
+
+import java.io.IOException;
+
+/**
+ * Elasticsearch request logic extracted from ElasticResultRowIterator, which obtains an ElasticProcess and calls process().
+ * Results are managed and handed back to the iterator through ElasticResultRowIterator.ElasticRowIteratorState.
+ */
+interface ElasticProcess {
+ /**
+ * @return the last SearchHit that was processed; ElasticQueryProcess returns null when no hit was iterated
+ * @throws IOException presumably when the underlying Elasticsearch request fails (confirm against implementations)
+ */
+ SearchHit process() throws IOException;
+
+ /**
+ * @return query string; ElasticQueryProcess renders its QueryBuilder with Strings.toString
+ */
+ String getQuery();
+}
Propchange:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticProcess.java
------------------------------------------------------------------------------
svn:eol-style = native
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java?rev=1878954&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java
(added)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java
Thu Jun 18 06:56:36 2020
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.query;
+
+import org.apache.jackrabbit.oak.commons.PerfLogger;
+import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.query.facets.ElasticAggregationData;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.query.facets.ElasticFacetHelper;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticAggregationBuilderUtil;
+import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticConstants;
+import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.Strings;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.search.SearchHit;
+import
org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+class ElasticQueryProcess implements ElasticProcess { // runs a QueryBuilder search and pushes the accepted rows into the shared ElasticRowIteratorState
+
+ private static final Logger LOG = LoggerFactory
+ .getLogger(ElasticQueryProcess.class);
+ private static final PerfLogger PERF_LOGGER =
+ new
PerfLogger(LoggerFactory.getLogger(ElasticQueryProcess.class.getName() +
".perf"));
+
+ private final QueryBuilder queryBuilder;
+ private int nextBatchSize = ElasticConstants.ELASTIC_QUERY_BATCH_SIZE; // doubled after each search in process(), capped at ELASTIC_QUERY_MAX_BATCH_SIZE
+ private final ElasticResultRowIterator.ElasticRowIteratorState
elasticRowIteratorState;
+ private final ElasticResultRowIterator.ElasticFacetProvider
elasticsearchFacetProvider;
+
+
+ ElasticQueryProcess(QueryBuilder queryBuilder,
ElasticResultRowIterator.ElasticRowIteratorState elasticRowIteratorState,
+ ElasticResultRowIterator.ElasticFacetProvider
elasticFacetProvider) {
+ this.queryBuilder = queryBuilder;
+ this.elasticRowIteratorState = elasticRowIteratorState;
+ this.elasticsearchFacetProvider = elasticFacetProvider;
+ }
+
+ @Override
+ public SearchHit process() throws IOException { // returns the last hit iterated, or null when no hit was iterated
+
+ ElasticIndexDefinition indexDefinition =
elasticRowIteratorState.getIndexNode().getDefinition();
+ ElasticSearcher searcher = new
ElasticSearcher(elasticRowIteratorState.getIndexNode());
+ int numberOfFacets =
elasticRowIteratorState.getIndexNode().getDefinition().getNumberOfTopFacets();
+ List<TermsAggregationBuilder> aggregationBuilders =
ElasticAggregationBuilderUtil
+ .getAggregators(elasticRowIteratorState.getPlan(),
indexDefinition, numberOfFacets);
+
+ ElasticSearcherModel elasticSearcherModel = new
ElasticSearcherModel.ElasticSearcherModelBuilder()
+ .withQuery(this.queryBuilder)
+ .withBatchSize(nextBatchSize)
+ .withAggregation(aggregationBuilders)
+ .build();
+
+ // TODO: custom scoring
+
+ SearchResponse docs;
+ SearchHit lastDocToRecord = null;
+ long start = PERF_LOGGER.start();
+
+ while (true) { // NOTE(review): elasticSearcherModel is built once above and reused unchanged - confirm repeated search() calls can return new hits
+ LOG.debug("loading {} entries for query {}", nextBatchSize,
this.queryBuilder);
+ docs = searcher.search(elasticSearcherModel);
+ long totalHits = docs.getHits().getTotalHits().value;
+ ElasticAggregationData elasticAggregationData =
+ new ElasticAggregationData(numberOfFacets, totalHits,
docs.getAggregations());
+
+ SearchHit[] searchHits = docs.getHits().getHits();
+ PERF_LOGGER.end(start, -1, "{} ...", searchHits.length); // start was taken before the loop, so later iterations report cumulative time
+
+
elasticRowIteratorState.updateEstimator(docs.getHits().getTotalHits().value);
+ if (searchHits.length < nextBatchSize) { // fewer hits than requested: mark the iterator state as having seen the last document
+ elasticRowIteratorState.setLastDoc(true);
+ }
+ nextBatchSize = (int) Math.min(nextBatchSize * 2L,
ElasticConstants.ELASTIC_QUERY_MAX_BATCH_SIZE);
+ if (aggregationBuilders.size() > 0 &&
!elasticsearchFacetProvider.isInitiliazed()) { // facets are set on the provider only once
+
elasticsearchFacetProvider.initialize(ElasticFacetHelper.getAggregates(searcher,
queryBuilder,
+ elasticRowIteratorState.getIndexNode(),
elasticRowIteratorState.getPlan(), elasticAggregationData));
+ }
+
+ // TODO: excerpt
+
+ // TODO: explanation
+
+ // TODO: sim search
+
+ for (SearchHit doc : searchHits) {
+ // TODO : excerpts
+
+ FulltextIndex.FulltextResultRow row = convertToRow(doc,
elasticsearchFacetProvider);
+ if (row != null) {
+ elasticRowIteratorState.addResultRow(row);
+ }
+ lastDocToRecord = doc;
+ }
+
+ if (elasticRowIteratorState.isEmpty() && searchHits.length > 0) {
+ // the queue is still empty although hits were returned (no row was accepted),
+ // so remember the last hit and query Elasticsearch again
+ elasticRowIteratorState.lastIteratedDoc = lastDocToRecord;
+ } else {
+ break;
+ }
+ }
+ return lastDocToRecord;
+ }
+
+ @Override
+ public String getQuery() {
+ return Strings.toString((queryBuilder)); // string rendering of the QueryBuilder
+ }
+
+ private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
ElasticResultRowIterator.ElasticFacetProvider elasticsearchFacetProvider) { // maps one hit to a result row, or null when the hit must be skipped
+ final Map<String, Object> sourceMap = hit.getSourceAsMap();
+ String path = (String) sourceMap.get(FieldNames.PATH);
+ if (path != null) {
+ if ("".equals(path)) { // an empty stored path is rewritten to "/"
+ path = "/";
+ }
+ String originalPath = path;
+ path = elasticRowIteratorState.getPlanResult().transformPath(path); // NOTE(review): called unconditionally; the removed iterator code guarded this with isPathTransformed() - confirm equivalence
+
+ if (path == null) {
+ LOG.trace("Ignoring path {} : Transformation returned null",
originalPath);
+ return null;
+ }
+
+ boolean shouldIncludeForHierarchy =
elasticRowIteratorState.getRowInclusionPredicate()
+ .shouldInclude(path, elasticRowIteratorState.getPlan());
+ LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path,
shouldIncludeForHierarchy);
+ return shouldIncludeForHierarchy ? new
FulltextIndex.FulltextResultRow(path, hit.getScore(), null,
+ elasticsearchFacetProvider, null)
+ : null;
+ }
+ return null; // the hit source has no value under FieldNames.PATH
+ }
+
+}
Propchange:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticQueryProcess.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResultRowIterator.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResultRowIterator.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResultRowIterator.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticResultRowIterator.java
Thu Jun 18 06:56:36 2020
@@ -18,13 +18,11 @@ package org.apache.jackrabbit.oak.plugin
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
-import org.apache.jackrabbit.oak.commons.PerfLogger;
import org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition;
import
org.apache.jackrabbit.oak.plugins.index.elastic.query.facets.ElasticAggregationData;
import
org.apache.jackrabbit.oak.plugins.index.elastic.query.facets.ElasticFacetHelper;
import
org.apache.jackrabbit.oak.plugins.index.elastic.query.facets.ElasticFacets;
-import
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticAggregationBuilderUtil;
-import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticConstants;
+import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticQueryUtil;
import org.apache.jackrabbit.oak.plugins.index.search.FieldNames;
import org.apache.jackrabbit.oak.plugins.index.search.IndexDefinition;
import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
@@ -42,12 +40,10 @@ import org.apache.jackrabbit.oak.spi.que
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextOr;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextTerm;
import org.apache.jackrabbit.oak.spi.query.fulltext.FullTextVisitor;
-import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
-import
org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
@@ -57,21 +53,19 @@ import javax.jcr.PropertyType;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiPredicate;
import java.util.stream.StreamSupport;
import static org.apache.jackrabbit.JcrConstants.JCR_MIXINTYPES;
import static org.apache.jackrabbit.JcrConstants.JCR_PRIMARYTYPE;
-import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
-import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
-import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newAncestorQuery;
-import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newDepthQuery;
import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newMixinTypeQuery;
import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newNodeTypeQuery;
import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newNotNullPropQuery;
@@ -92,26 +86,12 @@ import static org.elasticsearch.index.qu
class ElasticResultRowIterator implements
Iterator<FulltextIndex.FulltextResultRow> {
private static final Logger LOG = LoggerFactory
.getLogger(ElasticResultRowIterator.class);
- private static final PerfLogger PERF_LOGGER =
- new
PerfLogger(LoggerFactory.getLogger(ElasticResultRowIterator.class.getName() +
".perf"));
+ private final static String SPELLCHECK_PREFIX = "spellcheck?term=";
// TODO: oak-lucene gets this via WildcardQuery class. See if ES also
exposes these consts
private static final char WILDCARD_STRING = '*';
private static final char WILDCARD_CHAR = '?';
-
- private final Deque<FulltextIndex.FulltextResultRow> queue = new
ArrayDeque<>();
- // TODO : find if ES can return dup docs - if so how to avoid
-// private final Set<String> seenPaths = Sets.newHashSet();
- private SearchHit lastDoc;
- private int nextBatchSize = ElasticConstants.ELASTIC_QUERY_BATCH_SIZE;
- private boolean noDocs = false;
-
- private final Filter filter;
- private final PlanResult planResult;
- private final IndexPlan plan;
- private final ElasticIndexNode indexNode;
- private final RowInclusionPredicate rowInclusionPredicate;
- private final LMSEstimator estimator;
+ private final ElasticRowIteratorState rowIteratorState;
ElasticResultRowIterator(@NotNull Filter filter,
@NotNull FulltextIndexPlanner.PlanResult
planResult,
@@ -119,22 +99,18 @@ class ElasticResultRowIterator implement
ElasticIndexNode indexNode,
RowInclusionPredicate rowInclusionPredicate,
LMSEstimator estimator) {
- this.filter = filter;
- this.planResult = planResult;
- this.plan = plan;
- this.indexNode = indexNode;
- this.rowInclusionPredicate = rowInclusionPredicate != null ?
rowInclusionPredicate : RowInclusionPredicate.NOOP;
- this.estimator = estimator;
+ this.rowIteratorState = new ElasticRowIteratorState(filter, planResult,
+ plan, indexNode, rowInclusionPredicate, estimator);
}
@Override
public boolean hasNext() {
- return !queue.isEmpty() || loadDocs();
+ return !rowIteratorState.queue.isEmpty() || loadDocs();
}
@Override
public FulltextIndex.FulltextResultRow next() {
- return queue.remove();
+ return rowIteratorState.queue.remove();
}
/**
@@ -144,121 +120,32 @@ class ElasticResultRowIterator implement
*/
private boolean loadDocs() {
- if (noDocs) {
+ if (rowIteratorState.isLastDoc) {
return false;
}
- if (indexNode == null) {
+ if (rowIteratorState.indexNode == null) {
throw new IllegalStateException("indexNode cannot be null");
}
SearchHit lastDocToRecord = null;
try {
- ElasticSearcher searcher = getCurrentElasticSearcher(indexNode);
- QueryBuilder query = getElasticQuery(plan, planResult);
- ElasticIndexDefinition indexDefinition = indexNode.getDefinition();
- int numberOfFacets = indexDefinition.getNumberOfTopFacets();
- List<TermsAggregationBuilder> aggregationBuilders =
ElasticAggregationBuilderUtil
- .getAggregators(plan, indexDefinition, numberOfFacets);
-
- ElasticSearcherModel elasticSearcherModel = new
ElasticSearcherModel.ElasticSearcherModelBuilder()
- .withQuery(query)
- .withBatchSize(nextBatchSize)
- .withAggregation(aggregationBuilders)
- .build();
-
- // TODO: custom scoring
-
- SearchResponse docs;
- long start = PERF_LOGGER.start();
- while (true) {
- LOG.debug("loading {} entries for query {}", nextBatchSize,
query);
- docs = searcher.search(elasticSearcherModel);
- long totalHits = docs.getHits().getTotalHits().value;
- ElasticAggregationData elasticAggregationData =
- new ElasticAggregationData(numberOfFacets, totalHits,
docs.getAggregations());
-
- SearchHit[] searchHits = docs.getHits().getHits();
- PERF_LOGGER.end(start, -1, "{} ...", searchHits.length);
-
- estimator.update(filter, docs.getHits().getTotalHits().value);
-
- if (searchHits.length < nextBatchSize) {
- noDocs = true;
- }
-
- nextBatchSize = (int) Math.min(nextBatchSize * 2L,
ElasticConstants.ELASTIC_QUERY_MAX_BATCH_SIZE);
-
- ElasticsearchFacetProvider elasticsearchFacetProvider = new
ElasticsearchFacetProvider(ElasticFacetHelper.getAggregates(searcher, query,
indexNode, plan, elasticAggregationData));
+ ElasticProcess elasticProcess =
getElasticProcess(rowIteratorState.plan, rowIteratorState.planResult);
- // TODO: excerpt
-
- // TODO: explanation
-
- // TODO: sim search
-
- for (SearchHit doc : searchHits) {
- // TODO : excerpts
-
- FulltextIndex.FulltextResultRow row = convertToRow(doc,
elasticsearchFacetProvider);
- if (row != null) {
- queue.add(row);
- }
- lastDocToRecord = doc;
- }
+ lastDocToRecord = elasticProcess.process();
- if (queue.isEmpty() && searchHits.length > 0) {
- //queue is still empty but more results can be fetched
- //from Lucene so still continue
- lastDoc = lastDocToRecord;
- } else {
- break;
- }
- }
-
- // TODO: spellcheck else if
(luceneRequestFacade.getLuceneRequest() instanceof
SpellcheckHelper.SpellcheckQuery) {
// TODO: suggest } else if (luceneRequestFacade.getLuceneRequest()
instanceof SuggestHelper.SuggestQuery) {
} catch (Exception e) {
LOG.warn("query via {} failed.", this, e);
} finally {
- indexNode.release();
+ rowIteratorState.indexNode.release();
}
if (lastDocToRecord != null) {
- this.lastDoc = lastDocToRecord;
+ this.rowIteratorState.lastIteratedDoc = lastDocToRecord;
}
- return !queue.isEmpty();
- }
-
- private ElasticSearcher getCurrentElasticSearcher(ElasticIndexNode
indexNode) {
- return new ElasticSearcher(indexNode);
- }
-
- private FulltextIndex.FulltextResultRow convertToRow(SearchHit hit,
ElasticsearchFacetProvider elasticsearchFacetProvider) {
- final Map<String, Object> sourceMap = hit.getSourceAsMap();
- String path = (String) sourceMap.get(FieldNames.PATH);
- if (path != null) {
- if ("".equals(path)) {
- path = "/";
- }
- if (planResult.isPathTransformed()) {
- String originalPath = path;
- path = planResult.transformPath(path);
-
- if (path == null) {
- LOG.trace("Ignoring path {} : Transformation returned
null", originalPath);
- return null;
- }
- }
-
- boolean shouldIncludeForHierarchy =
rowInclusionPredicate.shouldInclude(path, this.plan);
- LOG.trace("Matched path {}; shouldIncludeForHierarchy: {}", path,
shouldIncludeForHierarchy);
- return shouldIncludeForHierarchy ? new
FulltextIndex.FulltextResultRow(path, hit.getScore(), null,
- elasticsearchFacetProvider, null)
- : null;
- }
- return null;
+ return !rowIteratorState.queue.isEmpty();
}
public interface RowInclusionPredicate {
@@ -274,7 +161,7 @@ class ElasticResultRowIterator implement
* @param planResult
* @return the Lucene query
*/
- public QueryBuilder getElasticQuery(IndexPlan plan, PlanResult planResult)
{
+ private ElasticProcess getElasticProcess(IndexPlan plan, PlanResult
planResult) {
List<QueryBuilder> qs = new ArrayList<>();
Filter filter = plan.getFilter();
FullTextExpression ft = filter.getFullTextConstraint();
@@ -300,6 +187,9 @@ class ElasticResultRowIterator implement
// TODO: more like this
// TODO: spellcheck
+ if (query.startsWith(SPELLCHECK_PREFIX)) {
+ return new ElasticSpellcheckProcess(query, rowIteratorState);
+ }
// TODO: suggest
@@ -317,12 +207,14 @@ class ElasticResultRowIterator implement
//For purely nodeType based queries all the documents would have to
//be returned (if the index definition has a single rule)
if (planResult.evaluateNodeTypeRestriction()) {
- return matchAllQuery();
+ return new ElasticQueryProcess(matchAllQuery(),
rowIteratorState, new ElasticFacetProvider());
}
throw new IllegalStateException("No query created for filter " +
filter);
}
- return performAdditionalWraps(qs);
+ ElasticProcess elasticProcess = new
ElasticQueryProcess(ElasticQueryUtil.performAdditionalWraps(qs),
+ rowIteratorState, new ElasticFacetProvider());
+ return elasticProcess;
}
private static QueryBuilder getFullTextQuery(FullTextExpression ft, final
PlanResult pr) {
@@ -437,118 +329,14 @@ class ElasticResultRowIterator implement
return p;
}
- /**
- * Perform additional wraps on the list of queries to allow, for example,
the NOT CONTAINS to
- * play properly when sent to lucene.
- *
- * @param qs the list of queries. Cannot be null.
- * @return the request facade
- */
- @NotNull
- private static QueryBuilder performAdditionalWraps(@NotNull
List<QueryBuilder> qs) {
- if (qs.size() == 1) {
- // we don't need to worry about all-negatives in a bool query as
- // BoolQueryBuilder.adjustPureNegative is on by default anyway
- return qs.get(0);
- }
- BoolQueryBuilder bq = new BoolQueryBuilder();
- // TODO: while I've attempted to translate oak-lucene code to
corresponding ES one but I am
- // unable to make sense of this code
- for (QueryBuilder q : qs) {
- boolean unwrapped = false;
- if (q instanceof BoolQueryBuilder) {
- unwrapped = unwrapMustNot((BoolQueryBuilder) q, bq);
- }
-
- if (!unwrapped) {
- bq.must(q);
- }
- }
- return bq;
- }
-
- /**
- * unwraps any NOT clauses from the provided boolean query into another
boolean query.
- *
- * @param input the query to be analysed for the existence of NOT
clauses. Cannot be null.
- * @param output the query where the unwrapped NOTs will be saved into.
Cannot be null.
- * @return true if there where at least one unwrapped NOT. false otherwise.
- */
- private static boolean unwrapMustNot(@NotNull BoolQueryBuilder input,
@NotNull BoolQueryBuilder output) {
- boolean unwrapped = false;
- for (QueryBuilder mustNot : input.mustNot()) {
- output.mustNot(mustNot);
- unwrapped = true;
- }
- if (unwrapped) {
- // if we have unwrapped "must not" conditions,
- // then we need to unwrap "must" conditions as well
- for (QueryBuilder must : input.must()) {
- output.must(must);
- }
- }
-
- return unwrapped;
- }
-
private void addNonFullTextConstraints(List<QueryBuilder> qs,
IndexPlan plan, PlanResult
planResult) {
- final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
- StreamSupport.stream(iterable.spliterator(),
false).anyMatch(value::equals);
-
Filter filter = plan.getFilter();
if (!filter.matchesAllTypes()) {
addNodeTypeConstraints(planResult.indexingRule, qs, filter);
}
- String path = FulltextIndex.getPathRestriction(plan);
- switch (filter.getPathRestriction()) {
- case ALL_CHILDREN:
- if (!"/".equals(path)) {
- qs.add(newAncestorQuery(path));
- }
- break;
- case DIRECT_CHILDREN:
- BoolQueryBuilder bq = boolQuery();
- bq.must(newAncestorQuery(path));
- bq.must(newDepthQuery(path, planResult));
- qs.add(bq);
- break;
- case EXACT:
- // For transformed paths, we can only add path restriction if
absolute path to property can be
- // deduced
- if (planResult.isPathTransformed()) {
- String parentPathSegment =
planResult.getParentPathSegment();
- if (!any.test(PathUtils.elements(parentPathSegment), "*"))
{
- qs.add(newPathQuery(path + parentPathSegment));
- }
- } else {
- qs.add(newPathQuery(path));
- }
- break;
- case PARENT:
- if (denotesRoot(path)) {
- // there's no parent of the root node
- // we add a path that can not possibly occur because there
- // is no way to say "match no documents" in Lucene
- qs.add(newPathQuery("///"));
- } else {
- // For transformed paths, we can only add path restriction
if absolute path to property can be
- // deduced
- if (planResult.isPathTransformed()) {
- String parentPathSegment =
planResult.getParentPathSegment();
- if (!any.test(PathUtils.elements(parentPathSegment),
"*")) {
- qs.add(newPathQuery(getParentPath(path) +
parentPathSegment));
- }
- } else {
- qs.add(newPathQuery(getParentPath(path)));
- }
- }
- break;
- case NO_RESTRICTION:
- break;
- }
-
+ qs.addAll(ElasticQueryUtil.getPathRestrictionQuery(plan, planResult,
filter));
for (Filter.PropertyRestriction pr : filter.getPropertyRestrictions())
{
String name = pr.propertyName;
@@ -594,6 +382,7 @@ class ElasticResultRowIterator implement
}
}
+
private static void addNodeTypeConstraints(IndexDefinition.IndexingRule
defn, List<QueryBuilder> qs, Filter filter) {
BoolQueryBuilder bq = boolQuery();
PropertyDefinition primaryType = defn.getConfig(JCR_PRIMARYTYPE);
@@ -667,7 +456,7 @@ class ElasticResultRowIterator implement
@Nullable
private QueryBuilder createQuery(String propertyName,
Filter.PropertyRestriction pr,
- PropertyDefinition defn) {
+ PropertyDefinition defn) {
int propType = FulltextIndex.determinePropertyType(defn, pr);
if (pr.isNullRestriction()) {
@@ -680,7 +469,7 @@ class ElasticResultRowIterator implement
return newNotNullPropQuery(defn.name);
}
- final String field =
indexNode.getDefinition().getElasticKeyword(propertyName);
+ final String field =
rowIteratorState.indexNode.getDefinition().getElasticKeyword(propertyName);
QueryBuilder in;
switch (propType) {
@@ -713,21 +502,98 @@ class ElasticResultRowIterator implement
throw new IllegalStateException("PropertyRestriction not handled " +
pr + " for index " + defn);
}
- class ElasticsearchFacetProvider implements FulltextIndex.FacetProvider {
+ class ElasticFacetProvider implements FulltextIndex.FacetProvider {
private ElasticFacets elasticFacets;
private Map<String, List<FulltextIndex.Facet>> cachedResults = new
HashMap<>();
-
- ElasticsearchFacetProvider(ElasticFacets elasticFacets) {
- this.elasticFacets = elasticFacets;
- }
+ private AtomicBoolean isInitialized = new AtomicBoolean();
@Override
public List<FulltextIndex.Facet> getFacets(int numberOfFacets, String
columnName) throws IOException {
- String facetProp = FulltextIndex.parseFacetField(columnName);
- if (cachedResults.get(facetProp) == null) {
- cachedResults =
elasticFacets.getFacets(indexNode.getDefinition(), numberOfFacets);
+ if (isInitiliazed()) {
+ String facetProp = FulltextIndex.parseFacetField(columnName);
+ if (cachedResults.get(facetProp) == null) {
+ cachedResults =
elasticFacets.getFacets(rowIteratorState.indexNode.getDefinition(),
numberOfFacets);
+ }
+ return cachedResults.get(facetProp);
+ } else {
+ LOG.error("FacetProvider not initialized");
}
- return cachedResults.get(facetProp);
+ return Collections.emptyList();
+ }
+
+ public boolean isInitiliazed() {
+ return isInitialized.get();
+ }
+
+ public void initialize(ElasticFacets elasticFacets) {
+ isInitialized.set(true);
+ this.elasticFacets = elasticFacets;
+ }
+ }
+
+ static class ElasticRowIteratorState {
+
+ private final Deque<FulltextIndex.FulltextResultRow> queue = new
ArrayDeque<>();
+ // TODO : find if ES can return dup docs - if so how to avoid
+ SearchHit lastIteratedDoc;
+ private boolean isLastDoc = false;
+ private final Filter filter;
+ private final FulltextIndexPlanner.PlanResult planResult;
+ private final QueryIndex.IndexPlan plan;
+ private final ElasticIndexNode indexNode;
+ private final ElasticResultRowIterator.RowInclusionPredicate
rowInclusionPredicate;
+ private final LMSEstimator estimator;
+
+ private ElasticRowIteratorState(Filter filter,
FulltextIndexPlanner.PlanResult planResult,
+ QueryIndex.IndexPlan plan,
ElasticIndexNode indexNode,
+
ElasticResultRowIterator.RowInclusionPredicate rowInclusionPredicate,
+ LMSEstimator estimator) {
+ this.filter = filter;
+ this.planResult = planResult;
+ this.plan = plan;
+ this.indexNode = indexNode;
+ this.rowInclusionPredicate = rowInclusionPredicate;
+ this.estimator = estimator;
+ }
+
+ void updateEstimator(long value) {
+ estimator.update(filter, value);
+ }
+
+ void addResultRow(FulltextIndex.FulltextResultRow row) {
+ queue.add(row);
}
+
+ void setLastDoc(boolean lastDoc) {
+ this.isLastDoc = lastDoc;
+ }
+
+ IndexPlan getPlan() {
+ return plan;
+ }
+
+
+ ElasticIndexNode getIndexNode() {
+ return indexNode;
+ }
+
+
+ RowInclusionPredicate getRowInclusionPredicate() {
+ return rowInclusionPredicate;
+ }
+
+ Filter getFilter() {
+ return filter;
+ }
+
+ PlanResult getPlanResult() {
+ return planResult;
+ }
+
+ boolean isEmpty(){
+ return queue.isEmpty();
+ }
+
}
+
}
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcher.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcher.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcher.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcher.java
Thu Jun 18 06:56:36 2020
@@ -17,6 +17,8 @@
package org.apache.jackrabbit.oak.plugins.index.elastic.query;
import
org.apache.jackrabbit.oak.plugins.index.elastic.util.SearchSourceBuilderUtil;
+import org.elasticsearch.action.search.MultiSearchRequest;
+import org.elasticsearch.action.search.MultiSearchResponse;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.RequestOptions;
@@ -24,6 +26,7 @@ import org.elasticsearch.search.builder.
import org.jetbrains.annotations.NotNull;
import java.io.IOException;
+import java.util.List;
public class ElasticSearcher {
private final ElasticIndexNode indexNode;
@@ -40,4 +43,16 @@ public class ElasticSearcher {
return indexNode.getConnection().getClient().search(request,
RequestOptions.DEFAULT);
}
+
+    /**
+     * Runs several searches against this index in a single multi-search round trip.
+     * One {@link SearchRequest} is created per model, in list order, each targeting the
+     * remote index alias of this searcher's index definition.
+     *
+     * @param elasticSearcherModels the models describing the individual searches
+     * @return the multi-search response returned by the client
+     * @throws IOException if the call to the client fails
+     */
+    public MultiSearchResponse search(List<ElasticSearcherModel> elasticSearcherModels) throws IOException {
+        MultiSearchRequest multiSearchRequest = new MultiSearchRequest();
+        for (ElasticSearcherModel model : elasticSearcherModels) {
+            multiSearchRequest.add(
+                    new SearchRequest(indexNode.getDefinition().getRemoteIndexAlias())
+                            .source(SearchSourceBuilderUtil.createSearchSourceBuilder(model)));
+        }
+        return indexNode.getConnection().getClient().msearch(multiSearchRequest, RequestOptions.DEFAULT);
+    }
+
+
}
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcherModel.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcherModel.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcherModel.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSearcherModel.java
Thu Jun 18 06:56:36 2020
@@ -20,6 +20,7 @@ import org.apache.jackrabbit.oak.plugins
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import
org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder;
+import org.elasticsearch.search.suggest.SuggestBuilder;
import java.util.LinkedList;
import java.util.List;
@@ -32,15 +33,17 @@ public class ElasticSearcherModel {
private final boolean fetchSource;
private final String storedField;
private final int from;
+ private SuggestBuilder searchSourceBuilder;
private ElasticSearcherModel(QueryBuilder queryBuilder,
List<AggregationBuilder> aggregationBuilders,
- int batchSize, boolean fetchSource, String
storedField, int from) {
+ int batchSize, boolean fetchSource, String
storedField, int from, SuggestBuilder searchSourceBuilder) {
this.queryBuilder = queryBuilder;
this.aggregationBuilders = aggregationBuilders;
this.batchSize = batchSize;
this.fetchSource = fetchSource;
this.storedField = storedField;
this.from = from;
+ this.searchSourceBuilder = searchSourceBuilder;
}
public int getBatchSize() {
@@ -67,12 +70,17 @@ public class ElasticSearcherModel {
return storedField;
}
+ public SuggestBuilder getSuggestBuilder() {
+ return searchSourceBuilder;
+ }
+
public static class ElasticSearcherModelBuilder {
private QueryBuilder queryBuilder;
private final List<AggregationBuilder> aggregationBuilders = new
LinkedList<>();
private int batchSize;
private boolean fetchSource = false;
private String storedField = FieldNames.PATH;
+ private SuggestBuilder searchSourceBuilder;
private int from;
public ElasticSearcherModelBuilder withQuery(QueryBuilder query) {
@@ -96,7 +104,12 @@ public class ElasticSearcherModel {
}
public ElasticSearcherModel build() {
- return new ElasticSearcherModel(queryBuilder, aggregationBuilders,
batchSize, fetchSource, storedField, from);
+ return new ElasticSearcherModel(queryBuilder, aggregationBuilders,
batchSize, fetchSource, storedField, from, searchSourceBuilder);
+ }
+
+ public ElasticSearcherModelBuilder withSpellCheck(SuggestBuilder
searchSourceBuilder) {
+ this.searchSourceBuilder = searchSourceBuilder;
+ return this;
}
}
}
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java?rev=1878954&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java
(added)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java
Thu Jun 18 06:56:36 2020
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.query;
+
+import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticConstants;
+import org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticQueryUtil;
+import org.apache.jackrabbit.oak.plugins.index.search.PropertyDefinition;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import org.elasticsearch.action.search.MultiSearchResponse;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
+import org.elasticsearch.index.query.MultiMatchQueryBuilder;
+import org.elasticsearch.index.query.Operator;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.elasticsearch.index.search.MatchQuery;
+import org.elasticsearch.search.SearchHit;
+import org.elasticsearch.search.suggest.Suggest;
+import org.elasticsearch.search.suggest.SuggestBuilder;
+import org.elasticsearch.search.suggest.SuggestBuilders;
+import org.elasticsearch.search.suggest.SuggestionBuilder;
+import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
+import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.PriorityQueue;
+import java.util.Queue;
+
+/**
+ * Executes a spellcheck query: asks Elasticsearch for phrase suggestions on every property
+ * flagged {@code useInSpellcheck}, then keeps only those suggestions for which at least one
+ * matching document is accessible to the current filter. Accepted suggestions are added to the
+ * row iterator state as result rows, ordered by decreasing suggestion score.
+ */
+class ElasticSpellcheckProcess implements ElasticProcess {
+    private static final String SPELLCHECK_PREFIX = "spellcheck?term=";
+    private final String query;
+    private final ElasticResultRowIterator.ElasticRowIteratorState rowIteratorState;
+
+    ElasticSpellcheckProcess(String query, ElasticResultRowIterator.ElasticRowIteratorState rowIteratorState) {
+        this.query = query;
+        this.rowIteratorState = rowIteratorState;
+    }
+
+    /**
+     * @return names of all properties of the plan's indexing rule that have
+     * {@code useInSpellcheck} set
+     */
+    private List<String> getSpellCheckFields() {
+        List<String> spellCheckFields = new LinkedList<>();
+
+        for (PropertyDefinition propertyDefinition : rowIteratorState.getPlanResult().indexingRule.getProperties()) {
+            if (propertyDefinition.useInSpellcheck) {
+                spellCheckFields.add(propertyDefinition.name);
+            }
+        }
+
+        return spellCheckFields;
+    }
+
+    /**
+     * Builds the collate query template for one field; {@code {{suggestion}}} is the
+     * placeholder for each candidate phrase.
+     */
+    private MatchPhraseQueryBuilder getCollateQuery(String fieldName) {
+        return new MatchPhraseQueryBuilder(fieldName, "{{suggestion}}");
+    }
+
+    /**
+     * Builds one phrase suggestion per spellcheck field, each on the field's trigram sub-field.
+     *
+     * @param spellCheckFields the fields to generate suggestions for
+     * @return the suggest builder holding one uniquely named suggestion per field
+     */
+    private SuggestBuilder getSuggestBuilder(List<String> spellCheckFields) {
+        SuggestBuilder suggestBuilder = new SuggestBuilder();
+        String spellcheckQueryString = query.replace(SPELLCHECK_PREFIX, "");
+        int i = 0;
+        for (String field : spellCheckFields) {
+            PhraseSuggestionBuilder.CandidateGenerator candidateGeneratorBuilder =
+                    new DirectCandidateGeneratorBuilder(getTrigramField(field))
+                            .suggestMode("missing");
+            PhraseSuggestionBuilder phraseSuggestionBuilder = SuggestBuilders.phraseSuggestion(getTrigramField(field))
+                    .size(10)
+                    .addCandidateGenerator(candidateGeneratorBuilder)
+                    .text(spellcheckQueryString)
+                    .collateQuery(getCollateQuery(field).toString());
+            // Each suggestion needs its own name: SuggestBuilder rejects a name that is
+            // already registered, so the counter must advance for every field.
+            suggestBuilder.addSuggestion("oak:suggestion" + i++, phraseSuggestionBuilder);
+        }
+        return suggestBuilder;
+    }
+
+    /**
+     * Fetches suggestions, verifies each one against accessible documents and queues the
+     * accepted ones as result rows.
+     *
+     * @return always {@code null}; spellcheck results are strings wrapped as rows, not search hits
+     * @throws IOException if a request fails or one of the verification searches reports a failure
+     */
+    @Override
+    public SearchHit process() throws IOException {
+
+        rowIteratorState.setLastDoc(true);
+        List<String> spellCheckFields = getSpellCheckFields();
+        if (spellCheckFields.isEmpty()) {
+            // nothing is configured for spellcheck, hence nothing can be suggested
+            return null;
+        }
+
+        ElasticSearcher searcher = new ElasticSearcher(rowIteratorState.getIndexNode());
+        ElasticSearcherModel elasticSearcherModel = new ElasticSearcherModel.ElasticSearcherModelBuilder()
+                .withSpellCheck(getSuggestBuilder(spellCheckFields)).build();
+        SearchResponse docs = searcher.search(elasticSearcherModel);
+        Suggest suggest = docs.getSuggest();
+        if (suggest == null) {
+            // the response carries no suggest section
+            return null;
+        }
+
+        // Priority queue to get sorted results with decreasing score
+        Queue<Suggest.Suggestion.Entry.Option> pqueue =
+                new PriorityQueue<>((o1, o2) -> Float.compare(o2.getScore(), o1.getScore()));
+        for (Suggest.Suggestion<? extends Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option>> spellCheckResults : suggest) {
+            for (Suggest.Suggestion.Entry<? extends Suggest.Suggestion.Entry.Option> spell : spellCheckResults) {
+                pqueue.addAll(spell.getOptions());
+            }
+        }
+
+        // Drain with poll(): iterating a PriorityQueue directly gives no ordering guarantee,
+        // and the request order below must match the order in which responses are consumed.
+        List<String> suggestions = new LinkedList<>();
+        while (!pqueue.isEmpty()) {
+            suggestions.add(pqueue.poll().getText().string());
+        }
+        if (suggestions.isEmpty()) {
+            // a multi-search request without any sub-request is rejected by the client
+            return null;
+        }
+
+        String[] fields = spellCheckFields.toArray(new String[0]);
+        List<ElasticSearcherModel> elasticSearcherModels = new LinkedList<>();
+        for (String suggestion : suggestions) {
+            List<QueryBuilder> qbList = new LinkedList<>();
+            QueryBuilder queryBuilder = new MultiMatchQueryBuilder(suggestion, fields)
+                    .operator(Operator.AND).fuzzyTranspositions(false)
+                    .autoGenerateSynonymsPhraseQuery(false)
+                    .type(MatchQuery.Type.PHRASE);
+            qbList.add(queryBuilder);
+            qbList.addAll(ElasticQueryUtil.getPathRestrictionQuery(rowIteratorState.getPlan(),
+                    rowIteratorState.getPlanResult(), rowIteratorState.getFilter()));
+            QueryBuilder finalqb = ElasticQueryUtil.performAdditionalWraps(qbList);
+
+            elasticSearcherModels.add(new ElasticSearcherModel.ElasticSearcherModelBuilder()
+                    .withQuery(finalqb)
+                    .withBatchSize(100)
+                    .build());
+        }
+
+        MultiSearchResponse res = searcher.search(elasticSearcherModels);
+        // responses come back in request order, i.e. in the order of 'suggestions'
+        Iterator<String> suggestionIterator = suggestions.iterator();
+        for (MultiSearchResponse.Item response : res.getResponses()) {
+            String suggestion = suggestionIterator.next();
+            if (response.isFailure()) {
+                // accessibility of this suggestion cannot be verified, so fail loudly instead of guessing
+                throw new IOException("Spellcheck verification query failed for suggestion '"
+                        + suggestion + "'", response.getFailure());
+            }
+            boolean isResult = false;
+            for (SearchHit doc : response.getResponse().getHits()) {
+                if (rowIteratorState.getFilter().isAccessible(
+                        (String) doc.getSourceAsMap().get(ElasticConstants.ES_PATH_FIELD))) {
+                    isResult = true;
+                    break;
+                }
+            }
+            if (isResult) {
+                rowIteratorState.addResultRow(new FulltextIndex.FulltextResultRow(suggestion));
+            }
+        }
+
+        // Spellcheck return string result wrapped as nodes.
+        return null;
+    }
+
+    @Override
+    public String getQuery() {
+        return null;
+    }
+
+    /**
+     * @return the name of the trigram sub-field of {@code field}
+     */
+    private String getTrigramField(String field) {
+        return field + ElasticConstants.ES_TRIGRAM_SUFFIX;
+    }
+}
Propchange:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticSpellcheckProcess.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticConstants.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticConstants.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticConstants.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticConstants.java
Thu Jun 18 06:56:36 2020
@@ -23,6 +23,9 @@ public final class ElasticConstants {
public static final int ELASTIC_QUERY_BATCH_SIZE = 1000;
public static final int ELASTIC_QUERY_MAX_BATCH_SIZE = 10000;
+ public static final String ES_PATH_FIELD = ":path";
+ public static final String ES_TRIGRAM_SUFFIX = ".trigram";
+
private ElasticConstants() {
}
}
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java?rev=1878954&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java
(added)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java
Thu Jun 18 06:56:36 2020
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic.util;
+
+import org.apache.jackrabbit.oak.commons.PathUtils;
+import org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndex;
+import
org.apache.jackrabbit.oak.plugins.index.search.spi.query.FulltextIndexPlanner;
+import org.apache.jackrabbit.oak.spi.query.Filter;
+import org.apache.jackrabbit.oak.spi.query.QueryIndex;
+import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.QueryBuilder;
+import org.jetbrains.annotations.NotNull;
+
+import java.util.LinkedList;
+import java.util.List;
+import java.util.function.BiPredicate;
+import java.util.stream.StreamSupport;
+
+import static org.apache.jackrabbit.oak.commons.PathUtils.denotesRoot;
+import static org.apache.jackrabbit.oak.commons.PathUtils.getParentPath;
+import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newAncestorQuery;
+import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newDepthQuery;
+import static
org.apache.jackrabbit.oak.plugins.index.elastic.util.TermQueryBuilderFactory.newPathQuery;
+import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
+
+/**
+ * Static helpers for assembling Elasticsearch queries from Oak query plans.
+ */
+public final class ElasticQueryUtil {
+
+    private ElasticQueryUtil() {
+        // static utility class, not meant to be instantiated
+    }
+
+    /**
+     * Combines the given queries into a single query. A single-element list is returned as is.
+     * Otherwise every query becomes a {@code must} clause of a new boolean query, except boolean
+     * queries that contain {@code must_not} clauses: their {@code must_not} and {@code must}
+     * clauses are lifted directly into the new boolean query (see {@link #unwrapMustNot}).
+     *
+     * @param qs the list of queries. Cannot be null.
+     * @return the single query, or a boolean query combining all of them
+     */
+    @NotNull
+    public static QueryBuilder performAdditionalWraps(@NotNull List<QueryBuilder> qs) {
+        if (qs.size() == 1) {
+            // we don't need to worry about all-negatives in a bool query as
+            // BoolQueryBuilder.adjustPureNegative is on by default anyway
+            return qs.get(0);
+        }
+        BoolQueryBuilder bq = new BoolQueryBuilder();
+        // TODO: while I've attempted to translate oak-lucene code to corresponding ES one but I am
+        // unable to make sense of this code
+        for (QueryBuilder q : qs) {
+            boolean unwrapped = false;
+            if (q instanceof BoolQueryBuilder) {
+                unwrapped = unwrapMustNot((BoolQueryBuilder) q, bq);
+            }
+
+            if (!unwrapped) {
+                bq.must(q);
+            }
+        }
+        return bq;
+    }
+
+    /**
+     * unwraps any NOT clauses from the provided boolean query into another boolean query.
+     * When at least one NOT clause was unwrapped, the MUST clauses of the input are copied over too.
+     *
+     * @param input  the query to be analysed for the existence of NOT clauses. Cannot be null.
+     * @param output the query where the unwrapped NOTs will be saved into. Cannot be null.
+     * @return true if there was at least one unwrapped NOT. false otherwise.
+     */
+    private static boolean unwrapMustNot(@NotNull BoolQueryBuilder input, @NotNull BoolQueryBuilder output) {
+        boolean unwrapped = false;
+        for (QueryBuilder mustNot : input.mustNot()) {
+            output.mustNot(mustNot);
+            unwrapped = true;
+        }
+        if (unwrapped) {
+            // if we have unwrapped "must not" conditions,
+            // then we need to unwrap "must" conditions as well
+            for (QueryBuilder must : input.must()) {
+                output.must(must);
+            }
+        }
+
+        return unwrapped;
+    }
+
+    /**
+     * Get path restrictions from plan and create elastic's queryBuilder objects with these restrictions.
+     *
+     * @param plan       the index plan the restricted path is read from
+     * @param planResult the plan result, consulted for path transformation and for the depth query
+     * @param filter     the filter whose path restriction type selects the query to build
+     * @return List of QueryBuilder with pathRestrictions; empty when no path query applies
+     */
+    public static List<QueryBuilder> getPathRestrictionQuery(QueryIndex.IndexPlan plan,
+                                                             FulltextIndexPlanner.PlanResult planResult, Filter filter) {
+        final BiPredicate<Iterable<String>, String> any = (iterable, value) ->
+                StreamSupport.stream(iterable.spliterator(), false).anyMatch(value::equals);
+        List<QueryBuilder> qs = new LinkedList<>();
+
+        String path = FulltextIndex.getPathRestriction(plan);
+        switch (filter.getPathRestriction()) {
+            case ALL_CHILDREN:
+                if (!"/".equals(path)) {
+                    qs.add(newAncestorQuery(path));
+                }
+                break;
+            case DIRECT_CHILDREN:
+                BoolQueryBuilder bq = boolQuery();
+                bq.must(newAncestorQuery(path));
+                bq.must(newDepthQuery(path, planResult));
+                qs.add(bq);
+                break;
+            case EXACT:
+                // For transformed paths, we can only add path restriction if absolute path to property can be
+                // deduced
+                if (planResult.isPathTransformed()) {
+                    String parentPathSegment = planResult.getParentPathSegment();
+                    if (!any.test(PathUtils.elements(parentPathSegment), "*")) {
+                        qs.add(newPathQuery(path + parentPathSegment));
+                    }
+                } else {
+                    qs.add(newPathQuery(path));
+                }
+                break;
+            case PARENT:
+                if (denotesRoot(path)) {
+                    // there's no parent of the root node
+                    // we add a path that can not possibly occur because there
+                    // is no way to say "match no documents" in Lucene
+                    // (approach carried over from the oak-lucene implementation)
+                    qs.add(newPathQuery("///"));
+                } else {
+                    // For transformed paths, we can only add path restriction if absolute path to property can be
+                    // deduced
+                    if (planResult.isPathTransformed()) {
+                        String parentPathSegment = planResult.getParentPathSegment();
+                        if (!any.test(PathUtils.elements(parentPathSegment), "*")) {
+                            qs.add(newPathQuery(getParentPath(path) + parentPathSegment));
+                        }
+                    } else {
+                        qs.add(newPathQuery(getParentPath(path)));
+                    }
+                }
+                break;
+            case NO_RESTRICTION:
+                break;
+        }
+        return qs;
+    }
+
+}
Propchange:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/ElasticQueryUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/SearchSourceBuilderUtil.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/SearchSourceBuilderUtil.java?rev=1878954&r1=1878953&r2=1878954&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/SearchSourceBuilderUtil.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/util/SearchSourceBuilderUtil.java
Thu Jun 18 06:56:36 2020
@@ -26,6 +26,7 @@ public class SearchSourceBuilderUtil {
SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
.query(elasticSearcherModel.getQueryBuilder())
.fetchSource(elasticSearcherModel.getStoredField(), null)
+ .suggest(elasticSearcherModel.getSuggestBuilder())
.size(elasticSearcherModel.getBatchSize())
.from(elasticSearcherModel.getFrom());
Added:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java?rev=1878954&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java
(added)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java
Thu Jun 18 06:56:36 2020
@@ -0,0 +1,262 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.elastic;
+
+import com.github.dockerjava.api.DockerClient;
+import com.google.common.collect.Lists;
+import com.google.common.io.Closer;
+import org.apache.commons.io.FileUtils;
+import org.apache.jackrabbit.JcrConstants;
+import
org.apache.jackrabbit.commons.jackrabbit.authorization.AccessControlUtils;
+import org.apache.jackrabbit.oak.Oak;
+import org.apache.jackrabbit.oak.api.Type;
+import org.apache.jackrabbit.oak.jcr.Jcr;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.index.ElasticIndexEditorProvider;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.query.ElasticIndexProvider;
+import
org.apache.jackrabbit.oak.plugins.index.elastic.util.ElasticIndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.index.search.ExtractedTextCache;
+import
org.apache.jackrabbit.oak.plugins.index.search.util.IndexDefinitionBuilder;
+import org.apache.jackrabbit.oak.plugins.memory.MemoryNodeStore;
+import org.apache.jackrabbit.oak.spi.state.NodeStore;
+import org.elasticsearch.Version;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.BeforeClass;
+import org.junit.ClassRule;
+import org.junit.Rule;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.testcontainers.DockerClientFactory;
+import org.testcontainers.elasticsearch.ElasticsearchContainer;
+
+import javax.jcr.GuestCredentials;
+import javax.jcr.Node;
+import javax.jcr.Repository;
+import javax.jcr.RepositoryException;
+import javax.jcr.Session;
+import javax.jcr.SimpleCredentials;
+import javax.jcr.query.Query;
+import javax.jcr.query.QueryManager;
+import javax.jcr.query.QueryResult;
+import javax.jcr.query.Row;
+import javax.jcr.query.RowIterator;
+import javax.jcr.security.Privilege;
+import java.io.IOException;
+import java.util.List;
+import java.util.UUID;
+
+import static org.apache.jackrabbit.commons.JcrUtils.getOrCreateByPath;
+import static org.apache.jackrabbit.oak.InitialContentHelper.INITIAL_CONTENT;
+import static
org.apache.jackrabbit.oak.plugins.index.IndexConstants.INDEX_DEFINITIONS_NAME;
+import static
org.apache.jackrabbit.oak.plugins.index.elastic.ElasticIndexDefinition.BULK_FLUSH_INTERVAL_MS_DEFAULT;
+import static
org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_ANALYZED;
+import static
org.apache.jackrabbit.oak.plugins.index.search.FulltextIndexConstants.PROP_USE_IN_SPELLCHECK;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assume.assumeNotNull;
+
+public class ElasticSpellcheckTest {
+
+ // Logger for this test class. NOTE(review): not referenced elsewhere in this class.
+ private static final Logger LOG =
LoggerFactory.getLogger(ElasticSpellcheckTest.class);
+ // Session logged in with the "admin" credentials; used to write the index
+ // definition and the test content.
+ private Session adminSession;
+ // Session logged in with GuestCredentials in createRepository().
+ private Session anonymousSession;
+ // Query manager of the anonymous session, assigned in createRepository().
+ private QueryManager qe;
+ // Index definition node resolved in setup() after the index has been created.
+ private Node indexNode;
+ // NOTE(review): not referenced elsewhere in this class; index names are random UUIDs.
+ private static final String TEST_INDEX = "testIndex";
+
+ // Set this connection string as
+ // <scheme>://<hostname>:<port>?key_id=<>,key_secret=<>
+ // key_id and key_secret are optional; supply them only when the ES server
+ // needs authentication.
+ // Do not set this if docker is running and you want to run the tests on
+ // docker instead.
+ private static final String elasticConnectionString =
System.getProperty("elasticConnectionString");
+
+ // Class-level rule built from elasticConnectionString; the tests use it to pick
+ // between a docker-based and a string-based Elasticsearch connection and to
+ // close the connection again after each test.
+ @ClassRule
+ public static final ElasticConnectionRule elasticRule = new
ElasticConnectionRule(elasticConnectionString);
+
+ /**
+  * Logs out both JCR sessions and closes the ES connection after every test method.
+  * <p>
+  * The sessions are null-checked because {@link #setup()} can fail before they are
+  * assigned, and the connection is closed in a finally block so that a failing
+  * logout cannot leave it open.
+  *
+  * @throws IOException if closing the ES connection fails
+  */
+ @After
+ public void cleanup() throws IOException {
+     try {
+         if (anonymousSession != null) {
+             anonymousSession.logout();
+         }
+         if (adminSession != null) {
+             adminSession.logout();
+         }
+     } finally {
+         elasticRule.closeElasticConnection();
+     }
+ }
+
+ /**
+  * Builds a fresh repository and index definition before each test and keeps a
+  * handle on the created index definition node.
+  */
+ @Before
+ public void setup() throws Exception {
+     createRepository();
+     final String createdIndexName = createIndex();
+     final Node indexDefinitions = adminSession.getRootNode().getNode(INDEX_DEFINITIONS_NAME);
+     indexNode = indexDefinitions.getNode(createdIndexName);
+ }
+
+ private void createRepository() throws RepositoryException {
+ ElasticConnection connection = elasticRule.useDocker() ?
elasticRule.getElasticConnectionForDocker() :
+ elasticRule.getElasticConnectionFromString();
+ ElasticIndexEditorProvider editorProvider = new
ElasticIndexEditorProvider(connection,
+ new ExtractedTextCache(10 * FileUtils.ONE_MB, 100));
+ ElasticIndexProvider indexProvider = new
ElasticIndexProvider(connection);
+
+ NodeStore nodeStore = new MemoryNodeStore(INITIAL_CONTENT);
+ Oak oak = new Oak(nodeStore)
+ .with(editorProvider)
+ .with(indexProvider);
+
+ Jcr jcr = new Jcr(oak);
+ Repository repository = jcr.createRepository();
+
+ adminSession = repository.login(new SimpleCredentials("admin",
"admin".toCharArray()), null);
+
+ // we'd always query anonymously
+ anonymousSession = repository.login(new GuestCredentials(), null);
+ anonymousSession.refresh(true);
+ anonymousSession.save();
+
+ qe = anonymousSession.getWorkspace().getQueryManager();
+ }
+
+ private class IndexSkeleton {
+ IndexDefinitionBuilder indexDefinitionBuilder;
+ IndexDefinitionBuilder.IndexRule indexRule;
+
+ void initialize() {
+ initialize(JcrConstants.NT_BASE);
+ }
+
+ void initialize(String nodeType) {
+ indexDefinitionBuilder = new ElasticIndexDefinitionBuilder();
+ indexRule = indexDefinitionBuilder.indexRule(nodeType);
+ }
+
+ String build() throws RepositoryException {
+ final String indexName = UUID.randomUUID().toString();
+
indexDefinitionBuilder.build(adminSession.getRootNode().getNode(INDEX_DEFINITIONS_NAME).addNode(indexName));
+ return indexName;
+ }
+ }
+
+ /**
+  * Defines the index used by the tests: {@code noAsync()}, with the properties
+  * {@code cons} and {@code foo} as property indexes; {@code foo} is additionally
+  * flagged with PROP_USE_IN_SPELLCHECK and PROP_ANALYZED.
+  *
+  * @return the name of the created index node
+  * @throws RepositoryException if the index definition cannot be written
+  */
+ private String createIndex() throws RepositoryException {
+     IndexSkeleton skeleton = new IndexSkeleton();
+     skeleton.initialize();
+     skeleton.indexDefinitionBuilder.noAsync();
+
+     IndexDefinitionBuilder.IndexRule rule = skeleton.indexRule;
+     rule.property("cons").propertyIndex();
+     rule.property("foo").propertyIndex();
+     rule.property("foo").getBuilderTree()
+             .setProperty(PROP_USE_IN_SPELLCHECK, true, Type.BOOLEAN);
+     rule.property("foo").getBuilderTree()
+             .setProperty(PROP_ANALYZED, true, Type.BOOLEAN);
+
+     return skeleton.build();
+ }
+
+ /**
+  * Indexes "descent" and "decent" on two nested nodes and expects both to be
+  * suggested for the misspelled word "desent".
+  */
+ @Test
+ public void testSpellcheckSingleWord() throws Exception {
+     QueryManager queryManager = adminSession.getWorkspace().getQueryManager();
+     Node parent = allow(getOrCreateByPath("/parent", "oak:Unstructured", adminSession));
+     Node outer = parent.addNode("node1");
+     outer.setProperty("foo", "descent");
+     outer.addNode("node2").setProperty("foo", "decent");
+     adminSession.save();
+
+     final String statement =
+             "SELECT [rep:spellcheck()] FROM nt:base WHERE SPELLCHECK('desent')";
+     final Query query = queryManager.createQuery(statement, Query.SQL);
+     assertEventually(() -> {
+         final List<String> suggestions;
+         try {
+             suggestions = getResult(query.execute(), "rep:spellcheck()");
+         } catch (RepositoryException e) {
+             throw new RuntimeException(e);
+         }
+         assertEquals("[decent, descent]", suggestions.toString());
+     });
+ }
+
+ /**
+  * Same content as the single-word test, but the query is restricted to the
+  * descendants of /parent/node1, so only "decent" (stored on node2) is expected.
+  */
+ @Test
+ public void testSpellcheckSingleWordWithDescendantNode() throws Exception {
+     QueryManager queryManager = adminSession.getWorkspace().getQueryManager();
+     Node parent = allow(getOrCreateByPath("/parent", "oak:Unstructured", adminSession));
+     Node outer = parent.addNode("node1");
+     outer.setProperty("foo", "descent");
+     outer.addNode("node2").setProperty("foo", "decent");
+     adminSession.save();
+
+     final String statement = "SELECT [rep:spellcheck()] FROM nt:base WHERE " +
+             "SPELLCHECK('desent') and isDescendantNode('/parent/node1')";
+     final Query query = queryManager.createQuery(statement, Query.SQL);
+     assertEventually(() -> {
+         final List<String> suggestions;
+         try {
+             suggestions = getResult(query.execute(), "rep:spellcheck()");
+         } catch (RepositoryException e) {
+             throw new RuntimeException(e);
+         }
+         assertEquals("[decent]", suggestions.toString());
+     });
+ }
+
+ /**
+  * Indexes four sentences on four sibling nodes and expects the misspelled
+  * phrase "votin in ontari" to be corrected to "voting in ontario".
+  * <p>
+  * Each sentence is stored on its own node. Previously the third and fourth
+  * sentences were written to node2 by mistake, overwriting its value and
+  * leaving node3 and node4 without any indexed text.
+  */
+ @Test
+ public void testSpellcheckMultipleWords() throws Exception {
+     // persist whatever is still pending from setup() (presumably the index
+     // definition - confirm) before any content is added
+     adminSession.save();
+     QueryManager qm = adminSession.getWorkspace().getQueryManager();
+     Node par = allow(getOrCreateByPath("/parent", "oak:Unstructured", adminSession));
+     Node n1 = par.addNode("node1");
+     n1.setProperty("foo", "it is always a good idea to go visiting ontario");
+     Node n2 = par.addNode("node2");
+     n2.setProperty("foo", "ontario is a nice place to live in");
+     Node n3 = par.addNode("node3");
+     n3.setProperty("foo", "I flied to ontario for voting for the major polls");
+     Node n4 = par.addNode("node4");
+     n4.setProperty("foo",
+             "I will go voting in ontario, I always voted since I've been allowed to");
+     adminSession.save();
+
+     String sql =
+             "SELECT [rep:spellcheck()] FROM nt:base WHERE SPELLCHECK('votin in ontari')";
+     Query q = qm.createQuery(sql, Query.SQL);
+
+     assertEventually(() -> {
+         try {
+             assertEquals("[voting in ontario]",
+                     getResult(q.execute(), "rep:spellcheck()").toString());
+         } catch (RepositoryException e) {
+             throw new RuntimeException(e);
+         }
+     });
+ }
+
+ /**
+  * Denies {@code Privilege.JCR_ALL} on the given node for the "anonymous"
+  * principal via {@code AccessControlUtils.deny}.
+  * NOTE(review): not called from anywhere in this class.
+  *
+  * @param node node whose access control is changed
+  * @return the same node, to allow call chaining
+  * @throws RepositoryException if the access control change fails
+  */
+ private Node deny(Node node) throws RepositoryException {
+ AccessControlUtils.deny(node, "anonymous", Privilege.JCR_ALL);
+ return node;
+ }
+
+ /**
+  * Allows {@code Privilege.JCR_READ} on the given node for the "anonymous"
+  * principal via {@code AccessControlUtils.allow}.
+  *
+  * @param node node whose access control is changed
+  * @return the same node, to allow call chaining
+  * @throws RepositoryException if the access control change fails
+  */
+ private Node allow(Node node) throws RepositoryException {
+ AccessControlUtils.allow(node, "anonymous", Privilege.JCR_READ);
+ return node;
+ }
+
+ static List<String> getResult(QueryResult result, String propertyName)
throws RepositoryException {
+ List<String> results = Lists.newArrayList();
+ RowIterator it = null;
+
+ it = result.getRows();
+ while (it.hasNext()) {
+ Row row = it.nextRow();
+ results.add(row.getValue(propertyName).getString());
+ }
+ return results;
+ }
+
+ /**
+  * Delegates to {@code ElasticTestUtils.assertEventually}, passing three times
+  * {@code BULK_FLUSH_INTERVAL_MS_DEFAULT} as the second argument.
+  * NOTE(review): the second argument is presumably a timeout in milliseconds
+  * during which the runnable is retried - confirm against ElasticTestUtils.
+  *
+  * @param r the assertion block to run
+  */
+ private static void assertEventually(Runnable r) {
+ ElasticTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT *
3);
+ }
+
+}
Propchange:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticSpellcheckTest.java
------------------------------------------------------------------------------
svn:eol-style = native