Author: fortino
Date: Wed Jul 29 14:45:25 2020
New Revision: 1880408
URL: http://svn.apache.org/viewvc?rev=1880408&view=rev
Log:
OAK-9155: avoid duplication of analyzed/nodeScopeIndex values in :fulltext
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
Wed Jul 29 14:45:25 2020
@@ -110,11 +110,15 @@ class ElasticDocumentMaker extends Fullt
@Override
protected void indexFulltextValue(ElasticDocument doc, String value) {
- // Note: diversion from lucene impl - here we are storing even these
cases and not just binary
doc.addFulltext(value);
}
@Override
+ protected boolean isFulltextValuePersisted() {
+ return false;
+ }
+
+ @Override
protected void indexTypedProperty(ElasticDocument doc, PropertyState
property, String pname, PropertyDefinition pd, int i) {
int tag = property.getType().tag();
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
Wed Jul 29 14:45:25 2020
@@ -130,7 +130,6 @@ class ElasticIndexHelper {
.field("type", "integer")
.field("doc_values", false) // no need to sort/aggregate here
.endObject();
- // TODO: to increase efficiency, we could potentially remove this and
use a multi match query when needed
mappingBuilder.startObject(FieldNames.FULLTEXT)
.field("type", "text")
.field("analyzer", "oak_analyzer")
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
Wed Jul 29 14:45:25 2020
@@ -97,6 +97,7 @@ import static org.elasticsearch.index.qu
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
+import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
@@ -421,8 +422,7 @@ public class ElasticRequestHandler {
}
private boolean visitTerm(String propertyName, String text, String
boost, boolean not) {
- String p = getElasticFieldName(propertyName);
- QueryBuilder q = tokenToQuery(text, p, pr);
+ QueryBuilder q = fullTextQuery(text,
getElasticFieldName(propertyName), pr);
if (boost != null) {
q.boost(Float.parseFloat(boost));
}
@@ -620,28 +620,21 @@ public class ElasticRequestHandler {
return QueryBuilders.multiMatchQuery(uuid);
}
- private static QueryBuilder tokenToQuery(String text, String fieldName,
PlanResult pr) {
+ private static QueryBuilder fullTextQuery(String text, String fieldName,
PlanResult pr) {
// default match query are executed in OR, we need to use AND instead
to avoid that
// every document having at least one term in the `text` will match.
If there are multiple
// contains clause they will go to different match queries and will be
executed in OR
- QueryBuilder ret;
- IndexDefinition.IndexingRule indexingRule = pr.indexingRule;
- //Expand the query on fulltext field
- if (FieldNames.FULLTEXT.equals(fieldName) &&
!indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
- BoolQueryBuilder in = boolQuery();
- for (PropertyDefinition pd :
indexingRule.getNodeScopeAnalyzedProps()) {
- QueryBuilder q = matchQuery(pd.name,
text).boost(pd.boost).operator(Operator.AND);
- in.should(q);
- }
-
- //Add the query for actual fulltext field also. That query would
not be boosted
- // TODO: do we need this if all the analyzed fields are queried?
- ret = in.should(matchQuery(fieldName,
text).operator(Operator.AND));
+ if (FieldNames.FULLTEXT.equals(fieldName) &&
!pr.indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
+ MultiMatchQueryBuilder multiMatchQuery = multiMatchQuery(text)
+ .operator(Operator.AND)
+ .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
+ pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd ->
multiMatchQuery.field(pd.name, pd.boost));
+ // Add the query for actual fulltext field also. That query would
not be boosted
+ // and could contain other parts like renditions, node name, etc
+ return multiMatchQuery.field(fieldName);
} else {
- ret = matchQuery(fieldName, text).operator(Operator.AND);
+ return matchQuery(fieldName, text).operator(Operator.AND);
}
-
- return ret;
}
private QueryBuilder createQuery(String propertyName,
Filter.PropertyRestriction pr,
Modified:
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
(original)
+++
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
Wed Jul 29 14:45:25 2020
@@ -64,33 +64,33 @@ public class ElasticIndexAggregationNtFi
@Override
public void initialize(@NotNull NodeBuilder builder) {
- super.initialize(builder);
- // registering additional node types for wider testing
- InputStream stream = null;
+ super.initialize(builder);
+ // registering additional node types for wider testing
+ InputStream stream = null;
+ try {
+ stream = ElasticIndexAggregationNtFileTest.class
+ .getResourceAsStream("test_nodetypes.cnd");
+ NodeState base = builder.getNodeState();
+ NodeStore store = new MemoryNodeStore(base);
+
+ Root root = RootFactory.createSystemRoot(store, new
EditorHook(
+ new CompositeEditorProvider(new
NamespaceEditorProvider(),
+ new TypeEditorProvider())), null, null,
null);
+ NodeTypeRegistry.register(root, stream, "testing node
types");
+ NodeState target = store.getRoot();
+ target.compareAgainstBaseState(base, new
ApplyDiff(builder));
+ } catch (Exception e) {
+ LOG.error("Error while registering required node types.
Failing here", e);
+ fail("Error while registering required node types");
+ } finally {
+ printNodeTypes(builder);
+ if (stream != null) {
try {
- stream = ElasticIndexAggregationNtFileTest.class
- .getResourceAsStream("test_nodetypes.cnd");
- NodeState base = builder.getNodeState();
- NodeStore store = new MemoryNodeStore(base);
-
- Root root = RootFactory.createSystemRoot(store,
new EditorHook(
- new CompositeEditorProvider(new
NamespaceEditorProvider(),
- new TypeEditorProvider())), null,
null, null);
- NodeTypeRegistry.register(root, stream, "testing
node types");
- NodeState target = store.getRoot();
- target.compareAgainstBaseState(base, new
ApplyDiff(builder));
- } catch (Exception e) {
- LOG.error("Error while registering required node
types. Failing here", e);
- fail("Error while registering required node
types");
- } finally {
- printNodeTypes(builder);
- if (stream != null) {
- try {
- stream.close();
- } catch (IOException e) {
- LOG.debug("Ignoring exception on stream
closing.", e);
- }
- }
+ stream.close();
+ } catch (IOException e) {
+ LOG.debug("Ignoring exception on stream closing.",
e);
+ }
+ }
}
}
@@ -126,7 +126,7 @@ public class ElasticIndexAggregationNtFi
}
@Test
- public void indexNtFileText() throws CommitFailedException,
InterruptedException {
+ public void indexNtFileText() throws CommitFailedException {
setTraversalEnabled(false);
final String statement = "//element(*, test:Asset)[ " +
"jcr:contains(jcr:content/renditions/dam.text.txt/jcr:content,
'quick') ]";
@@ -149,7 +149,5 @@ public class ElasticIndexAggregationNtFi
assertEventually(()-> {
assertQuery(statement, "xpath", expected);
});
-
-
}
}
Modified:
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
(original)
+++
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
Wed Jul 29 14:45:25 2020
@@ -68,9 +68,9 @@ public abstract class FulltextDocumentMa
private final int logWarnStringSizeThreshold;
public FulltextDocumentMaker(@Nullable FulltextBinaryTextExtractor
textExtractor,
- @NotNull IndexDefinition definition,
- IndexDefinition.IndexingRule indexingRule,
- @NotNull String path) {
+ @NotNull IndexDefinition definition,
+ IndexDefinition.IndexingRule indexingRule,
+ @NotNull String path) {
this.textExtractor = textExtractor;
this.definition = checkNotNull(definition);
this.indexingRule = checkNotNull(indexingRule);
@@ -204,7 +204,6 @@ public abstract class FulltextDocumentMa
return finalizeDoc(document, dirty, facet);
}
-
private boolean indexFacets(D doc, PropertyState property, String pname,
PropertyDefinition pd) {
int tag = property.getType().tag();
int idxDefinedTag = pd.getType();
@@ -277,7 +276,9 @@ public abstract class FulltextDocumentMa
}
if (pd.nodeScopeIndex) {
- indexFulltextValue(doc, value);
+ if (isFulltextValuePersisted()) {
+ indexFulltextValue(doc, value);
+ }
if (pd.useInSimilarity) {
log.trace("indexing similarity strings for {}",
pd.name);
try {
@@ -303,6 +304,13 @@ public abstract class FulltextDocumentMa
return dirty;
}
+ /**
+ * Returns {@code true} if nodeScopeIndex full text values need to be
indexed
+ */
+ protected boolean isFulltextValuePersisted() {
+ return true;
+ }
+
protected abstract boolean indexSimilarityTag(D doc, PropertyState
property);
protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition
pd, Blob blob) throws IOException;
@@ -377,7 +385,7 @@ public abstract class FulltextDocumentMa
}
private List<String> newBinary(
- PropertyState property, NodeState state, String path) {
+ PropertyState property, NodeState state, String path) {
if (textExtractor == null){
//Skip text extraction for sync indexing
return Collections.emptyList();
@@ -417,7 +425,6 @@ public abstract class FulltextDocumentMa
return fieldAdded;
}
-
private boolean indexNullCheckEnabledProps(String path, D doc, NodeState
state) {
boolean fieldAdded = false;
for (PropertyDefinition pd :
indexingRule.getNullCheckEnabledProperties()) {
@@ -461,7 +468,7 @@ public abstract class FulltextDocumentMa
if (pd != null
&& pd.index
&& (pd.includePropertyType(ps.getType().tag())
- ||
indexingRule.includePropertyType(ps.getType().tag()))) {
+ || indexingRule.includePropertyType(ps.getType().tag()))) {
dirty = true;
break;
}
@@ -513,7 +520,7 @@ public abstract class FulltextDocumentMa
* index aggregates on a certain path
*/
private boolean[] indexAggregates(final String path, final D document,
- final NodeState state) {
+ final NodeState state) {
final AtomicBoolean dirtyFlag = new AtomicBoolean();
final AtomicBoolean facetFlag = new AtomicBoolean();
indexingRule.getAggregate().collectAggregates(state, new
Aggregate.ResultCollector() {
@@ -631,5 +638,4 @@ public abstract class FulltextDocumentMa
indexNodeName(doc, value);
}
-
}