Author: fortino
Date: Wed Jul 29 14:45:25 2020
New Revision: 1880408

URL: http://svn.apache.org/viewvc?rev=1880408&view=rev
Log:
OAK-9155: avoid duplication of analyzed/nodeScopeIndex values in :fulltext

Modified:
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
    
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
    
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticDocumentMaker.java
 Wed Jul 29 14:45:25 2020
@@ -110,11 +110,15 @@ class ElasticDocumentMaker extends Fullt
 
     @Override
     protected void indexFulltextValue(ElasticDocument doc, String value) {
-        // Note: diversion from lucene impl - here we are storing even these 
cases and not just binary
         doc.addFulltext(value);
     }
 
     @Override
+    protected boolean isFulltextValuePersisted() {
+        return false;
+    }
+
+    @Override
     protected void indexTypedProperty(ElasticDocument doc, PropertyState 
property, String pname, PropertyDefinition pd, int i) {
         int tag = property.getType().tag();
 

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/index/ElasticIndexHelper.java
 Wed Jul 29 14:45:25 2020
@@ -130,7 +130,6 @@ class ElasticIndexHelper {
                 .field("type", "integer")
                 .field("doc_values", false) // no need to sort/aggregate here
                 .endObject();
-        // TODO: to increase efficiency, we could potentially remove this and 
use a multi match query when needed
         mappingBuilder.startObject(FieldNames.FULLTEXT)
                 .field("type", "text")
                 .field("analyzer", "oak_analyzer")

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elastic/query/ElasticRequestHandler.java
 Wed Jul 29 14:45:25 2020
@@ -97,6 +97,7 @@ import static org.elasticsearch.index.qu
 import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
 import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
 import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
+import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
 import static org.elasticsearch.index.query.QueryBuilders.nestedQuery;
 import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
 import static org.elasticsearch.index.query.QueryBuilders.termQuery;
@@ -421,8 +422,7 @@ public class ElasticRequestHandler {
             }
 
             private boolean visitTerm(String propertyName, String text, String 
boost, boolean not) {
-                String p = getElasticFieldName(propertyName);
-                QueryBuilder q = tokenToQuery(text, p, pr);
+                QueryBuilder q = fullTextQuery(text, 
getElasticFieldName(propertyName), pr);
                 if (boost != null) {
                     q.boost(Float.parseFloat(boost));
                 }
@@ -620,28 +620,21 @@ public class ElasticRequestHandler {
         return QueryBuilders.multiMatchQuery(uuid);
     }
 
-    private static QueryBuilder tokenToQuery(String text, String fieldName, 
PlanResult pr) {
+    private static QueryBuilder fullTextQuery(String text, String fieldName, 
PlanResult pr) {
         // default match query are executed in OR, we need to use AND instead 
to avoid that
         // every document having at least one term in the `text` will match. 
If there are multiple
         // contains clause they will go to different match queries and will be 
executed in OR
-        QueryBuilder ret;
-        IndexDefinition.IndexingRule indexingRule = pr.indexingRule;
-        //Expand the query on fulltext field
-        if (FieldNames.FULLTEXT.equals(fieldName) && 
!indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
-            BoolQueryBuilder in = boolQuery();
-            for (PropertyDefinition pd : 
indexingRule.getNodeScopeAnalyzedProps()) {
-                QueryBuilder q = matchQuery(pd.name, 
text).boost(pd.boost).operator(Operator.AND);
-                in.should(q);
-            }
-
-            //Add the query for actual fulltext field also. That query would 
not be boosted
-            // TODO: do we need this if all the analyzed fields are queried?
-            ret = in.should(matchQuery(fieldName, 
text).operator(Operator.AND));
+        if (FieldNames.FULLTEXT.equals(fieldName) && 
!pr.indexingRule.getNodeScopeAnalyzedProps().isEmpty()) {
+            MultiMatchQueryBuilder multiMatchQuery = multiMatchQuery(text)
+                    .operator(Operator.AND)
+                    .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS);
+            pr.indexingRule.getNodeScopeAnalyzedProps().forEach(pd -> 
multiMatchQuery.field(pd.name, pd.boost));
+            // Add the query for actual fulltext field also. That query would 
not be boosted
+            // and could contain other parts like renditions, node name, etc
+            return multiMatchQuery.field(fieldName);
         } else {
-            ret = matchQuery(fieldName, text).operator(Operator.AND);
+            return matchQuery(fieldName, text).operator(Operator.AND);
         }
-
-        return ret;
     }
 
     private QueryBuilder createQuery(String propertyName, 
Filter.PropertyRestriction pr,

Modified: 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elastic/ElasticIndexAggregationNtFileTest.java
 Wed Jul 29 14:45:25 2020
@@ -64,33 +64,33 @@ public class ElasticIndexAggregationNtFi
 
             @Override
             public void initialize(@NotNull NodeBuilder builder) {
-                        super.initialize(builder);
-                        // registering additional node types for wider testing
-                        InputStream stream = null;
+                super.initialize(builder);
+                // registering additional node types for wider testing
+                InputStream stream = null;
+                try {
+                    stream = ElasticIndexAggregationNtFileTest.class
+                            .getResourceAsStream("test_nodetypes.cnd");
+                    NodeState base = builder.getNodeState();
+                    NodeStore store = new MemoryNodeStore(base);
+
+                    Root root = RootFactory.createSystemRoot(store, new 
EditorHook(
+                            new CompositeEditorProvider(new 
NamespaceEditorProvider(),
+                                    new TypeEditorProvider())), null, null, 
null);
+                    NodeTypeRegistry.register(root, stream, "testing node 
types");
+                    NodeState target = store.getRoot();
+                    target.compareAgainstBaseState(base, new 
ApplyDiff(builder));
+                } catch (Exception e) {
+                    LOG.error("Error while registering required node types. 
Failing here", e);
+                    fail("Error while registering required node types");
+                } finally {
+                    printNodeTypes(builder);
+                    if (stream != null) {
                         try {
-                            stream = ElasticIndexAggregationNtFileTest.class
-                                    .getResourceAsStream("test_nodetypes.cnd");
-                            NodeState base = builder.getNodeState();
-                            NodeStore store = new MemoryNodeStore(base);
-
-                            Root root = RootFactory.createSystemRoot(store, 
new EditorHook(
-                                    new CompositeEditorProvider(new 
NamespaceEditorProvider(),
-                                            new TypeEditorProvider())), null, 
null, null);
-                            NodeTypeRegistry.register(root, stream, "testing 
node types");
-                            NodeState target = store.getRoot();
-                            target.compareAgainstBaseState(base, new 
ApplyDiff(builder));
-                        } catch (Exception e) {
-                            LOG.error("Error while registering required node 
types. Failing here", e);
-                            fail("Error while registering required node 
types");
-                        } finally {
-                            printNodeTypes(builder);
-                            if (stream != null) {
-                                try {
-                                    stream.close();
-                                } catch (IOException e) {
-                                    LOG.debug("Ignoring exception on stream 
closing.", e);
-                                }
-                            }
+                            stream.close();
+                        } catch (IOException e) {
+                            LOG.debug("Ignoring exception on stream closing.", 
e);
+                        }
+                    }
                 }
             }
 
@@ -126,7 +126,7 @@ public class ElasticIndexAggregationNtFi
     }
 
     @Test
-    public void indexNtFileText() throws CommitFailedException, 
InterruptedException {
+    public void indexNtFileText() throws CommitFailedException {
         setTraversalEnabled(false);
         final String statement = "//element(*, test:Asset)[ " +
                 "jcr:contains(jcr:content/renditions/dam.text.txt/jcr:content, 
'quick') ]";
@@ -149,7 +149,5 @@ public class ElasticIndexAggregationNtFi
         assertEventually(()-> {
             assertQuery(statement, "xpath", expected);
         });
-
-
     }
 }

Modified: 
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java?rev=1880408&r1=1880407&r2=1880408&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-search/src/main/java/org/apache/jackrabbit/oak/plugins/index/search/spi/editor/FulltextDocumentMaker.java
 Wed Jul 29 14:45:25 2020
@@ -68,9 +68,9 @@ public abstract class FulltextDocumentMa
     private final int logWarnStringSizeThreshold;
 
     public FulltextDocumentMaker(@Nullable FulltextBinaryTextExtractor 
textExtractor,
-                               @NotNull IndexDefinition definition,
-                               IndexDefinition.IndexingRule indexingRule,
-                               @NotNull String path) {
+                                 @NotNull IndexDefinition definition,
+                                 IndexDefinition.IndexingRule indexingRule,
+                                 @NotNull String path) {
         this.textExtractor = textExtractor;
         this.definition = checkNotNull(definition);
         this.indexingRule = checkNotNull(indexingRule);
@@ -204,7 +204,6 @@ public abstract class FulltextDocumentMa
         return finalizeDoc(document, dirty, facet);
     }
 
-
     private boolean indexFacets(D doc, PropertyState property, String pname, 
PropertyDefinition pd) {
         int tag = property.getType().tag();
         int idxDefinedTag = pd.getType();
@@ -277,7 +276,9 @@ public abstract class FulltextDocumentMa
                     }
 
                     if (pd.nodeScopeIndex) {
-                        indexFulltextValue(doc, value);
+                        if (isFulltextValuePersisted()) {
+                            indexFulltextValue(doc, value);
+                        }
                         if (pd.useInSimilarity) {
                             log.trace("indexing similarity strings for {}", 
pd.name);
                             try {
@@ -303,6 +304,13 @@ public abstract class FulltextDocumentMa
         return dirty;
     }
 
+    /**
+     * Returns {@code true} if nodeScopeIndex full text values need to be 
indexed
+     */
+    protected boolean isFulltextValuePersisted() {
+        return true;
+    }
+
     protected abstract boolean indexSimilarityTag(D doc, PropertyState 
property);
 
     protected abstract void indexSimilarityBinaries(D doc, PropertyDefinition 
pd, Blob blob) throws IOException;
@@ -377,7 +385,7 @@ public abstract class FulltextDocumentMa
     }
 
     private List<String> newBinary(
-        PropertyState property, NodeState state, String path) {
+            PropertyState property, NodeState state, String path) {
         if (textExtractor == null){
             //Skip text extraction for sync indexing
             return Collections.emptyList();
@@ -417,7 +425,6 @@ public abstract class FulltextDocumentMa
         return fieldAdded;
     }
 
-
     private boolean indexNullCheckEnabledProps(String path, D doc, NodeState 
state) {
         boolean fieldAdded = false;
         for (PropertyDefinition pd : 
indexingRule.getNullCheckEnabledProperties()) {
@@ -461,7 +468,7 @@ public abstract class FulltextDocumentMa
             if (pd != null
                     && pd.index
                     && (pd.includePropertyType(ps.getType().tag())
-                            || 
indexingRule.includePropertyType(ps.getType().tag()))) {
+                    || indexingRule.includePropertyType(ps.getType().tag()))) {
                 dirty = true;
                 break;
             }
@@ -513,7 +520,7 @@ public abstract class FulltextDocumentMa
      * index aggregates on a certain path
      */
     private boolean[] indexAggregates(final String path, final D document,
-                                    final NodeState state) {
+                                      final NodeState state) {
         final AtomicBoolean dirtyFlag = new AtomicBoolean();
         final AtomicBoolean facetFlag = new AtomicBoolean();
         indexingRule.getAggregate().collectAggregates(state, new 
Aggregate.ResultCollector() {
@@ -631,5 +638,4 @@ public abstract class FulltextDocumentMa
         indexNodeName(doc, value);
     }
 
-
 }


Reply via email to