Author: thomasm
Date: Wed Jun 10 11:13:04 2015
New Revision: 1684642

URL: http://svn.apache.org/r1684642
Log:
OAK-2926 Fast result size estimate

Modified:
    
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
    
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
    
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
    
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
    
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java

Modified: 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
 (original)
+++ 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
 Wed Jun 10 11:13:04 2015
@@ -22,6 +22,7 @@ import java.util.NoSuchElementException;
 import java.util.Set;
 
 import org.apache.jackrabbit.oak.api.PropertyValue;
+import org.apache.jackrabbit.oak.api.Result.SizePrecision;
 import org.apache.jackrabbit.oak.spi.query.Cursor;
 import org.apache.jackrabbit.oak.spi.query.IndexRow;
 import org.apache.jackrabbit.oak.spi.query.Cursors.AbstractCursor;
@@ -125,4 +126,10 @@ class AggregationCursor extends Abstract
             
         };
     }
+    
+    @Override
+    public long getSize(SizePrecision precision, long max) {
+        return cursor.getSize(precision, max);
+    }
+    
 }
\ No newline at end of file

Modified: 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
 (original)
+++ 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
 Wed Jun 10 11:13:04 2015
@@ -147,11 +147,20 @@ public class UnionQueryImpl implements Q
     
     @Override
     public long getSize(SizePrecision precision, long max) {
-        // Note: this does not respect the "unionAll == false" case
+        // Note: for "unionAll == false", overlapping entries are counted twice
         // (this can result in a larger reported size, but it is not a 
security problem)
-        return QueryImpl.saturatedAdd(
-                left.getSize(precision, max),
-                right.getSize(precision, max));
+        
+        // ensure the queries are both executed, otherwise the cursor is not 
set,
+        // and so the size would be -1
+        left.executeQuery().getRows().iterator().hasNext();
+        right.executeQuery().getRows().iterator().hasNext();
+        long a = left.getSize(precision, max);
+        long b = right.getSize(precision, max);
+        if (a < 0 || b < 0) {
+            return -1;
+        }
+        long total = QueryImpl.saturatedAdd(a, b);
+        return Math.min(limit, total);
     }
     
     @Override

Modified: 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
 (original)
+++ 
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
 Wed Jun 10 11:13:04 2015
@@ -29,6 +29,7 @@ import org.apache.jackrabbit.oak.commons
 import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
 import org.apache.jackrabbit.oak.query.FilterIterators;
 import org.apache.jackrabbit.oak.query.QueryEngineSettings;
+import org.apache.jackrabbit.oak.query.QueryImpl;
 import org.apache.jackrabbit.oak.query.index.IndexRowImpl;
 import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
 import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
@@ -425,6 +426,17 @@ public class Cursors {
             FilterIterators.checkMemoryLimit(seen.size(), settings);
         }
         
+        @Override
+        public long getSize(SizePrecision precision, long max) {
+            // this is the worst case
+            long a = first.getSize(precision, max);
+            long b = second.getSize(precision, max);
+            if (a < 0 || b < 0) {
+                return -1;
+            }
+            return QueryImpl.saturatedAdd(a, b);
+        }
+        
     }
 
     /**
@@ -439,16 +451,21 @@ public class Cursors {
         private boolean closed;
 
         private Cursor currentCursor;
+        private int cursorListIndex;
         private IndexRow current;
 
         ConcatCursor(List<Cursor> cursors, QueryEngineSettings settings) {
             this.cursors = cursors;
             this.settings = settings;
-            if (cursors.size() == 0) {
+            nextCursor();
+        }
+        
+        private void nextCursor() {
+            if (cursorListIndex >= cursors.size()) {
                 init = true;
                 closed = true;
             } else {
-                this.currentCursor = cursors.remove(0);
+                currentCursor = cursors.get(cursorListIndex++);
             }
         }
 
@@ -478,11 +495,9 @@ public class Cursors {
         private void fetchNext() {
             while (true) {
                 while (!currentCursor.hasNext()) {
-                    if (cursors.isEmpty()) {
-                        closed = true;
+                    nextCursor();
+                    if (closed) {
                         return;
-                    } else {
-                        currentCursor = cursors.remove(0);
                     }
                 }
                 IndexRow c = currentCursor.next();
@@ -500,6 +515,20 @@ public class Cursors {
             seen.add(path);
             FilterIterators.checkMemoryLimit(seen.size(), settings);
         }
+        
+        @Override
+        public long getSize(SizePrecision precision, long max) {
+            // this is the worst case (duplicate entries are counted twice)
+            long total = 0;
+            for (Cursor c : cursors) {
+                long t = c.getSize(precision, max);
+                if (t < 0) {
+                    return -1;
+                }
+                total = QueryImpl.saturatedAdd(total, t);
+            }
+            return total;
+        }
 
     }
 }

Modified: 
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
 (original)
+++ 
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
 Wed Jun 10 11:13:04 2015
@@ -77,6 +77,7 @@ import org.apache.lucene.search.ScoreDoc
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHitCountCollector;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.search.spell.SuggestWord;
 import org.apache.lucene.search.suggest.Lookup;
@@ -444,7 +445,32 @@ public class LuceneIndex implements Adva
                 this.lastSearchIndexerVersion = currentVersion;
             }
         };
-        return new LucenePathCursor(itr, settings);
+        SizeEstimator sizeEstimator = new SizeEstimator() {
+            @Override
+            public long getSize() {
+                IndexNode indexNode = tracker.acquireIndexNode((String) 
plan.getAttribute(ATTR_INDEX_PATH));
+                checkState(indexNode != null);                
+                try {
+                    IndexSearcher searcher = indexNode.getSearcher();
+                    LuceneRequestFacade luceneRequestFacade = 
getLuceneRequest(filter, searcher.getIndexReader(),
+                            nonFullTextConstraints, indexNode.getDefinition());
+                    if (luceneRequestFacade.getLuceneRequest() instanceof 
Query) {
+                        Query query = (Query) 
luceneRequestFacade.getLuceneRequest();
+                        LOG.debug("estimate size for query " + query);
+                        TotalHitCountCollector collector = new 
TotalHitCountCollector();
+                        searcher.search(query, collector);
+                        return collector.getTotalHits();
+                    }
+                    LOG.debug("estimate size: not a Query: " + 
luceneRequestFacade.getLuceneRequest());
+                } catch (IOException e) {
+                    LOG.warn("query via {} failed.", LuceneIndex.this, e);
+                } finally {
+                    indexNode.release();
+                }
+                return -1;
+            }
+        };
+        return new LucenePathCursor(itr, settings, sizeEstimator);
     }
 
     protected static IndexPlan.Builder planBuilder(Filter filter){
@@ -1067,8 +1093,11 @@ public class LuceneIndex implements Adva
 
         private final Cursor pathCursor;
         LuceneResultRow currentRow;
+        private final SizeEstimator sizeEstimator;
+        private long estimatedSize;
 
-        LucenePathCursor(final Iterator<LuceneResultRow> it, 
QueryEngineSettings settings) {
+        LucenePathCursor(final Iterator<LuceneResultRow> it, 
QueryEngineSettings settings, SizeEstimator sizeEstimator) {
+            this.sizeEstimator = sizeEstimator;
             Iterator<String> pathIterator = new Iterator<String>() {
 
                 @Override
@@ -1129,8 +1158,10 @@ public class LuceneIndex implements Adva
 
         @Override
         public long getSize(SizePrecision precision, long max) {
-            // not yet supported
-            return -1;
+            if (estimatedSize != 0) {
+                return estimatedSize;
+            }
+            return estimatedSize = sizeEstimator.getSize();
         }
     }
 

Modified: 
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
--- 
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
 (original)
+++ 
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
 Wed Jun 10 11:13:04 2015
@@ -19,6 +19,8 @@
 package org.apache.jackrabbit.oak.jcr.query;
 
 import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.RepositoryException;
 import javax.jcr.Session;
 import javax.jcr.query.Query;
 import javax.jcr.query.QueryManager;
@@ -28,34 +30,90 @@ import org.apache.jackrabbit.core.query.
 public class ResultSizeTest extends AbstractQueryTest {
 
     public void testResultSize() throws Exception {
+        doTestResultSize(false);
+    }
+    
+    public void testResultSizeLuceneV1() throws Exception {
+        Session session = superuser;
+        Node index = session.getRootNode().getNode("oak:index");
+        Node luceneGlobal = index.getNode("luceneGlobal");
+        luceneGlobal.setProperty("type", "disabled");
+        Node luceneV1 = index.addNode("luceneV1", "oak:QueryIndexDefinition");
+        luceneV1.setProperty("type", "lucene");
+        session.save();
+
+        doTestResultSize(true);
+        
+        luceneV1.remove();
+        luceneGlobal.setProperty("type", "lucene");
+        session.save();
+    }
+    
+    private void doTestResultSize(boolean aggregateAtQueryTime) throws 
RepositoryException {
+        createData();
+        int expectedForUnion = 400;
+        int expectedForTwoConditions = aggregateAtQueryTime ? 400 : 200;
+        doTestResultSize(false, expectedForTwoConditions);
+        doTestResultSize(true, expectedForUnion);
+    }
+    
+    private void createData() throws RepositoryException {
         Session session = superuser;
-        QueryManager qm = session.getWorkspace().getQueryManager();
         for (int i = 0; i < 200; i++) {
             Node n = testRootNode.addNode("node" + i);
             n.setProperty("text", "Hello World");
         }
         session.save();
+    }
+    
+    private void doTestResultSize(boolean union, int expected) throws 
RepositoryException {
+        Session session = superuser;
+        QueryManager qm = session.getWorkspace().getQueryManager();
 
-        String xpath = "/jcr:root//*[jcr:contains(@text, 'Hello World')]";
+        String xpath;
+        if (union) {
+            xpath = "/jcr:root//*[jcr:contains(@text, 'Hello') or 
jcr:contains(@text, 'World')]";
+        } else {
+            xpath = "/jcr:root//*[jcr:contains(@text, 'Hello World')]";
+        }
         
         Query q;
         long result;
+        NodeIterator it;
+        StringBuilder buff;
         
         // fast (insecure) case
         System.setProperty("oak.fastQuerySize", "true");
         q = qm.createQuery(xpath, "xpath");
-        result = q.execute().getRows().getSize();
-        assertTrue("size: " + result, result > 150 && result < 250);
-        
+        it = q.execute().getNodes();
+        result = it.getSize();
+        assertTrue("size: " + result + " expected around " + expected, 
+                result > expected - 50 && 
+                result < expected + 50);
+        buff = new StringBuilder();
+        while (it.hasNext()) {
+            Node n = it.nextNode();
+            buff.append(n.getPath()).append('\n');
+        }
+        String fastSizeResult = buff.toString();
         q = qm.createQuery(xpath, "xpath");
         q.setLimit(90);
-        assertEquals(90, q.execute().getRows().getSize());
+        it = q.execute().getNodes();
+        assertEquals(90, it.getSize());
         
         // default (secure) case
         System.clearProperty("oak.fastQuerySize");
         q = qm.createQuery(xpath, "xpath");
-        result = q.execute().getRows().getSize();
-        assertEquals(-1, q.execute().getRows().getSize());
+        it = q.execute().getNodes();
+        result = it.getSize();
+        assertEquals(-1, result);
+        buff = new StringBuilder();
+        while (it.hasNext()) {
+            Node n = it.nextNode();
+            buff.append(n.getPath()).append('\n');
+        }
+        String regularResult = buff.toString();
+        assertEquals(regularResult, fastSizeResult);
 
     }
     


Reply via email to