Author: thomasm
Date: Wed Jun 10 11:13:04 2015
New Revision: 1684642
URL: http://svn.apache.org/r1684642
Log:
OAK-2926 Fast result size estimate
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/index/aggregate/AggregationCursor.java
Wed Jun 10 11:13:04 2015
@@ -22,6 +22,7 @@ import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.jackrabbit.oak.api.PropertyValue;
+import org.apache.jackrabbit.oak.api.Result.SizePrecision;
import org.apache.jackrabbit.oak.spi.query.Cursor;
import org.apache.jackrabbit.oak.spi.query.IndexRow;
import org.apache.jackrabbit.oak.spi.query.Cursors.AbstractCursor;
@@ -125,4 +126,10 @@ class AggregationCursor extends Abstract
};
}
+
+ @Override
+ public long getSize(SizePrecision precision, long max) {
+ return cursor.getSize(precision, max);
+ }
+
}
\ No newline at end of file
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/query/UnionQueryImpl.java
Wed Jun 10 11:13:04 2015
@@ -147,11 +147,20 @@ public class UnionQueryImpl implements Q
@Override
public long getSize(SizePrecision precision, long max) {
- // Note: this does not respect the "unionAll == false" case
+ // Note: for "unionAll == false", overlapping entries are counted twice
// (this can result in a larger reported size, but it is not a
security problem)
- return QueryImpl.saturatedAdd(
- left.getSize(precision, max),
- right.getSize(precision, max));
+
+ // ensure the queries are both executed, otherwise the cursor is not
set,
+ // and so the size would be -1
+ left.executeQuery().getRows().iterator().hasNext();
+ right.executeQuery().getRows().iterator().hasNext();
+ long a = left.getSize(precision, max);
+ long b = right.getSize(precision, max);
+ if (a < 0 || b < 0) {
+ return -1;
+ }
+ long total = QueryImpl.saturatedAdd(a, b);
+ return Math.min(limit, total);
}
@Override
Modified:
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-core/src/main/java/org/apache/jackrabbit/oak/spi/query/Cursors.java
Wed Jun 10 11:13:04 2015
@@ -29,6 +29,7 @@ import org.apache.jackrabbit.oak.commons
import org.apache.jackrabbit.oak.plugins.memory.MemoryChildNodeEntry;
import org.apache.jackrabbit.oak.query.FilterIterators;
import org.apache.jackrabbit.oak.query.QueryEngineSettings;
+import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.index.IndexRowImpl;
import org.apache.jackrabbit.oak.spi.query.Filter.PathRestriction;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
@@ -425,6 +426,17 @@ public class Cursors {
FilterIterators.checkMemoryLimit(seen.size(), settings);
}
+ @Override
+ public long getSize(SizePrecision precision, long max) {
+ // this is the worst case
+ long a = first.getSize(precision, max);
+ long b = second.getSize(precision, max);
+ if (a < 0 || b < 0) {
+ return -1;
+ }
+ return QueryImpl.saturatedAdd(a, b);
+ }
+
}
/**
@@ -439,16 +451,21 @@ public class Cursors {
private boolean closed;
private Cursor currentCursor;
+ private int cursorListIndex;
private IndexRow current;
ConcatCursor(List<Cursor> cursors, QueryEngineSettings settings) {
this.cursors = cursors;
this.settings = settings;
- if (cursors.size() == 0) {
+ nextCursor();
+ }
+
+ private void nextCursor() {
+ if (cursorListIndex >= cursors.size()) {
init = true;
closed = true;
} else {
- this.currentCursor = cursors.remove(0);
+ currentCursor = cursors.get(cursorListIndex++);
}
}
@@ -478,11 +495,9 @@ public class Cursors {
private void fetchNext() {
while (true) {
while (!currentCursor.hasNext()) {
- if (cursors.isEmpty()) {
- closed = true;
+ nextCursor();
+ if (closed) {
return;
- } else {
- currentCursor = cursors.remove(0);
}
}
IndexRow c = currentCursor.next();
@@ -500,6 +515,20 @@ public class Cursors {
seen.add(path);
FilterIterators.checkMemoryLimit(seen.size(), settings);
}
+
+ @Override
+ public long getSize(SizePrecision precision, long max) {
+ // this is the worst case (duplicate entries are counted twice)
+ long total = 0;
+ for (Cursor c : cursors) {
+ long t = c.getSize(precision, max);
+ if (t < 0) {
+ return -1;
+ }
+ total = QueryImpl.saturatedAdd(total, t);
+ }
+ return total;
+ }
}
}
Modified:
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Wed Jun 10 11:13:04 2015
@@ -77,6 +77,7 @@ import org.apache.lucene.search.ScoreDoc
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TotalHitCountCollector;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.suggest.Lookup;
@@ -444,7 +445,32 @@ public class LuceneIndex implements Adva
this.lastSearchIndexerVersion = currentVersion;
}
};
- return new LucenePathCursor(itr, settings);
+ SizeEstimator sizeEstimator = new SizeEstimator() {
+ @Override
+ public long getSize() {
+ IndexNode indexNode = tracker.acquireIndexNode((String)
plan.getAttribute(ATTR_INDEX_PATH));
+ checkState(indexNode != null);
+ try {
+ IndexSearcher searcher = indexNode.getSearcher();
+ LuceneRequestFacade luceneRequestFacade =
getLuceneRequest(filter, searcher.getIndexReader(),
+ nonFullTextConstraints, indexNode.getDefinition());
+ if (luceneRequestFacade.getLuceneRequest() instanceof
Query) {
+ Query query = (Query)
luceneRequestFacade.getLuceneRequest();
+ LOG.debug("estimate size for query " + query);
+ TotalHitCountCollector collector = new
TotalHitCountCollector();
+ searcher.search(query, collector);
+ return collector.getTotalHits();
+ }
+ LOG.debug("estimate size: not a Query: " +
luceneRequestFacade.getLuceneRequest());
+ } catch (IOException e) {
+ LOG.warn("query via {} failed.", LuceneIndex.this, e);
+ } finally {
+ indexNode.release();
+ }
+ return -1;
+ }
+ };
+ return new LucenePathCursor(itr, settings, sizeEstimator);
}
protected static IndexPlan.Builder planBuilder(Filter filter){
@@ -1067,8 +1093,11 @@ public class LuceneIndex implements Adva
private final Cursor pathCursor;
LuceneResultRow currentRow;
+ private final SizeEstimator sizeEstimator;
+ private long estimatedSize;
- LucenePathCursor(final Iterator<LuceneResultRow> it,
QueryEngineSettings settings) {
+ LucenePathCursor(final Iterator<LuceneResultRow> it,
QueryEngineSettings settings, SizeEstimator sizeEstimator) {
+ this.sizeEstimator = sizeEstimator;
Iterator<String> pathIterator = new Iterator<String>() {
@Override
@@ -1129,8 +1158,10 @@ public class LuceneIndex implements Adva
@Override
public long getSize(SizePrecision precision, long max) {
- // not yet supported
- return -1;
+ if (estimatedSize != 0) {
+ return estimatedSize;
+ }
+ return estimatedSize = sizeEstimator.getSize();
}
}
Modified:
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java?rev=1684642&r1=1684641&r2=1684642&view=diff
==============================================================================
---
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
(original)
+++
jackrabbit/oak/branches/1.0/oak-lucene/src/test/java/org/apache/jackrabbit/oak/jcr/query/ResultSizeTest.java
Wed Jun 10 11:13:04 2015
@@ -19,6 +19,8 @@
package org.apache.jackrabbit.oak.jcr.query;
import javax.jcr.Node;
+import javax.jcr.NodeIterator;
+import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.QueryManager;
@@ -28,34 +30,90 @@ import org.apache.jackrabbit.core.query.
public class ResultSizeTest extends AbstractQueryTest {
public void testResultSize() throws Exception {
+ doTestResultSize(false);
+ }
+
+ public void testResultSizeLuceneV1() throws Exception {
+ Session session = superuser;
+ Node index = session.getRootNode().getNode("oak:index");
+ Node luceneGlobal = index.getNode("luceneGlobal");
+ luceneGlobal.setProperty("type", "disabled");
+ Node luceneV1 = index.addNode("luceneV1", "oak:QueryIndexDefinition");
+ luceneV1.setProperty("type", "lucene");
+ session.save();
+
+ doTestResultSize(true);
+
+ luceneV1.remove();
+ luceneGlobal.setProperty("type", "lucene");
+ session.save();
+ }
+
+ private void doTestResultSize(boolean aggregateAtQueryTime) throws
RepositoryException {
+ createData();
+ int expectedForUnion = 400;
+ int expectedForTwoConditions = aggregateAtQueryTime ? 400 : 200;
+ doTestResultSize(false, expectedForTwoConditions);
+ doTestResultSize(true, expectedForUnion);
+ }
+
+ private void createData() throws RepositoryException {
Session session = superuser;
- QueryManager qm = session.getWorkspace().getQueryManager();
for (int i = 0; i < 200; i++) {
Node n = testRootNode.addNode("node" + i);
n.setProperty("text", "Hello World");
}
session.save();
+ }
+
+ private void doTestResultSize(boolean union, int expected) throws
RepositoryException {
+ Session session = superuser;
+ QueryManager qm = session.getWorkspace().getQueryManager();
- String xpath = "/jcr:root//*[jcr:contains(@text, 'Hello World')]";
+ String xpath;
+ if (union) {
+ xpath = "/jcr:root//*[jcr:contains(@text, 'Hello') or
jcr:contains(@text, 'World')]";
+ } else {
+ xpath = "/jcr:root//*[jcr:contains(@text, 'Hello World')]";
+ }
Query q;
long result;
+ NodeIterator it;
+ StringBuilder buff;
// fast (insecure) case
System.setProperty("oak.fastQuerySize", "true");
q = qm.createQuery(xpath, "xpath");
- result = q.execute().getRows().getSize();
- assertTrue("size: " + result, result > 150 && result < 250);
-
+ it = q.execute().getNodes();
+ result = it.getSize();
+ assertTrue("size: " + result + " expected around " + expected,
+ result > expected - 50 &&
+ result < expected + 50);
+ buff = new StringBuilder();
+ while (it.hasNext()) {
+ Node n = it.nextNode();
+ buff.append(n.getPath()).append('\n');
+ }
+ String fastSizeResult = buff.toString();
q = qm.createQuery(xpath, "xpath");
q.setLimit(90);
- assertEquals(90, q.execute().getRows().getSize());
+ it = q.execute().getNodes();
+ assertEquals(90, it.getSize());
// default (secure) case
System.clearProperty("oak.fastQuerySize");
q = qm.createQuery(xpath, "xpath");
- result = q.execute().getRows().getSize();
- assertEquals(-1, q.execute().getRows().getSize());
+ it = q.execute().getNodes();
+ result = it.getSize();
+ assertEquals(-1, result);
+ buff = new StringBuilder();
+ while (it.hasNext()) {
+ Node n = it.nextNode();
+ buff.append(n.getPath()).append('\n');
+ }
+ String regularResult = buff.toString();
+ assertEquals(regularResult, fastSizeResult);
}