Author: stefanegli
Date: Mon Nov 28 10:57:06 2016
New Revision: 1771708
URL: http://svn.apache.org/viewvc?rev=1771708&view=rev
Log:
OAK-5169 : unprecise exclude filtering introduced: this reduces large sets of
exclude paths to a smaller and thus unprecise set of parent paths that, if hit,
result in a potentially false negative (which is fine, just a minor performance
hit)
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
Modified:
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java?rev=1771708&r1=1771707&r2=1771708&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
Mon Nov 28 10:57:06 2016
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
+import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.observation.ChangeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -37,9 +38,13 @@ public class ChangeSetFilterImpl impleme
private static final Logger LOG =
LoggerFactory.getLogger(ChangeSetFilterImpl.class);
+ private static final int MAX_EXCLUDED_PATHS = 11;
+ private static final int MAX_EXCLUDE_PATH_CUTOFF_LEVEL = 6;
+
private final Set<String> rootIncludePaths;
private final Set<Pattern> includePathPatterns;
private final Set<Pattern> excludePathPatterns;
+ private final Set<Pattern> unpreciseExcludePathPatterns;
private final Set<String> parentNodeNames;
private final Set<String> parentNodeTypes;
private final Set<String> propertyNames;
@@ -51,14 +56,17 @@ public class ChangeSetFilterImpl impleme
parentNodeTypes+", propertyNames="+propertyNames+"]";
}
- public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths,
boolean isDeep, Set<String> excludedParentPaths,
+ public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths,
boolean isDeep,
+ @Nullable Set<String> additionalIncludedParentPaths, Set<String>
excludedParentPaths,
Set<String> parentNodeNames, Set<String> parentNodeTypes,
Set<String> propertyNames) {
- this(includedParentPaths, isDeep, null, excludedParentPaths,
parentNodeNames, parentNodeTypes, propertyNames);
+ this(includedParentPaths, isDeep, additionalIncludedParentPaths,
excludedParentPaths, parentNodeNames, parentNodeTypes, propertyNames,
+ MAX_EXCLUDED_PATHS);
}
public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths,
boolean isDeep,
@Nullable Set<String> additionalIncludedParentPaths, Set<String>
excludedParentPaths,
- Set<String> parentNodeNames, Set<String> parentNodeTypes,
Set<String> propertyNames) {
+ Set<String> parentNodeNames, Set<String> parentNodeTypes,
Set<String> propertyNames,
+ int maxExcludedPaths) {
this.rootIncludePaths = new HashSet<String>();
this.includePathPatterns = new HashSet<Pattern>();
for (String aRawIncludePath : includedParentPaths) {
@@ -78,15 +86,67 @@ public class ChangeSetFilterImpl impleme
this.includePathPatterns.add(asPattern(path));
}
}
+ // OAK-5169:
+ // excludedParentPaths could in theory be a large list, in which case
+ // the excludes() algorithm becomes non-performing. Reason is, that it
+ // iterates through the changeSet and then through the excludePaths.
+ // which means it becomes an O(n*m) operation.
+ // This should be avoided and one way to avoid this is to make an
+ // unprecise exclude filtering, where a smaller number of parent
+ // exclude paths is determined - and if the change is within this
+ // unprecise set, then we have to include it (with the risk of
+ // false negative) - but if the change is outside of this unprecise
+ // set, then we are certain that we are not excluding it.
+ // one way this unprecise filter can be implemented is by
+ // starting off with eg 6 levels deep paths, and check if that brings
+ // down the number far enough (to eg 11), if it's still too high,
+ // cut off exclude paths at level 5 and repeat until the figure ends
+ // up under 11.
this.excludePathPatterns = new HashSet<Pattern>();
- for (String aRawExcludePath : excludedParentPaths) {
- this.excludePathPatterns.add(asPattern(concat(aRawExcludePath,
"**")));
+ this.unpreciseExcludePathPatterns = new HashSet<Pattern>();
+ if (excludedParentPaths.size() < maxExcludedPaths) {
+ for (String aRawExcludePath : excludedParentPaths) {
+ this.excludePathPatterns.add(asPattern(concat(aRawExcludePath,
"**")));
+ }
+ } else {
+ final Set<String> unprecisePaths =
unprecisePaths(excludedParentPaths);
+ for (String anUnprecisePath : unprecisePaths) {
+
this.unpreciseExcludePathPatterns.add(asPattern(concat(anUnprecisePath, "**")));
+ }
}
this.propertyNames = propertyNames == null ? null : new
HashSet<String>(propertyNames);
this.parentNodeTypes = parentNodeTypes == null ? null : new
HashSet<String>(parentNodeTypes);
this.parentNodeNames = parentNodeNames == null ? null : new
HashSet<String>(parentNodeNames);
}
+ private Set<String> unprecisePaths(Set<String> paths) {
+ int level = MAX_EXCLUDE_PATH_CUTOFF_LEVEL;
+ while(level > 1) {
+ Set<String> unprecise = unprecisePaths(paths, level);
+ if (unprecise.size() < MAX_EXCLUDED_PATHS) {
+ return unprecise;
+ }
+ level--;
+ }
+ // worst case: we even have too many top-level paths, so
+ // the only way out here is by returning a set containing only "/"
+ HashSet<String> result = new HashSet<String>();
+ result.add("/");
+ return result;
+ }
+
+ private Set<String> unprecisePaths(Set<String> paths, int level) {
+ Set<String> result = new HashSet<String>();
+ for (String path : paths) {
+ String unprecise = path;
+ while(PathUtils.getDepth(unprecise) > level) {
+ unprecise = PathUtils.getParentPath(unprecise);
+ }
+ result.add(unprecise);
+ }
+ return result;
+ }
+
/** for testing only **/
public Set<String> getRootIncludePaths() {
return rootIncludePaths;
@@ -118,6 +178,22 @@ public class ChangeSetFilterImpl impleme
}
final Set<String> parentPaths = new
HashSet<String>(changeSet.getParentPaths());
+ // first go through the unprecise excludes. if that has any hit,
+ // we have to let it pass as include
+ boolean unpreciseExclude = false;
+ if (this.unpreciseExcludePathPatterns.size() != 0) {
+ final Iterator<String> it = parentPaths.iterator();
+ while (it.hasNext()) {
+ final String aParentPath = it.next();
+ if (patternsMatch(this.unpreciseExcludePathPatterns,
aParentPath)) {
+ // if there is an unprecise match we keep track of that
fact
+ // for later in this method
+ unpreciseExclude = true;
+ break;
+ }
+ }
+ }
+
// first go through excludes to remove those that are explicitly
// excluded
if (this.excludePathPatterns.size() != 0) {
@@ -155,6 +231,10 @@ public class ChangeSetFilterImpl impleme
if (!included) {
// well then we can definitely say that this commit is excluded
return true;
+ } else if (unpreciseExclude) {
+ // then it might have been excluded but we are not sure
+ // in which case we return false (as that's safe always)
+ return false;
}
if (this.propertyNames != null && this.propertyNames.size() != 0) {
Modified:
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java?rev=1771708&r1=1771707&r2=1771708&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
(original)
+++
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
Mon Nov 28 10:57:06 2016
@@ -113,7 +113,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testIsDeepFalse() throws Exception {
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), false,
s("/excluded"), s(), s(), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), false,
null, s("/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/child1", "/child2"),
s("child1", "child2"), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/", "/child2"),
s("child2"), s(), s())));
@@ -121,16 +121,16 @@ public class ChangeSetFilterImplTest {
@Test
public void testParentPathsIncludeExclude() throws Exception {
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s(), s(), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s(), s(), s());
assertFalse(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s("a",
"b"), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo",
"/excluded/bar"), s("foo", "bar"), s(), s())));
- prefilter = new ChangeSetFilterImpl(s("/included"), true,
s("/excluded"), s(), s(), s());
+ prefilter = new ChangeSetFilterImpl(s("/included"), true, null,
s("/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s(), s(),
s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/included/a",
"/included/b"), s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo",
"/excluded/bar"), s(), s(), s())));
- prefilter = new ChangeSetFilterImpl(s("/foo/**/included/**"), true
/*ignored for globs */, s("/excluded"), s(), s(), s());
+ prefilter = new ChangeSetFilterImpl(s("/foo/**/included/**"), true
/*ignored for globs */, null, s("/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s(), s(),
s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/included/a",
"/included/b"), s(), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/foo/included/a"),
s(), s(), s())));
@@ -138,18 +138,18 @@ public class ChangeSetFilterImplTest {
assertFalse(prefilter.excludes(newChangeSet(5,
s("/foo/bar/included/a", "/included/b"), s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo",
"/excluded/bar"), s(), s(), s())));
- prefilter = new ChangeSetFilterImpl(s("/main/**/included"), true,
s("/main/excluded"), s(), s(), s());
+ prefilter = new ChangeSetFilterImpl(s("/main/**/included"), true,
null, s("/main/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"),
s(), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/main/included",
"/main/excluded"), s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5,
s("/main/excluded/included", "/main/excluded"), s(), s(), s())));
- prefilter = new ChangeSetFilterImpl(s("/main/included/**"), true,
s("/main/excluded"), s(), s(), s());
+ prefilter = new ChangeSetFilterImpl(s("/main/included/**"), true,
null, s("/main/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"),
s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/main/excluded"),
s(), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/main/included"),
s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5,
s("/main/excluded/included", "/main/excluded"), s(), s(), s())));
- prefilter = new ChangeSetFilterImpl(s("/main/inc-*/**"), true,
s("/main/excluded"), s(), s(), s());
+ prefilter = new ChangeSetFilterImpl(s("/main/inc-*/**"), true, null,
s("/main/excluded"), s(), s(), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"),
s(), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/main/excluded"),
s(), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/main/inc-luded"),
s(), s(), s())));
@@ -158,7 +158,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testParentNodeNames() throws Exception {
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s(), s("foo", "bar"), s(), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s(), s("foo", "bar"), s(), s());
assertFalse(prefilter.excludes(newChangeSet(5, s("/a/foo", "/b"),
s("foo", "b"), s(), s())));
assertTrue(prefilter.excludes(newChangeSet(5, s("/a/zoo", "/b"),
s("zoo", "b"), s(), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/a/zoo", "/bar"),
s("zoo", "bar"), s(), s())));
@@ -166,14 +166,14 @@ public class ChangeSetFilterImplTest {
@Test
public void testParentNodeTypes() throws Exception {
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s(), s(), s("nt:folder"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s(), s(), s("nt:folder"), s());
assertTrue(prefilter.excludes(newChangeSet(5, s("/a"), s("a"),
s("nt:unstructured"), s())));
assertFalse(prefilter.excludes(newChangeSet(5, s("/a"), s("a"),
s("nt:folder"), s())));
}
@Test
public void testPropertyNames() throws Exception {
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s(), s(), s(), s("jcr:data"));
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s(), s(), s(), s("jcr:data"));
assertTrue(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), s(),
s("myProperty"))));
assertFalse(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), s(),
s("jcr:data"))));
}
@@ -202,7 +202,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testIncludeOnAllNodeTypeOverflow() throws Exception {
ChangeSetBuilder builder = sampleBuilder();
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
assertTrue(prefilter.excludes(builder.build()));
overflowAllNodeTypes(builder);
assertFalse(prefilter.excludes(builder.build()));
@@ -211,7 +211,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testIncludeOnParentNodeNameOverflow() throws Exception {
ChangeSetBuilder builder = sampleBuilder();
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
assertTrue(prefilter.excludes(builder.build()));
overflowParentNodeNames(builder);
assertFalse(prefilter.excludes(builder.build()));
@@ -220,7 +220,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testIncludeOnPropertyNamesOverflow() throws Exception {
ChangeSetBuilder builder = sampleBuilder();
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
assertTrue(prefilter.excludes(builder.build()));
overflowPropertyNames(builder);
assertFalse(prefilter.excludes(builder.build()));
@@ -229,7 +229,7 @@ public class ChangeSetFilterImplTest {
@Test
public void testIncludeOnParentNodeTypeOverflow() throws Exception {
ChangeSetBuilder builder = sampleBuilder();
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
assertTrue(prefilter.excludes(builder.build()));
overflowParentNodeTypes(builder);
assertFalse(prefilter.excludes(builder.build()));
@@ -238,9 +238,49 @@ public class ChangeSetFilterImplTest {
@Test
public void testIncludeOnParentPathsOverflow() throws Exception {
ChangeSetBuilder builder = sampleBuilder();
- ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
assertTrue(prefilter.excludes(builder.build()));
overflowParentPaths(builder);
assertFalse(prefilter.excludes(builder.build()));
}
+
+ @Test
+ public void testUnpreciseInclude() throws Exception {
+ ChangeSetBuilder builder = newBuilder(5, 5);
+ builder.addNodeType("nt:file");
+ builder.addParentNodeType("nt:file");
+ builder.addParentPath("/a/b/c/e");
+ builder.addParentNodeName("d");
+ builder.addPropertyName("e");
+ builder.addPropertyName("f");
+ Set<String> largeExcludeSet = new HashSet<String>();
+ for(int a=0;a<3;a++) {
+ for(int b=0;b<3;b++) {
+ for(int c=0;c<10;c++) {
+ String s = "/a";
+ if (a > 0) {
+ s += a;
+ }
+ s += "/b";
+ if (b > 0) {
+ s += b;
+ }
+ s += "/c";
+ if (c > 0) {
+ s += c;
+ }
+ s += "/d";
+ largeExcludeSet.add(s);
+ }
+ }
+ }
+ ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true,
null, largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 999);
+ assertTrue(prefilter.excludes(builder.build()));
+
+ prefilter = new ChangeSetFilterImpl(s("/"), true, null,
largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 15);
+ assertFalse(prefilter.excludes(builder.build()));
+
+ prefilter = new ChangeSetFilterImpl(s("/"), true, null,
largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 1);
+ assertFalse(prefilter.excludes(builder.build()));
+ }
}