Author: stefanegli
Date: Mon Nov 28 10:57:06 2016
New Revision: 1771708

URL: http://svn.apache.org/viewvc?rev=1771708&view=rev
Log:
OAK-5169 : unprecise exclude filtering introduced: this reduces large sets of 
exclude paths to a smaller and thus unprecise set of parent paths that, if hit, 
result in a potentially false negative (which is fine, just a minor performance 
hit)

Modified:
    
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
    
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java

Modified: 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java?rev=1771708&r1=1771707&r2=1771708&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/main/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImpl.java
 Mon Nov 28 10:57:06 2016
@@ -29,6 +29,7 @@ import java.util.regex.Pattern;
 import javax.annotation.Nonnull;
 import javax.annotation.Nullable;
 
+import org.apache.jackrabbit.oak.commons.PathUtils;
 import org.apache.jackrabbit.oak.plugins.observation.ChangeSet;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -37,9 +38,13 @@ public class ChangeSetFilterImpl impleme
 
     private static final Logger LOG = 
LoggerFactory.getLogger(ChangeSetFilterImpl.class);
     
+    private static final int MAX_EXCLUDED_PATHS = 11;
+    private static final int MAX_EXCLUDE_PATH_CUTOFF_LEVEL = 6;
+    
     private final Set<String> rootIncludePaths;
     private final Set<Pattern> includePathPatterns;
     private final Set<Pattern> excludePathPatterns;
+    private final Set<Pattern> unpreciseExcludePathPatterns;
     private final Set<String> parentNodeNames;
     private final Set<String> parentNodeTypes;
     private final Set<String> propertyNames;
@@ -51,14 +56,17 @@ public class ChangeSetFilterImpl impleme
                 parentNodeTypes+", propertyNames="+propertyNames+"]";
     }
 
-    public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths, 
boolean isDeep, Set<String> excludedParentPaths,
+    public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths, 
boolean isDeep,
+            @Nullable Set<String> additionalIncludedParentPaths, Set<String> 
excludedParentPaths,
             Set<String> parentNodeNames, Set<String> parentNodeTypes, 
Set<String> propertyNames) {
-        this(includedParentPaths, isDeep, null, excludedParentPaths, 
parentNodeNames, parentNodeTypes, propertyNames);
+        this(includedParentPaths, isDeep, additionalIncludedParentPaths, 
excludedParentPaths, parentNodeNames, parentNodeTypes, propertyNames,
+                MAX_EXCLUDED_PATHS);
     }
 
     public ChangeSetFilterImpl(@Nonnull Set<String> includedParentPaths, 
boolean isDeep,
             @Nullable Set<String> additionalIncludedParentPaths, Set<String> 
excludedParentPaths,
-            Set<String> parentNodeNames, Set<String> parentNodeTypes, 
Set<String> propertyNames) {
+            Set<String> parentNodeNames, Set<String> parentNodeTypes, 
Set<String> propertyNames,
+            int maxExcludedPaths) {
         this.rootIncludePaths = new HashSet<String>();
         this.includePathPatterns = new HashSet<Pattern>();
         for (String aRawIncludePath : includedParentPaths) {
@@ -78,15 +86,67 @@ public class ChangeSetFilterImpl impleme
                 this.includePathPatterns.add(asPattern(path));
             }
         }
+        // OAK-5169:
+        // excludedParentPaths could in theory be a large list, in which case
+        // the excludes() algorithm becomes non-performing. Reason is, that it
+        // iterates through the changeSet and then through the excludePaths.
+        // which means it becomes an O(n*m) operation.
+        // This should be avoided and one way to avoid this is to make an
+        // unprecise exclude filtering, where a smaller number of parent
+        // exclude paths is determined - and if the change is within this
+        // unprecise set, then we have to include it (with the risk of
+        // false negative) - but if the change is outside of this unprecise
+        // set, then we are certain that we are not excluding it.
+        // one way this unprecise filter can be implemented is by 
+        // starting off with eg 6 levels deep paths, and check if that brings
+        // down the number far enough (to eg 11), if it's still too high,
+        // cut off exclude paths at level 5 and repeat until the figure ends
+        // up under 11.
         this.excludePathPatterns = new HashSet<Pattern>();
-        for (String aRawExcludePath : excludedParentPaths) {
-            this.excludePathPatterns.add(asPattern(concat(aRawExcludePath, 
"**")));
+        this.unpreciseExcludePathPatterns = new HashSet<Pattern>();
+        if (excludedParentPaths.size() < maxExcludedPaths) {
+            for (String aRawExcludePath : excludedParentPaths) {
+                this.excludePathPatterns.add(asPattern(concat(aRawExcludePath, 
"**")));
+            }
+        } else {
+            final Set<String> unprecisePaths = 
unprecisePaths(excludedParentPaths);
+            for (String anUnprecisePath : unprecisePaths) {
+                
this.unpreciseExcludePathPatterns.add(asPattern(concat(anUnprecisePath, "**")));
+            }
         }
         this.propertyNames = propertyNames == null ? null : new 
HashSet<String>(propertyNames);
         this.parentNodeTypes = parentNodeTypes == null ? null : new 
HashSet<String>(parentNodeTypes);
         this.parentNodeNames = parentNodeNames == null ? null : new 
HashSet<String>(parentNodeNames);
     }
     
+    private Set<String> unprecisePaths(Set<String> paths) {
+        int level = MAX_EXCLUDE_PATH_CUTOFF_LEVEL;
+        while(level > 1) {
+            Set<String> unprecise = unprecisePaths(paths, level);
+            if (unprecise.size() < MAX_EXCLUDED_PATHS) {
+                return unprecise;
+            }
+            level--;
+        }
+        // worst case: we even have too many top-level paths, so 
+        // the only way out here is by returning a set containing only "/"
+        HashSet<String> result = new HashSet<String>();
+        result.add("/");
+        return result;
+    }
+
+    private Set<String> unprecisePaths(Set<String> paths, int level) {
+        Set<String> result = new HashSet<String>();
+        for (String path : paths) {
+            String unprecise = path;
+            while(PathUtils.getDepth(unprecise) > level) {
+                unprecise = PathUtils.getParentPath(unprecise);
+            }
+            result.add(unprecise);
+        }
+        return result;
+    }
+
     /** for testing only **/
     public Set<String> getRootIncludePaths() {
         return rootIncludePaths;
@@ -118,6 +178,22 @@ public class ChangeSetFilterImpl impleme
         }
         final Set<String> parentPaths = new 
HashSet<String>(changeSet.getParentPaths());
 
+        // first go through the unprecise excludes. if that has any hit,
+        // we have to let it pass as include
+        boolean unpreciseExclude = false;
+        if (this.unpreciseExcludePathPatterns.size() != 0) {
+            final Iterator<String> it = parentPaths.iterator();
+            while (it.hasNext()) {
+                final String aParentPath = it.next();
+                if (patternsMatch(this.unpreciseExcludePathPatterns, 
aParentPath)) {
+                    // if there is an unprecise match we keep track of that 
fact
+                    // for later in this method
+                    unpreciseExclude = true;
+                    break;
+                }
+            }
+        }
+        
         // first go through excludes to remove those that are explicitly
         // excluded
         if (this.excludePathPatterns.size() != 0) {
@@ -155,6 +231,10 @@ public class ChangeSetFilterImpl impleme
         if (!included) {
             // well then we can definitely say that this commit is excluded
             return true;
+        } else if (unpreciseExclude) {
+            // then it might have been excluded but we are not sure
+            // in which case we return false (as that's safe always)
+            return false;
         }
 
         if (this.propertyNames != null && this.propertyNames.size() != 0) {

Modified: 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
URL: 
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java?rev=1771708&r1=1771707&r2=1771708&view=diff
==============================================================================
--- 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
 (original)
+++ 
jackrabbit/oak/trunk/oak-core/src/test/java/org/apache/jackrabbit/oak/plugins/observation/filter/ChangeSetFilterImplTest.java
 Mon Nov 28 10:57:06 2016
@@ -113,7 +113,7 @@ public class ChangeSetFilterImplTest {
 
     @Test
     public void testIsDeepFalse() throws Exception {
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), false, 
s("/excluded"), s(), s(), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), false, 
null, s("/excluded"), s(), s(), s());
         
         assertTrue(prefilter.excludes(newChangeSet(5, s("/child1", "/child2"), 
s("child1", "child2"), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/", "/child2"), 
s("child2"), s(), s())));
@@ -121,16 +121,16 @@ public class ChangeSetFilterImplTest {
 
     @Test
     public void testParentPathsIncludeExclude() throws Exception {
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s(), s(), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s(), s(), s());
         assertFalse(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s("a", 
"b"), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo", 
"/excluded/bar"), s("foo", "bar"), s(), s())));
         
-        prefilter = new ChangeSetFilterImpl(s("/included"), true, 
s("/excluded"), s(), s(), s());
+        prefilter = new ChangeSetFilterImpl(s("/included"), true, null, 
s("/excluded"), s(), s(), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s(), s(), 
s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/included/a", 
"/included/b"), s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo", 
"/excluded/bar"), s(), s(), s())));
 
-        prefilter = new ChangeSetFilterImpl(s("/foo/**/included/**"), true 
/*ignored for globs */, s("/excluded"), s(), s(), s());
+        prefilter = new ChangeSetFilterImpl(s("/foo/**/included/**"), true 
/*ignored for globs */, null, s("/excluded"), s(), s(), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/a", "/b"), s(), s(), 
s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/included/a", 
"/included/b"), s(), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/foo/included/a"), 
s(), s(), s())));
@@ -138,18 +138,18 @@ public class ChangeSetFilterImplTest {
         assertFalse(prefilter.excludes(newChangeSet(5, 
s("/foo/bar/included/a", "/included/b"), s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/excluded/foo", 
"/excluded/bar"), s(), s(), s())));
 
-        prefilter = new ChangeSetFilterImpl(s("/main/**/included"), true, 
s("/main/excluded"), s(), s(), s());
+        prefilter = new ChangeSetFilterImpl(s("/main/**/included"), true, 
null, s("/main/excluded"), s(), s(), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"), 
s(), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/main/included", 
"/main/excluded"), s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, 
s("/main/excluded/included", "/main/excluded"), s(), s(), s())));
 
-        prefilter = new ChangeSetFilterImpl(s("/main/included/**"), true, 
s("/main/excluded"), s(), s(), s());
+        prefilter = new ChangeSetFilterImpl(s("/main/included/**"), true, 
null, s("/main/excluded"), s(), s(), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"), 
s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/main/excluded"), 
s(), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/main/included"), 
s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, 
s("/main/excluded/included", "/main/excluded"), s(), s(), s())));
 
-        prefilter = new ChangeSetFilterImpl(s("/main/inc-*/**"), true, 
s("/main/excluded"), s(), s(), s());
+        prefilter = new ChangeSetFilterImpl(s("/main/inc-*/**"), true, null, 
s("/main/excluded"), s(), s(), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/main", "/main/foo"), 
s(), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/main/excluded"), 
s(), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/main/inc-luded"), 
s(), s(), s())));
@@ -158,7 +158,7 @@ public class ChangeSetFilterImplTest {
     
     @Test
     public void testParentNodeNames() throws Exception {
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s(), s("foo", "bar"), s(), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s(), s("foo", "bar"), s(), s());
         assertFalse(prefilter.excludes(newChangeSet(5, s("/a/foo", "/b"), 
s("foo", "b"), s(), s())));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/a/zoo", "/b"), 
s("zoo", "b"), s(), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/a/zoo", "/bar"), 
s("zoo", "bar"), s(), s())));
@@ -166,14 +166,14 @@ public class ChangeSetFilterImplTest {
 
     @Test
     public void testParentNodeTypes() throws Exception {
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s(), s(), s("nt:folder"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s(), s(), s("nt:folder"), s());
         assertTrue(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), 
s("nt:unstructured"), s())));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), 
s("nt:folder"), s())));
     }
 
     @Test
     public void testPropertyNames() throws Exception {
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s(), s(), s(), s("jcr:data"));
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s(), s(), s(), s("jcr:data"));
         assertTrue(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), s(), 
s("myProperty"))));
         assertFalse(prefilter.excludes(newChangeSet(5, s("/a"), s("a"), s(), 
s("jcr:data"))));
     }
@@ -202,7 +202,7 @@ public class ChangeSetFilterImplTest {
     @Test
     public void testIncludeOnAllNodeTypeOverflow() throws Exception {
         ChangeSetBuilder builder = sampleBuilder();
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
         assertTrue(prefilter.excludes(builder.build()));
         overflowAllNodeTypes(builder);
         assertFalse(prefilter.excludes(builder.build()));
@@ -211,7 +211,7 @@ public class ChangeSetFilterImplTest {
     @Test
     public void testIncludeOnParentNodeNameOverflow() throws Exception {
         ChangeSetBuilder builder = sampleBuilder();
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
         assertTrue(prefilter.excludes(builder.build()));
         overflowParentNodeNames(builder);
         assertFalse(prefilter.excludes(builder.build()));
@@ -220,7 +220,7 @@ public class ChangeSetFilterImplTest {
     @Test
     public void testIncludeOnPropertyNamesOverflow() throws Exception {
         ChangeSetBuilder builder = sampleBuilder();
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
         assertTrue(prefilter.excludes(builder.build()));
         overflowPropertyNames(builder);
         assertFalse(prefilter.excludes(builder.build()));
@@ -229,7 +229,7 @@ public class ChangeSetFilterImplTest {
     @Test
     public void testIncludeOnParentNodeTypeOverflow() throws Exception {
         ChangeSetBuilder builder = sampleBuilder();
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
         assertTrue(prefilter.excludes(builder.build()));
         overflowParentNodeTypes(builder);
         assertFalse(prefilter.excludes(builder.build()));
@@ -238,9 +238,49 @@ public class ChangeSetFilterImplTest {
     @Test
     public void testIncludeOnParentPathsOverflow() throws Exception {
         ChangeSetBuilder builder = sampleBuilder();
-        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
s("/excluded"), s("foo", "bars"), s("nt:file"), s());
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, s("/excluded"), s("foo", "bars"), s("nt:file"), s());
         assertTrue(prefilter.excludes(builder.build()));
         overflowParentPaths(builder);
         assertFalse(prefilter.excludes(builder.build()));
     }
+    
+    @Test
+    public void testUnpreciseInclude() throws Exception {
+        ChangeSetBuilder builder = newBuilder(5, 5);
+        builder.addNodeType("nt:file");
+        builder.addParentNodeType("nt:file");
+        builder.addParentPath("/a/b/c/e");
+        builder.addParentNodeName("d");
+        builder.addPropertyName("e");
+        builder.addPropertyName("f");
+        Set<String> largeExcludeSet = new HashSet<String>();
+        for(int a=0;a<3;a++) {
+            for(int b=0;b<3;b++) {
+                for(int c=0;c<10;c++) {
+                    String s = "/a";
+                    if (a > 0) {
+                        s += a;
+                    }
+                    s += "/b";
+                    if (b > 0) {
+                        s += b;
+                    }
+                    s += "/c";
+                    if (c > 0) {
+                        s += c;
+                    }
+                    s += "/d";
+                    largeExcludeSet.add(s);
+                }
+            }
+        }
+        ChangeSetFilterImpl prefilter = new ChangeSetFilterImpl(s("/"), true, 
null, largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 999);
+        assertTrue(prefilter.excludes(builder.build()));
+
+        prefilter = new ChangeSetFilterImpl(s("/"), true, null, 
largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 15);
+        assertFalse(prefilter.excludes(builder.build()));
+
+        prefilter = new ChangeSetFilterImpl(s("/"), true, null, 
largeExcludeSet, s("foo", "bars"), s("nt:file"), s(), 1);
+        assertFalse(prefilter.excludes(builder.build()));
+    }
 }


Reply via email to