Title: [181917] trunk
Revision
181917
Author
[email protected]
Date
2015-03-24 16:24:36 -0700 (Tue, 24 Mar 2015)

Log Message

Make URL filter patterns matching consistent and add a simple canonicalization step
https://bugs.webkit.org/show_bug.cgi?id=142998

Patch by Benjamin Poulain <[email protected]> on 2015-03-24
Reviewed by Alex Christensen.

Source/WebCore:

This patch makes two changes to the url filter input:
-Make the matching "Search" by default: the pattern can now appear anywhere
 in the URL.
-Make the input a little less fragile: do not explode on valid input
 that is not formatted in a certain way.

To implement the search behavior, I simply add an implicit ".*" in front of the patterns
when that makes sense.

To make the input more robust, we make a few small modifications to it:
-Remove duplicated ".*".
-Remove matching suffixes that do not bring new information.
-Unify all the ".*" in the same format.

Why do that here? That should be done through a graph analysis on the machine.

The reason is this is incredibly cheap compared to the graph analysis. Any state
removed upfront will save the handling of several hundred nodes in the deterministic
graph.

* contentextensions/URLFilterParser.cpp:
(WebCore::ContentExtensions::Term::isKnownToMatchAnyString):
(WebCore::ContentExtensions::Term::isUniversalTransition):
(WebCore::ContentExtensions::GraphBuilder::finalize):
(WebCore::ContentExtensions::GraphBuilder::assertionBOL):
(WebCore::ContentExtensions::GraphBuilder::fail):
(WebCore::ContentExtensions::GraphBuilder::simplifySunkTerms):

Tools:

* TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (181916 => 181917)


--- trunk/Source/WebCore/ChangeLog	2015-03-24 23:10:30 UTC (rev 181916)
+++ trunk/Source/WebCore/ChangeLog	2015-03-24 23:24:36 UTC (rev 181917)
@@ -1,3 +1,38 @@
+2015-03-24  Benjamin Poulain  <[email protected]>
+
+        Make URL filter patterns matching consistent and add a simple canonicalization step
+        https://bugs.webkit.org/show_bug.cgi?id=142998
+
+        Reviewed by Alex Christensen.
+
+        This patch makes two changes to the url filter input:
+        -Make the matching "Search" by default, the pattern can now appear anywhere
+         in the URL by default.
+        -Make the input a little less fragile: do not explode on valid input
+         that is not formatted in a certain way.
+
+        To implement the search behavior, I simply add an implict ".*" in front of the patterns
+        when that make sense.
+
+        To make the input more solid, we do some little modification on the input:
+        -Remove duplicated ".*".
+        -Remove matching suffixes that do not bring new information.
+        -Unify all the ".*" in the same format.
+
+        Why do that here? That should be done through a graph analysis on the machine.
+
+        The reason is this is incredibly cheap compared to the graph analysis. Any state
+        removed upfront will save the handling of several hundred nodes in the deterministic
+        graph.
+
+        * contentextensions/URLFilterParser.cpp:
+        (WebCore::ContentExtensions::Term::isKnownToMatchAnyString):
+        (WebCore::ContentExtensions::Term::isUniversalTransition):
+        (WebCore::ContentExtensions::GraphBuilder::finalize):
+        (WebCore::ContentExtensions::GraphBuilder::assertionBOL):
+        (WebCore::ContentExtensions::GraphBuilder::fail):
+        (WebCore::ContentExtensions::GraphBuilder::simplifySunkTerms):
+
 2015-03-24  Chris Dumez  <[email protected]>
 
         Regression(r181671): Caused Timer-related crashes on iOS / WK1

Modified: trunk/Source/WebCore/contentextensions/URLFilterParser.cpp (181916 => 181917)


--- trunk/Source/WebCore/contentextensions/URLFilterParser.cpp	2015-03-24 23:10:30 UTC (rev 181916)
+++ trunk/Source/WebCore/contentextensions/URLFilterParser.cpp	2015-03-24 23:24:36 UTC (rev 181917)
@@ -244,6 +244,50 @@
         return true;
     }
 
+    // Matches any string, the empty string included.
+    // This is very conservative. Patterns matching any string can return false here.
+    bool isKnownToMatchAnyString() const
+    {
+        ASSERT(isValid());
+
+        switch (m_termType) {
+        case TermType::Empty:
+        case TermType::Deleted:
+            ASSERT_NOT_REACHED();
+            break;
+        case TermType::CharacterSet:
+            // ".*" is the only simple term matching any string.
+            return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore;
+            break;
+        case TermType::Group: {
+            // There are infinitely many ways to match anything with groups, we just handle simple cases
+            if (m_atomData.group.terms.size() != 1)
+                return false;
+
+            const Term& firstTermInGroup = m_atomData.group.terms.first();
+            // -(.*) with any quantifier.
+            if (firstTermInGroup.isKnownToMatchAnyString())
+                return true;
+
+            if (firstTermInGroup.isUniversalTransition()) {
+                // -(.)*, (.+)*, (.?)* etc.
+                if (m_quantifier == AtomQuantifier::ZeroOrMore)
+                    return true;
+
+                // -(.+)?.
+                if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore)
+                    return true;
+
+                // -(.?)+.
+                if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne)
+                    return true;
+            }
+            break;
+        }
+        }
+        return false;
+    }
+
     Term& operator=(const Term& other)
     {
         destroy();
@@ -308,11 +352,24 @@
     }
 
 private:
+    // This is exact for character sets but conservative for groups.
+    // The return value can be false for a group equivalent to a universal transition.
     bool isUniversalTransition() const
     {
-        return m_termType == TermType::CharacterSet
-            && ((m_atomData.characterSet.inverted && !m_atomData.characterSet.characters.bitCount())
-                || (!m_atomData.characterSet.inverted && m_atomData.characterSet.characters.bitCount() == 128));
+        ASSERT(isValid());
+
+        switch (m_termType) {
+        case TermType::Empty:
+        case TermType::Deleted:
+            ASSERT_NOT_REACHED();
+            break;
+        case TermType::CharacterSet:
+            return (m_atomData.characterSet.inverted && !m_atomData.characterSet.characters.bitCount())
+                || (!m_atomData.characterSet.inverted && m_atomData.characterSet.characters.bitCount() == 128);
+        case TermType::Group:
+            return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition();
+        }
+        return false;
     }
 
     unsigned generateSubgraphForAtom(NFA& nfa, uint64_t patternId, unsigned source) const
@@ -459,6 +516,8 @@
 
         sinkFloatingTermIfNecessary();
 
+        simplifySunkTerms();
+
         // Check to see if there are any terms without ? or *.
         bool matchesEverything = true;
         for (const auto& term : m_sunkTerms) {
@@ -467,8 +526,10 @@
                 break;
             }
         }
-        if (matchesEverything)
+        if (matchesEverything) {
             fail(URLFilterParser::MatchesEverything);
+            return;
+        }
 
         for (const auto& term : m_sunkTerms) {
             ASSERT(m_lastPrefixTreeEntry);
@@ -484,12 +545,7 @@
                 
                 auto addResult = m_lastPrefixTreeEntry->nextPattern.set(term, WTF::move(nextPrefixTreeEntry));
                 ASSERT(addResult.isNewEntry);
-                
-                if (!m_newPrefixSubtreeRoot) {
-                    m_newPrefixSubtreeRoot = m_lastPrefixTreeEntry;
-                    m_newPrefixStaringPoint = term;
-                }
-                
+
                 m_lastPrefixTreeEntry = addResult.iterator->value.get();
             }
             m_subtreeEnd = m_lastPrefixTreeEntry->nfaNode;
@@ -564,8 +620,12 @@
         if (hasError())
             return;
 
-        if (m_subtreeStart != m_subtreeEnd || m_floatingTerm.isValid() || !m_openGroups.isEmpty())
+        if (m_subtreeStart != m_subtreeEnd || m_floatingTerm.isValid() || !m_openGroups.isEmpty()) {
             fail(URLFilterParser::MisplacedStartOfLine);
+            return;
+        }
+
+        m_hasBeginningOfLineAssertion = true;
     }
 
     void assertionEOL()
@@ -671,9 +731,6 @@
         if (hasError())
             return;
 
-        if (m_newPrefixSubtreeRoot)
-            m_newPrefixSubtreeRoot->nextPattern.remove(m_newPrefixStaringPoint);
-
         m_parseStatus = reason;
     }
 
@@ -701,6 +758,50 @@
         m_floatingTerm = Term();
     }
 
+    void simplifySunkTerms()
+    {
+        ASSERT(!m_floatingTerm.isValid());
+
+        if (m_sunkTerms.isEmpty())
+            return;
+
+        Term canonicalDotStar(Term::UniversalTransition);
+        canonicalDotStar.quantify(AtomQuantifier::ZeroOrMore);
+
+        // Replace every ".*"-like terms by our canonical version. Remove any duplicate ".*".
+        {
+            unsigned termIndex = 0;
+            bool isAfterDotStar = false;
+            while (termIndex < m_sunkTerms.size()) {
+                if (isAfterDotStar && m_sunkTerms[termIndex].isKnownToMatchAnyString()) {
+                    m_sunkTerms.remove(termIndex);
+                    continue;
+                }
+                isAfterDotStar = false;
+
+                if (m_sunkTerms[termIndex].isKnownToMatchAnyString()) {
+                    m_sunkTerms[termIndex] = canonicalDotStar;
+                    isAfterDotStar = true;
+                }
+                ++termIndex;
+            }
+        }
+
+        // Add our ".*" in front if needed.
+        if (!m_hasBeginningOfLineAssertion && !m_sunkTerms.first().isKnownToMatchAnyString())
+            m_sunkTerms.insert(0, canonicalDotStar);
+
+        // Remove trailing ".*$".
+        if (m_sunkTerms.size() > 2 && m_sunkTerms.last().isEndOfLineAssertion() && m_sunkTerms[m_sunkTerms.size() - 2].isKnownToMatchAnyString())
+            m_sunkTerms.shrink(m_sunkTerms.size() - 2);
+
+        // Remove irrelevant terms that can match empty. For example in "foob?", matching "b" is irrelevant.
+        if (m_sunkTerms.last().isEndOfLineAssertion())
+            return;
+        while (!m_sunkTerms.isEmpty() && !m_sunkTerms.last().matchesAtLeastOneCharacter())
+            m_sunkTerms.removeLast();
+    }
+
     NFA& m_nfa;
     bool m_patternIsCaseSensitive;
     const uint64_t m_patternId;
@@ -712,9 +813,9 @@
     Deque<Term> m_openGroups;
     Vector<Term> m_sunkTerms;
     Term m_floatingTerm;
+    bool m_hasBeginningOfLineAssertion { false };
     bool m_hasProcessedEndOfLineAssertion { false };
 
-    PrefixTreeEntry* m_newPrefixSubtreeRoot = nullptr;
     Term m_newPrefixStaringPoint;
 
     URLFilterParser::ParseStatus m_parseStatus;

Modified: trunk/Tools/ChangeLog (181916 => 181917)


--- trunk/Tools/ChangeLog	2015-03-24 23:10:30 UTC (rev 181916)
+++ trunk/Tools/ChangeLog	2015-03-24 23:24:36 UTC (rev 181917)
@@ -1,3 +1,12 @@
+2015-03-24  Benjamin Poulain  <[email protected]>
+
+        Make URL filter patterns matching consistent and add a simple canonicalization step
+        https://bugs.webkit.org/show_bug.cgi?id=142998
+
+        Reviewed by Alex Christensen.
+
+        * TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp:
+
 2015-03-24  Csaba Osztrogonác  <[email protected]>
 
         [EFL] Add OpenWebRTC in jhbuild

Modified: trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp (181916 => 181917)


--- trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-03-24 23:10:30 UTC (rev 181916)
+++ trunk/Tools/TestWebKitAPI/Tests/WebCore/ContentExtensions.cpp	2015-03-24 23:24:36 UTC (rev 181917)
@@ -115,7 +115,7 @@
     return { URL(URL(), url), URL(URL(), url), resourceType };
 }
 
-const char* basicFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*webkit.org\"}}]";
+const char* basicFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"webkit.org\"}}]";
 
 TEST_F(ContentExtensionTest, Basic)
 {
@@ -130,7 +130,8 @@
 
 TEST_F(ContentExtensionTest, RangeBasic)
 {
-    const char* rangeBasicFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*w[0-9]c\", \"url-filter-is-case-sensitive\":true}},{\"action\":{\"type\":\"block-cookies\"},\"trigger\":{\"url-filter\":\".*[A-H][a-z]cko\", \"url-filter-is-case-sensitive\":true}}]";
+    const char* rangeBasicFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"w[0-9]c\", \"url-filter-is-case-sensitive\":true}},"
+        "{\"action\":{\"type\":\"block-cookies\"},\"trigger\":{\"url-filter\":\"[A-H][a-z]cko\", \"url-filter-is-case-sensitive\":true}}]";
     auto extensionData = ContentExtensions::compileRuleList(rangeBasicFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -157,7 +158,7 @@
 TEST_F(ContentExtensionTest, RangeExclusionGeneratingUniversalTransition)
 {
     // Transition of the type ([^X]X) effictively transition on every input.
-    const char* rangeExclusionGeneratingUniversalTransitionFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*[^a]+afoobar\"}}]";
+    const char* rangeExclusionGeneratingUniversalTransitionFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"[^a]+afoobar\"}}]";
     auto extensionData = ContentExtensions::compileRuleList(rangeExclusionGeneratingUniversalTransitionFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -178,10 +179,9 @@
     testRequest(backend, mainDocumentRequest("http://w3c.org/AAfoobar"), { });
 }
 
-const char* patternsStartingWithGroupFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"(http://whatwg\\\\.org/)?webkit\134\134.org\"}}]";
-
 TEST_F(ContentExtensionTest, PatternStartingWithGroup)
 {
+    const char* patternsStartingWithGroupFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^(http://whatwg\\\\.org/)?webkit\134\134.org\"}}]";
     auto extensionData = ContentExtensions::compileRuleList(patternsStartingWithGroupFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -195,10 +195,10 @@
     testRequest(backend, mainDocumentRequest("http://whatwg.org"), { });
 }
 
-const char* patternNestedGroupsFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"http://webkit\\\\.org/(foo(bar)*)+\"}}]";
-
 TEST_F(ContentExtensionTest, PatternNestedGroups)
 {
+    const char* patternNestedGroupsFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^http://webkit\\\\.org/(foo(bar)*)+\"}}]";
+
     auto extensionData = ContentExtensions::compileRuleList(patternNestedGroupsFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -218,10 +218,10 @@
     testRequest(backend, mainDocumentRequest("http://webkit.org/fobar"), { });
 }
 
-const char* matchPastEndOfStringFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".+\"}}]";
-
 TEST_F(ContentExtensionTest, MatchPastEndOfString)
 {
+    const char* matchPastEndOfStringFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".+\"}}]";
+
     auto extensionData = ContentExtensions::compileRuleList(matchPastEndOfStringFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -258,10 +258,9 @@
     testRequest(backend, mainDocumentRequest("http://foobar.org/"), { });
 }
 
-const char* endOfLineAssertionFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*foobar$\"}}]";
-
 TEST_F(ContentExtensionTest, EndOfLineAssertion)
 {
+    const char* endOfLineAssertionFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"foobar$\"}}]";
     auto extensionData = ContentExtensions::compileRuleList(endOfLineAssertionFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -278,7 +277,7 @@
 
 TEST_F(ContentExtensionTest, EndOfLineAssertionWithInvertedCharacterSet)
 {
-    const char* endOfLineAssertionWithInvertedCharacterSetFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*[^y]$\"}}]";
+    const char* endOfLineAssertionWithInvertedCharacterSetFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"[^y]$\"}}]";
     auto extensionData = ContentExtensions::compileRuleList(endOfLineAssertionWithInvertedCharacterSetFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
 
@@ -295,13 +294,183 @@
     testRequest(backend, mainDocumentRequest("http://webkit.org/foobary"), { });
     testRequest(backend, mainDocumentRequest("http://webkit.org/foobarY"), { });
 }
-    
-const char* loadTypeFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*webkit.org\",\"load-type\":[\"third-party\"]}},"
-    "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*whatwg.org\",\"load-type\":[\"first-party\"]}},"
-    "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*alwaysblock.pdf\"}}]";
 
+TEST_F(ContentExtensionTest, PrefixInfixSuffixExactMatch)
+{
+    const char* prefixInfixSuffixExactMatchFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"infix\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^prefix\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"suffix$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^http://exact\\\\.org/$\"}}]";
+    auto extensionData = ContentExtensions::compileRuleList(prefixInfixSuffixExactMatchFilter);
+    auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
+
+    ContentExtensions::ContentExtensionsBackend backend;
+    backend.addContentExtension("PrefixInfixSuffixExactMatch", extension);
+
+    testRequest(backend, mainDocumentRequest("infix://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("http://infix.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/infix"), { ContentExtensions::ActionType::BlockLoad });
+
+    testRequest(backend, mainDocumentRequest("prefix://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://prefix.org/"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/prefix"), { });
+
+    testRequest(backend, mainDocumentRequest("https://webkit.org/suffix"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://suffix.org/"), { });
+    testRequest(backend, mainDocumentRequest("suffix://webkit.org/"), { });
+
+    testRequest(backend, mainDocumentRequest("http://exact.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("http://exact.org/oops"), { });
+}
+
+TEST_F(ContentExtensionTest, DuplicatedMatchAllTermsInVariousFormat)
+{
+    const char* duplicatedMatchAllTermsInVariousFormatFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*.*(.)*(.*)(.+)*(.?)*infix\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"pre(.?)+(.+)?post\"}}]";
+    auto extensionData = ContentExtensions::compileRuleList(duplicatedMatchAllTermsInVariousFormatFilter);
+    auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
+
+    ContentExtensions::ContentExtensionsBackend backend;
+    backend.addContentExtension("DuplicatedMatchAllTermsInVariousFormat", extension);
+
+    testRequest(backend, mainDocumentRequest("infix://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("http://infix.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/infix"), { ContentExtensions::ActionType::BlockLoad });
+
+    testRequest(backend, mainDocumentRequest("pre://webkit.org/post"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("http://prepost.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://pre.org/posttail"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://pre.pre/posttail"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://pre.org/posttailpost"), { ContentExtensions::ActionType::BlockLoad });
+
+    testRequest(backend, mainDocumentRequest("https://post.org/pre"), { });
+    testRequest(backend, mainDocumentRequest("https://pre.org/pre"), { });
+    testRequest(backend, mainDocumentRequest("https://post.org/post"), { });
+}
+
+TEST_F(ContentExtensionTest, TermsKnownToMatchAnything)
+{
+    const char* termsKnownToMatchAnythingFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre1.*post1$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre2(.*)post2$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre3(.*)?post3$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre4(.*)+post4$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre5(.*)*post5$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre6(.)*post6$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre7(.+)*post7$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre8(.?)*post8$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre9(.+)?post9$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"^pre0(.?)+post0$\"}}]";
+    auto extensionData = ContentExtensions::compileRuleList(termsKnownToMatchAnythingFilter);
+    auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
+
+    ContentExtensions::ContentExtensionsBackend backend;
+    backend.addContentExtension("TermsKnownToMatchAnything", extension);
+
+    testRequest(backend, mainDocumentRequest("pre1://webkit.org/post1"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre2://webkit.org/post2"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre3://webkit.org/post3"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre4://webkit.org/post4"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre5://webkit.org/post5"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre6://webkit.org/post6"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre7://webkit.org/post7"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre8://webkit.org/post8"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre9://webkit.org/post9"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("pre0://webkit.org/post0"), { ContentExtensions::ActionType::BlockLoad });
+
+    testRequest(backend, mainDocumentRequest("pre1://webkit.org/post2"), { });
+    testRequest(backend, mainDocumentRequest("pre2://webkit.org/post3"), { });
+    testRequest(backend, mainDocumentRequest("pre3://webkit.org/post4"), { });
+    testRequest(backend, mainDocumentRequest("pre4://webkit.org/post5"), { });
+    testRequest(backend, mainDocumentRequest("pre5://webkit.org/post6"), { });
+    testRequest(backend, mainDocumentRequest("pre6://webkit.org/post7"), { });
+    testRequest(backend, mainDocumentRequest("pre7://webkit.org/post8"), { });
+    testRequest(backend, mainDocumentRequest("pre8://webkit.org/post9"), { });
+    testRequest(backend, mainDocumentRequest("pre9://webkit.org/post0"), { });
+    testRequest(backend, mainDocumentRequest("pre0://webkit.org/post1"), { });
+
+    testRequest(backend, mainDocumentRequest("pre0://webkit.org/post1"), { });
+    testRequest(backend, mainDocumentRequest("pre1://webkit.org/post2"), { });
+    testRequest(backend, mainDocumentRequest("pre2://webkit.org/post3"), { });
+    testRequest(backend, mainDocumentRequest("pre3://webkit.org/post4"), { });
+    testRequest(backend, mainDocumentRequest("pre4://webkit.org/post5"), { });
+    testRequest(backend, mainDocumentRequest("pre5://webkit.org/post6"), { });
+    testRequest(backend, mainDocumentRequest("pre6://webkit.org/post7"), { });
+    testRequest(backend, mainDocumentRequest("pre7://webkit.org/post8"), { });
+    testRequest(backend, mainDocumentRequest("pre8://webkit.org/post9"), { });
+    testRequest(backend, mainDocumentRequest("pre9://webkit.org/post0"), { });
+}
+
+TEST_F(ContentExtensionTest, TrailingDotStar)
+{
+    const char* trailingDotStarFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"foo.*$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"bar(.*)$\"}}]";
+    auto extensionData = ContentExtensions::compileRuleList(trailingDotStarFilter);
+    auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
+
+    ContentExtensions::ContentExtensionsBackend backend;
+    backend.addContentExtension("TrailingDotStar", extension);
+
+    testRequest(backend, mainDocumentRequest("https://webkit.org/"), { });
+
+    testRequest(backend, mainDocumentRequest("foo://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://foo.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.foo/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/foo"), { ContentExtensions::ActionType::BlockLoad });
+
+    testRequest(backend, mainDocumentRequest("bar://webkit.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://bar.org/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.bar/"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bar"), { ContentExtensions::ActionType::BlockLoad });
+}
+
+TEST_F(ContentExtensionTest, TrailingTermsCarryingNoData)
+{
+    const char* trailingTermsCarryingNoDataFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"foob?a?r?\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"bazo(ok)?a?$\"}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"cats*$\"}}]";
+    auto extensionData = ContentExtensions::compileRuleList(trailingTermsCarryingNoDataFilter);
+    auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
+
+    ContentExtensions::ContentExtensionsBackend backend;
+    backend.addContentExtension("TrailingTermsCarryingNoData", extension);
+
+    testRequest(backend, mainDocumentRequest("https://webkit.org/"), { });
+
+    // Anything is fine after foo.
+    testRequest(backend, mainDocumentRequest("https://webkit.org/foo"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/foob"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/fooc"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/fooba"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/foobar"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/foobar-stuff"), { ContentExtensions::ActionType::BlockLoad });
+
+    // Bazooka has to be at the tail without any character not defined by the filter.
+    testRequest(backend, mainDocumentRequest("https://webkit.org/baz"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazo"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazoa"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazob"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazoo"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazook"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazookb"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazooka"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/bazookaa"), { });
+
+    // The pattern must finish with cat, with any number of 's' following it, but no other character.
+    testRequest(backend, mainDocumentRequest("https://cat.org/"), { });
+    testRequest(backend, mainDocumentRequest("https://cats.org/"), { });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/cat"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/cats"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/catss"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/catsss"), { ContentExtensions::ActionType::BlockLoad });
+    testRequest(backend, mainDocumentRequest("https://webkit.org/catso"), { });
+}
+
 TEST_F(ContentExtensionTest, LoadType)
 {
+    const char* loadTypeFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"webkit.org\",\"load-type\":[\"third-party\"]}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"whatwg.org\",\"load-type\":[\"first-party\"]}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"alwaysblock.pdf\"}}]";
+
     auto extensionData = ContentExtensions::compileRuleList(loadTypeFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
         
@@ -317,12 +486,12 @@
     testRequest(backend, mainDocumentRequest("http://foobar.org/alwaysblock.pdf"), { ContentExtensions::ActionType::BlockLoad });
     testRequest(backend, {URL(URL(), "http://foobar.org/alwaysblock.pdf"), URL(URL(), "http://not_foobar.org/alwaysblock.pdf"), ResourceType::Document}, { ContentExtensions::ActionType::BlockLoad });
 }
-    
-const char* resourceTypeFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*block_all_types.org\",\"resource-type\":[\"document\",\"image\",\"style-sheet\",\"script\",\"font\",\"raw\",\"svg-document\",\"media\"]}},"
-    "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\".*block_only_images\",\"resource-type\":[\"image\"]}}]";
-    
+
 TEST_F(ContentExtensionTest, ResourceType)
 {
+    const char* resourceTypeFilter = "[{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"block_all_types.org\",\"resource-type\":[\"document\",\"image\",\"style-sheet\",\"script\",\"font\",\"raw\",\"svg-document\",\"media\"]}},"
+        "{\"action\":{\"type\":\"block\"},\"trigger\":{\"url-filter\":\"block_only_images\",\"resource-type\":[\"image\"]}}]";
+
     auto extensionData = ContentExtensions::compileRuleList(resourceTypeFilter);
     auto extension = InMemoryCompiledContentExtension::create(WTF::move(extensionData));
         
@@ -426,6 +595,13 @@
     testPatternStatus("([a-z]*)", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
     testPatternStatus("([a-z]?)", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
 
+    testPatternStatus("(.)*", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+    testPatternStatus("(.+)*", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+    testPatternStatus("(.?)*", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+    testPatternStatus("(.*)*", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+    testPatternStatus("(.+)?", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+    testPatternStatus("(.?)+", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
+
     // Nested groups.
     testPatternStatus("((foo)?((.)*)(bar)*)", ContentExtensions::URLFilterParser::ParseStatus::MatchesEverything);
 }
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to