Diff
Modified: trunk/Source/WebCore/ChangeLog (178528 => 178529)
--- trunk/Source/WebCore/ChangeLog 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/ChangeLog 2015-01-15 22:19:40 UTC (rev 178529)
@@ -1,3 +1,71 @@
+2015-01-15 Benjamin Poulain <[email protected]>
+
+ When building the NFA of the global disjunction, share the prefix subgraph of existing subpatterns
+ https://bugs.webkit.org/show_bug.cgi?id=140465
+
+ Reviewed by Andreas Kling.
+
+ This patch updates the parser to produce smaller graphs when multiple patterns
+ of the rule list share a common prefix.
+
+ Previously, GraphBuilder would generate subgraph in place of each parsed
+ atom. We now only create subgraph if an atom does not appear in the prefix tree.
+
+ We accumulate the parsing information into small uint16_t named TrivialAtom.
+ When generating the subgraph for an new atom, we first check if the prefix tree already
+ has a corresponding subgraph for that atom. If it does, we do not generate anything and we extend the existing
+ graph. If there is no existing prefix, we create the subgraph and extend the prefix tree.
+
+ Sharing prefix subtrees slows down the subtree generation a bit but the resulting graph is much
+ simpler for many kind of inputs.
+
+ * WebCore.xcodeproj/project.pbxproj:
+ * contentextensions/ContentExtensionsBackend.cpp:
+ (WebCore::ContentExtensions::ContentExtensionsBackend::setRuleList):
+ The URLFilterParser now maintains states (the prefix tree) between patterns.
+
+ * contentextensions/DFA.cpp:
+ (WebCore::ContentExtensions::DFA::debugPrintDot):
+ * contentextensions/DFANode.h:
+ Fix a typo :)
+
+ * contentextensions/NFA.cpp:
+ (WebCore::ContentExtensions::NFA::createNode):
+ (WebCore::ContentExtensions::NFA::setFinal):
+ (WebCore::ContentExtensions::NFA::restoreToGraphSize):
+ (WebCore::ContentExtensions::NFA::addRuleId):
+ (WebCore::ContentExtensions::NFA::debugPrintDot):
+ * contentextensions/NFA.h:
+ (WebCore::ContentExtensions::NFA::addRuleId):
+ * contentextensions/NFANode.cpp: Removed.
+ * contentextensions/NFANode.h:
+ NFA nodes from two patterns are now "merged" by construction, thus we need
+ to keep track of multiple rules per node.
+
+ * contentextensions/NFAToDFA.cpp:
+ (WebCore::ContentExtensions::NodeIdSetToUniqueNodeIdSetTranslator::translate):
+ * contentextensions/URLFilterParser.cpp:
+ (WebCore::ContentExtensions::trivialAtomFromAsciiCharacter):
+ (WebCore::ContentExtensions::quantifyTrivialAtom):
+ (WebCore::ContentExtensions::trivialAtomForNewlineClassIDBuiltin):
+ (WebCore::ContentExtensions::GraphBuilder::GraphBuilder):
+ (WebCore::ContentExtensions::GraphBuilder::m_LastPrefixTreeEntry):
+ (WebCore::ContentExtensions::GraphBuilder::finalize):
+ (WebCore::ContentExtensions::GraphBuilder::atomPatternCharacter):
+ (WebCore::ContentExtensions::GraphBuilder::atomBuiltInCharacterClass):
+ (WebCore::ContentExtensions::GraphBuilder::quantifyAtom):
+ (WebCore::ContentExtensions::GraphBuilder::fail):
+ (WebCore::ContentExtensions::GraphBuilder::generateTransition):
+ (WebCore::ContentExtensions::GraphBuilder::sinkTrivialAtom):
+ (WebCore::ContentExtensions::GraphBuilder::sinkPendingAtomIfNecessary):
+ (WebCore::ContentExtensions::URLFilterParser::URLFilterParser):
+ (WebCore::ContentExtensions::URLFilterParser::addPattern):
+ (WebCore::ContentExtensions::GraphBuilder::m_lastAtom): Deleted.
+ (WebCore::ContentExtensions::URLFilterParser::parse): Deleted.
+ * contentextensions/URLFilterParser.h:
+ (WebCore::ContentExtensions::URLFilterParser::hasError): Deleted.
+ (WebCore::ContentExtensions::URLFilterParser::errorMessage): Deleted.
+
2015-01-14 Alexey Proskuryakov <[email protected]>
Web Inspector and regular console use different source code locations for messages
Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (178528 => 178529)
--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2015-01-15 22:19:40 UTC (rev 178529)
@@ -1031,7 +1031,6 @@
267726041A5DF6F2003C24DD /* URLFilterParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 267726021A5DF6F2003C24DD /* URLFilterParser.cpp */; };
267726051A5DF6F2003C24DD /* URLFilterParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 267726031A5DF6F2003C24DD /* URLFilterParser.h */; };
269239961505E1AA009E57FC /* JSIDBVersionChangeEvent.h in Headers */ = {isa = PBXBuildFile; fileRef = 269239921505E1AA009E57FC /* JSIDBVersionChangeEvent.h */; };
- 269397211A4A412F00E8349D /* NFANode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2693971F1A4A412F00E8349D /* NFANode.cpp */; };
269397221A4A412F00E8349D /* NFANode.h in Headers */ = {isa = PBXBuildFile; fileRef = 269397201A4A412F00E8349D /* NFANode.h */; };
269397241A4A5B6400E8349D /* NFA.h in Headers */ = {isa = PBXBuildFile; fileRef = 269397231A4A5B6400E8349D /* NFA.h */; };
269397261A4A5FBD00E8349D /* NFA.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 269397251A4A5FBD00E8349D /* NFA.cpp */; };
@@ -8032,7 +8031,6 @@
267726031A5DF6F2003C24DD /* URLFilterParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = URLFilterParser.h; sourceTree = "<group>"; };
269239911505E1AA009E57FC /* JSIDBVersionChangeEvent.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = JSIDBVersionChangeEvent.cpp; sourceTree = "<group>"; };
269239921505E1AA009E57FC /* JSIDBVersionChangeEvent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = JSIDBVersionChangeEvent.h; sourceTree = "<group>"; };
- 2693971F1A4A412F00E8349D /* NFANode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NFANode.cpp; sourceTree = "<group>"; };
269397201A4A412F00E8349D /* NFANode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NFANode.h; sourceTree = "<group>"; };
269397231A4A5B6400E8349D /* NFA.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NFA.h; sourceTree = "<group>"; };
269397251A4A5FBD00E8349D /* NFA.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NFA.cpp; sourceTree = "<group>"; };
@@ -15337,7 +15335,6 @@
267725F91A5B3AD9003C24DD /* DFANode.h */,
269397251A4A5FBD00E8349D /* NFA.cpp */,
269397231A4A5B6400E8349D /* NFA.h */,
- 2693971F1A4A412F00E8349D /* NFANode.cpp */,
269397201A4A412F00E8349D /* NFANode.h */,
267725FA1A5B3AD9003C24DD /* NFAToDFA.cpp */,
267725FB1A5B3AD9003C24DD /* NFAToDFA.h */,
@@ -29687,7 +29684,6 @@
B2227A960D00BF220071B782 /* SVGPreserveAspectRatio.cpp in Sources */,
B543B85717EB758F003BE93A /* SVGPropertyInfo.cpp in Sources */,
B2227A990D00BF220071B782 /* SVGRadialGradientElement.cpp in Sources */,
- 269397211A4A412F00E8349D /* NFANode.cpp in Sources */,
B2227A9D0D00BF220071B782 /* SVGRectElement.cpp in Sources */,
BC2274780E8366E200E7F975 /* SVGRenderStyle.cpp in Sources */,
BC22747A0E8366E200E7F975 /* SVGRenderStyleDefs.cpp in Sources */,
Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -64,16 +64,16 @@
#endif
NFA nfa;
+ URLFilterParser urlFilterParser(nfa);
for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
const ContentExtensionRule& contentExtensionRule = ruleList[ruleIndex];
const ContentExtensionRule::Trigger& trigger = contentExtensionRule.trigger();
ASSERT(trigger.urlFilter.length());
- URLFilterParser urlFilterParser;
- urlFilterParser.parse(trigger.urlFilter, ruleIndex, nfa);
+ String error = urlFilterParser.addPattern(trigger.urlFilter, ruleIndex);
- if (urlFilterParser.hasError()) {
- dataLogF("Error while parsing %s: %s", trigger.urlFilter.utf8().data(), urlFilterParser.errorMessage().utf8().data());
+ if (!error.isNull()) {
+ dataLogF("Error while parsing %s: %s\n", trigger.urlFilter.utf8().data(), error.utf8().data());
continue;
}
}
Modified: trunk/Source/WebCore/contentextensions/DFA.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/DFA.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/DFA.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -154,13 +154,13 @@
}
}
- Vector<unsigned> correspondingDFANodes = m_nodes[i].correspondingDFANodes;
- ASSERT(!correspondingDFANodes.isEmpty());
+ Vector<unsigned> correspondingNFANodes = m_nodes[i].correspondingNFANodes;
+ ASSERT(!correspondingNFANodes.isEmpty());
dataLogF("<BR/>NFA Nodes: ");
- for (unsigned correspondingDFANodeIndex = 0; correspondingDFANodeIndex < correspondingDFANodes.size(); ++correspondingDFANodeIndex) {
+ for (unsigned correspondingDFANodeIndex = 0; correspondingDFANodeIndex < correspondingNFANodes.size(); ++correspondingDFANodeIndex) {
if (correspondingDFANodeIndex)
dataLogF(", ");
- dataLogF("%d", correspondingDFANodes[correspondingDFANodeIndex]);
+ dataLogF("%d", correspondingNFANodes[correspondingDFANodeIndex]);
}
dataLogF(">]");
Modified: trunk/Source/WebCore/contentextensions/DFANode.h (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/DFANode.h 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/DFANode.h 2015-01-15 22:19:40 UTC (rev 178529)
@@ -44,7 +44,7 @@
Vector<uint64_t> actions;
#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
- Vector<unsigned> correspondingDFANodes;
+ Vector<unsigned> correspondingNFANodes;
#endif
};
Modified: trunk/Source/WebCore/contentextensions/NFA.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/NFA.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/NFA.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -40,10 +40,10 @@
{
}
-unsigned NFA::createNode(uint64_t ruleId)
+unsigned NFA::createNode()
{
unsigned nextId = m_nodes.size();
- m_nodes.append(NFANode(ruleId));
+ m_nodes.append(NFANode());
return nextId;
}
@@ -66,10 +66,10 @@
addResult.iterator->value.add(to);
}
-void NFA::setFinal(unsigned node)
+void NFA::setFinal(unsigned node, uint64_t ruleId)
{
- ASSERT(node < m_nodes.size());
- m_nodes[node].isFinal = true;
+ ASSERT(!m_nodes[node].finalRuleIds.contains(ruleId));
+ m_nodes[node].finalRuleIds.append(ruleId);
}
unsigned NFA::graphSize() const
@@ -79,7 +79,7 @@
void NFA::restoreToGraphSize(unsigned size)
{
- ASSERT(size > 1);
+ ASSERT(size >= 1);
ASSERT(size <= graphSize());
m_nodes.shrink(size);
@@ -87,6 +87,12 @@
#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+void NFA::addRuleId(unsigned node, uint64_t ruleId)
+{
+ ASSERT(!m_nodes[node].ruleIds.contains(ruleId));
+ m_nodes[node].ruleIds.append(ruleId);
+}
+
static void printRange(bool firstRange, uint16_t rangeStart, uint16_t rangeEnd, uint16_t epsilonTransitionCharacter)
{
if (!firstRange)
@@ -152,12 +158,33 @@
dataLogF(" node [shape=circle];\n");
dataLogF(" {\n");
for (unsigned i = 0; i < m_nodes.size(); ++i) {
- if (m_nodes[i].ruleId == std::numeric_limits<uint64_t>::max())
- dataLogF(" %d [label=\"Node %d\"]", i, i);
- else
- dataLogF(" %d [label=<Node %d<BR/>(Rule %llu)>]", i, i, m_nodes[i].ruleId);
+ dataLogF(" %d [label=<Node %d", i, i);
- if (m_nodes[i].isFinal)
+ const Vector<uint64_t>& originalRules = m_nodes[i].ruleIds;
+ if (!originalRules.isEmpty()) {
+ dataLogF("<BR/>(Rules: ");
+ for (unsigned ruleIndex = 0; ruleIndex < originalRules.size(); ++ruleIndex) {
+ if (ruleIndex)
+ dataLogF(", ");
+ dataLogF("%llu", originalRules[ruleIndex]);
+ }
+ dataLogF(")");
+ }
+
+ const Vector<uint64_t>& finalRules = m_nodes[i].finalRuleIds;
+ if (!finalRules.isEmpty()) {
+ dataLogF("<BR/>(Final: ");
+ for (unsigned ruleIndex = 0; ruleIndex < finalRules.size(); ++ruleIndex) {
+ if (ruleIndex)
+ dataLogF(", ");
+ dataLogF("%llu", finalRules[ruleIndex]);
+ }
+ dataLogF(")");
+ }
+
+ dataLogF(">]");
+
+ if (!finalRules.isEmpty())
dataLogF(" [shape=doublecircle]");
dataLogF(";\n");
Modified: trunk/Source/WebCore/contentextensions/NFA.h (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/NFA.h 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/NFA.h 2015-01-15 22:19:40 UTC (rev 178529)
@@ -30,7 +30,6 @@
#include "ContentExtensionsDebugging.h"
#include "NFANode.h"
-#include <limits>
#include <wtf/Vector.h>
namespace WebCore {
@@ -45,17 +44,21 @@
public:
NFA();
unsigned root() const { return m_root; }
- unsigned createNode(uint64_t ruleId = std::numeric_limits<uint64_t>::max());
+ unsigned createNode();
void addTransition(unsigned from, unsigned to, char character);
void addEpsilonTransition(unsigned from, unsigned to);
- void setFinal(unsigned node);
+ void setFinal(unsigned node, uint64_t ruleId);
unsigned graphSize() const;
void restoreToGraphSize(unsigned);
#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ void addRuleId(unsigned node, uint64_t ruleId);
+
void debugPrintDot() const;
+#else
+ void addRuleId(unsigned, uint64_t) { }
#endif
private:
Deleted: trunk/Source/WebCore/contentextensions/NFANode.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/NFANode.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/NFANode.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -1,45 +0,0 @@
-/*
- * Copyright (C) 2014 Apple Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
- * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
- * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
- * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
- * THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "config.h"
-#include "NFANode.h"
-
-#if ENABLE(CONTENT_EXTENSIONS)
-
-namespace WebCore {
-
-namespace ContentExtensions {
-
-NFANode::NFANode(uint64_t ruleId)
- : isFinal(false)
- , ruleId(ruleId)
-{
-}
-
-}
-
-} // namespace WebCore
-
-#endif // ENABLE(CONTENT_EXTENSIONS)
Modified: trunk/Source/WebCore/contentextensions/NFANode.h (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/NFANode.h 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/NFANode.h 2015-01-15 22:19:40 UTC (rev 178529)
@@ -28,8 +28,10 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
+#include <wtf/Vector.h>
namespace WebCore {
@@ -38,11 +40,12 @@
// A NFANode abstract the transition table out of a NFA state.
class NFANode {
public:
- NFANode(uint64_t ruleId);
+ HashMap<uint16_t, HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>>> transitions;
- HashMap<uint16_t, HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>>> transitions;
- bool isFinal;
- const uint64_t ruleId;
+ Vector<uint64_t> finalRuleIds;
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ Vector<uint64_t> ruleIds;
+#endif
};
}
Modified: trunk/Source/WebCore/contentextensions/NFAToDFA.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/NFAToDFA.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/NFAToDFA.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -219,10 +219,9 @@
HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
for (unsigned nfaNodeId : source.nodeIdSet) {
const NFANode& nfaNode = source.nfaGraph[nfaNodeId];
- if (nfaNode.isFinal)
- actions.add(nfaNode.ruleId);
+ actions.add(nfaNode.finalRuleIds.begin(), nfaNode.finalRuleIds.end());
#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
- newDFANode.correspondingDFANodes.append(nfaNodeId);
+ newDFANode.correspondingNFANodes.append(nfaNodeId);
#endif
}
copyToVector(actions, newDFANode.actions);
Modified: trunk/Source/WebCore/contentextensions/URLFilterParser.cpp (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/URLFilterParser.cpp 2015-01-15 22:19:40 UTC (rev 178529)
@@ -35,6 +35,35 @@
namespace ContentExtensions {
+const uint16_t hasNonCharacterMask = 0x0080;
+const uint16_t characterMask = 0x0007F;
+const uint16_t newlineClassIDBuiltinMask = 0x100;
+
+static TrivialAtom trivialAtomFromASCIICharacter(char character)
+{
+ ASSERT(isASCII(character));
+
+ return static_cast<uint16_t>(character);
+}
+
+enum class TrivialAtomQuantifier : uint16_t {
+ ZeroOrOne = 0x1000,
+ ZeroToMany = 0x2000,
+ _OneToMany_ = 0x4000
+};
+
+static void quantifyTrivialAtom(TrivialAtom& trivialAtom, TrivialAtomQuantifier quantifier)
+{
+ ASSERT(trivialAtom & (hasNonCharacterMask | characterMask));
+ ASSERT(!(trivialAtom & 0xf000));
+ trivialAtom |= static_cast<uint16_t>(quantifier);
+}
+
+static TrivialAtom trivialAtomForNewlineClassIDBuiltin()
+{
+ return hasNonCharacterMask | newlineClassIDBuiltinMask;
+}
+
class GraphBuilder {
private:
struct BoundedSubGraph {
@@ -42,11 +71,11 @@
unsigned end;
};
public:
- GraphBuilder(NFA& nfa, uint64_t patternId)
+ GraphBuilder(NFA& nfa, PrefixTreeEntry& prefixTreeRoot, uint64_t patternId)
: m_nfa(nfa)
, m_patternId(patternId)
, m_activeGroup({ nfa.root(), nfa.root() })
- , m_lastAtom(m_activeGroup)
+ , m_lastPrefixTreeEntry(&prefixTreeRoot)
{
}
@@ -54,8 +83,11 @@
{
if (hasError())
return;
+
+ sinkPendingAtomIfNecessary();
+
if (m_activeGroup.start != m_activeGroup.end)
- m_nfa.setFinal(m_activeGroup.end);
+ m_nfa.setFinal(m_activeGroup.end, m_patternId);
else
fail(ASCIILiteral("The pattern cannot match anything."));
}
@@ -75,12 +107,13 @@
if (hasError())
return;
+ sinkPendingAtomIfNecessary();
+
+ char asciiChararacter = static_cast<char>(character);
m_hasValidAtom = true;
- unsigned newEnd = m_nfa.createNode(m_patternId);
- m_nfa.addTransition(m_lastAtom.end, newEnd, static_cast<char>(character));
- m_lastAtom.start = m_lastAtom.end;
- m_lastAtom.end = newEnd;
- m_activeGroup.end = m_lastAtom.end;
+
+ ASSERT(m_lastPrefixTreeEntry);
+ m_pendingTrivialAtom = trivialAtomFromASCIICharacter(asciiChararacter);
}
void atomBuiltInCharacterClass(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted)
@@ -89,14 +122,9 @@
return;
if (builtInCharacterClassID == JSC::Yarr::NewlineClassID && inverted) {
- // FIXME: handle new line properly.
m_hasValidAtom = true;
- unsigned newEnd = m_nfa.createNode(m_patternId);
- for (unsigned i = 1; i < 128; ++i)
- m_nfa.addTransition(m_lastAtom.end, newEnd, i);
- m_lastAtom.start = m_lastAtom.end;
- m_lastAtom.end = newEnd;
- m_activeGroup.end = m_lastAtom.end;
+ ASSERT(m_lastPrefixTreeEntry);
+ m_pendingTrivialAtom = trivialAtomForNewlineClassIDBuiltin();
} else
fail(ASCIILiteral("Character class is not supported."));
}
@@ -112,13 +140,13 @@
return;
}
+ ASSERT(m_lastPrefixTreeEntry);
if (!minimum && maximum == 1)
- m_nfa.addEpsilonTransition(m_lastAtom.start, m_lastAtom.end);
- else if (!minimum && maximum == JSC::Yarr::quantifyInfinite) {
- m_nfa.addEpsilonTransition(m_lastAtom.start, m_lastAtom.end);
- m_nfa.addEpsilonTransition(m_lastAtom.end, m_lastAtom.start);
- } else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite)
- m_nfa.addEpsilonTransition(m_lastAtom.end, m_lastAtom.start);
+ quantifyTrivialAtom(m_pendingTrivialAtom, TrivialAtomQuantifier::ZeroOrOne);
+ else if (!minimum && maximum == JSC::Yarr::quantifyInfinite)
+ quantifyTrivialAtom(m_pendingTrivialAtom, TrivialAtomQuantifier::ZeroToMany);
+ else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite)
+ quantifyTrivialAtom(m_pendingTrivialAtom, TrivialAtomQuantifier::OneToMany);
else
fail(ASCIILiteral("Arbitrary atom repetitions are not supported."));
}
@@ -189,7 +217,6 @@
fail(ASCIILiteral("Disjunctions are not supported yet."));
}
-
private:
bool hasError() const
{
@@ -200,43 +227,154 @@
{
if (hasError())
return;
+
+ if (m_newPrefixSubtreeRoot)
+ m_newPrefixSubtreeRoot->nextPattern.remove(m_newPrefixStaringPoint);
+
m_errorMessage = errorMessage;
}
+ void generateTransition(TrivialAtom trivialAtom, unsigned source, unsigned target)
+ {
+ if (trivialAtom & hasNonCharacterMask) {
+ ASSERT(trivialAtom & newlineClassIDBuiltinMask);
+ for (unsigned i = 1; i < 128; ++i)
+ m_nfa.addTransition(source, target, i);
+ } else
+ m_nfa.addTransition(source, target, static_cast<char>(trivialAtom & characterMask));
+ }
+
+ BoundedSubGraph sinkTrivialAtom(TrivialAtom trivialAtom, unsigned start)
+ {
+ if (trivialAtom & static_cast<uint16_t>(TrivialAtomQuantifier::ZeroOrOne)) {
+ unsigned newEnd = m_nfa.createNode();
+ m_nfa.addRuleId(newEnd, m_patternId);
+ generateTransition(trivialAtom, start, newEnd);
+ m_nfa.addEpsilonTransition(start, newEnd);
+ return { start, newEnd };
+ }
+
+ if (trivialAtom & static_cast<uint16_t>(TrivialAtomQuantifier::ZeroToMany)) {
+ unsigned repeatStart = m_nfa.createNode();
+ m_nfa.addRuleId(repeatStart, m_patternId);
+ unsigned repeatEnd = m_nfa.createNode();
+ m_nfa.addRuleId(repeatEnd, m_patternId);
+
+ generateTransition(trivialAtom, repeatStart, repeatEnd);
+ m_nfa.addEpsilonTransition(repeatEnd, repeatStart);
+
+ m_nfa.addEpsilonTransition(start, repeatStart);
+
+ unsigned kleenEnd = m_nfa.createNode();
+ m_nfa.addRuleId(kleenEnd, m_patternId);
+ m_nfa.addEpsilonTransition(repeatEnd, kleenEnd);
+ m_nfa.addEpsilonTransition(start, kleenEnd);
+ return { start, kleenEnd };
+ }
+
+ if (trivialAtom & static_cast<uint16_t>(TrivialAtomQuantifier::OneToMany)) {
+ unsigned repeatStart = m_nfa.createNode();
+ m_nfa.addRuleId(repeatStart, m_patternId);
+ unsigned repeatEnd = m_nfa.createNode();
+ m_nfa.addRuleId(repeatEnd, m_patternId);
+
+ generateTransition(trivialAtom, repeatStart, repeatEnd);
+ m_nfa.addEpsilonTransition(repeatEnd, repeatStart);
+
+ m_nfa.addEpsilonTransition(start, repeatStart);
+
+ unsigned afterRepeat = m_nfa.createNode();
+ m_nfa.addRuleId(afterRepeat, m_patternId);
+ m_nfa.addEpsilonTransition(repeatEnd, afterRepeat);
+ return { start, afterRepeat };
+ }
+
+ unsigned newEnd = m_nfa.createNode();
+ m_nfa.addRuleId(newEnd, m_patternId);
+ generateTransition(trivialAtom, start, newEnd);
+ return { start, newEnd };
+ }
+
+ void sinkPendingAtomIfNecessary()
+ {
+ ASSERT(m_lastPrefixTreeEntry);
+
+ if (!m_hasValidAtom)
+ return;
+
+ ASSERT(m_pendingTrivialAtom);
+
+ auto nextEntry = m_lastPrefixTreeEntry->nextPattern.find(m_pendingTrivialAtom);
+ if (nextEntry != m_lastPrefixTreeEntry->nextPattern.end()) {
+ m_lastPrefixTreeEntry = nextEntry->value.get();
+ m_nfa.addRuleId(m_lastPrefixTreeEntry->nfaNode, m_patternId);
+ } else {
+ std::unique_ptr<PrefixTreeEntry> nextPrefixTreeEntry = std::make_unique<PrefixTreeEntry>();
+
+ BoundedSubGraph newSubGraph = sinkTrivialAtom(m_pendingTrivialAtom, m_lastPrefixTreeEntry->nfaNode);
+ nextPrefixTreeEntry->nfaNode = newSubGraph.end;
+
+ auto addResult = m_lastPrefixTreeEntry->nextPattern.set(m_pendingTrivialAtom, WTF::move(nextPrefixTreeEntry));
+ ASSERT(addResult.isNewEntry);
+
+ m_newPrefixSubtreeRoot = m_lastPrefixTreeEntry;
+ m_newPrefixStaringPoint = m_pendingTrivialAtom;
+
+ m_lastPrefixTreeEntry = addResult.iterator->value.get();
+ }
+ ASSERT(m_lastPrefixTreeEntry);
+
+ m_activeGroup.end = m_lastPrefixTreeEntry->nfaNode;
+ m_pendingTrivialAtom = 0;
+ m_hasValidAtom = false;
+ }
+
NFA& m_nfa;
const uint64_t m_patternId;
BoundedSubGraph m_activeGroup;
+ PrefixTreeEntry* m_lastPrefixTreeEntry;
bool m_hasValidAtom = false;
- BoundedSubGraph m_lastAtom;
+ TrivialAtom m_pendingTrivialAtom = 0;
+ PrefixTreeEntry* m_newPrefixSubtreeRoot = nullptr;
+ TrivialAtom m_newPrefixStaringPoint = 0;
+
String m_errorMessage;
};
-void URLFilterParser::parse(const String& pattern, uint64_t patternId, NFA& nfa)
+URLFilterParser::URLFilterParser(NFA& nfa)
+ : m_nfa(nfa)
{
+ m_prefixTreeRoot.nfaNode = nfa.root();
+}
+
+String URLFilterParser::addPattern(const String& pattern, uint64_t patternId)
+{
if (!pattern.containsOnlyASCII())
- m_errorMessage = ASCIILiteral("URLFilterParser only supports ASCII patterns.");
+ return ASCIILiteral("URLFilterParser only supports ASCII patterns.");
ASSERT(!pattern.isEmpty());
if (pattern.isEmpty())
- return;
+ return ASCIILiteral("Empty pattern.");
- unsigned oldSize = nfa.graphSize();
+ unsigned oldSize = m_nfa.graphSize();
- GraphBuilder graphBuilder(nfa, patternId);
- const char* error = JSC::Yarr::parse(graphBuilder, pattern, 0);
- if (error)
- m_errorMessage = String(error);
- else
+ String error;
+
+ GraphBuilder graphBuilder(m_nfa, m_prefixTreeRoot, patternId);
+ error = String(JSC::Yarr::parse(graphBuilder, pattern, 0));
+ if (error.isNull())
graphBuilder.finalize();
- if (!error)
- m_errorMessage = graphBuilder.errorMessage();
+ if (error.isNull())
+ error = graphBuilder.errorMessage();
- if (hasError())
- nfa.restoreToGraphSize(oldSize);
+ if (!error.isNull())
+ m_nfa.restoreToGraphSize(oldSize);
+
+ return error;
}
} // namespace ContentExtensions
Modified: trunk/Source/WebCore/contentextensions/URLFilterParser.h (178528 => 178529)
--- trunk/Source/WebCore/contentextensions/URLFilterParser.h 2015-01-15 22:14:50 UTC (rev 178528)
+++ trunk/Source/WebCore/contentextensions/URLFilterParser.h 2015-01-15 22:19:40 UTC (rev 178529)
@@ -28,6 +28,7 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include <wtf/HashMap.h>
#include <wtf/text/WTFString.h>
namespace WebCore {
@@ -36,20 +37,22 @@
class NFA;
+typedef uint16_t TrivialAtom;
+
+struct PrefixTreeEntry {
+ unsigned nfaNode;
+ HashMap<TrivialAtom, std::unique_ptr<PrefixTreeEntry>> nextPattern;
+};
+
+
class URLFilterParser {
public:
- void parse(const String& pattern, uint64_t patternId, NFA&);
+ explicit URLFilterParser(NFA&);
+ String addPattern(const String& pattern, uint64_t patternId);
- bool hasError() const { return !m_errorMessage.isNull(); }
- String errorMessage() const { return m_errorMessage; }
-
private:
- struct BoundedSubGraph {
- unsigned start;
- unsigned end;
- };
-
- String m_errorMessage;
+ NFA& m_nfa;
+ PrefixTreeEntry m_prefixTreeRoot;
};
} // namespace ContentExtensions