Diff
Modified: trunk/Source/WebCore/ChangeLog (178435 => 178436)
--- trunk/Source/WebCore/ChangeLog 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/ChangeLog 2015-01-14 20:32:39 UTC (rev 178436)
@@ -1,3 +1,80 @@
+2015-01-14 Benjamin Poulain <[email protected]>
+
+ Do not create new set for every sub-operation when converting a NFA to DFA
+ https://bugs.webkit.org/show_bug.cgi?id=140380
+
+ Reviewed by Andreas Kling.
+
+ This is the first step toward making the NFA-to-DFA conversion more scalable: instead
+ of creating new sets for each step of the algorithm, we use two kinds of sets
+ and never do any copy.
+
+ The first new tool to do that is UniqueNodeIdSetImpl. It represents a set of NFA state corresponding to a DFA
+ state. It is unique per DFA state.
+
+ HashableNodeIdSet is a helper tool storing a UniqueNodeIdSetImpl.
+
+ The creation of new sets now goes like this:
+ 1) Get a NodeIdSet for each possible transition.
+ 2) For each transition:
+ 2a) Extend the NodeIdSet in place with its epsilon closure.
+ 2b) Get the UniqueNodeIdSetImpl corresponding to the new set we discovered.
+ 2c) If the UniqueNodeIdSetImpl is new, queue it for processing.
+
+ * WebCore.xcodeproj/project.pbxproj:
+ * contentextensions/ContentExtensionsDebugging.h: Copied from Source/WebCore/contentextensions/DFANode.h.
+ * contentextensions/ContentExtensionsBackend.cpp:
+ (WebCore::ContentExtensions::ContentExtensionsBackend::setRuleList):
+ * contentextensions/ContentExtensionsManager.cpp:
+ (WebCore::ContentExtensions::ExtensionsManager::loadExtension):
+ Added some logging to inspect more easily what the clients are sending.
+
+ * contentextensions/DFA.cpp:
+ * contentextensions/DFA.h:
+ * contentextensions/DFANode.h:
+ * contentextensions/NFA.cpp:
+ * contentextensions/NFA.h:
+ * contentextensions/NFAToDFA.cpp:
+
+ (WebCore::ContentExtensions::epsilonClosure):
+ Instead of returning a new HashSet, extend the input HashSet.
+
+ (WebCore::ContentExtensions::UniqueNodeIdSetImpl::buffer):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::UniqueNodeIdSet):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::operator=):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::~UniqueNodeIdSet):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::operator==):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::impl):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::hash):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::isEmptyValue):
+ (WebCore::ContentExtensions::UniqueNodeIdSet::isDeletedValue):
+ (WebCore::ContentExtensions::UniqueNodeIdSetHash::hash):
+ (WebCore::ContentExtensions::UniqueNodeIdSetHash::equal):
+ UniqueNodeIdSetImpl is a compact representation of a NodeIdSet corresponding to a DFA node.
+
+ It is never built directly, it is only built on demand through NodeIdSetToUniqueNodeIdSetTranslator
+ from a NodeIdSet.
+
+ (WebCore::ContentExtensions::NodeIdSetToUniqueNodeIdSetSource::NodeIdSetToUniqueNodeIdSetSource):
+ (WebCore::ContentExtensions::NodeIdSetToUniqueNodeIdSetTranslator::hash):
+ (WebCore::ContentExtensions::NodeIdSetToUniqueNodeIdSetTranslator::equal):
+ (WebCore::ContentExtensions::NodeIdSetToUniqueNodeIdSetTranslator::translate):
+ (WebCore::ContentExtensions::SetTransitionsExcludingEpsilon::operator[]):
+ (WebCore::ContentExtensions::SetTransitionsExcludingEpsilon::size):
+ (WebCore::ContentExtensions::SetTransitionsExcludingEpsilon::begin):
+ (WebCore::ContentExtensions::SetTransitionsExcludingEpsilon::end):
+ (WebCore::ContentExtensions::populateTransitionsExcludingEpsilon):
+ (WebCore::ContentExtensions::NFAToDFA::convert):
+ (WebCore::ContentExtensions::setTransitionsExcludingEpsilon): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSet::HashableNodeIdSet): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSet::operator=): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSet::isEmptyValue): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSet::isDeletedValue): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSet::nodeIdSet): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSetHash::hash): Deleted.
+ (WebCore::ContentExtensions::HashableNodeIdSetHash::equal): Deleted.
+ (WebCore::ContentExtensions::addDFAState): Deleted.
+
2015-01-14 Chris Dumez <[email protected]>
Make 'TypeName' parameter unnecessary in CSSPropertyNames.in
Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (178435 => 178436)
--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2015-01-14 20:32:39 UTC (rev 178436)
@@ -1014,6 +1014,7 @@
24F54EAD101FE914000AE741 /* ApplicationCacheHost.h in Headers */ = {isa = PBXBuildFile; fileRef = 24F54EAB101FE914000AE741 /* ApplicationCacheHost.h */; settings = {ATTRIBUTES = (Private, ); }; };
2542F4DA1166C25A00E89A86 /* UserGestureIndicator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2542F4D81166C25A00E89A86 /* UserGestureIndicator.cpp */; };
2542F4DB1166C25A00E89A86 /* UserGestureIndicator.h in Headers */ = {isa = PBXBuildFile; fileRef = 2542F4D91166C25A00E89A86 /* UserGestureIndicator.h */; settings = {ATTRIBUTES = (Private, ); }; };
+ 262391361A648CEE007251A3 /* ContentExtensionsDebugging.h in Headers */ = {isa = PBXBuildFile; fileRef = 262391351A648CEE007251A3 /* ContentExtensionsDebugging.h */; };
26255F0018878DFF0006E1FD /* UserAgentIOS.mm in Sources */ = {isa = PBXBuildFile; fileRef = 26255EFF18878DFF0006E1FD /* UserAgentIOS.mm */; };
26255F0318878E110006E1FD /* UserAgent.h in Headers */ = {isa = PBXBuildFile; fileRef = 26255F0118878E110006E1FD /* UserAgent.h */; settings = {ATTRIBUTES = (Private, ); }; };
26255F0418878E110006E1FD /* UserAgentMac.mm in Sources */ = {isa = PBXBuildFile; fileRef = 26255F0218878E110006E1FD /* UserAgentMac.mm */; };
@@ -8010,6 +8011,7 @@
2527CC9516BF95DD009DDAC0 /* PlatformSpeechSynthesisUtterance.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PlatformSpeechSynthesisUtterance.cpp; sourceTree = "<group>"; };
2542F4D81166C25A00E89A86 /* UserGestureIndicator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = UserGestureIndicator.cpp; sourceTree = "<group>"; };
2542F4D91166C25A00E89A86 /* UserGestureIndicator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserGestureIndicator.h; sourceTree = "<group>"; };
+ 262391351A648CEE007251A3 /* ContentExtensionsDebugging.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ContentExtensionsDebugging.h; sourceTree = "<group>"; };
26255EFF18878DFF0006E1FD /* UserAgentIOS.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = UserAgentIOS.mm; sourceTree = "<group>"; };
26255F0118878E110006E1FD /* UserAgent.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = UserAgent.h; sourceTree = "<group>"; };
26255F0218878E110006E1FD /* UserAgentMac.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = UserAgentMac.mm; sourceTree = "<group>"; };
@@ -15319,6 +15321,7 @@
26F0C89A1A2EC110002794F8 /* ContentExtensionRule.h */,
26F0C89D1A2EC3BE002794F8 /* ContentExtensionsBackend.cpp */,
26F0C89E1A2EC3BE002794F8 /* ContentExtensionsBackend.h */,
+ 262391351A648CEE007251A3 /* ContentExtensionsDebugging.h */,
26F0C8931A2D7A76002794F8 /* ContentExtensionsInterface.cpp */,
26F0C8911A2D79CB002794F8 /* ContentExtensionsInterface.h */,
26F0C8951A2E724B002794F8 /* ContentExtensionsManager.cpp */,
@@ -26155,6 +26158,7 @@
08250939128BD4D800E2ED8E /* SVGAnimatedTransformList.h in Headers */,
085A15931289A8DD002710E3 /* SVGAnimatedTransformListPropertyTearOff.h in Headers */,
439D334313A6911C00C20F4F /* SVGAnimatedType.h in Headers */,
+ 262391361A648CEE007251A3 /* ContentExtensionsDebugging.h in Headers */,
439D334413A6911C00C20F4F /* SVGAnimatedTypeAnimator.h in Headers */,
B22279900D00BF220071B782 /* SVGAnimateElement.h in Headers */,
832B843419D8E55100B26055 /* SVGAnimateElementBase.h in Headers */,
Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsBackend.cpp 2015-01-14 20:32:39 UTC (rev 178436)
@@ -28,10 +28,12 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include "NFA.h"
#include "NFAToDFA.h"
#include "URL.h"
#include "URLFilterParser.h"
+#include <wtf/CurrentTime.h>
#include <wtf/DataLog.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/text/CString.h>
@@ -57,6 +59,10 @@
return;
}
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double nfaBuildTimeStart = monotonicallyIncreasingTime();
+#endif
+
NFA nfa;
for (unsigned ruleIndex = 0; ruleIndex < ruleList.size(); ++ruleIndex) {
const ContentExtensionRule& contentExtensionRule = ruleList[ruleIndex];
@@ -72,8 +78,32 @@
}
}
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double nfaBuildTimeEnd = monotonicallyIncreasingTime();
+ dataLogF(" Time spent building the NFA: %f\n", (nfaBuildTimeEnd - nfaBuildTimeStart));
+#endif
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ nfa.debugPrintDot();
+#endif
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double dfaBuildTimeStart = monotonicallyIncreasingTime();
+#endif
+
+ CompiledContentExtension compiledContentExtension = { NFAToDFA::convert(nfa), ruleList };
+
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double dfaBuildTimeEnd = monotonicallyIncreasingTime();
+ dataLogF(" Time spent building the DFA: %f\n", (dfaBuildTimeEnd - dfaBuildTimeStart));
+#endif
+
// FIXME: never add a DFA that only matches the empty set.
- CompiledContentExtension compiledContentExtension = { NFAToDFA::convert(nfa), ruleList };
+
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ compiledContentExtension.dfa.debugPrintDot();
+#endif
+
m_ruleLists.set(identifier, compiledContentExtension);
}
Added: trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h (0 => 178436)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h (rev 0)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsDebugging.h 2015-01-14 20:32:39 UTC (rev 178436)
@@ -0,0 +1,37 @@
+/*
+ * Copyright (C) 2014, 2015 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS''
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ContentExtensionsDebugging_h
+#define ContentExtensionsDebugging_h
+
+#if ENABLE(CONTENT_EXTENSIONS)
+
+#define CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING 0
+
+#define CONTENT_EXTENSIONS_PERFORMANCE_REPORTING 0
+
+#endif // ENABLE(CONTENT_EXTENSIONS)
+
+#endif // ContentExtensionsDebugging_h
Modified: trunk/Source/WebCore/contentextensions/ContentExtensionsManager.cpp (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/ContentExtensionsManager.cpp 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/ContentExtensionsManager.cpp 2015-01-14 20:32:39 UTC (rev 178436)
@@ -30,12 +30,14 @@
#include "ContentExtensionRule.h"
#include "ContentExtensionsBackend.h"
+#include "ContentExtensionsDebugging.h"
#include <_javascript_Core/IdentifierInlines.h>
#include <_javascript_Core/JSCJSValueInlines.h>
#include <_javascript_Core/JSGlobalObject.h>
#include <_javascript_Core/JSONObject.h>
#include <_javascript_Core/StructureInlines.h>
#include <_javascript_Core/VM.h>
+#include <wtf/CurrentTime.h>
#include <wtf/text/WTFString.h>
using namespace JSC;
@@ -154,6 +156,9 @@
void loadExtension(const String& identifier, const String& rules)
{
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double loadExtensionStartTime = monotonicallyIncreasingTime();
+#endif
RefPtr<VM> vm = VM::create();
JSLockHolder locker(vm.get());
@@ -169,6 +174,11 @@
return;
}
+#if CONTENT_EXTENSIONS_PERFORMANCE_REPORTING
+ double loadExtensionEndTime = monotonicallyIncreasingTime();
+ dataLogF("Time spent loading extension %s: %f\n", identifier.utf8().data(), (loadExtensionEndTime - loadExtensionStartTime));
+#endif
+
ContentExtensionsBackend::sharedInstance().setRuleList(identifier, ruleList);
}
Modified: trunk/Source/WebCore/contentextensions/DFA.cpp (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/DFA.cpp 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/DFA.cpp 2015-01-14 20:32:39 UTC (rev 178436)
@@ -79,7 +79,7 @@
return m_nodes[currentState].actions;
}
-#ifndef NDEBUG
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
static void printRange(bool firstRange, char rangeStart, char rangeEnd)
{
if (!firstRange)
Modified: trunk/Source/WebCore/contentextensions/DFA.h (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/DFA.h 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/DFA.h 2015-01-14 20:32:39 UTC (rev 178436)
@@ -28,6 +28,7 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include "DFANode.h"
#include <wtf/Vector.h>
@@ -50,7 +51,7 @@
unsigned nextState(unsigned currentState, char character, bool& ok) const;
const Vector<uint64_t>& actions(unsigned currentState) const;
-#ifndef NDEBUG
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
void debugPrintDot() const;
#endif
Modified: trunk/Source/WebCore/contentextensions/DFANode.h (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/DFANode.h 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/DFANode.h 2015-01-14 20:32:39 UTC (rev 178436)
@@ -28,6 +28,7 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include <wtf/HashMap.h>
#include <wtf/Vector.h>
@@ -42,7 +43,7 @@
HashMap<uint16_t, unsigned> transitions;
Vector<uint64_t> actions;
-#ifndef NDEBUG
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
Vector<unsigned> correspondingDFANodes;
#endif
};
Modified: trunk/Source/WebCore/contentextensions/NFA.cpp (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/NFA.cpp 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/NFA.cpp 2015-01-14 20:32:39 UTC (rev 178436)
@@ -85,7 +85,7 @@
m_nodes.shrink(size);
}
-#ifndef NDEBUG
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
static void printRange(bool firstRange, uint16_t rangeStart, uint16_t rangeEnd, uint16_t epsilonTransitionCharacter)
{
Modified: trunk/Source/WebCore/contentextensions/NFA.h (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/NFA.h 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/NFA.h 2015-01-14 20:32:39 UTC (rev 178436)
@@ -28,6 +28,7 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include "NFANode.h"
#include <limits>
#include <wtf/Vector.h>
@@ -53,7 +54,7 @@
unsigned graphSize() const;
void restoreToGraphSize(unsigned);
-#ifndef NDEBUG
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
void debugPrintDot() const;
#endif
Modified: trunk/Source/WebCore/contentextensions/NFAToDFA.cpp (178435 => 178436)
--- trunk/Source/WebCore/contentextensions/NFAToDFA.cpp 2015-01-14 20:16:23 UTC (rev 178435)
+++ trunk/Source/WebCore/contentextensions/NFAToDFA.cpp 2015-01-14 20:32:39 UTC (rev 178436)
@@ -28,8 +28,10 @@
#if ENABLE(CONTENT_EXTENSIONS)
+#include "ContentExtensionsDebugging.h"
#include "DFANode.h"
#include "NFA.h"
+#include <wtf/DataLog.h>
#include <wtf/HashMap.h>
#include <wtf/HashSet.h>
@@ -37,209 +39,296 @@
namespace ContentExtensions {
+// FIXME: set a better initial size.
+// FIXME: include the hash inside NodeIdSet.
typedef HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> NodeIdSet;
-static NodeIdSet epsilonClosure(const NodeIdSet& nodeSet, const Vector<NFANode>& graph, unsigned epsilonTransitionCharacter)
+static inline void epsilonClosure(NodeIdSet& nodeSet, const Vector<NFANode>& graph, unsigned epsilonTransitionCharacter)
{
ASSERT(!nodeSet.isEmpty());
ASSERT(!graph.isEmpty());
- // We go breadth-first first into our graph following all the epsilon transition. At each generation,
- // discoveredNodes contains all the new nodes we have discovered by following a single epsilon transition
- // out of the previous set of nodes.
- NodeIdSet outputNodeSet = nodeSet;
- NodeIdSet discoveredNodes = nodeSet;
+ // FIXME: fine a good inline size for unprocessedNodes.
+ Vector<unsigned> unprocessedNodes;
+ copyToVector(nodeSet, unprocessedNodes);
+
do {
- outputNodeSet.add(discoveredNodes.begin(), discoveredNodes.end());
-
- NodeIdSet nextGenerationDiscoveredNodes;
-
- for (unsigned nodeId : discoveredNodes) {
- const NFANode& node = graph[nodeId];
- auto epsilonTransitionSlot = node.transitions.find(epsilonTransitionCharacter);
- if (epsilonTransitionSlot != node.transitions.end()) {
- const HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>>& targets = epsilonTransitionSlot->value;
- for (unsigned targetNodeId : targets) {
- if (!outputNodeSet.contains(targetNodeId))
- nextGenerationDiscoveredNodes.add(targetNodeId);
- }
+ unsigned unprocessedNodeId = unprocessedNodes.takeLast();
+ const NFANode& node = graph[unprocessedNodeId];
+ auto epsilonTransitionSlot = node.transitions.find(epsilonTransitionCharacter);
+ if (epsilonTransitionSlot != node.transitions.end()) {
+ for (unsigned targetNodeId : epsilonTransitionSlot->value) {
+ auto addResult = nodeSet.add(targetNodeId);
+ if (addResult.isNewEntry)
+ unprocessedNodes.append(targetNodeId);
}
}
-
- discoveredNodes = nextGenerationDiscoveredNodes;
- } while (!discoveredNodes.isEmpty());
-
- ASSERT(!outputNodeSet.isEmpty());
- return outputNodeSet;
+ } while (!unprocessedNodes.isEmpty());
}
-typedef HashMap<uint16_t, NodeIdSet, DefaultHash<uint16_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint16_t>> SetTransitionsExcludingEpsilon;
+struct UniqueNodeIdSetImpl {
+ unsigned* buffer()
+ {
+ return m_buffer;
+ }
-static SetTransitionsExcludingEpsilon setTransitionsExcludingEpsilon(const NodeIdSet& nodeSet, const Vector<NFANode>& graph, unsigned epsilonTransitionCharacter)
-{
- ASSERT(!nodeSet.isEmpty());
- ASSERT(!graph.isEmpty());
-
- SetTransitionsExcludingEpsilon outputSetTransitionsExcludingEpsilon;
-
- for (unsigned nodeId : nodeSet) {
- const NFANode& node = graph[nodeId];
- for (const auto& transitionSlot : node.transitions) {
- if (transitionSlot.key != epsilonTransitionCharacter) {
- auto existingTransition = outputSetTransitionsExcludingEpsilon.find(transitionSlot.key);
- if (existingTransition != outputSetTransitionsExcludingEpsilon.end())
- existingTransition->value.add(transitionSlot.value.begin(), transitionSlot.value.end());
- else {
- NodeIdSet newSet;
- newSet.add(transitionSlot.value.begin(), transitionSlot.value.end());
- outputSetTransitionsExcludingEpsilon.add(transitionSlot.key, newSet);
- }
- }
- }
+ const unsigned* buffer() const
+ {
+ return m_buffer;
}
- return outputSetTransitionsExcludingEpsilon;
-}
+ unsigned m_size;
+ unsigned m_hash;
+ unsigned m_dfaNodeId;
+private:
+ unsigned m_buffer[1];
+};
-class HashableNodeIdSet {
+class UniqueNodeIdSet {
public:
+ UniqueNodeIdSet() { }
enum EmptyValueTag { EmptyValue };
enum DeletedValueTag { DeletedValue };
- HashableNodeIdSet(EmptyValueTag) { }
- HashableNodeIdSet(DeletedValueTag)
- : m_isDeleted(true)
+ UniqueNodeIdSet(EmptyValueTag) { }
+ UniqueNodeIdSet(DeletedValueTag)
+ : m_uniqueNodeIdSetBuffer(reinterpret_cast_ptr<UniqueNodeIdSetImpl*>(-1))
{
}
- HashableNodeIdSet(const NodeIdSet& nodeIdSet)
- : m_nodeIdSet(nodeIdSet)
+ UniqueNodeIdSet(const NodeIdSet& nodeIdSet, unsigned hash, unsigned dfaNodeId)
{
- ASSERT(!nodeIdSet.isEmpty());
+ ASSERT(nodeIdSet.size());
+
+ unsigned size = nodeIdSet.size();
+ size_t byteSize = sizeof(UniqueNodeIdSetImpl) + (size - 1) * sizeof(unsigned);
+ m_uniqueNodeIdSetBuffer = static_cast<UniqueNodeIdSetImpl*>(fastMalloc(byteSize));
+
+ m_uniqueNodeIdSetBuffer->m_size = size;
+ m_uniqueNodeIdSetBuffer->m_hash = hash;
+ m_uniqueNodeIdSetBuffer->m_dfaNodeId = dfaNodeId;
+
+ unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
+ for (unsigned nodeId : nodeIdSet) {
+ *buffer = nodeId;
+ ++buffer;
+ }
}
- HashableNodeIdSet(HashableNodeIdSet&& other)
- : m_nodeIdSet(other.m_nodeIdSet)
- , m_isDeleted(other.m_isDeleted)
+ UniqueNodeIdSet(UniqueNodeIdSet&& other)
+ : m_uniqueNodeIdSetBuffer(other.m_uniqueNodeIdSetBuffer)
{
- other.m_nodeIdSet.clear();
- other.m_isDeleted = false;
+ other.m_uniqueNodeIdSetBuffer = nullptr;
}
- HashableNodeIdSet& operator=(HashableNodeIdSet&& other)
+ UniqueNodeIdSet& operator=(UniqueNodeIdSet&& other)
{
- m_nodeIdSet = other.m_nodeIdSet;
- other.m_nodeIdSet.clear();
- m_isDeleted = other.m_isDeleted;
- other.m_isDeleted = false;
+ m_uniqueNodeIdSetBuffer = other.m_uniqueNodeIdSetBuffer;
+ other.m_uniqueNodeIdSetBuffer = nullptr;
return *this;
}
- bool isEmptyValue() const { return m_nodeIdSet.isEmpty(); }
- bool isDeletedValue() const { return m_isDeleted; }
+ ~UniqueNodeIdSet()
+ {
+ fastFree(m_uniqueNodeIdSetBuffer);
+ }
- NodeIdSet nodeIdSet() const { return m_nodeIdSet; }
+ bool operator==(const UniqueNodeIdSet& other) const
+ {
+ return m_uniqueNodeIdSetBuffer == other.m_uniqueNodeIdSetBuffer;
+ }
+ bool operator==(const NodeIdSet& other) const
+ {
+ if (m_uniqueNodeIdSetBuffer->m_size != static_cast<unsigned>(other.size()))
+ return false;
+ unsigned* buffer = m_uniqueNodeIdSetBuffer->buffer();
+ for (unsigned i = 0; i < m_uniqueNodeIdSetBuffer->m_size; ++i) {
+ if (!other.contains(buffer[i]))
+ return false;
+ }
+ return true;
+ }
+
+ UniqueNodeIdSetImpl* impl() const { return m_uniqueNodeIdSetBuffer; }
+
+ unsigned hash() const { return m_uniqueNodeIdSetBuffer->m_hash; }
+ bool isEmptyValue() const { return !m_uniqueNodeIdSetBuffer; }
+ bool isDeletedValue() const { return m_uniqueNodeIdSetBuffer == reinterpret_cast_ptr<UniqueNodeIdSetImpl*>(-1); }
+
private:
- NodeIdSet m_nodeIdSet;
- bool m_isDeleted = false;
+ UniqueNodeIdSetImpl* m_uniqueNodeIdSetBuffer = nullptr;
};
-struct HashableNodeIdSetHash {
- static unsigned hash(const HashableNodeIdSet& p)
+struct UniqueNodeIdSetHash {
+ static unsigned hash(const UniqueNodeIdSet& p)
{
- unsigned hash = 4207445155;
- for (unsigned nodeId : p.nodeIdSet())
- hash += DefaultHash<unsigned>::Hash::hash(nodeId);
- return hash;
+ return p.hash();
}
- static bool equal(const HashableNodeIdSet& a, const HashableNodeIdSet& b)
+ static bool equal(const UniqueNodeIdSet& a, const UniqueNodeIdSet& b)
{
- return a.nodeIdSet() == b.nodeIdSet() && a.isDeletedValue() == b.isDeletedValue();
+ return a == b;
}
- static const bool safeToCompareToEmptyOrDeleted = true;
+ // It would be fine to compare empty or deleted here, but not for the HashTranslator.
+ static const bool safeToCompareToEmptyOrDeleted = false;
};
-struct HashableNodeIdSetHashTraits : public WTF::CustomHashTraits<HashableNodeIdSet> { };
+struct UniqueNodeIdSetHashHashTraits : public WTF::CustomHashTraits<UniqueNodeIdSet> {
+ static const bool emptyValueIsZero = true;
-typedef HashMap<HashableNodeIdSet, unsigned, HashableNodeIdSetHash, HashableNodeIdSetHashTraits> NFAToDFANodeMap;
+ // FIXME: Get a good size.
+ static const int minimumTableSize = 128;
+};
-static unsigned addDFAState(Vector<DFANode>& dfaGraph, NFAToDFANodeMap& nfaToDFANodeMap, const Vector<NFANode>& nfaGraph, NodeIdSet nfaNodes, unsigned epsilonTransitionCharacter)
-{
- ASSERT(!nfaToDFANodeMap.contains(nfaNodes));
- ASSERT_UNUSED(epsilonTransitionCharacter, epsilonClosure(nfaNodes, nfaGraph, epsilonTransitionCharacter) == nfaNodes);
+typedef HashSet<std::unique_ptr<UniqueNodeIdSet>, UniqueNodeIdSetHash, UniqueNodeIdSetHashHashTraits> UniqueNodeIdSetTable;
- DFANode newDFANode;
+struct NodeIdSetToUniqueNodeIdSetSource {
+ NodeIdSetToUniqueNodeIdSetSource(Vector<DFANode>& dfaGraph, const Vector<NFANode>& nfaGraph, const NodeIdSet& nodeIdSet)
+ : dfaGraph(dfaGraph)
+ , nfaGraph(nfaGraph)
+ , nodeIdSet(nodeIdSet)
+ {
+ // The hashing operation must be independant of the nodeId.
+ unsigned hash = 4207445155;
+ for (unsigned nodeId : nodeIdSet)
+ hash += nodeId;
+ this->hash = DefaultHash<unsigned>::Hash::hash(hash);
+ }
+ Vector<DFANode>& dfaGraph;
+ const Vector<NFANode>& nfaGraph;
+ const NodeIdSet& nodeIdSet;
+ unsigned hash;
+};
- HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
- for (unsigned nfaNodeId : nfaNodes) {
- const NFANode& nfaNode = nfaGraph[nfaNodeId];
- if (nfaNode.isFinal)
- actions.add(nfaNode.ruleId);
-#ifndef NDEBUG
- newDFANode.correspondingDFANodes.append(nfaNodeId);
+struct NodeIdSetToUniqueNodeIdSetTranslator {
+ static unsigned hash(const NodeIdSetToUniqueNodeIdSetSource& source)
+ {
+ return source.hash;
+ }
+
+ static inline bool equal(const UniqueNodeIdSet& a, const NodeIdSetToUniqueNodeIdSetSource& b)
+ {
+ return a == b.nodeIdSet;
+ }
+
+ static void translate(UniqueNodeIdSet& location, const NodeIdSetToUniqueNodeIdSetSource& source, unsigned hash)
+ {
+ DFANode newDFANode;
+
+ HashSet<uint64_t, DefaultHash<uint64_t>::Hash, WTF::UnsignedWithZeroKeyHashTraits<uint64_t>> actions;
+ for (unsigned nfaNodeId : source.nodeIdSet) {
+ const NFANode& nfaNode = source.nfaGraph[nfaNodeId];
+ if (nfaNode.isFinal)
+ actions.add(nfaNode.ruleId);
+#if CONTENT_EXTENSIONS_STATE_MACHINE_DEBUGGING
+ newDFANode.correspondingDFANodes.append(nfaNodeId);
#endif
+ }
+ copyToVector(actions, newDFANode.actions);
+
+ unsigned dfaNodeId = source.dfaGraph.size();
+ source.dfaGraph.append(newDFANode);
+ new (NotNull, &location) UniqueNodeIdSet(source.nodeIdSet, hash, dfaNodeId);
+
+ ASSERT(location.impl());
}
+};
- for (uint64_t action : actions)
- newDFANode.actions.append(action);
+class SetTransitionsExcludingEpsilon {
+public:
+ NodeIdSet& operator[](unsigned index)
+ {
+ ASSERT(index < size());
+ return m_targets[index];
+ }
- unsigned dfaNodeId = dfaGraph.size();
- dfaGraph.append(newDFANode);
- nfaToDFANodeMap.add(nfaNodes, dfaNodeId);
- return dfaNodeId;
+ unsigned size() const
+ {
+ return WTF_ARRAY_LENGTH(m_targets);
+ }
+
+ NodeIdSet* begin()
+ {
+ return m_targets;
+ }
+
+ NodeIdSet* end()
+ {
+ return m_targets + size();
+ }
+
+private:
+ NodeIdSet m_targets[128];
+};
+
+static inline void populateTransitionsExcludingEpsilon(SetTransitionsExcludingEpsilon& setTransitionsExcludingEpsilon, const UniqueNodeIdSetImpl& sourceNodeSet, const Vector<NFANode>& graph, unsigned epsilonTransitionCharacter)
+{
+ ASSERT(!graph.isEmpty());
+#if !ASSERT_DISABLED
+ for (const NodeIdSet& set : setTransitionsExcludingEpsilon)
+ ASSERT(set.isEmpty());
+#endif
+
+ const unsigned* buffer = sourceNodeSet.buffer();
+ for (unsigned i = 0; i < sourceNodeSet.m_size; ++i) {
+ unsigned nodeId = buffer[i];
+ const NFANode& node = graph[nodeId];
+ for (const auto& transitionSlot : node.transitions) {
+ if (transitionSlot.key != epsilonTransitionCharacter)
+ setTransitionsExcludingEpsilon[transitionSlot.key].add(transitionSlot.value.begin(), transitionSlot.value.end());
+ }
+ }
}
-typedef HashSet<HashableNodeIdSet, HashableNodeIdSetHash, HashableNodeIdSetHashTraits> SetOfNodeSet;
-
DFA NFAToDFA::convert(const NFA& nfa)
{
Vector<DFANode> dfaGraph;
- NFAToDFANodeMap nfaToDFANodeMap;
- SetOfNodeSet processedStateSets;
- SetOfNodeSet unprocessedStateSets;
-
const Vector<NFANode>& nfaGraph = nfa.m_nodes;
NodeIdSet initialSet({ nfa.root() });
- NodeIdSet closedInitialSet = epsilonClosure(initialSet, nfaGraph, NFA::epsilonTransitionCharacter);
+ epsilonClosure(initialSet, nfaGraph, NFA::epsilonTransitionCharacter);
- addDFAState(dfaGraph, nfaToDFANodeMap, nfaGraph, closedInitialSet, NFA::epsilonTransitionCharacter);
- unprocessedStateSets.add(closedInitialSet);
+ UniqueNodeIdSetTable uniqueNodeIdSetTable;
+ NodeIdSetToUniqueNodeIdSetSource initialNodeIdSetToUniqueNodeIdSetSource(dfaGraph, nfaGraph, initialSet);
+ auto addResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(initialNodeIdSetToUniqueNodeIdSetSource);
+
+ Vector<UniqueNodeIdSetImpl*> unprocessedNodes;
+ unprocessedNodes.append(addResult.iterator->impl());
+
+ SetTransitionsExcludingEpsilon transitionsFromClosedSet;
+
do {
- HashableNodeIdSet stateSet = unprocessedStateSets.takeAny();
+ UniqueNodeIdSetImpl* uniqueNodeIdSetImpl = unprocessedNodes.takeLast();
- ASSERT(!processedStateSets.contains(stateSet.nodeIdSet()));
- processedStateSets.add(stateSet.nodeIdSet());
+ unsigned dfaNodeId = uniqueNodeIdSetImpl->m_dfaNodeId;
+ populateTransitionsExcludingEpsilon(transitionsFromClosedSet, *uniqueNodeIdSetImpl, nfaGraph, NFA::epsilonTransitionCharacter);
- unsigned dfaNodeId = nfaToDFANodeMap.get(stateSet);
+ // FIXME: there should not be any transition on key 0.
+ for (unsigned key = 0; key < transitionsFromClosedSet.size(); ++key) {
+ NodeIdSet& targetNodeSet = transitionsFromClosedSet[key];
- SetTransitionsExcludingEpsilon transitionsFromClosedSet = setTransitionsExcludingEpsilon(stateSet.nodeIdSet(), nfaGraph, NFA::epsilonTransitionCharacter);
- for (const auto& transitionSlot : transitionsFromClosedSet) {
- NodeIdSet closedTargetNodeSet = epsilonClosure(transitionSlot.value, nfaGraph, NFA::epsilonTransitionCharacter);
- unsigned newDFANodeId;
+ if (targetNodeSet.isEmpty())
+ continue;
- const auto& existingNFAToDFAAssociation = nfaToDFANodeMap.find(closedTargetNodeSet);
- if (existingNFAToDFAAssociation != nfaToDFANodeMap.end())
- newDFANodeId = existingNFAToDFAAssociation->value;
- else
- newDFANodeId = addDFAState(dfaGraph, nfaToDFANodeMap, nfaGraph, closedTargetNodeSet, NFA::epsilonTransitionCharacter);
+ epsilonClosure(targetNodeSet, nfaGraph, NFA::epsilonTransitionCharacter);
- ASSERT(newDFANodeId < dfaGraph.size());
+ NodeIdSetToUniqueNodeIdSetSource nodeIdSetToUniqueNodeIdSetSource(dfaGraph, nfaGraph, targetNodeSet);
+ auto uniqueNodeIdAddResult = uniqueNodeIdSetTable.add<NodeIdSetToUniqueNodeIdSetTranslator>(nodeIdSetToUniqueNodeIdSetSource);
- const auto addResult = dfaGraph[dfaNodeId].transitions.set(transitionSlot.key, newDFANodeId);
+ unsigned targetNodeId = uniqueNodeIdAddResult.iterator->impl()->m_dfaNodeId;
+ const auto addResult = dfaGraph[dfaNodeId].transitions.add(key, targetNodeId);
ASSERT_UNUSED(addResult, addResult.isNewEntry);
- if (!processedStateSets.contains(closedTargetNodeSet))
- unprocessedStateSets.add(closedTargetNodeSet);
+ if (uniqueNodeIdAddResult.isNewEntry)
+ unprocessedNodes.append(uniqueNodeIdAddResult.iterator->impl());
+
+ targetNodeSet.clear();
}
- } while (!unprocessedStateSets.isEmpty());
+ } while (!unprocessedNodes.isEmpty());
- ASSERT(processedStateSets.size() == nfaToDFANodeMap.size());
-
return DFA(WTF::move(dfaGraph), 0);
}