Title: [234713] trunk/Source/_javascript_Core
Revision
234713
Author
[email protected]
Date
2018-08-08 15:42:30 -0700 (Wed, 08 Aug 2018)

Log Message

Yarr JIT should include annotations with dumpDisassembly=true
https://bugs.webkit.org/show_bug.cgi?id=188415

Reviewed by Yusuke Suzuki.

Created a YarrDisassembler class that handles annotations similar to the baseline JIT.
Given that Yarr creates matching code by going through the YarrPattern ops forward and
then the backtracking code by going through the YarrPattern ops in reverse order, the
disassembler needs to do the same thing.

Restructured some of the logging code in YarrPattern to eliminate redundant code and factor
out simple methods for what was needed by the YarrDisassembler.

Here is abbreviated sample output after this change.

Generated JIT code for 8-bit regular _expression_ /ab*c/:
    Code at [0x469561c03720, 0x469561c03840):
        0x469561c03720: push %rbp
        0x469561c03721: mov %rsp, %rbp
        ...
        0x469561c03762: sub $0x40, %rsp
     == Matching ==
   0:OpBodyAlternativeBegin minimum size 2
        0x469561c03766: add $0x2, %esi
        0x469561c03769: cmp %edx, %esi
        0x469561c0376b: ja 0x469561c037fa
   1:OpTerm TypePatternCharacter 'a'
        0x469561c03771: movzx -0x2(%rdi,%rsi), %eax
        0x469561c03776: cmp $0x61, %eax
        0x469561c03779: jnz 0x469561c037e9
   2:OpTerm TypePatternCharacter 'b' {0,...} greedy
        0x469561c0377f: xor %r9d, %r9d
        0x469561c03782: cmp %edx, %esi
        0x469561c03784: jz 0x469561c037a2
        ...
        0x469561c0379d: jmp 0x469561c03782
        0x469561c037a2: mov %r9, 0x8(%rsp)
   3:OpTerm TypePatternCharacter 'c'
        0x469561c037a7: movzx -0x1(%rdi,%rsi), %eax
        0x469561c037ac: cmp $0x63, %eax
        0x469561c037af: jnz 0x469561c037d1
   4:OpBodyAlternativeEnd
        0x469561c037b5: add $0x40, %rsp
        ...
        0x469561c037cf: pop %rbp
        0x469561c037d0: ret
     == Backtracking ==
   4:OpBodyAlternativeEnd
   3:OpTerm TypePatternCharacter 'c'
   2:OpTerm TypePatternCharacter 'b' {0,...} greedy
        0x469561c037d1: mov 0x8(%rsp), %r9
        ...
        0x469561c037e4: jmp 0x469561c037a2
   1:OpTerm TypePatternCharacter 'a'
   0:OpBodyAlternativeBegin minimum size 2
        0x469561c037e9: mov %rsi, %rax
        ...
        0x469561c0382f: pop %rbp
        0x469561c03830: ret

* _javascript_Core.xcodeproj/project.pbxproj:
* Sources.txt:
* runtime/RegExp.cpp:
(JSC::RegExp::compile):
(JSC::RegExp::compileMatchOnly):
* yarr/YarrDisassembler.cpp: Added.
(JSC::Yarr::YarrDisassembler::indentString):
(JSC::Yarr::YarrDisassembler::YarrDisassembler):
(JSC::Yarr::YarrDisassembler::~YarrDisassembler):
(JSC::Yarr::YarrDisassembler::dump):
(JSC::Yarr::YarrDisassembler::dumpHeader):
(JSC::Yarr::YarrDisassembler::dumpVectorForInstructions):
(JSC::Yarr::YarrDisassembler::dumpForInstructions):
(JSC::Yarr::YarrDisassembler::dumpDisassembly):
* yarr/YarrDisassembler.h: Added.
(JSC::Yarr::YarrJITInfo::~YarrJITInfo):
(JSC::Yarr::YarrDisassembler::setStartOfCode):
(JSC::Yarr::YarrDisassembler::setForGenerate):
(JSC::Yarr::YarrDisassembler::setForBacktrack):
(JSC::Yarr::YarrDisassembler::setEndOfGenerate):
(JSC::Yarr::YarrDisassembler::setEndOfBacktrack):
(JSC::Yarr::YarrDisassembler::setEndOfCode):
(JSC::Yarr::YarrDisassembler::indentString):
* yarr/YarrJIT.cpp:
(JSC::Yarr::YarrGenerator::generate):
(JSC::Yarr::YarrGenerator::backtrack):
(JSC::Yarr::YarrGenerator::YarrGenerator):
(JSC::Yarr::YarrGenerator::compile):
(JSC::Yarr::jitCompile):
* yarr/YarrJIT.h:
* yarr/YarrPattern.cpp:
(JSC::Yarr::dumpCharacterClass):
(JSC::Yarr::PatternTerm::dump):
(JSC::Yarr::YarrPattern::dumpPatternString):
(JSC::Yarr::YarrPattern::dumpPattern):
* yarr/YarrPattern.h:

Modified Paths

Added Paths

Diff

Modified: trunk/Source/_javascript_Core/ChangeLog (234712 => 234713)


--- trunk/Source/_javascript_Core/ChangeLog	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/ChangeLog	2018-08-08 22:42:30 UTC (rev 234713)
@@ -1,3 +1,102 @@
+2018-08-08  Michael Saboff  <[email protected]>
+
+        Yarr JIT should include annotations with dumpDisassembly=true
+        https://bugs.webkit.org/show_bug.cgi?id=188415
+
+        Reviewed by Yusuke Suzuki.
+
+        Created a YarrDisassembler class that handles annotations similar to the baseline JIT.
+        Given that Yarr creates matching code by going through the YarrPattern ops forward and
+        then the backtracking code by going through the YarrPattern ops in reverse order, the
+        disassembler needs to do the same thing.
+
+        Restructured some of the logging code in YarrPattern to eliminate redundant code and factor
+        out simple methods for what was needed by the YarrDisassembler.
+
+        Here is abbreviated sample output after this change.
+
+        Generated JIT code for 8-bit regular _expression_ /ab*c/:
+            Code at [0x469561c03720, 0x469561c03840):
+                0x469561c03720: push %rbp
+                0x469561c03721: mov %rsp, %rbp
+                ...
+                0x469561c03762: sub $0x40, %rsp
+             == Matching ==
+           0:OpBodyAlternativeBegin minimum size 2
+                0x469561c03766: add $0x2, %esi
+                0x469561c03769: cmp %edx, %esi
+                0x469561c0376b: ja 0x469561c037fa
+           1:OpTerm TypePatternCharacter 'a'
+                0x469561c03771: movzx -0x2(%rdi,%rsi), %eax
+                0x469561c03776: cmp $0x61, %eax
+                0x469561c03779: jnz 0x469561c037e9
+           2:OpTerm TypePatternCharacter 'b' {0,...} greedy
+                0x469561c0377f: xor %r9d, %r9d
+                0x469561c03782: cmp %edx, %esi
+                0x469561c03784: jz 0x469561c037a2
+                ...
+                0x469561c0379d: jmp 0x469561c03782
+                0x469561c037a2: mov %r9, 0x8(%rsp)
+           3:OpTerm TypePatternCharacter 'c'
+                0x469561c037a7: movzx -0x1(%rdi,%rsi), %eax
+                0x469561c037ac: cmp $0x63, %eax
+                0x469561c037af: jnz 0x469561c037d1
+           4:OpBodyAlternativeEnd
+                0x469561c037b5: add $0x40, %rsp
+                ...
+                0x469561c037cf: pop %rbp
+                0x469561c037d0: ret
+             == Backtracking ==
+           4:OpBodyAlternativeEnd
+           3:OpTerm TypePatternCharacter 'c'
+           2:OpTerm TypePatternCharacter 'b' {0,...} greedy
+                0x469561c037d1: mov 0x8(%rsp), %r9
+                ...
+                0x469561c037e4: jmp 0x469561c037a2
+           1:OpTerm TypePatternCharacter 'a'
+           0:OpBodyAlternativeBegin minimum size 2
+                0x469561c037e9: mov %rsi, %rax
+                ...
+                0x469561c0382f: pop %rbp
+                0x469561c03830: ret
+
+        * _javascript_Core.xcodeproj/project.pbxproj:
+        * Sources.txt:
+        * runtime/RegExp.cpp:
+        (JSC::RegExp::compile):
+        (JSC::RegExp::compileMatchOnly):
+        * yarr/YarrDisassembler.cpp: Added.
+        (JSC::Yarr::YarrDisassembler::indentString):
+        (JSC::Yarr::YarrDisassembler::YarrDisassembler):
+        (JSC::Yarr::YarrDisassembler::~YarrDisassembler):
+        (JSC::Yarr::YarrDisassembler::dump):
+        (JSC::Yarr::YarrDisassembler::dumpHeader):
+        (JSC::Yarr::YarrDisassembler::dumpVectorForInstructions):
+        (JSC::Yarr::YarrDisassembler::dumpForInstructions):
+        (JSC::Yarr::YarrDisassembler::dumpDisassembly):
+        * yarr/YarrDisassembler.h: Added.
+        (JSC::Yarr::YarrJITInfo::~YarrJITInfo):
+        (JSC::Yarr::YarrDisassembler::setStartOfCode):
+        (JSC::Yarr::YarrDisassembler::setForGenerate):
+        (JSC::Yarr::YarrDisassembler::setForBacktrack):
+        (JSC::Yarr::YarrDisassembler::setEndOfGenerate):
+        (JSC::Yarr::YarrDisassembler::setEndOfBacktrack):
+        (JSC::Yarr::YarrDisassembler::setEndOfCode):
+        (JSC::Yarr::YarrDisassembler::indentString):
+        * yarr/YarrJIT.cpp:
+        (JSC::Yarr::YarrGenerator::generate):
+        (JSC::Yarr::YarrGenerator::backtrack):
+        (JSC::Yarr::YarrGenerator::YarrGenerator):
+        (JSC::Yarr::YarrGenerator::compile):
+        (JSC::Yarr::jitCompile):
+        * yarr/YarrJIT.h:
+        * yarr/YarrPattern.cpp:
+        (JSC::Yarr::dumpCharacterClass):
+        (JSC::Yarr::PatternTerm::dump):
+        (JSC::Yarr::YarrPattern::dumpPatternString):
+        (JSC::Yarr::YarrPattern::dumpPattern):
+        * yarr/YarrPattern.h:
+
 2018-08-05  Darin Adler  <[email protected]>
 
         [Cocoa] More tweaks and refactoring to prepare for ARC

Modified: trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj (234712 => 234713)


--- trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/_javascript_Core.xcodeproj/project.pbxproj	2018-08-08 22:42:30 UTC (rev 234713)
@@ -3586,6 +3586,8 @@
 		65B8392C1BACA92A0044E824 /* CachedRecovery.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CachedRecovery.h; sourceTree = "<group>"; };
 		65B8392D1BACA9D30044E824 /* CachedRecovery.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = CachedRecovery.cpp; sourceTree = "<group>"; };
 		65C02FBB0637462A003E7EE6 /* Protect.h */ = {isa = PBXFileReference; fileEncoding = 30; indentWidth = 4; lastKnownFileType = sourcecode.c.h; path = Protect.h; sourceTree = "<group>"; tabWidth = 8; };
+		65C6BEDF21128C3B006849C3 /* YarrDisassembler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = YarrDisassembler.cpp; path = yarr/YarrDisassembler.cpp; sourceTree = "<group>"; };
+		65C6BEE021128C3B006849C3 /* YarrDisassembler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = YarrDisassembler.h; path = yarr/YarrDisassembler.h; sourceTree = "<group>"; };
 		65C7A1710A8EAACB00FA37EA /* JSWrapperObject.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; path = JSWrapperObject.cpp; sourceTree = "<group>"; };
 		65C7A1720A8EAACB00FA37EA /* JSWrapperObject.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = JSWrapperObject.h; sourceTree = "<group>"; };
 		65DA90461F87052A009BC546 /* generateYarrUnicodePropertyTables.py */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.python; name = generateYarrUnicodePropertyTables.py; path = Scripts/generateYarrUnicodePropertyTables.py; sourceTree = "<group>"; };
@@ -7119,6 +7121,8 @@
 				863C6D991521111200585E4E /* YarrCanonicalize.h */,
 				863C6D981521111200585E4E /* YarrCanonicalizeUCS2.cpp */,
 				863C6D9A1521111200585E4E /* YarrCanonicalizeUCS2.js */,
+				65C6BEDF21128C3B006849C3 /* YarrDisassembler.cpp */,
+				65C6BEE021128C3B006849C3 /* YarrDisassembler.h */,
 				E3282BB91FE930A300EDAF71 /* YarrErrorCode.cpp */,
 				E3282BBA1FE930A400EDAF71 /* YarrErrorCode.h */,
 				86704B7D12DBA33700A9FE7B /* YarrInterpreter.cpp */,

Modified: trunk/Source/_javascript_Core/Sources.txt (234712 => 234713)


--- trunk/Source/_javascript_Core/Sources.txt	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/Sources.txt	2018-08-08 22:42:30 UTC (rev 234713)
@@ -1036,6 +1036,7 @@
 
 yarr/RegularExpression.cpp
 yarr/YarrCanonicalizeUCS2.cpp
+yarr/YarrDisassembler.cpp
 yarr/YarrErrorCode.cpp
 yarr/YarrInterpreter.cpp
 yarr/YarrJIT.cpp

Modified: trunk/Source/_javascript_Core/runtime/RegExp.cpp (234712 => 234713)


--- trunk/Source/_javascript_Core/runtime/RegExp.cpp	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/runtime/RegExp.cpp	2018-08-08 22:42:30 UTC (rev 234713)
@@ -306,7 +306,7 @@
 
 #if ENABLE(YARR_JIT)
     if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) {
-        Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode);
+        Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode);
         if (!m_regExpJITCode.failureReason()) {
             m_state = JITCode;
             return;
@@ -362,7 +362,7 @@
 
 #if ENABLE(YARR_JIT)
     if (!pattern.m_containsBackreferences && !pattern.containsUnsignedLengthPattern() && VM::canUseRegExpJIT()) {
-        Yarr::jitCompile(pattern, charSize, vm, m_regExpJITCode, Yarr::MatchOnly);
+        Yarr::jitCompile(pattern, m_patternString, charSize, vm, m_regExpJITCode, Yarr::MatchOnly);
         if (!m_regExpJITCode.failureReason()) {
             m_state = JITCode;
             return;

Added: trunk/Source/_javascript_Core/yarr/YarrDisassembler.cpp (0 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrDisassembler.cpp	                        (rev 0)
+++ trunk/Source/_javascript_Core/yarr/YarrDisassembler.cpp	2018-08-08 22:42:30 UTC (rev 234713)
@@ -0,0 +1,152 @@
+/*
+ * Copyright (C) 2018 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#include "config.h"
+#include "YarrDisassembler.h"
+
+#if ENABLE(JIT)
+
+#include "Disassembler.h"
+#include "LinkBuffer.h"
+#include <wtf/StringPrintStream.h>
+
+namespace JSC { namespace Yarr {
+
+// Pool of blanks that indentString() slices to build an indent prefix; its
+// length is the widest indent that can ever be returned.
+static constexpr char s_spaces[] = "                        ";
+static constexpr unsigned s_maxIndent = sizeof(s_spaces) - 1;
+
+// Returns a pointer into s_spaces that yields 6 + 2 * level blanks, capped at
+// s_maxIndent. The returned string is static storage and must not be freed.
+const char* YarrDisassembler::indentString(unsigned level)
+{
+    const unsigned wanted = 6 + level * 2;
+    const unsigned width = std::min(wanted, s_maxIndent);
+
+    // The tail of the pool that is exactly `width` characters long.
+    return &s_spaces[s_maxIndent - width];
+}
+
+// Remembers the YarrJITInfo provider and constructs both per-op label vectors
+// (matching and backtracking) from yarrJITInfo->opCount().
+YarrDisassembler::YarrDisassembler(YarrJITInfo* yarrJITInfo)
+    : m_jitInfo(yarrJITInfo)
+    , m_labelForGenerateYarrOp(yarrJITInfo->opCount())
+    , m_labelForBacktrackYarrOp(yarrJITInfo->opCount())
+{
+}
+
+// Nothing to release by hand; members clean up after themselves.
+YarrDisassembler::~YarrDisassembler() = default;
+
+// Writes the complete annotated listing to `out`: the header, the code that
+// precedes the first op's label, the per-op "Matching" section, the per-op
+// "Backtracking" section (ops visited in reverse), and a "Helpers" section for
+// any code between the end of backtracking and the end of code. Ends by
+// calling linkBuffer.didAlreadyDisassemble().
+void YarrDisassembler::dump(PrintStream& out, LinkBuffer& linkBuffer)
+{
+    dumpHeader(out, linkBuffer);
+
+    // Code emitted before the first op's label.
+    MacroAssembler::Label firstOpLabel = m_labelForGenerateYarrOp[0];
+    dumpDisassembly(out, indentString(), linkBuffer, m_startOfCode, firstOpLabel);
+
+    out.print("     == Matching ==\n");
+    dumpForInstructions(out, linkBuffer, m_labelForGenerateYarrOp, m_endOfGenerate);
+
+    out.print("     == Backtracking ==\n");
+    dumpForInstructions(out, linkBuffer, m_labelForBacktrackYarrOp, m_endOfBacktrack, VectorOrder::IterateReverse);
+
+    // Only the labels' operator== is used here, hence the negated comparison.
+    const bool hasTrailingCode = !(m_endOfBacktrack == m_endOfCode);
+    if (hasTrailingCode) {
+        out.print("     == Helpers ==\n");
+        dumpDisassembly(out, indentString(), linkBuffer, m_endOfBacktrack, m_endOfCode);
+    }
+
+    linkBuffer.didAlreadyDisassemble();
+}
+
+// Convenience overload that sends the listing to WTF::dataFile().
+void YarrDisassembler::dump(LinkBuffer& linkBuffer)
+{
+    PrintStream& out = WTF::dataFile();
+    dump(out, linkBuffer);
+}
+
+// Prints the two header lines: "Generated JIT code for <variant> <pattern>:"
+// followed by the half-open address range [start, start + size) reported by
+// the LinkBuffer.
+void YarrDisassembler::dumpHeader(PrintStream& out, LinkBuffer& linkBuffer)
+{
+    out.print("Generated JIT code for ", m_jitInfo->variant(), " ");
+    m_jitInfo->dumpPatternString(out);
+    out.print(":\n");
+
+    char* codeStart = static_cast<char*>(linkBuffer.debugAddress());
+    char* codeEnd = codeStart + linkBuffer.size();
+    out.print("    Code at [", RawPointer(codeStart), ", ", RawPointer(codeEnd), "):\n");
+}
+
+// Builds one DumpedOp per op whose label is set, visiting `labels` front to
+// back (IterateForward) or back to front (IterateReverse). Each entry holds
+// the op's annotation line from YarrJITInfo::dumpFor() followed by the
+// disassembly from that op's label up to the next set label in visiting
+// order, or up to `endLabel` for the last one. The value returned by dumpFor()
+// is added to m_indentLevel when iterating forward and subtracted when
+// iterating in reverse, before the op's instructions are printed.
+Vector<YarrDisassembler::DumpedOp> YarrDisassembler::dumpVectorForInstructions(LinkBuffer& linkBuffer, Vector<MacroAssembler::Label>& labels, MacroAssembler::Label endLabel, YarrDisassembler::VectorOrder vectorOrder)
+{
+    Vector<DumpedOp> result;
+    StringPrintStream out;
+
+    const unsigned labelCount = labels.size();
+    const bool forward = vectorOrder == VectorOrder::IterateForward;
+
+    // Maps a position in visiting order to the corresponding op index.
+    auto opIndexAt = [&](unsigned position) -> unsigned {
+        return forward ? position : labelCount - 1 - position;
+    };
+
+    unsigned position = 0;
+    while (position < labelCount) {
+        const unsigned opIndex = opIndexAt(position);
+        if (!labels[opIndex].isSet()) {
+            ++position;
+            continue;
+        }
+
+        out.reset();
+        result.append(DumpedOp());
+        result.last().index = opIndex;
+
+        int delta = m_jitInfo->dumpFor(out, opIndex);
+        m_indentLevel += forward ? delta : -delta;
+
+        // Find the next op (in visiting order) that has a label; it bounds
+        // this op's instructions.
+        unsigned nextPosition = position + 1;
+        while (nextPosition < labelCount && !labels[opIndexAt(nextPosition)].isSet())
+            ++nextPosition;
+
+        MacroAssembler::Label stopLabel = nextPosition < labelCount ? labels[opIndexAt(nextPosition)] : endLabel;
+        dumpDisassembly(out, indentString(), linkBuffer, labels[opIndex], stopLabel);
+        result.last().disassembly = out.toCString();
+
+        position = nextPosition;
+    }
+
+    return result;
+}
+
+// Prints, in order, the text of every DumpedOp produced by
+// dumpVectorForInstructions() for the given labels and iteration order.
+void YarrDisassembler::dumpForInstructions(PrintStream& out, LinkBuffer& linkBuffer, Vector<MacroAssembler::Label>& labels, MacroAssembler::Label endLabel, YarrDisassembler::VectorOrder vectorOrder)
+{
+    for (const DumpedOp& dumpedOp : dumpVectorForInstructions(linkBuffer, labels, endLabel, vectorOrder))
+        out.print(dumpedOp.disassembly);
+}
+
+// Disassembles the code between the labels `from` and `to` into `out`,
+// passing `prefix` through to disassemble().
+void YarrDisassembler::dumpDisassembly(PrintStream& out, const char* prefix, LinkBuffer& linkBuffer, MacroAssembler::Label from, MacroAssembler::Label to)
+{
+    CodeLocationLabel<DisassemblyPtrTag> start = linkBuffer.locationOf<DisassemblyPtrTag>(from);
+    CodeLocationLabel<DisassemblyPtrTag> end = linkBuffer.locationOf<DisassemblyPtrTag>(to);
+
+    // Size of the range is the distance between the two resolved locations.
+    auto byteCount = end.dataLocation<uintptr_t>() - start.dataLocation<uintptr_t>();
+    disassemble(start, byteCount, prefix, out);
+}
+
+}} // namespace Yarr namespace JSC
+
+#endif // ENABLE(JIT)
+

Added: trunk/Source/_javascript_Core/yarr/YarrDisassembler.h (0 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrDisassembler.h	                        (rev 0)
+++ trunk/Source/_javascript_Core/yarr/YarrDisassembler.h	2018-08-08 22:42:30 UTC (rev 234713)
@@ -0,0 +1,114 @@
+/*
+ * Copyright (C) 2018 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#pragma once
+
+#if ENABLE(JIT)
+
+#include "MacroAssembler.h"
+#include <wtf/Vector.h>
+#include <wtf/text/CString.h>
+
+
+namespace JSC {
+
+class LinkBuffer;
+
+namespace Yarr {
+
+class YarrCodeBlock;
+
+// Interface through which YarrDisassembler asks the code generator for the
+// text and counts it needs while printing.
+class YarrJITInfo {
+public:
+    virtual ~YarrJITInfo() = default;
+
+    // Name printed after "Generated JIT code for " in the header.
+    virtual const char* variant() = 0;
+
+    // Number of ops; YarrDisassembler passes it when constructing its per-op label vectors.
+    virtual unsigned opCount() = 0;
+
+    // Prints the pattern as part of the header line.
+    virtual void dumpPatternString(PrintStream&) = 0;
+
+    // Prints the annotation for the op at the given index. The return value
+    // is applied by YarrDisassembler as a change to its indent level.
+    virtual int dumpFor(PrintStream&, unsigned) = 0;
+};
+
+// Records assembler labels while Yarr JIT code is being generated and later
+// prints a disassembly in which each run of instructions is preceded by the
+// annotation of the Yarr op it was generated for.
+class YarrDisassembler {
+    WTF_MAKE_FAST_ALLOCATED;
+public:
+    // The YarrJITInfo pointer is stored, not owned; it is dereferenced by dump().
+    explicit YarrDisassembler(YarrJITInfo*);
+    ~YarrDisassembler();
+
+    // Label recorders, called by the generator as code is emitted.
+    void setStartOfCode(MacroAssembler::Label label) { m_startOfCode = label; }
+
+    // Label at which matching code for op `opIndex` begins.
+    void setForGenerate(unsigned opIndex, MacroAssembler::Label label)
+    {
+        m_labelForGenerateYarrOp[opIndex] = label;
+    }
+
+    // Label at which backtracking code for op `opIndex` begins.
+    void setForBacktrack(unsigned opIndex, MacroAssembler::Label label)
+    {
+        m_labelForBacktrackYarrOp[opIndex] = label;
+    }
+
+    void setEndOfGenerate(MacroAssembler::Label label) { m_endOfGenerate = label; }
+    void setEndOfBacktrack(MacroAssembler::Label label) { m_endOfBacktrack = label; }
+    void setEndOfCode(MacroAssembler::Label label) { m_endOfCode = label; }
+
+    // Print the annotated listing, to WTF::dataFile() or to the given stream.
+    void dump(LinkBuffer&);
+    void dump(PrintStream&, LinkBuffer&);
+
+private:
+    // Direction in which a label vector is walked when dumping.
+    enum class VectorOrder {
+        IterateForward,
+        IterateReverse
+    };
+
+    void dumpHeader(PrintStream&, LinkBuffer&);
+
+    // Annotation plus disassembly text for one op.
+    struct DumpedOp {
+        unsigned index;
+        CString disassembly;
+    };
+
+    // Indent prefix for an explicit level, or for the current m_indentLevel.
+    const char* indentString(unsigned);
+    const char* indentString()
+    {
+        return indentString(m_indentLevel);
+    }
+
+    Vector<DumpedOp> dumpVectorForInstructions(LinkBuffer&, Vector<MacroAssembler::Label>& labels, MacroAssembler::Label endLabel, YarrDisassembler::VectorOrder vectorOrder = VectorOrder::IterateForward);
+
+    void dumpForInstructions(PrintStream&, LinkBuffer&, Vector<MacroAssembler::Label>& labels, MacroAssembler::Label endLabel, YarrDisassembler::VectorOrder vectorOrder = VectorOrder::IterateForward);
+
+    void dumpDisassembly(PrintStream&, const char* prefix, LinkBuffer&, MacroAssembler::Label from, MacroAssembler::Label to);
+
+    YarrJITInfo* m_jitInfo;
+    MacroAssembler::Label m_startOfCode;
+    Vector<MacroAssembler::Label> m_labelForGenerateYarrOp;
+    Vector<MacroAssembler::Label> m_labelForBacktrackYarrOp;
+    MacroAssembler::Label m_endOfGenerate;
+    MacroAssembler::Label m_endOfBacktrack;
+    MacroAssembler::Label m_endOfCode;
+    // Adjusted by the values YarrJITInfo::dumpFor() returns while dumping.
+    unsigned m_indentLevel { 0 };
+};
+
+}} // namespace Yarr namespace JSC
+
+#endif // ENABLE(JIT)

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.cpp (234712 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.cpp	2018-08-08 22:42:30 UTC (rev 234713)
@@ -32,6 +32,7 @@
 #include "VM.h"
 #include "Yarr.h"
 #include "YarrCanonicalize.h"
+#include "YarrDisassembler.h"
 
 #if ENABLE(YARR_JIT)
 
@@ -40,8 +41,7 @@
 namespace JSC { namespace Yarr {
 
 template<YarrJITCompileMode compileMode>
-class YarrGenerator : private MacroAssembler {
-    friend void jitCompile(VM*, YarrCodeBlock&, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+class YarrGenerator : public YarrJITInfo, private MacroAssembler {
 
 #if CPU(ARM)
     static const RegisterID input = ARMRegisters::r0;
@@ -1856,6 +1856,9 @@
         size_t opIndex = 0;
 
         do {
+            if (m_disassembler)
+                m_disassembler->setForGenerate(opIndex, label());
+
             YarrOp& op = m_ops[opIndex];
             switch (op.m_op) {
 
@@ -2372,6 +2375,10 @@
 
         do {
             --opIndex;
+
+            if (m_disassembler)
+                m_disassembler->setForBacktrack(opIndex, label());
+
             YarrOp& op = m_ops[opIndex];
             switch (op.m_op) {
 
@@ -3424,9 +3431,10 @@
     }
 
 public:
-    YarrGenerator(VM* vm, YarrPattern& pattern, YarrCodeBlock& codeBlock, YarrCharSize charSize)
+    YarrGenerator(VM* vm, YarrPattern& pattern, String& patternString, YarrCodeBlock& codeBlock, YarrCharSize charSize)
         : m_vm(vm)
         , m_pattern(pattern)
+        , m_patternString(patternString)
         , m_codeBlock(codeBlock)
         , m_charSize(charSize)
         , m_decodeSurrogatePairs(m_charSize == Char16 && m_pattern.unicode())
@@ -3463,7 +3471,13 @@
             codeBlock.setFallBackWithFailureReason(*m_failureReason);
             return;
         }
-        
+
+        if (UNLIKELY(Options::dumpDisassembly()))
+            m_disassembler = std::make_unique<YarrDisassembler>(this);
+
+        if (m_disassembler)
+            m_disassembler->setStartOfCode(label());
+
         generateEnter();
 
         Jump hasInput = checkInput();
@@ -3499,12 +3513,19 @@
         }
 
         generate();
+        if (m_disassembler)
+            m_disassembler->setEndOfGenerate(label());
         backtrack();
+        if (m_disassembler)
+            m_disassembler->setEndOfBacktrack(label());
 
         generateTryReadUnicodeCharacterHelper();
 
         generateJITFailReturn();
 
+        if (m_disassembler)
+            m_disassembler->setEndOfCode(label());
+
         LinkBuffer linkBuffer(*this, REGEXP_CODE_ID, JITCompilationCanFail);
         if (linkBuffer.didFailToAllocate()) {
             codeBlock.setFallBackWithFailureReason(JITFailureReason::ExecutableMemoryAllocationFailure);
@@ -3520,6 +3541,9 @@
 
         m_backtrackingState.linkDataLabels(linkBuffer);
 
+        if (m_disassembler)
+            m_disassembler->dump(linkBuffer);
+
         if (compileMode == MatchOnly) {
             if (m_charSize == Char8)
                 codeBlock.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, YarrMatchOnly8BitPtrTag, "Match-only 8-bit regular _expression_"));
@@ -3535,10 +3559,196 @@
             codeBlock.setFallBackWithFailureReason(*m_failureReason);
     }
 
+    // YarrJITInfo: describes the kind of code being generated, selected by
+    // the compile mode and the character size.
+    const char* variant() override
+    {
+        const bool is8Bit = m_charSize == Char8;
+
+        if (compileMode == MatchOnly)
+            return is8Bit ? "Match-only 8-bit regular _expression_" : "Match-only 16-bit regular _expression_";
+
+        return is8Bit ? "8-bit regular _expression_" : "16-bit regular _expression_";
+    }
+
+    // YarrJITInfo: number of entries in m_ops.
+    unsigned opCount() override
+    {
+        const auto count = m_ops.size();
+        return count;
+    }
+
+    // YarrJITInfo: forwards to YarrPattern::dumpPatternString() with the
+    // pattern source string.
+    void dumpPatternString(PrintStream& out) override
+    {
+        YarrPattern& pattern = m_pattern;
+        pattern.dumpPatternString(out, m_patternString);
+    }
+
+    // YarrJITInfo: prints the one-line annotation for the op at opIndex, in
+    // the form "<index>:<op description>\n", and returns the indent change the
+    // disassembler should apply: 1 for the ops that begin a nested
+    // alternative, -1 for the ops that end one, and 0 for everything else.
+    // Nothing is printed and 0 is returned when opIndex is out of range.
+    int dumpFor(PrintStream& out, unsigned opIndex) override
+    {
+        if (opIndex >= opCount())
+            return 0;
+
+        // opIndex is unsigned, so use the matching conversion specifier.
+        out.printf("%4u:", opIndex);
+
+        YarrOp& op = m_ops[opIndex];
+        PatternTerm* term = op.m_term;
+
+        // Shared tail of the parentheses-subpattern annotations; also ends the line.
+        auto dumpCaptureInfo = [&] {
+            if (term->capture())
+                out.printf("capturing pattern #%u\n", term->parentheses.subpatternId);
+            else
+                out.print("non-capturing\n");
+        };
+
+        switch (op.m_op) {
+        case OpTerm: {
+            out.print("OpTerm ");
+            switch (term->type) {
+            case PatternTerm::TypeAssertionBOL:
+                out.print("Assert BOL");
+                break;
+
+            case PatternTerm::TypeAssertionEOL:
+                out.print("Assert EOL");
+                break;
+
+            case PatternTerm::TypePatternCharacter:
+                out.print("TypePatternCharacter ");
+                dumpUChar32(out, term->patternCharacter);
+                if (m_pattern.ignoreCase())
+                    out.print(" ignore case");
+
+                term->dumpQuantifier(out);
+                break;
+
+            case PatternTerm::TypeCharacterClass:
+                out.print("TypePatternCharacterClass ");
+                if (term->invert())
+                    out.print("not ");
+                dumpCharacterClass(out, &m_pattern, term->characterClass);
+                term->dumpQuantifier(out);
+                break;
+
+            case PatternTerm::TypeAssertionWordBoundary:
+                out.printf("%sword boundary", term->invert() ? "non-" : "");
+                break;
+
+            case PatternTerm::TypeDotStarEnclosure:
+                out.print(".* enclosure");
+                break;
+
+            // These term types are never expected on an OpTerm here.
+            case PatternTerm::TypeForwardReference:
+            case PatternTerm::TypeBackReference:
+            case PatternTerm::TypeParenthesesSubpattern:
+            case PatternTerm::TypeParentheticalAssertion:
+                RELEASE_ASSERT_NOT_REACHED();
+                break;
+            }
+
+            if (op.m_isDeadCode)
+                out.print(" already handled");
+            out.print("\n");
+            return 0;
+        }
+
+        case OpBodyAlternativeBegin:
+            out.printf("OpBodyAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 0;
+
+        case OpBodyAlternativeNext:
+            out.printf("OpBodyAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 0;
+
+        case OpBodyAlternativeEnd:
+            out.print("OpBodyAlternativeEnd\n");
+            return 0;
+
+        case OpSimpleNestedAlternativeBegin:
+            out.printf("OpSimpleNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 1;
+
+        case OpNestedAlternativeBegin:
+            out.printf("OpNestedAlternativeBegin minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 1;
+
+        case OpSimpleNestedAlternativeNext:
+            out.printf("OpSimpleNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 0;
+
+        case OpNestedAlternativeNext:
+            out.printf("OpNestedAlternativeNext minimum size %u\n", op.m_alternative->m_minimumSize);
+            return 0;
+
+        case OpSimpleNestedAlternativeEnd:
+            out.print("OpSimpleNestedAlternativeEnd");
+            term->dumpQuantifier(out);
+            out.print("\n");
+            return -1;
+
+        case OpNestedAlternativeEnd:
+            out.print("OpNestedAlternativeEnd");
+            term->dumpQuantifier(out);
+            out.print("\n");
+            return -1;
+
+        case OpParenthesesSubpatternOnceBegin:
+            out.print("OpParenthesesSubpatternOnceBegin ");
+            dumpCaptureInfo();
+            return 0;
+
+        case OpParenthesesSubpatternOnceEnd:
+            out.print("OpParenthesesSubpatternOnceEnd\n");
+            return 0;
+
+        case OpParenthesesSubpatternTerminalBegin:
+            out.print("OpParenthesesSubpatternTerminalBegin ");
+            dumpCaptureInfo();
+            return 0;
+
+        case OpParenthesesSubpatternTerminalEnd:
+            out.print("OpParenthesesSubpatternTerminalEnd ");
+            dumpCaptureInfo();
+            return 0;
+
+        case OpParenthesesSubpatternBegin:
+            out.print("OpParenthesesSubpatternBegin ");
+            dumpCaptureInfo();
+            return 0;
+
+        case OpParenthesesSubpatternEnd:
+            out.print("OpParenthesesSubpatternEnd ");
+            dumpCaptureInfo();
+            return 0;
+
+        case OpParentheticalAssertionBegin:
+            out.printf("OpParentheticalAssertionBegin%s\n", term->invert() ? " inverted" : "");
+            return 0;
+
+        case OpParentheticalAssertionEnd:
+            // Must be printf: print() would emit the "%s\n" format text
+            // verbatim and leave the annotation line unterminated.
+            out.printf("OpParentheticalAssertionEnd%s\n", term->invert() ? " inverted" : "");
+            return 0;
+
+        case OpMatchFailed:
+            out.print("OpMatchFailed\n");
+            return 0;
+        }
+
+        return 0;
+    }
+
 private:
     VM* m_vm;
 
     YarrPattern& m_pattern;
+    String& m_patternString;
 
     YarrCodeBlock& m_codeBlock;
     YarrCharSize m_charSize;
@@ -3576,6 +3786,8 @@
 
     // This class records state whilst generating the backtracking path of code.
     BacktrackingState m_backtrackingState;
+    
+    std::unique_ptr<YarrDisassembler> m_disassembler;
 };
 
 static void dumpCompileFailure(JITFailureReason failure)
@@ -3602,12 +3814,12 @@
     }
 }
 
-void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
+void jitCompile(YarrPattern& pattern, String& patternString, YarrCharSize charSize, VM* vm, YarrCodeBlock& codeBlock, YarrJITCompileMode mode)
 {
     if (mode == MatchOnly)
-        YarrGenerator<MatchOnly>(vm, pattern, codeBlock, charSize).compile();
+        YarrGenerator<MatchOnly>(vm, pattern, patternString, codeBlock, charSize).compile();
     else
-        YarrGenerator<IncludeSubpatterns>(vm, pattern, codeBlock, charSize).compile();
+        YarrGenerator<IncludeSubpatterns>(vm, pattern, patternString, codeBlock, charSize).compile();
 
     if (auto failureReason = codeBlock.failureReason()) {
         if (Options::dumpCompiledRegExpPatterns())

Modified: trunk/Source/_javascript_Core/yarr/YarrJIT.h (234712 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrJIT.h	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/yarr/YarrJIT.h	2018-08-08 22:42:30 UTC (rev 234713)
@@ -210,7 +210,7 @@
     MatchOnly,
     IncludeSubpatterns
 };
-void jitCompile(YarrPattern&, YarrCharSize, VM*, YarrCodeBlock& jitObject, YarrJITCompileMode = IncludeSubpatterns);
+void jitCompile(YarrPattern&, String& patternString, YarrCharSize, VM*, YarrCodeBlock& jitObject, YarrJITCompileMode = IncludeSubpatterns);
 
 } } // namespace JSC::Yarr
 

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (234712 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp	2018-08-08 22:42:30 UTC (rev 234713)
@@ -1169,7 +1169,7 @@
     else if (characterClass == pattern->wordcharCharacterClass())
         out.print("<word>");
     else if (characterClass == pattern->wordUnicodeIgnoreCaseCharCharacterClass())
-        out.print("<unicode ignore case>");
+        out.print("<unicode word ignore case>");
     else if (characterClass == pattern->nondigitsCharacterClass())
         out.print("<non-digits>");
     else if (characterClass == pattern->nonspacesCharacterClass())
@@ -1177,7 +1177,7 @@
     else if (characterClass == pattern->nonwordcharCharacterClass())
         out.print("<non-word>");
     else if (characterClass == pattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
-        out.print("<unicode non-ignore case>");
+        out.print("<unicode non-word ignore case>");
     else {
         bool needMatchesRangesSeperator = false;
 
@@ -1299,75 +1299,7 @@
         break;
     case TypeCharacterClass:
         out.print("character class ");
-        if (characterClass->m_anyCharacter)
-            out.print("<any character>");
-        else if (characterClass == thisPattern->newlineCharacterClass())
-            out.print("<newline>");
-        else if (characterClass == thisPattern->digitsCharacterClass())
-            out.print("<digits>");
-        else if (characterClass == thisPattern->spacesCharacterClass())
-            out.print("<whitespace>");
-        else if (characterClass == thisPattern->wordcharCharacterClass())
-            out.print("<word>");
-        else if (characterClass == thisPattern->wordUnicodeIgnoreCaseCharCharacterClass())
-            out.print("<unicode ignore case>");
-        else if (characterClass == thisPattern->nondigitsCharacterClass())
-            out.print("<non-digits>");
-        else if (characterClass == thisPattern->nonspacesCharacterClass())
-            out.print("<non-whitespace>");
-        else if (characterClass == thisPattern->nonwordcharCharacterClass())
-            out.print("<non-word>");
-        else if (characterClass == thisPattern->nonwordUnicodeIgnoreCaseCharCharacterClass())
-            out.print("<unicode non-ignore case>");
-        else {
-            bool needMatchesRangesSeperator = false;
-
-            auto dumpMatches = [&] (const char* prefix, Vector<UChar32> matches) {
-                size_t matchesSize = matches.size();
-                if (matchesSize) {
-                    if (needMatchesRangesSeperator)
-                        out.print(",");
-                    needMatchesRangesSeperator = true;
-
-                    out.print(prefix, ":(");
-                    for (size_t i = 0; i < matchesSize; ++i) {
-                        if (i)
-                            out.print(",");
-                        dumpUChar32(out, matches[i]);
-                    }
-                    out.print(")");
-                }
-            };
-
-            auto dumpRanges = [&] (const char* prefix, Vector<CharacterRange> ranges) {
-                size_t rangeSize = ranges.size();
-                if (rangeSize) {
-                    if (needMatchesRangesSeperator)
-                        out.print(",");
-                    needMatchesRangesSeperator = true;
-
-                    out.print(prefix, " ranges:(");
-                    for (size_t i = 0; i < rangeSize; ++i) {
-                        if (i)
-                            out.print(",");
-                        CharacterRange range = ranges[i];
-                        out.print("(");
-                        dumpUChar32(out, range.begin);
-                        out.print("..");
-                        dumpUChar32(out, range.end);
-                        out.print(")");
-                    }
-                    out.print(")");
-                }
-            };
-
-            out.print("[");
-            dumpMatches("ASCII", characterClass->m_matches);
-            dumpRanges("ASCII", characterClass->m_ranges);
-            dumpMatches("Unicode", characterClass->m_matchesUnicode);
-            dumpRanges("Unicode", characterClass->m_rangesUnicode);
-            out.print("]");
-        }
+        dumpCharacterClass(out, thisPattern, characterClass);
         dumpQuantifier(out);
         if (quantityType != QuantifierFixedCount || thisPattern->unicode())
             out.print(",frame location ", frameLocation);
@@ -1440,16 +1372,10 @@
     }
 }
 
-void YarrPattern::dumpPattern(const String& patternString)
+void YarrPattern::dumpPatternString(PrintStream& out, const String& patternString)
 {
-    dumpPattern(WTF::dataFile(), patternString);
-}
+    out.print("/", patternString, "/");
 
-void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
-{
-    out.print("RegExp pattern for /");
-    out.print(patternString);
-    out.print("/");
     if (global())
         out.print("g");
     if (ignoreCase())
@@ -1460,6 +1386,18 @@
         out.print("u");
     if (sticky())
         out.print("y");
+}
+
+void YarrPattern::dumpPattern(const String& patternString)
+{
+    dumpPattern(WTF::dataFile(), patternString); // Convenience overload: forwards to the PrintStream overload using WTF::dataFile().
+}
+
+void YarrPattern::dumpPattern(PrintStream& out, const String& patternString)
+{
+    out.print("RegExp pattern for ");
+    dumpPatternString(out, patternString);
+
     if (m_flags != NoFlags) {
         bool printSeperator = false;
         out.print(" (");

Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (234712 => 234713)


--- trunk/Source/_javascript_Core/yarr/YarrPattern.h	2018-08-08 22:18:28 UTC (rev 234712)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h	2018-08-08 22:42:30 UTC (rev 234713)
@@ -489,6 +489,7 @@
         return unicodePropertiesCached.get(classID);
     }
 
+    void dumpPatternString(PrintStream& out, const String& patternString);
     void dumpPattern(const String& pattern);
     void dumpPattern(PrintStream& out, const String& pattern);
 
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to