Title: [112564] trunk/Source
Revision
112564
Author
[email protected]
Date
2012-03-29 13:16:03 -0700 (Thu, 29 Mar 2012)

Log Message

Template the Yarr::Interpreter on the character type
https://bugs.webkit.org/show_bug.cgi?id=82637

Reviewed by Sam Weinig.

We should be able to call into the interpreter after having already checked the character type,
without having to re-package the character pointer into a string.

../JavaScriptCore:

* runtime/RegExp.cpp:
(JSC::RegExp::match):
(JSC::RegExp::matchCompareWithInterpreter):
    - Don't pass length.
* yarr/Yarr.h:
    - moved function declarations to YarrInterpreter.h.
* yarr/YarrInterpreter.cpp:
(Yarr):
(Interpreter):
(JSC::Yarr::Interpreter::InputStream::InputStream):
(InputStream):
(JSC::Yarr::Interpreter::Interpreter):
(JSC::Yarr::interpret):
    - templated Interpreter class on CharType.
* yarr/YarrInterpreter.h:
(Yarr):
    - added function declarations.

../WebCore: 

* inspector/ContentSearchUtils.cpp:
(WebCore::ContentSearchUtils::findMagicComment):
* platform/text/RegularExpression.cpp:
(WebCore::RegularExpression::match):
    - Don't pass length.

Modified Paths

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (112563 => 112564)


--- trunk/Source/JavaScriptCore/ChangeLog	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/JavaScriptCore/ChangeLog	2012-03-29 20:16:03 UTC (rev 112564)
@@ -1,3 +1,31 @@
+2012-03-29  Gavin Barraclough  <[email protected]>
+
+        Template the Yarr::Interpreter on the character type
+        https://bugs.webkit.org/show_bug.cgi?id=82637
+
+        Reviewed by Sam Weinig.
+
+        We should be able to call to the interpreter after having already checked the character type,
+        without having to re-package the character pointer back up into a string!
+
+        * runtime/RegExp.cpp:
+        (JSC::RegExp::match):
+        (JSC::RegExp::matchCompareWithInterpreter):
+            - Don't pass length.
+        * yarr/Yarr.h:
+            - moved function declarations to YarrInterpreter.h.
+        * yarr/YarrInterpreter.cpp:
+        (Yarr):
+        (Interpreter):
+        (JSC::Yarr::Interpreter::InputStream::InputStream):
+        (InputStream):
+        (JSC::Yarr::Interpreter::Interpreter):
+        (JSC::Yarr::interpret):
+            - templated Interpreter class on CharType.
+        * yarr/YarrInterpreter.h:
+        (Yarr):
+            - added function declarations.
+
 2012-03-29  David Kilzer  <[email protected]>
 
         Don't use a flattened framework path when building on OS X

Modified: trunk/Source/JavaScriptCore/runtime/RegExp.cpp (112563 => 112564)


--- trunk/Source/JavaScriptCore/runtime/RegExp.cpp	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/JavaScriptCore/runtime/RegExp.cpp	2012-03-29 20:16:03 UTC (rev 112564)
@@ -343,7 +343,7 @@
 #endif
     } else
 #endif
-        result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, s.length(), reinterpret_cast<unsigned*>(offsetVector));
+        result = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
 
     // FIXME: The YARR engine should handle unsigned or size_t length matches.
     // The YARR Interpreter is "unsigned" clean, while the YARR JIT hasn't been addressed.
@@ -467,7 +467,7 @@
     Vector<int, 32> nonReturnedOvector;
     nonReturnedOvector.resize(offsetVectorSize);
     offsetVector = nonReturnedOvector.data();
-    int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, s.length(), reinterpret_cast<unsigned*>(offsetVector));
+    int r = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, reinterpret_cast<unsigned*>(offsetVector));
 #if REGEXP_FUNC_TEST_DATA_GEN
     RegExpFunctionalTestCollector::get()->outputOneTest(this, s, startOffset, offsetVector, result);
 #endif
@@ -509,7 +509,7 @@
     for (unsigned j = 0, i = 0; i < m_numSubpatterns + 1; j += 2, i++)
         interpreterOffsetVector[j] = -1;
 
-    interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, s.length(), interpreterOffsetVector);
+    interpreterResult = Yarr::interpret(m_regExpBytecode.get(), s, startOffset, interpreterOffsetVector);
 
     if (jitResult != interpreterResult)
         differences++;

Modified: trunk/Source/JavaScriptCore/yarr/Yarr.h (112563 => 112564)


--- trunk/Source/JavaScriptCore/yarr/Yarr.h	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/JavaScriptCore/yarr/Yarr.h	2012-03-29 20:16:03 UTC (rev 112564)
@@ -63,9 +63,6 @@
     Char16
 };
 
-JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
-JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const UString& input, unsigned start, unsigned length, unsigned* output);
-
 } } // namespace JSC::Yarr
 
 #endif // Yarr_h

Modified: trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp (112563 => 112564)


--- trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/JavaScriptCore/yarr/YarrInterpreter.cpp	2012-03-29 20:16:03 UTC (rev 112564)
@@ -42,6 +42,7 @@
 
 namespace JSC { namespace Yarr {
 
+template<typename CharType>
 class Interpreter {
 public:
     struct ParenthesesDisjunctionContext;
@@ -170,55 +171,9 @@
         allocatorPool = allocatorPool->dealloc(context);
     }
 
-    // This class is a placeholder for future character iterator, current 
-    // proposed name StringConstCharacterIterator.
-    class CharAccess {
-    public:
-        CharAccess(const UString& s)
-        {
-            if (s.is8Bit()) {
-                m_charSize = Char8;
-                m_ptr.ptr8 = s.characters8();
-            } else {
-                m_charSize = Char16;
-                m_ptr.ptr16 = s.characters16();
-            }
-        }
-
-        CharAccess(const LChar* ptr)
-            : m_charSize(Char8)
-        {
-            m_ptr.ptr8 = ptr;
-        }
-
-        CharAccess(const UChar* ptr)
-            : m_charSize(Char16)
-        {
-            m_ptr.ptr16 = ptr;
-        }
-
-        ~CharAccess()
-        {
-        }
-
-        inline UChar operator[](unsigned index)
-        {
-            if (m_charSize == Char8)
-                return m_ptr.ptr8[index];
-            return m_ptr.ptr16[index];
-        }
-
-    private:
-        union {
-            const LChar* ptr8;
-            const UChar* ptr16;
-        } m_ptr;
-        YarrCharSize m_charSize;
-    };
-
     class InputStream {
     public:
-        InputStream(const UString& input, unsigned start, unsigned length)
+        InputStream(const CharType* input, unsigned start, unsigned length)
             : input(input)
             , pos(start)
             , length(length)
@@ -332,7 +287,7 @@
         }
 
     private:
-        CharAccess input;
+        const CharType* input;
         unsigned pos;
         unsigned length;
     };
@@ -1489,7 +1444,7 @@
         return output[0];
     }
 
-    Interpreter(BytecodePattern* pattern, unsigned* output, const UString input, unsigned start, unsigned length)
+    Interpreter(BytecodePattern* pattern, unsigned* output, const CharType* input, unsigned length, unsigned start)
         : pattern(pattern)
         , output(output)
         , input(input, start, length)
@@ -1979,18 +1934,31 @@
     return ByteCompiler(pattern).compile(allocator);
 }
 
-unsigned interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned length, unsigned* output)
+unsigned interpret(BytecodePattern* bytecode, const UString& input, unsigned start, unsigned* output)
 {
-    return Interpreter(bytecode, output, input, start, length).interpret();
+    if (input.is8Bit())
+        return Interpreter<LChar>(bytecode, output, input.characters8(), input.length(), start).interpret();
+    return Interpreter<UChar>(bytecode, output, input.characters16(), input.length(), start).interpret();
 }
 
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
-COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+unsigned interpret(BytecodePattern* bytecode, const LChar* input, unsigned length, unsigned start, unsigned* output)
+{
+    return Interpreter<LChar>(bytecode, output, input, length, start).interpret();
+}
 
+unsigned interpret(BytecodePattern* bytecode, const UChar* input, unsigned length, unsigned start, unsigned* output)
+{
+    return Interpreter<UChar>(bytecode, output, input, length, start).interpret();
+}
 
+// These should be the same for both UChar & LChar.
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoPatternCharacter) == (YarrStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoPatternCharacter);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoCharacterClass) == (YarrStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoCharacterClass);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoBackReference) == (YarrStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoBackReference);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoAlternative) == (YarrStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoAlternative);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheticalAssertion) == (YarrStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheticalAssertion);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParenthesesOnce) == (YarrStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParenthesesOnce);
+COMPILE_ASSERT(sizeof(Interpreter<UChar>::BackTrackInfoParentheses) == (YarrStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckYarrStackSpaceForBackTrackInfoParentheses);
+
+
 } }

Modified: trunk/Source/JavaScriptCore/yarr/YarrInterpreter.h (112563 => 112564)


--- trunk/Source/JavaScriptCore/yarr/YarrInterpreter.h	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/JavaScriptCore/yarr/YarrInterpreter.h	2012-03-29 20:16:03 UTC (rev 112564)
@@ -375,6 +375,11 @@
     Vector<CharacterClass*> m_userCharacterClasses;
 };
 
+JS_EXPORT_PRIVATE PassOwnPtr<BytecodePattern> byteCompile(YarrPattern&, BumpPointerAllocator*);
+JS_EXPORT_PRIVATE unsigned interpret(BytecodePattern*, const UString& input, unsigned start, unsigned* output);
+unsigned interpret(BytecodePattern*, const LChar* input, unsigned length, unsigned start, unsigned* output);
+unsigned interpret(BytecodePattern*, const UChar* input, unsigned length, unsigned start, unsigned* output);
+
 } } // namespace JSC::Yarr
 
 #endif // YarrInterpreter_h

Modified: trunk/Source/WebCore/ChangeLog (112563 => 112564)


--- trunk/Source/WebCore/ChangeLog	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/WebCore/ChangeLog	2012-03-29 20:16:03 UTC (rev 112564)
@@ -1,3 +1,19 @@
+2012-03-29  Gavin Barraclough  <[email protected]>
+
+        Template the Yarr::Interpreter on the character type
+        https://bugs.webkit.org/show_bug.cgi?id=82637
+
+        Reviewed by Sam Weinig.
+
+        We should be able to call to the interpreter after having already checked the character type,
+        without having to re-package the character pointer back up into a string!
+
+        * inspector/ContentSearchUtils.cpp:
+        (WebCore::ContentSearchUtils::findMagicComment):
+        * platform/text/RegularExpression.cpp:
+        (WebCore::RegularExpression::match):
+            - Don't pass length.
+
 2012-03-29  Sheriff Bot  <[email protected]>
 
         Unreviewed, rolling out r112553.

Modified: trunk/Source/WebCore/inspector/ContentSearchUtils.cpp (112563 => 112564)


--- trunk/Source/WebCore/inspector/ContentSearchUtils.cpp	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/WebCore/inspector/ContentSearchUtils.cpp	2012-03-29 20:16:03 UTC (rev 112564)
@@ -152,7 +152,7 @@
     ASSERT(pattern.m_numSubpatterns == 1);
     Vector<int, 4> matches;
     matches.resize(4);
-    unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), JSC::UString(content.impl()), 0, content.length(), reinterpret_cast<unsigned*>(matches.data()));
+    unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), JSC::UString(content.impl()), 0, reinterpret_cast<unsigned*>(matches.data()));
     if (result == JSC::Yarr::offsetNoMatch)
         return String();
     ASSERT(matches[2] > 0 && matches[3] > 0);

Modified: trunk/Source/WebCore/platform/text/RegularExpression.cpp (112563 => 112564)


--- trunk/Source/WebCore/platform/text/RegularExpression.cpp	2012-03-29 19:48:21 UTC (rev 112563)
+++ trunk/Source/WebCore/platform/text/RegularExpression.cpp	2012-03-29 20:16:03 UTC (rev 112564)
@@ -112,7 +112,7 @@
 
     unsigned result;
     if (str.length() <= INT_MAX)
-        result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), JSC::UString(str.impl()), startFrom, str.length(), offsetVector);
+        result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), JSC::UString(str.impl()), startFrom, offsetVector);
     else {
         // This code can't handle unsigned offsets. Limit our processing to strings with offsets that 
         // can be represented as ints.
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to