Title: [272570] trunk/Source/JavaScriptCore
Revision
272570
Author
[email protected]
Date
2021-02-09 02:02:13 -0800 (Tue, 09 Feb 2021)

Log Message

[JSC] Make JSON.parse faster by using table for fast string parsing
https://bugs.webkit.org/show_bug.cgi?id=221593

Reviewed by Ryosuke Niwa and Geoffrey Garen.

We use a Latin1 table to quickly check whether a character is safe for the fast-path string parsing in JSON.
This offers a 1-3% improvement in the Kraken json-parse-financial test.

* parser/Lexer.cpp:
(JSC::Lexer<T>::Lexer):
* runtime/LiteralParser.cpp:
(JSC::LiteralParser<CharType>::Lexer::lex):
(JSC::isSafeStringCharacter):
(JSC::LiteralParser<CharType>::Lexer::lexString):
* runtime/LiteralParser.h:

Modified Paths

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (272569 => 272570)


--- trunk/Source/JavaScriptCore/ChangeLog	2021-02-09 06:54:33 UTC (rev 272569)
+++ trunk/Source/JavaScriptCore/ChangeLog	2021-02-09 10:02:13 UTC (rev 272570)
@@ -1,3 +1,21 @@
+2021-02-09  Yusuke Suzuki  <[email protected]>
+
+        [JSC] Make JSON.parse faster by using table for fast string parsing
+        https://bugs.webkit.org/show_bug.cgi?id=221593
+
+        Reviewed by Ryosuke Niwa and Geoffrey Garen.
+
+        We use a Latin1 table to quickly check whether a character is safe for the fast-path string parsing in JSON.
+        This offers a 1-3% improvement in the Kraken json-parse-financial test.
+
+        * parser/Lexer.cpp:
+        (JSC::Lexer<T>::Lexer):
+        * runtime/LiteralParser.cpp:
+        (JSC::LiteralParser<CharType>::Lexer::lex):
+        (JSC::isSafeStringCharacter):
+        (JSC::LiteralParser<CharType>::Lexer::lexString):
+        * runtime/LiteralParser.h:
+
 2021-02-08  Patrick Angle  <[email protected]>
 
         Web Inspector: Add `CSS.setLayoutContextTypeChangedMode` for getting information about all layout contexts

Modified: trunk/Source/JavaScriptCore/parser/Lexer.cpp (272569 => 272570)


--- trunk/Source/JavaScriptCore/parser/Lexer.cpp	2021-02-09 06:54:33 UTC (rev 272569)
+++ trunk/Source/JavaScriptCore/parser/Lexer.cpp	2021-02-09 10:02:13 UTC (rev 272570)
@@ -44,7 +44,7 @@
     return JSC::mainTable.entry(identifier);
 }
 
-enum CharacterType {
+enum CharacterType : uint8_t {
     // Types for the main switch
 
     // The first three types are fixed, and also used for identifying
@@ -95,7 +95,7 @@
 };
 
 // 256 Latin-1 codes
-static constexpr const unsigned short typesOfLatin1Characters[256] = {
+static constexpr const CharacterType typesOfLatin1Characters[256] = {
 /*   0 - Null               */ CharacterInvalid,
 /*   1 - Start of Heading   */ CharacterInvalid,
 /*   2 - Start of Text      */ CharacterInvalid,

Modified: trunk/Source/JavaScriptCore/runtime/LiteralParser.cpp (272569 => 272570)


--- trunk/Source/JavaScriptCore/runtime/LiteralParser.cpp	2021-02-09 06:54:33 UTC (rev 272569)
+++ trunk/Source/JavaScriptCore/runtime/LiteralParser.cpp	2021-02-09 10:02:13 UTC (rev 272570)
@@ -173,7 +173,7 @@
 }
 
 // 256 Latin-1 codes
-static constexpr const TokenType TokenTypesOfLatin1Characters[256] = {
+static constexpr const TokenType tokenTypesOfLatin1Characters[256] = {
 /*   0 - Null               */ TokError,
 /*   1 - Start of Heading   */ TokError,
 /*   2 - Start of Text      */ TokError,
@@ -432,6 +432,266 @@
 /* 255 - Ll category        */ TokError
 };
 
+// 256 Latin-1 codes
+static constexpr const bool safeStringLatin1CharactersInStrictJSON[256] = {
+/*   0 - Null               */ false,
+/*   1 - Start of Heading   */ false,
+/*   2 - Start of Text      */ false,
+/*   3 - End of Text        */ false,
+/*   4 - End of Transm.     */ false,
+/*   5 - Enquiry            */ false,
+/*   6 - Acknowledgment     */ false,
+/*   7 - Bell               */ false,
+/*   8 - Back Space         */ false,
+/*   9 - Horizontal Tab     */ false,
+/*  10 - Line Feed          */ false,
+/*  11 - Vertical Tab       */ false,
+/*  12 - Form Feed          */ false,
+/*  13 - Carriage Return    */ false,
+/*  14 - Shift Out          */ false,
+/*  15 - Shift In           */ false,
+/*  16 - Data Line Escape   */ false,
+/*  17 - Device Control 1   */ false,
+/*  18 - Device Control 2   */ false,
+/*  19 - Device Control 3   */ false,
+/*  20 - Device Control 4   */ false,
+/*  21 - Negative Ack.      */ false,
+/*  22 - Synchronous Idle   */ false,
+/*  23 - End of Transmit    */ false,
+/*  24 - Cancel             */ false,
+/*  25 - End of Medium      */ false,
+/*  26 - Substitute         */ false,
+/*  27 - Escape             */ false,
+/*  28 - File Separator     */ false,
+/*  29 - Group Separator    */ false,
+/*  30 - Record Separator   */ false,
+/*  31 - Unit Separator     */ false,
+/*  32 - Space              */ true,
+/*  33 - !                  */ true,
+/*  34 - "                  */ false,
+/*  35 - #                  */ true,
+/*  36 - $                  */ true,
+/*  37 - %                  */ true,
+/*  38 - &                  */ true,
+/*  39 - '                  */ true,
+/*  40 - (                  */ true,
+/*  41 - )                  */ true,
+/*  42 - *                  */ true,
+/*  43 - +                  */ true,
+/*  44 - ,                  */ true,
+/*  45 - -                  */ true,
+/*  46 - .                  */ true,
+/*  47 - /                  */ true,
+/*  48 - 0                  */ true,
+/*  49 - 1                  */ true,
+/*  50 - 2                  */ true,
+/*  51 - 3                  */ true,
+/*  52 - 4                  */ true,
+/*  53 - 5                  */ true,
+/*  54 - 6                  */ true,
+/*  55 - 7                  */ true,
+/*  56 - 8                  */ true,
+/*  57 - 9                  */ true,
+/*  58 - :                  */ true,
+/*  59 - ;                  */ true,
+/*  60 - <                  */ true,
+/*  61 - =                  */ true,
+/*  62 - >                  */ true,
+/*  63 - ?                  */ true,
+/*  64 - @                  */ true,
+/*  65 - A                  */ true,
+/*  66 - B                  */ true,
+/*  67 - C                  */ true,
+/*  68 - D                  */ true,
+/*  69 - E                  */ true,
+/*  70 - F                  */ true,
+/*  71 - G                  */ true,
+/*  72 - H                  */ true,
+/*  73 - I                  */ true,
+/*  74 - J                  */ true,
+/*  75 - K                  */ true,
+/*  76 - L                  */ true,
+/*  77 - M                  */ true,
+/*  78 - N                  */ true,
+/*  79 - O                  */ true,
+/*  80 - P                  */ true,
+/*  81 - Q                  */ true,
+/*  82 - R                  */ true,
+/*  83 - S                  */ true,
+/*  84 - T                  */ true,
+/*  85 - U                  */ true,
+/*  86 - V                  */ true,
+/*  87 - W                  */ true,
+/*  88 - X                  */ true,
+/*  89 - Y                  */ true,
+/*  90 - Z                  */ true,
+/*  91 - [                  */ true,
+/*  92 - \                  */ false,
+/*  93 - ]                  */ true,
+/*  94 - ^                  */ true,
+/*  95 - _                  */ true,
+/*  96 - `                  */ true,
+/*  97 - a                  */ true,
+/*  98 - b                  */ true,
+/*  99 - c                  */ true,
+/* 100 - d                  */ true,
+/* 101 - e                  */ true,
+/* 102 - f                  */ true,
+/* 103 - g                  */ true,
+/* 104 - h                  */ true,
+/* 105 - i                  */ true,
+/* 106 - j                  */ true,
+/* 107 - k                  */ true,
+/* 108 - l                  */ true,
+/* 109 - m                  */ true,
+/* 110 - n                  */ true,
+/* 111 - o                  */ true,
+/* 112 - p                  */ true,
+/* 113 - q                  */ true,
+/* 114 - r                  */ true,
+/* 115 - s                  */ true,
+/* 116 - t                  */ true,
+/* 117 - u                  */ true,
+/* 118 - v                  */ true,
+/* 119 - w                  */ true,
+/* 120 - x                  */ true,
+/* 121 - y                  */ true,
+/* 122 - z                  */ true,
+/* 123 - {                  */ true,
+/* 124 - |                  */ true,
+/* 125 - }                  */ true,
+/* 126 - ~                  */ true,
+/* 127 - Delete             */ true,
+/* 128 - Cc category        */ true,
+/* 129 - Cc category        */ true,
+/* 130 - Cc category        */ true,
+/* 131 - Cc category        */ true,
+/* 132 - Cc category        */ true,
+/* 133 - Cc category        */ true,
+/* 134 - Cc category        */ true,
+/* 135 - Cc category        */ true,
+/* 136 - Cc category        */ true,
+/* 137 - Cc category        */ true,
+/* 138 - Cc category        */ true,
+/* 139 - Cc category        */ true,
+/* 140 - Cc category        */ true,
+/* 141 - Cc category        */ true,
+/* 142 - Cc category        */ true,
+/* 143 - Cc category        */ true,
+/* 144 - Cc category        */ true,
+/* 145 - Cc category        */ true,
+/* 146 - Cc category        */ true,
+/* 147 - Cc category        */ true,
+/* 148 - Cc category        */ true,
+/* 149 - Cc category        */ true,
+/* 150 - Cc category        */ true,
+/* 151 - Cc category        */ true,
+/* 152 - Cc category        */ true,
+/* 153 - Cc category        */ true,
+/* 154 - Cc category        */ true,
+/* 155 - Cc category        */ true,
+/* 156 - Cc category        */ true,
+/* 157 - Cc category        */ true,
+/* 158 - Cc category        */ true,
+/* 159 - Cc category        */ true,
+/* 160 - Zs category (nbsp) */ true,
+/* 161 - Po category        */ true,
+/* 162 - Sc category        */ true,
+/* 163 - Sc category        */ true,
+/* 164 - Sc category        */ true,
+/* 165 - Sc category        */ true,
+/* 166 - So category        */ true,
+/* 167 - So category        */ true,
+/* 168 - Sk category        */ true,
+/* 169 - So category        */ true,
+/* 170 - Ll category        */ true,
+/* 171 - Pi category        */ true,
+/* 172 - Sm category        */ true,
+/* 173 - Cf category        */ true,
+/* 174 - So category        */ true,
+/* 175 - Sk category        */ true,
+/* 176 - So category        */ true,
+/* 177 - Sm category        */ true,
+/* 178 - No category        */ true,
+/* 179 - No category        */ true,
+/* 180 - Sk category        */ true,
+/* 181 - Ll category        */ true,
+/* 182 - So category        */ true,
+/* 183 - Po category        */ true,
+/* 184 - Sk category        */ true,
+/* 185 - No category        */ true,
+/* 186 - Ll category        */ true,
+/* 187 - Pf category        */ true,
+/* 188 - No category        */ true,
+/* 189 - No category        */ true,
+/* 190 - No category        */ true,
+/* 191 - Po category        */ true,
+/* 192 - Lu category        */ true,
+/* 193 - Lu category        */ true,
+/* 194 - Lu category        */ true,
+/* 195 - Lu category        */ true,
+/* 196 - Lu category        */ true,
+/* 197 - Lu category        */ true,
+/* 198 - Lu category        */ true,
+/* 199 - Lu category        */ true,
+/* 200 - Lu category        */ true,
+/* 201 - Lu category        */ true,
+/* 202 - Lu category        */ true,
+/* 203 - Lu category        */ true,
+/* 204 - Lu category        */ true,
+/* 205 - Lu category        */ true,
+/* 206 - Lu category        */ true,
+/* 207 - Lu category        */ true,
+/* 208 - Lu category        */ true,
+/* 209 - Lu category        */ true,
+/* 210 - Lu category        */ true,
+/* 211 - Lu category        */ true,
+/* 212 - Lu category        */ true,
+/* 213 - Lu category        */ true,
+/* 214 - Lu category        */ true,
+/* 215 - Sm category        */ true,
+/* 216 - Lu category        */ true,
+/* 217 - Lu category        */ true,
+/* 218 - Lu category        */ true,
+/* 219 - Lu category        */ true,
+/* 220 - Lu category        */ true,
+/* 221 - Lu category        */ true,
+/* 222 - Lu category        */ true,
+/* 223 - Ll category        */ true,
+/* 224 - Ll category        */ true,
+/* 225 - Ll category        */ true,
+/* 226 - Ll category        */ true,
+/* 227 - Ll category        */ true,
+/* 228 - Ll category        */ true,
+/* 229 - Ll category        */ true,
+/* 230 - Ll category        */ true,
+/* 231 - Ll category        */ true,
+/* 232 - Ll category        */ true,
+/* 233 - Ll category        */ true,
+/* 234 - Ll category        */ true,
+/* 235 - Ll category        */ true,
+/* 236 - Ll category        */ true,
+/* 237 - Ll category        */ true,
+/* 238 - Ll category        */ true,
+/* 239 - Ll category        */ true,
+/* 240 - Ll category        */ true,
+/* 241 - Ll category        */ true,
+/* 242 - Ll category        */ true,
+/* 243 - Ll category        */ true,
+/* 244 - Ll category        */ true,
+/* 245 - Ll category        */ true,
+/* 246 - Ll category        */ true,
+/* 247 - Sm category        */ true,
+/* 248 - Ll category        */ true,
+/* 249 - Ll category        */ true,
+/* 250 - Ll category        */ true,
+/* 251 - Ll category        */ true,
+/* 252 - Ll category        */ true,
+/* 253 - Ll category        */ true,
+/* 254 - Ll category        */ true,
+/* 255 - Ll category        */ true,
+};
+
 template <typename CharType>
 ALWAYS_INLINE TokenType LiteralParser<CharType>::Lexer::lex(LiteralParserToken<CharType>& token)
 {
@@ -453,10 +713,10 @@
     token.start = m_ptr;
     CharType character = *m_ptr;
     if (LIKELY(isLatin1(character))) {
-        TokenType tokenType = TokenTypesOfLatin1Characters[character];
+        TokenType tokenType = tokenTypesOfLatin1Characters[character];
         switch (tokenType) {
         case TokString:
-            if (character == '\'' && m_mode == StrictJSON) {
+            if (UNLIKELY(character == '\'' && m_mode == StrictJSON)) {
                 m_lexErrorMessage = "Single quotes (\') are not allowed in JSON"_s;
                 return TokError;
             }
@@ -572,13 +832,21 @@
 template <SafeStringCharacterSet set>
 static ALWAYS_INLINE bool isSafeStringCharacter(LChar c, LChar terminator)
 {
-    return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
+    if constexpr (set == SafeStringCharacterSet::Strict)
+        return safeStringLatin1CharactersInStrictJSON[c];
+    else
+        return (c >= ' ' && c != '\\' && c != terminator) || (c == '\t');
 }
 
 template <SafeStringCharacterSet set>
 static ALWAYS_INLINE bool isSafeStringCharacter(UChar c, UChar terminator)
 {
-    return (c >= ' ' && (set == SafeStringCharacterSet::Strict || isLatin1(c)) && c != '\\' && c != terminator) || (c == '\t' && set != SafeStringCharacterSet::Strict);
+    if constexpr (set == SafeStringCharacterSet::Strict) {
+        if (!isLatin1(c))
+            return true;
+        return isSafeStringCharacter<set>(static_cast<LChar>(c), static_cast<LChar>(terminator));
+    } else
+        return (c >= ' ' && isLatin1(c) && c != '\\' && c != terminator) || (c == '\t');
 }
 
 template <typename CharType>
@@ -588,7 +856,8 @@
     const CharType* runStart = m_ptr;
 
     if (m_mode == StrictJSON) {
-        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, terminator))
+        ASSERT(terminator == '"');
+        while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::Strict>(*m_ptr, '"'))
             ++m_ptr;
     } else {
         while (m_ptr < m_end && isSafeStringCharacter<SafeStringCharacterSet::NonStrict>(*m_ptr, terminator))

Modified: trunk/Source/JavaScriptCore/runtime/LiteralParser.h (272569 => 272570)


--- trunk/Source/JavaScriptCore/runtime/LiteralParser.h	2021-02-09 06:54:33 UTC (rev 272569)
+++ trunk/Source/JavaScriptCore/runtime/LiteralParser.h	2021-02-09 10:02:13 UTC (rev 272570)
@@ -33,9 +33,9 @@
 
 namespace JSC {
 
-typedef enum { StrictJSON, NonStrictJSON, JSONP } ParserMode;
+enum ParserMode : uint8_t { StrictJSON, NonStrictJSON, JSONP };
 
-enum JSONPPathEntryType {
+enum JSONPPathEntryType : uint8_t {
     JSONPPathEntryTypeDeclareVar, // var pathEntryName = JSON
     JSONPPathEntryTypeDot, // <prior entries>.pathEntryName = JSON
     JSONPPathEntryTypeLookup, // <prior entries>[pathIndex] = JSON
@@ -42,19 +42,22 @@
     JSONPPathEntryTypeCall // <prior entries>(JSON)
 };
 
-enum ParserState { StartParseObject, StartParseArray, StartParseExpression, 
-                   StartParseStatement, StartParseStatementEndStatement, 
-                   DoParseObjectStartExpression, DoParseObjectEndExpression,
-                   DoParseArrayStartExpression, DoParseArrayEndExpression };
-enum TokenType { TokLBracket, TokRBracket, TokLBrace, TokRBrace, 
-                 TokString, TokIdentifier, TokNumber, TokColon, 
-                 TokLParen, TokRParen, TokComma, TokTrue, TokFalse,
-                 TokNull, TokEnd, TokDot, TokAssign, TokSemi, TokError };
-    
+enum ParserState : uint8_t {
+    StartParseObject, StartParseArray, StartParseExpression,
+    StartParseStatement, StartParseStatementEndStatement,
+    DoParseObjectStartExpression, DoParseObjectEndExpression,
+    DoParseArrayStartExpression, DoParseArrayEndExpression };
+
+enum TokenType : uint8_t {
+    TokLBracket, TokRBracket, TokLBrace, TokRBrace,
+    TokString, TokIdentifier, TokNumber, TokColon,
+    TokLParen, TokRParen, TokComma, TokTrue, TokFalse,
+    TokNull, TokEnd, TokDot, TokAssign, TokSemi, TokError };
+
 struct JSONPPathEntry {
-    JSONPPathEntryType m_type;
     Identifier m_pathEntryName;
     int m_pathIndex;
+    JSONPPathEntryType m_type;
 };
 
 struct JSONPData {
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to