Modified: trunk/Source/WebCore/platform/URL.cpp (224822 => 224823)
--- trunk/Source/WebCore/platform/URL.cpp 2017-11-14 19:01:22 UTC (rev 224822)
+++ trunk/Source/WebCore/platform/URL.cpp 2017-11-14 19:15:23 UTC (rev 224823)
@@ -55,276 +55,6 @@
static const unsigned invalidPortNumber = 0xFFFF;
-enum URLCharacterClasses {
- // alpha
- SchemeFirstChar = 1 << 0,
-
- // ( alpha | digit | "+" | "-" | "." )
- SchemeChar = 1 << 1,
-
- // mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
- // unreserved = alphanum | mark
- // ( unreserved | escaped | ";" | ":" | "&" | "=" | "+" | "$" | "," )
- UserInfoChar = 1 << 2,
-
- // alnum | "." | "-" | "%"
- // The above is what the specification says, but we are lenient to
- // match existing practice and also allow:
- // "_"
- HostnameChar = 1 << 3,
-
- // hexdigit | ":" | "%"
- IPv6Char = 1 << 4,
-
- // "#" | "?" | "/" | nul
- PathSegmentEndChar = 1 << 5,
-
- // not allowed in path
- BadChar = 1 << 6,
-
- // "\t" | "\n" | "\r"
- TabNewline = 1 << 7
-};
-
-namespace URLInternal {
-static const unsigned char characterClassTable[256] = {
- /* 0 nul */ PathSegmentEndChar, /* 1 soh */ BadChar,
- /* 2 stx */ BadChar, /* 3 etx */ BadChar,
- /* 4 eot */ BadChar, /* 5 enq */ BadChar, /* 6 ack */ BadChar, /* 7 bel */ BadChar,
- /* 8 bs */ BadChar, /* 9 ht */ BadChar | TabNewline, /* 10 nl */ BadChar | TabNewline,
- /* 11 vt */ BadChar, /* 12 np */ BadChar, /* 13 cr */ BadChar | TabNewline,
- /* 14 so */ BadChar, /* 15 si */ BadChar,
- /* 16 dle */ BadChar, /* 17 dc1 */ BadChar, /* 18 dc2 */ BadChar, /* 19 dc3 */ BadChar,
- /* 20 dc4 */ BadChar, /* 21 nak */ BadChar, /* 22 syn */ BadChar, /* 23 etb */ BadChar,
- /* 24 can */ BadChar, /* 25 em */ BadChar, /* 26 sub */ BadChar, /* 27 esc */ BadChar,
- /* 28 fs */ BadChar, /* 29 gs */ BadChar, /* 30 rs */ BadChar, /* 31 us */ BadChar,
- /* 32 sp */ BadChar, /* 33 ! */ UserInfoChar,
- /* 34 " */ BadChar, /* 35 # */ PathSegmentEndChar | BadChar,
- /* 36 $ */ UserInfoChar, /* 37 % */ UserInfoChar | HostnameChar | IPv6Char | BadChar,
- /* 38 & */ UserInfoChar, /* 39 ' */ UserInfoChar,
- /* 40 ( */ UserInfoChar, /* 41 ) */ UserInfoChar,
- /* 42 * */ UserInfoChar, /* 43 + */ SchemeChar | UserInfoChar,
- /* 44 , */ UserInfoChar,
- /* 45 - */ SchemeChar | UserInfoChar | HostnameChar,
- /* 46 . */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 47 / */ PathSegmentEndChar,
- /* 48 0 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 49 1 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 50 2 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 51 3 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 52 4 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 53 5 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 54 6 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 55 7 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 56 8 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 57 9 */ SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 58 : */ UserInfoChar | IPv6Char, /* 59 ; */ UserInfoChar,
- /* 60 < */ BadChar, /* 61 = */ UserInfoChar,
- /* 62 > */ BadChar, /* 63 ? */ PathSegmentEndChar | BadChar,
- /* 64 @ */ 0,
- /* 65 A */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 66 B */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 67 C */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 68 D */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 69 E */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 70 F */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 71 G */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 72 H */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 73 I */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 74 J */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 75 K */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 76 L */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 77 M */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 78 N */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 79 O */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 80 P */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 81 Q */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 82 R */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 83 S */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 84 T */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 85 U */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 86 V */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 87 W */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 88 X */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 89 Y */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 90 Z */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 91 [ */ 0,
- /* 92 \ */ 0, /* 93 ] */ 0,
- /* 94 ^ */ 0,
- /* 95 _ */ UserInfoChar | HostnameChar,
- /* 96 ` */ 0,
- /* 97 a */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 98 b */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 99 c */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 100 d */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 101 e */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 102 f */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar | IPv6Char,
- /* 103 g */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 104 h */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 105 i */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 106 j */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 107 k */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 108 l */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 109 m */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 110 n */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 111 o */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 112 p */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 113 q */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 114 r */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 115 s */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 116 t */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 117 u */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 118 v */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 119 w */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 120 x */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 121 y */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 122 z */ SchemeFirstChar | SchemeChar | UserInfoChar | HostnameChar,
- /* 123 { */ 0,
- /* 124 | */ 0, /* 125 } */ 0, /* 126 ~ */ UserInfoChar, /* 127 del */ BadChar,
- /* 128 */ BadChar, /* 129 */ BadChar, /* 130 */ BadChar, /* 131 */ BadChar,
- /* 132 */ BadChar, /* 133 */ BadChar, /* 134 */ BadChar, /* 135 */ BadChar,
- /* 136 */ BadChar, /* 137 */ BadChar, /* 138 */ BadChar, /* 139 */ BadChar,
- /* 140 */ BadChar, /* 141 */ BadChar, /* 142 */ BadChar, /* 143 */ BadChar,
- /* 144 */ BadChar, /* 145 */ BadChar, /* 146 */ BadChar, /* 147 */ BadChar,
- /* 148 */ BadChar, /* 149 */ BadChar, /* 150 */ BadChar, /* 151 */ BadChar,
- /* 152 */ BadChar, /* 153 */ BadChar, /* 154 */ BadChar, /* 155 */ BadChar,
- /* 156 */ BadChar, /* 157 */ BadChar, /* 158 */ BadChar, /* 159 */ BadChar,
- /* 160 */ BadChar, /* 161 */ BadChar, /* 162 */ BadChar, /* 163 */ BadChar,
- /* 164 */ BadChar, /* 165 */ BadChar, /* 166 */ BadChar, /* 167 */ BadChar,
- /* 168 */ BadChar, /* 169 */ BadChar, /* 170 */ BadChar, /* 171 */ BadChar,
- /* 172 */ BadChar, /* 173 */ BadChar, /* 174 */ BadChar, /* 175 */ BadChar,
- /* 176 */ BadChar, /* 177 */ BadChar, /* 178 */ BadChar, /* 179 */ BadChar,
- /* 180 */ BadChar, /* 181 */ BadChar, /* 182 */ BadChar, /* 183 */ BadChar,
- /* 184 */ BadChar, /* 185 */ BadChar, /* 186 */ BadChar, /* 187 */ BadChar,
- /* 188 */ BadChar, /* 189 */ BadChar, /* 190 */ BadChar, /* 191 */ BadChar,
- /* 192 */ BadChar, /* 193 */ BadChar, /* 194 */ BadChar, /* 195 */ BadChar,
- /* 196 */ BadChar, /* 197 */ BadChar, /* 198 */ BadChar, /* 199 */ BadChar,
- /* 200 */ BadChar, /* 201 */ BadChar, /* 202 */ BadChar, /* 203 */ BadChar,
- /* 204 */ BadChar, /* 205 */ BadChar, /* 206 */ BadChar, /* 207 */ BadChar,
- /* 208 */ BadChar, /* 209 */ BadChar, /* 210 */ BadChar, /* 211 */ BadChar,
- /* 212 */ BadChar, /* 213 */ BadChar, /* 214 */ BadChar, /* 215 */ BadChar,
- /* 216 */ BadChar, /* 217 */ BadChar, /* 218 */ BadChar, /* 219 */ BadChar,
- /* 220 */ BadChar, /* 221 */ BadChar, /* 222 */ BadChar, /* 223 */ BadChar,
- /* 224 */ BadChar, /* 225 */ BadChar, /* 226 */ BadChar, /* 227 */ BadChar,
- /* 228 */ BadChar, /* 229 */ BadChar, /* 230 */ BadChar, /* 231 */ BadChar,
- /* 232 */ BadChar, /* 233 */ BadChar, /* 234 */ BadChar, /* 235 */ BadChar,
- /* 236 */ BadChar, /* 237 */ BadChar, /* 238 */ BadChar, /* 239 */ BadChar,
- /* 240 */ BadChar, /* 241 */ BadChar, /* 242 */ BadChar, /* 243 */ BadChar,
- /* 244 */ BadChar, /* 245 */ BadChar, /* 246 */ BadChar, /* 247 */ BadChar,
- /* 248 */ BadChar, /* 249 */ BadChar, /* 250 */ BadChar, /* 251 */ BadChar,
- /* 252 */ BadChar, /* 253 */ BadChar, /* 254 */ BadChar, /* 255 */ BadChar
-};
-}
-
-enum PercentEncodeCharacterClass {
- // Class names match the URL Standard; each class is a superset of the previous one.
- PercentEncodeSimple = 255,
- PercentEncodeDefault = 127,
- PercentEncodePassword = 63,
- PercentEncodeUsername = 31,
-};
-
-static const unsigned char percentEncodeClassTable[256] = {
- /* 0 nul */ PercentEncodeSimple, /* 1 soh */ PercentEncodeSimple, /* 2 stx */ PercentEncodeSimple, /* 3 etx */ PercentEncodeSimple,
- /* 4 eot */ PercentEncodeSimple, /* 5 enq */ PercentEncodeSimple, /* 6 ack */ PercentEncodeSimple, /* 7 bel */ PercentEncodeSimple,
- /* 8 bs */ PercentEncodeSimple, /* 9 ht */ PercentEncodeSimple, /* 10 nl */ PercentEncodeSimple, /* 11 vt */ PercentEncodeSimple,
- /* 12 np */ PercentEncodeSimple, /* 13 cr */ PercentEncodeSimple, /* 14 so */ PercentEncodeSimple, /* 15 si */ PercentEncodeSimple,
- /* 16 dle */ PercentEncodeSimple, /* 17 dc1 */ PercentEncodeSimple, /* 18 dc2 */ PercentEncodeSimple, /* 19 dc3 */ PercentEncodeSimple,
- /* 20 dc4 */ PercentEncodeSimple, /* 21 nak */ PercentEncodeSimple, /* 22 syn */ PercentEncodeSimple, /* 23 etb */ PercentEncodeSimple,
- /* 24 can */ PercentEncodeSimple, /* 25 em */ PercentEncodeSimple, /* 26 sub */ PercentEncodeSimple, /* 27 esc */ PercentEncodeSimple,
- /* 28 fs */ PercentEncodeSimple, /* 29 gs */ PercentEncodeSimple, /* 30 rs */ PercentEncodeSimple, /* 31 us */ PercentEncodeSimple,
- /* 32 sp */ PercentEncodeDefault,
- /* 33 ! */ 0,
- /* 34 " */ PercentEncodeDefault,
- /* 35 # */ PercentEncodeDefault,
- /* 36 $ */ 0,
- /* 37 % */ 0,
- /* 38 & */ 0,
- /* 39 ' */ 0,
- /* 40 ( */ 0,
- /* 41 ) */ 0,
- /* 42 * */ 0,
- /* 43 + */ 0,
- /* 44 , */ 0,
- /* 45 - */ 0,
- /* 46 . */ 0,
- /* 47 / */ PercentEncodePassword,
- /* 48 0 */ 0, /* 49 1 */ 0, /* 50 2 */ 0, /* 51 3 */ 0,
- /* 52 4 */ 0, /* 53 5 */ 0, /* 54 6 */ 0, /* 55 7 */ 0,
- /* 56 8 */ 0, /* 57 9 */ 0,
- /* 58 : */ PercentEncodeUsername,
- /* 59 ; */ 0,
- /* 60 < */ PercentEncodeDefault,
- /* 61 = */ 0,
- /* 62 > */ PercentEncodeDefault,
- /* 63 ? */ PercentEncodeDefault,
- /* 64 @ */ PercentEncodePassword,
- /* 65 A */ 0, /* 66 B */ 0, /* 67 C */ 0, /* 68 D */ 0,
- /* 69 E */ 0, /* 70 F */ 0, /* 71 G */ 0, /* 72 H */ 0,
- /* 73 I */ 0, /* 74 J */ 0, /* 75 K */ 0, /* 76 L */ 0,
- /* 77 M */ 0, /* 78 N */ 0, /* 79 O */ 0, /* 80 P */ 0,
- /* 81 Q */ 0, /* 82 R */ 0, /* 83 S */ 0, /* 84 T */ 0,
- /* 85 U */ 0, /* 86 V */ 0, /* 87 W */ 0, /* 88 X */ 0,
- /* 89 Y */ 0, /* 90 Z */ 0,
- /* 91 [ */ 0,
- /* 92 \ */ PercentEncodePassword,
- /* 93 ] */ 0,
- /* 94 ^ */ 0,
- /* 95 _ */ 0,
- /* 96 ` */ PercentEncodeDefault,
- /* 97 a */ 0, /* 98 b */ 0, /* 99 c */ 0, /* 100 d */ 0,
- /* 101 e */ 0, /* 102 f */ 0, /* 103 g */ 0, /* 104 h */ 0,
- /* 105 i */ 0, /* 106 j */ 0, /* 107 k */ 0, /* 108 l */ 0,
- /* 109 m */ 0, /* 110 n */ 0, /* 111 o */ 0, /* 112 p */ 0,
- /* 113 q */ 0, /* 114 r */ 0, /* 115 s */ 0, /* 116 t */ 0,
- /* 117 u */ 0, /* 118 v */ 0, /* 119 w */ 0, /* 120 x */ 0,
- /* 121 y */ 0, /* 122 z */ 0,
- /* 123 { */ 0,
- /* 124 | */ 0,
- /* 125 } */ 0,
- /* 126 ~ */ 0,
- /* 127 del */ PercentEncodeSimple,
- /* 128 */ PercentEncodeSimple, /* 129 */ PercentEncodeSimple, /* 130 */ PercentEncodeSimple, /* 131 */ PercentEncodeSimple,
- /* 132 */ PercentEncodeSimple, /* 133 */ PercentEncodeSimple, /* 134 */ PercentEncodeSimple, /* 135 */ PercentEncodeSimple,
- /* 136 */ PercentEncodeSimple, /* 137 */ PercentEncodeSimple, /* 138 */ PercentEncodeSimple, /* 139 */ PercentEncodeSimple,
- /* 140 */ PercentEncodeSimple, /* 141 */ PercentEncodeSimple, /* 142 */ PercentEncodeSimple, /* 143 */ PercentEncodeSimple,
- /* 144 */ PercentEncodeSimple, /* 145 */ PercentEncodeSimple, /* 146 */ PercentEncodeSimple, /* 147 */ PercentEncodeSimple,
- /* 148 */ PercentEncodeSimple, /* 149 */ PercentEncodeSimple, /* 150 */ PercentEncodeSimple, /* 151 */ PercentEncodeSimple,
- /* 152 */ PercentEncodeSimple, /* 153 */ PercentEncodeSimple, /* 154 */ PercentEncodeSimple, /* 155 */ PercentEncodeSimple,
- /* 156 */ PercentEncodeSimple, /* 157 */ PercentEncodeSimple, /* 158 */ PercentEncodeSimple, /* 159 */ PercentEncodeSimple,
- /* 160 */ PercentEncodeSimple, /* 161 */ PercentEncodeSimple, /* 162 */ PercentEncodeSimple, /* 163 */ PercentEncodeSimple,
- /* 164 */ PercentEncodeSimple, /* 165 */ PercentEncodeSimple, /* 166 */ PercentEncodeSimple, /* 167 */ PercentEncodeSimple,
- /* 168 */ PercentEncodeSimple, /* 169 */ PercentEncodeSimple, /* 170 */ PercentEncodeSimple, /* 171 */ PercentEncodeSimple,
- /* 172 */ PercentEncodeSimple, /* 173 */ PercentEncodeSimple, /* 174 */ PercentEncodeSimple, /* 175 */ PercentEncodeSimple,
- /* 176 */ PercentEncodeSimple, /* 177 */ PercentEncodeSimple, /* 178 */ PercentEncodeSimple, /* 179 */ PercentEncodeSimple,
- /* 180 */ PercentEncodeSimple, /* 181 */ PercentEncodeSimple, /* 182 */ PercentEncodeSimple, /* 183 */ PercentEncodeSimple,
- /* 184 */ PercentEncodeSimple, /* 185 */ PercentEncodeSimple, /* 186 */ PercentEncodeSimple, /* 187 */ PercentEncodeSimple,
- /* 188 */ PercentEncodeSimple, /* 189 */ PercentEncodeSimple, /* 190 */ PercentEncodeSimple, /* 191 */ PercentEncodeSimple,
- /* 192 */ PercentEncodeSimple, /* 193 */ PercentEncodeSimple, /* 194 */ PercentEncodeSimple, /* 195 */ PercentEncodeSimple,
- /* 196 */ PercentEncodeSimple, /* 197 */ PercentEncodeSimple, /* 198 */ PercentEncodeSimple, /* 199 */ PercentEncodeSimple,
- /* 200 */ PercentEncodeSimple, /* 201 */ PercentEncodeSimple, /* 202 */ PercentEncodeSimple, /* 203 */ PercentEncodeSimple,
- /* 204 */ PercentEncodeSimple, /* 205 */ PercentEncodeSimple, /* 206 */ PercentEncodeSimple, /* 207 */ PercentEncodeSimple,
- /* 208 */ PercentEncodeSimple, /* 209 */ PercentEncodeSimple, /* 210 */ PercentEncodeSimple, /* 211 */ PercentEncodeSimple,
- /* 212 */ PercentEncodeSimple, /* 213 */ PercentEncodeSimple, /* 214 */ PercentEncodeSimple, /* 215 */ PercentEncodeSimple,
- /* 216 */ PercentEncodeSimple, /* 217 */ PercentEncodeSimple, /* 218 */ PercentEncodeSimple, /* 219 */ PercentEncodeSimple,
- /* 220 */ PercentEncodeSimple, /* 221 */ PercentEncodeSimple, /* 222 */ PercentEncodeSimple, /* 223 */ PercentEncodeSimple,
- /* 224 */ PercentEncodeSimple, /* 225 */ PercentEncodeSimple, /* 226 */ PercentEncodeSimple, /* 227 */ PercentEncodeSimple,
- /* 228 */ PercentEncodeSimple, /* 229 */ PercentEncodeSimple, /* 230 */ PercentEncodeSimple, /* 231 */ PercentEncodeSimple,
- /* 232 */ PercentEncodeSimple, /* 233 */ PercentEncodeSimple, /* 234 */ PercentEncodeSimple, /* 235 */ PercentEncodeSimple,
- /* 236 */ PercentEncodeSimple, /* 237 */ PercentEncodeSimple, /* 238 */ PercentEncodeSimple, /* 239 */ PercentEncodeSimple,
- /* 240 */ PercentEncodeSimple, /* 241 */ PercentEncodeSimple, /* 242 */ PercentEncodeSimple, /* 243 */ PercentEncodeSimple,
- /* 244 */ PercentEncodeSimple, /* 245 */ PercentEncodeSimple, /* 246 */ PercentEncodeSimple, /* 247 */ PercentEncodeSimple,
- /* 248 */ PercentEncodeSimple, /* 249 */ PercentEncodeSimple, /* 250 */ PercentEncodeSimple, /* 251 */ PercentEncodeSimple,
- /* 252 */ PercentEncodeSimple, /* 253 */ PercentEncodeSimple, /* 254 */ PercentEncodeSimple, /* 255 */ PercentEncodeSimple
-};
-
-static inline bool isSchemeFirstChar(UChar c) { return c <= 0xff && (URLInternal::characterClassTable[c] & SchemeFirstChar); }
-static inline bool isSchemeChar(UChar c) { return c <= 0xff && (URLInternal::characterClassTable[c] & SchemeChar); }
-static inline bool isBadChar(unsigned char c) { return URLInternal::characterClassTable[c] & BadChar; }
-static inline bool isTabNewline(UChar c) { return c <= 0xff && (URLInternal::characterClassTable[c] & TabNewline); }
-
-String encodeWithURLEscapeSequences(const String& notEncodedString, PercentEncodeCharacterClass whatToEncode);
-
// Copies the source to the destination, assuming all the source characters are
// ASCII. The destination buffer must be large enough. Null characters are allowed
// in the source string, and no attempt is made to null-terminate the result.
@@ -651,17 +381,17 @@
// Firefox and IE remove everything after the first ':'.
size_t separatorPosition = s.find(':');
String newProtocol = s.substring(0, separatorPosition);
-
- if (!isValidProtocol(newProtocol))
+ auto canonicalized = URLParser::maybeCanonicalizeScheme(newProtocol);
+ if (!canonicalized)
return false;
if (!m_isValid) {
- URLParser parser(makeString(newProtocol, ":", m_string));
+ URLParser parser(makeString(*canonicalized, ":", m_string));
*this = parser.result();
return true;
}
- URLParser parser(makeString(newProtocol, m_string.substring(m_schemeEnd)));
+ URLParser parser(makeString(*canonicalized, m_string.substring(m_schemeEnd)));
*this = parser.result();
return true;
}
@@ -795,6 +525,32 @@
*this = parser.result();
}
+// Returns |input| with every character selected by |shouldEncode| replaced by a
+// "%XX" escape. If no UTF-16 code unit of |input| is selected, |input| is
+// returned unchanged and no conversion or allocation is performed.
+//
+// input:        the string to escape.
+// shouldEncode: predicate deciding whether a character must be escaped. It is
+//               applied to UTF-16 code units in the fast-path scan and to
+//               individual UTF-8 bytes in the encoding pass.
+static String percentEncodeCharacters(const String& input, bool(*shouldEncode)(UChar))
+{
+    // Slow path: re-encode the string as UTF-8 and escape it byte by byte.
+    auto encode = [shouldEncode] (const String& input) {
+        CString utf8 = input.utf8();
+        // Must point at the UTF-8 buffer: the loop below reads utf8.length() bytes from it.
+        auto* data = utf8.data();
+        StringBuilder builder;
+        auto length = utf8.length();
+        for (unsigned j = 0; j < length; j++) {
+            // NOTE(review): if CString::data() yields plain char, bytes >= 0x80 may be
+            // sign-extended when converted to UChar for shouldEncode - confirm the
+            // predicates passed here are fine with that.
+            auto c = data[j];
+            if (shouldEncode(c)) {
+                // Emit '%' followed by the hex digits of the high and low nibble.
+                builder.append('%');
+                builder.append(upperNibbleToASCIIHexDigit(c));
+                builder.append(lowerNibbleToASCIIHexDigit(c));
+            } else
+                builder.append(c);
+        }
+        return builder.toString();
+    };
+
+    // Fast path: only pay for the UTF-8 conversion when something needs escaping.
+    for (size_t i = 0; i < input.length(); ++i) {
+        if (UNLIKELY(shouldEncode(input[i])))
+            return encode(input);
+    }
+    return input;
+}
+
void URL::setUser(const String& user)
{
if (!m_isValid)
@@ -805,13 +561,13 @@
unsigned end = m_userEnd;
if (!user.isEmpty()) {
- String u = encodeWithURLEscapeSequences(user, PercentEncodeUsername);
+ String u = percentEncodeCharacters(user, URLParser::isInUserInfoEncodeSet);
if (m_userStart == m_schemeEnd + 1)
u = "//" + u;
// Add '@' if we didn't have one before.
if (end == m_hostEnd || (end == m_passwordEnd && m_string[end] != '@'))
u.append('@');
- URLParser parser(makeString(m_string.left(m_userStart), u, m_string.substring(end)));
+ URLParser parser(makeString(StringView(m_string).left(m_userStart), u, StringView(m_string).substring(end)));
*this = parser.result();
} else {
// Remove '@' if we now have neither user nor password.
@@ -819,7 +575,7 @@
end += 1;
// We don't want to parse in the extremely common case where we are not going to make a change.
if (m_userStart != end) {
- URLParser parser(makeString(m_string.left(m_userStart), m_string.substring(end)));
+ URLParser parser(makeString(StringView(m_string).left(m_userStart), StringView(m_string).substring(end)));
*this = parser.result();
}
}
@@ -832,13 +588,13 @@
unsigned end = m_passwordEnd;
if (!password.isEmpty()) {
- String p = ":" + encodeWithURLEscapeSequences(password, PercentEncodePassword) + "@";
+ String p = ":" + percentEncodeCharacters(password, URLParser::isInUserInfoEncodeSet) + "@";
if (m_userEnd == m_schemeEnd + 1)
p = "//" + p;
// Eat the existing '@' since we are going to add our own.
if (end != m_hostEnd && m_string[end] == '@')
end += 1;
- URLParser parser(makeString(m_string.left(m_userEnd), p, m_string.substring(end)));
+ URLParser parser(makeString(StringView(m_string).left(m_userEnd), p, StringView(m_string).substring(end)));
*this = parser.result();
} else {
// Remove '@' if we now have neither user nor password.
@@ -846,7 +602,7 @@
end += 1;
// We don't want to parse in the extremely common case where we are not going to make a change.
if (m_userEnd != end) {
- URLParser parser(makeString(m_string.left(m_userEnd), m_string.substring(end)));
+ URLParser parser(makeString(StringView(m_string).left(m_userEnd), StringView(m_string).substring(end)));
*this = parser.result();
}
}
@@ -891,10 +647,10 @@
// access to the document in this function.
// https://webkit.org/b/161176
if ((query.isEmpty() || query[0] != '?') && !query.isNull()) {
- URLParser parser(makeString(m_string.left(m_pathEnd), "?", query, m_string.substring(m_queryEnd)));
+ URLParser parser(makeString(StringView(m_string).left(m_pathEnd), "?", query, StringView(m_string).substring(m_queryEnd)));
*this = parser.result();
} else {
- URLParser parser(makeString(m_string.left(m_pathEnd), query, m_string.substring(m_queryEnd)));
+ URLParser parser(makeString(StringView(m_string).left(m_pathEnd), query, StringView(m_string).substring(m_queryEnd)));
*this = parser.result();
}
@@ -905,13 +661,14 @@
if (!m_isValid)
return;
- // FIXME: encodeWithURLEscapeSequences does not correctly escape '#' and '?', so fragment and query parts
- // may be inadvertently affected.
String path = s;
if (path.isEmpty() || path[0] != '/')
path = "/" + path;
- URLParser parser(makeString(m_string.left(m_portEnd), encodeWithURLEscapeSequences(path), m_string.substring(m_pathEnd)));
+ auto questionMarkOrNumberSign = [] (UChar character) {
+ return character == '?' || character == '#';
+ };
+ URLParser parser(makeString(StringView(m_string).left(m_portEnd), percentEncodeCharacters(path, questionMarkOrNumberSign), StringView(m_string).substring(m_pathEnd)));
*this = parser.result();
}
@@ -925,13 +682,6 @@
return decodeEscapeSequences<URLEscapeSequence>(string, encoding);
}
-// Caution: This function does not bounds check.
-static void appendEscapedChar(char*& buffer, unsigned char c)
-{
- *buffer++ = '%';
- placeByteAsHex(c, buffer);
-}
-
String URL::serialize(bool omitFragment) const
{
if (omitFragment)
@@ -1033,50 +783,11 @@
return true;
}
-String encodeWithURLEscapeSequences(const String& notEncodedString, PercentEncodeCharacterClass whatToEncode)
+String encodeWithURLEscapeSequences(const String& input)
{
- CString asUTF8 = notEncodedString.utf8();
-
- CharBuffer buffer(asUTF8.length() * 3 + 1);
- char* p = buffer.data();
-
- const char* str = asUTF8.data();
- const char* strEnd = str + asUTF8.length();
- while (str < strEnd) {
- unsigned char c = *str++;
- if (percentEncodeClassTable[c] >= whatToEncode)
- appendEscapedChar(p, c);
- else
- *p++ = c;
- }
-
- ASSERT(p - buffer.data() <= static_cast<int>(buffer.size()));
-
- return String(buffer.data(), p - buffer.data());
+ return percentEncodeCharacters(input, URLParser::isInUserInfoEncodeSet);
}
-String encodeWithURLEscapeSequences(const String& notEncodedString)
-{
- CString asUTF8 = notEncodedString.utf8();
-
- CharBuffer buffer(asUTF8.length() * 3 + 1);
- char* p = buffer.data();
-
- const char* str = asUTF8.data();
- const char* strEnd = str + asUTF8.length();
- while (str < strEnd) {
- unsigned char c = *str++;
- if (isBadChar(c))
- appendEscapedChar(p, c);
- else
- *p++ = c;
- }
-
- ASSERT(p - buffer.data() <= static_cast<int>(buffer.size()));
-
- return String(buffer.data(), p - buffer.data());
-}
-
bool URL::isHierarchical() const
{
if (!m_isValid)
@@ -1106,7 +817,7 @@
isLeading = false;
// Skip any tabs and newlines.
- if (isTabNewline(url[i]))
+ if (url[i] == '\t' || url[i] == '\r' || url[i] == '\n')
continue;
if (!protocol[j])
@@ -1130,21 +841,6 @@
return WebCore::protocolIsInternal(string, protocol);
}
-bool isValidProtocol(const String& protocol)
-{
- // RFC3986: ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- if (protocol.isEmpty())
- return false;
- if (!isSchemeFirstChar(protocol[0]))
- return false;
- unsigned protocolLength = protocol.length();
- for (unsigned i = 1; i < protocolLength; i++) {
- if (!isSchemeChar(protocol[i]))
- return false;
- }
- return true;
-}
-
#ifndef NDEBUG
void URL::print() const