Log Message
Some tests fail with ES6 `u` (Unicode) flag for regular expressions https://bugs.webkit.org/show_bug.cgi?id=151597
Reviewed by Geoffrey Garen. Source/_javascript_Core: Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes when specified in RegExps with both the unicode and ignoreCase flags. Given the case folding rules described in the standard via the meta function Canonicalize(), which allow cross ASCII case folding when unicode is specified, the unicode characters \u017f (Latin small letter long s) and \u212a (Kelvin sign) are part of the \w (word) characterClassEscape. This is true because they case fold to 's' and 'k' respectively. Because they case fold to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with \W with the unicode and ignoreCase flags. * create_regex_tables: * yarr/YarrPattern.cpp: (JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass): (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn): (JSC::Yarr::YarrPattern::YarrPattern): * yarr/YarrPattern.h: (JSC::Yarr::YarrPattern::wordcharCharacterClass): (JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass): (JSC::Yarr::YarrPattern::nonwordcharCharacterClass): (JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass): LayoutTests: Updated tests. * js/regexp-unicode-expected.txt: * js/script-tests/regexp-unicode.js:
Modified Paths
- trunk/LayoutTests/ChangeLog
- trunk/LayoutTests/js/regexp-unicode-expected.txt
- trunk/LayoutTests/js/script-tests/regexp-unicode.js
- trunk/Source/_javascript_Core/ChangeLog
- trunk/Source/_javascript_Core/create_regex_tables
- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp
- trunk/Source/_javascript_Core/yarr/YarrPattern.h
Diff
Modified: trunk/LayoutTests/ChangeLog (199522 => 199523)
--- trunk/LayoutTests/ChangeLog 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/ChangeLog 2016-04-14 00:47:40 UTC (rev 199523)
@@ -1,3 +1,15 @@
+2016-04-13 Michael Saboff <[email protected]>
+
+ Some tests fail with ES6 `u` (Unicode) flag for regular expressions
+ https://bugs.webkit.org/show_bug.cgi?id=151597
+
+ Reviewed by Geoffrey Garen.
+
+ Updated tests.
+
+ * js/regexp-unicode-expected.txt:
+ * js/script-tests/regexp-unicode.js:
+
2016-04-13 Chris Dumez <[email protected]>
We should not speculatively revalidate cached redirects
Modified: trunk/LayoutTests/js/regexp-unicode-expected.txt (199522 => 199523)
--- trunk/LayoutTests/js/regexp-unicode-expected.txt 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/js/regexp-unicode-expected.txt 2016-04-14 00:47:40 UTC (rev 199523)
@@ -39,6 +39,38 @@
PASS /(?:A|𐄣|b)x/iu.test("bx") is true
PASS "a𐄣X".match(/a𐄣b|a𐄣x/iu)[0].length is 4
PASS "Ťx".match(/ťx/iu)[0].length is 2
+PASS /\w/iu.test("ſ") is true
+PASS /\w/iu.test("K") is true
+PASS /!\w/iu.test("ſ") is false
+PASS /!\w/iu.test("K") is false
+PASS /\W/iu.test("ſ") is true
+PASS /\W/iu.test("K") is true
+PASS /!\W/iu.test("ſ") is false
+PASS /!\W/iu.test("K") is false
+PASS /[\w\d]/iu.test("ſ") is true
+PASS /[\w\d]/iu.test("K") is true
+PASS /[^\w\d]/iu.test("ſ") is false
+PASS /[^\w\d]/iu.test("K") is false
+PASS /[\W\d]/iu.test("ſ") is true
+PASS /[\W\d]/iu.test("K") is true
+PASS /[^\W\d]/iu.test("ſ") is false
+PASS /[^\W\d]/iu.test("K") is false
+PASS /\w/iu.test("S") is true
+PASS /\w/iu.test("K") is true
+PASS /!\w/iu.test("S") is false
+PASS /!\w/iu.test("K") is false
+PASS /\W/iu.test("S") is true
+PASS /\W/iu.test("K") is true
+PASS /!\W/iu.test("S") is false
+PASS /!\W/iu.test("K") is false
+PASS /[\w\d]/iu.test("S") is true
+PASS /[\w\d]/iu.test("K") is true
+PASS /[^\w\d]/iu.test("S") is false
+PASS /[^\w\d]/iu.test("K") is false
+PASS /[\W\d]/iu.test("S") is true
+PASS /[\W\d]/iu.test("K") is true
+PASS /[^\W\d]/iu.test("S") is false
+PASS /[^\W\d]/iu.test("K") is false
PASS "𝌆".match(/^.$/u)[0].length is 2
PASS "It is 78°".match(/.*/u)[0].length is 9
PASS stringWithDanglingFirstSurrogate.match(/.*/u)[0].length is 3
Modified: trunk/LayoutTests/js/script-tests/regexp-unicode.js (199522 => 199523)
--- trunk/LayoutTests/js/script-tests/regexp-unicode.js 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/LayoutTests/js/script-tests/regexp-unicode.js 2016-04-14 00:47:40 UTC (rev 199523)
@@ -43,7 +43,40 @@
shouldBeTrue('/(?:A|\u{10123}|b)x/iu.test("bx")');
shouldBe('"a\u{10123}X".match(/a\u{10123}b|a\u{10123}x/iu)[0].length', '4');
shouldBe('"\u0164x".match(/\u0165x/iu)[0].length', '2');
+shouldBeTrue('/\\w/iu.test("\u017f")');
+shouldBeTrue('/\\w/iu.test("\u212a")');
+shouldBeFalse('/!\\w/iu.test("\u017f")');
+shouldBeFalse('/!\\w/iu.test("\u212a")');
+shouldBeTrue('/\\W/iu.test("\u017f")');
+shouldBeTrue('/\\W/iu.test("\u212a")');
+shouldBeFalse('/!\\W/iu.test("\u017f")');
+shouldBeFalse('/!\\W/iu.test("\u212a")');
+shouldBeTrue('/[\\w\\d]/iu.test("\u017f")');
+shouldBeTrue('/[\\w\\d]/iu.test("\u212a")');
+shouldBeFalse('/[^\\w\\d]/iu.test("\u017f")');
+shouldBeFalse('/[^\\w\\d]/iu.test("\u212a")');
+shouldBeTrue('/[\\W\\d]/iu.test("\u017f")');
+shouldBeTrue('/[\\W\\d]/iu.test("\u212a")');
+shouldBeFalse('/[^\\W\\d]/iu.test("\u017f")');
+shouldBeFalse('/[^\\W\\d]/iu.test("\u212a")');
+shouldBeTrue('/\\w/iu.test("S")');
+shouldBeTrue('/\\w/iu.test("K")');
+shouldBeFalse('/!\\w/iu.test("S")');
+shouldBeFalse('/!\\w/iu.test("K")');
+shouldBeTrue('/\\W/iu.test("S")');
+shouldBeTrue('/\\W/iu.test("K")');
+shouldBeFalse('/!\\W/iu.test("S")');
+shouldBeFalse('/!\\W/iu.test("K")');
+shouldBeTrue('/[\\w\\d]/iu.test("S")');
+shouldBeTrue('/[\\w\\d]/iu.test("K")');
+shouldBeFalse('/[^\\w\\d]/iu.test("S")');
+shouldBeFalse('/[^\\w\\d]/iu.test("K")');
+shouldBeTrue('/[\\W\\d]/iu.test("S")');
+shouldBeTrue('/[\\W\\d]/iu.test("K")');
+shouldBeFalse('/[^\\W\\d]/iu.test("S")');
+shouldBeFalse('/[^\\W\\d]/iu.test("K")');
+
// Test . matches with Unicode flag
shouldBe('"\u{1D306}".match(/^.$/u)[0].length', '2');
shouldBe('"It is 78\u00B0".match(/.*/u)[0].length', '9');
Modified: trunk/Source/_javascript_Core/ChangeLog (199522 => 199523)
--- trunk/Source/_javascript_Core/ChangeLog 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/ChangeLog 2016-04-14 00:47:40 UTC (rev 199523)
@@ -1,3 +1,30 @@
+2016-04-13 Michael Saboff <[email protected]>
+
+ Some tests fail with ES6 `u` (Unicode) flag for regular expressions
+ https://bugs.webkit.org/show_bug.cgi?id=151597
+
+ Reviewed by Geoffrey Garen.
+
+ Added two new tables to handle the anomalies of \w and \W CharacterClassEscapes
+ when specified in RegExps with both the unicode and ignoreCase flags. Given the
+ case folding rules described in the standard via the meta function Canonicalize(),
+ which allow cross ASCII case folding when unicode is specified, the unicode characters
+ \u017f (Latin small letter long s) and \u212a (Kelvin sign) are part of the \w (word) characterClassEscape.
+ This is true because they case fold to 's' and 'k' respectively. Because they case fold
+ to lower case letters, the corresponding letters, 'k', 'K', 's' and 'S', are also matched with
+ \W with the unicode and ignoreCase flags.
+
+ * create_regex_tables:
+ * yarr/YarrPattern.cpp:
+ (JSC::Yarr::YarrPatternConstructor::atomBuiltInCharacterClass):
+ (JSC::Yarr::YarrPatternConstructor::atomCharacterClassBuiltIn):
+ (JSC::Yarr::YarrPattern::YarrPattern):
+ * yarr/YarrPattern.h:
+ (JSC::Yarr::YarrPattern::wordcharCharacterClass):
+ (JSC::Yarr::YarrPattern::wordUnicodeIgnoreCaseCharCharacterClass):
+ (JSC::Yarr::YarrPattern::nonwordcharCharacterClass):
+ (JSC::Yarr::YarrPattern::nonwordUnicodeIgnoreCaseCharCharacterClass):
+
2016-04-13 Commit Queue <[email protected]>
Unreviewed, rolling out r199502 and r199511.
Modified: trunk/Source/_javascript_Core/create_regex_tables (199522 => 199523)
--- trunk/Source/_javascript_Core/create_regex_tables 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/create_regex_tables 2016-04-14 00:47:40 UTC (rev 199523)
@@ -25,12 +25,14 @@
types = {
"wordchar": { "UseTable" : True, "data": ['_', ('0','9'), ('A', 'Z'), ('a','z')]},
- "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0xffff)]},
+ "wordUnicodeIgnoreCaseChar": { "UseTable" : False, "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z'), 0x017f, 0x212a]},
+ "nonwordchar": { "UseTable" : True, "Inverse": "wordchar", "data": ['`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]},
+ "nonwordUnicodeIgnoreCaseChar": { "UseTable" : False, "Inverse": "wordchar", "data": ['k', 'K', 's', 'S', '`', (0, ord('0') - 1), (ord('9') + 1, ord('A') - 1), (ord('Z') + 1, ord('_') - 1), (ord('z') + 1, 0x10ffff)]},
"newline": { "UseTable" : False, "data": ['\n', '\r', 0x2028, 0x2029]},
"spaces": { "UseTable" : True, "data": [' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000, (0x2000, 0x200a), 0xfeff]},
- "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0xffff)]},
+ "nonspaces": { "UseTable" : True, "Inverse": "spaces", "data": [(0, ord('\t') - 1), (ord('\r') + 1, ord(' ') - 1), (ord(' ') + 1, 0x009f), (0x00a1, 0x167f), (0x1681, 0x180d), (0x180f, 0x1fff), (0x200b, 0x2027), (0x202a, 0x202e), (0x2030, 0x205e), (0x2060, 0x2fff), (0x3001, 0xfefe), (0xff00, 0x10ffff)]},
"digits": { "UseTable" : False, "data": [('0', '9')]},
- "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)] }
+ "nondigits": { "UseTable" : False, "Inverse": "digits", "data": [(0, ord('0') - 1), (ord('9') + 1, 0x10ffff)] }
}
entriesPerLine = 50
arrays = "";
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.cpp (199522 => 199523)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.cpp 2016-04-14 00:47:40 UTC (rev 199523)
@@ -349,7 +349,13 @@
m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
break;
case WordClassID:
- m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+ if (m_pattern.unicode() && m_pattern.ignoreCase()) {
+ if (invert)
+ m_alternative->m_terms.append(PatternTerm(m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass(), false));
+ else
+ m_alternative->m_terms.append(PatternTerm(m_pattern.wordUnicodeIgnoreCaseCharCharacterClass(), false));
+ } else
+ m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
break;
case NewlineClassID:
m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
@@ -386,7 +392,10 @@
break;
case WordClassID:
- m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+ if (m_pattern.unicode() && m_pattern.ignoreCase())
+ m_characterClassConstructor.append(invert ? m_pattern.nonwordUnicodeIgnoreCaseCharCharacterClass() : m_pattern.wordUnicodeIgnoreCaseCharCharacterClass());
+ else
+ m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
break;
default:
@@ -884,9 +893,11 @@
, digitsCached(0)
, spacesCached(0)
, wordcharCached(0)
+ , wordUnicodeIgnoreCaseCharCached(0)
, nondigitsCached(0)
, nonspacesCached(0)
, nonwordcharCached(0)
+ , nonwordUnicodeIgnoreCasecharCached(0)
{
*error = compile(pattern);
}
Modified: trunk/Source/_javascript_Core/yarr/YarrPattern.h (199522 => 199523)
--- trunk/Source/_javascript_Core/yarr/YarrPattern.h 2016-04-14 00:42:00 UTC (rev 199522)
+++ trunk/Source/_javascript_Core/yarr/YarrPattern.h 2016-04-14 00:47:40 UTC (rev 199523)
@@ -287,9 +287,11 @@
std::unique_ptr<CharacterClass> digitsCreate();
std::unique_ptr<CharacterClass> spacesCreate();
std::unique_ptr<CharacterClass> wordcharCreate();
+std::unique_ptr<CharacterClass> wordUnicodeIgnoreCaseCharCreate();
std::unique_ptr<CharacterClass> nondigitsCreate();
std::unique_ptr<CharacterClass> nonspacesCreate();
std::unique_ptr<CharacterClass> nonwordcharCreate();
+std::unique_ptr<CharacterClass> nonwordUnicodeIgnoreCaseCharCreate();
struct TermChain {
TermChain(PatternTerm term)
@@ -317,9 +319,11 @@
digitsCached = 0;
spacesCached = 0;
wordcharCached = 0;
+ wordUnicodeIgnoreCaseCharCached = 0;
nondigitsCached = 0;
nonspacesCached = 0;
nonwordcharCached = 0;
+ nonwordUnicodeIgnoreCasecharCached = 0;
m_disjunctions.clear();
m_userCharacterClasses.clear();
@@ -367,6 +371,14 @@
}
return wordcharCached;
}
+ CharacterClass* wordUnicodeIgnoreCaseCharCharacterClass()
+ {
+ if (!wordUnicodeIgnoreCaseCharCached) {
+ m_userCharacterClasses.append(wordUnicodeIgnoreCaseCharCreate());
+ wordUnicodeIgnoreCaseCharCached = m_userCharacterClasses.last().get();
+ }
+ return wordUnicodeIgnoreCaseCharCached;
+ }
CharacterClass* nondigitsCharacterClass()
{
if (!nondigitsCached) {
@@ -391,6 +403,14 @@
}
return nonwordcharCached;
}
+ CharacterClass* nonwordUnicodeIgnoreCaseCharCharacterClass()
+ {
+ if (!nonwordUnicodeIgnoreCasecharCached) {
+ m_userCharacterClasses.append(nonwordUnicodeIgnoreCaseCharCreate());
+ nonwordUnicodeIgnoreCasecharCached = m_userCharacterClasses.last().get();
+ }
+ return nonwordUnicodeIgnoreCasecharCached;
+ }
bool ignoreCase() const { return m_flags & FlagIgnoreCase; }
bool multiline() const { return m_flags & FlagMultiline; }
@@ -414,9 +434,11 @@
CharacterClass* digitsCached;
CharacterClass* spacesCached;
CharacterClass* wordcharCached;
+ CharacterClass* wordUnicodeIgnoreCaseCharCached;
CharacterClass* nondigitsCached;
CharacterClass* nonspacesCached;
CharacterClass* nonwordcharCached;
+ CharacterClass* nonwordUnicodeIgnoreCasecharCached;
};
} } // namespace JSC::Yarr
_______________________________________________ webkit-changes mailing list [email protected] https://lists.webkit.org/mailman/listinfo/webkit-changes
