Revision: 19199
Author: [email protected]
Date: Fri Feb 7 14:13:00 2014 UTC
Log: Revert "Fix inconsistencies wrt whitespaces."
This reverts r19196.
[email protected]
Review URL: https://codereview.chromium.org/147443008
http://code.google.com/p/v8/source/detail?r=19199
Deleted:
/branches/bleeding_edge/test/mjsunit/whitespaces.js
Modified:
/branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc
/branches/bleeding_edge/src/char-predicates.h
/branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/runtime.cc
/branches/bleeding_edge/src/scanner.h
/branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc
/branches/bleeding_edge/test/cctest/test-regexp.cc
/branches/bleeding_edge/test/mjsunit/third_party/string-trim.js
=======================================
--- /branches/bleeding_edge/test/mjsunit/whitespaces.js Fri Feb 7 12:34:45
2014 UTC
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright 2014 the V8 project authors. All rights reserved.
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-// * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-// * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following
-// disclaimer in the documentation and/or other materials provided
-// with the distribution.
-// * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived
-// from this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-var whitespaces = [
- // Whitespaces defined in ECMA-262 5.1, 7.2
- 0x0009, // Tab TAB
- 0x000B, // Vertical Tab VT
- 0x000C, // Form Feed FF
- 0x0020, // Space SP
- 0x00A0, // No-break space NBSP
- 0xFEFF, // Byte Order Mark BOM
- // Unicode whitespaces
- 0x000A, // Line Feed LF
- 0x000D, // Carriage Return CR
- 0x0085, // Next Line NEL
- 0x1680, // Ogham Space Mark
- 0x180E, // Mongolian Vowel Separator
- 0x2000, // EN QUAD
- 0x2001, // EM QUAD
- 0x2002, // EN SPACE
- 0x2003, // EM SPACE
- 0x2004, // THREE-PER-EM SPACE
- 0x2005, // FOUR-PER-EM SPACE
- 0x2006, // SIX-PER-EM SPACE
- 0x2007, // FIGURE SPACE
- 0x2008, // PUNCTUATION SPACE
- 0x2009, // THIN SPACE
- 0x200A, // HAIR SPACE
- 0x2028, // LINE SEPARATOR
- 0x2029, // PARAGRAPH SEPARATOR
- 0x202F, // NARROW NO-BREAK SPACE
- 0x205F, // MEDIUM MATHEMATICAL SPACE
- 0x3000, // IDEOGRAPHIC SPACE
-];
-
-// Add single twobyte char to force twobyte representation.
-// Interestingly, snowman is not "white" space :)
-var twobyte = "\u2603";
-var onebyte = "\u007E";
-var twobytespace = "\u2000";
-var onebytespace = "\u0020";
-
-function is_whitespace(c) {
- return whitespaces.indexOf(c.charCodeAt(0)) > -1;
-}
-
-function test_regexp(str) {
- var pos_match = str.match(/\s/);
- var neg_match = str.match(/\S/);
- var test_char = str[0];
- var postfix = str[1];
- if (is_whitespace(test_char)) {
- assertEquals(test_char, pos_match[0]);
- assertEquals(postfix, neg_match[0]);
- } else {
- assertEquals(test_char, neg_match[0]);
- assertNull(pos_match);
- }
-}
-
-function test_trim(c, infix) {
- var str = c + c + c + infix + c;
- if (is_whitespace(c)) {
- assertEquals(infix, str.trim());
- } else {
- assertEquals(str, str.trim());
- }
-}
-
-function test_parseInt(c, postfix) {
- // Skip if prefix is a digit.
- if (c >= "0" && c <= 9) return;
- var str = c + c + "123" + postfix;
- if (is_whitespace(c)) {
- assertEquals(123, parseInt(str));
- } else {
- assertEquals(NaN, parseInt(str));
- }
-}
-
-function test_eval(c, content) {
- if (!is_whitespace(c)) return;
- var str = c + c + "'" + content + "'" + c + c;
- assertEquals(content, eval(str));
-}
-
-function test_stringtonumber(c, postfix) {
- // Skip if prefix is a digit.
- if (c >= "0" && c <= 9) return;
- var result = 1 + Number(c + "123" + c + postfix);
- if (is_whitespace(c)) {
- assertEquals(124, result);
- } else {
- assertEquals(NaN, result);
- }
-}
-
-for (var i = 0; i < 0x10000; i++) {
- c = String.fromCharCode(i);
- test_regexp(c + onebyte);
- test_regexp(c + twobyte);
- test_trim(c, onebyte + "trim");
- test_trim(c, twobyte + "trim");
- test_parseInt(c, onebyte);
- test_parseInt(c, twobyte);
- test_eval(c, onebyte);
- test_eval(c, twobyte);
- test_stringtonumber(c, onebytespace);
- test_stringtonumber(c, twobytespace);
-}
=======================================
--- /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc Fri Feb
7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/arm/regexp-macro-assembler-arm.cc Fri Feb
7 14:13:00 2014 UTC
@@ -497,8 +497,6 @@
__ b(ls, &success);
// \u00a0 (NBSP).
__ cmp(r0, Operand(0x00a0 - '\t'));
- // \u0085 (NEL).
- __ cmp(r0, Operand(0x0085 - '\t'), ne);
BranchOrBacktrack(ne, on_no_match);
__ bind(&success);
return true;
=======================================
--- /branches/bleeding_edge/src/char-predicates.h Fri Feb 7 12:34:45 2014
UTC
+++ /branches/bleeding_edge/src/char-predicates.h Fri Feb 7 14:13:00 2014
UTC
@@ -66,14 +66,6 @@
}
};
-
-struct WhiteSpace {
- static inline bool Is(uc32 c) {
- return unibrow::WhiteSpace::Is(c) ||
- c == 0xFEFF; // BYTE ORDER MARK is a white space in ECMA-262 5.1, 7.2.
- }
-};
-
} } // namespace v8::internal
#endif // V8_CHAR_PREDICATES_H_
=======================================
--- /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Fri
Feb 7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/ia32/regexp-macro-assembler-ia32.cc Fri
Feb 7 14:13:00 2014 UTC
@@ -526,9 +526,6 @@
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
__ cmp(eax, 0x00a0 - '\t');
- __ j(equal, &success, Label::kNear);
- // \u0085 (NEL).
- __ cmp(eax, 0x0085 - '\t');
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Fri Feb 7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/jsregexp.cc Fri Feb 7 14:13:00 2014 UTC
@@ -3597,10 +3597,9 @@
// The '2' variant is has inclusive from and exclusive to.
-static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
- 0x0085, 0x0086, 0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F,
- 0x2000, 0x200B, 0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060,
- 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
+static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1, 0x00A0,
+ 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B, 0x2028, 0x202A,
+ 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001, 0xFEFF, 0xFF00, 0x10000 };
static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
static const int kWordRanges[] = {
=======================================
--- /branches/bleeding_edge/src/runtime.cc Fri Feb 7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/runtime.cc Fri Feb 7 14:13:00 2014 UTC
@@ -6105,10 +6105,8 @@
// Fast check for a junk value. A valid string may start from a
// whitespace, a sign ('+' or '-'), the decimal point, a decimal digit or
// the 'I' character ('Infinity'). All of that have codes not greater than
- // '9' except 'I', NBSP and NEL.
- if (data[start_pos] != 'I' &&
- data[start_pos] != 0xa0 &&
- data[start_pos] != 0x85) {
+ // '9' except 'I' and &nbsp;.
+ if (data[start_pos] != 'I' && data[start_pos] != 0xa0) {
return isolate->heap()->nan_value();
}
} else if (len - start_pos < 10 && AreDigits(data, start_pos, len)) {
@@ -6541,6 +6539,11 @@
return ConvertCase(
args, isolate, isolate->runtime_state()->to_upper_mapping());
}
+
+
+static inline bool IsTrimWhiteSpace(unibrow::uchar c) {
+ return unibrow::WhiteSpace::Is(c) || c == 0x200b || c == 0xfeff;
+}
RUNTIME_FUNCTION(MaybeObject*, Runtime_StringTrim) {
@@ -6555,17 +6558,15 @@
int length = string->length();
int left = 0;
- UnicodeCache* unicode_cache = isolate->unicode_cache();
if (trimLeft) {
- while (left < length && unicode_cache->IsWhiteSpace(string->Get(left))) {
+ while (left < length && IsTrimWhiteSpace(string->Get(left))) {
left++;
}
}
int right = length;
if (trimRight) {
- while (right > left &&
- unicode_cache->IsWhiteSpace(string->Get(right - 1))) {
+ while (right > left && IsTrimWhiteSpace(string->Get(right - 1))) {
right--;
}
}
=======================================
--- /branches/bleeding_edge/src/scanner.h Fri Feb 7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/scanner.h Fri Feb 7 14:13:00 2014 UTC
@@ -144,7 +144,7 @@
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
- unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
+ unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
StaticResource<Utf8Decoder> utf8_decoder_;
DISALLOW_COPY_AND_ASSIGN(UnicodeCache);
=======================================
--- /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Fri Feb
7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/src/x64/regexp-macro-assembler-x64.cc Fri Feb
7 14:13:00 2014 UTC
@@ -552,9 +552,6 @@
__ j(below_equal, &success, Label::kNear);
// \u00a0 (NBSP).
__ cmpl(rax, Immediate(0x00a0 - '\t'));
- __ j(equal, &success, Label::kNear);
- // \u0085 (NEL).
- __ cmpl(rax, Immediate(0x0085 - '\t'));
BranchOrBacktrack(not_equal, on_no_match);
__ bind(&success);
return true;
=======================================
--- /branches/bleeding_edge/test/cctest/test-regexp.cc Fri Feb 7 12:34:45
2014 UTC
+++ /branches/bleeding_edge/test/cctest/test-regexp.cc Fri Feb 7 14:13:00
2014 UTC
@@ -445,7 +445,21 @@
static bool IsWhiteSpace(uc16 c) {
- return v8::internal::WhiteSpace::Is(c);
+ switch (c) {
+ case 0x09:
+ case 0x0A:
+ case 0x0B:
+ case 0x0C:
+ case 0x0d:
+ case 0x20:
+ case 0xA0:
+ case 0x2028:
+ case 0x2029:
+ case 0xFEFF:
+ return true;
+ default:
+ return unibrow::Space::Is(c);
+ }
}
=======================================
--- /branches/bleeding_edge/test/mjsunit/third_party/string-trim.js Fri
Feb 7 12:34:45 2014 UTC
+++ /branches/bleeding_edge/test/mjsunit/third_party/string-trim.js Fri
Feb 7 14:13:00 2014 UTC
@@ -66,8 +66,7 @@
{s : '\u3000', t : 'IDEOGRAPHIC SPACE'},
{s : '\u2028', t : 'LINE SEPARATOR'},
{s : '\u2029', t : 'PARAGRAPH SEPARATOR'},
- // \u200B is not a whitespace character according to Unicode 6.3.0.
- // {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'}
+ {s : '\u200B', t : 'ZERO WIDTH SPACE (category Cf)'}
];
for (var i = 0; i < whitespace.length; i++) {
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.