Revision: 18110
Author: [email protected]
Date: Wed Nov 27 15:28:46 2013 UTC
Log: Experimental scanner: keeping track of octal numbers and octal
escapes.
The baseline uses a more liberal definition than ECMA, but it's ok, since
this is only used for whining about octal escapes in the strict mode. (So,
even though "\1" is technically not an octal escape inside a string (since
it should be exactly 2 digits), it's still a good idea to whine.)
[email protected]
[email protected], [email protected]
BUG=
Review URL: https://codereview.chromium.org/91833002
http://code.google.com/p/v8/source/detail?r=18110
Added:
/branches/experimental/parser/test/lexer/cornercases/octals.js
Modified:
/branches/experimental/parser/src/lexer/experimental-scanner.h
/branches/experimental/parser/src/lexer/lexer-shell.cc
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
=======================================
--- /dev/null
+++ /branches/experimental/parser/test/lexer/cornercases/octals.js Wed Nov
27 15:28:46 2013 UTC
@@ -0,0 +1,58 @@
+// Copyright 2013 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Octal numbers and octal escapes in strings are not allowed in the strict
+// mode.
+
+var octal_number = 031;
+var not_octal_number = 0;
+var again_not = 019;
+
+"octal inside \01 string"
+"this is not octal \0"
+"this is an octal escape followed by 9: \019"
+"doesn't need to start with 0: \11"
+
+'octal inside \01 string'
+'this is not octal \0'
+'this is an octal escape followed by 9: \01'
+'doesn\'t need to start with 0: \11'
+
+// Even more complicated cases: two octals in one string:
+"foo\00\00"
+'foo\00\00'
+
+// Different lengths of octals:
+"bar\0" // not an octal
+"bar\00"
+"bar\000" // Not an octal according to Ecma
+"bar\0000" // First 3 recognized as octal
+
+'bar\0' // not an octal
+'bar\00'
+'bar\000' // Not an octal according to Ecma
+'bar\0000' // First 3 recognized as octal
=======================================
--- /branches/experimental/parser/src/lexer/experimental-scanner.h Wed Nov
27 13:51:50 2013 UTC
+++ /branches/experimental/parser/src/lexer/experimental-scanner.h Wed Nov
27 15:28:46 2013 UTC
@@ -67,7 +67,6 @@
has_line_terminator_before_next_(true),
current_literal_(&literals_[0]),
next_literal_(&literals_[1]),
- octal_pos_(Location::invalid()),
harmony_numeric_literals_(false),
harmony_modules_(false),
harmony_scoping_(false) {
@@ -216,10 +215,6 @@
return literal.length() == keyword.length() &&
(memcmp(literal.start(), keyword.start(), literal.length()) == 0);
}
-
- // Returns the location of the last seen octal literal.
- Location octal_position() const { return octal_pos_; }
- void clear_octal_position() { octal_pos_ = Location::invalid(); }
// Seek forward to the given position. This operation works for simple cases
// such as seeking forward until simple delimiter tokens, which is what it is
@@ -237,6 +232,10 @@
// be empty).
virtual bool ScanRegExpFlags() = 0;
+ // Returns the location of the last seen octal literal.
+ virtual Location octal_position() const = 0;
+ virtual void clear_octal_position() = 0;
+
protected:
struct TokenDesc {
Token::Value token;
@@ -273,8 +272,6 @@
LiteralDesc* next_literal_;
LiteralDesc literals_[2];
- Location octal_pos_;
-
bool harmony_numeric_literals_;
bool harmony_modules_;
bool harmony_scoping_;
@@ -296,7 +293,8 @@
buffer_end_(NULL),
start_(NULL),
cursor_(NULL),
- marker_(NULL) {
+ marker_(NULL),
+ last_octal_end_(NULL) {
ASSERT(source->IsFlat());
SetBufferBasedOnHandle();
Scan();
@@ -304,12 +302,17 @@
virtual ~ExperimentalScanner() { }
- protected:
- virtual void Scan();
virtual void SeekForward(int pos);
virtual void SetEnd(int pos);
virtual bool ScanRegExpPattern(bool seen_equal);
virtual bool ScanRegExpFlags();
+ virtual Location octal_position() const;
+ virtual void clear_octal_position() {
+ last_octal_end_ = NULL;
+ }
+
+ protected:
+ virtual void Scan();
virtual void SetBufferBasedOnHandle() {
// We get a raw pointer from the Handle, but we also update it every
time
@@ -363,6 +366,10 @@
const Char* start_;
const Char* cursor_;
const Char* marker_;
+
+ // Where we have seen the last octal number or an octal escape inside a
+ // string. Used by octal_position().
+ const Char* last_octal_end_;
};
@@ -488,14 +495,6 @@
if (nx >= 256) break;
x = nx;
}
- // Anything except '\0' is an octal escape sequence, illegal in strict
mode.
- // Remember the position of octal escape sequences so that an error
- // can be reported later (in strict mode).
- // We don't report the error immediately, because the octal escape can
- // occur before the "use strict" directive.
- if (*result != '0' || cursor > start) {
- octal_pos_ = Location(start - 1 - buffer_, cursor - 1 - buffer_);
- }
*result = x;
return cursor;
}
@@ -593,6 +592,18 @@
return cursor;
}
+template<typename Char>
+ScannerBase::Location ExperimentalScanner<Char>::octal_position() const {
+ if (!last_octal_end_)
+ return Location::invalid();
+ // The last octal might be an octal escape or an octal number. Whichever it
+ // is, we'll find the start by just scanning back until we hit a non-octal
+ // character.
+ const Char* temp_cursor = last_octal_end_ - 1;
+ while (temp_cursor >= buffer_ && *temp_cursor >= '0' && *temp_cursor
<= '7')
+ --temp_cursor;
+ return Location(temp_cursor - buffer_ + 1, last_octal_end_ - buffer_);
+}
} }
=======================================
--- /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27
14:18:23 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer-shell.cc Wed Nov 27
15:28:46 2013 UTC
@@ -171,13 +171,23 @@
size_t end;
std::vector<int> literal;
bool is_ascii;
+ // The location of the latest octal position when the token was seen.
+ int octal_beg;
+ int octal_end;
TokenWithLocation() :
value(Token::ILLEGAL), beg(0), end(0), is_ascii(false) { }
- TokenWithLocation(Token::Value value, size_t beg, size_t end) :
- value(value), beg(beg), end(end), is_ascii(false) { }
+ TokenWithLocation(Token::Value value, size_t beg, size_t end,
+ int octal_beg, int octal_end) :
+ value(value), beg(beg), end(end), is_ascii(false),
octal_beg(octal_beg),
+ octal_end(octal_end) { }
bool operator==(const TokenWithLocation& other) {
+ // The octal_end of the baseline scanner is inconsistent between octal
+ // numbers (end = one beyond the last digit) and octal escapes (end = the
+ // last digit). Ignore that.
return value == other.value && beg == other.beg && end == other.end &&
- literal == other.literal && is_ascii == other.is_ascii;
+ literal == other.literal && is_ascii == other.is_ascii &&
+ octal_beg == other.octal_beg &&
+ octal_end >= other.octal_end - 1 && octal_end <= other.octal_end +
1;
}
bool operator!=(const TokenWithLocation& other) {
return !(*this == other);
@@ -191,7 +201,7 @@
printf(is_ascii ? " %02x" : " %04x", literal[i]);
}
}
- printf("\n");
+ printf(" (last octal: %d %d)\n", octal_beg, octal_end);
}
};
@@ -217,7 +227,8 @@
TokenWithLocation GetTokenWithLocation(Scanner *scanner, Token::Value
token) {
int beg = scanner->location().beg_pos;
int end = scanner->location().end_pos;
- TokenWithLocation result(token, beg, end);
+ TokenWithLocation result(token, beg, end,
scanner->octal_position().beg_pos,
+ scanner->octal_position().end_pos);
if (HasLiteral(token)) {
result.is_ascii = scanner->is_literal_ascii();
if (scanner->is_literal_ascii()) {
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 27 14:18:23
2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 27 15:28:46
2013 UTC
@@ -32,6 +32,7 @@
hex_digit = [0-9a-fA-F];
single_escape_char = ['"\\bfnrtv];
maybe_exponent = /([eE][\-+]?[:digit:]+)?/;
+octal_number = /0[0-7]+/;
number =
/0[xX][:hex_digit:]+/ | (
/\.[:digit:]+/ maybe_exponent |
@@ -96,6 +97,7 @@
"<" <|token(LT)|>
">" <|token(GT)|>
+octal_number <|octal_number|>
number <|token(NUMBER)|>
number identifier_char <|token(ILLEGAL)|>
number "\\" <|token(ILLEGAL)|>
@@ -199,7 +201,9 @@
"\\" line_terminator_sequence <||continue>
/\\[x][:hex_digit:]{2}/ <set_has_escapes||continue>
/\\[u][:hex_digit:]{4}/ <set_has_escapes||continue>
-/\\[^xu:line_terminator:]/ <set_has_escapes||continue>
+/\\[1-7]/ <octal_inside_string||continue>
+/\\[0-7]{2,3}/ <octal_inside_string||continue>
+/\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
"\\" <|token(ILLEGAL)|>
line_terminator <|token(ILLEGAL)|>
"\"" <|token(STRING)|>
@@ -211,7 +215,9 @@
"\\" line_terminator_sequence <||continue>
/\\[x][:hex_digit:]{2}/ <set_has_escapes||continue>
/\\[u][:hex_digit:]{4}/ <set_has_escapes||continue>
-/\\[^xu:line_terminator:]/ <set_has_escapes||continue>
+/\\[1-7]/ <octal_inside_string||continue>
+/\\[0-7]{2,3}/ <octal_inside_string||continue>
+/\\[^xu1-7:line_terminator:]/ <set_has_escapes||continue>
"\\" <|token(ILLEGAL)|>
line_terminator <|token(ILLEGAL)|>
"'" <|token(STRING)|>
=======================================
---
/branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js
Wed Nov 27 10:41:24 2013 UTC
+++
/branches/experimental/parser/test/lexer/cornercases/strings-and-identifiers-with-escapes.js
Wed Nov 27 15:28:46 2013 UTC
@@ -28,6 +28,11 @@
"this is a normal string"
"this is a string with an \xaa escape"
"this \u00ab too"
-"and \n this \t\000"
+"and \n this \t\00"
+
+'this is a normal string'
+'this is a string with an \xaa escape'
+'this \u00ab too'
+'and \n this \t\00'
identifier\u1118oo
=======================================
---
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Wed Nov 27 09:41:41 2013 UTC
+++
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Wed Nov 27 15:28:46 2013 UTC
@@ -114,7 +114,13 @@
{% elif type == 'set_marker' %}
marker_ = cursor_ - {{value}};
{% elif type == 'set_has_escapes' %}
- next_.has_escapes = true;
+ next_.has_escapes = true;
+ {% elif type == 'octal_number' %}
+ last_octal_end_ = cursor_;
+ DO_TOKEN(Token::NUMBER);
+ {% elif type == 'octal_inside_string' %}
+ last_octal_end_ = cursor_;
+ next_.has_escapes = true;
{% else %}
uncompilable code for {{type}}
{% endif -%}
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.