Reviewers: dcarney, ulan
Message:
ptal
Description:
Experimental lexer: track one-byteness on the fly.
BUG=
[email protected],[email protected]
Please review this at https://codereview.chromium.org/136793025/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+14, -12 lines):
M src/lexer/experimental-scanner.h
M src/lexer/experimental-scanner.cc
M tools/lexer_generator/code_generator.jinja
Index: src/lexer/experimental-scanner.cc
diff --git a/src/lexer/experimental-scanner.cc b/src/lexer/experimental-scanner.cc
index d354908e6c4619d134af34616e08c6a670ee2f78..28aaf6c0ac0884bf973f107fc63f7c1ceb4038b9 100644
--- a/src/lexer/experimental-scanner.cc
+++ b/src/lexer/experimental-scanner.cc
@@ -106,28 +106,21 @@ bool ExperimentalScanner<uint16_t>::FillLiteral(
++start;
--end;
}
+ literal->buffer.Reset();
if (!token.has_escapes) {
// UTF-16 can also contain only one byte chars. Note that is_ascii here
// means is_onebyte.
- literal->is_ascii = true;
- literal->buffer.Reset();
- for (const uint16_t* cursor = start; cursor != end; ++cursor) {
- if (*cursor >= unibrow::Latin1::kMaxChar) {
- literal->is_ascii = false;
- break;
- }
- literal->buffer.AddChar(*cursor);
- }
literal->length = end - start;
- if (literal->is_ascii) {
+ literal->is_ascii = token.is_onebyte;
+ if (token.is_onebyte) {
+ for (const uint16_t* cursor = start; cursor != end; ++cursor)
+ literal->buffer.AddChar(*cursor);
literal->ascii_string = literal->buffer.ascii_literal();
} else {
- literal->buffer.Reset();
literal->utf16_string = Vector<const uint16_t>(start,
literal->length);
}
return true;
}
- literal->buffer.Reset();
for (const uint16_t* cursor = start; cursor != end;) {
if (*cursor != '\\') {
literal->buffer.AddChar(*cursor++);
Index: src/lexer/experimental-scanner.h
diff --git a/src/lexer/experimental-scanner.h b/src/lexer/experimental-scanner.h
index 8a9a96e1e4df2560ba0841ccd86d1201b60030d6..a0fa0006206508514cfc732363300b53d09a2606 100644
--- a/src/lexer/experimental-scanner.h
+++ b/src/lexer/experimental-scanner.h
@@ -238,6 +238,7 @@ class ScannerBase {
int beg_pos;
int end_pos;
bool has_escapes;
+ bool is_onebyte;
};
struct LiteralDesc {
Index: tools/lexer_generator/code_generator.jinja
diff --git a/tools/lexer_generator/code_generator.jinja b/tools/lexer_generator/code_generator.jinja
index 67ccb56a3d60b5670a40d3597e98589d77d0e041..d5951785a978d6089a7aa543d996e0b35b41c502 100644
--- a/tools/lexer_generator/code_generator.jinja
+++ b/tools/lexer_generator/code_generator.jinja
@@ -105,6 +105,9 @@
{{dispatch_match_action('backtrack', ('1', 'ILLEGAL'))}}
{% elif type == 'skip' %}
RESET_START();
+ {%- if encoding == 'utf16'-%}
+ next_.is_onebyte = true;
+ {%- endif -%}
goto state_entry_0;
{% elif type == 'skip_and_terminate' %}
RESET_START();
@@ -112,6 +115,9 @@
{{dispatch_match_action('terminate', None)}}
{% elif type == 'line_terminator' %}
RESET_START();
+ {%- if encoding == 'utf16'-%}
+ next_.is_onebyte = true;
+ {%- endif -%}
has_line_terminator_before_next_ = true;
goto state_entry_0;
{% elif type == 'token' %}
@@ -264,6 +270,7 @@
{%- if state['long_char_transitions'] -%}
if ({{long_char_check()}}) {
+ next_.is_onebyte = false;
{{long_char_create()}}
{%- for key, transition_state_id in state['long_char_transitions'] %}
if ({{do_key(key)}}) { // long_char transition
@@ -323,6 +330,7 @@ void ExperimentalScanner<{{char_type}}>::Scan() {
// Setup environment.
next_.has_escapes = false;
+ next_.is_onebyte = true;
Token::Value stored_token;
const {{char_type}} * marker;
{{char_type}} primary_char;
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.