Revision: 17945
Author: [email protected]
Date: Thu Nov 21 08:21:45 2013 UTC
Log: Experimental parser: split classes into latin1 and non latin1
[email protected]
BUG=
Review URL: https://codereview.chromium.org/80263003
http://code.google.com/p/v8/source/detail?r=17945
Modified:
/branches/experimental/parser/src/lexer/lexer_py.re
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
/branches/experimental/parser/tools/lexer_generator/transition_keys.py
=======================================
--- /branches/experimental/parser/src/lexer/lexer_py.re Wed Nov 20 16:10:09 2013 UTC
+++ /branches/experimental/parser/src/lexer/lexer_py.re Thu Nov 21 08:21:45 2013 UTC
@@ -25,11 +25,11 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-whitespace_char = [ \t\v\f\r:whitespace:\240];
+whitespace_char = [:whitespace:];
whitespace = whitespace_char+;
-identifier_start = [$_a-zA-Z:letter:];
-identifier_char = [0-9:identifier_part_not_letter::identifier_start:];
-line_terminator = [\n\r];
+identifier_start = [$_:letter:];
+identifier_char = [:identifier_start::identifier_part_not_letter:];
+line_terminator = [:line_terminator:];
digit = [0-9];
hex_digit = [0-9a-fA-F];
single_escape_char = ['"\\bfnrtv];
@@ -38,8 +38,7 @@
/0[xX][:hex_digit:]+/ | (
/\.[:digit:]+/ maybe_exponent |
/[:digit:]+(\.[:digit:]*)?/ maybe_exponent );
-# TODO this is incomplete/incorrect
-line_terminator_sequence = (/\n\r?/)|(/\r\n?/);
+line_terminator_sequence = /[:line_terminator:]|\r\n/;
eos = [:eos:];
# grammar is
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Wed Nov 20 17:15:07 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.jinja Thu Nov 21 08:21:45 2013 UTC
@@ -5,29 +5,31 @@
{%- if not loop.first %} || {% endif -%}
{%- if r[0] == 'LATIN_1' -%}
{%- if r[1][0] == r[1][1] -%}
- yych == {{r[1][0]}}
+ yych == {{r[1][0]}}
{%- elif r[1][0] == 0 -%}
- yych <= {{r[1][1]}}
+ yych <= {{r[1][1]}}
{%- elif r[1][1] == 255 -%}
- yych >= {{r[1][0]}}
+ yych >= {{r[1][0]}}
{%- else -%}
- ({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
+ ({{r[1][0]}} <= yych && yych <= {{r[1][1]}})
{%- endif -%}
{%- elif r[0] == 'CLASS' -%}
{%- if r[1] == 'eos' -%}
- (yych == 0 && cursor_ >= buffer_end_)
+ (yych == 0 && cursor_ >= buffer_end_)
{%- elif r[1] == 'zero' -%}
- (yych == 0 && cursor_ < buffer_end_)
- {%- elif r[1] == 'whitespace' and encoding == 'utf16'-%}
+ (yych == 0 && cursor_ < buffer_end_)
+ {%- elif r[1] == 'non_latin_1_whitespace' and encoding == 'utf16'-%}
{# FIXME: Add and use unicode_cache_->InNonAsciiWhitespace #}
- (yych != ' ' && yych != '\t' && yych != '\v' && yych != '\f' &&
yych != '\r' && yych != '\n' && unicode_cache_->IsWhiteSpace(yych))
- {%- elif r[1] == 'letter' and encoding == 'utf16'-%}
+ (yych > 255 && unicode_cache_->IsWhiteSpace(yych))
+ {%- elif r[1] == 'non_latin_1_letter' and encoding == 'utf16'-%}
{# FIXME: Add and use unicode_cache_->InNonAsciiLetter #}
- (!(yych >= 'a' && yych <= 'z') && !(yych >= 'A' && yych <= 'Z')
&& unicode_cache_->IsLetter(yych))
- {%- elif r[1] == 'identifier_part_not_letter' and encoding
== 'utf16'-%}
- unicode_cache_->IsIdentifierPartNotLetter(yych)
+ (yych > 255 && unicode_cache_->IsLetter(yych))
+ {%- elif r[1] == 'non_latin1_identifier_part_not_letter' and
encoding == 'utf16'-%}
+ (yych > 255 && unicode_cache_->IsIdentifierPartNotLetter(yych))
+ {%- elif r[1] == 'non_latin1_line_terminator' and encoding
== 'utf16'-%}
+ (yych > 255 && unicode_cache_->IsLineTerminator(yych))
{%- else -%}
- false /* {{r[1]}} */
+ false /* {{r[1]}} */
{%- endif -%}
{%- else -%}
false
=======================================
--- /branches/experimental/parser/tools/lexer_generator/transition_keys.py Wed Nov 20 16:10:09 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/transition_keys.py Thu Nov 21 08:21:45 2013 UTC
@@ -40,19 +40,36 @@
'latin_1' : (1, 255),
# These are not real ranges; they just need to be separate from any real
# ranges.
- 'whitespace' : (256, 256),
- 'letter' : (257, 257),
- 'identifier_part_not_letter' : (258, 258),
- 'eos' : (259, 259),
- 'zero' : (260, 260),
+ 'non_latin_1_whitespace' : (256, 256),
+ 'non_latin_1_letter' : (257, 257),
+ 'non_latin1_identifier_part_not_letter' : (258, 258),
+ 'non_latin1_line_terminator' : (259, 259),
+ 'eos' : (260, 260),
+ 'zero' : (261, 261),
}
- __lower_bound = 1
+ __lower_bound = min(__class_bounds.values(), key=lambda item: item[0])[0]
__upper_bound = max(__class_bounds.values(), key=lambda item: item[1])[1]
__cached_keys = {}
__unique_key_counter = -1
+ __predefined_ranges = {
+ 'whitespace' : [
+ (9, 9), (11, 12), (32, 32), (133, 133), (160, 160),
+ __class_bounds['non_latin_1_whitespace']],
+ 'letter' : [
+ (65, 90), (97, 122), (170, 170), (181, 181),
+ (186, 186), (192, 214), (216, 246), (248, 255),
+ __class_bounds['non_latin_1_letter']],
+ 'line_terminator' : [
+ (10, 10), (13, 13),
+ __class_bounds['non_latin1_line_terminator']],
+ 'identifier_part_not_letter' : [
+ (48, 57), (95, 95),
+ __class_bounds['non_latin1_identifier_part_not_letter']],
+ }
+
@staticmethod
def __in_latin_1(char):
bound = TransitionKey.__class_bounds['latin_1']
@@ -140,8 +157,16 @@
TransitionKey.__process_graph(x, ranges, key_map)
elif key == 'CHARACTER_CLASS':
class_name = graph[1]
- if class_name in TransitionKey.__class_bounds.keys():
+ if class_name in TransitionKey.__class_bounds:
+ if class_name in key_map:
+ assert (key_map[class_name] ==
+ TransitionKey([TransitionKey.__class_bounds[class_name]]))
ranges.append(TransitionKey.__class_bounds[class_name])
+ elif class_name in TransitionKey.__predefined_ranges:
+ if class_name in key_map:
+ assert (key_map[class_name] ==
+ TransitionKey(TransitionKey.__predefined_ranges[class_name]))
+ ranges += TransitionKey.__predefined_ranges[class_name]
elif class_name in key_map:
ranges += key_map[class_name].__ranges
else:
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.