Revision: 19028
Author: [email protected]
Date: Mon Feb 3 14:12:24 2014 UTC
Log: Experimental parser: some tuple removal
[email protected]
BUG=
Review URL: https://codereview.chromium.org/152513004
http://code.google.com/p/v8/source/detail?r=19028
Added:
/branches/experimental/parser/tools/lexer_generator/term_test.py
Modified:
/branches/experimental/parser/tools/lexer_generator/automaton.py
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
/branches/experimental/parser/tools/lexer_generator/code_generator.py
/branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
/branches/experimental/parser/tools/lexer_generator/lexer_test.py
/branches/experimental/parser/tools/lexer_generator/rule_parser.py
/branches/experimental/parser/tools/lexer_generator/test_suite.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/term_test.py Mon
Feb 3 14:12:24 2014 UTC
@@ -0,0 +1,51 @@
+# Copyright 2014 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import unittest
+from automaton import Term
+
+class TermTestCase(unittest.TestCase):
+
+ def test_basic(self):
+
+ t = Term('a')
+ self.assertEqual('a', t.name())
+ self.assertEqual((), t.args())
+ self.assertEqual("(a)", str(t))
+ self.assertEqual(Term('a'), t)
+
+ t = Term('a', 'b', 'c')
+ self.assertEqual('a', t.name())
+ self.assertEqual(('b', 'c'), t.args())
+ self.assertEqual("(a,b,c)", str(t))
+ self.assertEqual(Term('a', 'b', 'c'), t)
+
+ t = Term('a', Term('b', 'c'))
+ self.assertEqual('a', t.name())
+ self.assertEqual((Term('b', 'c'), ), t.args())
+ self.assertEqual("(a,(b,c))", str(t))
+ self.assertEqual(Term('a', Term('b', 'c')), t)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/automaton.py Tue
Jan 21 15:59:38 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/automaton.py Mon
Feb 3 14:12:24 2014 UTC
@@ -25,10 +25,48 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-from types import TupleType, ListType
+from types import TupleType, ListType, StringType
from itertools import chain
from transition_keys import TransitionKey
+class Term(object):
+ '''A class representing a function and its arguments.
+ f(a,b,c) would be represented as ('f', a, b, c) where
+ a, b, and c are strings or Terms.'''
+
+ @staticmethod
+ def __verify_string(v):
+ assert (not ',' in v) and (not '(' in v)
+
+ def __init__(self, name, *args):
+ assert type(name) == StringType
+ self.__verify_string(name)
+ for v in args:
+ if type(v) == StringType:
+ self.__verify_string(v)
+ else:
+ assert isinstance(v, self.__class__)
+ self.__tuple = tuple([name] + list(args))
+ self.__str = None
+
+ def name(self):
+ return self.__tuple[0]
+
+ def args(self):
+ return self.__tuple[1:]
+
+ def __hash__(self):
+ return hash(self.__tuple)
+
+ def __eq__(self, other):
+ return (isinstance(other, self.__class__) and
+ self.__tuple == other.__tuple)
+
+ def __str__(self):
+ if self.__str == None:
+ self.__str = '(%s)' % ','.join(map(str, self.__tuple))
+ return self.__str
+
class Action(object):
@staticmethod
@@ -50,8 +88,7 @@
for action in [entry_action, match_action]:
if action == None:
continue
- assert type(action) == TupleType and len(action)
- assert action[0] != None
+ assert isinstance(action, Term)
assert entry_action or match_action
self.__entry_action = entry_action
self.__match_action = match_action
@@ -79,9 +116,7 @@
for action in [self.__entry_action, self.__match_action]:
part = ""
if action:
- part += action[0]
- if action[1]:
- part += "(%s)" % str(action[1])
+ part += str(action)
parts.append(part)
return "action< %s >" % " | ".join(parts)
=======================================
---
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Wed Jan 29 13:19:19 2014 UTC
+++
/branches/experimental/parser/tools/lexer_generator/code_generator.jinja
Mon Feb 3 14:12:24 2014 UTC
@@ -65,11 +65,11 @@
{#- entry actions must not change the value of the cursor
unless they jump -#}
-{% macro dispatch_entry_action(type, value) -%}
+{% macro dispatch_entry_action(type, args) -%}
{% if type == 'store_token' %}
- stored_token = Token::{{value}};
+ stored_token = Token::{{args[0]}};
{% elif type == 'set_marker' %}
- marker = cursor_ - {{value}};
+ marker = cursor_ - {{args[0]}};
{% elif type == 'set_has_escapes' %}
next_.has_escapes = true;
{% elif type == 'octal_inside_string' %}
@@ -85,16 +85,16 @@
next_.has_escapes = true;
{% elif type == 'if_line_terminator_backtrack' %}
if (!has_line_terminator_before_next_) {
- {{dispatch_match_action('backtrack', value)}}
+ {{dispatch_match_action('backtrack', args)}}
}
{% else %}
- uncompilable code for {{type}} {{value}}
+ uncompilable code for {{type}} {{args}}
{% endif -%}
{%- endmacro -%}
{#- match actions must all explicitly jump or return -#}
-{% macro dispatch_match_action(type, value) -%}
+{% macro dispatch_match_action(type, args) -%}
{% if type == 'terminate' %}
{{dispatch_match_action('backtrack', ('1', 'EOS'))}}
{% elif type == 'terminate_illegal' %}
@@ -118,45 +118,45 @@
has_line_terminator_before_next_ = true;
goto state_entry_0;
{% elif type == 'token' %}
- DO_TOKEN(Token::{{value}})
+ DO_TOKEN(Token::{{args[0]}})
return;
{% elif type == 'goto_start' %}
- {{ jump(value[0]) }}
+ {{ jump(args[0]|int) }}
{% elif type == 'store_token_and_goto' %}
- stored_token = Token::{{value[0]}};
- {{ jump(value[1]) }}
+ stored_token = Token::{{args[0]}};
+ {{ jump(args[1]|int) }}
{% elif type == 'do_stored_token' %}
DO_TOKEN(stored_token)
return;
{% elif type == 'do_token_and_go_forward' %}
- DO_TOKEN(Token::{{value}});
+ DO_TOKEN(Token::{{args[0]}});
FORWARD();
RESET_START();
return;
{% elif type == 'harmony_token' %}
- if (harmony_{{value[0]}}_) {
- DO_TOKEN(Token::{{value[1]}});
+ if (harmony_{{args[0]}}_) {
+ DO_TOKEN(Token::{{args[1]}});
} else {
- DO_TOKEN(Token::{{value[2]}});
+ DO_TOKEN(Token::{{args[2]}});
}
return;
{% elif type == 'store_harmony_token_and_goto' %}
- if (harmony_{{value[0]}}_) {
- stored_token = Token::{{value[1]}};
+ if (harmony_{{args[0]}}_) {
+ stored_token = Token::{{args[1]}};
} else {
- stored_token = Token::{{value[2]}};
+ stored_token = Token::{{args[2]}};
}
- {{ jump(value[3]) }}
+ {{ jump(args[3]|int) }}
{% elif type == 'octal_number' %}
last_octal_end_ = cursor_;
DO_TOKEN(Token::NUMBER);
return;
{% elif type == 'backtrack' %}
- BACKWARD({{value[0]}});
- DO_TOKEN(Token::{{value[1]}});
+ BACKWARD({{args[0]}});
+ DO_TOKEN(Token::{{args[1]}});
return;
{% else %}
- uncompilable code for {{type}} {{value}}
+ uncompilable code for {{type}} {{args}}
{% endif -%}
{%- endmacro -%}
@@ -219,7 +219,7 @@
{%- set entry_action = state.entry_action -%}
{%- if entry_action %}
- {{ dispatch_entry_action(entry_action[0], entry_action[1]) }}
+ {{ dispatch_entry_action(entry_action.name(), entry_action.args()) }}
{%- endif %}
{{ write_label('after_entry_code', node_number) }}
@@ -279,7 +279,7 @@
{%- set match_action = state.match_action -%}
{%- if match_action %}
- {{ dispatch_match_action(match_action[0], match_action[1]) }}
+ {{ dispatch_match_action(match_action.name(), match_action.args()) }}
CRASH();
{% else %}
goto default_action;
@@ -343,7 +343,7 @@
{%- if debug_print %}
fprintf(stderr, "default action\n");
{% endif -%}
- {{dispatch_match_action(default_action[0], default_action[1])}}
+ {{dispatch_match_action(default_action.name(), default_action.args())}}
CRASH();
if (false) {
=======================================
--- /branches/experimental/parser/tools/lexer_generator/code_generator.py
Wed Jan 29 13:19:19 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/code_generator.py
Mon Feb 3 14:12:24 2014 UTC
@@ -30,6 +30,7 @@
import jinja2
from copy import deepcopy
from dfa import Dfa
+from automaton import Term
from transition_keys import TransitionKey
class CodeGenerator:
@@ -297,9 +298,9 @@
states = self.__dfa_states
for state in states:
if (state['match_action'] and
- state['match_action'][0] == 'do_stored_token'):
- assert not state['match_action'][1] in goto_map
- goto_map[state['match_action'][1]] = state['node_number']
+ state['match_action'].name() == 'do_stored_token'):
+ assert not state['match_action'].args()[0] in goto_map
+ goto_map[state['match_action'].args()[0]] = state['node_number']
mapped_actions = set([
'store_harmony_token_and_goto',
'store_token_and_goto',
@@ -307,9 +308,9 @@
for state in states:
if not state['match_action']:
continue
- action = state['match_action'][0]
+ action = state['match_action'].name()
if action in mapped_actions:
- value = state['match_action'][1]
+ value = state['match_action'].args()
target_id = goto_map[value[-1]]
states[target_id]['must_not_inline'] = True
if action != 'goto_start':
@@ -319,7 +320,8 @@
states[target_id]['can_elide_read'] = False
label = 'state_entry'
jump_label = self.__register_jump(target_id, label)
- state['match_action'] = (action, tuple(list(value[:-1]) +
[jump_label]))
+ new_args = list(value[:-1]) + [str(jump_label)]
+ state['match_action'] = Term(action, *new_args)
def __inlined_state(self, target_id):
state = deepcopy(self.__dfa_states[target_id])
=======================================
--- /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Wed Jan 22 13:29:05 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/dfa_optimizer.py
Mon Feb 3 14:12:24 2014 UTC
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
from transition_keys import TransitionKey
-from automaton import Action
+from automaton import Term, Action
from dfa import Dfa
# --- Optimization: Replacing tokens with gotos ---
@@ -175,8 +175,8 @@
action = state.action()
if not action or not action.match_action():
return False
- if (action.match_action()[0] == 'token' or
- action.match_action()[0] == 'harmony_token'):
+ if (action.match_action().name() == 'token' or
+ action.match_action().name() == 'harmony_token'):
return True
return False
@@ -225,22 +225,19 @@
def replacement_action(old_action, transition_state):
assert old_action.match_action()
state_id = name(transition_state)
- if old_action.match_action()[0] == 'token':
- old_token = old_action.match_action()[1]
- if (transition_state.action().match_action()[0] == 'token' and
- transition_state.action().match_action()[1] == old_token):
+ if old_action.match_action().name() == 'token':
+ old_token = old_action.match_action().args()[0]
+ if (transition_state.action().match_action().name() == 'token' and
+ transition_state.action().match_action().args()[0] ==
old_token):
# no need to store token
- match_action = ('goto_start', (state_id,))
+ match_action = Term('goto_start', state_id)
counters['goto_start'] += 1
else:
counters['store_token_and_goto'] += 1
- match_action = ('store_token_and_goto', (old_token, state_id))
- elif old_action.match_action()[0] == 'harmony_token':
- match_action = ('store_harmony_token_and_goto',
- (old_action.match_action()[1][0],
- old_action.match_action()[1][1],
- old_action.match_action()[1][2],
- state_id))
+ match_action = Term('store_token_and_goto', old_token, state_id)
+ elif old_action.match_action().name() == 'harmony_token':
+ new_args = list(old_action.match_action().args()) + [state_id]
+ match_action = Term('store_harmony_token_and_goto', *new_args)
counters['store_harmony_token_and_goto'] += 1
else:
raise Exception(old_action.match_action())
@@ -278,9 +275,9 @@
for state_id in store_states:
old_action = states[state_id]['action']
assert not old_action.entry_action()
- assert old_action.match_action()[0] == 'token', 'unimplemented'
- entry_action = ('store_token', old_action.match_action()[1])
- match_action = ('do_stored_token', state_id)
+ assert old_action.match_action().name() == 'token', 'unimplemented'
+ entry_action = Term('store_token',
old_action.match_action().args()[0])
+ match_action = Term('do_stored_token', state_id)
precedence = old_action.precedence()
states[state_id]['action'] = Action(
entry_action, match_action, precedence)
=======================================
--- /branches/experimental/parser/tools/lexer_generator/lexer_test.py Thu
Jan 23 11:39:04 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/lexer_test.py Mon
Feb 3 14:12:24 2014 UTC
@@ -26,13 +26,13 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import unittest
-from automaton import Action
+from automaton import Term, Action
from rule_parser import RuleProcessor
class LexerTestCase(unittest.TestCase):
def __verify_action_stream(self, rules, string, expected):
- expected = map(lambda (action, s) : (Action(None, (action, None)), s),
+ expected = map(lambda (action, s) : (Action(None, Term(action)), s),
expected)
rule_processor = RuleProcessor.parse(rules, 'latin1')
automata = rule_processor.default_automata()
@@ -44,7 +44,7 @@
@staticmethod
def __terminate():
- return (Action(None, ('terminate', None)), '\0')
+ return ('terminate', '\0')
def test_simple(self):
rules = '''
=======================================
--- /branches/experimental/parser/tools/lexer_generator/rule_parser.py Thu
Jan 23 11:39:04 2014 UTC
+++ /branches/experimental/parser/tools/lexer_generator/rule_parser.py Mon
Feb 3 14:12:24 2014 UTC
@@ -26,7 +26,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import ply.yacc as yacc
-from automaton import Action
+from automaton import Term, Action
from rule_lexer import RuleLexer
from regex_parser import RegexParser
from nfa_builder import NfaBuilder
@@ -148,12 +148,9 @@
| IDENTIFIER LEFT_PARENTHESIS RIGHT_PARENTHESIS
| IDENTIFIER LEFT_PARENTHESIS action_params
RIGHT_PARENTHESIS'''
if len(p) == 2 or len(p) == 4:
- p[0] = (p[1], None)
+ p[0] = Term(p[1])
elif len(p) == 5:
- if len(p[3]) == 1:
- p[0] = (p[1], p[3][0])
- else:
- p[0] = (p[1], p[3])
+ p[0] = Term(p[1], *p[3])
else:
raise Exception()
=======================================
--- /branches/experimental/parser/tools/lexer_generator/test_suite.py Wed
Nov 20 11:29:01 2013 UTC
+++ /branches/experimental/parser/tools/lexer_generator/test_suite.py Mon
Feb 3 14:12:24 2014 UTC
@@ -27,6 +27,7 @@
from unittest import TestLoader, TextTestRunner, TestSuite
+from term_test import *
from automata_test import *
from code_generator_test import *
from lexer_test import *
@@ -36,6 +37,7 @@
if __name__ == "__main__":
loader = TestLoader()
suite = TestSuite((
+ loader.loadTestsFromTestCase(TermTestCase),
loader.loadTestsFromTestCase(TransitionKeyTestCase),
loader.loadTestsFromTestCase(AutomataTestCase),
loader.loadTestsFromTestCase(RuleParserTestCase),
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.