Reviewers: marja,
Message:
Committed patchset #1 manually as r19295 (presubmit successful).
Description:
Experimental parser: store literals as ints
[email protected]
BUG=
Committed: https://code.google.com/p/v8/source/detail?r=19295
Please review this at https://codereview.chromium.org/157813004/
SVN Base: https://v8.googlecode.com/svn/branches/experimental/parser
Affected files (+23, -26 lines):
M tools/lexer_generator/automata_test.py
M tools/lexer_generator/nfa_builder.py
M tools/lexer_generator/regex_parser.py
M tools/lexer_generator/transition_key_test.py
M tools/lexer_generator/transition_keys.py
Index: tools/lexer_generator/automata_test.py
diff --git a/tools/lexer_generator/automata_test.py b/tools/lexer_generator/automata_test.py
index 158a602543206cc3665370340c63e34a52644b44..63142f21fbf9ec19503b9d4856481d3e3f60bdf0 100644
--- a/tools/lexer_generator/automata_test.py
+++ b/tools/lexer_generator/automata_test.py
@@ -91,9 +91,9 @@ class AutomataTestCase(unittest.TestCase):
'terminal' : False,
'action' : Action.empty_action() }
mapping = { k : empty_node() for k in ['S_0', 'S_1', 'S_2', 'S_3'] }
- key_a = TransitionKey.single_char(encoding, 'a')
- key_b = TransitionKey.single_char(encoding, 'b')
- key_c = TransitionKey.single_char(encoding, 'c')
+ key_a = TransitionKey.single_char(encoding, ord('a'))
+ key_b = TransitionKey.single_char(encoding, ord('b'))
+ key_c = TransitionKey.single_char(encoding, ord('c'))
mapping['S_0']['transitions'][key_a] = 'S_1'
mapping['S_0']['transitions'][key_b] = 'S_2'
Index: tools/lexer_generator/nfa_builder.py
diff --git a/tools/lexer_generator/nfa_builder.py b/tools/lexer_generator/nfa_builder.py
index d2a2d566c7b82921920a80ba32bee198a23de10d..ea30c2ce0e605a6bd0c92073691e24df8cd6c2e5 100644
--- a/tools/lexer_generator/nfa_builder.py
+++ b/tools/lexer_generator/nfa_builder.py
@@ -128,7 +128,7 @@ class NfaBuilder(object):
state.add_unclosed_transition(key)
return (state, [state])
- def __literal(self, chars):
+ def __literal(self, *chars):
terms = map(lambda c : Term('SINGLE_CHAR', c), chars)
return self.__process(self.cat_terms(terms))
@@ -320,24 +320,21 @@ class NfaBuilder(object):
@staticmethod
def __flatten_literals(terms):
- literal = None
+ acc = ()
for term in terms:
assert isinstance(term, Term)
if not term:
continue
if term.name() == 'LITERAL':
- if literal:
- literal += term.args()[0]
- else:
- literal = term.args()[0]
+ acc += term.args()
else:
- if literal:
- yield Term('LITERAL', literal)
- literal = None
+ if acc:
+ yield Term('LITERAL', *acc)
+ acc = ()
if term:
yield term
- if literal:
- yield Term('LITERAL', literal)
+ if acc:
+ yield Term('LITERAL', *acc)
@staticmethod
def or_terms(terms):
Index: tools/lexer_generator/regex_parser.py
diff --git a/tools/lexer_generator/regex_parser.py b/tools/lexer_generator/regex_parser.py
index d74a3e8d9b76b941ce8e9376f9d1b720d29e76d0..9c86a0b927f7742982d0f213d13f1ce37ebc9975 100644
--- a/tools/lexer_generator/regex_parser.py
+++ b/tools/lexer_generator/regex_parser.py
@@ -238,7 +238,7 @@ class RegexParser:
def p_literal(self, p):
'''literal : LITERAL'''
- p[0] = Term('LITERAL', p[1])
+ p[0] = Term('LITERAL', ord(p[1]))
def p_any(self, p):
'''any : ANY'''
@@ -262,10 +262,10 @@ class RegexParser:
| CHARACTER_CLASS maybe_class_content
'''
if len(p) == 5:
- left = Term("RANGE", p[1], p[3])
+ left = Term("RANGE", ord(p[1]), ord(p[3]))
else:
if len(p[1]) == 1:
- left = Term('LITERAL', p[1])
+ left = Term('LITERAL', ord(p[1]))
else:
left = Term('CHARACTER_CLASS', p[1][1:-1])
p[0] = self.__cat(left, p[len(p)-1])
Index: tools/lexer_generator/transition_key_test.py
diff --git a/tools/lexer_generator/transition_key_test.py b/tools/lexer_generator/transition_key_test.py
index 0b627c9306b3ccbb65618db1c75ee8cdddba8f47..d4e348ad26642ffe97544b435bc15a24e4556b36 100644
--- a/tools/lexer_generator/transition_key_test.py
+++ b/tools/lexer_generator/transition_key_test.py
@@ -36,8 +36,8 @@ class TransitionKeyTestCase(unittest.TestCase):
__equal_pairs = [
(TransitionKey.epsilon(), TransitionKey.epsilon()),
(TransitionKey.any(__encoding), TransitionKey.any(__encoding)),
- (TransitionKey.single_char(__encoding, 'a'),
- TransitionKey.single_char(__encoding, 'a')),
+ (TransitionKey.single_char(__encoding, ord('a')),
+ TransitionKey.single_char(__encoding, ord('a'))),
]
def test_eq(self):
Index: tools/lexer_generator/transition_keys.py
diff --git a/tools/lexer_generator/transition_keys.py b/tools/lexer_generator/transition_keys.py
index a7ef16ee54f8a3aa56627de99fd53a56ad30f8db..14dfe770bf60c4ea83b3647b3ac07084be9c2f02 100644
--- a/tools/lexer_generator/transition_keys.py
+++ b/tools/lexer_generator/transition_keys.py
@@ -25,6 +25,7 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+from types import IntType
from itertools import chain
from action import Term
from string import printable
@@ -144,8 +145,7 @@ class TransitionKey(object):
@staticmethod
def omega():
'''Always matches.'''
- return TransitionKey.__cached_key(None, 'omega',
- lambda : Term("OMEGA_KEY"))
+ return TransitionKey.__cached_key(None, 'omega', lambda : Term("OMEGA_KEY"))
@staticmethod
def any(encoding):
@@ -154,13 +154,14 @@ class TransitionKey(object):
lambda : encoding.all_components_iter())
@staticmethod
- def single_char(encoding, char): # TODO(dcarney): char should be int
+ def single_char(encoding, char):
'''Returns a TransitionKey for a single-character transition.'''
- return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", ord(char), ord(char)))
+ return TransitionKey.range(encoding, char, char)
@staticmethod
def range(encoding, a, b):
'''Returns a TransitionKey for a single-character transition.'''
+ assert type(a) == IntType and type(b) == IntType
return TransitionKey(encoding, Term("NUMERIC_RANGE_KEY", a, b))
@staticmethod
@@ -176,10 +177,9 @@ class TransitionKey(object):
key = term.name()
args = term.args()
if key == 'RANGE':
- components.append(Term('NUMERIC_RANGE_KEY', ord(args[0]), ord(args[1])))
+ components.append(Term('NUMERIC_RANGE_KEY', args[0], args[1]))
elif key == 'LITERAL':
- for char in args[0]: # TODO(dcarney): don't use strings for literals
- components.append(Term('NUMERIC_RANGE_KEY', ord(char), ord(char)))
+ components += map(lambda x : Term('NUMERIC_RANGE_KEY', x, x), args)
elif key == 'CAT':
for x in args:
TransitionKey.__process_term(encoding, x, components, key_map)
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.