Revision: 17431
Author: [email protected]
Date: Tue Oct 29 15:10:46 2013 UTC
Log: Experimental Parser: add lexer generator
[email protected]
BUG=
Review URL: https://codereview.chromium.org/50873003
http://code.google.com/p/v8/source/detail?r=17431
Added:
/branches/experimental/parser/tools/lexer_generator
/branches/experimental/parser/tools/lexer_generator/__init__.py
/branches/experimental/parser/tools/lexer_generator/dfa.py
/branches/experimental/parser/tools/lexer_generator/nfa.py
/branches/experimental/parser/tools/lexer_generator/regex_lexer.py
/branches/experimental/parser/tools/lexer_generator/regex_parser.py
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/__init__.py Tue Oct
29 15:10:46 2013 UTC
@@ -0,0 +1,28 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# do nothing for now
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Tue Oct 29
15:10:46 2013 UTC
@@ -0,0 +1,107 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from nfa import Nfa
+
+class DfaState:
+
+ def __init__(self, name, node_number):
+ self.__name = name
+ self.__node_number = node_number
+ self.__transitions = {}
+
+ def name(self):
+ return self.__name
+
+ def node_number(self):
+ return self.__node_number
+
+ def add_transition(self, key, state):
+ assert not self.__transitions.has_key(key)
+ self.__transitions[key] = state
+
+ def transitions(self):
+ return self.__transitions
+
+
+class Dfa:
+
+ def __init__(self, start_name, mapping, end_names):
+ name_map = {}
+ offset = 0
+ self.__terminal_set = set()
+ for name in mapping.keys():
+ dfa_state = DfaState(name, offset)
+ name_map[name] = dfa_state
+ offset = offset + 1
+ if name in end_names:
+ self.__terminal_set.add(dfa_state)
+ for name, values in mapping.items():
+ for key, value in values.items():
+ name_map[name].add_transition(key, name_map[value])
+ self.__start = name_map[start_name]
+ assert self.__terminal_set
+
+ @staticmethod
+ def __visit_edges(start, function, state):
+ edge = set([start])
+ visited = set()
+ while edge:
+ next_edge = set()
+ for node in edge:
+ next_edge = next_edge | set(node.transitions().values())
+ state = function(node, state)
+ visited = visited | edge
+ edge = next_edge - visited
+ return state
+
+ def to_dot(self):
+
+ def f(node, node_content):
+ for key, value in node.transitions().items():
+ node_content.append(
+ " S_%s -> S_%s [ label = \"%s\" ];" %
+ (node.node_number(), value.node_number(), key))
+ return node_content
+
+ node_content = self.__visit_edges(self.__start, f, [])
+ terminals = ["S_%d;" % x.node_number() for x in self.__terminal_set]
+ start_number = self.__start.node_number()
+ start_shape = "circle"
+ if self.__start in self.__terminal_set:
+ start_shape = "doublecircle"
+
+ return '''
+digraph finite_state_machine {
+ rankdir=LR;
+ node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
+ node [shape = doublecircle, style=unfilled]; %s
+ node [shape = circle];
+%s
+}
+ ''' % (start_shape,
start_number, " ".join(terminals), "\n".join(node_content))
+
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Oct 29
15:10:46 2013 UTC
@@ -0,0 +1,335 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import TupleType
+from inspect import getmembers
+
+
+class NfaState:
+
+ __epsilon_key = "epsilon"
+ __any_key = "any"
+
+ @staticmethod
+ def epsilon_key():
+ return NfaState.__epsilon_key
+
+ @staticmethod
+ def any_key():
+ return NfaState.__any_key
+
+ def __init__(self, node_number):
+ self.__transitions = {}
+ self.__unclosed = set()
+ self.__node_number = node_number
+ self.__epsilon_closure = None
+
+ def node_number(self):
+ return self.__node_number
+
+ def epsilon_closure(self):
+ return self.__epsilon_closure
+
+ def set_epsilon_closure(self, closure):
+ assert self.is_closed()
+ assert self.__epsilon_closure == None
+ self.__epsilon_closure = frozenset(closure)
+
+ def transitions(self):
+ assert self.is_closed()
+ return self.__transitions
+
+ def get_transition_set(self, key):
+ if not self.__transitions.has_key(key): return frozenset()
+ return frozenset(self.__transitions[key])
+
+ def __add_transition(self, key, value):
+ if value == None:
+ assert not self.is_closed(), "already closed"
+ self.__unclosed.add(key)
+ return
+ if not self.__transitions.has_key(key):
+ self.__transitions[key] = set()
+ self.__transitions[key].add(value)
+
+ def add_character_transition(self, char):
+ self.__add_transition(char, None)
+
+ def add_any_transition(self):
+ self.__add_transition(self.__any_key, None)
+
+ def add_class_transition(self, class_data, inverted):
+ self.__add_transition('class_data', None)
+
+ def add_epsilon_transition(self, state):
+ assert state != None
+ self.__add_transition(self.__epsilon_key, state)
+
+ def is_closed(self):
+ return self.__unclosed == None
+
+ def close(self, end):
+ assert not self.is_closed()
+ if end == None:
+ assert not self.__unclosed
+ else:
+ for key in self.__unclosed:
+ self.__add_transition(key, end)
+ if not self.__unclosed:
+ self.add_epsilon_transition(end)
+ self.__unclosed = None
+
+ def matches(self, char):
+ matches = self.get_transition_set(char)
+ matches = matches.union(self.get_transition_set(self.__any_key))
+ # TODO class matches
+ return matches
+
+ @staticmethod
+ def gather_transition_keys(state_set):
+ keys = set()
+ for state in state_set:
+ for key, values in state.transitions().items():
+ if key == NfaState.__epsilon_key: continue
+ keys.add(key)
+ return keys
+
+class NfaBuilder:
+
+ def __init__(self):
+ self.__operation_map = {}
+ self.__members = getmembers(self)
+
+ def __new_state(self):
+ node_number = self.node_number
+ self.node_number = self.node_number + 1
+ return NfaState(node_number)
+
+ def __or(self, graph):
+ start = self.__new_state()
+ ends = []
+ for x in [self.__process(graph[1]), self.__process(graph[2])]:
+ start.add_epsilon_transition(x[0])
+ ends += x[1]
+ start.close(None)
+ return (start, ends)
+
+ def __one_or_more(self, graph):
+ (start, ends) = self.__process(graph[1])
+ end = self.__new_state()
+ end.add_epsilon_transition(start)
+ self.__patch_ends(ends, end)
+ return (start, [end])
+
+ def __zero_or_more(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ self.__patch_ends(ends, start)
+ return (start, [start])
+
+ def __zero_or_one(self, graph):
+ (node, ends) = self.__process(graph[1])
+ start = self.__new_state()
+ start.add_epsilon_transition(node)
+ return (start, ends + [start])
+
+ def __cat(self, graph):
+ (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+ self.__patch_ends(left[1], right[0])
+ return (left[0], right[1])
+
+ def __literal(self, graph):
+ contents = graph[1]
+ state = self.__new_state()
+ state.add_character_transition(contents)
+ return (state, [state])
+
+ def __class(self, graph):
+ state = self.__new_state()
+ state.add_class_transition(graph[1], False)
+ return (state, [state])
+
+ def __not_class(self, graph):
+ state = self.__new_state()
+ state.add_class_transition(graph[1], True)
+ return (state, [state])
+
+ def __any(self, graph):
+ state = self.__new_state()
+ state.add_any_transition()
+ return (state, [state])
+
+ def __process(self, graph):
+ assert type(graph) == TupleType
+ method = "_NfaBuilder__" + graph[0].lower()
+ if (not self.__operation_map.has_key(method)):
+ matches = [func for (name, func) in self.__members if name == method]
+ assert len(matches) == 1
+ self.__operation_map[method] = matches[0]
+ return self.__operation_map[method](graph)
+
+ def __patch_ends(self, ends, new_end):
+ for end in ends:
+ end.close(new_end)
+
+ def nfa(self, graph):
+ self.node_number = 0
+ (start, ends) = self.__process(graph)
+ end = self.__new_state()
+ self.__patch_ends(ends, end)
+ end.close(None)
+ return Nfa(start, end, self.node_number)
+
+
+class Nfa:
+
+ def __init__(self, start, end, nodes_created):
+ self.__start = start
+ self.__end = end
+ self.__epsilon_closure_computed = False
+ self.__verify(nodes_created)
+
+ @staticmethod
+ def __visit_edges(start, include_start, just_epsilon, function, state):
+
+ def compute_next_edge(node, next_edge):
+ if just_epsilon:
+ return next_edge.union(node.get_transition_set(node.epsilon_key()))
+ else:
+ for key, values in node.transitions().items():
+ next_edge = next_edge.union(values)
+ return next_edge
+
+ edge = set()
+ if include_start:
+ edge.add(start)
+ else:
+ edge = compute_next_edge(start, edge)
+
+ visited = set()
+ while edge:
+ next_edge = set()
+ for node in edge:
+ next_edge = compute_next_edge(node, next_edge)
+ state = function(node, state)
+ visited = visited.union(edge)
+ edge = next_edge.difference(visited)
+ return state
+
+ def __visit_all_edges(self, function, state):
+ return Nfa.__visit_edges(self.__start, True, False, function, state)
+
+ def __verify(self, nodes_created):
+ def f(node, node_list):
+ assert node.is_closed()
+ node_list.append(node)
+ return node_list
+ node_list = self.__visit_all_edges(f, [])
+ assert len(node_list) == nodes_created
+
+ def __compute_epsilon_closures(self):
+ if self.__epsilon_closure_computed:
+ return
+ self.__epsilon_closure_computed = True
+ def outer(node, state):
+ def inner(node, closure):
+ closure.add(node)
+ return closure
+ closure = Nfa.__visit_edges(node, False, True, inner, set())
+ node.set_epsilon_closure(closure)
+ self.__visit_all_edges(outer, None)
+
+ @staticmethod
+ def __close(states):
+ for node in states:
+ states = states.union(node.epsilon_closure())
+ return states
+
+ def matches(self, string):
+ self.__compute_epsilon_closures()
+ valid_states = Nfa.__close(set([self.__start]))
+ for c in string:
+ next_valid_states = set()
+ for valid_state in valid_states:
+ next_valid_states = next_valid_states.union(valid_state.matches(c))
+ valid_states = Nfa.__close(next_valid_states)
+ if not valid_states:
+ return False
+ return self.__end in valid_states
+
+ @staticmethod
+ def __to_dfa(nfa_state_set, dfa_builder):
+ nfa_state_set = Nfa.__close(nfa_state_set)
+ name = str([x.node_number() for x in nfa_state_set])
+ (dfa_nodes, end_nodes, end_node) = dfa_builder
+ if dfa_nodes.has_key(name):
+ return name
+ dfa_node = {}
+ dfa_nodes[name] = dfa_node
+ for key in NfaState.gather_transition_keys(nfa_state_set):
+ reachable_set = set()
+ for nfa_state in nfa_state_set:
+ reachable_set = reachable_set | nfa_state.matches(key)
+ dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
+ if end_node in nfa_state_set:
+ end_nodes.add(name)
+ return name
+
+ def compute_dfa(self):
+ self.__compute_epsilon_closures()
+ dfa_nodes = {}
+ end_nodes = set()
+ dfa_builder = (dfa_nodes, end_nodes, self.__end)
+ start_name = self.__to_dfa(set([self.__start]), dfa_builder)
+ return (start_name, dfa_nodes, end_nodes)
+
+ def to_dot(self):
+
+ def f(node, node_content):
+ for key, values in node.transitions().items():
+ if key == node.epsilon_key(): key = "ε"
+ if key != node.epsilon_key() and len(key) == 1:
+ key = "'%s'" % key
+ for value in values:
+ node_content.append(
+ " S_%d -> S_%d [ label = \"%s\" ];" %
+ (node.node_number(), value.node_number(), key))
+ return node_content
+
+ node_content = self.__visit_all_edges(f, [])
+
+ return '''
+digraph finite_state_machine {
+ rankdir=LR;
+ node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
+ node [shape = doublecircle, style=unfilled]; S_%s
+ node [shape = circle];
+%s
+}
+ ''' % (self.__start.node_number(),
self.__end.node_number(), "\n".join(node_content))
+
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue
Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,102 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import ply.lex as lex
+
+class RegexLexer:
+
+ tokens = (
+
+ 'GROUP_BEGIN',
+ 'GROUP_END',
+
+ 'CLASS_BEGIN',
+ 'CLASS_END',
+
+ 'OR',
+ 'ONE_OR_MORE',
+ 'ZERO_OR_MORE',
+ 'ZERO_OR_ONE',
+ 'ANY',
+
+ 'LITERAL',
+
+ 'RANGE',
+ 'NOT',
+ 'CLASS_LITERAL',
+ )
+
+ states = (
+ ('class','exclusive'),
+ )
+
+ def t_ESCAPED_LITERAL(self, t):
+ r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\'
+ t.type = 'LITERAL'
+ t.value = t.value[1:]
+ return t
+
+ t_GROUP_BEGIN = r'\('
+ t_GROUP_END = r'\)'
+
+ t_OR = r'\|'
+ t_ONE_OR_MORE = r'\+'
+ t_ZERO_OR_MORE = r'\*'
+ t_ZERO_OR_ONE = r'\?'
+
+ t_ANY = r'\.'
+
+ t_LITERAL = r'.'
+
+ def t_CLASS_BEGIN(self, t):
+ r'\['
+ self.lexer.push_state('class')
+ return t
+
+ def t_class_CLASS_END(self, t):
+ r'\]'
+ self.lexer.pop_state()
+ return t
+
+ t_class_RANGE = '-'
+ t_class_NOT = '\^'
+
+ def t_class_ESCAPED_CLASS_LITERAL(self, t):
+ r'\\\^|\\-'
+ t.type = 'CLASS_LITERAL'
+ t.value = t.value[1:]
+ return t
+
+ t_class_CLASS_LITERAL = r'[a-zA-Z]' # fix this
+
+ t_ANY_ignore = '\n'
+
+ def t_ANY_error(self, t):
+ raise Exception("Illegal character '%s'" % t.value[0])
+
+ def build(self, **kwargs):
+ self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue
Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,134 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import ply.yacc as yacc
+from regex_lexer import RegexLexer
+from types import ListType, TupleType
+
+class RegexParser:
+
+ tokens = RegexLexer.tokens
+
+ token_map = {
+ '+': 'ONE_OR_MORE',
+ '?': 'ZERO_OR_ONE',
+ '*': 'ZERO_OR_MORE',
+ '|': 'OR',
+ '.': 'ANY',
+ }
+
+ def p_start(self, p):
+ '''start : fragments OR fragments
+ | fragments'''
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = (self.token_map[p[2]], p[1], p[3])
+
+ def p_fragments(self, p):
+ '''fragments : fragment
+ | fragment fragments'''
+ if len(p) == 2:
+ p[0] = p[1]
+ else:
+ p[0] = self.__cat(p[1], p[2])
+
+ def p_fragment(self, p):
+ '''fragment : literal maybe_modifier
+ | class maybe_modifier
+ | group maybe_modifier
+ | any maybe_modifier
+ '''
+ if p[2] != None:
+ p[0] = (p[2], p[1])
+ else:
+ p[0] = p[1]
+
+ def p_maybe_modifier(self, p):
+ '''maybe_modifier : ONE_OR_MORE
+ | ZERO_OR_ONE
+ | ZERO_OR_MORE
+ | empty'''
+ p[0] = p[1]
+ if p[1] != None:
+ p[0] = self.token_map[p[1]]
+
+ def p_literal(self, p):
+ '''literal : LITERAL'''
+ p[0] = ('LITERAL', p[1])
+
+ def p_any(self, p):
+ '''any : ANY'''
+ p[0] = (self.token_map[p[1]],)
+
+ def p_class(self, p):
+ '''class : CLASS_BEGIN class_content CLASS_END
+ | CLASS_BEGIN NOT class_content CLASS_END'''
+ if len(p) == 4:
+ p[0] = ("CLASS", p[2])
+ else:
+ p[0] = ("NOT_CLASS", p[3])
+
+ def p_group(self, p):
+ '''group : GROUP_BEGIN start GROUP_END'''
+ p[0] = p[2]
+
+ def p_class_content(self, p):
+ '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL
maybe_class_content
+ | CLASS_LITERAL maybe_class_content
+ '''
+ if len(p) == 5:
+ left = ("RANGE", p[1], p[3])
+ else:
+ left = ('LITERAL', p[1])
+ p[0] = self.__cat(left, p[len(p)-1])
+
+ def p_maybe_class_content(self, p):
+ '''maybe_class_content : class_content
+ | empty'''
+ p[0] = p[1]
+
+ def p_empty(self, p):
+ 'empty :'
+
+ def p_error(self, p):
+ raise Exception("Syntax error in input '%s'" % p)
+
+ @staticmethod
+ def __cat(left, right):
+ if right == None:
+ return left
+ return ('CAT', left, right)
+
+ def build(self, **kwargs):
+ self.parser = yacc.yacc(module=self, **kwargs)
+ self.lexer = RegexLexer()
+ self.lexer.build(**kwargs)
+
+ def parse(self, data):
+ return self.parser.parse(data, lexer=self.lexer.lexer)
+
--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
---
You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.