Revision: 17431
Author:   [email protected]
Date:     Tue Oct 29 15:10:46 2013 UTC
Log:      Experimental Parser: add lexer generator

[email protected]
BUG=

Review URL: https://codereview.chromium.org/50873003
http://code.google.com/p/v8/source/detail?r=17431

Added:
 /branches/experimental/parser/tools/lexer_generator
 /branches/experimental/parser/tools/lexer_generator/__init__.py
 /branches/experimental/parser/tools/lexer_generator/dfa.py
 /branches/experimental/parser/tools/lexer_generator/nfa.py
 /branches/experimental/parser/tools/lexer_generator/regex_lexer.py
 /branches/experimental/parser/tools/lexer_generator/regex_parser.py

=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/__init__.py Tue Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,28 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# do nothing for now
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/dfa.py Tue Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,107 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from nfa import Nfa
+
+class DfaState:
+
+  def __init__(self, name, node_number):
+    self.__name = name
+    self.__node_number = node_number
+    self.__transitions = {}
+
+  def name(self):
+    return self.__name
+
+  def node_number(self):
+    return self.__node_number
+
+  def add_transition(self, key, state):
+    assert not self.__transitions.has_key(key)
+    self.__transitions[key] = state
+
+  def transitions(self):
+    return self.__transitions
+
+
+class Dfa:
+
+  def __init__(self, start_name, mapping, end_names):
+    name_map = {}
+    offset = 0
+    self.__terminal_set = set()
+    for name in mapping.keys():
+      dfa_state = DfaState(name, offset)
+      name_map[name] = dfa_state
+      offset = offset + 1
+      if name in end_names:
+        self.__terminal_set.add(dfa_state)
+    for name, values in mapping.items():
+      for key, value in values.items():
+        name_map[name].add_transition(key, name_map[value])
+    self.__start = name_map[start_name]
+    assert self.__terminal_set
+
+  @staticmethod
+  def __visit_edges(start, function, state):
+    edge = set([start])
+    visited = set()
+    while edge:
+      next_edge = set()
+      for node in edge:
+        next_edge = next_edge | set(node.transitions().values())
+        state = function(node, state)
+      visited = visited | edge
+      edge = next_edge - visited
+    return state
+
+  def to_dot(self):
+
+    def f(node, node_content):
+      for key, value in node.transitions().items():
+        node_content.append(
+          "  S_%s -> S_%s [ label = \"%s\" ];" %
+            (node.node_number(), value.node_number(), key))
+      return node_content
+
+    node_content = self.__visit_edges(self.__start, f, [])
+    terminals = ["S_%d;" % x.node_number() for x in self.__terminal_set]
+    start_number = self.__start.node_number()
+    start_shape = "circle"
+    if self.__start in self.__terminal_set:
+      start_shape = "doublecircle"
+
+    return '''
+digraph finite_state_machine {
+  rankdir=LR;
+  node [shape = %s, style=filled, bgcolor=lightgrey]; S_%s
+  node [shape = doublecircle, style=unfilled]; %s
+  node [shape = circle];
+%s
+}
+ ''' % (start_shape, start_number, " ".join(terminals), "\n".join(node_content))
+
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/nfa.py Tue Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,335 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from types import TupleType
+from inspect import getmembers
+
+
+class NfaState:
+
+  __epsilon_key = "epsilon"
+  __any_key = "any"
+
+  @staticmethod
+  def epsilon_key():
+    return NfaState.__epsilon_key
+
+  @staticmethod
+  def any_key():
+    return NfaState.__any_key
+
+  def __init__(self, node_number):
+    self.__transitions = {}
+    self.__unclosed = set()
+    self.__node_number = node_number
+    self.__epsilon_closure = None
+
+  def node_number(self):
+    return self.__node_number
+
+  def epsilon_closure(self):
+    return self.__epsilon_closure
+
+  def set_epsilon_closure(self, closure):
+    assert self.is_closed()
+    assert self.__epsilon_closure == None
+    self.__epsilon_closure = frozenset(closure)
+
+  def transitions(self):
+    assert self.is_closed()
+    return self.__transitions
+
+  def get_transition_set(self, key):
+    if not self.__transitions.has_key(key): return frozenset()
+    return frozenset(self.__transitions[key])
+
+  def __add_transition(self, key, value):
+    if value == None:
+      assert not self.is_closed(), "already closed"
+      self.__unclosed.add(key)
+      return
+    if not self.__transitions.has_key(key):
+      self.__transitions[key] = set()
+    self.__transitions[key].add(value)
+
+  def add_character_transition(self, char):
+    self.__add_transition(char, None)
+
+  def add_any_transition(self):
+    self.__add_transition(self.__any_key, None)
+
+  def add_class_transition(self, class_data, inverted):
+    self.__add_transition('class_data', None)
+
+  def add_epsilon_transition(self, state):
+    assert state != None
+    self.__add_transition(self.__epsilon_key, state)
+
+  def is_closed(self):
+    return self.__unclosed == None
+
+  def close(self, end):
+    assert not self.is_closed()
+    if end == None:
+      assert not self.__unclosed
+    else:
+      for key in self.__unclosed:
+        self.__add_transition(key, end)
+      if not self.__unclosed:
+        self.add_epsilon_transition(end)
+    self.__unclosed = None
+
+  def matches(self, char):
+    matches = self.get_transition_set(char)
+    matches = matches.union(self.get_transition_set(self.__any_key))
+    # TODO class matches
+    return matches
+
+  @staticmethod
+  def gather_transition_keys(state_set):
+    keys = set()
+    for state in state_set:
+      for key, values in state.transitions().items():
+        if key == NfaState.__epsilon_key: continue
+        keys.add(key)
+    return keys
+
+class NfaBuilder:
+
+  def __init__(self):
+    self.__operation_map = {}
+    self.__members = getmembers(self)
+
+  def __new_state(self):
+    node_number = self.node_number
+    self.node_number = self.node_number + 1
+    return NfaState(node_number)
+
+  def __or(self, graph):
+    start = self.__new_state()
+    ends = []
+    for x in [self.__process(graph[1]), self.__process(graph[2])]:
+      start.add_epsilon_transition(x[0])
+      ends += x[1]
+    start.close(None)
+    return (start, ends)
+
+  def __one_or_more(self, graph):
+    (start, ends) = self.__process(graph[1])
+    end =  self.__new_state()
+    end.add_epsilon_transition(start)
+    self.__patch_ends(ends, end)
+    return (start, [end])
+
+  def __zero_or_more(self, graph):
+    (node, ends) = self.__process(graph[1])
+    start =  self.__new_state()
+    start.add_epsilon_transition(node)
+    self.__patch_ends(ends, start)
+    return (start, [start])
+
+  def __zero_or_one(self, graph):
+    (node, ends) = self.__process(graph[1])
+    start =  self.__new_state()
+    start.add_epsilon_transition(node)
+    return (start, ends + [start])
+
+  def __cat(self, graph):
+    (left, right) = (self.__process(graph[1]), self.__process(graph[2]))
+    self.__patch_ends(left[1], right[0])
+    return (left[0], right[1])
+
+  def __literal(self, graph):
+    contents = graph[1]
+    state =  self.__new_state()
+    state.add_character_transition(contents)
+    return (state, [state])
+
+  def __class(self, graph):
+    state =  self.__new_state()
+    state.add_class_transition(graph[1], False)
+    return (state, [state])
+
+  def __not_class(self, graph):
+    state =  self.__new_state()
+    state.add_class_transition(graph[1], True)
+    return (state, [state])
+
+  def __any(self, graph):
+    state =  self.__new_state()
+    state.add_any_transition()
+    return (state, [state])
+
+  def __process(self, graph):
+    assert type(graph) == TupleType
+    method = "_NfaBuilder__" + graph[0].lower()
+    if (not self.__operation_map.has_key(method)):
+      matches = [func for (name, func) in self.__members if name == method]
+      assert len(matches) == 1
+      self.__operation_map[method] = matches[0]
+    return self.__operation_map[method](graph)
+
+  def __patch_ends(self, ends, new_end):
+    for end in ends:
+      end.close(new_end)
+
+  def nfa(self, graph):
+    self.node_number = 0
+    (start, ends) = self.__process(graph)
+    end =  self.__new_state()
+    self.__patch_ends(ends, end)
+    end.close(None)
+    return Nfa(start, end, self.node_number)
+
+
+class Nfa:
+
+  def __init__(self, start, end, nodes_created):
+    self.__start = start
+    self.__end = end
+    self.__epsilon_closure_computed = False
+    self.__verify(nodes_created)
+
+  @staticmethod
+  def __visit_edges(start, include_start, just_epsilon, function, state):
+
+    def compute_next_edge(node, next_edge):
+      if just_epsilon:
+        return next_edge.union(node.get_transition_set(node.epsilon_key()))
+      else:
+        for key, values in node.transitions().items():
+          next_edge = next_edge.union(values)
+      return next_edge
+
+    edge = set()
+    if include_start:
+      edge.add(start)
+    else:
+      edge = compute_next_edge(start, edge)
+
+    visited = set()
+    while edge:
+      next_edge = set()
+      for node in edge:
+        next_edge = compute_next_edge(node, next_edge)
+        state = function(node, state)
+      visited = visited.union(edge)
+      edge = next_edge.difference(visited)
+    return state
+
+  def __visit_all_edges(self, function, state):
+    return Nfa.__visit_edges(self.__start, True, False, function, state)
+
+  def __verify(self, nodes_created):
+    def f(node, node_list):
+      assert node.is_closed()
+      node_list.append(node)
+      return node_list
+    node_list = self.__visit_all_edges(f, [])
+    assert len(node_list) == nodes_created
+
+  def __compute_epsilon_closures(self):
+    if self.__epsilon_closure_computed:
+      return
+    self.__epsilon_closure_computed = True
+    def outer(node, state):
+      def inner(node, closure):
+        closure.add(node)
+        return closure
+      closure = Nfa.__visit_edges(node, False, True, inner, set())
+      node.set_epsilon_closure(closure)
+    self.__visit_all_edges(outer, None)
+
+  @staticmethod
+  def __close(states):
+    for node in states:
+      states = states.union(node.epsilon_closure())
+    return states
+
+  def matches(self, string):
+    self.__compute_epsilon_closures()
+    valid_states = Nfa.__close(set([self.__start]))
+    for c in string:
+      next_valid_states = set()
+      for valid_state in valid_states:
+        next_valid_states = next_valid_states.union(valid_state.matches(c))
+      valid_states = Nfa.__close(next_valid_states)
+      if not valid_states:
+        return False
+    return self.__end in valid_states
+
+  @staticmethod
+  def __to_dfa(nfa_state_set, dfa_builder):
+    nfa_state_set = Nfa.__close(nfa_state_set)
+    name = str([x.node_number() for x in nfa_state_set])
+    (dfa_nodes, end_nodes, end_node) = dfa_builder
+    if dfa_nodes.has_key(name):
+      return name
+    dfa_node = {}
+    dfa_nodes[name] = dfa_node
+    for key in NfaState.gather_transition_keys(nfa_state_set):
+      reachable_set = set()
+      for nfa_state in nfa_state_set:
+        reachable_set = reachable_set | nfa_state.matches(key)
+      dfa_node[key] = Nfa.__to_dfa(reachable_set, dfa_builder)
+    if end_node in nfa_state_set:
+      end_nodes.add(name)
+    return name
+
+  def compute_dfa(self):
+    self.__compute_epsilon_closures()
+    dfa_nodes = {}
+    end_nodes = set()
+    dfa_builder = (dfa_nodes, end_nodes, self.__end)
+    start_name = self.__to_dfa(set([self.__start]), dfa_builder)
+    return (start_name, dfa_nodes, end_nodes)
+
+  def to_dot(self):
+
+    def f(node, node_content):
+      for key, values in node.transitions().items():
+        if key == node.epsilon_key(): key = "ε"
+        if key != node.epsilon_key() and len(key) == 1:
+          key = "'%s'" % key
+        for value in values:
+          node_content.append(
+            "  S_%d -> S_%d [ label = \"%s\" ];" %
+              (node.node_number(), value.node_number(), key))
+      return node_content
+
+    node_content = self.__visit_all_edges(f, [])
+
+    return '''
+digraph finite_state_machine {
+  rankdir=LR;
+  node [shape = circle, style=filled, bgcolor=lightgrey]; S_%s
+  node [shape = doublecircle, style=unfilled]; S_%s
+  node [shape = circle];
+%s
+}
+ ''' % (self.__start.node_number(), self.__end.node_number(), "\n".join(node_content))
+
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/regex_lexer.py Tue Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,102 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import ply.lex as lex
+
+class RegexLexer:
+
+  tokens = (
+
+    'GROUP_BEGIN',
+    'GROUP_END',
+
+    'CLASS_BEGIN',
+    'CLASS_END',
+
+    'OR',
+    'ONE_OR_MORE',
+    'ZERO_OR_MORE',
+    'ZERO_OR_ONE',
+    'ANY',
+
+    'LITERAL',
+
+    'RANGE',
+    'NOT',
+    'CLASS_LITERAL',
+  )
+
+  states = (
+    ('class','exclusive'),
+  )
+
+  def t_ESCAPED_LITERAL(self, t):
+    r'\\\(|\\\)|\\\[|\\\]|\\\||\\\+|\\\*|\\\?|\\\.|\\\\'
+    t.type = 'LITERAL'
+    t.value = t.value[1:]
+    return t
+
+  t_GROUP_BEGIN = r'\('
+  t_GROUP_END = r'\)'
+
+  t_OR = r'\|'
+  t_ONE_OR_MORE = r'\+'
+  t_ZERO_OR_MORE = r'\*'
+  t_ZERO_OR_ONE = r'\?'
+
+  t_ANY = r'\.'
+
+  t_LITERAL = r'.'
+
+  def t_CLASS_BEGIN(self, t):
+    r'\['
+    self.lexer.push_state('class')
+    return t
+
+  def t_class_CLASS_END(self, t):
+    r'\]'
+    self.lexer.pop_state()
+    return t
+
+  t_class_RANGE = '-'
+  t_class_NOT = '\^'
+
+  def t_class_ESCAPED_CLASS_LITERAL(self, t):
+    r'\\\^|\\-'
+    t.type = 'CLASS_LITERAL'
+    t.value = t.value[1:]
+    return t
+
+  t_class_CLASS_LITERAL = r'[a-zA-Z]' # fix this
+
+  t_ANY_ignore  = '\n'
+
+  def t_ANY_error(self, t):
+    raise Exception("Illegal character '%s'" % t.value[0])
+
+  def build(self, **kwargs):
+    self.lexer = lex.lex(module=self, **kwargs)
=======================================
--- /dev/null
+++ /branches/experimental/parser/tools/lexer_generator/regex_parser.py Tue Oct 29 15:10:46 2013 UTC
@@ -0,0 +1,134 @@
+# Copyright 2013 the V8 project authors. All rights reserved.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#     * Neither the name of Google Inc. nor the names of its
+#       contributors may be used to endorse or promote products derived
+#       from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import ply.yacc as yacc
+from regex_lexer import RegexLexer
+from types import ListType, TupleType
+
+class RegexParser:
+
+  tokens = RegexLexer.tokens
+
+  token_map = {
+    '+': 'ONE_OR_MORE',
+    '?': 'ZERO_OR_ONE',
+    '*': 'ZERO_OR_MORE',
+    '|': 'OR',
+    '.': 'ANY',
+  }
+
+  def p_start(self, p):
+    '''start : fragments OR fragments
+             | fragments'''
+    if len(p) == 2:
+      p[0] = p[1]
+    else:
+      p[0] = (self.token_map[p[2]], p[1], p[3])
+
+  def p_fragments(self, p):
+    '''fragments : fragment
+                 | fragment fragments'''
+    if len(p) == 2:
+      p[0] = p[1]
+    else:
+      p[0] = self.__cat(p[1], p[2])
+
+  def p_fragment(self, p):
+    '''fragment : literal maybe_modifier
+                | class maybe_modifier
+                | group maybe_modifier
+                | any maybe_modifier
+    '''
+    if p[2] != None:
+      p[0] = (p[2], p[1])
+    else:
+      p[0] = p[1]
+
+  def p_maybe_modifier(self, p):
+    '''maybe_modifier : ONE_OR_MORE
+                      | ZERO_OR_ONE
+                      | ZERO_OR_MORE
+                      | empty'''
+    p[0] = p[1]
+    if p[1] != None:
+      p[0] = self.token_map[p[1]]
+
+  def p_literal(self, p):
+    '''literal : LITERAL'''
+    p[0] = ('LITERAL', p[1])
+
+  def p_any(self, p):
+    '''any : ANY'''
+    p[0] = (self.token_map[p[1]],)
+
+  def p_class(self, p):
+    '''class : CLASS_BEGIN class_content CLASS_END
+             | CLASS_BEGIN NOT class_content CLASS_END'''
+    if len(p) == 4:
+      p[0] = ("CLASS", p[2])
+    else:
+      p[0] = ("NOT_CLASS", p[3])
+
+  def p_group(self, p):
+    '''group : GROUP_BEGIN start GROUP_END'''
+    p[0] = p[2]
+
+  def p_class_content(self, p):
+ '''class_content : CLASS_LITERAL RANGE CLASS_LITERAL maybe_class_content
+                     | CLASS_LITERAL maybe_class_content
+    '''
+    if len(p) == 5:
+      left = ("RANGE", p[1], p[3])
+    else:
+      left = ('LITERAL', p[1])
+    p[0] = self.__cat(left, p[len(p)-1])
+
+  def p_maybe_class_content(self, p):
+    '''maybe_class_content : class_content
+                           | empty'''
+    p[0] = p[1]
+
+  def p_empty(self, p):
+    'empty :'
+
+  def p_error(self, p):
+    raise Exception("Syntax error in input '%s'" % p)
+
+  @staticmethod
+  def __cat(left, right):
+    if right == None:
+      return left
+    return ('CAT', left, right)
+
+  def build(self, **kwargs):
+    self.parser = yacc.yacc(module=self, **kwargs)
+    self.lexer = RegexLexer()
+    self.lexer.build(**kwargs)
+
+  def parse(self, data):
+    return self.parser.parse(data, lexer=self.lexer.lexer)
+

--
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
--- You received this message because you are subscribed to the Google Groups "v8-dev" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/groups/opt_out.

Reply via email to