Author: Carl Friedrich Bolz <cfb...@gmx.de> Branch: Changeset: r80490:986b29dd1c0b Date: 2015-10-30 18:30 +0100 http://bitbucket.org/pypy/pypy/changeset/986b29dd1c0b/
Log: don't do one to two dict lookups per parsed character this is done by computing a different representation of the state transition table (a big string, instead of many dictionaries). A bit of a "just-because" commit. diff --git a/pypy/interpreter/pyparser/automata.py b/pypy/interpreter/pyparser/automata.py --- a/pypy/interpreter/pyparser/automata.py +++ b/pypy/interpreter/pyparser/automata.py @@ -22,27 +22,61 @@ # PYPY Modification : removed all automata functions (any, maybe, # newArcPair, etc.) +ERROR_STATE = chr(255) + class DFA: # ____________________________________________________________ def __init__(self, states, accepts, start = 0): - self.states = states + """ NOT_RPYTHON """ + assert len(states) < 255 # no support for huge amounts of states + # construct string for looking up state transitions + string_states = [] * len(states) + # compute maximum + maximum = 0 + for state in states: + for key in state: + if key == DEFAULT: + continue + maximum = max(ord(key), maximum) + self.max_char = maximum + 1 + + defaults = [] + for i, state in enumerate(states): + default = ERROR_STATE + if DEFAULT in state: + default = chr(state[DEFAULT]) + defaults.append(default) + string_state = [default] * self.max_char + for key, value in state.iteritems(): + if key == DEFAULT: + continue + assert len(key) == 1 + assert ord(key) < self.max_char + string_state[ord(key)] = chr(value) + string_states.extend(string_state) + self.states = "".join(string_states) + self.defaults = "".join(defaults) self.accepts = accepts self.start = start # ____________________________________________________________ - def recognize (self, inVec, pos = 0): # greedy = True + + def _next_state(self, item, crntState): + if ord(item) >= self.max_char: + return self.defaults[crntState] + else: + return self.states[crntState * self.max_char + ord(item)] + + def recognize(self, inVec, pos = 0): crntState = self.start lastAccept = False i = pos for i in range(pos, len(inVec)): item = inVec[i] - # arcMap, accept = self.states[crntState] - arcMap = self.states[crntState] accept = self.accepts[crntState] - if item in arcMap: - crntState = arcMap[item] - elif DEFAULT in arcMap: - crntState = arcMap[DEFAULT] + crntState = self._next_state(item, crntState) + if crntState != ERROR_STATE: + pass elif accept: return i elif lastAccept: @@ -51,6 +85,7 @@ return i - 1 else: return -1 + crntState = ord(crntState) lastAccept = accept # if self.states[crntState][1]: if self.accepts[crntState]: @@ -63,24 +98,20 @@ # ______________________________________________________________________ class NonGreedyDFA (DFA): - def recognize (self, inVec, pos = 0): + + def recognize(self, inVec, pos = 0): crntState = self.start i = pos for i in range(pos, len(inVec)): item = inVec[i] - # arcMap, accept = self.states[crntState] - arcMap = self.states[crntState] accept = self.accepts[crntState] if accept: return i - elif item in arcMap: - crntState = arcMap[item] - elif DEFAULT in arcMap: - crntState = arcMap[DEFAULT] - else: + crntState = self._next_state(item, crntState) + if crntState == ERROR_STATE: return -1 + crntState = ord(crntState) i += 1 - # if self.states[crntState][1]: if self.accepts[crntState]: return i else: diff --git a/pypy/interpreter/pyparser/test/test_automata.py b/pypy/interpreter/pyparser/test/test_automata.py new file mode 100644 --- /dev/null +++ b/pypy/interpreter/pyparser/test/test_automata.py @@ -0,0 +1,12 @@ +from pypy.interpreter.pyparser.automata import DFA, DEFAULT + +def test_states(): + d = DFA([{"\x00": 1}, {"\x01": 0}], [False, True]) + assert d.states == "\x01\xff\xff\x00" + assert d.defaults == "\xff\xff" + assert d.max_char == 2 + + d = DFA([{"\x00": 1}, {DEFAULT: 0}], [False, True]) + assert d.states == "\x01\x00" + assert d.defaults == "\xff\x00" + assert d.max_char == 1 _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit