Author: Amaury Forgeot d'Arc <amaur...@gmail.com>
Branch: py3.6
Changeset: r93620:c123301c02cc
Date: 2018-01-03 12:17 +0100
http://bitbucket.org/pypy/pypy/changeset/c123301c02cc/
Log: Attempt to parse numbers with underscores diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py --- a/pypy/interpreter/pyparser/dfa_generated.py +++ b/pypy/interpreter/pyparser/dfa_generated.py @@ -7,10 +7,14 @@ accepts = [True, True, True, True, True, True, True, True, True, True, True, False, True, True, True, True, True, False, False, False, True, False, False, - False, True, False, True, False, True, False, False, True, False, False, True, False, False, - True, True, True, False, False, True, False, - False, False, True] + True, False, False, True, False, False, True, + False, False, True, False, True, False, True, + False, True, False, False, False, False, True, + True, False, False, False, False, True, False, + True, False, True, False, True, False, True, True, + False, True, False, True, False, False, True, + True, True, True, True] states = [ # 0 {'\t': 0, '\n': 15, '\x0c': 0, @@ -110,21 +114,21 @@ 'v': 1, 'w': 1, 'x': 1, 'y': 1, 'z': 1, '\x80': 1}, # 5 - {'.': 26, '0': 24, '1': 25, '2': 25, - '3': 25, '4': 25, '5': 25, '6': 25, - '7': 25, '8': 25, '9': 25, 'B': 23, - 'E': 27, 'J': 15, 'O': 22, 'X': 21, - 'b': 23, 'e': 27, 'j': 15, 'o': 22, - 'x': 21}, + {'.': 27, '0': 24, '1': 26, '2': 26, + '3': 26, '4': 26, '5': 26, '6': 26, + '7': 26, '8': 26, '9': 26, 'B': 23, + 'E': 28, 'J': 15, 'O': 22, 'X': 21, + '_': 25, 'b': 23, 'e': 28, 'j': 15, + 'o': 22, 'x': 21}, # 6 - {'.': 26, '0': 6, '1': 6, '2': 6, + {'.': 27, '0': 6, '1': 6, '2': 6, '3': 6, '4': 6, '5': 6, '6': 6, - '7': 6, '8': 6, '9': 6, 'E': 27, - 'J': 15, 'e': 27, 'j': 15}, + '7': 6, '8': 6, '9': 6, 'E': 28, + 'J': 15, '_': 29, 'e': 28, 'j': 15}, # 7 - {'.': 29, '0': 28, '1': 28, '2': 28, - '3': 28, '4': 28, '5': 28, '6': 28, - '7': 28, '8': 28, '9': 28}, + {'.': 31, '0': 30, '1': 30, '2': 30, + '3': 30, '4': 30, '5': 30, '6': 30, + '7': 30, '8': 30, '9': 30}, # 8 {'*': 14, '=': 15}, # 9 @@ -144,107 +148,240 @@ # 16 {'\n': 15}, # 17 - 
{automata.DEFAULT: 33, '\n': 30, - '\r': 30, "'": 31, '\\': 32}, + {automata.DEFAULT: 35, '\n': 32, + '\r': 32, "'": 33, '\\': 34}, # 18 - {automata.DEFAULT: 36, '\n': 30, - '\r': 30, '"': 34, '\\': 35}, + {automata.DEFAULT: 38, '\n': 32, + '\r': 32, '"': 36, '\\': 37}, # 19 {'\n': 15, '\r': 16}, # 20 - {automata.DEFAULT: 20, '\n': 30, '\r': 30}, + {automata.DEFAULT: 20, '\n': 32, '\r': 32}, # 21 - {'0': 37, '1': 37, '2': 37, '3': 37, - '4': 37, '5': 37, '6': 37, '7': 37, - '8': 37, '9': 37, 'A': 37, 'B': 37, - 'C': 37, 'D': 37, 'E': 37, 'F': 37, - 'a': 37, 'b': 37, 'c': 37, 'd': 37, - 'e': 37, 'f': 37}, + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39, 'A': 39, 'B': 39, + 'C': 39, 'D': 39, 'E': 39, 'F': 39, + '_': 40, 'a': 39, 'b': 39, 'c': 39, + 'd': 39, 'e': 39, 'f': 39}, # 22 - {'0': 38, '1': 38, '2': 38, '3': 38, - '4': 38, '5': 38, '6': 38, '7': 38}, + {'0': 41, '1': 41, '2': 41, '3': 41, + '4': 41, '5': 41, '6': 41, '7': 41, + '_': 42}, # 23 - {'0': 39, '1': 39}, + {'0': 43, '1': 43, '_': 44}, # 24 - {'.': 26, '0': 24, '1': 25, '2': 25, - '3': 25, '4': 25, '5': 25, '6': 25, - '7': 25, '8': 25, '9': 25, 'E': 27, - 'J': 15, 'e': 27, 'j': 15}, + {'.': 27, '0': 24, '1': 26, '2': 26, + '3': 26, '4': 26, '5': 26, '6': 26, + '7': 26, '8': 26, '9': 26, 'E': 28, + 'J': 15, '_': 25, 'e': 28, 'j': 15}, # 25 - {'.': 26, '0': 25, '1': 25, '2': 25, - '3': 25, '4': 25, '5': 25, '6': 25, - '7': 25, '8': 25, '9': 25, 'E': 27, - 'J': 15, 'e': 27, 'j': 15}, + {'0': 45, '1': 46, '2': 46, '3': 46, + '4': 46, '5': 46, '6': 46, '7': 46, + '8': 46, '9': 46}, # 26 - {'0': 26, '1': 26, '2': 26, '3': 26, - '4': 26, '5': 26, '6': 26, '7': 26, - '8': 26, '9': 26, 'E': 40, 'J': 15, - 'e': 40, 'j': 15}, + {'.': 27, '0': 26, '1': 26, '2': 26, + '3': 26, '4': 26, '5': 26, '6': 26, + '7': 26, '8': 26, '9': 26, 'E': 28, + 'J': 15, '_': 47, 'e': 28, 'j': 15}, # 27 - {'+': 41, '-': 41, '0': 42, '1': 42, - '2': 42, '3': 42, '4': 42, '5': 42, - '6': 
42, '7': 42, '8': 42, '9': 42}, + {'0': 27, '1': 27, '2': 27, '3': 27, + '4': 27, '5': 27, '6': 27, '7': 27, + '8': 27, '9': 27, 'E': 48, 'J': 15, + 'e': 48, 'j': 15}, # 28 - {'0': 28, '1': 28, '2': 28, '3': 28, - '4': 28, '5': 28, '6': 28, '7': 28, - '8': 28, '9': 28, 'E': 40, 'J': 15, - 'e': 40, 'j': 15}, + {'+': 49, '-': 49, '0': 50, '1': 50, + '2': 50, '3': 50, '4': 50, '5': 50, + '6': 50, '7': 50, '8': 50, '9': 50}, # 29 + {'0': 51, '1': 51, '2': 51, '3': 51, + '4': 51, '5': 51, '6': 51, '7': 51, + '8': 51, '9': 51}, + # 30 + {'0': 30, '1': 30, '2': 30, '3': 30, + '4': 30, '5': 30, '6': 30, '7': 30, + '8': 30, '9': 30, 'E': 48, 'J': 15, + '_': 52, 'e': 48, 'j': 15}, + # 31 {'.': 15}, - # 30 + # 32 {}, - # 31 + # 33 {"'": 15}, - # 32 - {automata.DEFAULT: 43, '\n': 15, '\r': 16}, - # 33 - {automata.DEFAULT: 33, '\n': 30, - '\r': 30, "'": 15, '\\': 32}, # 34 + {automata.DEFAULT: 53, '\n': 15, '\r': 16}, + # 35 + {automata.DEFAULT: 35, '\n': 32, + '\r': 32, "'": 15, '\\': 34}, + # 36 {'"': 15}, - # 35 - {automata.DEFAULT: 44, '\n': 15, '\r': 16}, - # 36 - {automata.DEFAULT: 36, '\n': 30, - '\r': 30, '"': 15, '\\': 35}, # 37 - {'0': 37, '1': 37, '2': 37, '3': 37, - '4': 37, '5': 37, '6': 37, '7': 37, - '8': 37, '9': 37, 'A': 37, 'B': 37, - 'C': 37, 'D': 37, 'E': 37, 'F': 37, - 'a': 37, 'b': 37, 'c': 37, 'd': 37, - 'e': 37, 'f': 37}, + {automata.DEFAULT: 54, '\n': 15, '\r': 16}, # 38 - {'0': 38, '1': 38, '2': 38, '3': 38, - '4': 38, '5': 38, '6': 38, '7': 38}, + {automata.DEFAULT: 38, '\n': 32, + '\r': 32, '"': 15, '\\': 37}, # 39 - {'0': 39, '1': 39}, + {'0': 39, '1': 39, '2': 39, '3': 39, + '4': 39, '5': 39, '6': 39, '7': 39, + '8': 39, '9': 39, 'A': 39, 'B': 39, + 'C': 39, 'D': 39, 'E': 39, 'F': 39, + '_': 55, 'a': 39, 'b': 39, 'c': 39, + 'd': 39, 'e': 39, 'f': 39}, # 40 - {'+': 45, '-': 45, '0': 46, '1': 46, - '2': 46, '3': 46, '4': 46, '5': 46, - '6': 46, '7': 46, '8': 46, '9': 46}, + {'0': 56, '1': 56, '2': 56, '3': 56, + '4': 56, '5': 56, '6': 56, '7': 56, + 
'8': 56, '9': 56, 'A': 56, 'B': 56, + 'C': 56, 'D': 56, 'E': 56, 'F': 56, + 'a': 56, 'b': 56, 'c': 56, 'd': 56, + 'e': 56, 'f': 56}, # 41 - {'0': 42, '1': 42, '2': 42, '3': 42, - '4': 42, '5': 42, '6': 42, '7': 42, - '8': 42, '9': 42}, + {'0': 41, '1': 41, '2': 41, '3': 41, + '4': 41, '5': 41, '6': 41, '7': 41, + '_': 57}, # 42 - {'0': 42, '1': 42, '2': 42, '3': 42, - '4': 42, '5': 42, '6': 42, '7': 42, - '8': 42, '9': 42, 'J': 15, 'j': 15}, + {'0': 58, '1': 58, '2': 58, '3': 58, + '4': 58, '5': 58, '6': 58, '7': 58}, # 43 - {automata.DEFAULT: 43, '\n': 30, - '\r': 30, "'": 15, '\\': 32}, + {'0': 43, '1': 43, '_': 59}, # 44 - {automata.DEFAULT: 44, '\n': 30, - '\r': 30, '"': 15, '\\': 35}, + {'0': 60, '1': 60}, # 45 + {'.': 27, '0': 45, '1': 46, '2': 46, + '3': 46, '4': 46, '5': 46, '6': 46, + '7': 46, '8': 46, '9': 46, 'E': 28, + 'J': 15, '_': 25, 'e': 28, 'j': 15}, + # 46 + {'.': 27, '0': 46, '1': 46, '2': 46, + '3': 46, '4': 46, '5': 46, '6': 46, + '7': 46, '8': 46, '9': 46, 'E': 28, + 'J': 15, '_': 47, 'e': 28, 'j': 15}, + # 47 {'0': 46, '1': 46, '2': 46, '3': 46, '4': 46, '5': 46, '6': 46, '7': 46, '8': 46, '9': 46}, - # 46 - {'0': 46, '1': 46, '2': 46, '3': 46, - '4': 46, '5': 46, '6': 46, '7': 46, - '8': 46, '9': 46, 'J': 15, 'j': 15}, + # 48 + {'+': 61, '-': 61, '0': 62, '1': 62, + '2': 62, '3': 62, '4': 62, '5': 62, + '6': 62, '7': 62, '8': 62, '9': 62}, + # 49 + {'0': 50, '1': 50, '2': 50, '3': 50, + '4': 50, '5': 50, '6': 50, '7': 50, + '8': 50, '9': 50}, + # 50 + {'0': 50, '1': 50, '2': 50, '3': 50, + '4': 50, '5': 50, '6': 50, '7': 50, + '8': 50, '9': 50, 'J': 15, '_': 63, + 'j': 15}, + # 51 + {'.': 27, '0': 51, '1': 51, '2': 51, + '3': 51, '4': 51, '5': 51, '6': 51, + '7': 51, '8': 51, '9': 51, 'E': 28, + 'J': 15, '_': 29, 'e': 28, 'j': 15}, + # 52 + {'0': 64, '1': 64, '2': 64, '3': 64, + '4': 64, '5': 64, '6': 64, '7': 64, + '8': 64, '9': 64}, + # 53 + {automata.DEFAULT: 53, '\n': 32, + '\r': 32, "'": 15, '\\': 34}, + # 54 + {automata.DEFAULT: 54, 
'\n': 32, + '\r': 32, '"': 15, '\\': 37}, + # 55 + {'0': 65, '1': 65, '2': 65, '3': 65, + '4': 65, '5': 65, '6': 65, '7': 65, + '8': 65, '9': 65, 'A': 65, 'B': 65, + 'C': 65, 'D': 65, 'E': 65, 'F': 65, + 'a': 65, 'b': 65, 'c': 65, 'd': 65, + 'e': 65, 'f': 65}, + # 56 + {'0': 56, '1': 56, '2': 56, '3': 56, + '4': 56, '5': 56, '6': 56, '7': 56, + '8': 56, '9': 56, 'A': 56, 'B': 56, + 'C': 56, 'D': 56, 'E': 56, 'F': 56, + '_': 66, 'a': 56, 'b': 56, 'c': 56, + 'd': 56, 'e': 56, 'f': 56}, + # 57 + {'0': 67, '1': 67, '2': 67, '3': 67, + '4': 67, '5': 67, '6': 67, '7': 67}, + # 58 + {'0': 58, '1': 58, '2': 58, '3': 58, + '4': 58, '5': 58, '6': 58, '7': 58, + '_': 68}, + # 59 + {'0': 69, '1': 69}, + # 60 + {'0': 60, '1': 60, '_': 70}, + # 61 + {'0': 62, '1': 62, '2': 62, '3': 62, + '4': 62, '5': 62, '6': 62, '7': 62, + '8': 62, '9': 62}, + # 62 + {'0': 62, '1': 62, '2': 62, '3': 62, + '4': 62, '5': 62, '6': 62, '7': 62, + '8': 62, '9': 62, 'J': 15, '_': 71, + 'j': 15}, + # 63 + {'0': 72, '1': 72, '2': 72, '3': 72, + '4': 72, '5': 72, '6': 72, '7': 72, + '8': 72, '9': 72}, + # 64 + {'0': 64, '1': 64, '2': 64, '3': 64, + '4': 64, '5': 64, '6': 64, '7': 64, + '8': 64, '9': 64, 'E': 48, 'J': 15, + '_': 52, 'e': 48, 'j': 15}, + # 65 + {'0': 65, '1': 65, '2': 65, '3': 65, + '4': 65, '5': 65, '6': 65, '7': 65, + '8': 65, '9': 65, 'A': 65, 'B': 65, + 'C': 65, 'D': 65, 'E': 65, 'F': 65, + '_': 55, 'a': 65, 'b': 65, 'c': 65, + 'd': 65, 'e': 65, 'f': 65}, + # 66 + {'0': 73, '1': 73, '2': 73, '3': 73, + '4': 73, '5': 73, '6': 73, '7': 73, + '8': 73, '9': 73, 'A': 73, 'B': 73, + 'C': 73, 'D': 73, 'E': 73, 'F': 73, + 'a': 73, 'b': 73, 'c': 73, 'd': 73, + 'e': 73, 'f': 73}, + # 67 + {'0': 67, '1': 67, '2': 67, '3': 67, + '4': 67, '5': 67, '6': 67, '7': 67, + '_': 57}, + # 68 + {'0': 74, '1': 74, '2': 74, '3': 74, + '4': 74, '5': 74, '6': 74, '7': 74}, + # 69 + {'0': 69, '1': 69, '_': 59}, + # 70 + {'0': 75, '1': 75}, + # 71 + {'0': 76, '1': 76, '2': 76, '3': 76, + '4': 76, '5': 76, '6': 
76, '7': 76, + '8': 76, '9': 76}, + # 72 + {'0': 72, '1': 72, '2': 72, '3': 72, + '4': 72, '5': 72, '6': 72, '7': 72, + '8': 72, '9': 72, 'J': 15, '_': 63, + 'j': 15}, + # 73 + {'0': 73, '1': 73, '2': 73, '3': 73, + '4': 73, '5': 73, '6': 73, '7': 73, + '8': 73, '9': 73, 'A': 73, 'B': 73, + 'C': 73, 'D': 73, 'E': 73, 'F': 73, + '_': 66, 'a': 73, 'b': 73, 'c': 73, + 'd': 73, 'e': 73, 'f': 73}, + # 74 + {'0': 74, '1': 74, '2': 74, '3': 74, + '4': 74, '5': 74, '6': 74, '7': 74, + '_': 68}, + # 75 + {'0': 75, '1': 75, '_': 70}, + # 76 + {'0': 76, '1': 76, '2': 76, '3': 76, + '4': 76, '5': 76, '6': 76, '7': 76, + '8': 76, '9': 76, 'J': 15, '_': 71, + 'j': 15}, ] pseudoDFA = automata.DFA(states, accepts) diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py --- a/pypy/interpreter/pyparser/gendfa.py +++ b/pypy/interpreter/pyparser/gendfa.py @@ -60,28 +60,43 @@ # Digits def makeDigits (): return groupStr(states, "0123456789") + def makeDigitsChain (digits="0123456789", first=None, + allow_leading_underscore=False): + if first is None: + first = digits + if allow_leading_underscore: + return group(states, + makeDigitsChain(digits=digits), + chain(states, + newArcPair(states, "_"), + makeDigitsChain(digits=digits))) + return chain(states, + groupStr(states, first), + any(states, groupStr(states, digits)), + any(states, + chain(states, + newArcPair(states, "_"), + atleastonce(states, groupStr(states, digits))))) + # ____________________________________________________________ # Integer numbers hexNumber = chain(states, newArcPair(states, "0"), groupStr(states, "xX"), - atleastonce(states, - groupStr(states, "0123456789abcdefABCDEF"))) + makeDigitsChain("0123456789abcdefABCDEF", + allow_leading_underscore=True)) octNumber = chain(states, newArcPair(states, "0"), groupStr(states, "oO"), - groupStr(states, "01234567"), - any(states, groupStr(states, "01234567"))) + makeDigitsChain("01234567", + allow_leading_underscore=True)) binNumber = 
chain(states, newArcPair(states, "0"), groupStr(states, "bB"), - atleastonce(states, groupStr(states, "01"))) - decNumber = chain(states, - groupStr(states, "123456789"), - any(states, makeDigits())) - zero = chain(states, - newArcPair(states, "0"), - any(states, newArcPair(states, "0"))) + makeDigitsChain("01", + allow_leading_underscore=True)) + decNumber = makeDigitsChain(first="123456789") + zero = makeDigitsChain("0") intNumber = group(states, hexNumber, octNumber, binNumber, decNumber, zero) # ____________________________________________________________ # Exponents @@ -89,29 +104,34 @@ return chain(states, groupStr(states, "eE"), maybe(states, groupStr(states, "+-")), - atleastonce(states, makeDigits())) + makeDigitsChain()) + # ____________________________________________________________ # Floating point numbers + def makePointFloat (): + return group(states, + chain(states, + makeDigitsChain(), + newArcPair(states, "."), + any(states, makeDigits())), + chain(states, + newArcPair(states, "."), + makeDigitsChain())) def makeFloat (): - pointFloat = chain(states, - group(states, - chain(states, - atleastonce(states, makeDigits()), - newArcPair(states, "."), - any(states, makeDigits())), - chain(states, - newArcPair(states, "."), - atleastonce(states, makeDigits()))), - maybe(states, makeExp())) + pointFloat = group(states, + makePointFloat(), + chain(states, + makePointFloat(), + makeExp())) expFloat = chain(states, - atleastonce(states, makeDigits()), + makeDigitsChain(), makeExp()) return group(states, pointFloat, expFloat) # ____________________________________________________________ # Imaginary numbers imagNumber = group(states, chain(states, - atleastonce(states, makeDigits()), + makeDigitsChain(), groupStr(states, "jJ")), chain(states, makeFloat(), diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py --- a/pypy/interpreter/pyparser/test/test_pyparse.py +++ 
b/pypy/interpreter/pyparser/test/test_pyparse.py @@ -191,8 +191,90 @@ async with a: pass""") py.test.raises(SyntaxError, self.parse, 'def foo(): async with a: pass') - - + + def test_number_underscores(self): + VALID_UNDERSCORE_LITERALS = [ + '0_0_0', + '4_2', + '1_0000_0000', + '0b1001_0100', + '0xffff_ffff', + '0o5_7_7', + '1_00_00.5', + '1_00_00.5e5', + '1_00_00e5_1', + '1e1_0', + '.1_4', + '.1_4e1', + '0b_0', + '0x_f', + '0o_5', + '1_00_00j', + '1_00_00.5j', + '1_00_00e5_1j', + '.1_4j', + '(1_2.5+3_3j)', + '(.5_6j)', + ] + INVALID_UNDERSCORE_LITERALS = [ + # Trailing underscores: + '0_', + '42_', + '1.4j_', + '0x_', + '0b1_', + '0xf_', + '0o5_', + '0 if 1_Else 1', + # Underscores in the base selector: + '0_b0', + '0_xf', + '0_o5', + # Old-style octal, still disallowed: + '0_7', + '09_99', + # Multiple consecutive underscores: + '4_______2', + '0.1__4', + '0.1__4j', + '0b1001__0100', + '0xffff__ffff', + '0x___', + '0o5__77', + '1e1__0', + '1e1__0j', + # Underscore right before a dot: + '1_.4', + '1_.4j', + # Underscore right after a dot: + '1._4', + '1._4j', + '._5', + '._5j', + # Underscore right after a sign: + '1.0e+_1', + '1.0e+_1j', + # Underscore right before j: + '1.4_j', + '1.4e5_j', + # Underscore right before e: + '1_e1', + '1.4_e1', + '1.4_e1j', + # Underscore right after e: + '1e_1', + '1.4e_1', + '1.4e_1j', + # Complex cases with parens: + '(1+1.5_j_)', + '(1+1.5_j)', + ] + for x in VALID_UNDERSCORE_LITERALS: + tree = self.parse(x) + for x in INVALID_UNDERSCORE_LITERALS: + print x + raises(SyntaxError, self.parse, "x = %s" % x) + class TestPythonParserWithSpace: _______________________________________________ pypy-commit mailing list pypy-commit@python.org https://mail.python.org/mailman/listinfo/pypy-commit