Author: Amaury Forgeot d'Arc <[email protected]>
Branch: py3.6
Changeset: r93620:c123301c02cc
Date: 2018-01-03 12:17 +0100
http://bitbucket.org/pypy/pypy/changeset/c123301c02cc/
Log: Attempt to parse numbers with underscores
diff --git a/pypy/interpreter/pyparser/dfa_generated.py b/pypy/interpreter/pyparser/dfa_generated.py
--- a/pypy/interpreter/pyparser/dfa_generated.py
+++ b/pypy/interpreter/pyparser/dfa_generated.py
@@ -7,10 +7,14 @@
accepts = [True, True, True, True, True, True, True, True,
True, True, True, False, True, True, True, True,
True, False, False, False, True, False, False,
- False, True, False, True, False, True, False,
False, True, False, False, True, False, False,
- True, True, True, False, False, True, False,
- False, False, True]
+ True, False, False, True, False, False, True,
+ False, False, True, False, True, False, True,
+ False, True, False, False, False, False, True,
+ True, False, False, False, False, True, False,
+ True, False, True, False, True, False, True, True,
+ False, True, False, True, False, False, True,
+ True, True, True, True]
states = [
# 0
{'\t': 0, '\n': 15, '\x0c': 0,
@@ -110,21 +114,21 @@
'v': 1, 'w': 1, 'x': 1, 'y': 1,
'z': 1, '\x80': 1},
# 5
- {'.': 26, '0': 24, '1': 25, '2': 25,
- '3': 25, '4': 25, '5': 25, '6': 25,
- '7': 25, '8': 25, '9': 25, 'B': 23,
- 'E': 27, 'J': 15, 'O': 22, 'X': 21,
- 'b': 23, 'e': 27, 'j': 15, 'o': 22,
- 'x': 21},
+ {'.': 27, '0': 24, '1': 26, '2': 26,
+ '3': 26, '4': 26, '5': 26, '6': 26,
+ '7': 26, '8': 26, '9': 26, 'B': 23,
+ 'E': 28, 'J': 15, 'O': 22, 'X': 21,
+ '_': 25, 'b': 23, 'e': 28, 'j': 15,
+ 'o': 22, 'x': 21},
# 6
- {'.': 26, '0': 6, '1': 6, '2': 6,
+ {'.': 27, '0': 6, '1': 6, '2': 6,
'3': 6, '4': 6, '5': 6, '6': 6,
- '7': 6, '8': 6, '9': 6, 'E': 27,
- 'J': 15, 'e': 27, 'j': 15},
+ '7': 6, '8': 6, '9': 6, 'E': 28,
+ 'J': 15, '_': 29, 'e': 28, 'j': 15},
# 7
- {'.': 29, '0': 28, '1': 28, '2': 28,
- '3': 28, '4': 28, '5': 28, '6': 28,
- '7': 28, '8': 28, '9': 28},
+ {'.': 31, '0': 30, '1': 30, '2': 30,
+ '3': 30, '4': 30, '5': 30, '6': 30,
+ '7': 30, '8': 30, '9': 30},
# 8
{'*': 14, '=': 15},
# 9
@@ -144,107 +148,240 @@
# 16
{'\n': 15},
# 17
- {automata.DEFAULT: 33, '\n': 30,
- '\r': 30, "'": 31, '\\': 32},
+ {automata.DEFAULT: 35, '\n': 32,
+ '\r': 32, "'": 33, '\\': 34},
# 18
- {automata.DEFAULT: 36, '\n': 30,
- '\r': 30, '"': 34, '\\': 35},
+ {automata.DEFAULT: 38, '\n': 32,
+ '\r': 32, '"': 36, '\\': 37},
# 19
{'\n': 15, '\r': 16},
# 20
- {automata.DEFAULT: 20, '\n': 30, '\r': 30},
+ {automata.DEFAULT: 20, '\n': 32, '\r': 32},
# 21
- {'0': 37, '1': 37, '2': 37, '3': 37,
- '4': 37, '5': 37, '6': 37, '7': 37,
- '8': 37, '9': 37, 'A': 37, 'B': 37,
- 'C': 37, 'D': 37, 'E': 37, 'F': 37,
- 'a': 37, 'b': 37, 'c': 37, 'd': 37,
- 'e': 37, 'f': 37},
+ {'0': 39, '1': 39, '2': 39, '3': 39,
+ '4': 39, '5': 39, '6': 39, '7': 39,
+ '8': 39, '9': 39, 'A': 39, 'B': 39,
+ 'C': 39, 'D': 39, 'E': 39, 'F': 39,
+ '_': 40, 'a': 39, 'b': 39, 'c': 39,
+ 'd': 39, 'e': 39, 'f': 39},
# 22
- {'0': 38, '1': 38, '2': 38, '3': 38,
- '4': 38, '5': 38, '6': 38, '7': 38},
+ {'0': 41, '1': 41, '2': 41, '3': 41,
+ '4': 41, '5': 41, '6': 41, '7': 41,
+ '_': 42},
# 23
- {'0': 39, '1': 39},
+ {'0': 43, '1': 43, '_': 44},
# 24
- {'.': 26, '0': 24, '1': 25, '2': 25,
- '3': 25, '4': 25, '5': 25, '6': 25,
- '7': 25, '8': 25, '9': 25, 'E': 27,
- 'J': 15, 'e': 27, 'j': 15},
+ {'.': 27, '0': 24, '1': 26, '2': 26,
+ '3': 26, '4': 26, '5': 26, '6': 26,
+ '7': 26, '8': 26, '9': 26, 'E': 28,
+ 'J': 15, '_': 25, 'e': 28, 'j': 15},
# 25
- {'.': 26, '0': 25, '1': 25, '2': 25,
- '3': 25, '4': 25, '5': 25, '6': 25,
- '7': 25, '8': 25, '9': 25, 'E': 27,
- 'J': 15, 'e': 27, 'j': 15},
+ {'0': 45, '1': 46, '2': 46, '3': 46,
+ '4': 46, '5': 46, '6': 46, '7': 46,
+ '8': 46, '9': 46},
# 26
- {'0': 26, '1': 26, '2': 26, '3': 26,
- '4': 26, '5': 26, '6': 26, '7': 26,
- '8': 26, '9': 26, 'E': 40, 'J': 15,
- 'e': 40, 'j': 15},
+ {'.': 27, '0': 26, '1': 26, '2': 26,
+ '3': 26, '4': 26, '5': 26, '6': 26,
+ '7': 26, '8': 26, '9': 26, 'E': 28,
+ 'J': 15, '_': 47, 'e': 28, 'j': 15},
# 27
- {'+': 41, '-': 41, '0': 42, '1': 42,
- '2': 42, '3': 42, '4': 42, '5': 42,
- '6': 42, '7': 42, '8': 42, '9': 42},
+ {'0': 27, '1': 27, '2': 27, '3': 27,
+ '4': 27, '5': 27, '6': 27, '7': 27,
+ '8': 27, '9': 27, 'E': 48, 'J': 15,
+ 'e': 48, 'j': 15},
# 28
- {'0': 28, '1': 28, '2': 28, '3': 28,
- '4': 28, '5': 28, '6': 28, '7': 28,
- '8': 28, '9': 28, 'E': 40, 'J': 15,
- 'e': 40, 'j': 15},
+ {'+': 49, '-': 49, '0': 50, '1': 50,
+ '2': 50, '3': 50, '4': 50, '5': 50,
+ '6': 50, '7': 50, '8': 50, '9': 50},
# 29
+ {'0': 51, '1': 51, '2': 51, '3': 51,
+ '4': 51, '5': 51, '6': 51, '7': 51,
+ '8': 51, '9': 51},
+ # 30
+ {'0': 30, '1': 30, '2': 30, '3': 30,
+ '4': 30, '5': 30, '6': 30, '7': 30,
+ '8': 30, '9': 30, 'E': 48, 'J': 15,
+ '_': 52, 'e': 48, 'j': 15},
+ # 31
{'.': 15},
- # 30
+ # 32
{},
- # 31
+ # 33
{"'": 15},
- # 32
- {automata.DEFAULT: 43, '\n': 15, '\r': 16},
- # 33
- {automata.DEFAULT: 33, '\n': 30,
- '\r': 30, "'": 15, '\\': 32},
# 34
+ {automata.DEFAULT: 53, '\n': 15, '\r': 16},
+ # 35
+ {automata.DEFAULT: 35, '\n': 32,
+ '\r': 32, "'": 15, '\\': 34},
+ # 36
{'"': 15},
- # 35
- {automata.DEFAULT: 44, '\n': 15, '\r': 16},
- # 36
- {automata.DEFAULT: 36, '\n': 30,
- '\r': 30, '"': 15, '\\': 35},
# 37
- {'0': 37, '1': 37, '2': 37, '3': 37,
- '4': 37, '5': 37, '6': 37, '7': 37,
- '8': 37, '9': 37, 'A': 37, 'B': 37,
- 'C': 37, 'D': 37, 'E': 37, 'F': 37,
- 'a': 37, 'b': 37, 'c': 37, 'd': 37,
- 'e': 37, 'f': 37},
+ {automata.DEFAULT: 54, '\n': 15, '\r': 16},
# 38
- {'0': 38, '1': 38, '2': 38, '3': 38,
- '4': 38, '5': 38, '6': 38, '7': 38},
+ {automata.DEFAULT: 38, '\n': 32,
+ '\r': 32, '"': 15, '\\': 37},
# 39
- {'0': 39, '1': 39},
+ {'0': 39, '1': 39, '2': 39, '3': 39,
+ '4': 39, '5': 39, '6': 39, '7': 39,
+ '8': 39, '9': 39, 'A': 39, 'B': 39,
+ 'C': 39, 'D': 39, 'E': 39, 'F': 39,
+ '_': 55, 'a': 39, 'b': 39, 'c': 39,
+ 'd': 39, 'e': 39, 'f': 39},
# 40
- {'+': 45, '-': 45, '0': 46, '1': 46,
- '2': 46, '3': 46, '4': 46, '5': 46,
- '6': 46, '7': 46, '8': 46, '9': 46},
+ {'0': 56, '1': 56, '2': 56, '3': 56,
+ '4': 56, '5': 56, '6': 56, '7': 56,
+ '8': 56, '9': 56, 'A': 56, 'B': 56,
+ 'C': 56, 'D': 56, 'E': 56, 'F': 56,
+ 'a': 56, 'b': 56, 'c': 56, 'd': 56,
+ 'e': 56, 'f': 56},
# 41
- {'0': 42, '1': 42, '2': 42, '3': 42,
- '4': 42, '5': 42, '6': 42, '7': 42,
- '8': 42, '9': 42},
+ {'0': 41, '1': 41, '2': 41, '3': 41,
+ '4': 41, '5': 41, '6': 41, '7': 41,
+ '_': 57},
# 42
- {'0': 42, '1': 42, '2': 42, '3': 42,
- '4': 42, '5': 42, '6': 42, '7': 42,
- '8': 42, '9': 42, 'J': 15, 'j': 15},
+ {'0': 58, '1': 58, '2': 58, '3': 58,
+ '4': 58, '5': 58, '6': 58, '7': 58},
# 43
- {automata.DEFAULT: 43, '\n': 30,
- '\r': 30, "'": 15, '\\': 32},
+ {'0': 43, '1': 43, '_': 59},
# 44
- {automata.DEFAULT: 44, '\n': 30,
- '\r': 30, '"': 15, '\\': 35},
+ {'0': 60, '1': 60},
# 45
+ {'.': 27, '0': 45, '1': 46, '2': 46,
+ '3': 46, '4': 46, '5': 46, '6': 46,
+ '7': 46, '8': 46, '9': 46, 'E': 28,
+ 'J': 15, '_': 25, 'e': 28, 'j': 15},
+ # 46
+ {'.': 27, '0': 46, '1': 46, '2': 46,
+ '3': 46, '4': 46, '5': 46, '6': 46,
+ '7': 46, '8': 46, '9': 46, 'E': 28,
+ 'J': 15, '_': 47, 'e': 28, 'j': 15},
+ # 47
{'0': 46, '1': 46, '2': 46, '3': 46,
'4': 46, '5': 46, '6': 46, '7': 46,
'8': 46, '9': 46},
- # 46
- {'0': 46, '1': 46, '2': 46, '3': 46,
- '4': 46, '5': 46, '6': 46, '7': 46,
- '8': 46, '9': 46, 'J': 15, 'j': 15},
+ # 48
+ {'+': 61, '-': 61, '0': 62, '1': 62,
+ '2': 62, '3': 62, '4': 62, '5': 62,
+ '6': 62, '7': 62, '8': 62, '9': 62},
+ # 49
+ {'0': 50, '1': 50, '2': 50, '3': 50,
+ '4': 50, '5': 50, '6': 50, '7': 50,
+ '8': 50, '9': 50},
+ # 50
+ {'0': 50, '1': 50, '2': 50, '3': 50,
+ '4': 50, '5': 50, '6': 50, '7': 50,
+ '8': 50, '9': 50, 'J': 15, '_': 63,
+ 'j': 15},
+ # 51
+ {'.': 27, '0': 51, '1': 51, '2': 51,
+ '3': 51, '4': 51, '5': 51, '6': 51,
+ '7': 51, '8': 51, '9': 51, 'E': 28,
+ 'J': 15, '_': 29, 'e': 28, 'j': 15},
+ # 52
+ {'0': 64, '1': 64, '2': 64, '3': 64,
+ '4': 64, '5': 64, '6': 64, '7': 64,
+ '8': 64, '9': 64},
+ # 53
+ {automata.DEFAULT: 53, '\n': 32,
+ '\r': 32, "'": 15, '\\': 34},
+ # 54
+ {automata.DEFAULT: 54, '\n': 32,
+ '\r': 32, '"': 15, '\\': 37},
+ # 55
+ {'0': 65, '1': 65, '2': 65, '3': 65,
+ '4': 65, '5': 65, '6': 65, '7': 65,
+ '8': 65, '9': 65, 'A': 65, 'B': 65,
+ 'C': 65, 'D': 65, 'E': 65, 'F': 65,
+ 'a': 65, 'b': 65, 'c': 65, 'd': 65,
+ 'e': 65, 'f': 65},
+ # 56
+ {'0': 56, '1': 56, '2': 56, '3': 56,
+ '4': 56, '5': 56, '6': 56, '7': 56,
+ '8': 56, '9': 56, 'A': 56, 'B': 56,
+ 'C': 56, 'D': 56, 'E': 56, 'F': 56,
+ '_': 66, 'a': 56, 'b': 56, 'c': 56,
+ 'd': 56, 'e': 56, 'f': 56},
+ # 57
+ {'0': 67, '1': 67, '2': 67, '3': 67,
+ '4': 67, '5': 67, '6': 67, '7': 67},
+ # 58
+ {'0': 58, '1': 58, '2': 58, '3': 58,
+ '4': 58, '5': 58, '6': 58, '7': 58,
+ '_': 68},
+ # 59
+ {'0': 69, '1': 69},
+ # 60
+ {'0': 60, '1': 60, '_': 70},
+ # 61
+ {'0': 62, '1': 62, '2': 62, '3': 62,
+ '4': 62, '5': 62, '6': 62, '7': 62,
+ '8': 62, '9': 62},
+ # 62
+ {'0': 62, '1': 62, '2': 62, '3': 62,
+ '4': 62, '5': 62, '6': 62, '7': 62,
+ '8': 62, '9': 62, 'J': 15, '_': 71,
+ 'j': 15},
+ # 63
+ {'0': 72, '1': 72, '2': 72, '3': 72,
+ '4': 72, '5': 72, '6': 72, '7': 72,
+ '8': 72, '9': 72},
+ # 64
+ {'0': 64, '1': 64, '2': 64, '3': 64,
+ '4': 64, '5': 64, '6': 64, '7': 64,
+ '8': 64, '9': 64, 'E': 48, 'J': 15,
+ '_': 52, 'e': 48, 'j': 15},
+ # 65
+ {'0': 65, '1': 65, '2': 65, '3': 65,
+ '4': 65, '5': 65, '6': 65, '7': 65,
+ '8': 65, '9': 65, 'A': 65, 'B': 65,
+ 'C': 65, 'D': 65, 'E': 65, 'F': 65,
+ '_': 55, 'a': 65, 'b': 65, 'c': 65,
+ 'd': 65, 'e': 65, 'f': 65},
+ # 66
+ {'0': 73, '1': 73, '2': 73, '3': 73,
+ '4': 73, '5': 73, '6': 73, '7': 73,
+ '8': 73, '9': 73, 'A': 73, 'B': 73,
+ 'C': 73, 'D': 73, 'E': 73, 'F': 73,
+ 'a': 73, 'b': 73, 'c': 73, 'd': 73,
+ 'e': 73, 'f': 73},
+ # 67
+ {'0': 67, '1': 67, '2': 67, '3': 67,
+ '4': 67, '5': 67, '6': 67, '7': 67,
+ '_': 57},
+ # 68
+ {'0': 74, '1': 74, '2': 74, '3': 74,
+ '4': 74, '5': 74, '6': 74, '7': 74},
+ # 69
+ {'0': 69, '1': 69, '_': 59},
+ # 70
+ {'0': 75, '1': 75},
+ # 71
+ {'0': 76, '1': 76, '2': 76, '3': 76,
+ '4': 76, '5': 76, '6': 76, '7': 76,
+ '8': 76, '9': 76},
+ # 72
+ {'0': 72, '1': 72, '2': 72, '3': 72,
+ '4': 72, '5': 72, '6': 72, '7': 72,
+ '8': 72, '9': 72, 'J': 15, '_': 63,
+ 'j': 15},
+ # 73
+ {'0': 73, '1': 73, '2': 73, '3': 73,
+ '4': 73, '5': 73, '6': 73, '7': 73,
+ '8': 73, '9': 73, 'A': 73, 'B': 73,
+ 'C': 73, 'D': 73, 'E': 73, 'F': 73,
+ '_': 66, 'a': 73, 'b': 73, 'c': 73,
+ 'd': 73, 'e': 73, 'f': 73},
+ # 74
+ {'0': 74, '1': 74, '2': 74, '3': 74,
+ '4': 74, '5': 74, '6': 74, '7': 74,
+ '_': 68},
+ # 75
+ {'0': 75, '1': 75, '_': 70},
+ # 76
+ {'0': 76, '1': 76, '2': 76, '3': 76,
+ '4': 76, '5': 76, '6': 76, '7': 76,
+ '8': 76, '9': 76, 'J': 15, '_': 71,
+ 'j': 15},
]
pseudoDFA = automata.DFA(states, accepts)
diff --git a/pypy/interpreter/pyparser/gendfa.py b/pypy/interpreter/pyparser/gendfa.py
--- a/pypy/interpreter/pyparser/gendfa.py
+++ b/pypy/interpreter/pyparser/gendfa.py
@@ -60,28 +60,43 @@
# Digits
def makeDigits ():
return groupStr(states, "0123456789")
+ def makeDigitsChain (digits="0123456789", first=None,
+ allow_leading_underscore=False):
+ if first is None:
+ first = digits
+ if allow_leading_underscore:
+ return group(states,
+ makeDigitsChain(digits=digits),
+ chain(states,
+ newArcPair(states, "_"),
+ makeDigitsChain(digits=digits)))
+ return chain(states,
+ groupStr(states, first),
+ any(states, groupStr(states, digits)),
+ any(states,
+ chain(states,
+ newArcPair(states, "_"),
+ atleastonce(states, groupStr(states, digits)))))
+
# ____________________________________________________________
# Integer numbers
hexNumber = chain(states,
newArcPair(states, "0"),
groupStr(states, "xX"),
- atleastonce(states,
- groupStr(states, "0123456789abcdefABCDEF")))
+ makeDigitsChain("0123456789abcdefABCDEF",
+ allow_leading_underscore=True))
octNumber = chain(states,
newArcPair(states, "0"),
groupStr(states, "oO"),
- groupStr(states, "01234567"),
- any(states, groupStr(states, "01234567")))
+ makeDigitsChain("01234567",
+ allow_leading_underscore=True))
binNumber = chain(states,
newArcPair(states, "0"),
groupStr(states, "bB"),
- atleastonce(states, groupStr(states, "01")))
- decNumber = chain(states,
- groupStr(states, "123456789"),
- any(states, makeDigits()))
- zero = chain(states,
- newArcPair(states, "0"),
- any(states, newArcPair(states, "0")))
+ makeDigitsChain("01",
+ allow_leading_underscore=True))
+ decNumber = makeDigitsChain(first="123456789")
+ zero = makeDigitsChain("0")
intNumber = group(states, hexNumber, octNumber, binNumber, decNumber, zero)
# ____________________________________________________________
# Exponents
@@ -89,29 +104,34 @@
return chain(states,
groupStr(states, "eE"),
maybe(states, groupStr(states, "+-")),
- atleastonce(states, makeDigits()))
+ makeDigitsChain())
+
# ____________________________________________________________
# Floating point numbers
+ def makePointFloat ():
+ return group(states,
+ chain(states,
+ makeDigitsChain(),
+ newArcPair(states, "."),
+ any(states, makeDigits())),
+ chain(states,
+ newArcPair(states, "."),
+ makeDigitsChain()))
def makeFloat ():
- pointFloat = chain(states,
- group(states,
- chain(states,
- atleastonce(states, makeDigits()),
- newArcPair(states, "."),
- any(states, makeDigits())),
- chain(states,
- newArcPair(states, "."),
- atleastonce(states, makeDigits()))),
- maybe(states, makeExp()))
+ pointFloat = group(states,
+ makePointFloat(),
+ chain(states,
+ makePointFloat(),
+ makeExp()))
expFloat = chain(states,
- atleastonce(states, makeDigits()),
+ makeDigitsChain(),
makeExp())
return group(states, pointFloat, expFloat)
# ____________________________________________________________
# Imaginary numbers
imagNumber = group(states,
chain(states,
- atleastonce(states, makeDigits()),
+ makeDigitsChain(),
groupStr(states, "jJ")),
chain(states,
makeFloat(),
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -191,8 +191,90 @@
async with a:
pass""")
py.test.raises(SyntaxError, self.parse, 'def foo(): async with a: pass')
-
-
+
+ def test_number_underscores(self):
+ VALID_UNDERSCORE_LITERALS = [
+ '0_0_0',
+ '4_2',
+ '1_0000_0000',
+ '0b1001_0100',
+ '0xffff_ffff',
+ '0o5_7_7',
+ '1_00_00.5',
+ '1_00_00.5e5',
+ '1_00_00e5_1',
+ '1e1_0',
+ '.1_4',
+ '.1_4e1',
+ '0b_0',
+ '0x_f',
+ '0o_5',
+ '1_00_00j',
+ '1_00_00.5j',
+ '1_00_00e5_1j',
+ '.1_4j',
+ '(1_2.5+3_3j)',
+ '(.5_6j)',
+ ]
+ INVALID_UNDERSCORE_LITERALS = [
+ # Trailing underscores:
+ '0_',
+ '42_',
+ '1.4j_',
+ '0x_',
+ '0b1_',
+ '0xf_',
+ '0o5_',
+ '0 if 1_Else 1',
+ # Underscores in the base selector:
+ '0_b0',
+ '0_xf',
+ '0_o5',
+ # Old-style octal, still disallowed:
+ '0_7',
+ '09_99',
+ # Multiple consecutive underscores:
+ '4_______2',
+ '0.1__4',
+ '0.1__4j',
+ '0b1001__0100',
+ '0xffff__ffff',
+ '0x___',
+ '0o5__77',
+ '1e1__0',
+ '1e1__0j',
+ # Underscore right before a dot:
+ '1_.4',
+ '1_.4j',
+ # Underscore right after a dot:
+ '1._4',
+ '1._4j',
+ '._5',
+ '._5j',
+ # Underscore right after a sign:
+ '1.0e+_1',
+ '1.0e+_1j',
+ # Underscore right before j:
+ '1.4_j',
+ '1.4e5_j',
+ # Underscore right before e:
+ '1_e1',
+ '1.4_e1',
+ '1.4_e1j',
+ # Underscore right after e:
+ '1e_1',
+ '1.4e_1',
+ '1.4e_1j',
+ # Complex cases with parens:
+ '(1+1.5_j_)',
+ '(1+1.5_j)',
+ ]
+ for x in VALID_UNDERSCORE_LITERALS:
+ tree = self.parse(x)
+ for x in INVALID_UNDERSCORE_LITERALS:
+ print x
+ raises(SyntaxError, self.parse, "x = %s" % x)
+
class TestPythonParserWithSpace:
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit