New submission from Ron Adam:

Replaced Unicode literals in tokenize.py and its test files with byte literals.

Added a compile step to the test to make sure the text file used in the
test is valid Python code.  This will catch changes that need to be
made to the text (gold) file for future Python versions.

components: Library (Lib)
files: tokenize_patch.diff
messages: 57366
nosy: ron_adam
severity: normal
status: open
title: Unicode literals in tokenize.py and tests.
versions: Python 3.0
Added file:

Index: Lib/tokenize.py
--- Lib/tokenize.py	(revision 58930)
+++ Lib/tokenize.py	(working copy)
@@ -69,10 +69,10 @@
 Single3 = r"[^'\\]*(?:(?:\\.|'(?!''))[^'\\]*)*'''"
 # Tail end of """ string.
 Double3 = r'[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""'
-Triple = group("[uU]?[rR]?'''", '[uU]?[rR]?"""')
+Triple = group("[bB]?[rR]?'''", '[bB]?[rR]?"""')
 # Single-line ' or " string.
-String = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
-               r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
+String = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*'",
+               r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*"')
 # Because of leftmost-then-longest match semantics, be sure to put the
 # longest operators first (e.g., if = came before ==, == would get
@@ -90,9 +90,9 @@
 Token = Ignore + PlainToken
 # First (or only) line of ' or " string.
-ContStr = group(r"[uU]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
+ContStr = group(r"[bB]?[rR]?'[^\n'\\]*(?:\\.[^\n'\\]*)*" +
                 group("'", r'\\\r?\n'),
-                r'[uU]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
+                r'[bB]?[rR]?"[^\n"\\]*(?:\\.[^\n"\\]*)*' +
                 group('"', r'\\\r?\n'))
 PseudoExtras = group(r'\\\r?\n', Comment, Triple)
 PseudoToken = Whitespace + group(PseudoExtras, Number, Funny, ContStr, Name)
@@ -102,28 +102,28 @@
 endprogs = {"'": re.compile(Single), '"': re.compile(Double),
             "'''": single3prog, '"""': double3prog,
             "r'''": single3prog, 'r"""': double3prog,
-            "u'''": single3prog, 'u"""': double3prog,
-            "ur'''": single3prog, 'ur"""': double3prog,
+            "b'''": single3prog, 'b"""': double3prog,
+            "br'''": single3prog, 'br"""': double3prog,
             "R'''": single3prog, 'R"""': double3prog,
-            "U'''": single3prog, 'U"""': double3prog,
-            "uR'''": single3prog, 'uR"""': double3prog,
-            "Ur'''": single3prog, 'Ur"""': double3prog,
-            "UR'''": single3prog, 'UR"""': double3prog,
-            'r': None, 'R': None, 'u': None, 'U': None}
+            "B'''": single3prog, 'B"""': double3prog,
+            "bR'''": single3prog, 'bR"""': double3prog,
+            "Br'''": single3prog, 'Br"""': double3prog,
+            "BR'''": single3prog, 'BR"""': double3prog,
+            'r': None, 'R': None, 'b': None, 'B': None}
 triple_quoted = {}
 for t in ("'''", '"""',
           "r'''", 'r"""', "R'''", 'R"""',
-          "u'''", 'u"""', "U'''", 'U"""',
-          "ur'''", 'ur"""', "Ur'''", 'Ur"""',
-          "uR'''", 'uR"""', "UR'''", 'UR"""'):
+          "b'''", 'b"""', "B'''", 'B"""',
+          "br'''", 'br"""', "Br'''", 'Br"""',
+          "bR'''", 'bR"""', "BR'''", 'BR"""'):
     triple_quoted[t] = t
 single_quoted = {}
 for t in ("'", '"',
           "r'", 'r"', "R'", 'R"',
-          "u'", 'u"', "U'", 'U"',
-          "ur'", 'ur"', "Ur'", 'Ur"',
-          "uR'", 'uR"', "UR'", 'UR"' ):
+          "b'", 'b"', "B'", 'B"',
+          "br'", 'br"', "Br'", 'Br"',
+          "bR'", 'bR"', "BR'", 'BR"' ):
     single_quoted[t] = t
 tabsize = 8
Index: Lib/test/test_tokenize.py
--- Lib/test/test_tokenize.py	(revision 58930)
+++ Lib/test/test_tokenize.py	(working copy)
@@ -183,17 +183,26 @@
     next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL
+    # Validate the tokenize_tests.txt file.
+    # This makes sure it's compiles and displays any errors in it.
+    f = open(findfile('tokenize_tests.txt'))
+    sf = f.read()
+    f.close()
+    cf = compile(sf, 'tokenize_tests.txt', 'exec')
     # This displays the tokenization of to stdout, and
     # checks that this equals the expected output (in the
     # test/output/ directory).
     f = open(findfile('tokenize_tests.txt'))
-    # Now run test_roundtrip() over too, and over all
+    # Now run test_roundtrip() over too, and over all
     # (if the "compiler" resource is enabled) or a small random sample (if
     # "compiler" is not enabled) of the test*.py files.
-    f = findfile('tokenize_tests.txt')
+    f = findfile('')
+    if verbose:
+        print('    round trip: ', f, file=sys.__stdout__)
     testdir = os.path.dirname(f) or os.curdir
Index: Lib/test/tokenize_tests.txt
--- Lib/test/tokenize_tests.txt	(revision 58930)
+++ Lib/test/tokenize_tests.txt	(working copy)
@@ -110,19 +110,19 @@
 bar \\ baz
 """ + R'''spam
-x = u'abc' + U'ABC'
-y = u"abc" + U"ABC"
-x = ur'abc' + Ur'ABC' + uR'ABC' + UR'ABC'
-y = ur"abc" + Ur"ABC" + uR"ABC" + UR"ABC"
-x = ur'\\' + UR'\\'
-x = ur'\'' + ''
-y = ur'''
+x = b'abc' + B'ABC'
+y = b"abc" + B"ABC"
+x = br'abc' + Br'ABC' + bR'ABC' + BR'ABC'
+y = br"abc" + Br"ABC" + bR"ABC" + BR"ABC"
+x = br'\\' + BR'\\'
+x = br'\'' + ''
+y = br'''
 foo bar \\
-baz''' + UR'''
+baz''' + BR'''
-y = Ur"""foo
+y = Br"""foo
 bar \\ baz
-""" + uR'''spam
+""" + bR'''spam
 # Indentation
Index: Lib/test/output/test_tokenize
--- Lib/test/output/test_tokenize	(revision 58930)
+++ Lib/test/output/test_tokenize	(working copy)
@@ -342,59 +342,59 @@
 112,3-112,4:	NEWLINE	'\n'
 113,0-113,1:	NAME	'x'
 113,2-113,3:	OP	'='
-113,4-113,10:	STRING	"u'abc'"
+113,4-113,10:	STRING	"b'abc'"
 113,11-113,12:	OP	'+'
-113,13-113,19:	STRING	"U'ABC'"
+113,13-113,19:	STRING	"B'ABC'"
 113,19-113,20:	NEWLINE	'\n'
 114,0-114,1:	NAME	'y'
 114,2-114,3:	OP	'='
-114,4-114,10:	STRING	'u"abc"'
+114,4-114,10:	STRING	'b"abc"'
 114,11-114,12:	OP	'+'
-114,13-114,19:	STRING	'U"ABC"'
+114,13-114,19:	STRING	'B"ABC"'
 114,19-114,20:	NEWLINE	'\n'
 115,0-115,1:	NAME	'x'
 115,2-115,3:	OP	'='
-115,4-115,11:	STRING	"ur'abc'"
+115,4-115,11:	STRING	"br'abc'"
 115,12-115,13:	OP	'+'
-115,14-115,21:	STRING	"Ur'ABC'"
+115,14-115,21:	STRING	"Br'ABC'"
 115,22-115,23:	OP	'+'
-115,24-115,31:	STRING	"uR'ABC'"
+115,24-115,31:	STRING	"bR'ABC'"
 115,32-115,33:	OP	'+'
-115,34-115,41:	STRING	"UR'ABC'"
+115,34-115,41:	STRING	"BR'ABC'"
 115,41-115,42:	NEWLINE	'\n'
 116,0-116,1:	NAME	'y'
 116,2-116,3:	OP	'='
-116,4-116,11:	STRING	'ur"abc"'
+116,4-116,11:	STRING	'br"abc"'
 116,12-116,13:	OP	'+'
-116,14-116,21:	STRING	'Ur"ABC"'
+116,14-116,21:	STRING	'Br"ABC"'
 116,22-116,23:	OP	'+'
-116,24-116,31:	STRING	'uR"ABC"'
+116,24-116,31:	STRING	'bR"ABC"'
 116,32-116,33:	OP	'+'
-116,34-116,41:	STRING	'UR"ABC"'
+116,34-116,41:	STRING	'BR"ABC"'
 116,41-116,42:	NEWLINE	'\n'
 117,0-117,1:	NAME	'x'
 117,2-117,3:	OP	'='
-117,4-117,10:	STRING	"ur'\\\\'"
+117,4-117,10:	STRING	"br'\\\\'"
 117,11-117,12:	OP	'+'
-117,13-117,19:	STRING	"UR'\\\\'"
+117,13-117,19:	STRING	"BR'\\\\'"
 117,19-117,20:	NEWLINE	'\n'
 118,0-118,1:	NAME	'x'
 118,2-118,3:	OP	'='
-118,4-118,10:	STRING	"ur'\\''"
+118,4-118,10:	STRING	"br'\\''"
 118,11-118,12:	OP	'+'
 118,13-118,15:	STRING	"''"
 118,15-118,16:	NEWLINE	'\n'
 119,0-119,1:	NAME	'y'
 119,2-119,3:	OP	'='
-119,4-121,6:	STRING	"ur'''\nfoo bar \\\\\nbaz'''"
+119,4-121,6:	STRING	"br'''\nfoo bar \\\\\nbaz'''"
 121,7-121,8:	OP	'+'
-121,9-122,6:	STRING	"UR'''\nfoo'''"
+121,9-122,6:	STRING	"BR'''\nfoo'''"
 122,6-122,7:	NEWLINE	'\n'
 123,0-123,1:	NAME	'y'
 123,2-123,3:	OP	'='
-123,4-125,3:	STRING	'Ur"""foo\nbar \\\\ baz\n"""'
+123,4-125,3:	STRING	'Br"""foo\nbar \\\\ baz\n"""'
 125,4-125,5:	OP	'+'
-125,6-126,3:	STRING	"uR'''spam\n'''"
+125,6-126,3:	STRING	"bR'''spam\n'''"
 126,3-126,4:	NEWLINE	'\n'
 127,0-127,1:	NL	'\n'
 128,0-128,13:	COMMENT	'# Indentation'
Python-bugs-list mailing list 

Reply via email to