Hello community,

here is the log from the commit of package python-parso for openSUSE:Factory checked in at 2019-07-21 11:28:55
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-parso (Old)
 and      /work/SRC/openSUSE:Factory/.python-parso.new.4126 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-parso" Sun Jul 21 11:28:55 2019 rev:9 rq:715639 version:0.5.1 Changes: -------- --- /work/SRC/openSUSE:Factory/python-parso/python-parso.changes 2019-07-08 14:59:35.086403902 +0200 +++ /work/SRC/openSUSE:Factory/.python-parso.new.4126/python-parso.changes 2019-07-21 11:28:56.444832074 +0200 @@ -1,0 +2,7 @@ +Tue Jul 16 10:37:44 UTC 2019 - Ondřej Súkup <[email protected]> + +- update to 0.5.1 + * Fix: Some unicode identifiers were not correctly tokenized + * Fix: Line continuations in f-strings are now working + +------------------------------------------------------------------- Old: ---- parso-0.5.0.tar.gz New: ---- parso-0.5.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-parso.spec ++++++ --- /var/tmp/diff_new_pack.jlB5Lu/_old 2019-07-21 11:28:57.136831956 +0200 +++ /var/tmp/diff_new_pack.jlB5Lu/_new 2019-07-21 11:28:57.140831955 +0200 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-parso -Version: 0.5.0 +Version: 0.5.1 Release: 0 Summary: An autocompletion tool for Python License: MIT AND Python-2.0 ++++++ parso-0.5.0.tar.gz -> parso-0.5.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/CHANGELOG.rst new/parso-0.5.1/CHANGELOG.rst --- old/parso-0.5.0/CHANGELOG.rst 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/CHANGELOG.rst 2019-07-13 15:47:10.000000000 +0200 @@ -3,6 +3,12 @@ Changelog --------- +0.5.1 (2019-07-13) +++++++++++++++++++ + +- Fix: Some unicode identifiers were not correctly tokenized +- Fix: Line continuations in f-strings are now working + 0.5.0 (2019-06-20) ++++++++++++++++++ @@ -17,19 +23,19 @@ - Python 3.8 support - FileIO support, it's now possible to use abstract file IO, support is alpha -0.3.4 (2018-02-13) +0.3.4 (2019-02-13) +++++++++++++++++++ - Fix an f-string tokenizer error -0.3.3 (2018-02-06) +0.3.3 
(2019-02-06) +++++++++++++++++++ - Fix async errors in the diff parser - A fix in iter_errors - This is a very small bugfix release -0.3.2 (2018-01-24) +0.3.2 (2019-01-24) +++++++++++++++++++ - 20+ bugfixes in the diff parser and 3 in the tokenizer diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/PKG-INFO new/parso-0.5.1/PKG-INFO --- old/parso-0.5.0/PKG-INFO 2019-06-20 22:11:18.000000000 +0200 +++ new/parso-0.5.1/PKG-INFO 2019-07-13 15:50:20.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: parso -Version: 0.5.0 +Version: 0.5.1 Summary: A Python Parser Home-page: https://github.com/davidhalter/parso Author: David Halter @@ -106,6 +106,12 @@ Changelog --------- + 0.5.1 (2019-07-13) + ++++++++++++++++++ + + - Fix: Some unicode identifiers were not correctly tokenized + - Fix: Line continuations in f-strings are now working + 0.5.0 (2019-06-20) ++++++++++++++++++ @@ -120,19 +126,19 @@ - Python 3.8 support - FileIO support, it's now possible to use abstract file IO, support is alpha - 0.3.4 (2018-02-13) + 0.3.4 (2019-02-13) +++++++++++++++++++ - Fix an f-string tokenizer error - 0.3.3 (2018-02-06) + 0.3.3 (2019-02-06) +++++++++++++++++++ - Fix async errors in the diff parser - A fix in iter_errors - This is a very small bugfix release - 0.3.2 (2018-01-24) + 0.3.2 (2019-01-24) +++++++++++++++++++ - 20+ bugfixes in the diff parser and 3 in the tokenizer diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/parso/__init__.py new/parso-0.5.1/parso/__init__.py --- old/parso-0.5.0/parso/__init__.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/parso/__init__.py 2019-07-13 15:47:10.000000000 +0200 @@ -43,7 +43,7 @@ from parso.utils import split_lines, python_bytes_to_unicode -__version__ = '0.5.0' +__version__ = '0.5.1' def parse(code=None, **kwargs): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/parso-0.5.0/parso/grammar.py new/parso-0.5.1/parso/grammar.py --- old/parso-0.5.0/parso/grammar.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/parso/grammar.py 2019-07-13 15:47:10.000000000 +0200 @@ -57,7 +57,8 @@ :param str path: The path to the file you want to open. Only needed for caching. :param bool cache: Keeps a copy of the parser tree in RAM and on disk if a path is given. Returns the cached trees if the corresponding - files on disk have not changed. + files on disk have not changed. Note that this stores pickle files + on your file system (e.g. for Linux in ``~/.cache/parso/``). :param bool diff_cache: Diffs the cached python module against the new code and tries to parse only the parts that have changed. Returns the same (changed) module that is found in cache. Using this option diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/parso/python/tokenize.py new/parso-0.5.1/parso/python/tokenize.py --- old/parso-0.5.0/parso/python/tokenize.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/parso/python/tokenize.py 2019-07-13 15:47:10.000000000 +0200 @@ -23,6 +23,9 @@ from parso.utils import split_lines +# Maximum code point of Unicode 6.0: 0x10ffff (1,114,111) +MAX_UNICODE = '\U0010ffff' + STRING = PythonTokenTypes.STRING NAME = PythonTokenTypes.NAME NUMBER = PythonTokenTypes.NUMBER @@ -51,8 +54,13 @@ # Python 3 has str.isidentifier() to check if a char is a valid identifier is_identifier = str.isidentifier else: - namechars = string.ascii_letters + '_' - is_identifier = lambda s: s in namechars + # Python 2 doesn't, but it's not that important anymore and if you tokenize + # Python 2 code with this, it's still ok. It's just that parsing Python 3 + # code with this function is not 100% correct. + # This just means that Python 2 code matches a few identifiers too much, + # but that doesn't really matter. 
+ def is_identifier(s): + return True def group(*choices, **kwargs): @@ -118,9 +126,9 @@ return result -fstring_string_single_line = _compile(r'(?:[^{}\r\n]+|\{\{|\}\})+') +fstring_string_single_line = _compile(r'(?:\{\{|\}\}|\\(?:\r\n?|\n)|[^{}\r\n])+') fstring_string_multi_line = _compile(r'(?:[^{}]+|\{\{|\}\})+') -fstring_format_spec_single_line = _compile(r'[^{}\r\n]+') +fstring_format_spec_single_line = _compile(r'(?:\\(?:\r\n?|\n)|[^{}\r\n])+') fstring_format_spec_multi_line = _compile(r'[^{}]+') @@ -130,7 +138,16 @@ Whitespace = r'[ \f\t]*' whitespace = _compile(Whitespace) Comment = r'#[^\r\n]*' - Name = r'\w+' + # Python 2 is pretty much not working properly anymore, we just ignore + # parsing unicode properly, which is fine, I guess. + if version_info[0] == 2: + Name = r'([A-Za-z_0-9]+)' + elif sys.version_info[0] == 2: + # Unfortunately the regex engine cannot deal with the regex below, so + # just use this one. + Name = r'(\w+)' + else: + Name = u'([A-Za-z_0-9\u0080-' + MAX_UNICODE + ']+)' if version_info >= (3, 6): Hexnumber = r'0[xX](?:_?[0-9a-fA-F])+' @@ -340,7 +357,9 @@ new_pos = pos new_pos += len(string) - if allow_multiline and (string.endswith('\n') or string.endswith('\r')): + # even if allow_multiline is False, we still need to check for trailing + # newlines, because a single-line f-string can contain line continuations + if string.endswith('\n') or string.endswith('\r'): tos.previous_lines += string string = '' else: @@ -510,6 +529,24 @@ if (initial in numchars or # ordinary number (initial == '.' and token != '.' and token != '...')): yield PythonToken(NUMBER, token, spos, prefix) + elif pseudomatch.group(3) is not None: # ordinary name + if token in always_break_tokens: + fstring_stack[:] = [] + paren_level = 0 + # We only want to dedent if the token is on a new line. 
+ if re.match(r'[ \f\t]*$', line[:start]): + while True: + indent = indents.pop() + if indent > start: + yield PythonToken(DEDENT, '', spos, '') + else: + indents.append(indent) + break + if is_identifier(token): + yield PythonToken(NAME, token, spos, prefix) + else: + for t in _split_illegal_unicode_name(token, spos, prefix): + yield t # yield from Python 2 elif initial in '\r\n': if any(not f.allow_multiline() for f in fstring_stack): # Would use fstring_stack.clear, but that's not available @@ -564,20 +601,6 @@ elif token in fstring_pattern_map: # The start of an fstring. fstring_stack.append(FStringNode(fstring_pattern_map[token])) yield PythonToken(FSTRING_START, token, spos, prefix) - elif is_identifier(initial): # ordinary name - if token in always_break_tokens: - fstring_stack[:] = [] - paren_level = 0 - # We only want to dedent if the token is on a new line. - if re.match(r'[ \f\t]*$', line[:start]): - while True: - indent = indents.pop() - if indent > start: - yield PythonToken(DEDENT, '', spos, '') - else: - indents.append(indent) - break - yield PythonToken(NAME, token, spos, prefix) elif initial == '\\' and line[start:] in ('\\\n', '\\\r\n', '\\\r'): # continued stmt additional_prefix += prefix + line[start:] break @@ -613,6 +636,39 @@ yield PythonToken(ENDMARKER, '', end_pos, additional_prefix) +def _split_illegal_unicode_name(token, start_pos, prefix): + def create_token(): + return PythonToken(ERRORTOKEN if is_illegal else NAME, found, pos, prefix) + + found = '' + is_illegal = False + pos = start_pos + for i, char in enumerate(token): + if is_illegal: + if is_identifier(char): + yield create_token() + found = char + is_illegal = False + prefix = '' + pos = start_pos[0], start_pos[1] + i + else: + found += char + else: + new_found = found + char + if is_identifier(new_found): + found = new_found + else: + if found: + yield create_token() + prefix = '' + pos = start_pos[0], start_pos[1] + i + found = char + is_illegal = True + + if found: + yield 
create_token() + + if __name__ == "__main__": if len(sys.argv) >= 2: path = sys.argv[1] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/parso/python/tree.py new/parso-0.5.1/parso/python/tree.py --- old/parso-0.5.0/parso/python/tree.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/parso/python/tree.py 2019-07-13 15:47:10.000000000 +0200 @@ -43,7 +43,10 @@ """ import re -from collections import Mapping +try: + from collections.abc import Mapping +except ImportError: + from collections import Mapping from parso._compatibility import utf8_repr, unicode from parso.tree import Node, BaseNode, Leaf, ErrorNode, ErrorLeaf, \ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/parso.egg-info/PKG-INFO new/parso-0.5.1/parso.egg-info/PKG-INFO --- old/parso-0.5.0/parso.egg-info/PKG-INFO 2019-06-20 22:11:18.000000000 +0200 +++ new/parso-0.5.1/parso.egg-info/PKG-INFO 2019-07-13 15:50:20.000000000 +0200 @@ -1,6 +1,6 @@ Metadata-Version: 2.1 Name: parso -Version: 0.5.0 +Version: 0.5.1 Summary: A Python Parser Home-page: https://github.com/davidhalter/parso Author: David Halter @@ -106,6 +106,12 @@ Changelog --------- + 0.5.1 (2019-07-13) + ++++++++++++++++++ + + - Fix: Some unicode identifiers were not correctly tokenized + - Fix: Line continuations in f-strings are now working + 0.5.0 (2019-06-20) ++++++++++++++++++ @@ -120,19 +126,19 @@ - Python 3.8 support - FileIO support, it's now possible to use abstract file IO, support is alpha - 0.3.4 (2018-02-13) + 0.3.4 (2019-02-13) +++++++++++++++++++ - Fix an f-string tokenizer error - 0.3.3 (2018-02-06) + 0.3.3 (2019-02-06) +++++++++++++++++++ - Fix async errors in the diff parser - A fix in iter_errors - This is a very small bugfix release - 0.3.2 (2018-01-24) + 0.3.2 (2019-01-24) +++++++++++++++++++ - 20+ bugfixes in the diff parser and 3 in the tokenizer diff -urN '--exclude=CVS' '--exclude=.cvsignore' 
'--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/test/test_diff_parser.py new/parso-0.5.1/test/test_diff_parser.py --- old/parso-0.5.0/test/test_diff_parser.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/test/test_diff_parser.py 2019-07-13 15:47:10.000000000 +0200 @@ -974,10 +974,12 @@ Those issues were all found with the fuzzer. """ differ.initialize('') - differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, expect_error_leaves=True) + differ.parse(u'\x1dĔBϞɛˁşʑ˳˻ȣſéÎ\x90̕ȟòwʘ\x1dĔBϞɛˁşʑ˳˻ȣſéÎ', parsers=1, + expect_error_leaves=True) differ.parse(u'\r\r', parsers=1) differ.parse(u"˟Ę\x05À\r rúƣ@\x8a\x15r()\n", parsers=1, expect_error_leaves=True) - differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1) + differ.parse(u'a\ntaǁ\rGĒōns__\n\nb', parsers=1, + expect_error_leaves=sys.version_info[0] == 2) s = ' if not (self, "_fi\x02\x0e\x08\n\nle"):' differ.parse(s, parsers=1, expect_error_leaves=True) differ.parse('') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/test/test_fstring.py new/parso-0.5.1/test/test_fstring.py --- old/parso-0.5.0/test/test_fstring.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/test/test_fstring.py 2019-07-13 15:47:10.000000000 +0200 @@ -12,33 +12,57 @@ @pytest.mark.parametrize( 'code', [ - '{1}', - '{1:}', - '', - '{1!a}', - '{1!a:1}', - '{1:1}', - '{1:1.{32}}', - '{1::>4}', - '{foo} {bar}', - '{x:{y}}', - '{x:{y:}}', - '{x:{y:1}}', + # simple cases + 'f"{1}"', + 'f"""{1}"""', + 'f"{foo} {bar}"', + + # empty string + 'f""', + 'f""""""', + + # empty format specifier is okay + 'f"{1:}"', + + # use of conversion options + 'f"{1!a}"', + 'f"{1!a:1}"', + + # format specifiers + 'f"{1:1}"', + 'f"{1:1.{32}}"', + 'f"{1::>4}"', + 'f"{x:{y}}"', + 'f"{x:{y:}}"', + 'f"{x:{y:1}}"', # Escapes - '{{}}', - '{{{1}}}', - '{{{1}', - '1{{2{{3', - '}}', + 'f"{{}}"', + 'f"{{{1}}}"', + 'f"{{{1}"', + 'f"1{{2{{3"', + 'f"}}"', # New Python 3.8 syntax f'{a=}' - 
'{a=}', - '{a()=}', + 'f"{a=}"', + 'f"{a()=}"', + + # multiline f-string + 'f"""abc\ndef"""', + 'f"""abc{\n123}def"""', + + # a line continuation inside of an fstring_string + 'f"abc\\\ndef"', + 'f"\\\n{123}\\\n"', + + # a line continuation inside of an fstring_expr + 'f"{\\\n123}"', + + # a line continuation inside of an format spec + 'f"{123:.2\\\nf}"', ] ) def test_valid(code, grammar): - code = 'f"""%s"""' % code module = grammar.parse(code, error_recovery=False) fstring = module.children[0] assert fstring.type == 'fstring' @@ -47,23 +71,34 @@ @pytest.mark.parametrize( 'code', [ - '}', - '{', - '{1!{a}}', - '{!{a}}', - '{}', - '{:}', - '{:}}}', - '{:1}', - '{!:}', - '{!}', - '{!a}', - '{1:{}}', - '{1:{:}}', + # an f-string can't contain unmatched curly braces + 'f"}"', + 'f"{"', + 'f"""}"""', + 'f"""{"""', + + # invalid conversion characters + 'f"{1!{a}}"', + 'f"{!{a}}"', + + # The curly braces must contain an expression + 'f"{}"', + 'f"{:}"', + 'f"{:}}}"', + 'f"{:1}"', + 'f"{!:}"', + 'f"{!}"', + 'f"{!a}"', + + # invalid (empty) format specifiers + 'f"{1:{}}"', + 'f"{1:{:}}"', + + # a newline without a line continuation inside a single-line string + 'f"abc\ndef"', ] ) def test_invalid(code, grammar): - code = 'f"""%s"""' % code with pytest.raises(ParserSyntaxError): grammar.parse(code, error_recovery=False) @@ -95,6 +130,7 @@ """), 'f"foo', 'f"""foo', + 'f"abc\ndef"', ] ) def test_roundtrip(grammar, code): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/test/test_tokenize.py new/parso-0.5.1/test/test_tokenize.py --- old/parso-0.5.0/test/test_tokenize.py 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/test/test_tokenize.py 2019-07-13 15:47:10.000000000 +0200 @@ -1,5 +1,6 @@ # -*- coding: utf-8 # This file contains Unicode characters. 
+import sys from textwrap import dedent import pytest @@ -16,6 +17,7 @@ NAME = PythonTokenTypes.NAME NEWLINE = PythonTokenTypes.NEWLINE STRING = PythonTokenTypes.STRING +NUMBER = PythonTokenTypes.NUMBER INDENT = PythonTokenTypes.INDENT DEDENT = PythonTokenTypes.DEDENT ERRORTOKEN = PythonTokenTypes.ERRORTOKEN @@ -140,7 +142,7 @@ else: # Unicode tokens in Python 2 seem to be identified as operators. # They will be ignored in the parser, that's ok. - assert unicode_token[0] == OP + assert unicode_token[0] == ERRORTOKEN def test_quoted_strings(): @@ -228,16 +230,29 @@ check('a\\') +xfail_py2 = dict(marks=[pytest.mark.xfail(sys.version_info[0] == 2, reason='Python 2')]) + + @pytest.mark.parametrize( ('code', 'types'), [ + # Indentation (' foo', [INDENT, NAME, DEDENT]), (' foo\n bar', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), (' foo\n bar \n baz', [INDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, NEWLINE, ERROR_DEDENT, NAME, DEDENT]), (' foo\nbar', [INDENT, NAME, NEWLINE, DEDENT, NAME]), + + # Name stuff + ('1foo1', [NUMBER, NAME]), + pytest.param( + u'மெல்லினம்', [NAME], + **xfail_py2), + pytest.param(u'²', [ERRORTOKEN], **xfail_py2), + pytest.param(u'ä²ö', [NAME, ERRORTOKEN, NAME], **xfail_py2), + pytest.param(u'ää²¹öö', [NAME, ERRORTOKEN, NAME], **xfail_py2), ] ) -def test_indentation(code, types): +def test_token_types(code, types): actual_types = [t.type for t in _get_token_list(code)] assert actual_types == types + [ENDMARKER] @@ -330,13 +345,46 @@ ('f" "{}', [FSTRING_START, FSTRING_STRING, FSTRING_END, OP, OP]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), (r'f"\""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + + # format spec (r'f"Some {x:.2f}{y}"', [FSTRING_START, FSTRING_STRING, OP, NAME, OP, FSTRING_STRING, OP, OP, NAME, OP, FSTRING_END]), + + # multiline f-string + ('f"""abc\ndef"""', [FSTRING_START, FSTRING_STRING, FSTRING_END]), + ('f"""abc{\n123}def"""', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END 
+ ]), + + # a line continuation inside of an fstring_string + ('f"abc\\\ndef"', [ + FSTRING_START, FSTRING_STRING, FSTRING_END + ]), + ('f"\\\n{123}\\\n"', [ + FSTRING_START, FSTRING_STRING, OP, NUMBER, OP, FSTRING_STRING, + FSTRING_END + ]), + + # a line continuation inside of an fstring_expr + ('f"{\\\n123}"', [FSTRING_START, OP, NUMBER, OP, FSTRING_END]), + + # a line continuation inside of an format spec + ('f"{123:.2\\\nf}"', [ + FSTRING_START, OP, NUMBER, OP, FSTRING_STRING, OP, FSTRING_END + ]), + + # a newline without a line continuation inside a single-line string is + # wrong, and will generate an ERRORTOKEN + ('f"abc\ndef"', [ + FSTRING_START, FSTRING_STRING, NEWLINE, NAME, ERRORTOKEN + ]), + + # a more complex example (r'print(f"Some {x:.2f}a{y}")', [ NAME, OP, FSTRING_START, FSTRING_STRING, OP, NAME, OP, FSTRING_STRING, OP, FSTRING_STRING, OP, NAME, OP, FSTRING_END, OP ]), - ] ) def test_fstring(code, types, version_ge_py36): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/parso-0.5.0/tox.ini new/parso-0.5.1/tox.ini --- old/parso-0.5.0/tox.ini 2019-06-20 21:27:50.000000000 +0200 +++ new/parso-0.5.1/tox.ini 2019-07-13 15:47:10.000000000 +0200 @@ -4,6 +4,7 @@ extras = testing deps = py26,py33: pytest>=3.0.7,<3.3 + py27,py34: pytest<5 py26,py33: setuptools<37 coverage: coverage setenv =
