commit: 3e82d2e09f124f3b77d1b913c945b93341ee4053 Author: Zac Medico <zmedico <AT> gentoo <DOT> org> AuthorDate: Wed Feb 22 00:17:37 2017 +0000 Commit: Zac Medico <zmedico <AT> gentoo <DOT> org> CommitDate: Wed Feb 22 08:08:24 2017 +0000 URL: https://gitweb.gentoo.org/proj/portage.git/commit/?id=3e82d2e0
repoman: use regular expression to detect line continuations (bug 610414) Use a regular expression to detect line continuations, instead of the unicode_escape codec, since the unicode_escape codec is not really intended to be used this way. This solves an issue with python3.6, where a DeprecationWarning is triggered by ebuilds containing escape sequences, like this warning triggered by a sed expression in the dev-db/sqlite ebuilds: DeprecationWarning: invalid escape sequence '\[' X-Gentoo-Bug: 610414 X-Gentoo-Bug-Url: https://bugs.gentoo.org/show_bug.cgi?id=610414 Acked-by: Brian Dolbec <dolsen <AT> gentoo.org> repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 +++++++---------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py b/repoman/pym/repoman/modules/scan/ebuild/checks.py index 15e225156..d21bf0cb2 100644 --- a/repoman/pym/repoman/modules/scan/ebuild/checks.py +++ b/repoman/pym/repoman/modules/scan/ebuild/checks.py @@ -8,8 +8,8 @@ and correctness of an ebuild.""" from __future__ import unicode_literals -import codecs from itertools import chain +import operator import re import time @@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False): _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$') _ignore_comment_re = re.compile(r'^\s*#') +_continuation_re = re.compile(r'(\\)*$') def run_checks(contents, pkg): - unicode_escape_codec = codecs.lookup('unicode_escape') - unicode_escape = lambda x: unicode_escape_codec.decode(x)[0] if _constant_checks is None: checks_init() checks = _constant_checks @@ -957,32 +956,21 @@ def run_checks(contents, pkg): # cow # This will merge these lines like so: # inherit foo bar moo cow - try: - # A normal line will end in the two bytes: <\> <\n>. So decoding - # that will result in python thinking the <\n> is being escaped - # and eat the single <\> which makes it hard for us to detect. - # Instead, strip the newline (which we know all lines have), and - # append a <0>. Then when python escapes it, if the line ended - # in a <\>, we'll end up with a <\0> marker to key off of. This - # shouldn't be a problem with any valid ebuild ... - line_escaped = unicode_escape(line.rstrip('\n') + '0') - except SystemExit: - raise - except: - # Who knows what kind of crazy crap an ebuild will have - # in it -- don't allow it to kill us. - line_escaped = line + # A line ending with an even number of backslashes does not count, + # because the last backslash is escaped. Therefore, search for an + # odd number of backslashes. + line_escaped = operator.sub(*_continuation_re.search(line).span()) % 2 == 1 if multiline: # Chop off the \ and \n bytes from the previous line. multiline = multiline[:-2] + line - if not line_escaped.endswith('\0'): + if not line_escaped: line = multiline num = multinum multiline = None else: continue else: - if line_escaped.endswith('\0'): + if line_escaped: multinum = num multiline = line continue