Re: [gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations
On 02/21/2017 07:29 PM, Brian Dolbec wrote: > Code seems fine to me, I trust you ;) Thanks, merged: https://gitweb.gentoo.org/proj/portage.git/commit/?id=3e82d2e09f124f3b77d1b913c945b93341ee4053 -- Thanks, Zac
Re: [gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations
On Tue, 21 Feb 2017 16:31:56 -0800 Zac Medico wrote: > Use a regular expression to detect line continuations, instead > of the unicode_escape codec, since the unicode_escape codec is > not really intended to be used this way. > > This solves an issue with python3.6, where a DeprecationWarning > is triggered by ebuilds containing escape sequences, like this > warning triggered by a sed expression in the dev-db/sqlite > ebuilds: > > DeprecationWarning: invalid escape sequence '\[' > --- > repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 > +++ 1 file changed, 8 insertions(+), 20 > deletions(-) > > diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py > b/repoman/pym/repoman/modules/scan/ebuild/checks.py index > 15e2251..d21bf0c 100644 --- > a/repoman/pym/repoman/modules/scan/ebuild/checks.py +++ > b/repoman/pym/repoman/modules/scan/ebuild/checks.py @@ -8,8 +8,8 @@ > and correctness of an ebuild.""" > from __future__ import unicode_literals > > -import codecs > from itertools import chain > +import operator > import re > import time > > @@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False): > > _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$') > _ignore_comment_re = re.compile(r'^\s*#') > +_continuation_re = re.compile(r'(\\)*$') > > > def run_checks(contents, pkg): > - unicode_escape_codec = codecs.lookup('unicode_escape') > - unicode_escape = lambda x: unicode_escape_codec.decode(x)[0] > if _constant_checks is None: > checks_init() > checks = _constant_checks > @@ -957,32 +956,21 @@ def run_checks(contents, pkg): > # cow > # This will merge these lines like so: > # inherit foo bar moo cow > - try: > - # A normal line will end in the two bytes: > <\> <\n>. So decoding > - # that will result in python thinking the > <\n> is being escaped > - # and eat the single <\> which makes it hard > for us to detect. > - # Instead, strip the newline (which we know > all lines have), and > - # append a <0>. 
Then when python escapes > it, if the line ended > - # in a <\>, we'll end up with a <\0> marker > to key off of. This > - # shouldn't be a problem with any valid > ebuild ... > - line_escaped = > unicode_escape(line.rstrip('\n') + '0') > - except SystemExit: > - raise > - except: > - # Who knows what kind of crazy crap an > ebuild will have > - # in it -- don't allow it to kill us. > - line_escaped = line > + # A line ending with an even number of backslashes > does not count, > + # because the last backslash is escaped. Therefore, > search for an > + # odd number of backslashes. > + line_escaped = > operator.sub(*_continuation_re.search(line).span()) % 2 == 1 if > multiline: # Chop off the \ and \n bytes from the previous line. > multiline = multiline[:-2] + line > - if not line_escaped.endswith('\0'): > + if not line_escaped: > line = multiline > num = multinum > multiline = None > else: > continue > else: > - if line_escaped.endswith('\0'): > + if line_escaped: > multinum = num > multiline = line > continue Code seems fine to me, I trust you ;) -- Brian Dolbec
[gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations
Use a regular expression to detect line continuations, instead of the unicode_escape codec, since the unicode_escape codec is not really intended to be used this way. This solves an issue with python3.6, where a DeprecationWarning is triggered by ebuilds containing escape sequences, like this warning triggered by a sed expression in the dev-db/sqlite ebuilds: DeprecationWarning: invalid escape sequence '\[' --- repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 +++ 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py b/repoman/pym/repoman/modules/scan/ebuild/checks.py index 15e2251..d21bf0c 100644 --- a/repoman/pym/repoman/modules/scan/ebuild/checks.py +++ b/repoman/pym/repoman/modules/scan/ebuild/checks.py @@ -8,8 +8,8 @@ and correctness of an ebuild.""" from __future__ import unicode_literals -import codecs from itertools import chain +import operator import re import time @@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False): _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$') _ignore_comment_re = re.compile(r'^\s*#') +_continuation_re = re.compile(r'(\\)*$') def run_checks(contents, pkg): - unicode_escape_codec = codecs.lookup('unicode_escape') - unicode_escape = lambda x: unicode_escape_codec.decode(x)[0] if _constant_checks is None: checks_init() checks = _constant_checks @@ -957,32 +956,21 @@ def run_checks(contents, pkg): # cow # This will merge these lines like so: # inherit foo bar moo cow - try: - # A normal line will end in the two bytes: <\> <\n>. So decoding - # that will result in python thinking the <\n> is being escaped - # and eat the single <\> which makes it hard for us to detect. - # Instead, strip the newline (which we know all lines have), and - # append a <0>. Then when python escapes it, if the line ended - # in a <\>, we'll end up with a <\0> marker to key off of. This - # shouldn't be a problem with any valid ebuild ... 
- line_escaped = unicode_escape(line.rstrip('\n') + '0') - except SystemExit: - raise - except: - # Who knows what kind of crazy crap an ebuild will have - # in it -- don't allow it to kill us. - line_escaped = line + # A line ending with an even number of backslashes does not count, + # because the last backslash is escaped. Therefore, search for an + # odd number of backslashes. + line_escaped = operator.sub(*_continuation_re.search(line).span()) % 2 == 1 if multiline: # Chop off the \ and \n bytes from the previous line. multiline = multiline[:-2] + line - if not line_escaped.endswith('\0'): + if not line_escaped: line = multiline num = multinum multiline = None else: continue else: - if line_escaped.endswith('\0'): + if line_escaped: multinum = num multiline = line continue -- 2.10.2