Re: [gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations

2017-02-22 Thread Zac Medico
On 02/21/2017 07:29 PM, Brian Dolbec wrote:
> Code seems fine to me, I trust you ;)

Thanks, merged:

https://gitweb.gentoo.org/proj/portage.git/commit/?id=3e82d2e09f124f3b77d1b913c945b93341ee4053
-- 
Thanks,
Zac



Re: [gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations

2017-02-21 Thread Brian Dolbec
On Tue, 21 Feb 2017 16:31:56 -0800
Zac Medico wrote:

> Use a regular expression to detect line continuations, instead
> of the unicode_escape codec, since the unicode_escape codec is
> not really intended to be used this way.
> 
> This solves an issue with python3.6, where a DeprecationWarning
> is triggered by ebuilds containing escape sequences, like this
> warning triggered by a sed expression in the dev-db/sqlite
> ebuilds:
> 
> DeprecationWarning: invalid escape sequence '\['
> ---
>  repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 +++
>  1 file changed, 8 insertions(+), 20 deletions(-)
> 
> diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py b/repoman/pym/repoman/modules/scan/ebuild/checks.py
> index 15e2251..d21bf0c 100644
> --- a/repoman/pym/repoman/modules/scan/ebuild/checks.py
> +++ b/repoman/pym/repoman/modules/scan/ebuild/checks.py
> @@ -8,8 +8,8 @@ and correctness of an ebuild."""
>  
>  from __future__ import unicode_literals
>  
> -import codecs
>  from itertools import chain
> +import operator
>  import re
>  import time
>  
> @@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False):
>  
>  _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$')
>  _ignore_comment_re = re.compile(r'^\s*#')
> +_continuation_re = re.compile(r'(\\)*$')
>  
>  
>  def run_checks(contents, pkg):
> - unicode_escape_codec = codecs.lookup('unicode_escape')
> - unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
>   if _constant_checks is None:
>   checks_init()
>   checks = _constant_checks
> @@ -957,32 +956,21 @@ def run_checks(contents, pkg):
>   #   cow
>   # This will merge these lines like so:
>   #   inherit foo bar moo cow
> - try:
> - # A normal line will end in the two bytes: <\> <\n>.  So decoding
> - # that will result in python thinking the <\n> is being escaped
> - # and eat the single <\> which makes it hard for us to detect.
> - # Instead, strip the newline (which we know all lines have), and
> - # append a <0>.  Then when python escapes it, if the line ended
> - # in a <\>, we'll end up with a <\0> marker to key off of.  This
> - # shouldn't be a problem with any valid ebuild ...
> - line_escaped = unicode_escape(line.rstrip('\n') + '0')
> - except SystemExit:
> - raise
> - except:
> - # Who knows what kind of crazy crap an ebuild will have
> - # in it -- don't allow it to kill us.
> - line_escaped = line
> + # A line ending with an even number of backslashes does not count,
> + # because the last backslash is escaped. Therefore, search for an
> + # odd number of backslashes.
> + line_escaped = operator.sub(*_continuation_re.search(line).span()) % 2 == 1
>   if multiline:
>   # Chop off the \ and \n bytes from the previous line.
>   multiline = multiline[:-2] + line
> - if not line_escaped.endswith('\0'):
> + if not line_escaped:
>   line = multiline
>   num = multinum
>   multiline = None
>   else:
>   continue
>   else:
> - if line_escaped.endswith('\0'):
> + if line_escaped:
>   multinum = num
>   multiline = line
>   continue

Code seems fine to me, I trust you ;)

-- 
Brian Dolbec 




[gentoo-portage-dev] [PATCH] repoman: use regular expression to detect line continuations

2017-02-21 Thread Zac Medico
Use a regular expression to detect line continuations, instead
of the unicode_escape codec, since the unicode_escape codec is
not really intended to be used this way.

This solves an issue with python3.6, where a DeprecationWarning
is triggered by ebuilds containing escape sequences, like this
warning triggered by a sed expression in the dev-db/sqlite
ebuilds:

DeprecationWarning: invalid escape sequence '\['
---
 repoman/pym/repoman/modules/scan/ebuild/checks.py | 28 +++
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/repoman/pym/repoman/modules/scan/ebuild/checks.py b/repoman/pym/repoman/modules/scan/ebuild/checks.py
index 15e2251..d21bf0c 100644
--- a/repoman/pym/repoman/modules/scan/ebuild/checks.py
+++ b/repoman/pym/repoman/modules/scan/ebuild/checks.py
@@ -8,8 +8,8 @@ and correctness of an ebuild."""
 
 from __future__ import unicode_literals
 
-import codecs
 from itertools import chain
+import operator
 import re
 import time
 
@@ -923,11 +923,10 @@ def checks_init(experimental_inherit=False):
 
 _here_doc_re = re.compile(r'.*<<[-]?(\w+)\s*(>\s*\S+\s*)?$')
 _ignore_comment_re = re.compile(r'^\s*#')
+_continuation_re = re.compile(r'(\\)*$')
 
 
 def run_checks(contents, pkg):
-   unicode_escape_codec = codecs.lookup('unicode_escape')
-   unicode_escape = lambda x: unicode_escape_codec.decode(x)[0]
if _constant_checks is None:
checks_init()
checks = _constant_checks
@@ -957,32 +956,21 @@ def run_checks(contents, pkg):
#   cow
# This will merge these lines like so:
#   inherit foo bar moo cow
-   try:
-   # A normal line will end in the two bytes: <\> <\n>.  So decoding
-   # that will result in python thinking the <\n> is being escaped
-   # and eat the single <\> which makes it hard for us to detect.
-   # Instead, strip the newline (which we know all lines have), and
-   # append a <0>.  Then when python escapes it, if the line ended
-   # in a <\>, we'll end up with a <\0> marker to key off of.  This
-   # shouldn't be a problem with any valid ebuild ...
-   line_escaped = unicode_escape(line.rstrip('\n') + '0')
-   except SystemExit:
-   raise
-   except:
-   # Who knows what kind of crazy crap an ebuild will have
-   # in it -- don't allow it to kill us.
-   line_escaped = line
+   # A line ending with an even number of backslashes does not count,
+   # because the last backslash is escaped. Therefore, search for an
+   # odd number of backslashes.
+   line_escaped = operator.sub(*_continuation_re.search(line).span()) % 2 == 1
if multiline:
# Chop off the \ and \n bytes from the previous line.
multiline = multiline[:-2] + line
-   if not line_escaped.endswith('\0'):
+   if not line_escaped:
line = multiline
num = multinum
multiline = None
else:
continue
else:
-   if line_escaped.endswith('\0'):
+   if line_escaped:
multinum = num
multiline = line
continue
-- 
2.10.2