Re: [gentoo-portage-dev] [PATCH v2] filter-bash-environment.py: use buffered input, raw bytes (bug 647654)
On Thu, Feb 15, 2018 at 12:49 AM, Zac Medico wrote: > Use sys.stdin.buffer instead of sys.stdin.buffer.raw, for buffered input. > Also use raw bytes instead of unicode strings, in order to avoid making > assumptions about character encodings, and also to avoid overhead from > unicode decoding/encoding. > > Since the % operator does not support bytes operands in python3.4, use > the + operator to format strings of bytes. > > Bug: https://bugs.gentoo.org/647654 > --- > [PATCH v2] changes: > * don't use % operator with bytes operands, for python3.4 compat > * add unit test that compares expected output to actual output > Sorry to be unclear; if there is existing test coverage via emerge I think that is fine; I was just worried there was none. I need to learn to check travis-ci for these pullreqs. > > bin/filter-bash-environment.py| 47 +-- > pym/portage/tests/bin/test_filter_bash_env.py | 113 > ++ > 2 files changed, 135 insertions(+), 25 deletions(-) > create mode 100644 pym/portage/tests/bin/test_filter_bash_env.py > > diff --git a/bin/filter-bash-environment.py b/bin/filter-bash-environment. 
> py > index a4cdc5429..668aa7452 100755 > --- a/bin/filter-bash-environment.py > +++ b/bin/filter-bash-environment.py > @@ -2,21 +2,19 @@ > # Copyright 1999-2014 Gentoo Foundation > # Distributed under the terms of the GNU General Public License v2 > > -import codecs > -import io > import os > import re > import sys > > -here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$') > -func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$') > -func_end_re = re.compile(r'^\}$') > +here_doc_re = re.compile(br'.*\s<<[-]?(\w+)$') > +func_start_re = re.compile(br'^[-\w]+\s*\(\)\s*$') > +func_end_re = re.compile(br'^\}$') > > -var_assign_re = re.compile(r'(^|^declare\s+-\ > S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$') > -close_quote_re = re.compile(r'(\\"|"|\')\s*$') > -readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+') > +var_assign_re = re.compile(br'(^|^declare\s+-\ > S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$') > +close_quote_re = re.compile(br'(\\"|"|\')\s*$') > +readonly_re = re.compile(br'^declare\s+-(\S*)r(\S*)\s+') > # declare without assignment > -var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$') > +var_declare_re = re.compile(br'^declare(\s+-\S+)?\s+([^=\s]+)\s*$') > > def have_end_quote(quote, line): > """ > @@ -32,16 +30,16 @@ def have_end_quote(quote, line): > def filter_declare_readonly_opt(line): > readonly_match = readonly_re.match(line) > if readonly_match is not None: > - declare_opts = '' > + declare_opts = b'' > for i in (1, 2): > group = readonly_match.group(i) > if group is not None: > declare_opts += group > if declare_opts: > - line = 'declare -%s %s' % \ > - (declare_opts, line[readonly_match.end():]) > + line = b'declare -' + declare_opts + \ > + b' ' + line[readonly_match.end():] > else: > - line = 'declare ' + line[readonly_match.end():] > + line = b'declare ' + line[readonly_match.end():] > return line > > def filter_bash_environment(pattern, file_in, file_out): > @@ -57,7 +55,7 @@ def filter_bash_environment(pattern, file_in, 
file_out): > for line in file_in: > if multi_line_quote is not None: > if not multi_line_quote_filter: > - file_out.write(line.replace("\1", "")) > + file_out.write(line.replace(b"\1", b"")) > if have_end_quote(multi_line_quote, line): > multi_line_quote = None > multi_line_quote_filter = None > @@ -78,7 +76,7 @@ def filter_bash_environment(pattern, file_in, file_out): > multi_line_quote_filter = > filter_this > if not filter_this: > line = filter_declare_readonly_opt( > line) > - file_out.write(line.replace("\1", > "")) > + file_out.write(line.replace(b"\1", > b"")) > continue > else: > declare_match = var_declare_re.match(line) > @@ -98,7 +96,7 @@ def filter_bash_environment(pattern, file_in, file_out): > continue > here_doc = here_doc_re.match(line) > if here_doc is not None: > - here_doc_delim = re.compile("^%s$" % > here_doc.group(1)) > + here_doc_delim = re.compile(b'^%s' + >
[gentoo-portage-dev] [PATCH v2] filter-bash-environment.py: use buffered input, raw bytes (bug 647654)
Use sys.stdin.buffer instead of sys.stdin.buffer.raw, for buffered input. Also use raw bytes instead of unicode strings, in order to avoid making assumptions about character encodings, and also to avoid overhead from unicode decoding/encoding. Since the % operator does not support bytes operands in python3.4, use the + operator to format strings of bytes. Bug: https://bugs.gentoo.org/647654 --- [PATCH v2] changes: * don't use % operator with bytes operands, for python3.4 compat * add unit test that compares expected output to actual output bin/filter-bash-environment.py| 47 +-- pym/portage/tests/bin/test_filter_bash_env.py | 113 ++ 2 files changed, 135 insertions(+), 25 deletions(-) create mode 100644 pym/portage/tests/bin/test_filter_bash_env.py diff --git a/bin/filter-bash-environment.py b/bin/filter-bash-environment.py index a4cdc5429..668aa7452 100755 --- a/bin/filter-bash-environment.py +++ b/bin/filter-bash-environment.py @@ -2,21 +2,19 @@ # Copyright 1999-2014 Gentoo Foundation # Distributed under the terms of the GNU General Public License v2 -import codecs -import io import os import re import sys -here_doc_re = re.compile(r'.*\s<<[-]?(\w+)$') -func_start_re = re.compile(r'^[-\w]+\s*\(\)\s*$') -func_end_re = re.compile(r'^\}$') +here_doc_re = re.compile(br'.*\s<<[-]?(\w+)$') +func_start_re = re.compile(br'^[-\w]+\s*\(\)\s*$') +func_end_re = re.compile(br'^\}$') -var_assign_re = re.compile(r'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$') -close_quote_re = re.compile(r'(\\"|"|\')\s*$') -readonly_re = re.compile(r'^declare\s+-(\S*)r(\S*)\s+') +var_assign_re = re.compile(br'(^|^declare\s+-\S+\s+|^declare\s+|^export\s+)([^=\s]+)=("|\')?.*$') +close_quote_re = re.compile(br'(\\"|"|\')\s*$') +readonly_re = re.compile(br'^declare\s+-(\S*)r(\S*)\s+') # declare without assignment -var_declare_re = re.compile(r'^declare(\s+-\S+)?\s+([^=\s]+)\s*$') +var_declare_re = re.compile(br'^declare(\s+-\S+)?\s+([^=\s]+)\s*$') def have_end_quote(quote, 
line): """ @@ -32,16 +30,16 @@ def have_end_quote(quote, line): def filter_declare_readonly_opt(line): readonly_match = readonly_re.match(line) if readonly_match is not None: - declare_opts = '' + declare_opts = b'' for i in (1, 2): group = readonly_match.group(i) if group is not None: declare_opts += group if declare_opts: - line = 'declare -%s %s' % \ - (declare_opts, line[readonly_match.end():]) + line = b'declare -' + declare_opts + \ + b' ' + line[readonly_match.end():] else: - line = 'declare ' + line[readonly_match.end():] + line = b'declare ' + line[readonly_match.end():] return line def filter_bash_environment(pattern, file_in, file_out): @@ -57,7 +55,7 @@ def filter_bash_environment(pattern, file_in, file_out): for line in file_in: if multi_line_quote is not None: if not multi_line_quote_filter: - file_out.write(line.replace("\1", "")) + file_out.write(line.replace(b"\1", b"")) if have_end_quote(multi_line_quote, line): multi_line_quote = None multi_line_quote_filter = None @@ -78,7 +76,7 @@ def filter_bash_environment(pattern, file_in, file_out): multi_line_quote_filter = filter_this if not filter_this: line = filter_declare_readonly_opt(line) - file_out.write(line.replace("\1", "")) + file_out.write(line.replace(b"\1", b"")) continue else: declare_match = var_declare_re.match(line) @@ -98,7 +96,7 @@ def filter_bash_environment(pattern, file_in, file_out): continue here_doc = here_doc_re.match(line) if here_doc is not None: - here_doc_delim = re.compile("^%s$" % here_doc.group(1)) + here_doc_delim = re.compile(b'^%s' + here_doc.group(1) + b'$') file_out.write(line) continue # Note: here-documents are handled before functions since otherwise @@ -141,18 +139,17 @@ if __name__ == "__main__": file_in = sys.stdin file_out = sys.stdout if sys.hexversion >= 0x300: - file_in = codecs.iterdecode(sys.stdin.buffer.raw, - 'utf_8', errors='replace') -