Scott Moser has proposed merging ~smoser/cloud-init:bug/1751051-subp-encode-with-utf8 into cloud-init:master.
Commit message: subp: Fix subp usage with non-ascii characters when no system locale. If python starts up without a locale set, then its default encoding ends up set as ascii. That is not easily changed with the likes of setlocale. In order to avoid UnicodeDecodeErrors cloud-init will encode a python3 string (python2 basestring) to bytes so that the values passed to Popen are already bytes. LP: #1751051 Requested reviews: cloud-init commiters (cloud-init-dev) Related bugs: Bug #1751051 in cloud-init: "UnicodeEncodeError when creating user with non-ascii chars" https://bugs.launchpad.net/cloud-init/+bug/1751051 For more details, see: https://code.launchpad.net/~smoser/cloud-init/+git/cloud-init/+merge/338586 see commit message -- Your team cloud-init commiters is requested to review the proposed merge of ~smoser/cloud-init:bug/1751051-subp-encode-with-utf8 into cloud-init:master.
diff --git a/cloudinit/util.py b/cloudinit/util.py index 338fb97..42fea64 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -1865,6 +1865,8 @@ def subp(args, data=None, rcs=None, env=None, capture=True, shell=False, if not isinstance(data, bytes): data = data.encode() + args = [x if isinstance(x, six.binary_type) else x.encode("utf-8") + for x in args] try: sp = subprocess.Popen(args, stdout=stdout, stderr=stderr, stdin=stdin, diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 4a92e74..167008d 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -8,7 +8,9 @@ import shutil import stat import tempfile +import json import six +import sys import yaml from cloudinit import importer, util @@ -733,6 +735,43 @@ class TestSubp(helpers.CiTestCase): self.assertEqual("/target/my/path/", util.target_path("/target/", "///my/path/")) + def test_c_lang_can_take_utf8_args(self): + """Independent of system LC_CTYPE, args can contain utf-8 strings. + + When python starts up, its default encoding gets set based on + the value of LC_CTYPE. If no system locale is set, the default + encoding for both python2 and python3 in some paths will end up + being ascii. + + Attempts to use setlocale or patching (or changing) os.environ + in the current environment seem to not be effective. + + This test starts up a python with LC_CTYPE set to C so that + the default encoding will be set to ascii. In such an environment + supb(['command', 'non-ascii-arg']) will cause a UnicodeDecodeError. 
+ """ + python_prog = '\n'.join([ + 'import json, sys', + 'from cloudinit.util import subp', + 'data = sys.stdin.read()', + 'cmd = json.loads(data)', + 'sys.stderr.write("cmd=%s\\n" % cmd)', + 'subp(cmd, capture=False)', + '']) + cmd = [BASH, '-c', 'echo -n "$@"', '--', + self.utf8_valid.decode("utf-8")] + python_subp = [sys.executable, '-c', python_prog] + + out, _err = util.subp( + ['cat'], update_env={'LC_CTYPE': 'C'}, + data=json.dumps(cmd).encode("utf-8"), + decode=False) + out, _err = util.subp( + python_subp, update_env={'LC_CTYPE': 'C'}, + data=json.dumps(cmd).encode("utf-8"), + decode=False) + self.assertEqual(self.utf8_valid, out) + class TestEncode(helpers.TestCase): """Test the encoding functions"""
_______________________________________________ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp