--- Begin Message ---
Package: release.debian.org
Severity: normal
Tags: jessie
User: release.debian....@packages.debian.org
Usertags: pu
Hello stable team,
The version of ansible currently in Debian Jessie has a few minor security
vulnerabilities in it. I spoke with the security team, and we all agree that
the vulnerabilities don't rise to the level where a DSA and security upload is
necessary. The resolved security issues are TEMP-0000000-EDD657 and
CVE-2015-3908.
I've backported the fixes from the current upstream stable branch back to the
version in Debian Jessie. Please let me know if it is clear for me to upload
to stable.
-- System Information:
Debian Release: stretch/sid
APT prefers testing
APT policy: (900, 'testing')
Architecture: amd64 (x86_64)
Foreign Architectures: i386
Kernel: Linux 4.0.0-2-amd64 (SMP w/8 CPU cores)
Locale: LANG=en_US.utf8, LC_CTYPE=en_US.utf8 (charmap=UTF-8)
*** /tmp/debdiff.txt
agartha 福 ~/Debian/ansible
10128 ◯ : debdiff --from ansible_1.7.2+dfsg-2_all.deb --to ansible_1.7.2+dfsg-2+deb8u1_all.deb
[The following lists of changes regard files as different if they have
different names, permissions or owners.]
Files in second set of .debs but not in first
---------------------------------------------
-rw-r--r-- root/root /usr/lib/python2.7/dist-packages/ansible/utils/unicode.py
Control files: lines which differ (wdiff format)
------------------------------------------------
Installed-Size: [-3400-] {+3415+}
Version: [-1.7.2+dfsg-2-] {+1.7.2+dfsg-2+deb8u1+}
diff --git a/lib/ansible/module_utils/urls.py b/lib/ansible/module_utils/urls.py
index 4edab47..ced7239 100644
--- a/lib/ansible/module_utils/urls.py
+++ b/lib/ansible/module_utils/urls.py
@@ -50,6 +50,15 @@ try:
except:
HAS_SSL=False
+HAS_MATCH_HOSTNAME = True
+try:
+ from ssl import match_hostname, CertificateError
+except ImportError:
+ try:
+ from backports.ssl_match_hostname import match_hostname, CertificateError
+ except ImportError:
+ HAS_MATCH_HOSTNAME = False
+
import os
import re
import socket
@@ -244,11 +253,13 @@ class SSLValidationHandler(urllib2.BaseHandler):
connect_result = s.recv(4096)
self.validate_proxy_response(connect_result)
ssl_s = ssl.wrap_socket(s, ca_certs=tmp_ca_cert_path, cert_reqs=ssl.CERT_REQUIRED)
+ match_hostname(ssl_s.getpeercert(), self.hostname)
else:
self.module.fail_json(msg='Unsupported proxy scheme: %s. Currently ansible only supports HTTP proxies.' % proxy_parts.get('scheme'))
else:
s.connect((self.hostname, self.port))
ssl_s = ssl.wrap_socket(s, ca_certs=tmp_ca_cert_path, cert_reqs=ssl.CERT_REQUIRED)
+ match_hostname(ssl_s.getpeercert(), self.hostname)
# close the ssl connection
#ssl_s.unwrap()
s.close()
@@ -262,6 +273,9 @@ class SSLValidationHandler(urllib2.BaseHandler):
'Use validate_certs=no or make sure your managed systems have a valid CA certificate installed. ' + \
'Paths checked for this platform: %s' % ", ".join(paths_checked)
)
+ except CertificateError:
+ self.module.fail_json(msg="SSL Certificate does not belong to %s. Make sure the url has a certificate that belongs to it or use validate_certs=no (insecure)" % self.hostname)
+
try:
# cleanup the temp file created, don't worry
# if it fails for some reason
@@ -290,7 +304,7 @@ def url_argument_spec():
)
-def fetch_url(module, url, data=None, headers=None, method=None,
+def fetch_url(module, url, data=None, headers=None, method=None,
use_proxy=True, force=False, last_mod_time=None, timeout=10):
'''
Fetches a file from an HTTP/FTP server using urllib2
@@ -314,27 +328,30 @@ def fetch_url(module, url, data=None, headers=None, method=None,
# FIXME: change the following to use the generic_urlparse function
# to remove the indexed references for 'parsed'
parsed = urlparse.urlparse(url)
- if parsed[0] == 'https':
- if not HAS_SSL and validate_certs:
+ if parsed[0] == 'https' and validate_certs:
+ if not HAS_SSL:
if distribution == 'Redhat':
module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended. You can also install python-ssl from EPEL')
else:
module.fail_json(msg='SSL validation is not available in your version of python. You can use validate_certs=no, however this is unsafe and not recommended')
- elif validate_certs:
- # do the cert validation
- netloc = parsed[1]
- if '@' in netloc:
- netloc = netloc.split('@', 1)[1]
- if ':' in netloc:
- hostname, port = netloc.split(':', 1)
- else:
- hostname = netloc
- port = 443
- # create the SSL validation handler and
- # add it to the list of handlers
- ssl_handler = SSLValidationHandler(module, hostname, port)
- handlers.append(ssl_handler)
+ if not HAS_MATCH_HOSTNAME:
+ module.fail_json(msg='Available SSL validation does not check that the certificate matches the hostname. You can install backports.ssl_match_hostname or update your managed machine to python-2.7.9 or newer. You could also use validate_certs=no, however this is unsafe and not recommended')
+
+ # do the cert validation
+ netloc = parsed[1]
+ if '@' in netloc:
+ netloc = netloc.split('@', 1)[1]
+ if ':' in netloc:
+ hostname, port = netloc.split(':', 1)
+ port = int(port)
+ else:
+ hostname = netloc
+ port = 443
+ # create the SSL validation handler and
+ # add it to the list of handlers
+ ssl_handler = SSLValidationHandler(module, hostname, port)
+ handlers.append(ssl_handler)
if parsed[0] != 'ftp':
username = module.params.get('url_username', '')
@@ -383,11 +400,11 @@ def fetch_url(module, url, data=None, headers=None, method=None,
else:
request = urllib2.Request(url, data)
- # add the custom agent header, to help prevent issues
- # with sites that block the default urllib agent string
+ # add the custom agent header, to help prevent issues
+ # with sites that block the default urllib agent string
request.add_header('User-agent', module.params.get('http_agent'))
- # if we're ok with getting a 304, set the timestamp in the
+ # if we're ok with getting a 304, set the timestamp in the
# header, otherwise make sure we don't get a cached copy
if last_mod_time and not force:
tstamp = last_mod_time.strftime('%a, %d %b %Y %H:%M:%S +0000')
@@ -419,4 +436,3 @@ def fetch_url(module, url, data=None, headers=None, method=None,
info.update(dict(msg="Request failed: %s" % str(e), status=code))
return r, info
-
diff --git a/lib/ansible/runner/connection_plugins/chroot.py b/lib/ansible/runner/connection_plugins/chroot.py
index 38c8af7..6b5b677 100644
--- a/lib/ansible/runner/connection_plugins/chroot.py
+++ b/lib/ansible/runner/connection_plugins/chroot.py
@@ -1,5 +1,6 @@
# Based on local.py (c) 2012, Michael DeHaan <michael.deh...@gmail.com>
# (c) 2013, Maykel Moya <mm...@speedyrails.com>
+# (c) 2015, Toshio Kuratomi <tkurat...@ansible.com>
#
# This file is part of Ansible
#
@@ -15,16 +16,21 @@
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
import distutils.spawn
import traceback
import os
-import shutil
+import shlex
import subprocess
from ansible import errors
from ansible import utils
+from ansible.utils.unicode import to_bytes
from ansible.callbacks import vvv
+BUFSIZE = 65536
+
class Connection(object):
''' Local chroot based connections '''
@@ -60,8 +66,25 @@ class Connection(object):
return self
- def exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None):
- ''' run a command on the chroot '''
+ def _generate_cmd(self, executable, cmd):
+ if executable:
+ local_cmd = [self.chroot_cmd, self.chroot, executable, '-c', cmd]
+ else:
+ # Prior to Python 2.7.3, shlex couldn't handle unicode type strings
+ cmd = to_bytes(cmd)
+ cmd = shlex.split(cmd)
+ local_cmd = [self.chroot_cmd, self.chroot]
+ local_cmd += cmd
+ return local_cmd
+
+ def _buffered_exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None, stdin=subprocess.PIPE):
+ ''' run a command on the chroot. This is only needed for implementing
+ put_file() and fetch_file() so that we don't have to read the whole file
+ into memory.
+
+ Compared to exec_command() it loses some niceties, like being able to
+ return the process's exit code immediately.
+ '''
if su or su_user:
raise errors.AnsibleError("Internal Error: this module does not support running commands via su")
@@ -70,60 +93,68 @@ class Connection(object):
raise errors.AnsibleError("Internal Error: this module does not support optimized module pipelining")
# We enter chroot as root so sudo stuff can be ignored
-
- if executable:
- local_cmd = [self.chroot_cmd, self.chroot, executable, '-c', cmd]
- else:
- local_cmd = '%s "%s" %s' % (self.chroot_cmd, self.chroot, cmd)
+ local_cmd = self._generate_cmd(executable, cmd)
vvv("EXEC %s" % (local_cmd), host=self.chroot)
- p = subprocess.Popen(local_cmd, shell=isinstance(local_cmd, basestring),
+ p = subprocess.Popen(local_cmd, shell=False,
cwd=self.runner.basedir,
- stdin=subprocess.PIPE,
+ stdin=stdin,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ return p
+
+ def exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None):
+ ''' run a command on the chroot '''
+
+ p = self._buffered_exec_command(cmd, tmp_path, become_user, sudoable, executable, in_data)
+
stdout, stderr = p.communicate()
return (p.returncode, '', stdout, stderr)
def put_file(self, in_path, out_path):
''' transfer a file from local to chroot '''
- if not out_path.startswith(os.path.sep):
- out_path = os.path.join(os.path.sep, out_path)
- normpath = os.path.normpath(out_path)
- out_path = os.path.join(self.chroot, normpath[1:])
-
vvv("PUT %s TO %s" % (in_path, out_path), host=self.chroot)
- if not os.path.exists(in_path):
- raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path)
+
try:
- shutil.copyfile(in_path, out_path)
- except shutil.Error:
- traceback.print_exc()
- raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path))
+ with open(in_path, 'rb') as in_file:
+ try:
+ p = self._buffered_exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), None, stdin=in_file)
+ except OSError:
+ raise errors.AnsibleError("chroot connection requires dd command in the chroot")
+ try:
+ stdout, stderr = p.communicate()
+ except:
+ traceback.print_exc()
+ raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path))
+ if p.returncode != 0:
+ raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr))
except IOError:
- traceback.print_exc()
- raise errors.AnsibleError("failed to transfer file to %s" % out_path)
+ raise errors.AnsibleError("file or module does not exist at: %s" % in_path)
def fetch_file(self, in_path, out_path):
''' fetch a file from chroot to local '''
- if not in_path.startswith(os.path.sep):
- in_path = os.path.join(os.path.sep, in_path)
- normpath = os.path.normpath(in_path)
- in_path = os.path.join(self.chroot, normpath[1:])
-
vvv("FETCH %s TO %s" % (in_path, out_path), host=self.chroot)
- if not os.path.exists(in_path):
- raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path)
+
try:
- shutil.copyfile(in_path, out_path)
- except shutil.Error:
- traceback.print_exc()
- raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path))
- except IOError:
- traceback.print_exc()
- raise errors.AnsibleError("failed to transfer file to %s" % out_path)
+ p = self._buffered_exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), None)
+ except OSError:
+ raise errors.AnsibleError("chroot connection requires dd command in the chroot")
+
+ with open(out_path, 'wb+') as out_file:
+ try:
+ chunk = p.stdout.read(BUFSIZE)
+ while chunk:
+ out_file.write(chunk)
+ chunk = p.stdout.read(BUFSIZE)
+ except:
+ traceback.print_exc()
+ raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path))
+ stdout, stderr = p.communicate()
+ if p.returncode != 0:
+ raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr))
+
def close(self):
''' terminate the connection; nothing to do here '''
diff --git a/lib/ansible/runner/connection_plugins/jail.py b/lib/ansible/runner/connection_plugins/jail.py
index b721ad6..685349e 100644
--- a/lib/ansible/runner/connection_plugins/jail.py
+++ b/lib/ansible/runner/connection_plugins/jail.py
@@ -1,6 +1,7 @@
# Based on local.py (c) 2012, Michael DeHaan <michael.deh...@gmail.com>
# and chroot.py (c) 2013, Maykel Moya <mm...@speedyrails.com>
# (c) 2013, Michael Scherer <m...@zarb.org>
+# (c) 2015, Toshio Kuratomi <tkurat...@ansible.com>
#
# This file is part of Ansible
#
@@ -16,17 +17,22 @@
#
# You should have received a copy of the GNU General Public License
# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
import distutils.spawn
import traceback
import os
-import shutil
+import shlex
import subprocess
from ansible import errors
+from ansible.utils.unicode import to_bytes
from ansible.callbacks import vvv
+BUFSIZE = 65536
+
class Connection(object):
- ''' Local chroot based connections '''
+ ''' Local BSD Jail based connections '''
def _search_executable(self, executable):
cmd = distutils.spawn.find_executable(executable)
@@ -54,8 +60,6 @@ class Connection(object):
# remove \n
return stdout[:-1]
-
-
def __init__(self, runner, host, port, *args, **kwargs):
self.jail = host
self.runner = runner
@@ -67,7 +71,7 @@ class Connection(object):
self.jls_cmd = self._search_executable('jls')
self.jexec_cmd = self._search_executable('jexec')
-
+
if not self.jail in self.list_jails():
raise errors.AnsibleError("incorrect jail name %s" % self.jail)
@@ -77,9 +81,9 @@ class Connection(object):
self.port = port
def connect(self, port=None):
- ''' connect to the chroot; nothing to do here '''
+ ''' connect to the jail; nothing to do here '''
- vvv("THIS IS A LOCAL CHROOT DIR", host=self.jail)
+ vvv("THIS IS A LOCAL JAIL DIR", host=self.jail)
return self
@@ -88,11 +92,21 @@ class Connection(object):
if executable:
local_cmd = [self.jexec_cmd, self.jail, executable, '-c', cmd]
else:
- local_cmd = '%s "%s" %s' % (self.jexec_cmd, self.jail, cmd)
+ # Prior to Python 2.7.3, shlex couldn't handle unicode type strings
+ cmd = to_bytes(cmd)
+ cmd = shlex.split(cmd)
+ local_cmd = [self.jexec_cmd, self.jail]
+ local_cmd += cmd
return local_cmd
- def exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None):
- ''' run a command on the chroot '''
+ def _buffered_exec_command(self, cmd, tmp_path, sudo_user=None, sudoable=False, executable='/bin/sh', in_data=None, su=None, su_user=None, stdin=subprocess.PIPE):
+ '''run a command on the jail. This is only needed for implementing
+ put_file() and fetch_file() so that we don't have to read the whole
+ file into memory.
+
+ Compared to exec_command() it loses some niceties, like
+ being able to return the process's exit code immediately.
+ '''
if su or su_user:
raise errors.AnsibleError("Internal Error: this module does not support running commands via su")
@@ -104,47 +118,63 @@ class Connection(object):
local_cmd = self._generate_cmd(executable, cmd)
vvv("EXEC %s" % (local_cmd), host=self.jail)
- p = subprocess.Popen(local_cmd, shell=isinstance(local_cmd, basestring),
+ p = subprocess.Popen(local_cmd, shell=False,
cwd=self.runner.basedir,
- stdin=subprocess.PIPE,
+ stdin=stdin,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ return p
- stdout, stderr = p.communicate()
- return (p.returncode, '', stdout, stderr)
+ def exec_command(self, cmd, tmp_path, become_user=None, sudoable=False, executable='/bin/sh', in_data=None):
+ ''' run a command on the jail '''
- def _normalize_path(self, path, prefix):
- if not path.startswith(os.path.sep):
- path = os.path.join(os.path.sep, path)
- normpath = os.path.normpath(path)
- return os.path.join(prefix, normpath[1:])
+ p = self._buffered_exec_command(cmd, tmp_path, become_user, sudoable, executable, in_data)
- def _copy_file(self, in_path, out_path):
- if not os.path.exists(in_path):
- raise errors.AnsibleFileNotFound("file or module does not exist: %s" % in_path)
- try:
- shutil.copyfile(in_path, out_path)
- except shutil.Error:
- traceback.print_exc()
- raise errors.AnsibleError("failed to copy: %s and %s are the same" % (in_path, out_path))
- except IOError:
- traceback.print_exc()
- raise errors.AnsibleError("failed to transfer file to %s" % out_path)
+ stdout, stderr = p.communicate()
+ return (p.returncode, '', stdout, stderr)
def put_file(self, in_path, out_path):
- ''' transfer a file from local to chroot '''
+ ''' transfer a file from local to jail '''
- out_path = self._normalize_path(out_path, self.get_jail_path())
vvv("PUT %s TO %s" % (in_path, out_path), host=self.jail)
- self._copy_file(in_path, out_path)
+ try:
+ with open(in_path, 'rb') as in_file:
+ try:
+ p = self._buffered_exec_command('dd of=%s bs=%s' % (out_path, BUFSIZE), None, stdin=in_file)
+ except OSError:
+ raise errors.AnsibleError("jail connection requires dd command in the jail")
+ try:
+ stdout, stderr = p.communicate()
+ except:
+ traceback.print_exc()
+ raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path))
+ if p.returncode != 0:
+ raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr))
+ except IOError:
+ raise errors.AnsibleError("file or module does not exist at: %s" % in_path)
def fetch_file(self, in_path, out_path):
- ''' fetch a file from chroot to local '''
+ ''' fetch a file from jail to local '''
- in_path = self._normalize_path(in_path, self.get_jail_path())
vvv("FETCH %s TO %s" % (in_path, out_path), host=self.jail)
- self._copy_file(in_path, out_path)
+ try:
+ p = self._buffered_exec_command('dd if=%s bs=%s' % (in_path, BUFSIZE), None)
+ except OSError:
+ raise errors.AnsibleError("jail connection requires dd command in the jail")
+
+ with open(out_path, 'wb+') as out_file:
+ try:
+ chunk = p.stdout.read(BUFSIZE)
+ while chunk:
+ out_file.write(chunk)
+ chunk = p.stdout.read(BUFSIZE)
+ except:
+ traceback.print_exc()
+ raise errors.AnsibleError("failed to transfer file %s to %s" % (in_path, out_path))
+ stdout, stderr = p.communicate()
+ if p.returncode != 0:
+ raise errors.AnsibleError("failed to transfer file %s to %s:\n%s\n%s" % (in_path, out_path, stdout, stderr))
def close(self):
''' terminate the connection; nothing to do here '''
diff --git a/lib/ansible/utils/unicode.py b/lib/ansible/utils/unicode.py
new file mode 100644
index 0000000..2cff2e5
--- /dev/null
+++ b/lib/ansible/utils/unicode.py
@@ -0,0 +1,253 @@
+# (c) 2012-2014, Toshio Kuratomi <a.bad...@gmail.com>
+#
+# This file is part of Ansible
+#
+# Ansible is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# Ansible is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Ansible. If not, see <http://www.gnu.org/licenses/>.
+
+# Make coding more python3-ish
+from __future__ import (absolute_import, division, print_function)
+__metaclass__ = type
+
+from six import string_types, text_type, binary_type, PY3
+
+# to_bytes and to_unicode were written by Toshio Kuratomi for the
+# python-kitchen library https://pypi.python.org/pypi/kitchen
+# They are licensed in kitchen under the terms of the GPLv2+
+# They were copied and modified for use in ansible by Toshio in Jan 2015
+# (simply removing the deprecated features)
+
+#: Aliases for the utf-8 codec
+_UTF8_ALIASES = frozenset(('utf-8', 'UTF-8', 'utf8', 'UTF8', 'utf_8', 'UTF_8',
+ 'utf', 'UTF', 'u8', 'U8'))
+#: Aliases for the latin-1 codec
+_LATIN1_ALIASES = frozenset(('latin-1', 'LATIN-1', 'latin1', 'LATIN1',
+ 'latin', 'LATIN', 'l1', 'L1', 'cp819', 'CP819', '8859', 'iso8859-1',
+ 'ISO8859-1', 'iso-8859-1', 'ISO-8859-1'))
+
+# EXCEPTION_CONVERTERS is defined below due to using to_unicode
+
+if PY3:
+ basestring = (str, bytes)
+
+def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None):
+ '''Convert an object into a :class:`unicode` string
+
+ :arg obj: Object to convert to a :class:`unicode` string. This should
+ normally be a byte :class:`str`
+ :kwarg encoding: What encoding to try converting the byte :class:`str` as.
+ Defaults to :term:`utf-8`
+ :kwarg errors: If errors are found while decoding, perform this action.
+ Defaults to ``replace`` which replaces the invalid bytes with
+ a character that means the bytes were unable to be decoded. Other
+ values are the same as the error handling schemes in the `codec base
+ classes
+ <http://docs.python.org/library/codecs.html#codec-base-classes>`_.
+ For instance ``strict`` which raises an exception and ``ignore`` which
+ simply omits the non-decodable characters.
+ :kwarg nonstring: How to treat nonstring values. Possible values are:
+
+ :simplerepr: Attempt to call the object's "simple representation"
+ method and return that value. Python-2.3+ has two methods that
+ try to return a simple representation: :meth:`object.__unicode__`
+ and :meth:`object.__str__`. We first try to get a usable value
+ from :meth:`object.__unicode__`. If that fails we try the same
+ with :meth:`object.__str__`.
+ :empty: Return an empty :class:`unicode` string
+ :strict: Raise a :exc:`TypeError`
+ :passthru: Return the object unchanged
+ :repr: Attempt to return a :class:`unicode` string of the repr of the
+ object
+
+ Default is ``simplerepr``
+
+ :raises TypeError: if :attr:`nonstring` is ``strict`` and
+ a non-:class:`basestring` object is passed in or if :attr:`nonstring`
+ is set to an unknown value
+ :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and
+ :attr:`obj` is not decodable using the given encoding
+ :returns: :class:`unicode` string or the original object depending on the
+ value of :attr:`nonstring`.
+
+ Usually this should be used on a byte :class:`str` but it can take both
+ byte :class:`str` and :class:`unicode` strings intelligently. Nonstring
+ objects are handled in different ways depending on the setting of the
+ :attr:`nonstring` parameter.
+
+ The default values of this function are set so as to always return
+ a :class:`unicode` string and never raise an error when converting from
+ a byte :class:`str` to a :class:`unicode` string. However, when you do
+ not pass validly encoded text (or a nonstring object), you may end up with
+ output that you don't expect. Be sure you understand the requirements of
+ your data, not just ignore errors by passing it through this function.
+ '''
+ # Could use isbasestring/isunicode here but we want this code to be as
+ # fast as possible
+ if isinstance(obj, basestring):
+ if isinstance(obj, text_type):
+ return obj
+ if encoding in _UTF8_ALIASES:
+ return text_type(obj, 'utf-8', errors)
+ if encoding in _LATIN1_ALIASES:
+ return text_type(obj, 'latin-1', errors)
+ return obj.decode(encoding, errors)
+
+ if not nonstring:
+ nonstring = 'simplerepr'
+ if nonstring == 'empty':
+ return u''
+ elif nonstring == 'passthru':
+ return obj
+ elif nonstring == 'simplerepr':
+ try:
+ simple = obj.__unicode__()
+ except (AttributeError, UnicodeError):
+ simple = None
+ if not simple:
+ try:
+ simple = text_type(obj)
+ except UnicodeError:
+ try:
+ simple = obj.__str__()
+ except (UnicodeError, AttributeError):
+ simple = u''
+ if isinstance(simple, binary_type):
+ return text_type(simple, encoding, errors)
+ return simple
+ elif nonstring in ('repr', 'strict'):
+ obj_repr = repr(obj)
+ if isinstance(obj_repr, binary_type):
+ obj_repr = text_type(obj_repr, encoding, errors)
+ if nonstring == 'repr':
+ return obj_repr
+ raise TypeError('to_unicode was given "%(obj)s" which is neither'
+ ' a byte string (str) or a unicode string' %
+ {'obj': obj_repr.encode(encoding, 'replace')})
+
+ raise TypeError('nonstring value, %(param)s, is not set to a valid'
+ ' action' % {'param': nonstring})
+
+def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None):
+ '''Convert an object into a byte :class:`str`
+
+ :arg obj: Object to convert to a byte :class:`str`. This should normally
+ be a :class:`unicode` string.
+ :kwarg encoding: Encoding to use to convert the :class:`unicode` string
+ into a byte :class:`str`. Defaults to :term:`utf-8`.
+ :kwarg errors: If errors are found while encoding, perform this action.
+ Defaults to ``replace`` which replaces the invalid bytes with
+ a character that means the bytes were unable to be encoded. Other
+ values are the same as the error handling schemes in the `codec base
+ classes
+ <http://docs.python.org/library/codecs.html#codec-base-classes>`_.
+ For instance ``strict`` which raises an exception and ``ignore`` which
+ simply omits the non-encodable characters.
+ :kwarg nonstring: How to treat nonstring values. Possible values are:
+
+ :simplerepr: Attempt to call the object's "simple representation"
+ method and return that value. Python-2.3+ has two methods that
+ try to return a simple representation: :meth:`object.__unicode__`
+ and :meth:`object.__str__`. We first try to get a usable value
+ from :meth:`object.__str__`. If that fails we try the same
+ with :meth:`object.__unicode__`.
+ :empty: Return an empty byte :class:`str`
+ :strict: Raise a :exc:`TypeError`
+ :passthru: Return the object unchanged
+ :repr: Attempt to return a byte :class:`str` of the :func:`repr` of the
+ object
+
+ Default is ``simplerepr``.
+
+ :raises TypeError: if :attr:`nonstring` is ``strict`` and
+ a non-:class:`basestring` object is passed in or if :attr:`nonstring`
+ is set to an unknown value.
+ :raises UnicodeEncodeError: if :attr:`errors` is ``strict`` and all of the
+ bytes of :attr:`obj` are unable to be encoded using :attr:`encoding`.
+ :returns: byte :class:`str` or the original object depending on the value
+ of :attr:`nonstring`.
+
+ .. warning::
+
+ If you pass a byte :class:`str` into this function the byte
+ :class:`str` is returned unmodified. It is **not** re-encoded with
+ the specified :attr:`encoding`. The easiest way to achieve that is::
+
+ to_bytes(to_unicode(text), encoding='utf-8')
+
+ The initial :func:`to_unicode` call will ensure text is
+ a :class:`unicode` string. Then, :func:`to_bytes` will turn that into
+ a byte :class:`str` with the specified encoding.
+
+ Usually, this should be used on a :class:`unicode` string but it can take
+ either a byte :class:`str` or a :class:`unicode` string intelligently.
+ Nonstring objects are handled in different ways depending on the setting
+ of the :attr:`nonstring` parameter.
+
+ The default values of this function are set so as to always return a byte
+ :class:`str` and never raise an error when converting from unicode to
+ bytes. However, when you do not pass an encoding that can validly encode
+ the object (or a non-string object), you may end up with output that you
+ don't expect. Be sure you understand the requirements of your data, not
+ just ignore errors by passing it through this function.
+ '''
+ # Could use isbasestring, isbytestring here but we want this to be as fast
+ # as possible
+ if isinstance(obj, basestring):
+ if isinstance(obj, binary_type):
+ return obj
+ return obj.encode(encoding, errors)
+ if not nonstring:
+ nonstring = 'simplerepr'
+
+ if nonstring == 'empty':
+ return b''
+ elif nonstring == 'passthru':
+ return obj
+ elif nonstring == 'simplerepr':
+ try:
+ simple = binary_type(obj)
+ except UnicodeError:
+ try:
+ simple = obj.__str__()
+ except (AttributeError, UnicodeError):
+ simple = None
+ if not simple:
+ try:
+ simple = obj.__unicode__()
+ except (AttributeError, UnicodeError):
+ simple = b''
+ if isinstance(simple, text_type):
+ simple = simple.encode(encoding, 'replace')
+ return simple
+ elif nonstring in ('repr', 'strict'):
+ try:
+ obj_repr = obj.__repr__()
+ except (AttributeError, UnicodeError):
+ obj_repr = b''
+ if isinstance(obj_repr, text_type):
+ obj_repr = obj_repr.encode(encoding, errors)
+ else:
+ obj_repr = binary_type(obj_repr)
+ if nonstring == 'repr':
+ return obj_repr
+ raise TypeError('to_bytes was given "%(obj)s" which is neither'
+ ' a unicode string or a byte string (str)' % {'obj': obj_repr})
+
+ raise TypeError('nonstring value, %(param)s, is not set to a valid'
+ ' action' % {'param': nonstring})
+
+
+# force the return value of a function to be unicode. Use with partial to
+# ensure that a filter will return unicode values.
+def unicode_wrap(func, *args, **kwargs):
+ return to_unicode(func(*args, **kwargs), nonstring='passthru')
--- End Message ---