Michael Pasternak has uploaded a new change for review.

Change subject: cli: keep own converters utils
......................................................................

cli: keep own converters utils

Change-Id: I092575bea3f5d9175a17d975a1bdfd877e8214f8
Signed-off-by: Michael Pasternak <[email protected]>
---
M src/cli/context.py
M src/cli/terminal.py
A src/ovirtcli/utils/converters.py
3 files changed, 858 insertions(+), 3 deletions(-)


  git pull ssh://gerrit.ovirt.org:29418/ovirt-engine-cli refs/changes/64/7264/1

diff --git a/src/cli/context.py b/src/cli/context.py
index bc3f10a..3ce74cc 100644
--- a/src/cli/context.py
+++ b/src/cli/context.py
@@ -30,7 +30,7 @@
 from cli import platform
 import codecs
 import cStringIO
-from kitchen.text.converters import getwriter
+from ovirtcli.utils.converters import getwriter
 
 
 class ExecutionContext(object):
diff --git a/src/cli/terminal.py b/src/cli/terminal.py
index 346405f..21b0f1a 100644
--- a/src/cli/terminal.py
+++ b/src/cli/terminal.py
@@ -14,8 +14,7 @@
 # limitations under the License.
 #
 
-from kitchen.text.converters import getwriter
-
+from ovirtcli.utils.converters import getwriter
 
 class Terminal(object):
     """Base class for terminal objects."""
diff --git a/src/ovirtcli/utils/converters.py b/src/ovirtcli/utils/converters.py
new file mode 100644
index 0000000..e757aa1
--- /dev/null
+++ b/src/ovirtcli/utils/converters.py
@@ -0,0 +1,856 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2011 Red Hat, Inc.
+#
+# kitchen is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+#
+# kitchen is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with kitchen; if not, see <http://www.gnu.org/licenses/>
+#
+# Authors:
+#   Toshio Kuratomi <[email protected]>
+#   Seth Vidal
+#
+# Portions of code taken from yum/i18n.py and
+# python-fedora: fedora/textutils.py
+
+'''
+Functions to handle conversion of byte :class:`str` and :class:`unicode`
+strings.
+
+.. versionchanged:: kitchen 0.2a2 ; API kitchen.text 2.0.0
+    Added :func:`~kitchen.text.converters.getwriter`
+
+.. versionchanged:: kitchen 0.2.2  ; API kitchen.text 2.1.0
+    Added :func:`~kitchen.text.converters.exception_to_unicode`,
+    :func:`~kitchen.text.converters.exception_to_bytes`,
+    :data:`~kitchen.text.converters.EXCEPTION_CONVERTERS`,
+    and :data:`~kitchen.text.converters.BYTE_EXCEPTION_CONVERTERS`
+'''
+try:
+    from base64 import b64encode, b64decode
+except ImportError:
+    from kitchen.pycompat24.base64 import b64encode, b64decode
+
+import codecs
+import warnings
+import xml.sax.saxutils
+
+# We need to access b_() for localizing our strings but we'll end up with
+# a circular import if we import it directly.
+import kitchen as k
+from kitchen.pycompat24 import sets
+sets.add_builtin_set()
+
+from kitchen.text.exceptions import ControlCharError, XmlEncodeError
+from kitchen.text.misc import guess_encoding, html_entities_unescape, \
+        process_control_chars
+
+#: Aliases for the utf-8 codec
+_UTF8_ALIASES = frozenset(('utf-8', 'UTF-8', 'utf8', 'UTF8', 'utf_8', 'UTF_8',
+    'utf', 'UTF', 'u8', 'U8'))
+#: Aliases for the latin-1 codec
+_LATIN1_ALIASES = frozenset(('latin-1', 'LATIN-1', 'latin1', 'LATIN1',
+    'latin', 'LATIN', 'l1', 'L1', 'cp819', 'CP819', '8859', 'iso8859-1',
+    'ISO8859-1', 'iso-8859-1', 'ISO-8859-1'))
+
+# EXCEPTION_CONVERTERS is defined below due to using to_unicode
+
+def to_unicode(obj, encoding='utf-8', errors='replace', nonstring=None,
+        non_string=None):
+    '''Convert an object into a :class:`unicode` string
+
+    :arg obj: Object to convert to a :class:`unicode` string.  This should
+        normally be a byte :class:`str`
+    :kwarg encoding: What encoding to try converting the byte :class:`str` as.
+        Defaults to :term:`utf-8`
+    :kwarg errors: If errors are found while decoding, perform this action.
+        Defaults to ``replace`` which replaces the invalid bytes with
+        a character that means the bytes were unable to be decoded.  Other
+        values are the same as the error handling schemes in the `codec base
+        classes
+        <http://docs.python.org/library/codecs.html#codec-base-classes>`_.
+        For instance ``strict`` which raises an exception and ``ignore`` which
+        simply omits the non-decodable characters.
+    :kwarg nonstring: How to treat nonstring values.  Possible values are:
+
+        :simplerepr: Attempt to call the object's "simple representation"
+            method and return that value.  Python-2.3+ has two methods that
+            try to return a simple representation: :meth:`object.__unicode__`
+            and :meth:`object.__str__`.  We first try to get a usable value
+            from :meth:`object.__unicode__`.  If that fails we try the same
+            with :meth:`object.__str__`.
+        :empty: Return an empty :class:`unicode` string
+        :strict: Raise a :exc:`TypeError`
+        :passthru: Return the object unchanged
+        :repr: Attempt to return a :class:`unicode` string of the repr of the
+            object
+
+        Default is ``simplerepr``
+
+    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead
+    :raises TypeError: if :attr:`nonstring` is ``strict`` and
+        a non-:class:`basestring` object is passed in or if :attr:`nonstring`
+        is set to an unknown value
+    :raises UnicodeDecodeError: if :attr:`errors` is ``strict`` and
+        :attr:`obj` is not decodable using the given encoding
+    :returns: :class:`unicode` string or the original object depending on the
+        value of :attr:`nonstring`.
+
+    Usually this should be used on a byte :class:`str` but it can take both
+    byte :class:`str` and :class:`unicode` strings intelligently.  Nonstring
+    objects are handled in different ways depending on the setting of the
+    :attr:`nonstring` parameter.
+
+    The default values of this function are set so as to always return
+    a :class:`unicode` string and never raise an error when converting from
+    a byte :class:`str` to a :class:`unicode` string.  However, when you do
+    not pass validly encoded text (or a nonstring object), you may end up with
+    output that you don't expect.  Be sure you understand the requirements of
+    your data, not just ignore errors by passing it through this function.
+
+    .. versionchanged:: 0.2.1a2
+        Deprecated :attr:`non_string` in favor of :attr:`nonstring` parameter and changed
+        default value to ``simplerepr``
+    '''
+    if isinstance(obj, basestring):
+        if isinstance(obj, unicode):
+            return obj
+        if encoding in _UTF8_ALIASES:
+            return unicode(obj, 'utf-8', errors)
+        if encoding in _LATIN1_ALIASES:
+            return unicode(obj, 'latin-1', errors)
+        return obj.decode(encoding, errors)
+
+    if non_string:
+        warnings.warn(k.b_('non_string is a deprecated parameter of'
+            ' to_unicode().  Use nonstring instead'), DeprecationWarning,
+            stacklevel=2)
+        if not nonstring:
+            nonstring = non_string
+
+    if not nonstring:
+        nonstring = 'simplerepr'
+    if nonstring == 'empty':
+        return u''
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'simplerepr':
+        try:
+            simple = obj.__unicode__()
+        except (AttributeError, UnicodeError):
+            simple = None
+        if not simple:
+            try:
+                simple = str(obj)
+            except UnicodeError:
+                try:
+                    simple = obj.__str__()
+                except (UnicodeError, AttributeError):
+                    simple = u''
+        if not isinstance(simple, unicode):
+            return unicode(simple, encoding, errors)
+        return simple
+    elif nonstring in ('repr', 'strict'):
+        obj_repr = repr(obj)
+        if not isinstance(obj_repr, unicode):
+            obj_repr = unicode(obj_repr, encoding, errors)
+        if nonstring == 'repr':
+            return obj_repr
+        raise TypeError(k.b_('to_unicode was given "%(obj)s" which is neither'
+            ' a byte string (str) or a unicode string') %
+            {'obj': obj_repr.encode(encoding, 'replace')})
+
+    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
+        ' action') % {'param': nonstring})
+
+def to_bytes(obj, encoding='utf-8', errors='replace', nonstring=None,
+        non_string=None):
+    '''Convert an object into a byte :class:`str`
+
+    :arg obj: Object to convert to a byte :class:`str`.  This should normally
+        be a :class:`unicode` string.
+    :kwarg encoding: Encoding to use to convert the :class:`unicode` string
+        into a byte :class:`str`.  Defaults to :term:`utf-8`.
+    :kwarg errors: If errors are found while encoding, perform this action.
+        Defaults to ``replace`` which replaces the invalid bytes with
+        a character that means the bytes were unable to be encoded.  Other
+        values are the same as the error handling schemes in the `codec base
+        classes
+        <http://docs.python.org/library/codecs.html#codec-base-classes>`_.
+        For instance ``strict`` which raises an exception and ``ignore`` which
+        simply omits the non-encodable characters.
+    :kwarg nonstring: How to treat nonstring values.  Possible values are:
+
+        :simplerepr: Attempt to call the object's "simple representation"
+            method and return that value.  Python-2.3+ has two methods that
+            try to return a simple representation: :meth:`object.__unicode__`
+            and :meth:`object.__str__`.  We first try to get a usable value
+            from :meth:`object.__str__`.  If that fails we try the same
+            with :meth:`object.__unicode__`.
+        :empty: Return an empty byte :class:`str`
+        :strict: Raise a :exc:`TypeError`
+        :passthru: Return the object unchanged
+        :repr: Attempt to return a byte :class:`str` of the :func:`repr` of the
+            object
+
+        Default is ``simplerepr``.
+
+    :kwarg non_string: *Deprecated* Use :attr:`nonstring` instead.
+    :raises TypeError: if :attr:`nonstring` is ``strict`` and
+        a non-:class:`basestring` object is passed in or if :attr:`nonstring`
+        is set to an unknown value.
+    :raises UnicodeEncodeError: if :attr:`errors` is ``strict`` and all of the
+        bytes of :attr:`obj` are unable to be encoded using :attr:`encoding`.
+    :returns: byte :class:`str` or the original object depending on the value
+        of :attr:`nonstring`.
+
+    .. warning:: If you pass a byte :class:`str` into this function the byte
+        :class:`str` is returned unmodified.  It is **not** re-encoded
+        with the specified :attr:`encoding`.
+
+    Usually, this should be used on a :class:`unicode` string but it can take
+    either a byte :class:`str` or a :class:`unicode` string intelligently.
+    Nonstring objects are handled in different ways depending on the setting
+    of the :attr:`nonstring` parameter.
+
+    The default values of this function are set so as to always return a byte
+    :class:`str` and never raise an error when converting from unicode to
+    bytes.  However, when you do not pass an encoding that can validly encode
+    the object (or a non-string object), you may end up with output that you
+    don't expect.  Be sure you understand the requirements of your data, not
+    just ignore errors by passing it through this function.
+
+    .. versionchanged:: 0.2.1a2
+        Deprecated :attr:`non_string` in favor of :attr:`nonstring` parameter
+        and changed default value to ``simplerepr``
+    '''
+    if isinstance(obj, basestring):
+        if isinstance(obj, str):
+            return obj
+        return obj.encode(encoding, errors)
+    if non_string:
+        warnings.warn(k.b_('non_string is a deprecated parameter of'
+            ' to_bytes().  Use nonstring instead'), DeprecationWarning,
+            stacklevel=2)
+        if not nonstring:
+            nonstring = non_string
+    if not nonstring:
+        nonstring = 'simplerepr'
+
+    if nonstring == 'empty':
+        return ''
+    elif nonstring == 'passthru':
+        return obj
+    elif nonstring == 'simplerepr':
+        try:
+            simple = str(obj)
+        except UnicodeError:
+            try:
+                simple = obj.__str__()
+            except (AttributeError, UnicodeError):
+                simple = None
+        if not simple:
+            try:
+                simple = obj.__unicode__()
+            except (AttributeError, UnicodeError):
+                simple = ''
+        if isinstance(simple, unicode):
+            simple = simple.encode(encoding, 'replace')
+        return simple
+    elif nonstring in ('repr', 'strict'):
+        try:
+            obj_repr = obj.__repr__()
+        except (AttributeError, UnicodeError):
+            obj_repr = ''
+        if isinstance(obj_repr, unicode):
+            obj_repr = obj_repr.encode(encoding, errors)
+        else:
+            obj_repr = str(obj_repr)
+        if nonstring == 'repr':
+            return obj_repr
+        raise TypeError(k.b_('to_bytes was given "%(obj)s" which is neither'
+            ' a unicode string or a byte string (str)') % {'obj': obj_repr})
+
+    raise TypeError(k.b_('nonstring value, %(param)s, is not set to a valid'
+        ' action') % {'param': nonstring})
+
+def getwriter(encoding):
+    '''Return a :class:`codecs.StreamWriter` that resists tracing back.
+
+    :arg encoding: Encoding to use for transforming :class:`unicode` strings
+        into byte :class:`str`.
+    :rtype: :class:`codecs.StreamWriter`
+    :returns: :class:`~codecs.StreamWriter` that you can instantiate to wrap output
+        streams to automatically translate :class:`unicode` strings into :attr:`encoding`.
+
+    This is a reimplementation of :func:`codecs.getwriter` that returns
+    a :class:`~codecs.StreamWriter` that resists issuing tracebacks.  The
+    :class:`~codecs.StreamWriter` that is returned uses
+    :func:`kitchen.text.converters.to_bytes` to convert :class:`unicode`
+    strings into byte :class:`str`.  The departures from
+    :func:`codecs.getwriter` are:
+
+    1) The :class:`~codecs.StreamWriter` that is returned will take byte
+       :class:`str` as well as :class:`unicode` strings.  Any byte
+       :class:`str` will be passed through unmodified.
+    2) The default error handler for unknown bytes is to ``replace`` the bytes
+       with the unknown character (``?`` in most ascii-based encodings, ``�``
+       in the utf encodings) whereas :func:`codecs.getwriter` defaults to
+       ``strict``.  Like :class:`codecs.StreamWriter`, the returned
+       :class:`~codecs.StreamWriter` can have its error handler changed in
+       code by setting ``stream.errors = 'new_handler_name'``
+
+    Example usage::
+
+        $ LC_ALL=C python
+        >>> import sys
+        >>> from kitchen.text.converters import getwriter
+        >>> UTF8Writer = getwriter('utf-8')
+        >>> unwrapped_stdout = sys.stdout
+        >>> sys.stdout = UTF8Writer(unwrapped_stdout)
+        >>> print 'caf\\xc3\\xa9'
+        café
+        >>> print u'caf\\xe9'
+        café
+        >>> ASCIIWriter = getwriter('ascii')
+        >>> sys.stdout = ASCIIWriter(unwrapped_stdout)
+        >>> print 'caf\\xc3\\xa9'
+        café
+        >>> print u'caf\\xe9'
+        caf?
+
+    .. seealso::
+        API docs for :class:`codecs.StreamWriter` and :func:`codecs.getwriter`
+        and `Print Fails <http://wiki.python.org/moin/PrintFails>`_ on the
+        python wiki.
+
+    .. versionadded:: kitchen 0.2a2, API: kitchen.text 1.1.0
+    '''
+    class _StreamWriter(codecs.StreamWriter):
+        # :W0223: We don't need to implement all methods of StreamWriter.
+        #   This is not the actual class that gets used but a replacement for
+        #   the actual class.
+        # :C0111: We're implementing an API from the stdlib.  Just point
+        #   people at that documentation instead of writing docstrings here.
+        #pylint:disable-msg=W0223,C0111
+        def __init__(self, stream, errors='replace'):
+            codecs.StreamWriter.__init__(self, stream, errors)
+
+        def encode(self, msg, errors='replace'):
+            return (to_bytes(msg, encoding=self.encoding, errors=errors),
+                    len(msg))
+
+    _StreamWriter.encoding = encoding
+    return _StreamWriter
+
+def to_utf8(obj, errors='replace', non_string='passthru'):
+    '''*Deprecated*
+
+    Convert :class:`unicode` to an encoded :term:`utf-8` byte :class:`str`.
+    You should be using :func:`to_bytes` instead::
+
+        to_bytes(obj, encoding='utf-8', non_string='passthru')
+    '''
+    warnings.warn(k.b_('kitchen.text.converters.to_utf8 is deprecated.  Use'
+        ' kitchen.text.converters.to_bytes(obj, encoding="utf-8",'
+        ' nonstring="passthru" instead.'), DeprecationWarning, stacklevel=2)
+    return to_bytes(obj, encoding='utf-8', errors=errors,
+            nonstring=non_string)
+
+### str is also the type name for byte strings so it's not a good name for
+### something that can return unicode strings
+def to_str(obj):
+    '''*Deprecated*
+
+    This function converts something to a byte :class:`str` if it isn't one.
+    It's used to call :func:`str` or :func:`unicode` on the object to get its
+    simple representation without danger of getting a :exc:`UnicodeError`.
+    You should be using :func:`to_unicode` or :func:`to_bytes` explicitly
+    instead.
+
+    If you need :class:`unicode` strings::
+
+        to_unicode(obj, nonstring='simplerepr')
+
+    If you need byte :class:`str`::
+
+        to_bytes(obj, nonstring='simplerepr')
+    '''
+    warnings.warn(k.b_('to_str is deprecated.  Use to_unicode or to_bytes'
+        ' instead.  See the to_str docstring for'
+        ' porting information.'),
+        DeprecationWarning, stacklevel=2)
+    return to_bytes(obj, nonstring='simplerepr')
+
+# Exception message extraction functions
+EXCEPTION_CONVERTERS = (lambda e: to_unicode(e.args[0]), to_unicode)
+''' Tuple of functions to try to use to convert an exception into a string
+    representation.  This is the default value given to
+    :func:`exception_to_unicode`.  Use code like this if you just want to add
+    more possible conversion function::
+
+        from kitchen.text.converters import (EXCEPTION_CONVERTERS,
+                exception_to_unicode)
+        converters = [lambda e: to_unicode(e.value),
+                lambda e: to_unicode(e.value, encoding='euc_jp')]
+        converters.extend(EXCEPTION_CONVERTERS)
+
+    Each function in this list should take the exception as its sole argument
+    and return a string containing the message representing the exception.
+    Ideally the function will return the message as a :class:`unicode` string
+    but the value will be run through :func:`to_unicode` to ensure that it is
+    :class:`unicode` before being returned.
+
+    .. versionadded:: 0.2.2
+'''
+
+BYTE_EXCEPTION_CONVERTERS = (lambda e: to_bytes(e.args[0]), to_bytes)
+''' Tuple of functions to try to use to convert an exception into a string
+    representation.  This tuple is similar to the one in
+    :data:`EXCEPTION_CONVERTERS` but it's used with :func:`exception_to_bytes`
+    instead.  Ideally, these functions should do their best to return the data
+    as a byte :class:`str` but the results will be run through
+    :func:`to_bytes` before being returned.
+
+    .. versionadded:: 0.2.2
+'''
+
+def exception_to_unicode(exc, converters=EXCEPTION_CONVERTERS):
+    '''Convert an exception object into a unicode representation
+
+    :arg exc: Exception object to convert
+    :kwarg converters: List of functions to use to convert the exception into
+        a string.  See :data:`EXCEPTION_CONVERTERS` for the default value and
+        an example of adding another converter to the defaults.  The functions
+        in the list are tried one at a time to see if they can extract
+        a string from the exception.  The first one to do so without raising
+        an exception is used.
+    :returns: :class:`unicode` string representation of the exception.  The
+        value from this will be converted into :class:`unicode` using the
+        :term:`utf-8` encoding before being returned (if you know you need
+        to use an alternate encoding, add a function that does that to the
+        list of functions in :attr:`converters`)
+
+    .. versionadded:: 0.2.2
+    '''
+    msg = u'<exception failed to convert to text>'
+    for func in converters:
+        try:
+            msg = func(exc)
+        except:
+            pass
+        else:
+            break
+    return to_unicode(msg)
+
+def exception_to_bytes(exc, converters=BYTE_EXCEPTION_CONVERTERS):
+    '''Convert an exception object into a str representation
+
+    :arg exc: Exception object to convert
+    :kwarg converters: List of functions to use to convert the exception into
+        a string.  See :data:`BYTE_EXCEPTION_CONVERTERS` for the default value
+        and an example of adding another converter to the defaults.  The
+        functions in the list are tried one at a time to see if they can
+        extract a string from the exception.  The first one to do so without
+        raising an exception is used.
+    :returns: byte :class:`str` representation of the exception.  The value
+        from this will be converted into a byte :class:`str` using the
+        :term:`utf-8` encoding before being returned (if you know you need
+        to use an alternate encoding, add a function that does that to the
+        list of functions in :attr:`converters`)
+
+    .. versionadded:: 0.2.2
+    '''
+    msg = '<exception failed to convert to text>'
+    for func in converters:
+        try:
+            msg = func(exc)
+        except:
+            pass
+        else:
+            break
+    return to_bytes(msg)
+
+#
+# XML Related Functions
+#
+
+def unicode_to_xml(string, encoding='utf-8', attrib=False,
+        control_chars='replace'):
+    '''Take a :class:`unicode` string and turn it into a byte :class:`str`
+    suitable for xml
+
+    :arg string: :class:`unicode` string to encode into an XML compatible byte
+        :class:`str`
+    :kwarg encoding: encoding to use for the returned byte :class:`str`.
+        Default is to encode to :term:`UTF-8`.  If some of the characters in
+        :attr:`string` are not encodable in this encoding, the unknown
+        characters will be entered into the output string using xml character
+        references.
+    :kwarg attrib: If :data:`True`, quote the string for use in an xml
+        attribute.  If :data:`False` (default), quote for use in an xml text
+        field.
+    :kwarg control_chars: :term:`control characters` are not allowed in XML
+        documents.  When we encounter those we need to know what to do.  Valid
+        options are:
+
+        :replace: (default) Replace the control characters with ``?``
+        :ignore: Remove the characters altogether from the output
+        :strict: Raise an :exc:`~kitchen.text.exceptions.XmlEncodeError`  when
+            we encounter a :term:`control character`
+
+    :raises kitchen.text.exceptions.XmlEncodeError: If :attr:`control_chars`
+        is set to ``strict`` and the string to be made suitable for output to
+        xml contains :term:`control characters` or if :attr:`string` is not
+        a :class:`unicode` string then we raise this exception.
+    :raises ValueError: If :attr:`control_chars` is set to something other than
+        ``replace``, ``ignore``, or ``strict``.
+    :rtype: byte :class:`str`
+    :returns: representation of the :class:`unicode` string as a valid XML
+        byte :class:`str`
+
+    XML files consist mainly of text encoded using a particular charset.  XML
+    also denies the use of certain bytes in the encoded text (example: ``ASCII
+    Null``).  There are also special characters that must be escaped if they
+    are present in the input (example: ``<``).  This function takes care of
+    all of those issues for you.
+
+    There are a few different ways to use this function depending on your
+    needs.  The simplest invocation is like this::
+
+       unicode_to_xml(u'String with non-ASCII characters: <"á と">')
+
+    This will return the following to you, encoded in :term:`utf-8`::
+
+      'String with non-ASCII characters: &lt;"á と"&gt;'
+
+    Pretty straightforward.  Now, what if you need to encode your document in
+    something other than :term:`utf-8`?  For instance, ``latin-1``?  Let's
+    see::
+
+       unicode_to_xml(u'String with non-ASCII characters: <"á と">', encoding='latin-1')
+       'String with non-ASCII characters: &lt;"á &#12392;"&gt;'
+
+    Because the ``と`` character is not available in the ``latin-1`` charset,
+    it is replaced with ``&#12392;`` in our output.  This is an xml character
+    reference which represents the character at unicode codepoint ``12392``, the
+    ``と`` character.
+
+    When you want to reverse this, use :func:`xml_to_unicode` which will turn
+    a byte :class:`str` into a :class:`unicode` string and replace the xml
+    character references with the unicode characters.
+
+    XML also has the quirk of not allowing :term:`control characters` in its
+    output.  The :attr:`control_chars` parameter allows us to specify what to
+    do with those.  For use cases that don't need absolute character by
+    character fidelity (example: holding strings that will just be used for
+    display in a GUI app later), the default value of ``replace`` works well::
+
+        unicode_to_xml(u'String with disallowed control chars: \u0000\u0007')
+        'String with disallowed control chars: ??'
+
+    If you do need to be able to reproduce all of the characters at a later
+    date (examples: if the string is a key value in a database or a path on a
+    filesystem) you have many choices.  Here are a few that rely on ``utf-7``,
+    a verbose encoding that encodes :term:`control characters` (as well as
+    non-:term:`ASCII` unicode values) to characters from within the
+    :term:`ASCII` printable characters.  The good thing about doing this is
+    that the code is pretty simple.  You just need to use ``utf-7`` both when
+    encoding the field for xml and when decoding it for use in your python
+    program::
+
+        unicode_to_xml(u'String with unicode: と and control char: \u0007', encoding='utf7')
+        'String with unicode: +MGg and control char: +AAc-'
+        # [...]
+        xml_to_unicode('String with unicode: +MGg and control char: +AAc-', encoding='utf7')
+        u'String with unicode: と and control char: \u0007'
+
+    As you can see, the ``utf-7`` encoding will transform even characters that
+    would be representable in :term:`utf-8`.  This can be a drawback if you
+    want unicode characters in the file to be readable without being decoded
+    first.  You can work around this with increased complexity in your
+    application code::
+
+        encoding = 'utf-8'
+        u_string = u'String with unicode: と and control char: \u0007'
+        try:
+            # First attempt to encode to utf8
+            data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
+        except XmlEncodeError:
+            # Fallback to utf-7
+            encoding = 'utf-7'
+            data = unicode_to_xml(u_string, encoding=encoding, errors='strict')
+        write_tag('<mytag encoding=%s>%s</mytag>' % (encoding, data))
+        # [...]
+        encoding = tag.attributes.encoding
+        u_string = xml_to_unicode(u_string, encoding=encoding)
+
+    Using code similar to that, you can have some fields encoded using your
+    default encoding and fallback to ``utf-7`` if there are :term:`control
+    characters` present.
+
+    .. note:: If your goal is to preserve the :term:`control characters` you
+        cannot save the entire file as ``utf-7`` and set the xml encoding
+        parameter to ``utf-7`` if your goal is to preserve the :term:`control
+        characters`.  Because XML doesn't allow :term:`control characters`,
+        you have to encode those separate from any encoding work that the XML
+        parser itself knows about.
+
+    .. seealso::
+
+        :func:`bytes_to_xml`
+            if you're dealing with bytes that are non-text or of an unknown
+            encoding that you must preserve on a byte for byte level.
+        :func:`guess_encoding_to_xml`
+            if you're dealing with strings in unknown encodings that you don't
+            need to save with char-for-char fidelity.
+    '''
+    if not string:
+        # Small optimization
+        return ''
+    try:
+        process_control_chars(string, strategy=control_chars)
+    except TypeError:
+        raise XmlEncodeError(k.b_('unicode_to_xml must have a unicode type as'
+                ' the first argument.  Use bytes_string_to_xml for byte'
+                ' strings.'))
+    except ValueError:
+        raise ValueError(k.b_('The control_chars argument to unicode_to_xml'
+                ' must be one of ignore, replace, or strict'))
+    except ControlCharError, exc:
+        raise XmlEncodeError(exc.args[0])
+
+    string = string.encode(encoding, 'xmlcharrefreplace')
+
+    # Escape characters that have special meaning in xml
+    if attrib:
+        string = xml.sax.saxutils.escape(string, entities={'"':"&quot;"})
+    else:
+        string = xml.sax.saxutils.escape(string)
+    return string
+
+def xml_to_unicode(byte_string, encoding='utf-8', errors='replace'):
+    '''Transform a byte :class:`str` from an xml file into a :class:`unicode`
+    string
+
+    :arg byte_string: byte :class:`str` to decode
+    :kwarg encoding: encoding that the byte :class:`str` is in
+    :kwarg errors: What to do if not every character is  valid in
+        :attr:`encoding`.  See the :func:`to_unicode` documentation for legal
+        values.
+    :rtype: :class:`unicode` string
+    :returns: string decoded from :attr:`byte_string`
+
+    This function attempts to reverse what :func:`unicode_to_xml` does.  It
+    takes a byte :class:`str` (presumably read in from an xml file) and
+    expands all the html entities into unicode characters and decodes the byte
+    :class:`str` into a :class:`unicode` string.  One thing it cannot do is
+    restore any :term:`control characters` that were removed prior to
+    inserting into the file.  If you need to keep such characters you need to
+    use :func:`xml_to_bytes` and :func:`bytes_to_xml` or use one of the
+    strategies documented in :func:`unicode_to_xml` instead.
+    '''
+    string = to_unicode(byte_string, encoding=encoding, errors=errors)
+    string = html_entities_unescape(string)
+    return string
+
+def byte_string_to_xml(byte_string, input_encoding='utf-8', errors='replace',
+        output_encoding='utf-8', attrib=False, control_chars='replace'):
+    '''Make sure a byte :class:`str` is validly encoded for xml output
+
+    :arg byte_string: Byte :class:`str` to turn into valid xml output
+    :kwarg input_encoding: Encoding of :attr:`byte_string`.  Default ``utf-8``
+    :kwarg errors: How to handle errors encountered while decoding the
+        :attr:`byte_string` into :class:`unicode` at the beginning of the
+        process.  Values are:
+
+        :replace: (default) Replace the invalid bytes with a ``?``
+        :ignore: Remove the characters altogether from the output
+        :strict: Raise an :exc:`UnicodeDecodeError` when we encounter
+            a non-decodable character
+
+    :kwarg output_encoding: Encoding for the xml file that this string will go
+        into.  Default is ``utf-8``.  If all the characters in
+        :attr:`byte_string` are not encodable in this encoding, the unknown
+        characters will be entered into the output string using xml character
+        references.
+    :kwarg attrib: If :data:`True`, quote the string for use in an xml
+        attribute.  If :data:`False` (default), quote for use in an xml text
+        field.
+    :kwarg control_chars: XML does not allow :term:`control characters`.  When
+        we encounter those we need to know what to do.  Valid options are:
+
+        :replace: (default) Replace the :term:`control characters` with ``?``
+        :ignore: Remove the characters altogether from the output
+        :strict: Raise an error when we encounter a :term:`control character`
+
+    :raises XmlEncodeError: If :attr:`byte_string` is not a byte
+        :class:`str`.  Also, if :attr:`control_chars` is set to ``strict`` and
+        the string to be made suitable for output to xml contains
+        :term:`control characters` then we raise this exception.
+    :raises UnicodeDecodeError: If errors is set to ``strict`` and the
+        :attr:`byte_string` contains bytes that are not decodable using
+        :attr:`input_encoding`, this error is raised
+    :rtype: byte :class:`str`
+    :returns: representation of the byte :class:`str` in the output encoding
+        with any bytes that aren't available in xml taken care of.
+
+    Use this when you have a byte :class:`str` representing text that you need
+    to make suitable for output to xml.  There are several cases where this
+    is the case.  For instance, if you need to transform some strings encoded
+    in ``latin-1`` to :term:`utf-8` for output::
+
+        utf8_string = byte_string_to_xml(latin1_string,
+                input_encoding='latin-1')
+
+    If you already have strings in the proper encoding you may still want to
+    use this function to remove :term:`control characters`::
+
+        cleaned_string = byte_string_to_xml(string, input_encoding='utf-8',
+                output_encoding='utf-8')
+
+    .. seealso::
+
+        :func:`unicode_to_xml`
+            for other ideas on using this function
+    '''
+    # Reject anything that is not a byte str up front (unicode input belongs
+    # in unicode_to_xml, as the error message says).
+    if not isinstance(byte_string, str):
+        raise XmlEncodeError(k.b_('byte_string_to_xml can only take a byte'
+                ' string as its first argument.  Use unicode_to_xml for'
+                ' unicode strings'))
+
+    # Decode the string into unicode, then let unicode_to_xml do the escaping
+    # and the encoding into output_encoding.
+    u_string = unicode(byte_string, input_encoding, errors)
+    return unicode_to_xml(u_string, encoding=output_encoding,
+            attrib=attrib, control_chars=control_chars)
+
+def xml_to_byte_string(byte_string, input_encoding='utf-8', errors='replace',
+        output_encoding='utf-8'):
+    '''Transform a byte :class:`str` from an xml file into a byte
+    :class:`str` with the html entities expanded
+
+    :arg byte_string: byte :class:`str` to decode
+    :kwarg input_encoding: encoding that the byte :class:`str` is in
+    :kwarg errors: What to do if not every character is valid in
+        :attr:`input_encoding`.  The same value is also passed to
+        :func:`to_bytes` when the result is encoded into
+        :attr:`output_encoding`.  See the :func:`to_unicode` docstring for
+        legal values.
+    :kwarg output_encoding: Encoding for the output byte :class:`str`
+    :returns: byte :class:`str` in :attr:`output_encoding`, produced from
+        :attr:`byte_string`
+
+    This function attempts to reverse what :func:`byte_string_to_xml` does.
+    It takes a byte :class:`str` (presumably read in from an xml file),
+    decodes it and expands all the html entities via :func:`xml_to_unicode`
+    and then encodes the resulting :class:`unicode` string with
+    :func:`to_bytes`.  One thing it cannot do
+    is restore any :term:`control characters` that were removed prior to
+    inserting into the file.  If you need to keep such characters you need to
+    use :func:`xml_to_bytes` and :func:`bytes_to_xml` or use one of the
+    strategies documented in :func:`unicode_to_xml` instead.
+    '''
+    # Two steps: bytes -> unicode (entities expanded), then unicode -> bytes.
+    string = xml_to_unicode(byte_string, input_encoding, errors)
+    return to_bytes(string, output_encoding, errors)
+
+def bytes_to_xml(byte_string, *args, **kwargs):
+    '''Return a byte :class:`str` encoded so it is valid inside of any xml
+    file
+
+    :arg byte_string: byte :class:`str` to transform
+    :arg \*args, \*\*kwargs: extra arguments to this function are passed on to
+        the function actually implementing the encoding.  You can use this to
+        tweak the output in some cases but, as a general rule, you shouldn't
+        because the underlying encoding function is not guaranteed to remain
+        the same.
+    :rtype: byte :class:`str` consisting of all :term:`ASCII` characters
+    :returns: byte :class:`str` representation of the input.  This will be
+        encoded using base64.
+
+    This function is made especially to put binary information into xml
+    documents.
+
+    This function is intended for encoding things that must be preserved
+    byte-for-byte.  If you want to encode a byte string that's text and don't
+    mind losing the actual bytes you probably want to try
+    :func:`byte_string_to_xml` or :func:`guess_encoding_to_xml` instead.
+
+    .. note:: Although the current implementation uses
+        :func:`base64.b64encode` and there are no plans to change it, that
+        isn't guaranteed.  If you want to
+        make sure that you can encode and decode these messages it's best to
+        use :func:`xml_to_bytes` if you use this function to encode.
+    '''
+    # Can you do this yourself?  Yes, you can.
+    return b64encode(byte_string, *args, **kwargs)
+
+def xml_to_bytes(byte_string, *args, **kwargs):
+    '''Decode a string encoded using :func:`bytes_to_xml`
+
+    :arg byte_string: byte :class:`str` to transform.  This should be a base64
+        encoded sequence of bytes originally generated by :func:`bytes_to_xml`.
+    :arg \*args, \*\*kwargs: extra arguments to this function are passed on to
+        the function actually implementing the decoding.  You can use this to
+        tweak the output in some cases but, as a general rule, you shouldn't
+        because the underlying decoding function is not guaranteed to remain
+        the same.
+    :rtype: byte :class:`str`
+    :returns: byte :class:`str` that's the decoded input
+
+    If you've got fields in an xml document that were encoded with
+    :func:`bytes_to_xml` then you want to use this function to decode them.
+    It converts a base64 encoded string into a byte :class:`str`.
+
+    .. note:: Although the current implementation uses
+        :func:`base64.b64decode` and there are no plans to change it, that
+        isn't guaranteed.  If you want to make sure that you can encode and
+        decode these messages it's best to use :func:`bytes_to_xml` if you use
+        this function to decode.
+    '''
+    # Straight pass-through to base64; the inverse of bytes_to_xml().
+    return b64decode(byte_string, *args, **kwargs)
+
+def guess_encoding_to_xml(string, output_encoding='utf-8', attrib=False,
+        control_chars='replace'):
+    '''Return a byte :class:`str` suitable for inclusion in xml
+
+    :arg string: :class:`unicode` or byte :class:`str` to be transformed into
+        a byte :class:`str` suitable for inclusion in xml.  If string is
+        a byte :class:`str` we attempt to guess the encoding.  If we cannot
+        guess, we fallback to ``latin-1``.
+    :kwarg output_encoding: Output encoding for the byte :class:`str`.  This
+        should match the encoding of your xml file.
+    :kwarg attrib: If :data:`True`, escape the item for use in an xml
+        attribute.  If :data:`False` (default) escape the item for use in
+        a text node.
+    :kwarg control_chars: What to do with :term:`control characters`.  The
+        value (default ``replace``) is passed on unchanged to
+        :func:`unicode_to_xml` or :func:`byte_string_to_xml`; see
+        :func:`byte_string_to_xml` for the documented options.
+    :returns: byte :class:`str` encoded in :attr:`output_encoding`
+        (:term:`utf-8` by default)
+
+    '''
+    # Unicode strings can just be run through unicode_to_xml()
+    if isinstance(string, unicode):
+        return unicode_to_xml(string, encoding=output_encoding,
+                attrib=attrib, control_chars=control_chars)
+
+    # Guess the encoding of the byte strings
+    input_encoding = guess_encoding(string)
+
+    # Return the new byte string.  Decoding always uses errors='replace' here;
+    # callers cannot override it through this function.
+    return byte_string_to_xml(string, input_encoding=input_encoding,
+            errors='replace', output_encoding=output_encoding,
+            attrib=attrib, control_chars=control_chars)
+
+def to_xml(string, encoding='utf-8', attrib=False, control_chars='ignore'):
+    '''*Deprecated*: Use :func:`guess_encoding_to_xml` instead
+
+    :arg string: value handed to :func:`guess_encoding_to_xml` unchanged
+    :kwarg encoding: passed to :func:`guess_encoding_to_xml` as
+        ``output_encoding``
+    :kwarg attrib: passed through to :func:`guess_encoding_to_xml`
+    :kwarg control_chars: passed through to :func:`guess_encoding_to_xml`.
+        Note that the default here is ``ignore`` whereas
+        :func:`guess_encoding_to_xml` itself defaults to ``replace``.
+    :returns: whatever :func:`guess_encoding_to_xml` returns
+
+    Emits a :exc:`DeprecationWarning` on every call before delegating.
+    '''
+    # NOTE(review): the message still names kitchen.text.converters although
+    # this copy lives in ovirtcli/utils/converters.py -- confirm whether the
+    # text should be updated.
+    # stacklevel=2 makes the warning point at the caller of to_xml().
+    warnings.warn(k.b_('kitchen.text.converters.to_xml is deprecated.  Use'
+            ' kitchen.text.converters.guess_encoding_to_xml instead.'),
+            DeprecationWarning, stacklevel=2)
+    return guess_encoding_to_xml(string, output_encoding=encoding,
+            attrib=attrib, control_chars=control_chars)
+
+# Public API of this module: the names exported by ``from ... import *``.
+__all__ = ('BYTE_EXCEPTION_CONVERTERS', 'EXCEPTION_CONVERTERS',
+        'byte_string_to_xml', 'bytes_to_xml', 'exception_to_bytes',
+        'exception_to_unicode', 'getwriter', 'guess_encoding_to_xml',
+        'to_bytes', 'to_str', 'to_unicode', 'to_utf8', 'to_xml',
+        'unicode_to_xml', 'xml_to_byte_string', 'xml_to_bytes',
+        'xml_to_unicode')


--
To view, visit http://gerrit.ovirt.org/7264
To unsubscribe, visit http://gerrit.ovirt.org/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I092575bea3f5d9175a17d975a1bdfd877e8214f8
Gerrit-PatchSet: 1
Gerrit-Project: ovirt-engine-cli
Gerrit-Branch: master
Gerrit-Owner: Michael Pasternak <[email protected]>
_______________________________________________
Engine-patches mailing list
[email protected]
http://lists.ovirt.org/mailman/listinfo/engine-patches

Reply via email to