URL: https://github.com/freeipa/freeipa/pull/459
Author: tiran
 Title: #459: [WIP] Faster JSON encoder/decoder
Action: synchronized

To pull the PR as Git branch:
git remote add ghfreeipa https://github.com/freeipa/freeipa
git fetch ghfreeipa pull/459/head:pr459
git checkout pr459
From e685e106dbcfb54d1651c97d6a07a17c3417127f Mon Sep 17 00:00:00 2001
From: Christian Heimes <chei...@redhat.com>
Date: Mon, 13 Feb 2017 09:46:39 +0100
Subject: [PATCH 1/2] Faster JSON encoder/decoder

Improve performance of FreeIPA's JSON serializer and deserializer.

* Don't indent output or sort keys. Both options trigger a slow path in
  Python's json package. Without indentation and sorting, encoding
  mostly happens in optimized C code.
* Replace O(n) type checks with an O(1) type lookup and eliminate
  the use of isinstance() (a minimal sketch of the idea follows the
  diffstat below).
* Check each client capability only once per conversion.
* Use the decoder's object_hook feature to traverse the object tree once
  and to eliminate calls to isinstance().

Closes: https://fedorahosted.org/freeipa/ticket/6655
Signed-off-by: Christian Heimes <chei...@redhat.com>
---
 ipalib/rpc.py          | 211 +++++++++++++++++++++++++++++++------------------
 ipaserver/rpcserver.py |   7 +-
 2 files changed, 134 insertions(+), 84 deletions(-)
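
The converter in this patch is built around a dict subclass that maps each
type directly to a converter callable. As a rough, standalone sketch of that
idea (not the patch code itself; all names below are made up), the O(1)
lookup with MRO-based caching works like this:

    import datetime
    from decimal import Decimal

    class _Dispatch(dict):
        """Map types to converter callables, caching subclass lookups."""
        def __missing__(self, typ):
            # Walk the MRO once for an unknown type, then cache the hit so
            # the next lookup for this exact type is a plain dict access.
            for base in typ.__mro__:
                if base in self:
                    self[typ] = self[base]
                    return self[base]
            raise TypeError(typ)

    _dispatch = _Dispatch({
        str: lambda v: v,
        int: lambda v: v,
        Decimal: str,
        datetime.datetime: lambda v: v.isoformat(),
    })

    def convert(value):
        # A single dict lookup replaces a chain of isinstance() checks.
        return _dispatch[value.__class__](value)

    convert(Decimal('1.5'))                  # '1.5'
    convert(datetime.datetime(2017, 2, 13))  # '2017-02-13T00:00:00'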

diff --git a/ipalib/rpc.py b/ipalib/rpc.py
index 7d9f6ec..6cad397 100644
--- a/ipalib/rpc.py
+++ b/ipalib/rpc.py
@@ -51,7 +51,7 @@
 from ipalib.backend import Connectible
 from ipalib.constants import LDAP_GENERALIZED_TIME_FORMAT
 from ipalib.errors import (public_errors, UnknownError, NetworkError,
-    KerberosError, XMLRPCMarshallError, JSONError, ConversionError)
+    KerberosError, XMLRPCMarshallError, JSONError)
 from ipalib import errors, capabilities
 from ipalib.request import context, Connection
 from ipapython.ipa_log_manager import root_logger
@@ -274,67 +274,140 @@ def xml_dumps(params, version, methodname=None, methodresponse=False,
     )
 
 
-def json_encode_binary(val, version):
-    '''
-   JSON cannot encode binary values. We encode binary values in Python str
-   objects and text in Python unicode objects. In order to allow a binary
-   object to be passed through JSON we base64 encode it thus converting it to
-   text which JSON can transport. To assure we recognize the value is a base64
-   encoded representation of the original binary value and not confuse it with
-   other text we convert the binary value to a dict in this form:
-
-   {'__base64__' : base64_encoding_of_binary_value}
-
-   This modification of the original input value cannot be done "in place" as
-   one might first assume (e.g. replacing any binary items in a container
-   (e.g. list, tuple, dict) with the base64 dict because the container might be
-   an immutable object (i.e. a tuple). Therefore this function returns a copy
-   of any container objects it encounters with tuples replaced by lists. This
-   is O.K. because the JSON encoding will map both lists and tuples to JSON
-   arrays.
-   '''
-
-    if isinstance(val, dict):
-        new_dict = {}
-        for k, v in val.items():
-            new_dict[k] = json_encode_binary(v, version)
-        return new_dict
-    elif isinstance(val, (list, tuple)):
-        new_list = [json_encode_binary(v, version) for v in val]
-        return new_list
-    elif isinstance(val, bytes):
-        encoded = base64.b64encode(val)
-        if not six.PY2:
-            encoded = encoded.decode('ascii')
-        return {'__base64__': encoded}
-    elif isinstance(val, Decimal):
-        return unicode(val)
-    elif isinstance(val, DN):
-        return str(val)
-    elif isinstance(val, datetime.datetime):
-        if capabilities.client_has_capability(version, 'datetime_values'):
+class _JSONConverter(dict):
+    __slots__ = ('version', '_cap_datetime', '_cap_dnsname')
+
+    _identity = object()
+
+    def __init__(self, version, _identity=_identity):
+        super(_JSONConverter, self).__init__()
+        self.version = version
+        self._cap_datetime = None
+        self._cap_dnsname = None
+        self.update({
+            unicode: _identity,
+            bool: _identity,
+            type(None): _identity,
+            float: _identity,
+            Decimal: unicode,
+            DN: str,
+            Principal: unicode,
+            DNSName: self._enc_dnsname,
+            datetime.datetime: self._enc_datetime,
+            bytes: self._enc_bytes,
+            list: self._enc_list,
+            tuple: self._enc_list,
+            dict: self._enc_dict,
+        })
+        # int, long
+        for t in six.integer_types:
+            self[t] = _identity
+
+    def __missing__(self, typ):
+        # walk MRO to find best match
+        for c in typ.__mro__:
+            if c in self:
+                self[typ] = self[c]
+                return self[c]
+        # use issubclass to check for registered ABCs
+        for c in self:
+            if issubclass(typ, c):
+                self[typ] = self[c]
+                return self[c]
+        raise TypeError(typ)
+
+    def convert(self, obj, _identity=_identity):
+        # obj.__class__ is twice as fast as type(obj)
+        func = self[obj.__class__]
+        return obj if func is _identity else func(obj)
+
+    def _enc_datetime(self, val):
+        cap = self._cap_datetime
+        if cap is None:
+            cap = capabilities.client_has_capability(self.version,
+                                                     'datetime_values')
+            self._cap_datetime = cap
+        if cap:
             return {'__datetime__': val.strftime(LDAP_GENERALIZED_TIME_FORMAT)}
         else:
             return val.strftime(LDAP_GENERALIZED_TIME_FORMAT)
-    elif isinstance(val, DNSName):
-        if capabilities.client_has_capability(version, 'dns_name_values'):
+
+    def _enc_dnsname(self, val):
+        cap = self._cap_dnsname
+        if cap is None:
+            cap = capabilities.client_has_capability(self.version,
+                                                     'dns_name_values')
+            self._cap_dnsname = cap
+        if cap:
             return {'__dns_name__': unicode(val)}
         else:
             return unicode(val)
-    elif isinstance(val, Principal):
-        return unicode(val)
+
+    def _enc_bytes(self, val):
+        encoded = base64.b64encode(val)
+        if not six.PY2:
+            encoded = encoded.decode('ascii')
+        return {'__base64__': encoded}
+
+    def _enc_list(self, val, _identity=_identity):
+        result = []
+        append = result.append
+        for v in val:
+            func = self[v.__class__]
+            append(v if func is _identity else func(v))
+        return result
+
+    def _enc_dict(self, val, _identity=_identity, _iteritems=six.iteritems):
+        result = {}
+        for k, v in _iteritems(val):
+            func = self[v.__class__]
+            result[k] = v if func is _identity else func(v)
+        return result
+
+
+def json_encode_binary(val, version):
+    """
+    JSON cannot encode binary values. We encode binary values in Python str
+    objects and text in Python unicode objects. In order to allow a binary
+    object to be passed through JSON we base64 encode it thus converting it to
+    text which JSON can transport. To assure we recognize the value is a base64
+    encoded representation of the original binary value and not confuse it with
+    other text we convert the binary value to a dict in this form:
+
+    {'__base64__' : base64_encoding_of_binary_value}
+
+    This modification of the original input value cannot be done "in place" as
+    one might first assume (e.g. replacing any binary items in a container
+    (e.g. list, tuple, dict) with the base64 dict because the container might
+    be an immutable object (i.e. a tuple). Therefore this function returns a
+    copy of any container objects it encounters with tuples replaced by lists.
+    This is O.K. because the JSON encoding will map both lists and tuples to
+    JSON arrays.
+    """
+    result = _JSONConverter(version).convert(val)
+    return json.dumps(result)
+
+
+def _ipa_obj_hook(dct):
+    if '__base64__' in dct:
+        return base64.b64decode(dct['__base64__'])
+    elif '__datetime__' in dct:
+        return datetime.datetime.strptime(dct['__datetime__'],
+                                          LDAP_GENERALIZED_TIME_FORMAT)
+    elif '__dns_name__' in dct:
+        return DNSName(dct['__dns_name__'])
     else:
-        return val
+        return dct
 
 
 def json_decode_binary(val):
-    '''
+    """
     JSON cannot transport binary data. In order to transport binary data we
     convert binary data to a form like this:
 
-   {'__base64__' : base64_encoding_of_binary_value}
+    {'__base64__' : base64_encoding_of_binary_value}
 
-   see json_encode_binary()
+    see json_encode_binary()
 
     After JSON had decoded the JSON stream back into a Python object we must
     recursively scan the object looking for any dicts which might represent
@@ -345,31 +418,11 @@ def json_decode_binary(val):
     don't modify objects in place because of side effects which may be
     dangerous. Thus we elect to spend a few more cycles and avoid the
     possibility of unintended side effects in favor of robustness.
-    '''
+    """
+    if isinstance(val, bytes):
+        val = val.decode('utf-8')
 
-    if isinstance(val, dict):
-        if '__base64__' in val:
-            return base64.b64decode(val['__base64__'])
-        elif '__datetime__' in val:
-            return datetime.datetime.strptime(val['__datetime__'],
-                                              LDAP_GENERALIZED_TIME_FORMAT)
-        elif '__dns_name__' in val:
-            return DNSName(val['__dns_name__'])
-        else:
-            return dict((k, json_decode_binary(v)) for k, v in val.items())
-    elif isinstance(val, list):
-        return tuple(json_decode_binary(v) for v in val)
-    else:
-        if isinstance(val, bytes):
-            try:
-                return val.decode('utf-8')
-            except UnicodeDecodeError:
-                raise ConversionError(
-                    name=val,
-                    error='incorrect type'
-                )
-        else:
-            return val
+    return json.loads(val, object_hook=_ipa_obj_hook)
 
 
 def decode_fault(e, encoding='UTF-8'):
@@ -1091,27 +1144,27 @@ def __request(self, name, args):
         payload = json_encode_binary(payload, version)
 
         if self.__verbose >= 2:
-            root_logger.info('Request: %s',
-                             json.dumps(payload, sort_keys=True, indent=4))
+            root_logger.info(
+                'Request: %s',
+                json.dumps(json.loads(payload), sort_keys=True, indent=4)
+            )
 
         response = self.__transport.request(
             self.__host,
             self.__handler,
-            json.dumps(payload).encode('utf-8'),
+            payload.encode('utf-8'),
             verbose=self.__verbose >= 3,
         )
 
         try:
-            response = json_decode_binary(
-                json.loads(response.decode('utf-8')))
+            response = json_decode_binary(response)
         except ValueError as e:
             raise JSONError(error=str(e))
 
         if self.__verbose >= 2:
             root_logger.info(
                 'Response: %s',
-                json.dumps(json_encode_binary(response, version),
-                           sort_keys=True, indent=4)
+                json.dumps(response, sort_keys=True, indent=4)
             )
         error = response.get('error')
         if error:
diff --git a/ipaserver/rpcserver.py b/ipaserver/rpcserver.py
index 45550fb..c5fc308 100644
--- a/ipaserver/rpcserver.py
+++ b/ipaserver/rpcserver.py
@@ -26,7 +26,6 @@
 from xml.sax.saxutils import escape
 import os
 import datetime
-import json
 import traceback
 import gssapi
 import time
@@ -489,13 +488,12 @@ def marshal(self, result, error, _id=None,
             principal=unicode(principal),
             version=unicode(VERSION),
         )
-        response = json_encode_binary(response, version)
-        dump = json.dumps(response, sort_keys=True, indent=4)
+        dump = json_encode_binary(response, version)
         return dump.encode('utf-8')
 
     def unmarshal(self, data):
         try:
-            d = json.loads(data)
+            d = json_decode_binary(data)
         except ValueError as e:
             raise JSONError(error=e)
         if not isinstance(d, dict):
@@ -504,7 +502,6 @@ def unmarshal(self, data):
             raise JSONError(error=_('Request is missing "method"'))
         if 'params' not in d:
             raise JSONError(error=_('Request is missing "params"'))
-        d = json_decode_binary(d)
         method = d['method']
         params = d['params']
         _id = d.get('id')
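
For reference, the object_hook approach introduced above can be tried in
isolation with only the standard library. This is a simplified sketch of
what _ipa_obj_hook does, with the generalized-time format constant copied
inline and the example values made up for illustration:

    import base64
    import datetime
    import json

    LDAP_GENERALIZED_TIME_FORMAT = '%Y%m%d%H%M%SZ'

    def obj_hook(dct):
        # json.loads() calls the hook for every decoded dict, so the special
        # marker dicts are resolved in a single pass over the object tree.
        if '__base64__' in dct:
            return base64.b64decode(dct['__base64__'])
        if '__datetime__' in dct:
            return datetime.datetime.strptime(dct['__datetime__'],
                                              LDAP_GENERALIZED_TIME_FORMAT)
        return dct

    doc = ('{"attr": {"__base64__": "AAE="},'
           ' "when": {"__datetime__": "20170213094639Z"}}')
    json.loads(doc, object_hook=obj_hook)
    # {'attr': b'\x00\x01', 'when': datetime.datetime(2017, 2, 13, 9, 46, 39)}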

From 5c819e554266700ca155ab40d057de7941c9fd17 Mon Sep 17 00:00:00 2001
From: Christian Heimes <chei...@redhat.com>
Date: Mon, 13 Feb 2017 10:46:36 +0100
Subject: [PATCH 2/2] Convert list to tuples

Some tests assume that the JSON deserializer returns tuples instead of
lists. I don't think it is necessary, but let's pass the tests for now.

Signed-off-by: Christian Heimes <chei...@redhat.com>
---
 ipalib/rpc.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)
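
A standalone sketch of the extra step this patch adds to the hook (stdlib
only, example value made up): any list that appears as a dict value is
converted to a tuple while the hook already has the dict in hand.

    import json

    def obj_hook(dct):
        # Lists nested as dict values become tuples; a top-level JSON array
        # is not passed to the hook and therefore stays a list.
        for key, value in dct.items():
            if value.__class__ is list:
                dct[key] = tuple(value)
        return dct

    json.loads('{"members": ["a", "b"]}', object_hook=obj_hook)
    # {'members': ('a', 'b')}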

diff --git a/ipalib/rpc.py b/ipalib/rpc.py
index 6cad397..753d7e1 100644
--- a/ipalib/rpc.py
+++ b/ipalib/rpc.py
@@ -388,7 +388,7 @@ def json_encode_binary(val, version):
     return json.dumps(result)
 
 
-def _ipa_obj_hook(dct):
+def _ipa_obj_hook(dct, _iteritems=six.iteritems, _list=list):
     if '__base64__' in dct:
         return base64.b64decode(dct['__base64__'])
     elif '__datetime__' in dct:
@@ -397,6 +397,10 @@ def _ipa_obj_hook(dct):
     elif '__dns_name__' in dct:
         return DNSName(dct['__dns_name__'])
     else:
+        # XXX tests assume tuples. Is this really necessary?
+        for k, v in _iteritems(dct):
+            if v.__class__ is _list:
+                dct[k] = tuple(v)
         return dct
 
 
-- 