[Python-Dev] PEP for RFE 46738 (first draft)

Simon Wittber Fri, 17 Jun 2005 23:19:40 -0700

Hello Chaps,

The attached PEP (pep.txt) is for RFE 467384, which you can view here:


http://sourceforge.net/tracker/index.php?func=detail&aid=467384&group_id=5470&atid=355470

It provides a safe, documented class for serialization of simple Python types.

A sample implementation is also attached (gherkin.py).

Criticism and comments on the PEP and the implementation are appreciated.

Simon Wittber.

PEP: XXX
Title: Serialization of Simple Python Types
Version: $Revision: $
Last-Modified: $Date: $
Author: Simon Wittber <[EMAIL PROTECTED]>
Status: Draft
Type: Standards Track
Python-Version: 2.4
Content-Type: text/plain
Created: 19-Jun-2005
Post-History:

Abstract

    This PEP suggests the addition of a module to the standard library,
    which provides a serialization class for simple Python types.


Copyright

    This document is placed in the public domain.


Motivation

    The standard library currently provides two modules which are used
    for object serialization. Pickle is not secure by its very nature,
    and the marshal module is clearly marked as being not secure in the
    documentation. The marshal module does not guarantee compatibility
    between Python versions. The proposed module will only serialize
    simple built-in Python types, and provide compatibility across
    Python versions.

    See RFE 467384 (on SourceForge) for past discussions on the above
    issues.


Specification

    The proposed module should use the same API as the marshal module.

        dump(value, file)
        #serialize value, and write to open file object
        load(file)
        #read data from file object, unserialize and return an object
        dumps(value)
        #return the string that would be written to the file by dump
        loads(value)
        #unserialize and return object


Reference Implementation

    Please see the attached gherkin.py.


Rationale

    The reference implementation selects the encoder for each value
    with a dict lookup keyed by type. An alternative algorithm, using a
    single encode function built around an if/elif structure, proved to
    be slower.



Local Variables:
mode: indented-text
indent-tabs-mode: nil
sentence-end-double-space: t
fill-column: 70
End:

from types import (IntType, TupleType, StringType, FloatType, LongType,
                   ListType, DictType, NoneType, BooleanType, UnicodeType)

from struct import pack, unpack
from cStringIO import StringIO

class EncodeError(Exception):
    """Exception type declared for encoding failures."""
class DecodeError(Exception):
    """Exception type declared for decoding failures."""

# Number of bytes read for a sequence's length prefix; also the default
# Gherkin.int_size and the int-size digit Gherkin.dumps writes to the header.
SIZEOF_INT = 4
# Default Gherkin.float_size (bytes read for a packed float) and the
# float-size digit Gherkin.dumps writes to the header.
SIZEOF_FLOAT = 8
# Codec used to encode unicode objects to byte strings and to decode them.
UNICODE_CODEC = 'utf-8'

class Gherkin(object):
    """
    Serializer for a fixed set of simple built-in types.

    Supported types are tuple, list, dict, long, int, float, str, None,
    bool and unicode.  Values are dispatched on their exact type, so
    instances of subclasses of these types are rejected.

    A serialized string consists of the header 'GHE', three option
    characters (format version, int size, float size; one hex digit
    each) and the encoded value.  Every encoded value starts with a
    one-character type tag taken from self.protocol.  Variable-length
    values (sequences, dicts, strings, unicode, longs) follow the tag
    with a 4-byte big-endian length and then the payload.
    """

    def __init__(self):
        self.header = 'GHE'
        self.version = 0
        self.int_size = SIZEOF_INT
        self.float_size = SIZEOF_FLOAT

        # type -> one-character tag written in front of every encoded value
        self.protocol = {
            TupleType  :"T",
            ListType   :"L",
            DictType   :"D",
            LongType   :"B",
            IntType    :"I",
            FloatType  :"F",
            StringType :"S",
            NoneType   :"N",
            BooleanType:"b",
            UnicodeType:"U"
            }

        # type -> bound method producing the encoded string for a value
        self.encoder = {
            DictType: self.enc_dict_type,
            ListType: self.enc_list_type,
            TupleType: self.enc_list_type,
            IntType: self.enc_int_type,
            FloatType: self.enc_float_type,
            LongType: self.enc_long_type,
            UnicodeType: self.enc_unicode_type,
            StringType: self.enc_string_type,
            NoneType: self.enc_none_type,
            BooleanType: self.enc_bool_type,
            }

        # tag -> bound method reading one value (minus its tag) from a stream
        by_type = {
            TupleType: self.dec_tuple_type,
            ListType: self.dec_list_type,
            DictType: self.dec_dict_type,
            LongType: self.dec_long_type,
            StringType: self.dec_string_type,
            FloatType: self.dec_float_type,
            IntType: self.dec_int_type,
            NoneType: self.dec_none_type,
            BooleanType: self.dec_bool_type,
            UnicodeType: self.dec_unicode_type,
            }
        self.decoder = {}
        for kind, function in by_type.items():
            self.decoder[self.protocol[kind]] = function

    # -- private helpers ---------------------------------------------------

    def _encode(self, obj):
        """Encode obj with the encoder registered for its exact type.

        Raises ValueError if the type has no registered encoder.
        """
        try:
            encode = self.encoder[type(obj)]
        except KeyError:
            raise ValueError("Type not supported. (%r)" % (type(obj),))
        return encode(obj)

    def _decode(self, data):
        """Read one type tag from data and decode the value that follows.

        Raises EOFError if no tag is left, ValueError if the tag is unknown.
        """
        tag = self._read(data, 1)
        try:
            decode = self.decoder[tag]
        except KeyError:
            raise ValueError("Type prefix not supported. (%r)" % (tag,))
        return decode(data)

    def _read(self, data, count):
        """Read exactly count characters from data or raise EOFError."""
        chunk = data.read(count)
        if len(chunk) != count:
            raise EOFError("Unexpected end of data.")
        return chunk

    def _sized(self, tag, payload):
        """Return tag + 4-byte big-endian length of payload + payload."""
        return "".join((tag, pack("!L", len(payload)), payload))

    # -- encoders ----------------------------------------------------------

    def enc_dict_type(self, obj):
        """Encode a dict as the concatenation of its encoded (key, value)
        tuples."""
        data = "".join([self._encode(item) for item in obj.items()])
        return self._sized(self.protocol[DictType], data)

    def enc_list_type(self, obj):
        """Encode a list or a tuple; the tag is chosen from type(obj)."""
        data = "".join([self._encode(item) for item in obj])
        return self._sized(self.protocol[type(obj)], data)

    def enc_int_type(self, obj):
        """Encode an int as a 4-byte big-endian signed integer.

        Ints outside the signed 32-bit range (possible on 64-bit builds)
        cannot be packed with "!i"; they are written with the long
        encoding instead and therefore decode as long.
        """
        if -2147483648 <= obj <= 2147483647:
            return "%s%s" % (self.protocol[IntType], pack("!i", obj))
        return self.enc_long_type(obj)

    def enc_float_type(self, obj):
        """Encode a float as an 8-byte big-endian double."""
        return "%s%s" % (self.protocol[FloatType], pack("!d", obj))

    def enc_long_type(self, obj):
        """Encode a long as length-prefixed hexadecimal digits.

        "%x" yields the same digits as hex(obj)[2:-1] for non-negative
        longs, and keeps a leading '-' for negative ones so that they
        survive a round trip through dec_long_type.
        """
        return self._sized(self.protocol[LongType], "%x" % obj)

    def enc_unicode_type(self, obj):
        """Encode a unicode object as length-prefixed UNICODE_CODEC bytes."""
        return self._sized(self.protocol[UnicodeType],
                           obj.encode(UNICODE_CODEC))

    def enc_string_type(self, obj):
        """Encode a str as a length-prefixed byte string."""
        return self._sized(self.protocol[StringType], obj)

    def enc_none_type(self, obj):
        """Encode None; the tag alone carries the value."""
        return self.protocol[NoneType]

    def enc_bool_type(self, obj):
        """Encode a bool as its tag followed by '0' or '1'."""
        return self.protocol[BooleanType] + str(int(obj))

    # -- public encoding API -----------------------------------------------

    def dumps(self, obj):
        """
        Return the string that would be written to a file by
        dump(value, file). The value must be a supported type. Raise a
        ValueError exception if value has (or contains an object that
        has) an unsupported type.
        """
        # One hex digit each: format version, int size, float size.
        options = "".join(["%x" % number for number in
                           (self.version, SIZEOF_INT, SIZEOF_FLOAT)])
        if len(options) != 3:
            raise ValueError("Header options must be single hex digits. (%r)"
                             % (options,))
        return "".join((self.header, options, self._encode(obj)))

    def dump(self, obj, file):
        """
        Write the value on the open file. The value must be a supported
        type. The file must be an open file object such as sys.stdout or
        returned by open() or posix.popen(). It must be opened in binary
        mode ('wb' or 'w+b').
        If the value has (or contains an object that has) an unsupported
        type, a ValueError exception is raised.
        """
        return file.write(self.dumps(obj))

    # -- decoders ----------------------------------------------------------

    def build_sequence(self, data, cast=list):
        """Decode a length-prefixed run of values and return cast(items).

        data is a file-like object positioned just after a sequence tag.
        Raises ValueError if the decoded items do not end exactly at the
        declared length.
        """
        size = unpack('!L', self._read(data, self.int_size))[0]
        items = []
        start_position = data.tell()
        while (data.tell() - start_position) < size:
            items.append(self._decode(data))
        if (data.tell() - start_position) != size:
            raise ValueError("Sequence length does not match its contents.")
        return cast(items)

    def dec_tuple_type(self, data):
        """Decode a tuple."""
        return self.build_sequence(data, cast=tuple)

    def dec_list_type(self, data):
        """Decode a list."""
        return self.build_sequence(data, cast=list)

    def dec_dict_type(self, data):
        """Decode a dict from its sequence of (key, value) tuples."""
        return self.build_sequence(data, cast=dict)

    def dec_long_type(self, data):
        """Decode a long from length-prefixed hexadecimal digits."""
        size = unpack('!L', self._read(data, self.int_size))[0]
        return long(self._read(data, size), 16)

    def dec_string_type(self, data):
        """Decode a length-prefixed str."""
        size = unpack('!L', self._read(data, self.int_size))[0]
        return str(self._read(data, size))

    def dec_float_type(self, data):
        """Decode a big-endian double."""
        return unpack('!d', self._read(data, self.float_size))[0]

    def dec_int_type(self, data):
        """Decode a big-endian signed integer."""
        return unpack('!i', self._read(data, self.int_size))[0]

    def dec_none_type(self, data):
        """Decode None; nothing follows the tag."""
        return None

    def dec_bool_type(self, data):
        """Decode a bool from the single digit following the tag."""
        return bool(int(self._read(data, 1)))

    def dec_unicode_type(self, data):
        """Decode a unicode object from length-prefixed UNICODE_CODEC
        bytes."""
        size = unpack('!L', self._read(data, self.int_size))[0]
        return self._read(data, size).decode(UNICODE_CODEC)

    # -- public decoding API -----------------------------------------------

    def loads(self, data):
        """
        Convert the string to a value. If no valid value is found, raise
        EOFError, ValueError or TypeError. Extra characters in the string
        are ignored.

        The option values read from the header are stored on
        self.version, self.int_size and self.float_size.
        """
        stream = StringIO(data)
        header = stream.read(len(self.header))
        if header != self.header:
            raise ValueError("Bad header. (%r)" % (header,))
        # dumps() writes the options as hex digits, so parse them as hex.
        version, int_size, float_size = [
            int(digit, 16) for digit in self._read(stream, 3)]
        # The struct formats used by the decoders have fixed widths, so
        # any other size in the header cannot be decoded.
        if int_size != SIZEOF_INT or float_size != SIZEOF_FLOAT:
            raise ValueError("Unsupported int/float size. (%d, %d)"
                             % (int_size, float_size))
        self.version = version
        self.int_size = int_size
        self.float_size = float_size
        return self._decode(stream)

    def load(self, file):
        """
        Read one value from the open file and return it. If no valid
        value is read, raise EOFError, ValueError or TypeError. The file
        must be an open file object opened in binary mode ('rb' or
        'r+b'). The whole remaining file content is read.
        """
        return self.loads(file.read())

# Module-level functions following the marshal-style API; each name is bound
# to a method of its own freshly created Gherkin instance.
dump = Gherkin().dump
dumps = Gherkin().dumps
load = Gherkin().load
loads = Gherkin().loads

if __name__ == "__main__":
    def test():
        """Round-trip a nested sample value through dumps() and loads()."""
        value = (
            u'\N{POUND SIGN} Testing unicode',
            {True: False},
            [1, 2, 3, 4],
            ["[1,2,3,4]"],
            ("python", "types"),
            "pi equals",
            3.1,
            ("longs are ok too",
             912398102398102938102398109238019283012983019238019283019283),
        )
        data = dumps(value)
        print(data)
        new_value = loads(data)
        assert value == new_value
    test()

_______________________________________________
Python-Dev mailing list
Python-Dev@python.org
http://mail.python.org/mailman/listinfo/python-dev
Unsubscribe: 
http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com

[Python-Dev] PEP for RFE 46738 (first draft)

Reply via email to