Hello Chaps, The attached PEP (pep.txt) is for RFE 467384, which you can view here:
http://sourceforge.net/tracker/index.php?func=detail&aid=467384&group_id=5470&atid=355470 It provides a safe, documented class for serialization of simple Python types. A sample implementation is also attached (gherkin.py). Criticism and comments on the PEP and the implementation are appreciated. Simon Wittber.
PEP: XXX Title: Serialization of Simple Python Types Version: $Revision: $ Last-Modified: $Date: $ Author: Simon Wittber <[EMAIL PROTECTED]> Status: Draft Type: Standards Track Python-Version: 2.4 Content-Type: text/plain Created: 19-Jun-2005 Post-History: Abstract This PEP suggests the addition of a module to the standard library, which provides a serialization class for simple Python types. Copyright This document is placed in the public domain. Motivation The standard library currently provides two modules which are used for object serialization. Pickle is not secure by its very nature, and the marshal module is clearly marked as being not secure in the documentation. The marshal module does not guarantee compatibility between Python versions. The proposed module will only serialize simple built-in Python types, and provide compatibility across Python versions. See RFE 467384 (on SourceForge) for past discussions on the above issues. Specification The proposed module should use the same API as the marshal module. dump(value, file) #serialize value, and write to open file object load(file) #read data from file object, unserialize and return an object dumps(value) #return the string that would be written to the file by dump loads(value) #unserialize and return object Reference Implementation Please see attached gherkin.py Rationale An algorithm using a single encode function, in which an if/elif structure is used, rather than a dict lookup by type, proved to be slower than the algorithm used in the reference implementation. Local Variables: mode: indented-text indent-tabs-mode: nil sentence-end-double-space: t fill-column: 70 End:
from types import IntType,TupleType,StringType,FloatType,LongType,ListType,DictType,NoneType,BooleanType,UnicodeType from struct import pack, unpack from cStringIO import StringIO class EncodeError(Exception): pass class DecodeError(Exception): pass SIZEOF_INT = 4 SIZEOF_FLOAT = 8 UNICODE_CODEC = 'utf-8' class Gherkin(object): def __init__(self): self.header = 'GHE' self.version = 0 self.protocol = { TupleType :"T", ListType :"L", DictType :"D", LongType :"B", IntType :"I", FloatType :"F", StringType :"S", NoneType :"N", BooleanType:"b", UnicodeType:"U" } self.encoder = {} self.decoder = {} self.int_size = SIZEOF_INT self.float_size = SIZEOF_FLOAT self.encoder[DictType] = self.enc_dict_type self.encoder[ListType] = self.enc_list_type self.encoder[TupleType] = self.enc_list_type self.encoder[IntType] = self.enc_int_type self.encoder[FloatType] = self.enc_float_type self.encoder[LongType] = self.enc_long_type self.encoder[UnicodeType] = self.enc_unicode_type self.encoder[StringType] = self.enc_string_type self.encoder[NoneType] = self.enc_none_type self.encoder[BooleanType] = self.enc_bool_type self.decoder[self.protocol[TupleType]] = self.dec_tuple_type self.decoder[self.protocol[ListType]] = self.dec_list_type self.decoder[self.protocol[DictType]] = self.dec_dict_type self.decoder[self.protocol[LongType]] = self.dec_long_type self.decoder[self.protocol[StringType]] = self.dec_string_type self.decoder[self.protocol[FloatType]] = self.dec_float_type self.decoder[self.protocol[IntType]] = self.dec_int_type self.decoder[self.protocol[NoneType]] = self.dec_none_type self.decoder[self.protocol[BooleanType]] = self.dec_bool_type self.decoder[self.protocol[UnicodeType]] = self.dec_unicode_type def enc_dict_type(self, obj): data = "".join([self.encoder[type(i)](i) for i in obj.items()]) return "%s%s%s" % (self.protocol[DictType], pack("!L", len(data)), data) def enc_list_type(self, obj): data = "".join([self.encoder[type(i)](i) for i in obj]) return "%s%s%s" % 
(self.protocol[type(obj)], pack("!L", len(data)), data) def enc_int_type(self, obj): return "%s%s" % (self.protocol[IntType], pack("!i", obj)) def enc_float_type(self, obj): return "%s%s" % (self.protocol[FloatType], pack("!d", obj)) def enc_long_type(self, obj): obj = hex(obj)[2:-1] return "%s%s%s" % (self.protocol[LongType], pack("!L", len(obj)), obj) def enc_unicode_type(self, obj): obj = obj.encode(UNICODE_CODEC) return "%s%s%s" % (self.protocol[UnicodeType], pack("!L", len(obj)), obj) def enc_string_type(self, obj): return "%s%s%s" % (self.protocol[StringType], pack("!L", len(obj)), obj) def enc_none_type(self, obj): return self.protocol[NoneType] def enc_bool_type(self, obj): return self.protocol[BooleanType] + str(int(obj)) def dumps(self, obj): """ Return the string that would be written to a file by dump(value, file). The value must be a supported type. Raise a ValueError exception if value has (or contains an object that has) an unsupported type. """ options = "".join((hex(self.version)[2:],hex(SIZEOF_INT)[2:],hex(SIZEOF_FLOAT)[2:])) assert len(options) == 3 try: data = self.encoder[type(obj)](obj) except KeyError, e: raise ValueError, "Type not supported. (%s)" % e header = "".join((self.header, options)) assert len(header) == 6 return "".join((header, data)) def dump(self, obj, file): """ Write the value on the open file. The value must be a supported type. The file must be an open file object such as sys.stdout or returned by open() or posix.popen(). It must be opened in binary mode ('wb' or 'w+b'). 
If the value has (or contains an object that has) an unsupported type, a ValueError exception is raised """ return file.write(self.dumps(obj)) def build_sequence(self, data, cast=list): size = unpack('!L', data.read(SIZEOF_INT))[0] items = [] start_position = data.tell() while (data.tell() - start_position) < size: T = data.read(1) value = self.decoder[T](data) items.append(value) return cast(items) def dec_tuple_type(self, data): return self.build_sequence(data, cast=tuple) def dec_list_type(self, data): return self.build_sequence(data, cast=list) def dec_dict_type(self, data): return self.build_sequence(data, cast=dict) def dec_long_type(self, data): size = unpack('!L', data.read(self.int_size))[0] value = long(data.read(size),16) return value def dec_string_type(self, data): size = unpack('!L', data.read(self.int_size))[0] value = str(data.read(size)) return value def dec_float_type(self, data): value = unpack('!d', data.read(self.float_size))[0] return value def dec_int_type(self, data): value = unpack('!i', data.read(self.int_size))[0] return value def dec_none_type(self, data): return None def dec_bool_type(self, data): value = int(data.read(1)) return bool(value) def dec_unicode_type(self, data): size = unpack('!L', data.read(self.int_size))[0] value = data.read(size).decode(UNICODE_CODEC) return value def loads(self, data): """ Convert the string to a value. If no valid value is found, raise EOFError, ValueError or TypeError. Extra characters in the string are ignored. """ buffer = StringIO(data) header = buffer.read(len(self.header)) assert header == self.header self.version = int(buffer.read(1), 10) self.int_size = int(buffer.read(1), 10) self.float_size = int(buffer.read(1), 10) try: value = self.decoder[buffer.read(1)](buffer) except KeyError, e: raise ValueError, "Type prefix not supported. (%s)" % e return value def load(self, file): """ Read one value from the open file and return it. 
If no valid value is read, raise EOFError, ValueError or TypeError. The file must be an open file object opened in binary mode ('rb' or 'r+b'). """ return self.loads(file.read()) dumps = Gherkin().dumps loads = Gherkin().loads dump = Gherkin().dump load = Gherkin().load if __name__ == "__main__": def test(): value = (u'\N{POUND SIGN} Testing unicode', {True:False},[1,2,3,4],["[1,2,3,4]"],("python","types"), "pi equals",3.1,("longs are ok too", 912398102398102938102398109238019283012983019238019283019283)) data = dumps(value) print data new_value = loads(data) assert value == new_value test()
_______________________________________________ Python-Dev mailing list Python-Dev@python.org http://mail.python.org/mailman/listinfo/python-dev Unsubscribe: http://mail.python.org/mailman/options/python-dev/archive%40mail-archive.com