Hi,
As part of a research project I'm working on, I found it convenient to be
able to pickle messages (or more specifically Python structures that
contain them). You can do this without patching protobuf, but it is a bit
clumsier and (unless I'm missing something) requires either registering a
copy_reg handler for every message type, or breaking down the structures
manually (especially annoying when working interactively).
The attached patch adds __getstate__ and __setstate__ methods to generated
classes, hooking into pickle and using SerializePartialToString to return
an object pickle knows how to deal with, allowing them to be pickled out of
the box. This seems to work for both standard Python messages and the
experimental C++ bindings.
I'm not sure whether this is a common use case, but it solved my problem
nicely and seems like something that should just work, so I am contributing
it upstream.
Alex
--
You received this message because you are subscribed to the Google Groups
"Protocol Buffers" group.
To view this discussion on the web visit
https://groups.google.com/d/msg/protobuf/-/XsHAtFgKNR0J.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to
[email protected].
For more options, visit this group at
http://groups.google.com/group/protobuf?hl=en.
Index: python/google/protobuf/internal/python_message.py
===================================================================
--- python/google/protobuf/internal/python_message.py (revision 421)
+++ python/google/protobuf/internal/python_message.py (working copy)
@@ -654,6 +654,25 @@
cls.__eq__ = __eq__
+def _AddGetStateMethod(message_descriptor, cls):
+ """Helper for _AddMessageMethods()."""
+ def __getstate__(self):
+ # We have to return a tuple because the serialized representation of a
+ # message with no fields set is the empty string, which pickle interprets
+ # as an instruction not to call setstate on unpickle (because it gets cast
+ # to False)
+ return self.SerializePartialToString(),
+ cls.__getstate__ = __getstate__
+
+
+def _AddSetStateMethod(message_descriptor, cls):
+ """Helper for _AddMessageMethods()."""
+ def __setstate__(self, state):
+ self.__init__()
+ self.ParseFromString(state[0])
+ cls.__setstate__ = __setstate__
+
+
def _AddStrMethod(message_descriptor, cls):
"""Helper for _AddMessageMethods()."""
def __str__(self):
@@ -911,6 +930,8 @@
_AddHasExtensionMethod(cls)
_AddClearMethod(message_descriptor, cls)
_AddEqualsMethod(message_descriptor, cls)
+ _AddGetStateMethod(message_descriptor, cls)
+ _AddSetStateMethod(message_descriptor, cls)
_AddStrMethod(message_descriptor, cls)
_AddUnicodeMethod(message_descriptor, cls)
_AddSetListenerMethod(cls)
Index: python/google/protobuf/internal/cpp_message.py
===================================================================
--- python/google/protobuf/internal/cpp_message.py (revision 421)
+++ python/google/protobuf/internal/cpp_message.py (working copy)
@@ -564,6 +564,17 @@
def FindInitializationErrors(self):
return self._cmsg.FindInitializationErrors()
+ def __setstate__(self, state):
+ self.__init__()
+ self.ParseFromString(state[0])
+
+ def __getstate__(self):
+ # We have to return a tuple because the serialized representation of a
+ # message with no fields set is the empty string, which pickle interprets
+ # as an instruction not to call setstate on unpickle (because it gets cast
+ # to False)
+ return self.SerializePartialToString(),
+
def __str__(self):
return self._cmsg.DebugString()
Index: python/google/protobuf/internal/reflection_test.py
===================================================================
--- python/google/protobuf/internal/reflection_test.py (revision 421)
+++ python/google/protobuf/internal/reflection_test.py (working copy)
@@ -38,6 +38,7 @@
__author__ = '[email protected] (Will Robinson)'
import operator
+import pickle
import struct
import unittest
@@ -1443,7 +1444,31 @@
proto.optionalgroup.SetInParent()
self.assertTrue(proto.HasField('optionalgroup'))
+ def testPickleUnpickle(self):
+ message = unittest_pb2.TestAllTypes()
+ # An empty message serializes to the empty string, which is falsy; pickle
+ # skips __setstate__ for a falsy state, so test that case explicitly.
+ max_format = max((int(float(v)) for v in pickle.compatible_formats))
+ for pickle_format in xrange(1, max_format + 1):
+ pickled = pickle.dumps(message, pickle_format)
+ restored = pickle.loads(pickled)
+ self.assertTrue(message == restored)
+
+ # Test a message with a few fields set.
+ message.optional_fixed64 = 314
+ message.repeated_uint32.extend([3, 1, 4])
+ for pickle_format in xrange(1, max_format + 1):
+ pickled = pickle.dumps(message, pickle_format)
+ restored = pickle.loads(pickled)
+ self.assertTrue(message == restored)
+
+ # Test a message with required fields and one required field missing. This
+ # should succeed because we use SerializePartialToString.
+ message = unittest_pb2.TestRequired(a=3, dummy2=3141)
+ self.assertTrue(pickle.loads(pickle.dumps(message)) == message)
+
+
# Since we had so many tests for protocol buffer equality, we broke these out
# into separate TestCase classes.