Hi,

As part of a research project I'm working on, I found it convenient to be 
able to pickle messages (or, more specifically, Python structures that 
contain them). You can do this without patching protobuf, but it is a bit 
clumsier and (unless I'm missing something) requires either registering a 
handler with copy_reg for every kind of message, or breaking down the 
structures manually (especially annoying when operating interactively).

The attached patch adds __getstate__ and __setstate__ methods to generated 
classes, hooking into pickle and using SerializePartialToString to return 
an object pickle knows how to deal with, allowing them to be pickled out of 
the box. This seems to work for both standard Python messages and the 
experimental C++ bindings.

I'm not sure whether this is a common use case, but it solved my problem 
nicely and seems like something that should just work, so I'm contributing 
it upstream.

Alex

-- 
You received this message because you are subscribed to the Google Groups 
"Protocol Buffers" group.
To view this discussion on the web visit 
https://groups.google.com/d/msg/protobuf/-/XsHAtFgKNR0J.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/protobuf?hl=en.

Index: python/google/protobuf/internal/python_message.py
===================================================================
--- python/google/protobuf/internal/python_message.py	(revision 421)
+++ python/google/protobuf/internal/python_message.py	(working copy)
@@ -654,6 +654,25 @@
   cls.__eq__ = __eq__
 
 
+def _AddGetStateMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def __getstate__(self):
+    # We have to return a tuple because the serialized representation of a
+    # message with no fields set is the empty string. Pickle treats a false
+    # state as an instruction not to call __setstate__ when unpickling, and a
+    # one-element tuple is always true.
+    return self.SerializePartialToString(),
+  cls.__getstate__ = __getstate__
+
+
+def _AddSetStateMethod(message_descriptor, cls):
+  """Helper for _AddMessageMethods()."""
+  def __setstate__(self, state):
+    self.__init__()
+    self.ParseFromString(state[0])
+  cls.__setstate__ = __setstate__
+
+
 def _AddStrMethod(message_descriptor, cls):
   """Helper for _AddMessageMethods()."""
   def __str__(self):
@@ -911,6 +930,8 @@
     _AddHasExtensionMethod(cls)
   _AddClearMethod(message_descriptor, cls)
   _AddEqualsMethod(message_descriptor, cls)
+  _AddGetStateMethod(message_descriptor, cls)
+  _AddSetStateMethod(message_descriptor, cls)
   _AddStrMethod(message_descriptor, cls)
   _AddUnicodeMethod(message_descriptor, cls)
   _AddSetListenerMethod(cls)
Index: python/google/protobuf/internal/cpp_message.py
===================================================================
--- python/google/protobuf/internal/cpp_message.py	(revision 421)
+++ python/google/protobuf/internal/cpp_message.py	(working copy)
@@ -564,6 +564,17 @@
   def FindInitializationErrors(self):
     return self._cmsg.FindInitializationErrors()
 
+  def __setstate__(self, state):
+    self.__init__()
+    self.ParseFromString(state[0])
+
+  def __getstate__(self):
+    # We have to return a tuple because the serialized representation of a
+    # message with no fields set is the empty string. Pickle treats a false
+    # state as an instruction not to call __setstate__ when unpickling, and a
+    # one-element tuple is always true.
+    return self.SerializePartialToString(),
+
   def __str__(self):
     return self._cmsg.DebugString()
 
Index: python/google/protobuf/internal/reflection_test.py
===================================================================
--- python/google/protobuf/internal/reflection_test.py	(revision 421)
+++ python/google/protobuf/internal/reflection_test.py	(working copy)
@@ -38,6 +38,7 @@
 __author__ = '[email protected] (Will Robinson)'
 
 import operator
+import pickle
 import struct
 
 import unittest
@@ -1443,7 +1444,31 @@
     proto.optionalgroup.SetInParent()
     self.assertTrue(proto.HasField('optionalgroup'))
 
+  def testPickleUnpickle(self):
+    message = unittest_pb2.TestAllTypes()
 
+    # An empty message serializes to the empty string, which is false; pickle
+    # treats a false state specially, so test that case first.
+    max_format = max((int(float(v)) for v in pickle.compatible_formats))
+    for pickle_format in xrange(1, max_format + 1):
+      pickled = pickle.dumps(message, pickle_format)
+      restored = pickle.loads(pickled)
+      self.assertTrue(message == restored)
+
+    # Test a message with a few fields set.
+    message.optional_fixed64 = 314
+    message.repeated_uint32.extend([3, 1, 4])
+    for pickle_format in xrange(1, max_format + 1):
+      pickled = pickle.dumps(message, pickle_format)
+      restored = pickle.loads(pickled)
+      self.assertTrue(message == restored)
+
+    # Test a message with required fields and one required field missing. This
+    # should succeed because we use SerializePartialToString.
+    message = unittest_pb2.TestRequired(a=3, dummy2=3141) 
+    self.assertTrue(pickle.loads(pickle.dumps(message)) == message)
+
+
 #  Since we had so many tests for protocol buffer equality, we broke these out
 #  into separate TestCase classes.
 

Reply via email to