Repository: spark Updated Branches: refs/heads/master e2614038e -> ecfa76cdf
[SPARK-3415] [PySpark] removes SerializingAdapter code This code removes the SerializingAdapter code that was copied from PiCloud Author: Ward Viaene <[email protected]> Closes #2287 from wardviaene/feature/pythonsys and squashes the following commits: 5f0d426 [Ward Viaene] SPARK-3415: modified test class to do dump and load 5f5d559 [Ward Viaene] SPARK-3415: modified test class name and call cloudpickle.dumps instead using StringIO afc4a9a [Ward Viaene] SPARK-3415: added newlines to pass lint aaf10b7 [Ward Viaene] SPARK-3415: removed references to SerializingAdapter and rewrote test 65ffeff [Ward Viaene] removed duplicate test a958866 [Ward Viaene] SPARK-3415: test script e263bf5 [Ward Viaene] SPARK-3415: removes legacy SerializingAdapter code Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ecfa76cd Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ecfa76cd Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ecfa76cd Branch: refs/heads/master Commit: ecfa76cdfe846c75e1b7ebc556167e46963289c5 Parents: e261403 Author: Ward Viaene <[email protected]> Authored: Sun Sep 7 18:54:36 2014 -0700 Committer: Josh Rosen <[email protected]> Committed: Sun Sep 7 18:54:36 2014 -0700 ---------------------------------------------------------------------- python/pyspark/cloudpickle.py | 6 +----- python/pyspark/tests.py | 11 +++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ecfa76cd/python/pyspark/cloudpickle.py ---------------------------------------------------------------------- diff --git a/python/pyspark/cloudpickle.py b/python/pyspark/cloudpickle.py index 6806248..80e51d1 100644 --- a/python/pyspark/cloudpickle.py +++ b/python/pyspark/cloudpickle.py @@ -657,7 +657,6 @@ class CloudPickler(pickle.Pickler): def save_file(self, obj): """Save a file""" import StringIO as pystringIO #we can't use cStringIO as it lacks the name attribute - from ..transport.adapter import SerializingAdapter if not hasattr(obj, 'name') or not hasattr(obj, 'mode'): raise pickle.PicklingError("Cannot pickle files that do not map to an actual file") @@ -691,13 +690,10 @@ class CloudPickler(pickle.Pickler): tmpfile.close() if tst != '': raise pickle.PicklingError("Cannot pickle file %s as it does not appear to map to a physical, real file" % name) - elif fsize > SerializingAdapter.max_transmit_data: - raise pickle.PicklingError("Cannot pickle file %s as it exceeds cloudconf.py's max_transmit_data of %d" % - (name,SerializingAdapter.max_transmit_data)) else: try: tmpfile = file(name) - contents = tmpfile.read(SerializingAdapter.max_transmit_data) + contents = tmpfile.read() tmpfile.close() except IOError: raise pickle.PicklingError("Cannot pickle file %s as it cannot be read" % name) http://git-wip-us.apache.org/repos/asf/spark/blob/ecfa76cd/python/pyspark/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/tests.py b/python/pyspark/tests.py index 2ade15b..9fbeb36 100644 --- a/python/pyspark/tests.py +++ b/python/pyspark/tests.py @@ -169,6 +169,17 @@ class SerializationTestCase(unittest.TestCase): self.assertEquals(p1, p2) +# Regression test for SPARK-3415 +class CloudPickleTest(unittest.TestCase): + def test_pickling_file_handles(self): + from pyspark.cloudpickle import dumps + from StringIO import StringIO + from pickle import load + out1 = sys.stderr + out2 = load(StringIO(dumps(out1))) + self.assertEquals(out1, out2) + + class PySparkTestCase(unittest.TestCase): def setUp(self): --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
