Github user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/20725#discussion_r210679722
--- Diff: python/pyspark/sql/tests.py ---
@@ -4331,13 +4354,22 @@ def test_createDataFrame_fallback_enabled(self):
self.assertEqual(df.collect(), [Row(a={u'a': 1})])
def test_createDataFrame_fallback_disabled(self):
+ from distutils.version import LooseVersion
import pandas as pd
+ import pyarrow as pa
with QuietTest(self.sc):
with self.assertRaisesRegexp(TypeError, 'Unsupported type'):
self.spark.createDataFrame(
pd.DataFrame([[{u'a': 1}]]), "a: map<string, int>")
+ # TODO: remove BinaryType check once minimum pyarrow version is 0.10.0
+ if LooseVersion(pa.__version__) < LooseVersion("0.10.0"):
+ with QuietTest(self.sc):
+ with self.assertRaisesRegexp(TypeError, 'Unsupported type.*BinaryType'):
+ self.spark.createDataFrame(
+ pd.DataFrame([[{'a': b'aaa'}]]), "a: binary")
--- End diff --
In general, Spark only accepts `bytearray` and pyarrow can accept
`bytearray` or `bytes`. In pyarrow < 0.10.0 bytearrays weren't supported
though, which is why I used `bytes` for the pandas pdf here.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]