Github user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/20678#discussion_r172267148
--- Diff: python/pyspark/sql/tests.py ---
@@ -3493,19 +3519,30 @@ def create_pandas_data_frame(self):
data_dict["4_float_t"] = np.float32(data_dict["4_float_t"])
return pd.DataFrame(data=data_dict)
- def test_unsupported_datatype(self):
- schema = StructType([StructField("map", MapType(StringType(),
IntegerType()), True)])
- df = self.spark.createDataFrame([(None,)], schema=schema)
- with QuietTest(self.sc):
- with self.assertRaisesRegexp(Exception, 'Unsupported type'):
- df.toPandas()
+ def test_toPandas_fallback_enabled(self):
+ import pandas as pd
- df = self.spark.createDataFrame([(None,)], schema="a binary")
- with QuietTest(self.sc):
- with self.assertRaisesRegexp(
- Exception,
- 'Unsupported type.*\nNote: toPandas attempted Arrow
optimization because'):
- df.toPandas()
+ with self.sql_conf({"spark.sql.execution.arrow.fallback.enabled":
True}):
+ schema = StructType([StructField("map", MapType(StringType(),
IntegerType()), True)])
+ df = self.spark.createDataFrame([({u'a': 1},)], schema=schema)
+ with QuietTest(self.sc):
+ with warnings.catch_warnings(record=True) as warns:
+ pdf = df.toPandas()
+ # Catch and check the last UserWarning.
+ user_warns = [
+ warn.message for warn in warns if
isinstance(warn.message, UserWarning)]
+ self.assertTrue(len(user_warns) > 0)
+ self.assertTrue(
+ "Attempts non-optimization" in
_exception_message(user_warns[-1]))
+ self.assertPandasEqual(pdf, pd.DataFrame({u'map':
[{u'a': 1}]}))
+
+ def test_toPandas_fallback_disabled(self):
+ with self.sql_conf({"spark.sql.execution.arrow.fallback.enabled":
False}):
--- End diff --
Do you still want this override since the fallback conf is already disabled in setUpClass? It doesn't hurt to have it, but I just thought I'd ask.
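For context, here is roughly the pattern I mean. This is only an illustrative sketch; the class name and the exact body of `setUpClass` below are mine, not necessarily what this PR ends up with:

```python
class ArrowTests(ReusedSQLTestCase):

    @classmethod
    def setUpClass(cls):
        ReusedSQLTestCase.setUpClass()
        # Illustrative only: if the fallback conf is already switched off
        # for the whole test class here, then the
        # `with self.sql_conf({...: False})` block inside
        # test_toPandas_fallback_disabled re-applies a value that is
        # already in effect.
        cls.spark.conf.set("spark.sql.execution.arrow.fallback.enabled", "false")
```

If the explicit `sql_conf` override is meant to keep the test self-documenting regardless of the class-level default, keeping it is fine with me too.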
---