awdavidson commented on code in PR #36120:
URL: https://github.com/apache/spark/pull/36120#discussion_r846519415
##########
python/pyspark/sql/tests/test_pandas_map.py:
##########
@@ -95,17 +98,61 @@ def func(iterator):
actual = df.repartition(1).mapInPandas(func, "a long").collect()
self.assertEqual(set((r.a for r in actual)), set(range(100)))
+ def test_other_than_dataframe(self):
+ def bad_iter(_):
+ return iter([1])
+
+ with QuietTest(self.sc):
+ with self.assertRaisesRegex(
+ PythonException,
+ "Return type of the user-defined function should be Pandas.DataFrame, "
+ "but is <class 'int'>",
+ ):
+ self.spark.range(10, numPartitions=3).mapInPandas(
+ bad_iter, "a int, b string"
+ ).count()
+
def test_empty_iterator(self):
def empty_iter(_):
return iter([])
- self.assertEqual(self.spark.range(10).mapInPandas(empty_iter, "a int, b string").count(), 0)
+ mapped = self.spark.range(10, numPartitions=3).mapInPandas(empty_iter, "a int, b string")
+ self.assertEqual(mapped.count(), 0)
- def test_empty_rows(self):
- def empty_rows(_):
+ def test_empty_dataframes(self):
+ def empty_dataframes(_):
return iter([pd.DataFrame({"a": []})])
- self.assertEqual(self.spark.range(10).mapInPandas(empty_rows, "a int").count(), 0)
+ mapped = self.spark.range(10, numPartitions=3).mapInPandas(empty_dataframes, "a int")
+ self.assertEqual(mapped.count(), 0)
+
+ def test_empty_dataframes_without_columns(self):
+ def empty_dataframes_wo_columns(iterator):
+ for pdf in iterator:
+ yield pdf
+ # after yielding all elements of the iterator, also yield one dataframe without columns
+ yield pd.DataFrame([])
+
+ mapped = self.spark.range(10, numPartitions=3).toDF("id").mapInPandas(empty_dataframes_wo_columns, "id int")
+ self.assertEqual(mapped.count(), 10)
+
+ def test_empty_dataframes_with_less_columns(self):
+ def empty_dataframes_with_less_columns(iterator):
+ for pdf in iterator:
+ yield pdf
+ # after yielding all elements of the iterator, also yield one dataframe with less columns
+ yield pd.DataFrame([(1,)], columns=["id"])
+
+ with QuietTest(self.sc):
+ with self.assertRaisesRegex(
+ PythonException,
+ "KeyError: 'value'",
+ ):
+ self.spark.range(10, numPartitions=3) \
Review Comment:
nit: let's wrap this chained call in a single pair of parentheses rather than continuing the lines with `\`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]