ueshin commented on code in PR #35391:
URL: https://github.com/apache/spark/pull/35391#discussion_r932566706
##########
python/pyspark/sql/tests/test_dataframe.py:
##########
@@ -953,6 +953,30 @@ def test_to_pandas_from_mixed_dataframe(self):
pdf_with_only_nulls = self.spark.sql(sql).filter("tinyint is
null").toPandas()
self.assertTrue(np.all(pdf_with_only_nulls.dtypes ==
pdf_with_some_nulls.dtypes))
+ @unittest.skipIf(
+ not have_pandas or not have_pyarrow,
+ cast(str, pandas_requirement_message or pyarrow_requirement_message),
+ )
+ def test_to_pandas_for_array_of_struct(self):
+ # SPARK-38098: Support Array of Struct for Pandas UDFs and toPandas
+ import numpy as np
+ import pandas as pd
+
+ df = self.spark.createDataFrame(
+ [[[("a", 2, 3.0), ("a", 2, 3.0)]], [[("b", 5, 6.0), ("b", 5,
6.0)]]],
+ "array_struct_col Array<struct<col1:string, col2:long,
col3:double>>",
+ )
+ is_arrow_enabled = [True, False]
+ for value in is_arrow_enabled:
Review Comment:
nit:
```py
for is_arrow_enabled in [True, False]:
```
##########
python/pyspark/sql/tests/test_pandas_udf_scalar.py:
##########
@@ -134,6 +134,30 @@ def test_pandas_udf_nested_arrays(self):
result = df.select(tokenize("vals").alias("hi"))
self.assertEqual([Row(hi=[["hi", "boo"]]), Row(hi=[["bye", "boo"]])],
result.collect())
+ def test_pandas_array_struct(self):
+ # SPARK-38098: Support Array of Struct for Pandas UDFs and toPandas
+ # import numpy as np
+
+ @pandas_udf("Array<struct<col1:string, col2:long, col3:double>>")
+ def return_cols(cols):
+ # self.assertEqual(type(cols), pd.Series)
+ # self.assertEqual(type(cols[0]), np.ndarray)
+ # self.assertEqual(type(cols[0][0]), dict)
Review Comment:
I guess we can't use `self` in the udf. Shall we follow the other tests and
use the builtin `assert` instead? See:
https://github.com/apache/spark/blob/f8b3d5322e6cbce2e42a6940518686b7255e79cb/python/pyspark/sql/tests/test_pandas_udf_scalar.py#L1206-L1215
Also, `import numpy as np` might need to be inside the udf.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]