GitHub user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/20114#discussion_r159134256
--- Diff: python/pyspark/sql/tests.py ---
@@ -3357,6 +3357,27 @@ def test_schema_conversion_roundtrip(self):
schema_rt = from_arrow_schema(arrow_schema)
self.assertEquals(self.schema, schema_rt)
+ def test_createDataFrame_with_array_type(self):
+ import pandas as pd
+ pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"],
[u"y", u"z"]]})
+ df = self.spark.createDataFrame(pdf)
+ result = df.collect()
+ expected = [tuple(list(e) for e in rec) for rec in
pdf.to_records(index=False)]
+ for r in range(len(expected)):
+ for e in range(len(expected[r])):
+ self.assertTrue(expected[r][e] == result[r][e])
+
+ def test_toPandas_with_array_type(self):
+ expected = [([1, 2], [u"x", u"y"]), ([3, 4], [u"y", u"z"])]
+ array_schema = StructType([StructField("a",
ArrayType(IntegerType())),
+ StructField("b",
ArrayType(StringType()))])
+ df = self.spark.createDataFrame(expected, schema=array_schema)
+ pdf_arrow = df.toPandas()
--- End diff --
OK, done.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]