Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/20114#discussion_r159125988
  
    --- Diff: python/pyspark/sql/tests.py ---
    @@ -3357,6 +3357,27 @@ def test_schema_conversion_roundtrip(self):
             schema_rt = from_arrow_schema(arrow_schema)
             self.assertEquals(self.schema, schema_rt)
     
    +    def test_createDataFrame_with_array_type(self):
    +        import pandas as pd
    +        pdf = pd.DataFrame({"a": [[1, 2], [3, 4]], "b": [[u"x", u"y"], 
[u"y", u"z"]]})
    +        df = self.spark.createDataFrame(pdf)
    +        result = df.collect()
    +        expected = [tuple(list(e) for e in rec) for rec in 
pdf.to_records(index=False)]
    +        for r in range(len(expected)):
    +            for e in range(len(expected[r])):
    +                self.assertTrue(expected[r][e] == result[r][e])
    +
    +    def test_toPandas_with_array_type(self):
    +        expected = [([1, 2], [u"x", u"y"]), ([3, 4], [u"y", u"z"])]
    +        array_schema = StructType([StructField("a", 
ArrayType(IntegerType())),
    +                                   StructField("b", 
ArrayType(StringType()))])
    +        df = self.spark.createDataFrame(expected, schema=array_schema)
    +        pdf_arrow = df.toPandas()
    --- End diff --
    
    Should we also add tests that check the results are the same whether Arrow is 
enabled or disabled, just to be sure?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to