HyukjinKwon commented on code in PR #40829:
URL: https://github.com/apache/spark/pull/40829#discussion_r1173192092


##########
python/pyspark/sql/tests/test_arrow.py:
##########
@@ -858,6 +858,41 @@ def test_createDataFrame_empty_partition(self):
         self.assertEqual([Row(c1=1, c2="string")], df.collect())
         self.assertGreater(self.spark.sparkContext.defaultParallelism, 
len(pdf))
 
+    def test_toPandas_duplicate_field_names(self):
+        for arrow_enabled in [True, False]:
+            with self.subTest(arrow_enabled=arrow_enabled):
+                self.check_toPandas_duplicate_field_names(arrow_enabled)
+
+    def check_toPandas_duplicate_field_names(self, arrow_enabled):
+        data = [Row(Row("a", 1), Row(2, 3, "b", 4, "c")), Row(Row("x", 6), 
Row(7, 8, "y", 9, "z"))]
+        schema = (
+            StructType()
+            .add("struct", StructType().add("x", StringType()).add("x", 
IntegerType()))
+            .add(
+                "struct",
+                StructType()
+                .add("a", IntegerType())
+                .add("x", IntegerType())
+                .add("x", StringType())
+                .add("y", IntegerType())
+                .add("y", StringType()),
+            )
+        )
+        if arrow_enabled:
+            expected = pd.DataFrame(
+                [
+                    [{"x_0": "a", "x_1": 1}, {"a": 2, "x_0": 3, "x_1": "b", 
"y_0": 4, "y_1": "c"}],

Review Comment:
   Hmmm ... not sure if it's good to add these suffixes ...



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to