viirya commented on code in PR #53822:
URL: https://github.com/apache/spark/pull/53822#discussion_r2710666352
##########
python/pyspark/sql/tests/arrow/test_arrow.py:
##########
@@ -1853,6 +1853,41 @@ def test_toArrow_with_compression_codec_large_dataset(self):
         self.assertEqual(t.num_rows, 10000)
         self.assertEqual(t.column_names, ["id", "str_col", "mod_col"])
 
+    def test_toPandas_double_nested_array_empty_outer(self):
+        schema = StructType([StructField("data", ArrayType(ArrayType(StringType())))])
+        df = self.spark.createDataFrame([Row(data=[])], schema=schema)
+        pdf = df.toPandas()
+        self.assertEqual(len(pdf), 1)
+        self.assertEqual(len(pdf["data"][0]), 0)
+
+    def test_toPandas_array_of_map_empty_outer(self):
+        schema = StructType([StructField("data", ArrayType(MapType(StringType(), StringType())))])
+        df = self.spark.createDataFrame([Row(data=[])], schema=schema)
+        pdf = df.toPandas()
+        self.assertEqual(len(pdf), 1)
+        self.assertEqual(len(pdf["data"][0]), 0)
+
+    def test_toPandas_triple_nested_array_empty_outer(self):
+        # SPARK-55056: this triggers a SIGSEGV without the fix. When the
+        # outer array is empty, the second-level ArrayWriter is never
+        # invoked, so its count stays 0. The Arrow format requires a
+        # ListArray's offset buffer to have N+1 entries even when N=0,
+        # but getBufferSizeFor(0) returns 0, so the buffer is omitted
+        # during IPC serialization.
+        schema = StructType([StructField("data", ArrayType(ArrayType(ArrayType(StringType()))))])
+        df = self.spark.createDataFrame([Row(data=[])], schema=schema)
+        pdf = df.toPandas()
+        self.assertEqual(len(pdf), 1)
+        self.assertEqual(len(pdf["data"][0]), 0)
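For context, here is a minimal PyArrow sketch of the offset-buffer invariant described in the comment above (PyArrow stands in for the Java-side writer here; it is illustrative only, not part of the PR):

```python
import pyarrow as pa

# One row whose outer list is empty, mirroring Row(data=[]).
arr = pa.array([[]], type=pa.list_(pa.list_(pa.string())))

# The Arrow format requires a ListArray with N values to carry N+1
# offsets. The outer level has 1 value, hence 2 offsets; the inner
# level has 0 values but must still carry 1 offset entry.
print(arr.offsets)         # 2 entries: [0, 0]
print(arr.values.offsets)  # 1 entry:   [0]
```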
Review Comment:
`data=[]`, so should it be `self.assertEqual(len(pdf["data"]), 0)`?
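For reference, a minimal pandas sketch of the distinction the question turns on (plain pandas standing in for the `toPandas()` result above):

```python
import pandas as pd

# One row whose "data" cell is an empty list, as produced by Row(data=[]).
pdf = pd.DataFrame({"data": [[]]})

print(len(pdf))             # 1 -> number of rows in the frame
print(len(pdf["data"]))     # 1 -> length of the column Series (also rows)
print(len(pdf["data"][0]))  # 0 -> length of the empty outer list itself
```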
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]