Sandeep Singh created SPARK-41884:
-------------------------------------
Summary: DataFrame `toPandas` parity in return types
Key: SPARK-41884
URL: https://issues.apache.org/jira/browse/SPARK-41884
Project: Spark
Issue Type: Sub-task
Components: Connect
Affects Versions: 3.4.0
Reporter: Sandeep Singh
{code:java}
schema = StructType(
[StructField("i", StringType(), True), StructField("j", IntegerType(),
True)]
)
df = self.spark.createDataFrame([("a", 1)], schema)
schema1 = StructType([StructField("j", StringType()), StructField("i",
StringType())])
df1 = df.to(schema1)
self.assertEqual(schema1, df1.schema)
self.assertEqual(df.count(), df1.count())
schema2 = StructType([StructField("j", LongType())])
df2 = df.to(schema2)
self.assertEqual(schema2, df2.schema)
self.assertEqual(df.count(), df2.count())
schema3 = StructType([StructField("struct", schema1, False)])
df3 = df.select(struct("i", "j").alias("struct")).to(schema3)
self.assertEqual(schema3, df3.schema)
self.assertEqual(df.count(), df3.count())
# incompatible field nullability
schema4 = StructType([StructField("j", LongType(), False)])
self.assertRaisesRegex(
AnalysisException, "NULLABLE_COLUMN_OR_FIELD", lambda: df.to(schema4)
){code}
{code:java}
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_dataframe.py", line 1486, in test_to
    self.assertRaisesRegex(
AssertionError: AnalysisException not raised by <lambda> {code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]