Sandeep Singh created SPARK-41884:
-------------------------------------

             Summary: DataFrame `toPandas` parity in return types
                 Key: SPARK-41884
                 URL: https://issues.apache.org/jira/browse/SPARK-41884
             Project: Spark
          Issue Type: Sub-task
          Components: Connect
    Affects Versions: 3.4.0
            Reporter: Sandeep Singh


{code:python}
schema = StructType(
    [StructField("i", StringType(), True), StructField("j", IntegerType(), True)]
)
df = self.spark.createDataFrame([("a", 1)], schema)

schema1 = StructType([StructField("j", StringType()), StructField("i", StringType())])
df1 = df.to(schema1)
self.assertEqual(schema1, df1.schema)
self.assertEqual(df.count(), df1.count())

schema2 = StructType([StructField("j", LongType())])
df2 = df.to(schema2)
self.assertEqual(schema2, df2.schema)
self.assertEqual(df.count(), df2.count())

schema3 = StructType([StructField("struct", schema1, False)])
df3 = df.select(struct("i", "j").alias("struct")).to(schema3)
self.assertEqual(schema3, df3.schema)
self.assertEqual(df.count(), df3.count())

# incompatible field nullability
schema4 = StructType([StructField("j", LongType(), False)])
self.assertRaisesRegex(
    AnalysisException, "NULLABLE_COLUMN_OR_FIELD", lambda: df.to(schema4)
)
{code}
{code}
Traceback (most recent call last):
  File "/Users/s.singh/personal/spark-oss/python/pyspark/sql/tests/test_dataframe.py", line 1486, in test_to
    self.assertRaisesRegex(
AssertionError: AnalysisException not raised by <lambda>
{code}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to