This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new e6f3dd9b971e [SPARK-47202][PYTHON][TESTS][FOLLOW-UP] Test timestamp
with tzinfo in toPandas and createDataFrame with Arrow optimized
e6f3dd9b971e is described below
commit e6f3dd9b971e539485518dc041244a51c7a8302e
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Wed Feb 28 16:56:38 2024 +0900
[SPARK-47202][PYTHON][TESTS][FOLLOW-UP] Test timestamp with tzinfo in
toPandas and createDataFrame with Arrow optimized
### What changes were proposed in this pull request?
This PR is a follow-up of https://github.com/apache/spark/pull/45301 that
actually tests the change.
### Why are the changes needed?
To prevent a regression.
### Does this PR introduce _any_ user-facing change?
No, test-only.
### How was this patch tested?
Manually ran the tests.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #45308 from HyukjinKwon/SPARK-47202-followup.
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
(cherry picked from commit 721c2a41a54bb00ea885093f322edf704e63d17f)
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../pyspark/sql/tests/connect/test_parity_arrow.py | 3 +++
python/pyspark/sql/tests/test_arrow.py | 27 ++++++++++++++++++++++
2 files changed, 30 insertions(+)
diff --git a/python/pyspark/sql/tests/connect/test_parity_arrow.py b/python/pyspark/sql/tests/connect/test_parity_arrow.py
index a92ef971cd21..55e689da8fdd 100644
--- a/python/pyspark/sql/tests/connect/test_parity_arrow.py
+++ b/python/pyspark/sql/tests/connect/test_parity_arrow.py
@@ -136,6 +136,9 @@ class ArrowParityTests(ArrowTestsMixin, ReusedConnectTestCase, PandasOnSparkTest
def test_toPandas_nested_timestamp(self):
self.check_toPandas_nested_timestamp(True)
+ def test_toPandas_timestmap_tzinfo(self):
+ self.check_toPandas_timestmap_tzinfo(True)
+
def test_createDataFrame_udt(self):
self.check_createDataFrame_udt(True)
diff --git a/python/pyspark/sql/tests/test_arrow.py b/python/pyspark/sql/tests/test_arrow.py
index 9e9a7d3ac9b0..6e462d38d8e5 100644
--- a/python/pyspark/sql/tests/test_arrow.py
+++ b/python/pyspark/sql/tests/test_arrow.py
@@ -18,12 +18,14 @@
import datetime
import os
import threading
+import calendar
import time
import unittest
import warnings
from distutils.version import LooseVersion
from typing import cast
from collections import namedtuple
+from zoneinfo import ZoneInfo
from pyspark import SparkContext, SparkConf
from pyspark.sql import Row, SparkSession
@@ -1090,6 +1092,31 @@ class ArrowTestsMixin:
self.assertEqual(df.first(), expected)
+ def test_toPandas_timestmap_tzinfo(self):
+ for arrow_enabled in [True, False]:
+ with self.subTest(arrow_enabled=arrow_enabled):
+ self.check_toPandas_timestmap_tzinfo(arrow_enabled)
+
+ def check_toPandas_timestmap_tzinfo(self, arrow_enabled):
+ # SPARK-47202: Test timestamp with tzinfo in toPandas and createDataFrame
+ ts_tzinfo = datetime.datetime(2023, 1, 1, 0, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
+ data = pd.DataFrame({"a": [ts_tzinfo]})
+ df = self.spark.createDataFrame(data)
+
+ with self.sql_conf(
+ {
+ "spark.sql.execution.arrow.pyspark.enabled": arrow_enabled,
+ }
+ ):
+ pdf = df.toPandas()
+
+ expected = pd.DataFrame(
+ # Spark unsets tzinfo and converts them to localtimes.
+ {"a":
[datetime.datetime.fromtimestamp(calendar.timegm(ts_tzinfo.utctimetuple()))]}
+ )
+
+ assert_frame_equal(pdf, expected)
+
def test_toPandas_nested_timestamp(self):
for arrow_enabled in [True, False]:
with self.subTest(arrow_enabled=arrow_enabled):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]