This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 25d6b7a280f6 [SPARK-49692][PYTHON][CONNECT] Refine the string representation of literal date and datetime
25d6b7a280f6 is described below
commit 25d6b7a280f690c1a467f65143115cce846a732a
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Sep 19 07:46:18 2024 +0800
[SPARK-49692][PYTHON][CONNECT] Refine the string representation of literal
date and datetime
### What changes were proposed in this pull request?
Refine the string representation of literal date and datetime
### Why are the changes needed?
1, we should not represent those literals with internal values;
2, the string representation should be consistent with PySpark Classic if
possible (we cannot make sure the representations are always the same because
we only hold an unresolved expression in connect, but we can try our best to do
so)
### Does this PR introduce _any_ user-facing change?
yes
before:
```
In [3]: lit(datetime.date(2024, 7, 10))
Out[3]: Column<'19914'>
In [4]: lit(datetime.datetime(2024, 7, 10, 1, 2, 3, 456))
Out[4]: Column<'1720544523000456'>
```
after:
```
In [3]: lit(datetime.date(2024, 7, 10))
Out[3]: Column<'2024-07-10'>
In [4]: lit(datetime.datetime(2024, 7, 10, 1, 2, 3, 456))
Out[4]: Column<'2024-07-10 01:02:03.000456'>
```
### How was this patch tested?
added tests
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #48137 from zhengruifeng/py_connect_lit_dt.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
python/pyspark/sql/connect/expressions.py | 16 ++++++++++++++--
python/pyspark/sql/tests/test_column.py | 9 +++++++++
2 files changed, 23 insertions(+), 2 deletions(-)
diff --git a/python/pyspark/sql/connect/expressions.py b/python/pyspark/sql/connect/expressions.py
index db1cd1c013be..63128ef48e38 100644
--- a/python/pyspark/sql/connect/expressions.py
+++ b/python/pyspark/sql/connect/expressions.py
@@ -477,8 +477,20 @@ class LiteralExpression(Expression):
def __repr__(self) -> str:
if self._value is None:
return "NULL"
- else:
- return f"{self._value}"
+ elif isinstance(self._dataType, DateType):
+ dt = DateType().fromInternal(self._value)
+ if dt is not None and isinstance(dt, datetime.date):
+ return dt.strftime("%Y-%m-%d")
+ elif isinstance(self._dataType, TimestampType):
+ ts = TimestampType().fromInternal(self._value)
+ if ts is not None and isinstance(ts, datetime.datetime):
+ return ts.strftime("%Y-%m-%d %H:%M:%S.%f")
+ elif isinstance(self._dataType, TimestampNTZType):
+ ts = TimestampNTZType().fromInternal(self._value)
+ if ts is not None and isinstance(ts, datetime.datetime):
+ return ts.strftime("%Y-%m-%d %H:%M:%S.%f")
+ # TODO(SPARK-49693): Refine the string representation of timedelta
+ return f"{self._value}"
class ColumnReference(Expression):
diff --git a/python/pyspark/sql/tests/test_column.py b/python/pyspark/sql/tests/test_column.py
index 2bd66baaa2bf..220ecd387f7e 100644
--- a/python/pyspark/sql/tests/test_column.py
+++ b/python/pyspark/sql/tests/test_column.py
@@ -18,6 +18,8 @@
from enum import Enum
from itertools import chain
+import datetime
+
from pyspark.sql import Column, Row
from pyspark.sql import functions as sf
from pyspark.sql.types import StructType, StructField, IntegerType, LongType
@@ -280,6 +282,13 @@ class ColumnTestsMixin:
when_cond = sf.when(expression, sf.lit(None))
self.assertEqual(str(when_cond), "Column<'CASE WHEN foo THEN NULL END'>")
+ def test_lit_time_representation(self):
+ dt = datetime.date(2021, 3, 4)
+ self.assertEqual(str(sf.lit(dt)), "Column<'2021-03-04'>")
+
+ ts = datetime.datetime(2021, 3, 4, 12, 34, 56, 1234)
self.assertEqual(str(sf.lit(ts)), "Column<'2021-03-04 12:34:56.001234'>")
+
def test_enum_literals(self):
class IntEnum(Enum):
X = 1
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]