This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 112f949718 GH-36323: [Python] Fix Timestamp scalar repr error on
values outside datetime range (#36942)
112f949718 is described below
commit 112f94971882750731fabebd499ab0f817ca3839
Author: Ashish Bailkeri <[email protected]>
AuthorDate: Mon Jul 31 16:57:01 2023 -0400
GH-36323: [Python] Fix Timestamp scalar repr error on values outside
datetime range (#36942)
### Rationale for this change
https://github.com/apache/arrow/issues/36323
### What changes are included in this PR?
Changed the way repr is handled for TimestampScalar
### Are these changes tested?
I have added a basic test that checks that `repr` no longer raises an error
for timestamp values outside the `datetime` range.
### Are there any user-facing changes?
The repr of TimestampScalar is now produced with the `strftime` compute
function, so it shows a formatted timestamp string instead of a
`datetime.datetime` object.
* Closes: #36323
Lead-authored-by: Ashish Bailkeri <[email protected]>
Co-authored-by: Ashish Bailkeri <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
python/pyarrow/scalar.pxi | 17 +++++++++++++++++
python/pyarrow/tests/test_convert_builtin.py | 2 +-
python/pyarrow/tests/test_scalars.py | 12 ++++++++++++
python/pyarrow/types.pxi | 4 ++--
4 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/python/pyarrow/scalar.pxi b/python/pyarrow/scalar.pxi
index 74f5aa4213..aff1c311ab 100644
--- a/python/pyarrow/scalar.pxi
+++ b/python/pyarrow/scalar.pxi
@@ -522,6 +522,23 @@ cdef class TimestampScalar(Scalar):
return _datetime_from_int(sp.value, unit=dtype.unit(), tzinfo=tzinfo)
+ def __repr__(self):
+ """
+ Return the representation of TimestampScalar using `strftime` to avoid
+ original repr datetime values being out of range.
+ """
+ cdef:
+ CTimestampScalar* sp = <CTimestampScalar*> self.wrapped.get()
+ CTimestampType* dtype = <CTimestampType*> sp.type.get()
+
+ if not dtype.timezone().empty():
+ type_format = str(_pc().strftime(self,
format="%Y-%m-%dT%H:%M:%S%z"))
+ else:
+ type_format = str(_pc().strftime(self))
+ return '<pyarrow.{}: {!r}>'.format(
+ self.__class__.__name__, type_format
+ )
+
cdef class DurationScalar(Scalar):
"""
diff --git a/python/pyarrow/tests/test_convert_builtin.py
b/python/pyarrow/tests/test_convert_builtin.py
index af4c91a894..cf2535a3c6 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -1353,7 +1353,7 @@ def test_sequence_timestamp_from_int_with_unit():
assert len(arr_s) == 1
assert arr_s.type == s
assert repr(arr_s[0]) == (
- "<pyarrow.TimestampScalar: datetime.datetime(1970, 1, 1, 0, 0, 1)>"
+ "<pyarrow.TimestampScalar: '1970-01-01T00:00:01'>"
)
assert str(arr_s[0]) == "1970-01-01 00:00:01"
diff --git a/python/pyarrow/tests/test_scalars.py
b/python/pyarrow/tests/test_scalars.py
index 2aaefe16ae..a989301fe5 100644
--- a/python/pyarrow/tests/test_scalars.py
+++ b/python/pyarrow/tests/test_scalars.py
@@ -154,6 +154,18 @@ def test_hashing_struct_scalar():
assert hash1 == hash2
+def test_timestamp_scalar():
+ a = repr(pa.scalar("0000-01-01").cast(pa.timestamp("s")))
+ assert a == "<pyarrow.TimestampScalar: '0000-01-01T00:00:00'>"
+ b = repr(pa.scalar(datetime.datetime(2015, 1, 1), type=pa.timestamp('s',
tz='UTC')))
+ assert b == "<pyarrow.TimestampScalar: '2015-01-01T00:00:00+0000'>"
+ c = repr(pa.scalar(datetime.datetime(2015, 1, 1), type=pa.timestamp('us')))
+ assert c == "<pyarrow.TimestampScalar: '2015-01-01T00:00:00.000000'>"
+ d = repr(pc.assume_timezone(
+ pa.scalar("2000-01-01").cast(pa.timestamp("s")), "America/New_York"))
+ assert d == "<pyarrow.TimestampScalar: '2000-01-01T00:00:00-0500'>"
+
+
def test_bool():
false = pa.scalar(False)
true = pa.scalar(True)
diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi
index fbd4f8a94b..12ad2fc4b6 100644
--- a/python/pyarrow/types.pxi
+++ b/python/pyarrow/types.pxi
@@ -3605,9 +3605,9 @@ def timestamp(unit, tz=None):
>>> from datetime import datetime
>>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('s', tz='UTC'))
- <pyarrow.TimestampScalar: datetime.datetime(2012, 1, 1, 0, 0,
tzinfo=<UTC>)>
+ <pyarrow.TimestampScalar: '2012-01-01T00:00:00+0000'>
>>> pa.scalar(datetime(2012, 1, 1), type=pa.timestamp('us'))
- <pyarrow.TimestampScalar: datetime.datetime(2012, 1, 1, 0, 0)>
+ <pyarrow.TimestampScalar: '2012-01-01T00:00:00.000000'>
Returns
-------