This is an automated email from the ASF dual-hosted git repository.
alenka pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 7ba1a7b99b GH-48654: [Python] Test timestamp from int without pandas
dependency (#48655)
7ba1a7b99b is described below
commit 7ba1a7b99b3c036d9d14d345876cfe7194ecaacc
Author: Hyukjin Kwon <[email protected]>
AuthorDate: Tue Jan 6 18:44:17 2026 +0900
GH-48654: [Python] Test timestamp from int without pandas dependency
(#48655)
### Rationale for this change
The test `test_sequence_timestamp_from_int_with_unit()` was marked with
`@pytest.mark.pandas`, meaning it was skipped when pandas was not
installed.
This left the non-pandas code path untested.
Specifically, this change makes it possible to test the following code:
https://github.com/apache/arrow/blob/744f0ec2cf9f8716fcea408d67ede9c14a7e6954/python/pyarrow/scalar.pxi#L652-L661
The TODO was introduced in commit 286bf7c9 when making pandas
an optional dependency:
https://github.com/apache/arrow/blob/286bf7c9d343cb972691c32ea8128390aed39119/python/pyarrow/tests/test_convert_builtin.py#L731-L732
### What changes are included in this PR?
Split nanosecond timestamp testing into separate tests to cover both pandas
and non-pandas code paths:
1. Kept `test_sequence_timestamp_from_int_with_unit()` for the s/ms/us units,
dropping its `@pytest.mark.pandas` marker so it runs without pandas
2. Removed TODO comment and created three new nanosecond-specific tests:
- `test_sequence_timestamp_from_int_with_unit_nanosecond`
(`@pytest.mark.pandas`): Tests `pd.Timestamp` with full nanosecond precision
- `test_sequence_timestamp_from_int_nanosecond_without_pandas`
(`@pytest.mark.nopandas`): Tests that values not divisible by 1000 raise
`ValueError`
- `test_sequence_timestamp_from_int_nanosecond_divisible_without_pandas`
(`@pytest.mark.nopandas`): Tests successful conversion when values are
divisible by 1000
### Are these changes tested?
Yes. I manually tested with/without pandas:
```
conda remove pandas
pytest pyarrow/tests/test_convert_builtin.py -k "timestamp_from_int_with_unit" -xvs
conda install pandas==2.3.3
pytest pyarrow/tests/test_convert_builtin.py -k "timestamp_from_int_with_unit" -xvs
```
### Are there any user-facing changes?
No, test-only.
* GitHub Issue: #48654
Authored-by: Hyukjin Kwon <[email protected]>
Signed-off-by: AlenkaF <[email protected]>
---
python/pyarrow/tests/test_convert_builtin.py | 68 +++++++++++++++++++++++-----
1 file changed, 56 insertions(+), 12 deletions(-)
diff --git a/python/pyarrow/tests/test_convert_builtin.py b/python/pyarrow/tests/test_convert_builtin.py
index 07286125c4..f1461a302d 100644
--- a/python/pyarrow/tests/test_convert_builtin.py
+++ b/python/pyarrow/tests/test_convert_builtin.py
@@ -1381,18 +1381,13 @@ def test_sequence_timestamp_nanoseconds():
23, 34, 123456)
-@pytest.mark.pandas
@pytest.mark.timezone_data
def test_sequence_timestamp_from_int_with_unit():
- # TODO(wesm): This test might be rewritten to assert the actual behavior
- # when pandas is not installed
-
data = [1]
s = pa.timestamp('s')
ms = pa.timestamp('ms')
us = pa.timestamp('us')
- ns = pa.timestamp('ns')
arr_s = pa.array(data, type=s)
assert len(arr_s) == 1
@@ -1418,22 +1413,71 @@ def test_sequence_timestamp_from_int_with_unit():
)
assert str(arr_us[0]) == "1970-01-01 00:00:00.000001"
+ expected_exc = TypeError
+
+ class CustomClass():
+ pass
+
+ for ty in [pa.timestamp('ns'), pa.date32(), pa.date64()]:
+ with pytest.raises(expected_exc):
+ pa.array([1, CustomClass()], type=ty)
+
+
+@pytest.mark.pandas
+@pytest.mark.timezone_data
+def test_sequence_timestamp_from_int_with_unit_nanosecond():
+ # With pandas installed, nanosecond timestamps return pd.Timestamp
+ # with full nanosecond precision (see scalar.pxi in _datetime_from_int)
+ import pandas as pd
+
+ data = [1]
+ ns = pa.timestamp('ns')
+
arr_ns = pa.array(data, type=ns)
assert len(arr_ns) == 1
assert arr_ns.type == ns
- assert repr(arr_ns[0].as_py()) == (
+
+ result = arr_ns[0].as_py()
+ assert isinstance(result, pd.Timestamp)
+ assert repr(result) == (
"Timestamp('1970-01-01 00:00:00.000000001')"
)
assert str(arr_ns[0]) == "1970-01-01 00:00:00.000000001"
- expected_exc = TypeError
- class CustomClass():
- pass
+@pytest.mark.nopandas
+@pytest.mark.timezone_data
+def test_sequence_timestamp_from_int_nanosecond_without_pandas():
+ # Without pandas, nanosecond timestamps raise ValueError if value
+ # is not safely convertible to microseconds (value % 1000 != 0)
+ data = [1]
+ ns = pa.timestamp('ns')
- for ty in [ns, pa.date32(), pa.date64()]:
- with pytest.raises(expected_exc):
- pa.array([1, CustomClass()], type=ty)
+ arr_ns = pa.array(data, type=ns)
+ assert len(arr_ns) == 1
+ assert arr_ns.type == ns
+
+ with pytest.raises(ValueError, match="not safely convertible to microseconds"):
+ arr_ns[0].as_py()
+
+
+@pytest.mark.nopandas
+@pytest.mark.timezone_data
+def test_sequence_timestamp_from_int_nanosecond_divisible_without_pandas():
+ # Without pandas, nanosecond timestamps that are divisible by 1000
+ # can be safely converted to microseconds (value % 1000 == 0)
+ data = [1000]
+ ns = pa.timestamp('ns')
+
+ arr_ns = pa.array(data, type=ns)
+ assert len(arr_ns) == 1
+ assert arr_ns.type == ns
+
+ result = arr_ns[0].as_py()
+ assert repr(result) == (
+ "datetime.datetime(1970, 1, 1, 0, 0, 0, 1)"
+ )
+ assert str(arr_ns[0]) == "1970-01-01 00:00:00.000001"
def test_sequence_duration():