jorisvandenbossche commented on a change in pull request #12522:
URL: https://github.com/apache/arrow/pull/12522#discussion_r834286616
##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -391,62 +391,165 @@ Result<std::string> PyTZInfo_utcoffset_hhmm(PyObject*
pytzinfo) {
Result<PyObject*> StringToTzinfo(const std::string& tz) {
util::string_view sign_str, hour_str, minute_str;
OwnedRef pytz;
- RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
+ OwnedRef zoneinfo;
+ OwnedRef datetime;
+ if (internal::ImportModule("pytz", &pytz).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
Review comment:
```suggestion
Status has_pytz = internal::ImportModule("pytz", &pytz)
if (has_pytz.ok()) {
```
Something like this would avoid doing the ImportModule twice. Not that
importing twice is really a problem, but this might be a bit cleaner.
(same for the other places where an ImportModule happens inside an `if`)
##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -391,62 +391,165 @@ Result<std::string> PyTZInfo_utcoffset_hhmm(PyObject*
pytzinfo) {
Result<PyObject*> StringToTzinfo(const std::string& tz) {
util::string_view sign_str, hour_str, minute_str;
OwnedRef pytz;
- RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
+ OwnedRef zoneinfo;
+ OwnedRef datetime;
+ if (internal::ImportModule("pytz", &pytz).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
+
+ if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) {
+ int sign = -1;
+ if (sign_str == "+") {
+ sign = 1;
+ }
+ OwnedRef fixed_offset;
+ RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset",
&fixed_offset));
+ uint32_t minutes, hours;
+ if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(),
&hours) ||
+ !::arrow::internal::ParseUnsigned(minute_str.data(),
minute_str.size(),
+ &minutes)) {
+ return Status::Invalid("Invalid timezone: ", tz);
+ }
+ OwnedRef total_minutes(PyLong_FromLong(
+ sign * ((static_cast<int>(hours) * 60) +
static_cast<int>(minutes))));
+ RETURN_IF_PYERROR();
+ auto tzinfo =
+ PyObject_CallFunctionObjArgs(fixed_offset.obj(),
total_minutes.obj(), NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ OwnedRef timezone;
+ RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone",
&timezone));
+ OwnedRef py_tz_string(
+ PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
+ auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(),
py_tz_string.obj(), NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ // catch fixed offset if pytz is not present
if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) {
+ RETURN_NOT_OK(internal::ImportModule("datetime", &datetime));
int sign = -1;
if (sign_str == "+") {
sign = 1;
}
- OwnedRef fixed_offset;
- RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset",
&fixed_offset));
+
+ // import timezone and timedelta module to create a tzinfo object
+ OwnedRef class_timezone;
+ OwnedRef class_timedelta;
+ RETURN_NOT_OK(
+ internal::ImportFromModule(datetime.obj(), "timezone",
&class_timezone));
+ RETURN_NOT_OK(
+ internal::ImportFromModule(datetime.obj(), "timedelta",
&class_timedelta));
+
+ // check input
uint32_t minutes, hours;
if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(),
&hours) ||
!::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(),
&minutes)) {
return Status::Invalid("Invalid timezone: ", tz);
}
+
+ // save offset as a signed integer
OwnedRef total_minutes(PyLong_FromLong(
sign * ((static_cast<int>(hours) * 60) + static_cast<int>(minutes))));
+ // create zero integers for empty arguments in datetime.timedelta
+ OwnedRef zero(PyLong_FromLong(static_cast<int>(0)));
+
+ // call datetime.timedelta to get correct offset object for
datetime.timezone
+ auto offset =
+ PyObject_CallFunctionObjArgs(class_timedelta.obj(), zero.obj(),
zero.obj(),
+ zero.obj(), zero.obj(),
total_minutes.obj(), NULL);
RETURN_IF_PYERROR();
+ // call datetime.timezone
+ auto tzinfo = PyObject_CallFunctionObjArgs(class_timezone.obj(), offset,
NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ // fallback on zoneinfo if tz is string and pytz is not present
+ if (internal::ImportModule("zoneinfo", &zoneinfo).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("zoneinfo", &zoneinfo));
+
+ OwnedRef class_zoneinfo;
+ RETURN_NOT_OK(
+ internal::ImportFromModule(zoneinfo.obj(), "ZoneInfo",
&class_zoneinfo));
+ OwnedRef py_tz_string(
+ PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
auto tzinfo =
- PyObject_CallFunctionObjArgs(fixed_offset.obj(), total_minutes.obj(),
NULL);
+ PyObject_CallFunctionObjArgs(class_zoneinfo.obj(), py_tz_string.obj(),
NULL);
RETURN_IF_PYERROR();
return tzinfo;
}
- OwnedRef timezone;
- RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone));
- OwnedRef py_tz_string(
- PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
- auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(),
py_tz_string.obj(), NULL);
- RETURN_IF_PYERROR();
- return tzinfo;
+ return Status::Invalid("Pytz package must be installed.");
Review comment:
or Python>=3.8 for zoneinfo module
##########
File path: python/pyarrow/tests/test_convert_builtin.py
##########
@@ -995,6 +994,9 @@ def test_sequence_timestamp():
'ns'
])
def test_sequence_timestamp_with_timezone(timezone, unit):
+ pytest.importorskip("pytz")
+ import pytz
Review comment:
This one is not yet addressed I think
##########
File path: python/pyarrow/tests/strategies.py
##########
@@ -261,17 +274,23 @@ def arrays(draw, type, size=None, nullable=True):
elif pa.types.is_date(ty):
value = st.dates()
elif pa.types.is_timestamp(ty):
+ if zoneinfo is None:
+ pytest.skip('no module named zoneinfo')
+ if ty.tz is None:
+ pytest.skip('requires timezone not None')
Review comment:
Although I now see some previous comments about that below ...
##########
File path: python/pyarrow/tests/test_pandas.py
##########
@@ -1044,7 +1044,7 @@ def test_python_datetime_with_pytz_tzinfo(self):
df = pd.DataFrame({'datetime': values})
_check_pandas_roundtrip(df)
- @h.given(st.none() | tzst.timezones())
+ @h.given(st.none() | st.timezones())
Review comment:
Or this can be `past.timezones()` instead?
##########
File path: python/pyarrow/tests/strategies.py
##########
@@ -96,10 +103,16 @@
pa.time64('us'),
pa.time64('ns')
])
+if tzst:
Review comment:
I don't know if I already asked this before, but is it possible to do
something like this?
```suggestion
if tzst and zoneinfo:
timezones = st.one_of(st.none(), tzst.timezones(), st.timezones())
elif tzst:
```
##########
File path: python/pyarrow/tests/strategies.py
##########
@@ -261,17 +274,23 @@ def arrays(draw, type, size=None, nullable=True):
elif pa.types.is_date(ty):
value = st.dates()
elif pa.types.is_timestamp(ty):
+ if zoneinfo is None:
+ pytest.skip('no module named zoneinfo')
+ if ty.tz is None:
+ pytest.skip('requires timezone not None')
Review comment:
I think we can also create a datetime without timezone in this case,
instead of skipping the whole test? (that might require rearranging the code
below a bit)
##########
File path: cpp/src/arrow/python/datetime.cc
##########
@@ -391,62 +391,165 @@ Result<std::string> PyTZInfo_utcoffset_hhmm(PyObject*
pytzinfo) {
Result<PyObject*> StringToTzinfo(const std::string& tz) {
util::string_view sign_str, hour_str, minute_str;
OwnedRef pytz;
- RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
+ OwnedRef zoneinfo;
+ OwnedRef datetime;
+ if (internal::ImportModule("pytz", &pytz).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("pytz", &pytz));
+
+ if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) {
+ int sign = -1;
+ if (sign_str == "+") {
+ sign = 1;
+ }
+ OwnedRef fixed_offset;
+ RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset",
&fixed_offset));
+ uint32_t minutes, hours;
+ if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(),
&hours) ||
+ !::arrow::internal::ParseUnsigned(minute_str.data(),
minute_str.size(),
+ &minutes)) {
+ return Status::Invalid("Invalid timezone: ", tz);
+ }
+ OwnedRef total_minutes(PyLong_FromLong(
+ sign * ((static_cast<int>(hours) * 60) +
static_cast<int>(minutes))));
+ RETURN_IF_PYERROR();
+ auto tzinfo =
+ PyObject_CallFunctionObjArgs(fixed_offset.obj(),
total_minutes.obj(), NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ OwnedRef timezone;
+ RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone",
&timezone));
+ OwnedRef py_tz_string(
+ PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
+ auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(),
py_tz_string.obj(), NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ // catch fixed offset if pytz is not present
if (MatchFixedOffset(tz, &sign_str, &hour_str, &minute_str)) {
+ RETURN_NOT_OK(internal::ImportModule("datetime", &datetime));
int sign = -1;
if (sign_str == "+") {
sign = 1;
}
- OwnedRef fixed_offset;
- RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "FixedOffset",
&fixed_offset));
+
+ // import timezone and timedelta module to create a tzinfo object
+ OwnedRef class_timezone;
+ OwnedRef class_timedelta;
+ RETURN_NOT_OK(
+ internal::ImportFromModule(datetime.obj(), "timezone",
&class_timezone));
+ RETURN_NOT_OK(
+ internal::ImportFromModule(datetime.obj(), "timedelta",
&class_timedelta));
+
+ // check input
uint32_t minutes, hours;
if (!::arrow::internal::ParseUnsigned(hour_str.data(), hour_str.size(),
&hours) ||
!::arrow::internal::ParseUnsigned(minute_str.data(), minute_str.size(),
&minutes)) {
return Status::Invalid("Invalid timezone: ", tz);
}
+
+ // save offset as a signed integer
OwnedRef total_minutes(PyLong_FromLong(
sign * ((static_cast<int>(hours) * 60) + static_cast<int>(minutes))));
+ // create zero integers for empty arguments in datetime.timedelta
+ OwnedRef zero(PyLong_FromLong(static_cast<int>(0)));
+
+ // call datetime.timedelta to get correct offset object for
datetime.timezone
+ auto offset =
+ PyObject_CallFunctionObjArgs(class_timedelta.obj(), zero.obj(),
zero.obj(),
+ zero.obj(), zero.obj(),
total_minutes.obj(), NULL);
RETURN_IF_PYERROR();
+ // call datetime.timezone
+ auto tzinfo = PyObject_CallFunctionObjArgs(class_timezone.obj(), offset,
NULL);
+ RETURN_IF_PYERROR();
+ return tzinfo;
+ }
+
+ // fallback on zoneinfo if tz is string and pytz is not present
+ if (internal::ImportModule("zoneinfo", &zoneinfo).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("zoneinfo", &zoneinfo));
+
+ OwnedRef class_zoneinfo;
+ RETURN_NOT_OK(
+ internal::ImportFromModule(zoneinfo.obj(), "ZoneInfo",
&class_zoneinfo));
+ OwnedRef py_tz_string(
+ PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
auto tzinfo =
- PyObject_CallFunctionObjArgs(fixed_offset.obj(), total_minutes.obj(),
NULL);
+ PyObject_CallFunctionObjArgs(class_zoneinfo.obj(), py_tz_string.obj(),
NULL);
RETURN_IF_PYERROR();
return tzinfo;
}
- OwnedRef timezone;
- RETURN_NOT_OK(internal::ImportFromModule(pytz.obj(), "timezone", &timezone));
- OwnedRef py_tz_string(
- PyUnicode_FromStringAndSize(tz.c_str(),
static_cast<Py_ssize_t>(tz.size())));
- auto tzinfo = PyObject_CallFunctionObjArgs(timezone.obj(),
py_tz_string.obj(), NULL);
- RETURN_IF_PYERROR();
- return tzinfo;
+ return Status::Invalid("Pytz package must be installed.");
}
Result<std::string> TzinfoToString(PyObject* tzinfo) {
OwnedRef module_pytz; // import pytz
OwnedRef module_datetime; // import datetime
+ OwnedRef module_zoneinfo; // import zoneinfo
+ OwnedRef module_dateutil; // import dateutil
OwnedRef class_timezone; // from datetime import timezone
OwnedRef class_fixedoffset; // from pytz import _FixedOffset
+ OwnedRef class_basetzinfo; // from pytz import BaseTzInfo
+ OwnedRef class_zoneinfo; // from zoneinfo import ZoneInfo
+ OwnedRef class_tzfile; // from zoneinfo import tzfile
// import necessary modules
- RETURN_NOT_OK(internal::ImportModule("pytz", &module_pytz));
RETURN_NOT_OK(internal::ImportModule("datetime", &module_datetime));
// import necessary classes
- RETURN_NOT_OK(
- internal::ImportFromModule(module_pytz.obj(), "_FixedOffset",
&class_fixedoffset));
RETURN_NOT_OK(
internal::ImportFromModule(module_datetime.obj(), "timezone",
&class_timezone));
+ // Try to import pytz if it is available
+ if (internal::ImportModule("pytz", &module_pytz).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("pytz", &module_pytz));
+ RETURN_NOT_OK(internal::ImportFromModule(module_pytz.obj(), "_FixedOffset",
+ &class_fixedoffset));
+ RETURN_NOT_OK(
+ internal::ImportFromModule(module_pytz.obj(), "BaseTzInfo",
&class_basetzinfo));
+ }
+
+ // Try to import zoneinfo if it is available
+ if (internal::ImportModule("zoneinfo", &module_zoneinfo).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("zoneinfo", &module_zoneinfo));
+ RETURN_NOT_OK(
+ internal::ImportFromModule(module_zoneinfo.obj(), "ZoneInfo",
&class_zoneinfo));
+ }
+ // Try to import dateutil if it is available
+ if (internal::ImportModule("dateutil.tz", &module_dateutil).ok()) {
+ RETURN_NOT_OK(internal::ImportModule("dateutil.tz", &module_dateutil));
+ RETURN_NOT_OK(
+ internal::ImportFromModule(module_dateutil.obj(), "tzfile",
&class_tzfile));
+ }
+
// check that it's a valid tzinfo object
if (!PyTZInfo_Check(tzinfo)) {
return Status::TypeError("Not an instance of datetime.tzinfo");
}
- // if tzinfo is an instance of pytz._FixedOffset or datetime.timezone return
the
+ // if tzinfo is an instance of datetime.timezone return the
+ // HH:MM offset string representation
+ if (PyObject_IsInstance(tzinfo, class_timezone.obj())) {
+ // still recognize datetime.timezone.utc as UTC (instead of +00:00)
+ OwnedRef tzname_object(PyObject_CallMethod(tzinfo, "tzname", "O",
Py_None));
+ RETURN_IF_PYERROR();
+ if (PyUnicode_Check(tzname_object.obj())) {
+ std::string result;
+ RETURN_NOT_OK(internal::PyUnicode_AsStdString(tzname_object.obj(),
&result));
+ if (result == "UTC") {
+ return result;
+ }
+ }
+ return PyTZInfo_utcoffset_hhmm(tzinfo);
+ }
+
+ // if tzinfo is an instance of pytz._FixedOffset return the
// HH:MM offset string representation
- if (PyObject_IsInstance(tzinfo, class_timezone.obj()) ||
+ if (module_pytz.obj() != nullptr &&
PyObject_IsInstance(tzinfo, class_fixedoffset.obj())) {
// still recognize datetime.timezone.utc as UTC (instead of +00:00)
Review comment:
This part was only relevant for the stdlib datetime.timezone, as
pytz.utc is not a subclass of pytz._FixedOffset. So this can be cleaned up a
bit here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]