This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new d534e77248 GH-44679: [C++][Python] Fix Flight Timestamp precision,
revert workaround from #43537 (#44681)
d534e77248 is described below
commit d534e772481f5b8e034e7e2fca1fb79c37b7ead8
Author: Enrico Minack <[email protected]>
AuthorDate: Thu Nov 14 03:59:24 2024 +0100
GH-44679: [C++][Python] Fix Flight Timestamp precision, revert workaround
from #43537 (#44681)
### What changes are included in this PR?
Make `arrow.flight.Timestamp` nanoseconds precision and OS-independent.
Use the `CTimePoint` for `FlightEndpoint.expiration_time` to have
OS-independent nanoseconds precision.
https://github.com/apache/arrow/blob/093655c60783321c786a8e69632f185d37520f4d/python/pyarrow/includes/libarrow_flight.pxd#L133-L139
This reverts workaround for `expiration_time` from #43537.
### Are these changes tested?
Existing unit tests are adjusted to the new precision.
### Are there any user-facing changes?
**This PR includes breaking changes to public APIs.**
The values of `FlightEndpoint.expiration_time` now have nanoseconds
precision on any operating system.
* GitHub Issue: #44679
Authored-by: Enrico Minack <[email protected]>
Signed-off-by: David Li <[email protected]>
---
cpp/src/arrow/flight/flight_internals_test.cc | 18 ++++++------------
cpp/src/arrow/flight/types.h | 3 ++-
python/pyarrow/_flight.pyx | 13 +++----------
python/pyarrow/includes/chrono.pxd | 23 -----------------------
python/pyarrow/includes/libarrow_flight.pxd | 4 ++--
python/pyarrow/includes/libarrow_python.pxd | 4 ----
python/pyarrow/src/arrow/python/datetime.h | 14 --------------
python/pyarrow/tests/test_flight.py | 16 +++-------------
8 files changed, 16 insertions(+), 79 deletions(-)
diff --git a/cpp/src/arrow/flight/flight_internals_test.cc
b/cpp/src/arrow/flight/flight_internals_test.cc
index 3f233cb7b7..ab2f8c7830 100644
--- a/cpp/src/arrow/flight/flight_internals_test.cc
+++ b/cpp/src/arrow/flight/flight_internals_test.cc
@@ -184,12 +184,9 @@ TEST(FlightTypes, FlightDescriptor) {
TEST(FlightTypes, FlightEndpoint) {
ASSERT_OK_AND_ASSIGN(auto location1, Location::ForGrpcTcp("localhost",
1024));
ASSERT_OK_AND_ASSIGN(auto location2, Location::ForGrpcTls("localhost",
1024));
- // 2023-06-19 03:14:06.004330100
- // We must use microsecond resolution here for portability.
- // std::chrono::system_clock::time_point may not provide nanosecond
- // resolution on some platforms such as Windows.
+ // 2023-06-19 03:14:06.004339123
const auto expiration_time_duration =
- std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339000};
+ std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339123};
Timestamp expiration_time(
std::chrono::duration_cast<Timestamp::duration>(expiration_time_duration));
std::vector<FlightEndpoint> values = {
@@ -210,7 +207,7 @@ TEST(FlightTypes, FlightEndpoint) {
"<FlightEndpoint ticket=<Ticket ticket='bar'> locations=[] "
"expiration_time=null app_metadata='DEADBEEF'>",
"<FlightEndpoint ticket=<Ticket ticket='foo'> locations=[] "
- "expiration_time=2023-06-19 03:14:06.004339000 app_metadata=''>",
+ "expiration_time=2023-06-19 03:14:06.004339123 app_metadata=''>",
"<FlightEndpoint ticket=<Ticket ticket='foo'> locations="
"[grpc+tcp://localhost:1024] expiration_time=null app_metadata=''>",
"<FlightEndpoint ticket=<Ticket ticket='bar'> locations="
@@ -271,12 +268,9 @@ TEST(FlightTypes, PollInfo) {
auto desc = FlightDescriptor::Command("foo");
auto endpoint = FlightEndpoint{Ticket{"foo"}, {}, std::nullopt, ""};
auto info = MakeFlightInfo(schema, desc, {endpoint}, -1, 42, true, "");
- // 2023-06-19 03:14:06.004330100
- // We must use microsecond resolution here for portability.
- // std::chrono::system_clock::time_point may not provide nanosecond
- // resolution on some platforms such as Windows.
+ // 2023-06-19 03:14:06.004339123
const auto expiration_time_duration =
- std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339000};
+ std::chrono::seconds{1687144446} + std::chrono::nanoseconds{4339123};
Timestamp expiration_time(
std::chrono::duration_cast<Timestamp::duration>(expiration_time_duration));
std::vector<PollInfo> values = {
@@ -292,7 +286,7 @@ TEST(FlightTypes, PollInfo) {
"progress=null expiration_time=null>",
"<PollInfo info=" + info.ToString() +
" descriptor=<FlightDescriptor cmd='poll'> "
- "progress=0.1 expiration_time=2023-06-19 03:14:06.004339000>",
+ "progress=0.1 expiration_time=2023-06-19 03:14:06.004339123>",
"<PollInfo info=null descriptor=null progress=null
expiration_time=null>",
};
diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h
index b6309d0af2..8b612bd55c 100644
--- a/cpp/src/arrow/flight/types.h
+++ b/cpp/src/arrow/flight/types.h
@@ -80,7 +80,8 @@ class FlightServerBase;
/// > all minutes are 60 seconds long, i.e. leap seconds are "smeared"
/// > so that no leap second table is needed for interpretation. Range
/// > is from 0001-01-01T00:00:00Z to 9999-12-31T23:59:59.999999999Z.
-using Timestamp = std::chrono::system_clock::time_point;
+using Timestamp =
+ std::chrono::time_point<std::chrono::system_clock,
std::chrono::nanoseconds>;
/// \brief A Flight-specific status code. Used to encode some
/// additional status codes into an Arrow Status.
diff --git a/python/pyarrow/_flight.pyx b/python/pyarrow/_flight.pyx
index ba6cdf273a..9a2341d694 100644
--- a/python/pyarrow/_flight.pyx
+++ b/python/pyarrow/_flight.pyx
@@ -750,11 +750,8 @@ cdef class FlightEndpoint(_Weakrefable):
if expiration_time is not None:
if isinstance(expiration_time, lib.TimestampScalar):
- # Convert into OS-dependent
std::chrono::system_clock::time_point from
- # std::chrono::time_point<std::chrono::system_clock,
std::chrono::nanoseconds>
- # See Timestamp in cpp/src/arrow/flight/types.h
- self.endpoint.expiration_time =
TimePoint_to_system_time(TimePoint_from_ns(
- expiration_time.cast(timestamp("ns")).value))
+ self.endpoint.expiration_time = TimePoint_from_ns(
+ expiration_time.cast(timestamp("ns")).value)
else:
raise TypeError("Argument expiration_time must be a
TimestampScalar, "
"not '{}'".format(type(expiration_time)))
@@ -786,11 +783,7 @@ cdef class FlightEndpoint(_Weakrefable):
cdef:
int64_t time_since_epoch
if self.endpoint.expiration_time.has_value():
- time_since_epoch = TimePoint_to_ns(
- # Convert from OS-dependent
std::chrono::system_clock::time_point into
- # std::chrono::time_point<std::chrono::system_clock,
std::chrono::nanoseconds>
- # See Timestamp in cpp/src/arrow/flight/types.h
-
TimePoint_from_system_time(self.endpoint.expiration_time.value()))
+ time_since_epoch =
TimePoint_to_ns(self.endpoint.expiration_time.value())
return lib.scalar(time_since_epoch, timestamp("ns", "UTC"))
return None
diff --git a/python/pyarrow/includes/chrono.pxd
b/python/pyarrow/includes/chrono.pxd
deleted file mode 100644
index e5d22d1975..0000000000
--- a/python/pyarrow/includes/chrono.pxd
+++ /dev/null
@@ -1,23 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied. See the License for the
-# specific language governing permissions and limitations
-# under the License.
-
-# distutils: language = c++
-
-
-cdef extern from "<chrono>" namespace "std::chrono::system_clock":
- cdef cppclass time_point:
- pass
diff --git a/python/pyarrow/includes/libarrow_flight.pxd
b/python/pyarrow/includes/libarrow_flight.pxd
index d2bc3c9d0d..eefad537dc 100644
--- a/python/pyarrow/includes/libarrow_flight.pxd
+++ b/python/pyarrow/includes/libarrow_flight.pxd
@@ -19,7 +19,7 @@
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
-from pyarrow.includes.chrono cimport time_point
+from pyarrow.includes.libarrow_python cimport CTimePoint
cdef extern from "arrow/flight/api.h" namespace "arrow" nogil:
@@ -135,7 +135,7 @@ cdef extern from "arrow/flight/api.h" namespace "arrow"
nogil:
CTicket ticket
vector[CLocation] locations
- optional[time_point] expiration_time
+ optional[CTimePoint] expiration_time
c_string app_metadata
bint operator==(CFlightEndpoint)
diff --git a/python/pyarrow/includes/libarrow_python.pxd
b/python/pyarrow/includes/libarrow_python.pxd
index da5bca5edd..96725c9c38 100644
--- a/python/pyarrow/includes/libarrow_python.pxd
+++ b/python/pyarrow/includes/libarrow_python.pxd
@@ -17,7 +17,6 @@
# distutils: language = c++
-from pyarrow.includes.chrono cimport time_point
from pyarrow.includes.common cimport *
from pyarrow.includes.libarrow cimport *
@@ -245,9 +244,6 @@ cdef extern from "arrow/python/api.h" namespace
"arrow::py::internal" nogil:
CTimePoint TimePoint_from_s(double val)
CTimePoint TimePoint_from_ns(int64_t val)
- CTimePoint TimePoint_from_system_time(time_point val)
- time_point TimePoint_to_system_time(CTimePoint val)
-
CResult[c_string] TzinfoToString(PyObject* pytzinfo)
CResult[PyObject*] StringToTzinfo(c_string)
diff --git a/python/pyarrow/src/arrow/python/datetime.h
b/python/pyarrow/src/arrow/python/datetime.h
index 3de5ea69fd..9b21eeb434 100644
--- a/python/pyarrow/src/arrow/python/datetime.h
+++ b/python/pyarrow/src/arrow/python/datetime.h
@@ -144,20 +144,6 @@ inline TimePoint TimePoint_from_ns(int64_t val) {
return TimePoint(TimePoint::duration(val));
}
-ARROW_PYTHON_EXPORT
-// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent
-// std::chrono::system_clock::time_point
-inline std::chrono::system_clock::time_point
TimePoint_to_system_time(TimePoint val) {
- return
std::chrono::time_point_cast<std::chrono::system_clock::duration>(val);
-}
-
-ARROW_PYTHON_EXPORT
-// Note: Needed by FlightEndpoint.expiration_time, which is an OS-dependent
-// std::chrono::system_clock::time_point
-inline TimePoint
TimePoint_from_system_time(std::chrono::system_clock::time_point val) {
- return std::chrono::time_point_cast<TimePoint::duration>(val);
-}
-
ARROW_PYTHON_EXPORT
inline int64_t PyDelta_to_s(PyDateTime_Delta* pytimedelta) {
return (PyDateTime_DELTA_GET_DAYS(pytimedelta) * 86400LL +
diff --git a/python/pyarrow/tests/test_flight.py
b/python/pyarrow/tests/test_flight.py
index b3103c4be8..191a25baaf 100644
--- a/python/pyarrow/tests/test_flight.py
+++ b/python/pyarrow/tests/test_flight.py
@@ -1179,19 +1179,9 @@ def test_flight_get_info():
assert info.endpoints[0].expiration_time is None
assert info.endpoints[0].app_metadata == b""
assert info.endpoints[0].locations[0] == flight.Location('grpc://test')
- # on macOS, system_clock::duration is milliseconds
- # on Windows, system_clock::duration is 100 nanoseconds
- # on Linux, system_clock::duration is nanoseconds
- ts = None
- if pa._platform.system() == 'Darwin':
- ts = "2023-04-05T12:34:56.789012000+00:00"
- elif pa._platform.system() == 'Windows':
- ts = "2023-04-05T12:34:56.789012300+00:00"
- elif pa._platform.system() == 'Linux':
- ts = "2023-04-05T12:34:56.789012345+00:00"
- if ts is not None:
- assert info.endpoints[1].expiration_time == \
- pa.scalar(ts).cast(pa.timestamp("ns", "UTC"))
+ assert info.endpoints[1].expiration_time == \
+ pa.scalar("2023-04-05T12:34:56.789012345+00:00") \
+ .cast(pa.timestamp("ns", "UTC"))
assert info.endpoints[1].app_metadata == b"endpoint app metadata"
assert info.endpoints[1].locations[0] == \
flight.Location.for_grpc_tcp('localhost', 5005)