jorisvandenbossche commented on code in PR #35656:
URL: https://github.com/apache/arrow/pull/35656#discussion_r1223394291
##########
python/pyarrow/src/arrow/python/arrow_to_pandas.cc:
##########
@@ -1569,7 +1572,31 @@ class DatetimeWriter : public
TypedPandasWriter<NPY_DATETIME> {
};
using DatetimeSecondWriter = DatetimeWriter<TimeUnit::SECOND>;
-using DatetimeMilliWriter = DatetimeWriter<TimeUnit::MILLI>;
+
+class DatetimeMilliWriter : public DatetimeWriter<TimeUnit::MILLI> {
+ public:
+ using DatetimeWriter<TimeUnit::MILLI>::DatetimeWriter;
+
+ Status CopyInto(std::shared_ptr<ChunkedArray> data, int64_t rel_placement)
override {
+ Type::type type = data->type()->id();
+ int64_t* out_values = this->GetBlockColumnStart(rel_placement);
+ if (type == Type::DATE32) {
+ // Convert from days since epoch to datetime64[ms]
+ ConvertDatetimeLikeNanos<int32_t, 86400000L>(*data, out_values);
+ } else if (type == Type::DATE64) {
+ ConvertDatetimeLikeNanos<int64_t, 1LL>(*data, out_values);
+ } else {
+ const auto& ts_type = checked_cast<const TimestampType&>(*data->type());
+ DCHECK_EQ(TimeUnit::MILLI, ts_type.unit())
+ << "Should only call instances of this writer "
+ << "with arrays of the correct unit";
+ ConvertNumericNullable<int64_t>(*data, kPandasTimestampNull,
+
this->GetBlockColumnStart(rel_placement));
Review Comment:
```suggestion
ConvertNumericNullable<int64_t>(*data, kPandasTimestampNull,
out_values);
```
##########
python/pyarrow/src/arrow/python/arrow_to_pandas.cc:
##########
@@ -1569,7 +1572,31 @@ class DatetimeWriter : public
TypedPandasWriter<NPY_DATETIME> {
};
using DatetimeSecondWriter = DatetimeWriter<TimeUnit::SECOND>;
-using DatetimeMilliWriter = DatetimeWriter<TimeUnit::MILLI>;
+
+class DatetimeMilliWriter : public DatetimeWriter<TimeUnit::MILLI> {
+ public:
+ using DatetimeWriter<TimeUnit::MILLI>::DatetimeWriter;
+
+ Status CopyInto(std::shared_ptr<ChunkedArray> data, int64_t rel_placement)
override {
+ Type::type type = data->type()->id();
+ int64_t* out_values = this->GetBlockColumnStart(rel_placement);
+ if (type == Type::DATE32) {
+ // Convert from days since epoch to datetime64[ms]
+ ConvertDatetimeLikeNanos<int32_t, 86400000L>(*data, out_values);
+ } else if (type == Type::DATE64) {
+ ConvertDatetimeLikeNanos<int64_t, 1LL>(*data, out_values);
Review Comment:
Can probably use `ConvertNumericNullable` here as well (like in the branch
below, that avoids doing the multiplication with 1)
##########
python/pyarrow/tests/parquet/common.py:
##########
@@ -176,8 +176,8 @@ def alltypes_sample(size=10000, seed=0, categorical=False):
# TODO(wesm): Test other timestamp resolutions now that arrow supports
# them
'datetime': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[ms]').astype('datetime64[ns]'),
- 'timedelta': np.arange(0, size, dtype="timedelta64[ns]"),
+ dtype='datetime64[ms]'),
Review Comment:
We can maybe keep both original ns and new ms resolution? (to test both)
##########
python/pyarrow/tests/test_pandas.py:
##########
@@ -70,7 +70,7 @@ def _alltypes_example(size=100):
# TODO(wesm): Pandas only support ns resolution, Arrow supports s, ms,
# us, ns
'datetime': np.arange("2016-01-01T00:00:00.001", size,
- dtype='datetime64[ms]').astype("datetime64[ns]"),
+ dtype='datetime64[ms]'),
Review Comment:
Or also here keep two resolutions?
##########
python/pyarrow/tests/test_pandas.py:
##########
@@ -1202,7 +1211,7 @@ def test_table_convert_date_as_object(self):
df_datetime = table.to_pandas(date_as_object=False)
df_object = table.to_pandas()
- tm.assert_frame_equal(df.astype('datetime64[ns]'), df_datetime,
+ tm.assert_frame_equal(df.astype('datetime64[ms]'), df_datetime,
Review Comment:
Do we have coverage for testing that it stays nanoseconds if you specify
`coerce_temporal_nanoseconds=True`?
##########
python/pyarrow/src/arrow/python/arrow_to_pandas.cc:
##########
@@ -1569,7 +1572,31 @@ class DatetimeWriter : public
TypedPandasWriter<NPY_DATETIME> {
};
using DatetimeSecondWriter = DatetimeWriter<TimeUnit::SECOND>;
-using DatetimeMilliWriter = DatetimeWriter<TimeUnit::MILLI>;
+
+class DatetimeMilliWriter : public DatetimeWriter<TimeUnit::MILLI> {
+ public:
+ using DatetimeWriter<TimeUnit::MILLI>::DatetimeWriter;
+
+ Status CopyInto(std::shared_ptr<ChunkedArray> data, int64_t rel_placement)
override {
+ Type::type type = data->type()->id();
+ int64_t* out_values = this->GetBlockColumnStart(rel_placement);
+ if (type == Type::DATE32) {
+ // Convert from days since epoch to datetime64[ms]
+ ConvertDatetimeLikeNanos<int32_t, 86400000L>(*data, out_values);
Review Comment:
I am wondering that if we do such naive multiplication here, we can get
overflow errors for out-of-bounds timestamps (but this is already the case for
the current code converting to nanoseconds, as well, to be clear)
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]