bkietz commented on a change in pull request #10988:
URL: https://github.com/apache/arrow/pull/10988#discussion_r698674002
##########
File path: cpp/src/arrow/pretty_print.cc
##########
@@ -421,10 +471,14 @@ class ArrayPrinter : public PrettyPrinter {
private:
template <typename Unit>
void FormatDateTime(const char* fmt, int64_t value, bool add_epoch) {
- if (add_epoch) {
- (*sink_) << arrow_vendored::date::format(fmt, epoch_ + Unit{value});
- } else {
- (*sink_) << arrow_vendored::date::format(fmt, Unit{value});
+ try {
+ if (add_epoch) {
+ (*sink_) << arrow_vendored::date::format(fmt, epoch_ + Unit{value});
Review comment:
The range check could possibly even just be here:
```suggestion
constexpr Unit kMin = duration_cast<Unit>(kMinDay);
constexpr Unit kMax = duration_cast<Unit>(kMaxDay);
if (Unit{value} >= kMin && Unit{value} <= kMax) {
(*sink_) << arrow_vendored::date::format(fmt, epoch_ +
Unit{value});
} else {
WriteOutOfRange(value);
}
```
##########
File path: cpp/src/arrow/pretty_print.cc
##########
@@ -167,29 +165,81 @@ class ArrayPrinter : public PrettyPrinter {
}
template <typename T>
- enable_if_date<typename T::TypeClass, Status> WriteDataValues(const T&
array) {
+ enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T&
array) {
const auto data = array.raw_values();
- using unit = typename std::conditional<std::is_same<T, Date32Array>::value,
- arrow_vendored::date::days,
- std::chrono::milliseconds>::type;
- WriteValues(array, [&](int64_t i) { FormatDateTime<unit>("%F", data[i],
true); });
+ const auto& type = checked_cast<const TimeType&>(*array.type());
+ WriteValues(array,
+ [&](int64_t i) { FormatDateTime(type.unit(), "%T", data[i],
false); });
return Status::OK();
}
- template <typename T>
- enable_if_time<typename T::TypeClass, Status> WriteDataValues(const T&
array) {
+ // NOTE about bounds checking on temporal data:
+ //
+ // While we catch exceptions in FormatDateTime(), some out-of-bound values
+ // would result in successful but erroneous printing because of silent
+ // integer wraparound in the `arrow_vendored::date` library (particularly
+ // because it represents year values as a C `short` internally).
+ //
+ // To avoid such misprinting, we must therefore check the bounds explicitly.
+ // The bounds correspond to start of year -32767 and end of year 32767,
+ // respectively (-32768 is an invalid year value in `arrow_vendored::date`).
+ Status WriteDataValues(const Date32Array& array) {
const auto data = array.raw_values();
- const auto type = static_cast<const TimeType*>(array.type().get());
- WriteValues(array,
- [&](int64_t i) { FormatDateTime(type->unit(), "%T", data[i],
false); });
+ WriteValues(array, [&](int64_t i) {
+ const int32_t v = data[i];
+ if (v >= -12687428 && v <= 11248737) {
Review comment:
Nit: please extract named constants like
```suggestion
if (v >= kMinimumDay && v <= kMaximumDay) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]