isichei commented on a change in pull request #10461:
URL: https://github.com/apache/arrow/pull/10461#discussion_r646865384



##########
File path: cpp/src/parquet/types.h
##########
@@ -602,6 +602,49 @@ static inline int64_t Int96GetNanoSeconds(const 
parquet::Int96& i96) {
   return static_cast<int64_t>(days_since_epoch * kNanosecondsPerDay + 
nanoseconds);
 }
 
+// ARROW-12096
+static inline int64_t Int96GetMicroSeconds(const parquet::Int96& i96) {
+  // We do the computations in the unsigned domain to avoid unsigned behaviour
+  // on overflow.
+  uint64_t days_since_epoch =
+      i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
+  uint64_t nanoseconds = 0;
+  memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
+
+  uint64_t microseconds = nanoseconds/static_cast<uint64_t>(1000);
+
+  return static_cast<int64_t>(days_since_epoch * kMicrosecondsPerDay + 
microseconds);
+}
+
+// ARROW-12096
+static inline int64_t Int96GetMilliSeconds(const parquet::Int96& i96) {
+  // We do the computations in the unsigned domain to avoid unsigned behaviour
+  // on overflow.
+  uint64_t days_since_epoch =
+      i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
+  uint64_t nanoseconds = 0;
+  memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
+
+  uint64_t milliseconds = nanoseconds/static_cast<uint64_t>(1000000);
+
+  return static_cast<int64_t>(days_since_epoch * kMillisecondsPerDay + 
milliseconds);
+}
+
+// ARROW-12096
+static inline int64_t Int96GetSeconds(const parquet::Int96& i96) {
+  // We do the computations in the unsigned domain to avoid unsigned behaviour
+  // on overflow.
+  uint64_t days_since_epoch =
+      i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
+  
+  uint64_t nanoseconds = 0;
+  memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
+
+  uint64_t seconds = nanoseconds/(static_cast<uint64_t>(1000000000));
+
+  return static_cast<int64_t>(days_since_epoch * kSecondsPerDay + seconds);

Review comment:
       Yeah I agree.
   
   I was concerned about changing the original function `Int96GetNanoSeconds` 
incase I introduced some unexpected change. Perhaps a halfway house is to 
replace the 3 (`us`, `ms` and `s`) INT96 functions to something like:
   
   ```cpp
   static inline int64_t Int96GetDownsampledTimestamp(const parquet::Int96& 
i96, const ::arrow::TimeUnit::type unit) {
     // We do the computations in the unsigned domain to avoid unsigned 
behaviour
     // on overflow.
     uint64_t days_since_epoch =
         i96.value[2] - static_cast<uint64_t>(kJulianToUnixEpochDays);
     uint64_t nanoseconds = 0;
   
     memcpy(&nanoseconds, &i96.value, sizeof(uint64_t));
     uint64_t seconds;
   
     switch (unit){
       case ::arrow::TimeUnit::SECOND:
         seconds = nanoseconds/static_cast<uint64_t>(1000000000);
       case ::arrow::TimeUnit::MILLI:
         seconds = nanoseconds/static_cast<uint64_t>(1000000);
       case ::arrow::TimeUnit::MICRO:
         seconds = nanoseconds/static_cast<uint64_t>(1000);
     }
     return static_cast<int64_t>(days_since_epoch * kNanosecondsPerDay + 
seconds);
   }
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to