postgresql): implement copy writer for time types [arrow-adbc]

via GitHub Tue, 10 Mar 2026 23:38:11 -0700


lidavidm commented on code in PR #4057:
URL: https://github.com/apache/arrow-adbc/pull/4057#discussion_r2916203017



##########
c/driver/postgresql/copy/reader.h:
##########
@@ -425,6 +425,81 @@ class PostgresCopyNumericFieldReader : public 
PostgresCopyFieldReader {
   static const uint16_t kNumericNinf = 0xF000;
 };
 
+// Microseconds per day (24h)
+constexpr int64_t kUsecsPerDay = 86400LL * 1000000LL;
+// Nanoseconds per day (24h)
+constexpr int64_t kNsecsPerDay = 86400LL * 1000000000LL;
+
+template <enum ArrowTimeUnit TU, typename OutT>
+class PostgresCopyTimeOfDayFieldReader : public PostgresCopyFieldReader {
+ public:
+  ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, 
ArrowArray* array,
+                      ArrowError* error) override {
+    if (field_size_bytes <= 0) {
+      return ArrowArrayAppendNull(array, 1);
+    }
+
+    // PostgreSQL TIME binary payload is int64 microseconds since midnight. 
https://www.postgresql.org/docs/current/datatype-datetime.html
+    if (field_size_bytes != static_cast<int32_t>(sizeof(int64_t))) {
+      ArrowErrorSet(error, "Expected field with %d bytes but found field with 
%d bytes",
+                    static_cast<int>(sizeof(int64_t)),
+                    static_cast<int>(field_size_bytes));  // 
NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    const int64_t time_usec = ReadUnsafe<int64_t>(data);
+
+    // PostgreSQL time_recv validates microseconds since midnight 
(0..USECS_PER_DAY).
+    // Keep this validation here so we don't produce nonsensical Arrow values.
+    if (time_usec < 0 || time_usec > kUsecsPerDay) {
+      ArrowErrorSet(error,
+                    "[libpq] TIME value %" PRId64
+                    " usec is out of range [0, %" PRId64 "]",
+                    time_usec, kUsecsPerDay);
+      return EINVAL;
+    }
+
+    // Convert to Arrow representation requested by schema:
+    // Arrow TIME32 uses int32 in seconds or milliseconds; TIME64 uses int64 
in microseconds or nanoseconds.
+    int64_t out64 = 0;
+    switch (TU) {
+      case NANOARROW_TIME_UNIT_SECOND:
+        out64 = time_usec / 1000000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MILLI:
+        out64 = time_usec / 1000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MICRO:
+        out64 = time_usec;
+        break;
+      case NANOARROW_TIME_UNIT_NANO:
+        out64 = time_usec * 1000LL;
+        break;
+    }
+
+    // Ensure the target type can hold the converted value (TIME32 -> int32).
+    if constexpr (std::is_same<OutT, int32_t>::value) {
+      if (out64 < (std::numeric_limits<int32_t>::min)() ||
+          out64 > (std::numeric_limits<int32_t>::max)()) {
+        ArrowErrorSet(error,
+                      "[libpq] TIME value %" PRId64
+                      " usec converts to %" PRId64
+                      " which overflows int32 for Arrow TIME32",
+                      time_usec, out64);
+        return EOVERFLOW;
+      }

Review Comment:
   Is this possible given we're already validating that the value fits in 24 
hours?



##########
c/driver/postgresql/copy/reader.h:
##########
@@ -425,6 +425,81 @@ class PostgresCopyNumericFieldReader : public 
PostgresCopyFieldReader {
   static const uint16_t kNumericNinf = 0xF000;
 };
 
+// Microseconds per day (24h)
+constexpr int64_t kUsecsPerDay = 86400LL * 1000000LL;
+// Nanoseconds per day (24h)
+constexpr int64_t kNsecsPerDay = 86400LL * 1000000000LL;
+
+template <enum ArrowTimeUnit TU, typename OutT>
+class PostgresCopyTimeOfDayFieldReader : public PostgresCopyFieldReader {
+ public:
+  ArrowErrorCode Read(ArrowBufferView* data, int32_t field_size_bytes, 
ArrowArray* array,
+                      ArrowError* error) override {
+    if (field_size_bytes <= 0) {
+      return ArrowArrayAppendNull(array, 1);
+    }
+
+    // PostgreSQL TIME binary payload is int64 microseconds since midnight. 
https://www.postgresql.org/docs/current/datatype-datetime.html
+    if (field_size_bytes != static_cast<int32_t>(sizeof(int64_t))) {
+      ArrowErrorSet(error, "Expected field with %d bytes but found field with 
%d bytes",
+                    static_cast<int>(sizeof(int64_t)),
+                    static_cast<int>(field_size_bytes));  // 
NOLINT(runtime/int)
+      return EINVAL;
+    }
+
+    const int64_t time_usec = ReadUnsafe<int64_t>(data);
+
+    // PostgreSQL time_recv validates microseconds since midnight 
(0..USECS_PER_DAY).
+    // Keep this validation here so we don't produce nonsensical Arrow values.
+    if (time_usec < 0 || time_usec > kUsecsPerDay) {
+      ArrowErrorSet(error,
+                    "[libpq] TIME value %" PRId64
+                    " usec is out of range [0, %" PRId64 "]",
+                    time_usec, kUsecsPerDay);
+      return EINVAL;
+    }
+
+    // Convert to Arrow representation requested by schema:
+    // Arrow TIME32 uses int32 in seconds or milliseconds; TIME64 uses int64 
in microseconds or nanoseconds.
+    int64_t out64 = 0;
+    switch (TU) {
+      case NANOARROW_TIME_UNIT_SECOND:
+        out64 = time_usec / 1000000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MILLI:
+        out64 = time_usec / 1000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MICRO:
+        out64 = time_usec;
+        break;
+      case NANOARROW_TIME_UNIT_NANO:
+        out64 = time_usec * 1000LL;
+        break;
+    }
+
+    // Ensure the target type can hold the converted value (TIME32 -> int32).
+    if constexpr (std::is_same<OutT, int32_t>::value) {
+      if (out64 < (std::numeric_limits<int32_t>::min)() ||
+          out64 > (std::numeric_limits<int32_t>::max)()) {
+        ArrowErrorSet(error,
+                      "[libpq] TIME value %" PRId64
+                      " usec converts to %" PRId64
+                      " which overflows int32 for Arrow TIME32",
+                      time_usec, out64);
+        return EOVERFLOW;
+      }
+
+      const int32_t out32 = static_cast<int32_t>(out64);
+      NANOARROW_RETURN_NOT_OK(ArrowBufferAppend(data_, &out32, sizeof(out32)));
+    } else {
+      const int64_t out = static_cast<int64_t>(out64);

Review Comment:
   nit: redundant cast?



##########
c/driver/postgresql/copy/writer.h:
##########
@@ -735,6 +735,50 @@ class PostgresCopyTimestampFieldWriter : public 
PostgresCopyFieldWriter {
   }
 };
 
+// Microseconds per day (24h)
+constexpr int64_t kUsecsPerDay = 86400LL * 1000000LL;
+
+template <enum ArrowTimeUnit TU>
+class PostgresCopyTimeFieldWriter : public PostgresCopyFieldWriter {
+ public:
+  ArrowErrorCode Write(ArrowBuffer* buffer, int64_t index, ArrowError* error) 
override {
+    // PostgreSQL TIME binary format is an int64 microseconds-since-midnight
+    // and the COPY binary field length must be 8 bytes. 
https://www.postgresql.org/docs/current/datatype-datetime.html
+    constexpr int32_t field_size_bytes = sizeof(int64_t);
+    NANOARROW_RETURN_NOT_OK(WriteChecked<int32_t>(buffer, field_size_bytes, 
error));
+
+    const int64_t raw_value = ArrowArrayViewGetIntUnsafe(array_view_, index);
+    int64_t micros = 0;
+
+    switch (TU) {
+      case NANOARROW_TIME_UNIT_SECOND:
+        micros = raw_value * 1000000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MILLI:
+        micros = raw_value * 1000LL;
+        break;
+      case NANOARROW_TIME_UNIT_MICRO:
+        micros = raw_value;
+        break;
+      case NANOARROW_TIME_UNIT_NANO:
+        micros = raw_value / 1000LL;
+        break;
+    }
+
+    if (micros < 0 || micros > kUsecsPerDay) {

Review Comment:
   Hmm. If we assume the Arrow data isn't necessarily valid, don't we have to 
watch for overflow when we do the multiplication above? Or if we do assume the 
data is valid, then this can't happen, right? 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Re: [PR] feat(c/driver/postgresql): implement copy writer for time types [arrow-adbc]

Reply via email to