This is an automated email from the ASF dual-hosted git repository. lidavidm pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push: new 87dca7d832 GH-47016: [C++][FlightSQL] Fix negative timestamps to date types (#47017) 87dca7d832 is described below commit 87dca7d8320b549ad6ea17d84ef6c80360ef8c33 Author: Igor Antropov <igor.antro...@dremio.com> AuthorDate: Thu Jul 31 08:39:37 2025 +0100 GH-47016: [C++][FlightSQL] Fix negative timestamps to date types (#47017) ### Rationale for this change Fix negative timestamps from arrow to date structs when using flightsql odbc. ### Are these changes tested? Yes ### Are there any user-facing changes? No * GitHub Issue: #47016 Authored-by: Igor Antropov <igor.antro...@dremio.com> Signed-off-by: David Li <li.david...@gmail.com> --- .../flight_sql/accessors/date_array_accessor.cc | 2 +- .../accessors/date_array_accessor_test.cc | 36 +++++++------ .../flight_sql/accessors/time_array_accessor.cc | 2 +- .../accessors/time_array_accessor_test.cc | 8 +-- .../accessors/timestamp_array_accessor.cc | 52 +++++++++++-------- .../accessors/timestamp_array_accessor_test.cc | 60 ++++++++++++++-------- .../sql/odbc/odbcabstraction/calendar_utils.cc | 27 +++++++--- .../include/odbcabstraction/calendar_utils.h | 2 +- 8 files changed, 117 insertions(+), 72 deletions(-) diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc index ac8d079cd0..ab139efc92 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor.cc @@ -68,7 +68,7 @@ RowStatus DateArrayFlightSqlAccessor<TARGET_TYPE, ARROW_ARRAY>::MoveSingleCell_i auto value = convertDate<ARROW_ARRAY>(this->GetArray()->Value(arrow_row)); tm date{}; - GetTimeForSecondsSinceEpoch(date, value); + GetTimeForSecondsSinceEpoch(value, date); buffer[cell_counter].year = 1900 + (date.tm_year); buffer[cell_counter].month = date.tm_mon + 1; diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc index d0c4d65099..fdd48d2706 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/date_array_accessor_test.cc @@ -38,7 +38,11 @@ using arrow::ArrayFromVector; using odbcabstraction::GetTimeForSecondsSinceEpoch; TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) { - std::vector<int32_t> values = {7589, 12320, 18980, 19095}; + std::vector<int32_t> values = {7589, 12320, 18980, 19095, -1, 0}; + std::vector<tagDATE_STRUCT> expected = { + {1990, 10, 12}, {2003, 9, 25}, {2021, 12, 19}, + {2022, 4, 13}, {1969, 12, 31}, {1970, 1, 1}, + }; std::shared_ptr<Array> array; ArrayFromVector<Date32Type, int32_t>(values, &array); @@ -60,19 +64,23 @@ TEST(DateArrayAccessor, Test_Date32Array_CDataType_DATE) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]); - tm date{}; - int64_t converted_time = values[i] * 86400; - GetTimeForSecondsSinceEpoch(date, converted_time); - ASSERT_EQ((date.tm_year + 1900), buffer[i].year); - ASSERT_EQ(date.tm_mon + 1, buffer[i].month); - ASSERT_EQ(date.tm_mday, buffer[i].day); + ASSERT_EQ(expected[i].year, buffer[i].year); + ASSERT_EQ(expected[i].month, buffer[i].month); + ASSERT_EQ(expected[i].day, buffer[i].day); } } TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) { - std::vector<int64_t> values = {86400000, 172800000, 259200000, 1649793238110, - 345600000, 432000000, 518400000}; + std::vector<int64_t> values = { + 86400000, 172800000, 259200000, 1649793238110, 0, 345600000, 432000000, + 518400000, -86400000, -17987443200000, -24268068949000}; + std::vector<tagDATE_STRUCT> expected = { + /* year(16), month(u16), day(u16) */ + {1970, 1, 2}, {1970, 1, 3}, {1970, 1, 4}, {2022, 4, 12}, + {1970, 1, 1}, {1970, 1, 5}, {1970, 1, 6}, {1970, 1, 7}, + {1969, 12, 31}, {1400, 1, 1}, {1200, 12, 22}, + }; std::shared_ptr<Array> array; ArrayFromVector<Date64Type, int64_t>(values, &array); @@ -94,13 +102,9 @@ TEST(DateArrayAccessor, Test_Date64Array_CDataType_DATE) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(DATE_STRUCT), strlen_buffer[i]); - tm date{}; - - int64_t converted_time = values[i] / 1000; - GetTimeForSecondsSinceEpoch(date, converted_time); - ASSERT_EQ((date.tm_year + 1900), buffer[i].year); - ASSERT_EQ(date.tm_mon + 1, buffer[i].month); - ASSERT_EQ(date.tm_mday, buffer[i].day); + ASSERT_EQ(expected[i].year, buffer[i].year); + ASSERT_EQ(expected[i].month, buffer[i].month); + ASSERT_EQ(expected[i].day, buffer[i].day); } } diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc index 8f4b7db5ed..93bf11778b 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor.cc @@ -108,7 +108,7 @@ RowStatus TimeArrayFlightSqlAccessor<TARGET_TYPE, ARROW_ARRAY, UNIT>::MoveSingle auto converted_value_seconds = ConvertTimeValue<ARROW_ARRAY>(this->GetArray()->Value(arrow_row), UNIT); - GetTimeForSecondsSinceEpoch(time, converted_value_seconds); + GetTimeForSecondsSinceEpoch(converted_value_seconds, time); buffer[cell_counter].hour = time.tm_hour; buffer[cell_counter].minute = time.tm_min; diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc index 26f1c1a676..d04a098e23 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/time_array_accessor_test.cc @@ -66,7 +66,7 @@ TEST(TEST_TIME32, TIME_WITH_SECONDS) { tm time{}; - GetTimeForSecondsSinceEpoch(time, t32_values[i]); + GetTimeForSecondsSinceEpoch(t32_values[i], time); ASSERT_EQ(buffer[i].hour, time.tm_hour); ASSERT_EQ(buffer[i].minute, time.tm_min); ASSERT_EQ(buffer[i].second, time.tm_sec); @@ -103,7 +103,7 @@ TEST(TEST_TIME32, TIME_WITH_MILLI) { tm time{}; auto convertedValue = t32_values[i] / odbcabstraction::MILLI_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(time, convertedValue); + GetTimeForSecondsSinceEpoch(convertedValue, time); ASSERT_EQ(buffer[i].hour, time.tm_hour); ASSERT_EQ(buffer[i].minute, time.tm_min); @@ -142,7 +142,7 @@ TEST(TEST_TIME64, TIME_WITH_MICRO) { tm time{}; const auto convertedValue = t64_values[i] / odbcabstraction::MICRO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(time, convertedValue); + GetTimeForSecondsSinceEpoch(convertedValue, time); ASSERT_EQ(buffer[i].hour, time.tm_hour); ASSERT_EQ(buffer[i].minute, time.tm_min); @@ -179,7 +179,7 @@ TEST(TEST_TIME64, TIME_WITH_NANO) { tm time{}; const auto convertedValue = t64_values[i] / odbcabstraction::NANO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(time, convertedValue); + GetTimeForSecondsSinceEpoch(convertedValue, time); ASSERT_EQ(buffer[i].hour, time.tm_hour); ASSERT_EQ(buffer[i].minute, time.tm_min); diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc index b85cb95a88..68e9f64fff 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor.cc @@ -44,25 +44,26 @@ int64_t GetConversionToSecondsDivisor(TimeUnit::type unit) { return divisor; } -uint32_t CalculateFraction(TimeUnit::type unit, uint64_t units_since_epoch) { - // Convert the given remainder and time unit to nanoseconds - // since the fraction field on TIMESTAMP_STRUCT is in nanoseconds. - switch (unit) { - case TimeUnit::SECOND: - return 0; - case TimeUnit::MILLI: - // 1000000 nanoseconds = 1 millisecond. - return (units_since_epoch % driver::odbcabstraction::MILLI_TO_SECONDS_DIVISOR) * - 1000000; - case TimeUnit::MICRO: - // 1000 nanoseconds = 1 microsecond. - return (units_since_epoch % driver::odbcabstraction::MICRO_TO_SECONDS_DIVISOR) * - 1000; - case TimeUnit::NANO: - // 1000 nanoseconds = 1 microsecond. - return (units_since_epoch % driver::odbcabstraction::NANO_TO_SECONDS_DIVISOR); +uint32_t CalculateFraction(TimeUnit::type unit, int64_t units_since_epoch) { + /** + * Convert the given remainder and time unit to nanoseconds + * since the fraction field on TIMESTAMP_STRUCT is in nanoseconds. + */ + if (unit == TimeUnit::SECOND) { + return 0; } - return 0; + + const int64_t divisor = GetConversionToSecondsDivisor(unit); + const int64_t nano_divisor = GetConversionToSecondsDivisor(TimeUnit::NANO); + + // Safe remainder calculation that always gives a non-negative result + int64_t remainder = units_since_epoch % divisor; + if (remainder < 0) { + remainder += divisor; + } + + // Scale to nanoseconds + return static_cast<uint32_t>(remainder * (nano_divisor / divisor)); } } // namespace @@ -88,10 +89,21 @@ RowStatus TimestampArrayFlightSqlAccessor<TARGET_TYPE, UNIT>::MoveSingleCell_imp int64_t value = this->GetArray()->Value(arrow_row); const auto divisor = GetConversionToSecondsDivisor(UNIT); - const auto converted_result_seconds = value / divisor; + const auto converted_result_seconds = + // We want floor division here; C++ will round towards zero + (value < 0) + /** + * Floor division: Shift all "fractional" (not a multiple of divisor) values so + * they round towards zero (and to the same value) along with the "floor" less + * than them, then add 1 to get back to the floor. Alternative we could shift + * negatively by (divisor - 1) but this breaks near INT64_MIN causing underflow. + */ + ? ((value + 1) / divisor) - 1 + // Towards zero is already floor + : value / divisor; tm timestamp = {0}; - GetTimeForSecondsSinceEpoch(timestamp, converted_result_seconds); + GetTimeForSecondsSinceEpoch(converted_result_seconds, timestamp); buffer[cell_counter].year = 1900 + (timestamp.tm_year); buffer[cell_counter].month = timestamp.tm_mon + 1; diff --git a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc index a5fb167e79..930cc6a565 100644 --- a/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc +++ b/cpp/src/arrow/flight/sql/odbc/flight_sql/accessors/timestamp_array_accessor_test.cc @@ -34,8 +34,35 @@ using odbcabstraction::TIMESTAMP_STRUCT; using odbcabstraction::GetTimeForSecondsSinceEpoch; TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) { - std::vector<int64_t> values = {86400370, 172800000, 259200000, 1649793238110LL, - 345600000, 432000000, 518400000}; + std::vector<int64_t> values = {86400370, 172800000, 259200000, + 1649793238110LL, 345600000, 432000000, + 518400000, -86399000, 0, + -86399999, -86399001, 86400001, + 86400999, -3786912000000, -5364662400000, + -1500, -24268068949000}; + std::vector<TIMESTAMP_STRUCT> expected = { + /* year(16), month(u16), day(u16), hour(u16), minute(u16), second(u16), + fraction(u32) */ + {1970, 1, 2, 0, 0, 0, 370000000}, + {1970, 1, 3, 0, 0, 0, 0}, + {1970, 1, 4, 0, 0, 0, 0}, + {2022, 4, 12, 19, 53, 58, 110000000}, + {1970, 1, 5, 0, 0, 0, 0}, + {1970, 1, 6, 0, 0, 0, 0}, + {1970, 1, 7, 0, 0, 0, 0}, + {1969, 12, 31, 0, 0, 1, 0}, + {1970, 1, 1, 0, 0, 0, 0}, + /* Tests both ends of the fraction rounding range to ensure we don't tip the wrong + way */ + {1969, 12, 31, 0, 0, 0, 1000000}, + {1969, 12, 31, 0, 0, 0, 999000000}, + {1970, 1, 2, 0, 0, 0, 1000000}, + {1970, 1, 2, 0, 0, 0, 999000000}, + {1849, 12, 31, 0, 0, 0, 0U}, + {1800, 1, 1, 0, 0, 0, 0U}, + {1969, 12, 31, 23, 59, 58, 500000000U}, + {1200, 12, 22, 13, 44, 11, 0U}, + }; std::shared_ptr<Array> timestamp_array; @@ -60,22 +87,13 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MILLI) { for (size_t i = 0; i < values.size(); ++i) { ASSERT_EQ(sizeof(TIMESTAMP_STRUCT), strlen_buffer[i]); - tm date{}; - - auto converted_time = values[i] / odbcabstraction::MILLI_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); - - ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); - ASSERT_EQ(buffer[i].month, date.tm_mon + 1); - ASSERT_EQ(buffer[i].day, date.tm_mday); - ASSERT_EQ(buffer[i].hour, date.tm_hour); - ASSERT_EQ(buffer[i].minute, date.tm_min); - ASSERT_EQ(buffer[i].second, date.tm_sec); - - constexpr uint32_t NANOSECONDS_PER_MILLI = 1000000; - ASSERT_EQ( - buffer[i].fraction, - (values[i] % odbcabstraction::MILLI_TO_SECONDS_DIVISOR) * NANOSECONDS_PER_MILLI); + ASSERT_EQ(buffer[i].year, expected[i].year); + ASSERT_EQ(buffer[i].month, expected[i].month); + ASSERT_EQ(buffer[i].day, expected[i].day); + ASSERT_EQ(buffer[i].hour, expected[i].hour); + ASSERT_EQ(buffer[i].minute, expected[i].minute); + ASSERT_EQ(buffer[i].second, expected[i].second); + ASSERT_EQ(buffer[i].fraction, expected[i].fraction); } } @@ -109,7 +127,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_SECONDS) { tm date{}; auto converted_time = values[i]; - GetTimeForSecondsSinceEpoch(date, converted_time); + GetTimeForSecondsSinceEpoch(converted_time, date); ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); ASSERT_EQ(buffer[i].month, date.tm_mon + 1); @@ -151,7 +169,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_MICRO) { tm date{}; auto converted_time = values[i] / odbcabstraction::MICRO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); + GetTimeForSecondsSinceEpoch(converted_time, date); ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); ASSERT_EQ(buffer[i].month, date.tm_mon + 1); @@ -194,7 +212,7 @@ TEST(TEST_TIMESTAMP, TIMESTAMP_WITH_NANO) { tm date{}; auto converted_time = values[i] / odbcabstraction::NANO_TO_SECONDS_DIVISOR; - GetTimeForSecondsSinceEpoch(date, converted_time); + GetTimeForSecondsSinceEpoch(converted_time, date); ASSERT_EQ(buffer[i].year, 1900 + (date.tm_year)); ASSERT_EQ(buffer[i].month, date.tm_mon + 1); diff --git a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc index f4a23419f1..7b92a00a25 100644 --- a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc +++ b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/calendar_utils.cc @@ -17,7 +17,9 @@ #include "odbcabstraction/calendar_utils.h" +#include <chrono> #include <cstdint> +#include <cstring> #include <ctime> namespace driver { @@ -26,7 +28,7 @@ int64_t GetTodayTimeFromEpoch() { tm date{}; int64_t t = std::time(0); - GetTimeForSecondsSinceEpoch(date, t); + GetTimeForSecondsSinceEpoch(t, date); date.tm_hour = 0; date.tm_min = 0; @@ -39,13 +41,22 @@ int64_t GetTodayTimeFromEpoch() { #endif } -void GetTimeForSecondsSinceEpoch(tm& date, int64_t value) { -#if defined(_WIN32) - gmtime_s(&date, &value); -#else - time_t time_value = static_cast<time_t>(value); - gmtime_r(&time_value, &date); -#endif +void GetTimeForSecondsSinceEpoch(const int64_t seconds_since_epoch, std::tm& out_tm) { + std::memset(&out_tm, 0, sizeof(std::tm)); + + std::chrono::time_point<std::chrono::system_clock, std::chrono::seconds> timepoint{ + std::chrono::seconds{seconds_since_epoch}}; + auto tm_days = std::chrono::floor<std::chrono::days>(timepoint); + + std::chrono::year_month_day ymd(tm_days); + std::chrono::hh_mm_ss<std::chrono::seconds> timeofday(timepoint - tm_days); + + out_tm.tm_year = static_cast<int>(ymd.year()) - 1900; + out_tm.tm_mon = static_cast<unsigned>(ymd.month()) - 1; + out_tm.tm_mday = static_cast<unsigned>(ymd.day()); + out_tm.tm_hour = static_cast<int>(timeofday.hours().count()); + out_tm.tm_min = static_cast<int>(timeofday.minutes().count()); + out_tm.tm_sec = static_cast<int>(timeofday.seconds().count()); } } // namespace odbcabstraction } // namespace driver diff --git a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h index 1f4c6e7912..6b55c443cd 100644 --- a/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h +++ b/cpp/src/arrow/flight/sql/odbc/odbcabstraction/include/odbcabstraction/calendar_utils.h @@ -24,6 +24,6 @@ namespace driver { namespace odbcabstraction { int64_t GetTodayTimeFromEpoch(); -void GetTimeForSecondsSinceEpoch(tm& date, int64_t value); +void GetTimeForSecondsSinceEpoch(const int64_t seconds_since_epoch, std::tm& out_tm); } // namespace odbcabstraction } // namespace driver