This is an automated email from the ASF dual-hosted git repository.
felipecrv pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 6aa33210ec GH-41044: [C++] formatting.h: Make sure space is allocated
for the 'Z' when formatting timestamps (#41045)
6aa33210ec is described below
commit 6aa33210ecd9773f18ceb775306b79de132b6c6f
Author: Felipe Oliveira Carvalho <[email protected]>
AuthorDate: Fri Apr 5 23:05:12 2024 -0300
GH-41044: [C++] formatting.h: Make sure space is allocated for the 'Z' when
formatting timestamps (#41045)
### What changes are included in this PR?
A test that reproduces an issue found by the fuzzer and a fix for it.
### Are these changes tested?
- A test
- Comments clarifying somethings around `formatting.h`
- Increasing the size of the local buffer used to format timestamps
The issue was introduced only recently (unreleased): #39272
* GitHub Issue: #41044
Authored-by: Felipe Oliveira Carvalho <[email protected]>
Signed-off-by: Felipe Oliveira Carvalho <[email protected]>
---
cpp/src/arrow/util/formatting.h | 11 ++++++++---
cpp/src/arrow/util/formatting_util_test.cc | 8 ++++++++
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/cpp/src/arrow/util/formatting.h b/cpp/src/arrow/util/formatting.h
index 6125f792ff..dd9af907ec 100644
--- a/cpp/src/arrow/util/formatting.h
+++ b/cpp/src/arrow/util/formatting.h
@@ -126,8 +126,10 @@ namespace detail {
ARROW_EXPORT extern const char digit_pairs[];
// Based on fmtlib's format_int class:
-// Write digits from right to left into a stack allocated buffer
-inline void FormatOneChar(char c, char** cursor) { *--*cursor = c; }
+// Write digits from right to left into a stack allocated buffer.
+// \pre *cursor points to the byte after the one that will be written.
+// \post *cursor points to the byte that was written.
+inline void FormatOneChar(char c, char** cursor) { *(--(*cursor)) = c; }
template <typename Int>
void FormatOneDigit(Int value, char** cursor) {
@@ -326,6 +328,7 @@ class StringFormatter<DoubleType> : public
FloatToStringFormatterMixin<DoubleTyp
namespace detail {
constexpr size_t BufferSizeYYYY_MM_DD() {
+ // "-"? "99999-12-31"
return 1 + detail::Digits10(99999) + 1 + detail::Digits10(12) + 1 +
detail::Digits10(31);
}
@@ -352,6 +355,7 @@ inline void
FormatYYYY_MM_DD(arrow_vendored::date::year_month_day ymd, char** cu
template <typename Duration>
constexpr size_t BufferSizeHH_MM_SS() {
+ // "23:59:59" ("." "9"+)?
return detail::Digits10(23) + 1 + detail::Digits10(59) + 1 +
detail::Digits10(59) + 1 +
detail::Digits10(Duration::period::den) - 1;
}
@@ -505,8 +509,9 @@ class StringFormatter<TimestampType> {
timepoint_days -= days(1);
}
+ // YYYY_MM_DD " " HH_MM_SS "Z"?
constexpr size_t buffer_size =
- detail::BufferSizeYYYY_MM_DD() + 1 +
detail::BufferSizeHH_MM_SS<Duration>();
+ detail::BufferSizeYYYY_MM_DD() + 1 +
detail::BufferSizeHH_MM_SS<Duration>() + 1;
std::array<char, buffer_size> buffer;
char* cursor = buffer.data() + buffer_size;
diff --git a/cpp/src/arrow/util/formatting_util_test.cc
b/cpp/src/arrow/util/formatting_util_test.cc
index 13f57a495d..fcbeec347d 100644
--- a/cpp/src/arrow/util/formatting_util_test.cc
+++ b/cpp/src/arrow/util/formatting_util_test.cc
@@ -533,6 +533,14 @@ TEST(Formatting, Timestamp) {
}
}
+ {
+ constexpr int64_t kMillisInDay = 24 * 60 * 60 * 1000;
+ auto ty = timestamp(TimeUnit::MILLI, "+01:00");
+ StringFormatter<TimestampType> formatter(ty.get());
+ AssertFormatting(formatter, -15000 * 365 * kMillisInDay + 1,
+ "-13021-12-17 00:00:00.001Z");
+ }
+
{
auto ty = timestamp(TimeUnit::MILLI, "Pacific/Maruesas");
StringFormatter<TimestampType> formatter(ty.get());