This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/master by this push:
new d991bcf60ae [fix](ParquetReader) Fix Parquet Reader to read `int96`
parquet type problem (#32394)
d991bcf60ae is described below
commit d991bcf60aeac694dfc18f3f6a18262cb17612b0
Author: Tiewei Fang <[email protected]>
AuthorDate: Tue Mar 19 17:26:50 2024 +0800
[fix](ParquetReader) Fix Parquet Reader to read `int96` parquet type
problem (#32394)
`hi - JULIAN_EPOCH_OFFSET_DAYS` can be negative, so these fields and
constants cannot all be unsigned integers.
---
be/src/vec/exec/format/parquet/parquet_column_convert.h | 13 +++++++------
be/src/vec/exec/format/parquet/parquet_common.cpp | 6 +++---
be/src/vec/exec/format/parquet/parquet_common.h | 12 ++++++------
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index 39ee29f663f..1d6cfd63515 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -318,12 +318,13 @@ public:
auto& data =
static_cast<ColumnVector<UInt64>*>(dst_col.get())->get_data();
for (int i = 0; i < rows; i++) {
- ParquetInt96 x = ParquetInt96_data[i];
- auto& num = data[start_idx + i];
- auto& value =
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(num);
- int64_t micros = x.to_timestamp_micros();
- value.from_unixtime(micros / 1000000, *_convert_params->ctz);
- value.set_microsecond(micros % 1000000);
+ ParquetInt96 src_cell_data = ParquetInt96_data[i];
+ auto& dst_value =
+
reinterpret_cast<DateV2Value<DateTimeV2ValueType>&>(data[start_idx + i]);
+
+ int64_t timestamp_with_micros =
src_cell_data.to_timestamp_micros();
+ dst_value.from_unixtime(timestamp_with_micros / 1000000,
*_convert_params->ctz);
+ dst_value.set_microsecond(timestamp_with_micros % 1000000);
}
return Status::OK();
}
diff --git a/be/src/vec/exec/format/parquet/parquet_common.cpp
b/be/src/vec/exec/format/parquet/parquet_common.cpp
index cbef2a0f286..33e9f11242b 100644
--- a/be/src/vec/exec/format/parquet/parquet_common.cpp
+++ b/be/src/vec/exec/format/parquet/parquet_common.cpp
@@ -24,9 +24,9 @@
namespace doris::vectorized {
-const uint32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
-const uint64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
-const uint64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
+const int32_t ParquetInt96::JULIAN_EPOCH_OFFSET_DAYS = 2440588;
+const int64_t ParquetInt96::MICROS_IN_DAY = 86400000000;
+const int64_t ParquetInt96::NANOS_PER_MICROSECOND = 1000;
ColumnSelectVector::ColumnSelectVector(const uint8_t* filter_map, size_t
filter_map_size,
bool filter_all) {
diff --git a/be/src/vec/exec/format/parquet/parquet_common.h
b/be/src/vec/exec/format/parquet/parquet_common.h
index 6667ab2c101..2cf745882ee 100644
--- a/be/src/vec/exec/format/parquet/parquet_common.h
+++ b/be/src/vec/exec/format/parquet/parquet_common.h
@@ -48,10 +48,10 @@ struct RowRange {
#pragma pack(1)
struct ParquetInt96 {
- uint64_t lo; // time of nanoseconds in a day
- uint32_t hi; // days from julian epoch
+ int64_t lo; // time of nanoseconds in a day
+ int32_t hi; // days from julian epoch
- inline uint64_t to_timestamp_micros() const {
+ inline int64_t to_timestamp_micros() const {
return (hi - JULIAN_EPOCH_OFFSET_DAYS) * MICROS_IN_DAY + lo /
NANOS_PER_MICROSECOND;
}
inline __int128 to_int128() const {
@@ -60,9 +60,9 @@ struct ParquetInt96 {
return ans;
}
- static const uint32_t JULIAN_EPOCH_OFFSET_DAYS;
- static const uint64_t MICROS_IN_DAY;
- static const uint64_t NANOS_PER_MICROSECOND;
+ static const int32_t JULIAN_EPOCH_OFFSET_DAYS;
+ static const int64_t MICROS_IN_DAY;
+ static const int64_t NANOS_PER_MICROSECOND;
};
#pragma pack()
static_assert(sizeof(ParquetInt96) == 12, "The size of ParquetInt96 is not
12.");
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]