This is an automated email from the ASF dual-hosted git repository.
morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new caba7e1bfda [fix](parquet) fix time zone error in parquet reader (#33228)
caba7e1bfda is described below
commit caba7e1bfdafef875355a36bae386e805b68f4ab
Author: Ashin Gau <[email protected]>
AuthorDate: Fri Apr 5 21:41:31 2024 +0800
[fix](parquet) fix time zone error in parquet reader (#33228)
backport: #33217
---
be/src/vec/exec/format/parquet/decoder.cpp | 2 +-
be/src/vec/exec/format/parquet/decoder.h | 4 +--
be/src/vec/exec/format/parquet/parquet_pred_cmp.h | 2 +-
.../hive/test_hive_basic_type.out | 33 ++++++++++++----------
.../hive/test_external_catalog_hive.out | 10 +++++++
.../hive/test_external_catalog_hive.groovy | 5 +++-
6 files changed, 36 insertions(+), 20 deletions(-)
diff --git a/be/src/vec/exec/format/parquet/decoder.cpp b/be/src/vec/exec/format/parquet/decoder.cpp
index aa16eaf4b6c..f4ecd3e7862 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -152,7 +152,7 @@ void Decoder::init(FieldSchema* field_schema,
cctz::time_zone* ctz) {
const auto& schema = field_schema->parquet_schema;
if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) {
const auto& timestamp_info = schema.logicalType.TIMESTAMP;
- if (!timestamp_info.isAdjustedToUTC) {
+ if (timestamp_info.isAdjustedToUTC) {
// should set timezone to utc+0
_decode_params->ctz =
const_cast<cctz::time_zone*>(&_decode_params->utc0);
}
diff --git a/be/src/vec/exec/format/parquet/decoder.h b/be/src/vec/exec/format/parquet/decoder.h
index f3da9429dda..aee4a4d4de2 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -67,9 +67,9 @@ namespace doris::vectorized {
M(TypeIndex::Float64, Float64, Float64)
struct DecodeParams {
- // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
+ // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true
static const cctz::time_zone utc0;
- // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the time zone
+ // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false, we should set local time zone
cctz::time_zone* ctz = nullptr;
int32_t offset_days = 0;
int64_t second_mask = 1;
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index b993370a159..b9c909e7f2f 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -277,7 +277,7 @@ private:
const auto& schema = col_schema->parquet_schema;
if (schema.__isset.logicalType &&
schema.logicalType.__isset.TIMESTAMP) {
const auto& timestamp_info = schema.logicalType.TIMESTAMP;
- if (!timestamp_info.isAdjustedToUTC) {
+ if (timestamp_info.isAdjustedToUTC) {
// should set timezone to utc+0
resolved_ctz = cctz::utc_time_zone();
}
diff --git a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 94de65a4979..f921b9d0d1c 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -13,9 +13,9 @@ true 6 6 6 60 6.6
60.599999999999994 7296 12/31/10 6 2010-12-31T12:06:13.650
201
true 8 8 8 80 8.8 80.8 7298 12/31/10
8 2010-12-31T12:08:13.780 2010 12
-- !5 --
-2 24 15314771 999319712124142303 true 6.009337E8
4.8177228079770208E16 \N northern rural 2022-08-30T23:21:08
407186.2849 phones int_col 2019-01-01 [2.5954339078494106e+17,
5.88165568758352e+17, 4.7802599872265741e+17, 6.9266228812515571e+17,
9.86405645575228e+17] \N phones int_col
-5 59 317349992 998913039814974432 false 5.6584864E8
9.900861328269033E17 Handling man satisfy firework descent top. Racing
closed county set-up crown cave. Correctly front duration pure. \N
2022-09-02T19:52:57 372765.2493 desktops tinyint_col
2021-10-03 [9.9832612525719834e+17, 3.6120761530306432e+17,
9.9691314965094349e+17, 8.9912907179234752e+17, 1.1955893747098878e+17]
["CrySxz", "FMXGRcaGbahSVqhp", "oRKqPmhM", "VdODasEdDWFSRIQf"] desktops
tinyint_col
-6 62 915699741 999653836472045196 true 4.51937536E8
8.7961505445021914E17 Tale get speed platform august curved. Ease grass
neighbour landlord. Baby genetic youth. \N 2022-08-07T09:30:56
875620.2176 phones smallint_col \N [9.4235407151618547e+17,
4.8332499920295616e+17, 9.1670077477898342e+17] ["zNfbLeFx",
"GNTJOmWJyRmOK", "hwvfhSQGsaaMEqUrWCK", "cQrQsROKLARA", "nONj", "oepXBFB",
"IPtUql"] phones smallint_col
+2 24 15314771 999319712124142303 true 6.009337E8
4.8177228079770208E16 \N northern rural 2022-08-30T23:21:08
407186.2849 phones int_col 2019-01-01 [2.5954339078494106e+17,
5.88165568758352e+17, 4.780259987226574e+17, 6.926622881251557e+17,
9.86405645575228e+17] \N phones int_col
+5 59 317349992 998913039814974432 false 5.6584864E8
9.900861328269033E17 Handling man satisfy firework descent top. Racing
closed county set-up crown cave. Correctly front duration pure. \N
2022-09-02T19:52:57 372765.2493 desktops tinyint_col
2021-10-03 [9.983261252571983e+17, 3.612076153030643e+17,
9.969131496509435e+17, 8.991290717923475e+17, 1.1955893747098878e+17]
["CrySxz", "FMXGRcaGbahSVqhp", "oRKqPmhM", "VdODasEdDWFSRIQf"] desktops
tinyint_col
+6 62 915699741 999653836472045196 true 4.51937536E8
8.7961505445021914E17 Tale get speed platform august curved. Ease grass
neighbour landlord. Baby genetic youth. \N 2022-08-07T09:30:56
875620.2176 phones smallint_col \N [9.423540715161855e+17,
4.8332499920295616e+17, 9.167007747789834e+17] ["zNfbLeFx", "GNTJOmWJyRmOK",
"hwvfhSQGsaaMEqUrWCK", "cQrQsROKLARA", "nONj", "oepXBFB", "IPtUql"]
phones smallint_col
-- !6 --
00cwjIryUv EXHwpeK2Nl hv2PYEMYMM eo69nyw4Yv K6797tgjFg
LlFNd8Kyy5 wkpLCO3uo1 AIXCj1MfeD ni0HxZbiUO 6IjRdM8Gqi
qsTMK6A2eC 1wu7v9OPwW qavArd9tDc sU88hZADLj lyzWlwLOCx
2022-11-25
@@ -108,30 +108,30 @@ test DATETIME(6) Yes true \N
-- !28 --
\N
-1970-01-01T07:43:20
-1970-01-01T08:00:00.001
-1970-01-01T08:00:00.003
-2023-04-20T15:51:49
+1969-12-31T23:43:20
+1970-01-01T00:00:00.001
+1970-01-01T00:00:00.003
+2023-04-20T07:51:49
-- !29 --
test DATETIME(6) Yes true \N
-- !30 --
\N
-1970-01-01T07:43:20
-1970-01-01T08:00:00.000001
-1970-01-01T08:00:00.000003
-2023-04-20T15:51:49
+1969-12-31T23:43:20
+1970-01-01T00:00:00.000001
+1970-01-01T00:00:00.000003
+2023-04-20T07:51:49
-- !31 --
test DATETIME(6) Yes true \N
-- !32 --
\N
-1970-01-01T07:59:59
-1970-01-01T08:00
-1970-01-01T08:00
-2023-04-20T15:51:49
+1969-12-31T23:59:59
+1970-01-01T00:00
+1970-01-01T00:00
+2023-04-20T07:51:49
-- !7 --
\N \N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
@@ -169,6 +169,9 @@ test DATETIME(6) Yes true \N
-- !36 --
\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+-- !42 --
+\N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
+
-- !41 --
\N \N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
\N \N \N \N \N \N \N \N \N \N
test test
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
[...]
diff --git a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
index 124239d174a..e6d67231749 100644
--- a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
+++ b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
@@ -162,3 +162,13 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
-- !par_fields_in_file_parquet5 --
+
+-- !parquet_adjusted_utc --
+1997-09-21 1999-01-12T15:12:31.235784
+1998-01-12 1993-06-11T11:33:12.356500
+2002-09-29 2001-01-17T13:23:42.120
+2008-08-07 2023-09-23T03:12:17.458
+2009-11-13 2011-11-11T17:23:06.986
+2012-07-08 2023-11-09T12:21:16.321
+2017-09-13 2009-09-20T20:23:14.309124
+2024-03-23 2024-02-01T13:11:09.170
diff --git a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
index ef2b5a32e42..264daa4b906 100644
--- a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
+++ b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
@@ -48,7 +48,7 @@ suite("test_external_catalog_hive", "p2") {
// test small table(text format)
def q01 = {
qt_q01 """ select name, count(1) as c from student group by name
order by c desc;"""
- qt_q02 """ select lo_orderkey, count(1) as c from lineorder group
by lo_orderkey order by c desc;"""
+ qt_q02 """ select lo_orderkey, count(1) as c from lineorder group
by lo_orderkey order by lo_orderkey asc;"""
qt_q03 """ select * from test1 order by col_1;"""
qt_q04 """ select * from string_table order by p_partkey desc;"""
qt_q05 """ select * from account_fund order by batchno;"""
@@ -109,6 +109,9 @@ suite("test_external_catalog_hive", "p2") {
qt_par_fields_in_file_orc5 """ select * from
multi_catalog.par_fields_in_file_orc where month = 8 and year = 2022 order by
id; """
qt_par_fields_in_file_parquet5 """ select * from
multi_catalog.par_fields_in_file_parquet where month = 8 and year = 2022 order
by id; """
+ // timestamp with isAdjustedToUTC=true
+ qt_parquet_adjusted_utc """select * from
multi_catalog.timestamp_with_time_zone order by date_col;"""
+
// test unsupported input format query
try {
sql """ select * from
multi_catalog.unsupported_input_format_empty; """
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]