This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 296993777bc [fix](parquet) fix time zone error(isAdjustedToUTC=true) 
in parquet reader (#33743)
296993777bc is described below

commit 296993777bcb62c2f4ab899b4638544fa80e4a7d
Author: Ashin Gau <[email protected]>
AuthorDate: Wed Apr 17 12:38:38 2024 +0800

    [fix](parquet) fix time zone error(isAdjustedToUTC=true) in parquet reader 
(#33743)
    
    This reverts commit caba7e1bfdafef875355a36bae386e805b68f4ab.
    backport: #33675
---
 be/src/vec/exec/format/parquet/decoder.cpp         |  7 ++++++-
 be/src/vec/exec/format/parquet/decoder.h           |  4 ++--
 be/src/vec/exec/format/parquet/parquet_pred_cmp.h  |  2 +-
 .../hive/test_hive_basic_type.out                  | 24 +++++++++++-----------
 .../hive/test_external_catalog_hive.out            | 10 ---------
 .../hive/test_external_catalog_hive.groovy         |  5 +----
 6 files changed, 22 insertions(+), 30 deletions(-)

diff --git a/be/src/vec/exec/format/parquet/decoder.cpp 
b/be/src/vec/exec/format/parquet/decoder.cpp
index f4ecd3e7862..4c8c3f9eab5 100644
--- a/be/src/vec/exec/format/parquet/decoder.cpp
+++ b/be/src/vec/exec/format/parquet/decoder.cpp
@@ -152,8 +152,13 @@ void Decoder::init(FieldSchema* field_schema, 
cctz::time_zone* ctz) {
     const auto& schema = field_schema->parquet_schema;
     if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) {
         const auto& timestamp_info = schema.logicalType.TIMESTAMP;
-        if (timestamp_info.isAdjustedToUTC) {
+        if (!timestamp_info.isAdjustedToUTC) {
             // should set timezone to utc+0
+            // Reference: 
https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#instant-semantics-timestamps-normalized-to-utc
+            // If isAdjustedToUTC = false, the reader should display the same 
value no matter what the local time zone is. For example:
+            // When a timestamp is stored as `1970-01-03 12:00:00`,
+            // if isAdjustedToUTC = true, UTC8 should read as `1970-01-03 
20:00:00`, UTC6 should read as `1970-01-03 18:00:00`
+            // if isAdjustedToUTC = false, UTC8 and UTC6 should read as 
`1970-01-03 12:00:00`, which is the same as `1970-01-03 12:00:00` in UTC0
             _decode_params->ctz = 
const_cast<cctz::time_zone*>(&_decode_params->utc0);
         }
         const auto& time_unit = timestamp_info.unit;
diff --git a/be/src/vec/exec/format/parquet/decoder.h 
b/be/src/vec/exec/format/parquet/decoder.h
index aee4a4d4de2..f3da9429dda 100644
--- a/be/src/vec/exec/format/parquet/decoder.h
+++ b/be/src/vec/exec/format/parquet/decoder.h
@@ -67,9 +67,9 @@ namespace doris::vectorized {
     M(TypeIndex::Float64, Float64, Float64)
 
 struct DecodeParams {
-    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true
+    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false
     static const cctz::time_zone utc0;
-    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == false, we should set 
local time zone
+    // schema.logicalType.TIMESTAMP.isAdjustedToUTC == true, we should set the 
time zone
     cctz::time_zone* ctz = nullptr;
     int32_t offset_days = 0;
     int64_t second_mask = 1;
diff --git a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h 
b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
index b9c909e7f2f..b993370a159 100644
--- a/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
+++ b/be/src/vec/exec/format/parquet/parquet_pred_cmp.h
@@ -277,7 +277,7 @@ private:
                 const auto& schema = col_schema->parquet_schema;
                 if (schema.__isset.logicalType && 
schema.logicalType.__isset.TIMESTAMP) {
                     const auto& timestamp_info = schema.logicalType.TIMESTAMP;
-                    if (timestamp_info.isAdjustedToUTC) {
+                    if (!timestamp_info.isAdjustedToUTC) {
                         // should set timezone to utc+0
                         resolved_ctz = cctz::utc_time_zone();
                     }
diff --git 
a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out 
b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
index 11ca0ab612b..94de65a4979 100644
--- a/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
+++ b/regression-test/data/external_table_p0/hive/test_hive_basic_type.out
@@ -108,30 +108,30 @@ test      DATETIME(6)     Yes     true    \N
 
 -- !28 --
 \N
-1969-12-31T23:43:20
-1970-01-01T00:00:00.001
-1970-01-01T00:00:00.003
-2023-04-20T07:51:49
+1970-01-01T07:43:20
+1970-01-01T08:00:00.001
+1970-01-01T08:00:00.003
+2023-04-20T15:51:49
 
 -- !29 --
 test   DATETIME(6)     Yes     true    \N      
 
 -- !30 --
 \N
-1969-12-31T23:43:20
-1970-01-01T00:00:00.000001
-1970-01-01T00:00:00.000003
-2023-04-20T07:51:49
+1970-01-01T07:43:20
+1970-01-01T08:00:00.000001
+1970-01-01T08:00:00.000003
+2023-04-20T15:51:49
 
 -- !31 --
 test   DATETIME(6)     Yes     true    \N      
 
 -- !32 --
 \N
-1969-12-31T23:59:59
-1970-01-01T00:00
-1970-01-01T00:00
-2023-04-20T07:51:49
+1970-01-01T07:59:59
+1970-01-01T08:00
+1970-01-01T08:00
+2023-04-20T15:51:49
 
 -- !7 --
 \N     \N      \N      \N      \N      \N      \N      \N      \N      \N      
        test            test    
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 [...]
diff --git 
a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out 
b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
index e6d67231749..124239d174a 100644
--- a/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
+++ b/regression-test/data/external_table_p2/hive/test_external_catalog_hive.out
@@ -162,13 +162,3 @@ Z6n2t4XA2n7CXTECJ,PE,iBbsCh0RE1Dd2A,z48
 
 -- !par_fields_in_file_parquet5 --
 
-
--- !parquet_adjusted_utc --
-1997-09-21      1999-01-12T15:12:31.235784
-1998-01-12      1993-06-11T11:33:12.356500
-2002-09-29      2001-01-17T13:23:42.120
-2008-08-07      2023-09-23T03:12:17.458
-2009-11-13      2011-11-11T17:23:06.986
-2012-07-08      2023-11-09T12:21:16.321
-2017-09-13      2009-09-20T20:23:14.309124
-2024-03-23      2024-02-01T13:11:09.170
diff --git 
a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
 
b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
index 264daa4b906..ef2b5a32e42 100644
--- 
a/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
+++ 
b/regression-test/suites/external_table_p2/hive/test_external_catalog_hive.groovy
@@ -48,7 +48,7 @@ suite("test_external_catalog_hive", "p2") {
         // test small table(text format)
         def q01 = {
             qt_q01 """ select name, count(1) as c from student group by name 
order by c desc;"""
-            qt_q02 """ select lo_orderkey, count(1) as c from lineorder group 
by lo_orderkey order by lo_orderkey asc;"""
+            qt_q02 """ select lo_orderkey, count(1) as c from lineorder group 
by lo_orderkey order by c desc;"""
             qt_q03 """ select * from test1 order by col_1;"""
             qt_q04 """ select * from string_table order by p_partkey desc;"""
             qt_q05 """ select * from account_fund order by batchno;"""
@@ -109,9 +109,6 @@ suite("test_external_catalog_hive", "p2") {
         qt_par_fields_in_file_orc5 """ select * from 
multi_catalog.par_fields_in_file_orc where month = 8 and year = 2022 order by 
id; """
         qt_par_fields_in_file_parquet5 """ select * from 
multi_catalog.par_fields_in_file_parquet where month = 8 and year = 2022 order 
by id; """
 
-        // timestamp with isAdjustedToUTC=true
-        qt_parquet_adjusted_utc """select * from 
multi_catalog.timestamp_with_time_zone order by date_col;"""
-
         // test unsupported input format query
         try {
             sql """ select * from 
multi_catalog.unsupported_input_format_empty; """


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to