This is an automated email from the ASF dual-hosted git repository.

yiguolei pushed a commit to branch branch-2.1
in repository https://gitbox.apache.org/repos/asf/doris.git

commit 8308bc96b93205f2a376ca3d427f923322d8f49e
Author: wuwenchi <[email protected]>
AuthorDate: Tue Jan 23 10:42:42 2024 +0800

    [fix](paimon)set timestamp's scale for parquet which has no logical type 
(#30119)
---
 .../exec/format/parquet/parquet_column_convert.h   | 28 +++++++++++
 .../paimon/paimon_timestamp_types.out              | 13 +++++
 .../paimon/paimon_timestamp_types.groovy           | 55 ++++++++++++++++++++++
 3 files changed, 96 insertions(+)

diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h 
b/be/src/vec/exec/format/parquet/parquet_column_convert.h
index b5c3ffb7c88..39ee29f663f 100644
--- a/be/src/vec/exec/format/parquet/parquet_column_convert.h
+++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h
@@ -127,6 +127,30 @@ struct ConvertParams {
     DecimalScaleParams decimal_scale;
     FieldSchema* field_schema = nullptr;
 
+    /**
+     * Some frameworks like paimon may write non-standard parquet files. 
Timestamp field doesn't have
+     * logicalType or converted_type to indicate its precision. We have to 
reset the time mask.
+     */
+    void reset_time_scale_if_missing(int scale) {
+        const auto& schema = field_schema->parquet_schema;
+        if (!schema.__isset.logicalType && !schema.__isset.converted_type) {
+            int ts_scale = 9;
+            if (scale <= 3) {
+                ts_scale = 3;
+            } else if (scale <= 6) {
+                ts_scale = 6;
+            }
+            second_mask = common::exp10_i64(ts_scale);
+            scale_to_nano_factor = common::exp10_i64(9 - ts_scale);
+
+            // The missing parquet metadata makes it impossible for us to know 
the time zone information,
+            // so we default to UTC here.
+            if (ctz == nullptr) {
+                ctz = const_cast<cctz::time_zone*>(&utc0);
+            }
+        }
+    }
+
     void init(FieldSchema* field_schema_, cctz::time_zone* ctz_) {
         field_schema = field_schema_;
         if (ctz_ != nullptr) {
@@ -671,8 +695,12 @@ inline Status get_converter(tparquet::Type::type 
parquet_physical_type, Primitiv
         break;
     case TypeIndex::DateTimeV2:
         if (tparquet::Type::INT96 == parquet_physical_type) {
+            // int96 only stores nanoseconds in a standard parquet file
+            convert_params->reset_time_scale_if_missing(9);
             *converter = std::make_unique<Int96toTimestamp>();
         } else if (tparquet::Type::INT64 == parquet_physical_type) {
+            convert_params->reset_time_scale_if_missing(
+                    remove_nullable(dst_data_type)->get_scale());
             *converter = std::make_unique<Int64ToTimestamp>();
         }
         break;
diff --git 
a/regression-test/data/external_table_p2/paimon/paimon_timestamp_types.out 
b/regression-test/data/external_table_p2/paimon/paimon_timestamp_types.out
new file mode 100644
index 00000000000..641424b160e
--- /dev/null
+++ b/regression-test/data/external_table_p2/paimon/paimon_timestamp_types.out
@@ -0,0 +1,13 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !c1 --
+1      5432-08-30T05:43:21.100 5432-08-30T05:43:21.120 5432-08-30T05:43:21.123 
5432-08-30T05:43:21.123400      5432-08-30T05:43:21.123450      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456
+
+-- !c2 --
+1      5432-08-30T05:43:21.100 5432-08-30T05:43:21.120 5432-08-30T05:43:21.123 
5432-08-30T05:43:21.123400      5432-08-30T05:43:21.123450      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456
+
+-- !c3 --
+1      5432-08-30T05:43:21.100 5432-08-30T05:43:21.120 5432-08-30T05:43:21.123 
5432-08-30T05:43:21.123400      5432-08-30T05:43:21.123450      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456
+
+-- !c4 --
+1      5432-08-30T05:43:21.100 5432-08-30T05:43:21.120 5432-08-30T05:43:21.123 
5432-08-30T05:43:21.123400      5432-08-30T05:43:21.123450      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456      
5432-08-30T05:43:21.123456      5432-08-30T05:43:21.123456
+
diff --git 
a/regression-test/suites/external_table_p2/paimon/paimon_timestamp_types.groovy 
b/regression-test/suites/external_table_p2/paimon/paimon_timestamp_types.groovy
new file mode 100644
index 00000000000..dbb1f1d038c
--- /dev/null
+++ 
b/regression-test/suites/external_table_p2/paimon/paimon_timestamp_types.groovy
@@ -0,0 +1,55 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("paimon_timestamp_types", 
"p2,external,paimon,external_remote,external_remote_paimon") {
+
+    def ts_orc = """select * from ts_orc"""
+    def ts_parquet = """select * from ts_parquet"""
+
+    String enabled = 
context.config.otherConfigs.get("enableExternalPaimonTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String catalog_name = "paimon_timestamp_catalog"
+        String user_name = context.config.otherConfigs.get("extHiveHmsUser")
+        String hiveHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String hivePort = context.config.otherConfigs.get("extHdfsPort")
+
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                "type" = "paimon",
+                "paimon.catalog.type" = "filesystem",
+                "warehouse" = "hdfs://${hiveHost}/${hivePort}/paimon/paimon1",
+                "hadoop.username" = "${user_name}"
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use db1;"""
+        logger.info("use db1")
+
+        sql """set force_jni_scanner=true"""
+        qt_c1 ts_orc
+        qt_c2 ts_parquet
+
+        sql """set force_jni_scanner=false"""
+        qt_c3 ts_orc
+        qt_c4 ts_parquet
+
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to