This is an automated email from the ASF dual-hosted git repository. morningman pushed a commit to branch branch-1.2-lts in repository https://gitbox.apache.org/repos/asf/doris.git
commit b7682f9390984611889ffb0b14b7f17b363130e3 Author: Jibing-Li <[email protected]> AuthorDate: Thu Feb 16 15:47:23 2023 +0800 [Fix](multi catalog)Fix partition case bug (#16763) Set column names from path to lower case in the case-insensitive case. This is for Iceberg columns from path. Iceberg column names are case sensitive, which may cause errors for tables with partitions. --- be/src/vec/exec/format/orc/vorc_reader.cpp | 6 +- .../org/apache/doris/common/util/BrokerUtil.java | 6 ++ .../iceberg/iceberg_partition_upper_case.out | 109 +++++++++++++++++++++ .../iceberg/iceberg_partition_upper_case.groovy | 82 ++++++++++++++++ 4 files changed, 201 insertions(+), 2 deletions(-) diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 5d9ae9d63c..57e0b4fc89 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -172,11 +172,13 @@ Status OrcReader::init_reader( auto& selected_type = _row_reader->getSelectedType(); _col_orc_type.resize(selected_type.getSubtypeCount()); for (int i = 0; i < selected_type.getSubtypeCount(); ++i) { - auto name = _get_field_name_lower_case(&selected_type, i); + std::string name; // For hive engine, translate the column name in orc file to schema column name. // This is for Hive 1.x which use internal column name _col0, _col1... 
if (_is_hive) { - name = _file_col_to_schema_col[name]; + name = _file_col_to_schema_col[selected_type.getFieldName(i)]; + } else { + name = _get_field_name_lower_case(&selected_type, i); } _colname_to_idx[name] = i; _col_orc_type[i] = selected_type.getSubtype(i); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java index 974cefbc2d..f11c5fe29d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/BrokerUtil.java @@ -119,6 +119,12 @@ public class BrokerUtil { if (columnsFromPath == null || columnsFromPath.isEmpty()) { return Collections.emptyList(); } + if (!caseSensitive) { + for (int i = 0; i < columnsFromPath.size(); i++) { + String path = columnsFromPath.remove(i); + columnsFromPath.add(i, path.toLowerCase()); + } + } String[] strings = filePath.split("/"); if (strings.length < 2) { throw new UserException("Fail to parse columnsFromPath, expected: " diff --git a/regression-test/data/external_catalog_p0/iceberg/iceberg_partition_upper_case.out b/regression-test/data/external_catalog_p0/iceberg/iceberg_partition_upper_case.out new file mode 100644 index 0000000000..e286103bbd --- /dev/null +++ b/regression-test/data/external_catalog_p0/iceberg/iceberg_partition_upper_case.out @@ -0,0 +1,109 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !orcupper1 -- +1 k2_1 k3_1 Beijing +2 k2_2 k3_2 Beijing +3 k2_3 k3_3 Shanghai +4 k2_4 k3_4 Shanghai + +-- !orcupper2 -- +1 Beijing +2 Beijing +3 Shanghai +4 Shanghai + +-- !orcupper3 -- +1 k2_1 +2 k2_2 +3 k2_3 +4 k2_4 + +-- !orcupper4 -- +Beijing +Beijing +Shanghai +Shanghai + +-- !orcupper5 -- +2 k2_2 k3_2 Beijing + +-- !orclower1 -- +1 k2_1 k3_1 Beijing +2 k2_2 k3_2 Beijing +3 k2_3 k3_3 Shanghai +4 k2_4 k3_4 Shanghai + +-- !orclower1 -- +1 Beijing +2 Beijing +3 Shanghai +4 Shanghai + +-- !orclower1 -- +1 k2_1 +2 k2_2 +3 k2_3 +4 k2_4 + +-- !orclower1 -- +Beijing +Beijing +Shanghai +Shanghai + +-- !orclower1 -- +2 k2_2 k3_2 Beijing + +-- !parquetupper1 -- +1 k2_1 k3_1 Beijing +2 k2_2 k3_2 Beijing +3 k2_3 k3_3 Shanghai +4 k2_4 k3_4 Shanghai + +-- !parquetupper2 -- +1 Beijing +2 Beijing +3 Shanghai +4 Shanghai + +-- !parquetupper3 -- +1 k2_1 +2 k2_2 +3 k2_3 +4 k2_4 + +-- !parquetupper4 -- +Beijing +Beijing +Shanghai +Shanghai + +-- !parquetupper5 -- +2 k2_2 k3_2 Beijing + +-- !parquetlower1 -- +1 k2_1 k3_1 Beijing +2 k2_2 k3_2 Beijing +3 k2_3 k3_3 Shanghai +4 k2_4 k3_4 Shanghai + +-- !parquetlower2 -- +1 Beijing +2 Beijing +3 Shanghai +4 Shanghai + +-- !parquetlower3 -- +1 k2_1 +2 k2_2 +3 k2_3 +4 k2_4 + +-- !parquetlower4 -- +Beijing +Beijing +Shanghai +Shanghai + +-- !parquetlower5 -- +2 k2_2 k3_2 Beijing + diff --git a/regression-test/suites/external_catalog_p0/iceberg/iceberg_partition_upper_case.groovy b/regression-test/suites/external_catalog_p0/iceberg/iceberg_partition_upper_case.groovy new file mode 100644 index 0000000000..eaa1025972 --- /dev/null +++ b/regression-test/suites/external_catalog_p0/iceberg/iceberg_partition_upper_case.groovy @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("iceberg_partition_upper_case", "p0") { + def orc_upper1 = """select * from iceberg_partition_upper_case_orc order by k1;""" + def orc_upper2 = """select k1, city from iceberg_partition_upper_case_orc order by k1;""" + def orc_upper3 = """select k1, k2 from iceberg_partition_upper_case_orc order by k1;""" + def orc_upper4 = """select city from iceberg_partition_upper_case_orc order by city;""" + def orc_upper5 = """select * from iceberg_partition_upper_case_orc where k1>1 and city='Beijing' order by k1;""" + + def orc_lower1 = """select * from iceberg_partition_lower_case_orc order by k1;""" + def orc_lower2 = """select k1, city from iceberg_partition_lower_case_orc order by k1;""" + def orc_lower3 = """select k1, k2 from iceberg_partition_lower_case_orc order by k1;""" + def orc_lower4 = """select city from iceberg_partition_lower_case_orc order by city;""" + def orc_lower5 = """select * from iceberg_partition_lower_case_orc where k1>1 and city='Beijing' order by k1;""" + + def parquet_upper1 = """select * from iceberg_partition_upper_case_parquet order by k1;""" + def parquet_upper2 = """select k1, city from iceberg_partition_upper_case_parquet order by k1;""" + def parquet_upper3 = """select k1, k2 from iceberg_partition_upper_case_parquet order by k1;""" + def parquet_upper4 = """select city from iceberg_partition_upper_case_parquet order by city;""" + def parquet_upper5 = 
"""select * from iceberg_partition_upper_case_parquet where k1>1 and city='Beijing' order by k1;""" + + def parquet_lower1 = """select * from iceberg_partition_lower_case_parquet order by k1;""" + def parquet_lower2 = """select k1, city from iceberg_partition_lower_case_parquet order by k1;""" + def parquet_lower3 = """select k1, k2 from iceberg_partition_lower_case_parquet order by k1;""" + def parquet_lower4 = """select city from iceberg_partition_lower_case_parquet order by city;""" + def parquet_lower5 = """select * from iceberg_partition_lower_case_parquet where k1>1 and city='Beijing' order by k1;""" + + String enabled = context.config.otherConfigs.get("enableExternalHiveTest") + if (enabled != null && enabled.equalsIgnoreCase("true")) { + String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost") + String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort") + String catalog_name = "iceberg_partition" + sql """drop catalog if exists ${catalog_name};""" + sql """ + create catalog if not exists ${catalog_name} properties ( + 'type'='hms', + 'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}' + ); + """ + logger.info("catalog " + catalog_name + " created") + sql """switch ${catalog_name};""" + logger.info("switched to catalog " + catalog_name) + sql """use multi_catalog;""" + qt_orcupper1 orc_upper1 + qt_orcupper2 orc_upper2 + qt_orcupper3 orc_upper3 + qt_orcupper4 orc_upper4 + qt_orcupper5 orc_upper5 + qt_orclower1 orc_lower1 + qt_orclower1 orc_lower2 + qt_orclower1 orc_lower3 + qt_orclower1 orc_lower4 + qt_orclower1 orc_lower5 + qt_parquetupper1 parquet_upper1 + qt_parquetupper2 parquet_upper2 + qt_parquetupper3 parquet_upper3 + qt_parquetupper4 parquet_upper4 + qt_parquetupper5 parquet_upper5 + qt_parquetlower1 parquet_lower1 + qt_parquetlower2 parquet_lower2 + qt_parquetlower3 parquet_lower3 + qt_parquetlower4 parquet_lower4 + qt_parquetlower5 parquet_lower5 + + } +} + 
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
