This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/master by this push:
     new 489171e4c1 [Fix](multi catalog)Fix hive partition value contains special character such as / bug (#21876)
489171e4c1 is described below

commit 489171e4c180ef603ad95eea8b4bc49f042e177b
Author: Jibing-Li <[email protected]>
AuthorDate: Tue Jul 18 11:20:38 2023 +0800

    [Fix](multi catalog)Fix hive partition value contains special character such as / bug (#21876)
    
    Hive escapes some special characters in partition values to %XX; for example, / is escaped to %2F.
    Doris did not handle this, so it failed to list the files under partitions whose values contain such characters.
    This PR fixes that bug.
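    
    A minimal, self-contained sketch of the escaping this fix relies on (the column name "part" and the sample value are illustrative only, not taken from the patch; it assumes the Hive standalone-metastore jar is on the classpath):
    
        import org.apache.hadoop.hive.metastore.utils.FileUtils;
    
        public class EscapePathDemo {
            public static void main(String[] args) {
                // Hive writes the partition value "2023/01/01" to storage as
                // "2023%2F01%2F01", so the cached partition path must be built
                // with the same escaping to find the files.
                String value = "2023/01/01";
                String segment = "part=" + FileUtils.escapePathName(value);
                System.out.println(segment); // part=2023%2F01%2F01
            }
        }
    
    Without the escaping, the constructed path keeps the literal '/' and points at a directory that does not exist on the filesystem.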
---
 .../doris/datasource/hive/HiveMetaStoreCache.java  |  4 +-
 .../hive/test_hive_special_char_partition.out      | 51 ++++++++++++++++++++++
 .../hive/test_hive_special_char_partition.groovy   | 51 ++++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 51653afc68..97a38b9864 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -74,6 +74,7 @@ import org.apache.hadoop.hdfs.HdfsConfiguration;
 import org.apache.hadoop.hive.common.ValidWriteIdList;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.metastore.utils.FileUtils;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.mapred.FileInputFormat;
 import org.apache.hadoop.mapred.InputFormat;
@@ -339,7 +340,8 @@ public class HiveMetaStoreCache {
             for (int i = 0; i < partitionColumns.size(); i++) {
                 sb.append(partitionColumns.get(i).getName());
                 sb.append("=");
-                sb.append(key.getValues().get(i));
+                // Partition values may contain special characters, such as /, which need to be escaped.
+                sb.append(FileUtils.escapePathName(key.getValues().get(i)));
                 sb.append("/");
             }
             sb.delete(sb.length() - 1, sb.length());
diff --git a/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out
new file mode 100644
index 0000000000..0bd26b1276
--- /dev/null
+++ b/regression-test/data/external_table_emr_p2/hive/test_hive_special_char_partition.out
@@ -0,0 +1,51 @@
+-- This file is automatically generated. You should know what you did if you want to edit this
+-- !1 --
+name#  2023#01#01
+name1  2023/01/01
+name10 2023<01><01>
+name11 2023\\01\\01
+name12 2023.01.01
+name2  2023 01 01
+name3  2023:01:01
+name4  2023?01?01
+name5  2023=01=01
+name6  2023%01%01
+name8  2023"01"01
+name9  2023'01'01
+
+-- !2 --
+name2
+
+-- !3 --
+name1
+
+-- !4 --
+name4  2023?01?01
+
+-- !5 --
+name12 2023.01.01
+
+-- !6 --
+name10 2023<01><01>
+
+-- !7 --
+name3  2023:01:01
+
+-- !8 --
+name5  2023=01=01
+
+-- !9 --
+name8  2023"01"01
+
+-- !10 --
+name9  2023'01'01
+
+-- !11 --
+name11 2023\\01\\01
+
+-- !12 --
+name6  2023%01%01
+
+-- !13 --
+name#  2023#01#01
+
diff --git a/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy
new file mode 100644
index 0000000000..cb862469f6
--- /dev/null
+++ b/regression-test/suites/external_table_emr_p2/hive/test_hive_special_char_partition.groovy
@@ -0,0 +1,51 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_special_char_partition", "p2") {
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "test_hive_special_char_partition"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hadoop.username' = 'hadoop',
+                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use multi_catalog;"""
+        qt_1 "select * from special_character_1_partition order by name"
+        qt_2 "select name from special_character_1_partition where part='2023 01 01'"
+        qt_3 "select name from special_character_1_partition where part='2023/01/01'"
+        qt_4 "select * from special_character_1_partition where part='2023?01?01'"
+        qt_5 "select * from special_character_1_partition where part='2023.01.01'"
+        qt_6 "select * from special_character_1_partition where part='2023<01><01>'"
+        qt_7 "select * from special_character_1_partition where part='2023:01:01'"
+        qt_8 "select * from special_character_1_partition where part='2023=01=01'"
+        qt_9 "select * from special_character_1_partition where part='2023\"01\"01'"
+        qt_10 "select * from special_character_1_partition where part='2023\\'01\\'01'"
+        qt_11 "select * from special_character_1_partition where part='2023\\\\01\\\\01'"
+        qt_12 "select * from special_character_1_partition where part='2023%01%01'"
+        qt_13 "select * from special_character_1_partition where part='2023#01#01'"
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
