This is an automated email from the ASF dual-hosted git repository.

morningman pushed a commit to branch branch-1.2-lts
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-1.2-lts by this push:
     new 2a85d74f60 [Fix](multi catalog)Fix Hive partition path doesn't contain 
partition value case bug (#20283)
2a85d74f60 is described below

commit 2a85d74f60c8d0f013114aae8bd3f8903387f874
Author: Jibing-Li <[email protected]>
AuthorDate: Thu Jun 1 00:30:13 2023 +0800

    [Fix](multi catalog)Fix Hive partition path doesn't contain partition value 
case bug (#20283)
    
    Hive supports creating a partition with a specific location. In this case, the
file path for the created partition may not contain the partition name and
value, which will cause Doris to fail to query the Hive partition.
    This PR fixes this bug.
    
    cherry-pick #19053
---
 .../doris/datasource/hive/HiveMetaStoreCache.java  | 61 +++++++++++++++++---
 .../apache/doris/planner/external/HiveSplit.java   |  9 ++-
 .../doris/planner/external/QueryScanProvider.java  |  8 ++-
 .../planner/external/iceberg/IcebergSplit.java     |  2 +-
 .../hive/test_hive_partition_location.out          | 41 ++++++++++++++
 .../hive/test_hive_partition_location.groovy       | 65 ++++++++++++++++++++++
 6 files changed, 174 insertions(+), 12 deletions(-)

diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 42391636d7..fee8394078 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -36,6 +36,7 @@ import org.apache.doris.metric.MetricRepo;
 import org.apache.doris.planner.ColumnBound;
 import org.apache.doris.planner.ListPartitionPrunerV2;
 import org.apache.doris.planner.PartitionPrunerV2Base.UniqueId;
+import org.apache.doris.planner.external.HiveSplit;
 
 import com.google.common.base.Preconditions;
 import com.google.common.cache.CacheBuilder;
@@ -54,6 +55,7 @@ import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.mapred.FileInputFormat;
+import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
@@ -249,6 +251,7 @@ public class HiveMetaStoreCache {
             FileInputFormat.setInputPaths(jobConf, finalLocation);
             try {
                 InputFormat<?, ?> inputFormat = 
HiveUtil.getInputFormat(jobConf, key.inputFormat, false);
+                HiveSplit[] hiveSplits;
                 InputSplit[] splits;
                 String remoteUser = jobConf.get(HdfsResource.HADOOP_USER_NAME);
                 if (!Strings.isNullOrEmpty(remoteUser)) {
@@ -261,7 +264,31 @@ public class HiveMetaStoreCache {
                 if (LOG.isDebugEnabled()) {
                     LOG.debug("load #{} files for {} in catalog {}", 
splits.length, key, catalog.getName());
                 }
-                return ImmutableList.copyOf(splits);
+                if (splits == null) {
+                    LOG.warn("Splits for location {} is null", finalLocation);
+                    return ImmutableList.copyOf(new HiveSplit[0]);
+                }
+                hiveSplits = new HiveSplit[splits.length];
+                List<String> pValues;
+                // handle default hive partition case, replace the default 
partition value with null_string.
+                if (key.hasDefaultPartitionValue) {
+                    pValues = Lists.newArrayList();
+                    for (String value : key.partitionValues) {
+                        if (HIVE_DEFAULT_PARTITION.equals(value)) {
+                            pValues.add(FeConstants.null_string);
+                        } else {
+                            pValues.add(value);
+                        }
+                    }
+                } else {
+                    pValues = key.partitionValues;
+                }
+                for (int i = 0; i < splits.length; i++) {
+                    FileSplit fileSplit = (FileSplit) splits[i];
+                    hiveSplits[i] = new HiveSplit(fileSplit.getPath(), 
fileSplit.getStart(), fileSplit.getLength(),
+                        fileSplit.getLength(), null, pValues);
+                }
+                return ImmutableList.copyOf(hiveSplits);
             } catch (Exception e) {
                 throw new CacheException("failed to get input splits for %s in 
catalog %s", e, key, catalog.getName());
             }
@@ -312,7 +339,8 @@ public class HiveMetaStoreCache {
     public List<InputSplit> getFilesByPartitions(List<HivePartition> 
partitions) {
         long start = System.currentTimeMillis();
         List<FileCacheKey> keys = 
Lists.newArrayListWithExpectedSize(partitions.size());
-        partitions.stream().forEach(p -> keys.add(new 
FileCacheKey(p.getPath(), p.getInputFormat())));
+        partitions.stream().forEach(p -> keys.add(
+            new FileCacheKey(p.getPath(), p.getInputFormat(), 
p.getPartitionValues())));
 
         Stream<FileCacheKey> stream;
         if (partitions.size() < MIN_BATCH_FETCH_PARTITION_NUM) {
@@ -367,7 +395,7 @@ public class HiveMetaStoreCache {
                 PartitionCacheKey partKey = new PartitionCacheKey(dbName, 
tblName, values);
                 HivePartition partition = partitionCache.getIfPresent(partKey);
                 if (partition != null) {
-                    fileCache.invalidate(new FileCacheKey(partition.getPath(), 
null));
+                    fileCache.invalidate(new FileCacheKey(partition.getPath(), 
null, partition.getPartitionValues()));
                     partitionCache.invalidate(partKey);
                 }
             }
@@ -385,7 +413,7 @@ public class HiveMetaStoreCache {
             Table table = catalog.getClient().getTable(dbName, tblName);
             // we just need to assign the `location` filed because the 
`equals` method of `FileCacheKey`
             // just compares the value of `location`
-            fileCache.invalidate(new FileCacheKey(table.getSd().getLocation(), 
null));
+            fileCache.invalidate(new FileCacheKey(table.getSd().getLocation(), 
null, null));
         }
     }
 
@@ -398,7 +426,7 @@ public class HiveMetaStoreCache {
             PartitionCacheKey partKey = new PartitionCacheKey(dbName, tblName, 
values);
             HivePartition partition = partitionCache.getIfPresent(partKey);
             if (partition != null) {
-                fileCache.invalidate(new FileCacheKey(partition.getPath(), 
null));
+                fileCache.invalidate(new FileCacheKey(partition.getPath(), 
null, partition.getPartitionValues()));
                 partitionCache.invalidate(partKey);
             }
         }
@@ -616,10 +644,24 @@ public class HiveMetaStoreCache {
         private String location;
         // not in key
         private String inputFormat;
-
-        public FileCacheKey(String location, String inputFormat) {
+        // The values of partitions.
+        // e.g for file : hdfs://path/to/table/part1=a/part2=b/datafile
+        // partitionValues would be ["a", "b"]
+        protected List<String> partitionValues;
+        // Set to true if the partition values include a 
HIVE_DEFAULT_PARTITION.
+        private boolean hasDefaultPartitionValue = false;
+
+        public FileCacheKey(String location, String inputFormat, List<String> 
partitionValues) {
             this.location = location;
             this.inputFormat = inputFormat;
+            this.partitionValues = partitionValues == null ? 
Lists.newArrayList() : partitionValues;
+            // Set hasDefaultPartitionValue to true if partition values 
include default partition.
+            for (String value : this.partitionValues) {
+                if (HIVE_DEFAULT_PARTITION.equals(value)) {
+                    hasDefaultPartitionValue = true;
+                    break;
+                }
+            }
         }
 
         @Override
@@ -630,12 +672,13 @@ public class HiveMetaStoreCache {
             if (!(obj instanceof FileCacheKey)) {
                 return false;
             }
-            return location.equals(((FileCacheKey) obj).location);
+            return location.equals(((FileCacheKey) obj).location)
+                && partitionValues.equals(((FileCacheKey) 
obj).partitionValues);
         }
 
         @Override
         public int hashCode() {
-            return Objects.hash(location);
+            return Objects.hash(location, partitionValues);
         }
 
         @Override
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 76fc3b02fd..1e8aeb4e98 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -21,13 +21,20 @@ import lombok.Data;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapred.FileSplit;
 
+import java.util.List;
+
 @Data
 public class HiveSplit extends FileSplit {
     private long fileSize;
+    // The values of partitions.
+    // e.g for file : hdfs://path/to/table/part1=a/part2=b/datafile
+    // partitionValues would be ["a", "b"]
+    protected List<String> partitionValues;
 
-    public HiveSplit(Path file, long start, long length, long fileSize, 
String[] hosts) {
+    public HiveSplit(Path file, long start, long length, long fileSize, 
String[] hosts, List<String> partitionValues) {
         super(file, start, length, hosts);
         this.fileSize = fileSize;
+        this.partitionValues = partitionValues;
     }
 
     protected TableFormatType tableFormatType;
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
index a208321cc2..a5bb274f6d 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/QueryScanProvider.java
@@ -113,8 +113,14 @@ public abstract class QueryScanProvider implements 
FileScanProviderIf {
             for (InputSplit split : inputSplits) {
                 FileSplit fileSplit = (FileSplit) split;
                 List<String> pathPartitionKeys = getPathPartitionKeys();
-                List<String> partitionValuesFromPath = 
BrokerUtil.parseColumnsFromPath(fileSplit.getPath().toString(),
+                List<String> partitionValuesFromPath;
+                // For hive split, use the partition value from metastore 
first.
+                if (fileSplit instanceof HiveSplit && ((HiveSplit) 
fileSplit).partitionValues != null) {
+                    partitionValuesFromPath = ((HiveSplit) 
fileSplit).partitionValues;
+                } else {
+                    partitionValuesFromPath = 
BrokerUtil.parseColumnsFromPath(fileSplit.getPath().toString(),
                         pathPartitionKeys, false);
+                }
 
                 TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, 
partitionValuesFromPath, pathPartitionKeys,
                         locationType);
diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
index a7cd320773..bccd39e738 100644
--- 
a/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
+++ 
b/fe/fe-core/src/main/java/org/apache/doris/planner/external/iceberg/IcebergSplit.java
@@ -28,7 +28,7 @@ import java.util.List;
 @Data
 public class IcebergSplit extends HiveSplit {
     public IcebergSplit(Path file, long start, long length, long fileSize, 
String[] hosts) {
-        super(file, start, length, fileSize, hosts);
+        super(file, start, length, fileSize, hosts, null);
     }
 
     private Analyzer analyzer;
diff --git 
a/regression-test/data/external_table_emr_p2/hive/test_hive_partition_location.out
 
b/regression-test/data/external_table_emr_p2/hive/test_hive_partition_location.out
new file mode 100644
index 0000000000..15d4e8f232
--- /dev/null
+++ 
b/regression-test/data/external_table_emr_p2/hive/test_hive_partition_location.out
@@ -0,0 +1,41 @@
+-- This file is automatically generated. You should know what you did if you 
want to edit this
+-- !one_partition1 --
+1      Zhangsan        part1
+2      Lisi    part2
+
+-- !one_partition2 --
+1      Zhangsan        part1
+
+-- !one_partition3 --
+2      Lisi    part2
+
+-- !one_partition4 --
+part1
+
+-- !one_partition5 --
+part2
+
+-- !one_partition6 --
+part1
+part2
+
+-- !two_partition1 --
+1      Zhangsan        part1_1 part2_1
+2      Lisi    part1_2 part2_2
+
+-- !two_partition2 --
+1      Zhangsan        part1_1 part2_1
+
+-- !two_partition3 --
+1      Zhangsan        part1_1 part2_1
+
+-- !two_partition4 --
+2      Lisi    part1_2 part2_2
+
+-- !two_partition5 --
+2      Lisi    part1_2 part2_2
+
+-- !two_partition6 --
+part1_1        part2_1
+part1_2        part2_2
+
diff --git 
a/regression-test/suites/external_table_emr_p2/hive/test_hive_partition_location.groovy
 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_partition_location.groovy
new file mode 100644
index 0000000000..19bfbf36f1
--- /dev/null
+++ 
b/regression-test/suites/external_table_emr_p2/hive/test_hive_partition_location.groovy
@@ -0,0 +1,65 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+suite("test_hive_partition_location", "p2") {
+    def one_partition1 = """select * from partition_location_1 order by id;"""
+    def one_partition2 = """select * from partition_location_1 where 
part='part1';"""
+    def one_partition3 = """select * from partition_location_1 where 
part='part2';"""
+    def one_partition4 = """select part from partition_location_1 where 
part='part1';"""
+    def one_partition5 = """select part from partition_location_1 where 
part='part2';"""
+    def one_partition6 = """select part from partition_location_1 order by 
part;"""
+
+    def two_partition1 = """select * from partition_location_2 order by id;"""
+    def two_partition2 = """select * from partition_location_2 where 
part1='part1_1';"""
+    def two_partition3 = """select * from partition_location_2 where 
part2='part2_1';"""
+    def two_partition4 = """select * from partition_location_2 where 
part1='part1_2';"""
+    def two_partition5 = """select * from partition_location_2 where 
part2='part2_2';"""
+    def two_partition6 = """select part1, part2 from partition_location_2 
order by part1;"""
+
+    String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
+    if (enabled != null && enabled.equalsIgnoreCase("true")) {
+        String extHiveHmsHost = 
context.config.otherConfigs.get("extHiveHmsHost")
+        String extHiveHmsPort = 
context.config.otherConfigs.get("extHiveHmsPort")
+        String catalog_name = "hive_partition_location"
+        sql """drop catalog if exists ${catalog_name};"""
+        sql """
+            create catalog if not exists ${catalog_name} properties (
+                'type'='hms',
+                'hive.metastore.uris' = 
'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
+            );
+        """
+        logger.info("catalog " + catalog_name + " created")
+        sql """switch ${catalog_name};"""
+        logger.info("switched to catalog " + catalog_name)
+        sql """use multi_catalog;"""
+        qt_one_partition1 one_partition1
+        qt_one_partition2 one_partition2
+        qt_one_partition3 one_partition3
+        qt_one_partition4 one_partition4
+        qt_one_partition5 one_partition5
+        qt_one_partition6 one_partition6
+
+        qt_two_partition1 two_partition1
+        qt_two_partition2 two_partition2
+        qt_two_partition3 two_partition3
+        qt_two_partition4 two_partition4
+        qt_two_partition5 two_partition5
+        qt_two_partition6 two_partition6
+        sql """drop catalog if exists ${catalog_name};"""
+    }
+}
+


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to