This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_syxj_2
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b33f30a8c514c305f9b10c0cd38662d5df073a25
Author: morningman <[email protected]>
AuthorDate: Mon Aug 21 11:26:25 2023 +0800

    [fix](hive) do not split compress data file
---
 .../doris/datasource/hive/HiveMetaStoreCache.java       |  2 ++
 .../org/apache/doris/external/hive/util/HiveUtil.java   |  3 ++-
 .../org/apache/doris/planner/external/FileScanNode.java | 17 ++++++++++-------
 .../org/apache/doris/planner/external/HiveSplit.java    |  5 -----
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 1ad1b12047..e54466ad86 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -1000,6 +1000,7 @@ public class HiveMetaStoreCache {
         // File Cache for self splitter.
         private final List<HiveFileStatus> files = Lists.newArrayList();
         // File split cache for old splitter. This is a temp variable.
+        @Deprecated
         private final List<FileSplit> splits = Lists.newArrayList();
         private boolean isSplittable;
         // The values of partitions.
@@ -1021,6 +1022,7 @@ public class HiveMetaStoreCache {
             }
         }
 
+        @Deprecated
         public void addSplit(FileSplit split) {
             if (isFileVisible(split.getPath())) {
                 splits.add(split);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
index 704d0fadf8..e1baea3652 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
@@ -190,7 +190,8 @@ public final class HiveUtil {
             return true;
         }
 
-        // use reflection to get isSplittable method on FileInputFormat
+        // use reflection to get isSplitable method on FileInputFormat
+        // ATTN: the method name is actually "isSplitable", although the correct spelling is "isSplittable"
         Method method = null;
         for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz = 
clazz.getSuperclass()) {
             try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
index b8952d38b7..aa6eca6b0e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
@@ -25,6 +25,7 @@ import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.Util;
 import org.apache.doris.planner.PlanNodeId;
 import org.apache.doris.planner.external.FileSplit.FileSplitCreator;
 import org.apache.doris.qe.ConnectContext;
@@ -32,6 +33,7 @@ import org.apache.doris.spi.Split;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TExpr;
+import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TFileScanNode;
 import org.apache.doris.thrift.TFileScanRangeParams;
@@ -220,19 +222,20 @@ public abstract class FileScanNode extends ExternalScanNode {
         if (blockLocations == null) {
             blockLocations = new BlockLocation[0];
         }
-        long splitSize = 
ConnectContext.get().getSessionVariable().getFileSplitSize();
-        if (splitSize <= 0) {
-            splitSize = blockSize;
-        }
-        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
-        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         List<Split> result = Lists.newArrayList();
-        if (!splittable) {
+        TFileCompressType compressType = 
Util.inferFileCompressTypeByPath(path.toString());
+        if (!splittable || compressType != TFileCompressType.PLAIN) {
             LOG.debug("Path {} is not splittable.", path);
             String[] hosts = blockLocations.length == 0 ? null : 
blockLocations[0].getHosts();
             result.add(splitCreator.create(path, 0, length, length, 
modificationTime, hosts, partitionValues));
             return result;
         }
+        long splitSize = 
ConnectContext.get().getSessionVariable().getFileSplitSize();
+        if (splitSize <= 0) {
+            splitSize = blockSize;
+        }
+        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
+        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         long bytesRemaining;
         for (bytesRemaining = length; (double) bytesRemaining / (double) 
splitSize > 1.1D;
                 bytesRemaining -= splitSize) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 0f230c85f4..0bc8442760 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -32,11 +32,6 @@ public class HiveSplit extends FileSplit {
         this.acidInfo = acidInfo;
     }
 
-    public HiveSplit(Path path, long start, long length, long fileLength, 
String[] hosts, AcidInfo acidInfo) {
-        super(path, start, length, fileLength, hosts, null);
-        this.acidInfo = acidInfo;
-    }
-
     @Override
     public Object getInfo() {
         return acidInfo;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to