This is an automated email from the ASF dual-hosted git repository.

panxiaolei pushed a commit to branch dev_syxj_2
in repository https://gitbox.apache.org/repos/asf/doris.git

commit b33f30a8c514c305f9b10c0cd38662d5df073a25
Author: morningman <[email protected]>
AuthorDate: Mon Aug 21 11:26:25 2023 +0800

    [fix](hive) do not split compress data file
---
 .../doris/datasource/hive/HiveMetaStoreCache.java       |  2 ++
 .../org/apache/doris/external/hive/util/HiveUtil.java   |  3 ++-
 .../org/apache/doris/planner/external/FileScanNode.java | 17 ++++++++++-------
 .../org/apache/doris/planner/external/HiveSplit.java    |  5 -----
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
index 1ad1b12047..e54466ad86 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java
@@ -1000,6 +1000,7 @@ public class HiveMetaStoreCache {
         // File Cache for self splitter.
         private final List<HiveFileStatus> files = Lists.newArrayList();
         // File split cache for old splitter. This is a temp variable.
+        @Deprecated
         private final List<FileSplit> splits = Lists.newArrayList();
         private boolean isSplittable;
         // The values of partitions.
@@ -1021,6 +1022,7 @@ public class HiveMetaStoreCache {
             }
         }

+        @Deprecated
         public void addSplit(FileSplit split) {
             if (isFileVisible(split.getPath())) {
                 splits.add(split);
diff --git a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
index 704d0fadf8..e1baea3652 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/external/hive/util/HiveUtil.java
@@ -190,7 +190,8 @@ public final class HiveUtil {
             return true;
         }

-        // use reflection to get isSplittable method on FileInputFormat
+        // use reflection to get isSplitable method on FileInputFormat
+        // ATTN: the method name is actually "isSplitable", but the right spell is "isSplittable"
         Method method = null;
         for (Class<?> clazz = inputFormat.getClass(); clazz != null; clazz = clazz.getSuperclass()) {
             try {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
index b8952d38b7..aa6eca6b0e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/FileScanNode.java
@@ -25,6 +25,7 @@ import org.apache.doris.analysis.TupleDescriptor;
 import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.TableIf;
 import org.apache.doris.common.UserException;
+import org.apache.doris.common.util.Util;
 import org.apache.doris.planner.PlanNodeId;
 import org.apache.doris.planner.external.FileSplit.FileSplitCreator;
 import org.apache.doris.qe.ConnectContext;
@@ -32,6 +33,7 @@ import org.apache.doris.spi.Split;
 import org.apache.doris.statistics.StatisticalType;
 import org.apache.doris.thrift.TExplainLevel;
 import org.apache.doris.thrift.TExpr;
+import org.apache.doris.thrift.TFileCompressType;
 import org.apache.doris.thrift.TFileRangeDesc;
 import org.apache.doris.thrift.TFileScanNode;
 import org.apache.doris.thrift.TFileScanRangeParams;
@@ -220,19 +222,20 @@ public abstract class FileScanNode extends ExternalScanNode {
         if (blockLocations == null) {
             blockLocations = new BlockLocation[0];
         }
-        long splitSize = ConnectContext.get().getSessionVariable().getFileSplitSize();
-        if (splitSize <= 0) {
-            splitSize = blockSize;
-        }
-        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
-        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         List<Split> result = Lists.newArrayList();
-        if (!splittable) {
+        TFileCompressType compressType = Util.inferFileCompressTypeByPath(path.toString());
+        if (!splittable || compressType != TFileCompressType.PLAIN) {
             LOG.debug("Path {} is not splittable.", path);
             String[] hosts = blockLocations.length == 0 ? null : blockLocations[0].getHosts();
             result.add(splitCreator.create(path, 0, length, length, modificationTime, hosts, partitionValues));
             return result;
         }
+        long splitSize = ConnectContext.get().getSessionVariable().getFileSplitSize();
+        if (splitSize <= 0) {
+            splitSize = blockSize;
+        }
+        // Min split size is DEFAULT_SPLIT_SIZE(128MB).
+        splitSize = Math.max(splitSize, DEFAULT_SPLIT_SIZE);
         long bytesRemaining;
         for (bytesRemaining = length; (double) bytesRemaining / (double) splitSize > 1.1D;
                 bytesRemaining -= splitSize) {
diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
index 0f230c85f4..0bc8442760 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/planner/external/HiveSplit.java
@@ -32,11 +32,6 @@ public class HiveSplit extends FileSplit {
         this.acidInfo = acidInfo;
     }

-    public HiveSplit(Path path, long start, long length, long fileLength, String[] hosts, AcidInfo acidInfo) {
-        super(path, start, length, fileLength, hosts, null);
-        this.acidInfo = acidInfo;
-    }
-
     @Override
     public Object getInfo() {
         return acidInfo;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
