dengzhhu653 commented on code in PR #4520:
URL: https://github.com/apache/hive/pull/4520#discussion_r1299353313


##########
ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverMergeFiles.java:
##########
@@ -419,17 +454,78 @@ private AverageSize getAverageSize(FileSystem inpFs, Path 
dirPath) {
    */
   private long getMergeSize(FileSystem inpFs, Path dirPath, long avgSize) {
     AverageSize averageSize = getAverageSize(inpFs, dirPath);
-    if (averageSize.getTotalSize() < 0) {
+    return getMergeSize(averageSize.totalSize, averageSize.numFiles, avgSize);
+  }
+
+  private List<FileStatus> getManifestFilePaths(HiveConf conf, Path dirPath) 
throws IOException {
+    FileSystem manifestFs = dirPath.getFileSystem(conf);
+    List<String> filesKept;
+    List<FileStatus> pathsKept = new ArrayList<>();
+    try (FSDataInputStream inStream = manifestFs.open(new Path(dirPath, 
Utilities.BLOB_MANIFEST_FILE))) {
+      String paths = IOUtils.toString(inStream, Charset.defaultCharset());
+      filesKept = new 
ArrayList(Arrays.asList(paths.split(System.lineSeparator())));
+    }
+    // The first string contains the directory information. Not useful.
+    filesKept.remove(0);
+
+    for (String file : filesKept) {
+      pathsKept.add(manifestFs.getFileStatus(new Path(file)));
+    }
+    return pathsKept;
+  }
+
+  private long getMergeSize(List<FileStatus> fileStatuses, long avgSize) {
+    long totalSz = 0;
+    int noOfFiles = 0;
+    for (FileStatus fileStatus : fileStatuses) {
+      if (!fileStatus.isDirectory()) {
+        totalSz += fileStatus.getLen();
+        noOfFiles++;
+      }
+    }
+    return getMergeSize(totalSz, noOfFiles, avgSize);
+  }
+
+  private long getMergeSize(long totalSz, int numOfFiles, long avgSize) {
+    if (totalSz <= 0) {
       return -1;
     }
 
-    if (averageSize.getNumFiles() <= 1) {
+    if (numOfFiles <= 1) {
       return -1;
     }
 
-    if (averageSize.getTotalSize()/averageSize.getNumFiles() < avgSize) {
-      return averageSize.getTotalSize();
+    if (totalSz / numOfFiles < avgSize) {
+      return totalSz;
     }
+
     return -1;
   }
+
+  private void setupWorkWhenUsingManifestFile(MapWork mapWork, 
List<FileStatus> fileStatuses, Path dirPath) {
+    Map<String, Operator<? extends OperatorDesc>> aliasToWork = 
mapWork.getAliasToWork();
+    Map<Path, PartitionDesc> pathToPartitionInfo = 
mapWork.getPathToPartitionInfo();
+    Operator<? extends OperatorDesc> op = aliasToWork.get(dirPath.toString());
+    PartitionDesc partitionDesc = pathToPartitionInfo.get(dirPath);
+    Path tmpDirPath = Utilities.toTempPath(dirPath);
+    if (op != null) {
+      aliasToWork.remove(dirPath.toString());
+      aliasToWork.put(tmpDirPath.toString(), op);
+      mapWork.setAliasToWork(aliasToWork);
+    }
+    if (partitionDesc != null) {
+      pathToPartitionInfo.remove(dirPath);
+      pathToPartitionInfo.put(tmpDirPath, partitionDesc);

Review Comment:
   What if this is an insert with many dynamic partitions?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to