deniskuzZ commented on code in PR #3582:
URL: https://github.com/apache/hive/pull/3582#discussion_r1012649913
##########
ql/src/java/org/apache/hadoop/hive/ql/parse/repl/CopyUtils.java:
##########
@@ -492,20 +495,94 @@ private void doRegularCopyOnce(FileSystem sourceFs,
List<Path> srcList,
throw new IOException(e);
}
} else {
- //Destination should be empty
+ // Destination should have identical files only
if (overWrite) {
- deleteSubDirs(destinationFs, destination);
+ leaveIdenticalFilesOnly(sourceFs, paths, destinationFs, destination);
}
copyFilesBetweenFS(sourceFs, paths, destinationFs, destination, false,
true);
}
}
- private void deleteSubDirs(FileSystem fs, Path path) throws IOException {
- //Delete the root path instead of doing a listing
- //This is more optimised
- delete(fs, path, true);
- //Recreate just the Root folder
- mkdirs(fs, path);
+ /**
+ * Leave identical files only between source and destination. It gets source
paths.
+ */
+ void leaveIdenticalFilesOnly(FileSystem sourceFs, Path[] srcPaths,
+ FileSystem destinationFs, Path destinationPath) throws IOException {
+ for (Path srcPath : srcPaths) {
+ leaveIdenticalFilesOnlyFileByFile(sourceFs, srcPath,
+ destinationFs, new Path(destinationPath, srcPath.getName()));
+ }
+ }
+
+ /**
+ * Leave identical files only between source and destination. It gets a
source path.
+ */
+ private void leaveIdenticalFilesOnlyFileByFile(FileSystem sourceFs, Path
srcPath,
+ FileSystem destinationFs, Path dstPath) throws IOException {
+ boolean toDelete = false;
+ Type dstType = getType(destinationFs, dstPath);
+ Type srcType = getType(sourceFs, srcPath);
+ LOG.debug("Source path: {}, type: {}", srcPath, srcType);
+ LOG.debug("Destination path: {}, type: {}", dstPath, dstType);
+ switch (dstType) {
+ case NONE:
+ // If destination is none, then no need to delete anything.
+ break;
+ case FILE:
+ // If destination is a file,
+ if (srcType == Type.FILE) {
+ // If source is a file, then delete the destination file if it is not
same as source.
+ toDelete = !FileUtils.isSameFile(sourceFs, srcPath, destinationFs,
dstPath);
+ LOG.debug("Same file?: {}", !toDelete);
+ } else {
+ // Otherwise, delete the destination file.
+ toDelete = true;
+ }
+ break;
+ case DIRECTORY:
+ // If destination is a directory,
+ if (srcType == Type.DIRECTORY) {
+ // If both are directories, visit for children of both.
+ Set<String> bothChildNames = Stream.concat(
Review Comment:
why do you need concat? I think idea was to remove target-only files that
are not identical to the source. You should only compare the common set, isn't
it?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]