tillrohrmann commented on a change in pull request #14538:
URL: https://github.com/apache/flink/pull/14538#discussion_r551199868



##########
File path: 
flink-yarn/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
##########
@@ -292,26 +292,39 @@ public void addShipFiles(List<File> shipFiles) {
         this.shipFiles.addAll(shipFiles);
     }
 
-    private void addShipArchives(List<File> shipArchives) {
+    private void addShipArchives(List<Path> shipArchives) {
         checkArgument(
-                isArchiveOnlyIncludedInShipArchiveFiles(shipArchives),
+                isArchiveOnlyIncludedInShipArchiveFiles(shipArchives, 
yarnConfiguration),
                 "Non-archive files are included.");
         this.shipArchives.addAll(shipArchives);
     }
 
-    private static boolean isArchiveOnlyIncludedInShipArchiveFiles(List<File> 
shipFiles) {
-        return shipFiles.stream()
-                .filter(File::isFile)
-                .map(File::getName)
-                .map(String::toLowerCase)
-                .allMatch(
-                        name ->
-                                name.endsWith(".tar.gz")
-                                        || name.endsWith(".tar")
-                                        || name.endsWith(".tgz")
-                                        || name.endsWith(".dst")
-                                        || name.endsWith(".jar")
-                                        || name.endsWith(".zip"));
+    private static boolean isArchiveOnlyIncludedInShipArchiveFiles(List<Path> 
shipFiles, YarnConfiguration yarnConfig) {
+        for (Path shipFile : shipFiles) {
+            try {
+                if (Utils.isRemotePath(shipFile.toString())) {
+                    final FileSystem fs = shipFile.getFileSystem(yarnConfig);
+                    final String name = shipFile.getName().toLowerCase();
+                    if (fs.isFile(shipFile) && !(name.endsWith(".tar.gz") || 
name.endsWith(".tar") || name.endsWith(
+                        ".tgz") || name.endsWith(".dst") || 
name.endsWith(".jar") || name.endsWith(".zip"))) {
+                        return false;
+                    }
+                } else {
+                    final File localFile = new 
File(shipFile.toUri().getPath());
+                    final String name = localFile.getName().toLowerCase();
+                    if (localFile.isFile() && !(name.endsWith(".tar.gz") || 
name.endsWith(".tar") || name.endsWith(
+                        ".tgz") || name.endsWith(".dst") || 
name.endsWith(".jar") || name.endsWith(".zip"))) {
+                        return false;
+                    }

Review comment:
       This block looks like duplicated code.

##########
File path: 
flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationFileUploader.java
##########
@@ -163,8 +164,15 @@ YarnLocalResourceDescriptor registerSingleLocalResource(
         addToRemotePaths(whetherToAddToRemotePaths, resourcePath);
 
         if (Utils.isRemotePath(resourcePath.toString())) {
-            final FileStatus fileStatus = 
fileSystem.getFileStatus(resourcePath);
-            LOG.debug("Using remote file {} to register local resource", 
fileStatus.getPath());
+            final FileSystem srcFs = 
resourcePath.getFileSystem(fileSystem.getConf());
+            final FileStatus resourceFileStatus = 
srcFs.getFileStatus(resourcePath);
+            LOG.debug("Using remote file {} to register local resource", 
resourceFileStatus.getPath());
+            final Path applicationDir = getApplicationDirPath(homeDir, 
applicationId);
+            final String suffix = (relativeDstPath.isEmpty() ? "" : 
relativeDstPath + "/") + resourcePath.getName();
+            final Path dst = new Path(applicationDir, suffix);
+            LOG.debug("Copying from {} to {} with replication factor {}", 
resourcePath, dst, fileReplication);
+            FileUtil.copy(srcFs, resourcePath, fileSystem, dst, false, 
fileSystem.getConf());

Review comment:
       Won't this also copy files on the remote FS if they are already located 
on this FS?

##########
File path: 
flink-yarn/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
##########
@@ -1757,11 +1769,28 @@ ContainerLaunchContext setupApplicationMasterContainer(
                 YarnConfigOptions.CLASSPATH_INCLUDE_USER_JAR);
     }
 
-    private static boolean isUsrLibDirIncludedInShipFiles(List<File> 
shipFiles) {
-        return shipFiles.stream()
-                .filter(File::isDirectory)
-                .map(File::getName)
-                .noneMatch(name -> name.equals(DEFAULT_FLINK_USR_LIB_DIR));
+    private static boolean isUsrLibDirIncludedInShipFiles(List<Path> 
shipFiles, YarnConfiguration yarnConfig) {
+        for (Path shipFile : shipFiles) {
+            try {
+                if (Utils.isRemotePath(shipFile.toString())) {
+                    final FileSystem fileSystem = 
shipFile.getFileSystem(yarnConfig);
+                    if (fileSystem.isDirectory(shipFile) && 
shipFile.getName().equals(DEFAULT_FLINK_USR_LIB_DIR)) {
+                        return false;
+                    }
+                } else {
+                    final File localFile = new 
File(shipFile.toUri().getPath());
+                    if (localFile.isDirectory() && 
localFile.getName().equals(DEFAULT_FLINK_USR_LIB_DIR)) {
+                        return false;
+                    }
+                }
+            } catch (IOException e) {
+                LOG.error("The shipping file {} parsing failed.", shipFile);
+                throw new RuntimeException("The shipping file " + shipFile + " 
parsing failed. " +
+                    "Error message: " + e.getMessage());
+            }

Review comment:
       Same here.

##########
File path: 
flink-yarn/src/main/java/org/apache/flink/yarn/YarnApplicationFileUploader.java
##########
@@ -163,8 +164,15 @@ YarnLocalResourceDescriptor registerSingleLocalResource(
         addToRemotePaths(whetherToAddToRemotePaths, resourcePath);
 
         if (Utils.isRemotePath(resourcePath.toString())) {
-            final FileStatus fileStatus = 
fileSystem.getFileStatus(resourcePath);
-            LOG.debug("Using remote file {} to register local resource", 
fileStatus.getPath());
+            final FileSystem srcFs = 
resourcePath.getFileSystem(fileSystem.getConf());
+            final FileStatus resourceFileStatus = 
srcFs.getFileStatus(resourcePath);
+            LOG.debug("Using remote file {} to register local resource", 
resourceFileStatus.getPath());
+            final Path applicationDir = getApplicationDirPath(homeDir, 
applicationId);
+            final String suffix = (relativeDstPath.isEmpty() ? "" : 
relativeDstPath + "/") + resourcePath.getName();
+            final Path dst = new Path(applicationDir, suffix);
+            LOG.debug("Copying from {} to {} with replication factor {}", 
resourcePath, dst, fileReplication);
+            FileUtil.copy(srcFs, resourcePath, fileSystem, dst, false, 
fileSystem.getConf());
+            final FileStatus fileStatus = fileSystem.getFileStatus(dst);

Review comment:
       Could we unify this piece of code with `uploadLocalFileToRemote`? In 
`uploadLocalFileToRemote`, we support uploading to an eventually 
consistent FS, for example. This is missing here. Moreover, I think we need 
tests for the added functionality.

##########
File path: 
flink-yarn/src/main/java/org/apache/flink/yarn/YarnClusterDescriptor.java
##########
@@ -292,26 +292,39 @@ public void addShipFiles(List<File> shipFiles) {
         this.shipFiles.addAll(shipFiles);
     }
 
-    private void addShipArchives(List<File> shipArchives) {
+    private void addShipArchives(List<Path> shipArchives) {
         checkArgument(
-                isArchiveOnlyIncludedInShipArchiveFiles(shipArchives),
+                isArchiveOnlyIncludedInShipArchiveFiles(shipArchives, 
yarnConfiguration),
                 "Non-archive files are included.");
         this.shipArchives.addAll(shipArchives);
     }
 
-    private static boolean isArchiveOnlyIncludedInShipArchiveFiles(List<File> 
shipFiles) {
-        return shipFiles.stream()
-                .filter(File::isFile)
-                .map(File::getName)
-                .map(String::toLowerCase)
-                .allMatch(
-                        name ->
-                                name.endsWith(".tar.gz")
-                                        || name.endsWith(".tar")
-                                        || name.endsWith(".tgz")
-                                        || name.endsWith(".dst")
-                                        || name.endsWith(".jar")
-                                        || name.endsWith(".zip"));
+    private static boolean isArchiveOnlyIncludedInShipArchiveFiles(List<Path> 
shipFiles, YarnConfiguration yarnConfig) {
+        for (Path shipFile : shipFiles) {
+            try {
+                if (Utils.isRemotePath(shipFile.toString())) {
+                    final FileSystem fs = shipFile.getFileSystem(yarnConfig);
+                    final String name = shipFile.getName().toLowerCase();
+                    if (fs.isFile(shipFile) && !(name.endsWith(".tar.gz") || 
name.endsWith(".tar") || name.endsWith(
+                        ".tgz") || name.endsWith(".dst") || 
name.endsWith(".jar") || name.endsWith(".zip"))) {
+                        return false;
+                    }
+                } else {
+                    final File localFile = new 
File(shipFile.toUri().getPath());
+                    final String name = localFile.getName().toLowerCase();
+                    if (localFile.isFile() && !(name.endsWith(".tar.gz") || 
name.endsWith(".tar") || name.endsWith(
+                        ".tgz") || name.endsWith(".dst") || 
name.endsWith(".jar") || name.endsWith(".zip"))) {
+                        return false;
+                    }
+                }
+            } catch (IOException e) {
+                LOG.error("The shipping archive archive {} parsing failed.", 
shipFile);
+                throw new RuntimeException("The shipping archive " + shipFile 
+ " parsing failed. " +
+                    "Error message: " + e.getMessage());

Review comment:
       We should not throw `RuntimeException` but rather fail explicitly if an 
error occurs here. We need to tell the user that something is wrong with their 
shipped files.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Reply via email to