spark git commit: [SPARK-19812] YARN shuffle service fails to relocate recovery DB acro…
Repository: spark Updated Branches: refs/heads/master 7a365257e -> 7fecf5130 [SPARK-19812] YARN shuffle service fails to relocate recovery DB acro⦠â¦ss NFS directories ## What changes were proposed in this pull request? Change from using java Files.move to use Hadoop filesystem operations to move the directories. The java Files.move does not work when moving directories across NFS mounts and in fact also says that if the directory has entries you should do a recursive move. We are already using Hadoop filesystem here so just use the local filesystem from there as it handles this properly. Note that the DB here is actually a directory of files and not just a single file, hence the change in the name of the local var. ## How was this patch tested? Ran YarnShuffleServiceSuite unit tests. Unfortunately couldn't easily add one here since involves NFS. Ran manual tests to verify that the DB directories were properly moved across NFS mounted directories. Have been running this internally for weeks. Author: Tom GravesCloses #17748 from tgravescs/SPARK-19812. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fecf513 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fecf513 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fecf513 Branch: refs/heads/master Commit: 7fecf5130163df9c204a2764d121a7011d007f4e Parents: 7a36525 Author: Tom Graves Authored: Wed Apr 26 08:23:31 2017 -0500 Committer: Tom Graves Committed: Wed Apr 26 08:23:31 2017 -0500 -- .../spark/network/yarn/YarnShuffleService.java | 23 +++- 1 file changed, 13 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7fecf513/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java -- diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index c7620d0..4acc203 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.ByteBuffer; -import java.nio.file.Files; import java.util.List; import java.util.Map; @@ -340,9 +339,9 @@ public class YarnShuffleService extends AuxiliaryService { * when it previously was not. If YARN NM recovery is enabled it uses that path, otherwise * it will uses a YARN local dir. */ - protected File initRecoveryDb(String dbFileName) { + protected File initRecoveryDb(String dbName) { if (_recoveryPath != null) { -File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbFileName); +File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbName); if (recoveryFile.exists()) { return recoveryFile; } @@ -350,7 +349,7 @@ public class YarnShuffleService extends AuxiliaryService { // db doesn't exist in recovery path go check local dirs for it String[] localDirs = _conf.getTrimmedStrings("yarn.nodemanager.local-dirs"); for (String dir : localDirs) { - File f = new File(new Path(dir).toUri().getPath(), dbFileName); + File f = new File(new Path(dir).toUri().getPath(), dbName); if (f.exists()) { if (_recoveryPath == null) { // If NM recovery is not enabled, we should specify the recovery path using NM local @@ -363,17 +362,21 @@ public class YarnShuffleService extends AuxiliaryService { // make sure to move all DBs to the recovery path from the old NM local dirs. // If another DB was initialized first just make sure all the DBs are in the same // location. - File newLoc = new File(_recoveryPath.toUri().getPath(), dbFileName); - if (!newLoc.equals(f)) { + Path newLoc = new Path(_recoveryPath, dbName); + Path copyFrom = new Path(f.toURI()); + if (!newLoc.equals(copyFrom)) { +logger.info("Moving " + copyFrom + " to: " + newLoc); try { - Files.move(f.toPath(), newLoc.toPath()); + // The move here needs to handle moving non-empty directories across NFS mounts + FileSystem fs = FileSystem.getLocal(_conf); + fs.rename(copyFrom, newLoc); } catch (Exception e) { // Fail to move recovery file to new path, just continue on with new DB location logger.error("Failed to move
spark git commit: [SPARK-19812] YARN shuffle service fails to relocate recovery DB acro…
Repository: spark Updated Branches: refs/heads/branch-2.2 a2f5ced32 -> 612952251 [SPARK-19812] YARN shuffle service fails to relocate recovery DB acro⦠â¦ss NFS directories ## What changes were proposed in this pull request? Change from using java Files.move to use Hadoop filesystem operations to move the directories. The java Files.move does not work when moving directories across NFS mounts and in fact also says that if the directory has entries you should do a recursive move. We are already using Hadoop filesystem here so just use the local filesystem from there as it handles this properly. Note that the DB here is actually a directory of files and not just a single file, hence the change in the name of the local var. ## How was this patch tested? Ran YarnShuffleServiceSuite unit tests. Unfortunately couldn't easily add one here since involves NFS. Ran manual tests to verify that the DB directories were properly moved across NFS mounted directories. Have been running this internally for weeks. Author: Tom GravesCloses #17748 from tgravescs/SPARK-19812. (cherry picked from commit 7fecf5130163df9c204a2764d121a7011d007f4e) Signed-off-by: Tom Graves Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/61295225 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/61295225 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/61295225 Branch: refs/heads/branch-2.2 Commit: 612952251c5ac626e256bc2ab9414faf1662dde9 Parents: a2f5ced Author: Tom Graves Authored: Wed Apr 26 08:23:31 2017 -0500 Committer: Tom Graves Committed: Wed Apr 26 08:24:12 2017 -0500 -- .../spark/network/yarn/YarnShuffleService.java | 23 +++- 1 file changed, 13 insertions(+), 10 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/61295225/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java -- diff --git a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java index c7620d0..4acc203 100644 --- a/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java +++ b/common/network-yarn/src/main/java/org/apache/spark/network/yarn/YarnShuffleService.java @@ -21,7 +21,6 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; import java.nio.ByteBuffer; -import java.nio.file.Files; import java.util.List; import java.util.Map; @@ -340,9 +339,9 @@ public class YarnShuffleService extends AuxiliaryService { * when it previously was not. If YARN NM recovery is enabled it uses that path, otherwise * it will uses a YARN local dir. */ - protected File initRecoveryDb(String dbFileName) { + protected File initRecoveryDb(String dbName) { if (_recoveryPath != null) { -File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbFileName); +File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbName); if (recoveryFile.exists()) { return recoveryFile; } @@ -350,7 +349,7 @@ public class YarnShuffleService extends AuxiliaryService { // db doesn't exist in recovery path go check local dirs for it String[] localDirs = _conf.getTrimmedStrings("yarn.nodemanager.local-dirs"); for (String dir : localDirs) { - File f = new File(new Path(dir).toUri().getPath(), dbFileName); + File f = new File(new Path(dir).toUri().getPath(), dbName); if (f.exists()) { if (_recoveryPath == null) { // If NM recovery is not enabled, we should specify the recovery path using NM local @@ -363,17 +362,21 @@ public class YarnShuffleService extends AuxiliaryService { // make sure to move all DBs to the recovery path from the old NM local dirs. // If another DB was initialized first just make sure all the DBs are in the same // location. - File newLoc = new File(_recoveryPath.toUri().getPath(), dbFileName); - if (!newLoc.equals(f)) { + Path newLoc = new Path(_recoveryPath, dbName); + Path copyFrom = new Path(f.toURI()); + if (!newLoc.equals(copyFrom)) { +logger.info("Moving " + copyFrom + " to: " + newLoc); try { - Files.move(f.toPath(), newLoc.toPath()); + // The move here needs to handle moving non-empty directories across NFS mounts + FileSystem fs = FileSystem.getLocal(_conf); + fs.rename(copyFrom, newLoc); } catch (Exception e) {