(hadoop) branch branch-3.4 updated: HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598)
This is an automated email from the ASF dual-hosted git repository. ctrezzo pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/branch-3.4 by this push: new fcc6ef6b10e0 HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) fcc6ef6b10e0 is described below commit fcc6ef6b10e0595e8bbeb0c169b77b359d4b9b53 Author: Jtdellaringa <78453490+jtdellari...@users.noreply.github.com> AuthorDate: Tue Mar 5 10:33:22 2024 -0800 HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) * Add nnSocketAddress to log when processing command from active nn --- .../java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index b228fb2d5716..7d5d05bac54d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -777,7 +777,7 @@ class BPOfferService { ((BlockRecoveryCommand)cmd).getRecoveringBlocks()); break; case DatanodeProtocol.DNA_ACCESSKEYUPDATE: - LOG.info("DatanodeCommand action: DNA_ACCESSKEYUPDATE"); + LOG.info("DatanodeCommand action from active NN {}: DNA_ACCESSKEYUPDATE", nnSocketAddress); if (dn.isBlockTokenEnabled) { dn.blockPoolTokenSecretManager.addKeys( getBlockPoolId(), - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
(hadoop) branch branch-3.3 updated: HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598)
This is an automated email from the ASF dual-hosted git repository. ctrezzo pushed a commit to branch branch-3.3 in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/branch-3.3 by this push: new 7ad49298689c HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) 7ad49298689c is described below commit 7ad49298689c6f62339e4804c8f06e64cb9ebd73 Author: Jtdellaringa <78453490+jtdellari...@users.noreply.github.com> AuthorDate: Tue Mar 5 10:33:22 2024 -0800 HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) * Add nnSocketAddress to log when processing command from active nn --- .../java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index b8a52aa79fed..afbeca4d88e9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -778,7 +778,7 @@ class BPOfferService { ((BlockRecoveryCommand)cmd).getRecoveringBlocks()); break; case DatanodeProtocol.DNA_ACCESSKEYUPDATE: - LOG.info("DatanodeCommand action: DNA_ACCESSKEYUPDATE"); + LOG.info("DatanodeCommand action from active NN {}: DNA_ACCESSKEYUPDATE", nnSocketAddress); if (dn.isBlockTokenEnabled) { dn.blockPoolTokenSecretManager.addKeys( getBlockPoolId(), - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
(hadoop) branch trunk updated: HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598)
This is an automated email from the ASF dual-hosted git repository. ctrezzo pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/trunk by this push: new a2d72411905e HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) a2d72411905e is described below commit a2d72411905e51038824593a069e46345d9a5e90 Author: Jtdellaringa <78453490+jtdellari...@users.noreply.github.com> AuthorDate: Tue Mar 5 10:33:22 2024 -0800 HDFS-17404. Add NN Socket Address to log when processing command from active NN (#6598) * Add nnSocketAddress to log when processing command from active nn --- .../java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index b228fb2d5716..7d5d05bac54d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -777,7 +777,7 @@ class BPOfferService { ((BlockRecoveryCommand)cmd).getRecoveringBlocks()); break; case DatanodeProtocol.DNA_ACCESSKEYUPDATE: - LOG.info("DatanodeCommand action: DNA_ACCESSKEYUPDATE"); + LOG.info("DatanodeCommand action from active NN {}: DNA_ACCESSKEYUPDATE", nnSocketAddress); if (dn.isBlockTokenEnabled) { dn.blockPoolTokenSecretManager.addKeys( getBlockPoolId(), - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
(hadoop) branch trunk updated (d278b349f639 -> d74e5160cd71)
This is an automated email from the ASF dual-hosted git repository. ctrezzo pushed a change to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git from d278b349f639 HADOOP-19044. S3A: AWS SDK V2 - Update region logic (#6479) add d74e5160cd71 HADOOP-19061 Capture exception from rpcRequestSender.start() in IPC.Connection.run() (#6519) No new revisions were added by this update. Summary of changes: .../main/java/org/apache/hadoop/ipc/Client.java| 28 -- 1 file changed, 15 insertions(+), 13 deletions(-) - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
(hadoop) branch trunk updated: HDFS-17332 DFSInputStream: avoid logging stacktrace until when we really need to fail a read request with a MissingBlockException (#6446)
This is an automated email from the ASF dual-hosted git repository. ctrezzo pushed a commit to branch trunk in repository https://gitbox.apache.org/repos/asf/hadoop.git The following commit(s) were added to refs/heads/trunk by this push: new 27ecc23ae7c5 HDFS-17332 DFSInputStream: avoid logging stacktrace until when we really need to fail a read request with a MissingBlockException (#6446) 27ecc23ae7c5 is described below commit 27ecc23ae7c5cafba6a5ea58d4a68d25bd7507dd Author: Xing Lin AuthorDate: Thu Jan 18 18:03:28 2024 -0800 HDFS-17332 DFSInputStream: avoid logging stacktrace until when we really need to fail a read request with a MissingBlockException (#6446) Print a warn log message for read retries and only print the full stack trace for a read request failure. Contributed by: Xing Lin --- .../org/apache/hadoop/hdfs/DFSInputStream.java | 108 + .../apache/hadoop/hdfs/DFSStripedInputStream.java | 8 +- .../java/org/apache/hadoop/hdfs/TestPread.java | 169 - .../test/java/org/apache/hadoop/hdfs/TestRead.java | 149 +- 4 files changed, 404 insertions(+), 30 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index b5be33206e71..8fc6c555690d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs-client/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -771,7 +771,7 @@ public class DFSInputStream extends FSInputStream * ChecksumFileSystem */ private synchronized int readBuffer(ReaderStrategy reader, int len, - CorruptedBlocks corruptedBlocks) + CorruptedBlocks corruptedBlocks, final Map> exceptionMap) throws IOException { IOException ioe; @@ -786,6 +786,7 @@ public class DFSInputStream extends FSInputStream while (true) { // retry as many times as seekToNewSource allows. 
try { +DFSClientFaultInjector.get().fetchFromDatanodeException(); return reader.readFromBlock(blockReader, len); } catch (ChecksumException ce) { DFSClient.LOG.warn("Found Checksum error for " @@ -796,11 +797,18 @@ public class DFSInputStream extends FSInputStream // we want to remember which block replicas we have tried corruptedBlocks.addCorruptedBlock(getCurrentBlock(), currentNode); } catch (IOException e) { -if (!retryCurrentNode) { - DFSClient.LOG.warn("Exception while reading from " - + getCurrentBlock() + " of " + src + " from " - + currentNode, e); +String msg = String.format("Failed to read block %s for file %s from datanode %s. " ++ "Exception is %s. Retry with the current or next available datanode.", +getCurrentBlock().getBlockName(), src, currentNode.getXferAddr(), e); +DFSClient.LOG.warn(msg); + +// Add the exception to exceptionMap for this datanode. +InetSocketAddress datanode = currentNode.getResolvedAddress(); +if (!exceptionMap.containsKey(datanode)) { + exceptionMap.put(datanode, new LinkedList()); } +exceptionMap.get(datanode).add(e); + ioe = e; } boolean sourceFound; @@ -822,6 +830,29 @@ public class DFSInputStream extends FSInputStream } } + /** + * Send IOExceptions happened at each individual datanode to DFSClient.LOG for a failed read + * request. Used in both readWithStrategy() and pread(), to record the exceptions when a read + * request failed to be served. 
+ * @param position offset in the file where we fail to read + * @param exceptionMap a map which stores the list of IOExceptions for each datanode + */ + private void logDataNodeExceptionsOnReadError(long position, final Map> exceptionMap) { +String msg = String.format("Failed to read from all available datanodes for file %s " ++ "at position=%d after retrying.", src, position); +DFSClient.LOG.error(msg); +for (Map.Entry> dataNodeExceptions : +exceptionMap.entrySet()) { + List exceptions = dataNodeExceptions.getValue(); + for (IOException ex : exceptions) { +msg = String.format("Exception when fetching file %s at position=%d at datanode %s:", src, +position, dataNodeExceptions.getKey()); +DFSClient.LOG.error(msg, ex); + } +} + } + protected synchronized int readWithStrategy(ReaderStrategy strategy) throws IOException { dfsClient.checkOpen(); @@ -831,6 +862,9 @@ public class DFSInputStream extends FSInputStream int len = strategy.getTargetLength(); CorruptedBloc
[1/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
Repository: hadoop Updated Branches: refs/heads/branch-2 b9426c0bf -> 3e7c06af4 http://git-wip-us.apache.org/repos/asf/hadoop/blob/3e7c06af/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java index 20b7b7d..f733bb5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java @@ -212,7 +212,7 @@ public class TestJobResourceUploader { destinationPathPrefix + "tmpArchives1.tgz#tmpArchivesfragment1.tgz" }; private String jobjarSubmitDir = "/jobjar-submit-dir"; - private String expectedJobJar = jobjarSubmitDir + "/job.jar"; + private String basicExpectedJobJar = jobjarSubmitDir + "/job.jar"; @Test public void testPathsWithNoFragNoSchemeRelative() throws IOException { @@ -228,7 +228,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -246,7 +246,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -264,7 +264,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = 
new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -282,7 +282,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -300,7 +300,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -318,7 +318,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -336,7 +336,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithWildcard, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -354,50 +354,45 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } private void runTmpResourcePathTest(JobResourceUploader uploader, ResourceConf rConf, JobConf jConf, String[] expectedFiles, String[] expectedArchives, String expectedJobJar) throws IOException { -rConf.setupJobConf(jConf); -// We use a pre and post job object here because we need the post job object -// to get the new values set during 
uploadResources, but we need the pre job -// to set the job jar because JobResourceUploader#uploadJobJar uses the Job -// interface not the JobConf. The post job is automatically created in -// validateResourcePaths. -Job jobPre = Job.getInstance(jConf); -uploadResources(uploader, jConf, jobPre); - -validateResourcePaths(jConf, expectedFiles, expectedArchives, -expectedJobJar, jobPre); +Job job = rConf.setupJobConf(jConf); +uploadResources(uploader, job); +validateResourcePaths(job, expectedFiles, expectedArchives, expectedJobJar);
[2/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
MAPREDUCE-5951. Add support for the YARN Shared Cache. (cherry picked from commit e46d5bb962b0c942f993afc505b165b1cd96e51b) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3e7c06af Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3e7c06af Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3e7c06af Branch: refs/heads/branch-2 Commit: 3e7c06af41ea5124f595229ef869cb2d31b72ca8 Parents: b9426c0 Author: Chris TrezzoAuthored: Thu Oct 12 11:30:38 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 12 11:30:38 2017 -0700 -- .../mapreduce/v2/app/job/impl/JobImpl.java | 17 + .../v2/app/job/impl/TaskAttemptImpl.java| 52 ++- .../mapreduce/v2/util/LocalResourceBuilder.java | 180 .../apache/hadoop/mapreduce/v2/util/MRApps.java | 146 ++- .../TestLocalDistributedCacheManager.java | 9 + .../hadoop/mapreduce/v2/util/TestMRApps.java| 8 +- .../hadoop-mapreduce-client-core/pom.xml| 6 + .../java/org/apache/hadoop/mapreduce/Job.java | 226 ++ .../hadoop/mapreduce/JobResourceUploader.java | 416 --- .../apache/hadoop/mapreduce/MRJobConfig.java| 71 .../hadoop/mapreduce/SharedCacheConfig.java | 102 + .../src/main/resources/mapred-default.xml | 11 + .../src/site/markdown/SharedCacheSupport.md | 100 + .../mapreduce/TestJobResourceUploader.java | 76 ++-- .../TestJobResourceUploaderWithSharedCache.java | 365 .../org/apache/hadoop/mapred/YARNRunner.java| 54 ++- .../hadoop/mapred/TestLocalJobSubmission.java | 52 +++ .../apache/hadoop/mapreduce/v2/TestMRJobs.java | 59 +++ hadoop-project/src/site/site.xml| 1 + 19 files changed, 1711 insertions(+), 240 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/3e7c06af/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java -- diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index c40d6d9..81bc3ce 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -50,6 +50,7 @@ import org.apache.hadoop.mapred.JobACLsManager; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TaskCompletionEvent; import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; @@ -1412,6 +1413,20 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, new char[] {'"', '=', '.'}); } + /* + * The goal is to make sure only the NM that hosts MRAppMaster will upload + * resources to shared cache. Clean up the shared cache policies for all + * resources so that later when TaskAttemptImpl creates + * ContainerLaunchContext, LocalResource.setShouldBeUploadedToSharedCache will + * be set up to false. In that way, the NMs that host the task containers + * won't try to upload the resources to shared cache. 
+ */ + private static void cleanupSharedCacheUploadPolicies(Configuration conf) { +Map emap = Collections.emptyMap(); +Job.setArchiveSharedCacheUploadPolicies(conf, emap); +Job.setFileSharedCacheUploadPolicies(conf, emap); + } + public static class InitTransition implements MultipleArcTransition { @@ -1490,6 +1505,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.allowedReduceFailuresPercent = job.conf.getInt(MRJobConfig.REDUCE_FAILURES_MAXPERCENT, 0); +cleanupSharedCacheUploadPolicies(job.conf); + // create the Tasks but don't start them yet createMapTasks(job, inputLength, taskSplitMetaInfo); createReduceTasks(job); http://git-wip-us.apache.org/repos/asf/hadoop/blob/3e7c06af/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java
[1/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 97b7d39a5 -> 24b03eb79 http://git-wip-us.apache.org/repos/asf/hadoop/blob/24b03eb7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java index d0d7a34..d347da5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java @@ -220,7 +220,7 @@ public class TestJobResourceUploader { destinationPathPrefix + "tmpArchives1.tgz#tmpArchivesfragment1.tgz" }; private String jobjarSubmitDir = "/jobjar-submit-dir"; - private String expectedJobJar = jobjarSubmitDir + "/job.jar"; + private String basicExpectedJobJar = jobjarSubmitDir + "/job.jar"; @Test public void testPathsWithNoFragNoSchemeRelative() throws IOException { @@ -236,7 +236,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -254,7 +254,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -272,7 +272,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader 
= new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -290,7 +290,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -308,7 +308,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -326,7 +326,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -344,7 +344,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithWildcard, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -362,7 +362,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -402,44 +402,39 @@ public class TestJobResourceUploader { private void runTmpResourcePathTest(JobResourceUploader uploader, ResourceConf rConf, JobConf jConf, String[] expectedFiles, String[] expectedArchives, String expectedJobJar) throws IOException { -rConf.setupJobConf(jConf); -// We use a pre and post job object here because 
we need the post job object -// to get the new values set during uploadResources, but we need the pre job -// to set the job jar because JobResourceUploader#uploadJobJar uses the Job -// interface not the JobConf. The post job is automatically created in -// validateResourcePaths. -Job jobPre = Job.getInstance(jConf); -uploadResources(uploader, jConf, jobPre); - -validateResourcePaths(jConf, expectedFiles, expectedArchives, -expectedJobJar, jobPre); +Job job = rConf.setupJobConf(jConf); +uploadResources(uploader, job); +
[2/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
MAPREDUCE-5951. Add support for the YARN Shared Cache. (cherry picked from commit e46d5bb962b0c942f993afc505b165b1cd96e51b) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/24b03eb7 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/24b03eb7 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/24b03eb7 Branch: refs/heads/branch-3.0 Commit: 24b03eb79f4e40824d7fe971b2b57c91fc425c92 Parents: 97b7d39 Author: Chris TrezzoAuthored: Thu Oct 12 10:58:02 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 12 11:15:46 2017 -0700 -- .../mapreduce/v2/app/job/impl/JobImpl.java | 16 + .../v2/app/job/impl/TaskAttemptImpl.java| 52 ++- .../mapreduce/v2/util/LocalResourceBuilder.java | 169 .../apache/hadoop/mapreduce/v2/util/MRApps.java | 137 ++ .../TestLocalDistributedCacheManager.java | 9 + .../hadoop/mapreduce/v2/util/TestMRApps.java| 8 +- .../hadoop-mapreduce-client-core/pom.xml| 6 + .../java/org/apache/hadoop/mapreduce/Job.java | 226 ++ .../hadoop/mapreduce/JobResourceUploader.java | 416 --- .../apache/hadoop/mapreduce/MRJobConfig.java| 71 .../hadoop/mapreduce/SharedCacheConfig.java | 102 + .../src/main/resources/mapred-default.xml | 11 + .../src/site/markdown/SharedCacheSupport.md | 100 + .../mapreduce/TestJobResourceUploader.java | 76 ++-- .../TestJobResourceUploaderWithSharedCache.java | 365 .../org/apache/hadoop/mapred/YARNRunner.java| 54 ++- .../hadoop/mapred/TestLocalJobSubmission.java | 52 +++ .../apache/hadoop/mapreduce/v2/TestMRJobs.java | 59 +++ hadoop-project/src/site/site.xml| 1 + 19 files changed, 1701 insertions(+), 229 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/24b03eb7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java -- diff --git 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index 757c545..d2e2492 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapred.JobACLsManager; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TaskCompletionEvent; import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; @@ -1414,6 +1415,19 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, new char[] {'"', '=', '.'}); } + /* + * The goal is to make sure only the NM that hosts MRAppMaster will upload + * resources to shared cache. Clean up the shared cache policies for all + * resources so that later when TaskAttemptImpl creates + * ContainerLaunchContext, LocalResource.setShouldBeUploadedToSharedCache will + * be set up to false. In that way, the NMs that host the task containers + * won't try to upload the resources to shared cache. 
+ */ + private static void cleanupSharedCacheUploadPolicies(Configuration conf) { +Job.setArchiveSharedCacheUploadPolicies(conf, Collections.emptyMap()); +Job.setFileSharedCacheUploadPolicies(conf, Collections.emptyMap()); + } + public static class InitTransition implements MultipleArcTransition { @@ -1492,6 +1506,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.allowedReduceFailuresPercent = job.conf.getInt(MRJobConfig.REDUCE_FAILURES_MAXPERCENT, 0); +cleanupSharedCacheUploadPolicies(job.conf); + // create the Tasks but don't start them yet createMapTasks(job, inputLength, taskSplitMetaInfo); createReduceTasks(job); http://git-wip-us.apache.org/repos/asf/hadoop/blob/24b03eb7/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java -- diff
[1/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
Repository: hadoop Updated Branches: refs/heads/trunk 13fcfb3d4 -> e46d5bb96 http://git-wip-us.apache.org/repos/asf/hadoop/blob/e46d5bb9/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java index d0d7a34..d347da5 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestJobResourceUploader.java @@ -220,7 +220,7 @@ public class TestJobResourceUploader { destinationPathPrefix + "tmpArchives1.tgz#tmpArchivesfragment1.tgz" }; private String jobjarSubmitDir = "/jobjar-submit-dir"; - private String expectedJobJar = jobjarSubmitDir + "/job.jar"; + private String basicExpectedJobJar = jobjarSubmitDir + "/job.jar"; @Test public void testPathsWithNoFragNoSchemeRelative() throws IOException { @@ -236,7 +236,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -254,7 +254,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -272,7 +272,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = 
new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -290,7 +290,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -308,7 +308,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -326,7 +326,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesNoFrags, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -344,7 +344,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithWildcard, -expectedArchivesNoFrags, expectedJobJar); +expectedArchivesNoFrags, basicExpectedJobJar); } @Test @@ -362,7 +362,7 @@ public class TestJobResourceUploader { JobResourceUploader uploader = new StubedUploader(jConf, true); runTmpResourcePathTest(uploader, rConf, jConf, expectedFilesWithFrags, -expectedArchivesWithFrags, expectedJobJar); +expectedArchivesWithFrags, basicExpectedJobJar); } @Test @@ -402,44 +402,39 @@ public class TestJobResourceUploader { private void runTmpResourcePathTest(JobResourceUploader uploader, ResourceConf rConf, JobConf jConf, String[] expectedFiles, String[] expectedArchives, String expectedJobJar) throws IOException { -rConf.setupJobConf(jConf); -// We use a pre and post job object here because we 
need the post job object -// to get the new values set during uploadResources, but we need the pre job -// to set the job jar because JobResourceUploader#uploadJobJar uses the Job -// interface not the JobConf. The post job is automatically created in -// validateResourcePaths. -Job jobPre = Job.getInstance(jConf); -uploadResources(uploader, jConf, jobPre); - -validateResourcePaths(jConf, expectedFiles, expectedArchives, -expectedJobJar, jobPre); +Job job = rConf.setupJobConf(jConf); +uploadResources(uploader, job); +
[2/2] hadoop git commit: MAPREDUCE-5951. Add support for the YARN Shared Cache.
MAPREDUCE-5951. Add support for the YARN Shared Cache. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e46d5bb9 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e46d5bb9 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e46d5bb9 Branch: refs/heads/trunk Commit: e46d5bb962b0c942f993afc505b165b1cd96e51b Parents: 13fcfb3 Author: Chris TrezzoAuthored: Thu Oct 12 10:58:02 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 12 10:59:20 2017 -0700 -- .../mapreduce/v2/app/job/impl/JobImpl.java | 16 + .../v2/app/job/impl/TaskAttemptImpl.java| 52 ++- .../mapreduce/v2/util/LocalResourceBuilder.java | 169 .../apache/hadoop/mapreduce/v2/util/MRApps.java | 137 ++ .../TestLocalDistributedCacheManager.java | 9 + .../hadoop/mapreduce/v2/util/TestMRApps.java| 8 +- .../hadoop-mapreduce-client-core/pom.xml| 6 + .../java/org/apache/hadoop/mapreduce/Job.java | 226 ++ .../hadoop/mapreduce/JobResourceUploader.java | 416 --- .../apache/hadoop/mapreduce/MRJobConfig.java| 71 .../hadoop/mapreduce/SharedCacheConfig.java | 102 + .../src/main/resources/mapred-default.xml | 11 + .../src/site/markdown/SharedCacheSupport.md | 100 + .../mapreduce/TestJobResourceUploader.java | 76 ++-- .../TestJobResourceUploaderWithSharedCache.java | 365 .../org/apache/hadoop/mapred/YARNRunner.java| 54 ++- .../hadoop/mapred/TestLocalJobSubmission.java | 52 +++ .../apache/hadoop/mapreduce/v2/TestMRJobs.java | 59 +++ hadoop-project/src/site/site.xml| 1 + 19 files changed, 1701 insertions(+), 229 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e46d5bb9/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java index 757c545..d2e2492 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/JobImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapred.JobACLsManager; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.TaskCompletionEvent; import org.apache.hadoop.mapreduce.Counters; +import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobACL; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.MRJobConfig; @@ -1414,6 +1415,19 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, new char[] {'"', '=', '.'}); } + /* + * The goal is to make sure only the NM that hosts MRAppMaster will upload + * resources to shared cache. Clean up the shared cache policies for all + * resources so that later when TaskAttemptImpl creates + * ContainerLaunchContext, LocalResource.setShouldBeUploadedToSharedCache will + * be set up to false. In that way, the NMs that host the task containers + * won't try to upload the resources to shared cache. 
+ */ + private static void cleanupSharedCacheUploadPolicies(Configuration conf) { +Job.setArchiveSharedCacheUploadPolicies(conf, Collections.emptyMap()); +Job.setFileSharedCacheUploadPolicies(conf, Collections.emptyMap()); + } + public static class InitTransition implements MultipleArcTransition { @@ -1492,6 +1506,8 @@ public class JobImpl implements org.apache.hadoop.mapreduce.v2.app.job.Job, job.allowedReduceFailuresPercent = job.conf.getInt(MRJobConfig.REDUCE_FAILURES_MAXPERCENT, 0); +cleanupSharedCacheUploadPolicies(job.conf); + // create the Tasks but don't start them yet createMapTasks(job, inputLength, taskSplitMetaInfo); createReduceTasks(job); http://git-wip-us.apache.org/repos/asf/hadoop/blob/e46d5bb9/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/job/impl/TaskAttemptImpl.java -- diff --git
hadoop git commit: YARN-2960. Add documentation for the YARN shared cache.
Repository: hadoop Updated Branches: refs/heads/branch-2 fa6b8feb9 -> d12cea209 YARN-2960. Add documentation for the YARN shared cache. (cherry picked from commit 7e76f85bc68166b01b51fcf6ba4b3fd9281d4a03) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d12cea20 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d12cea20 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d12cea20 Branch: refs/heads/branch-2 Commit: d12cea20980267ce7cd44d7735c727fd8a63c4eb Parents: fa6b8fe Author: Chris TrezzoAuthored: Thu Oct 5 10:38:41 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 5 11:23:24 2017 -0700 -- hadoop-project/src/site/site.xml| 1 + .../src/site/markdown/SharedCache.md| 168 +++ 2 files changed, 169 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d12cea20/hadoop-project/src/site/site.xml -- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 8f21fdd..6204050 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -138,6 +138,7 @@ + http://git-wip-us.apache.org/repos/asf/hadoop/blob/d12cea20/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md new file mode 100644 index 000..ea50e91 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md @@ -0,0 +1,168 @@ + + +YARN Shared Cache +=== + + + +Overview + + +The YARN Shared Cache provides the facility to upload and manage shared +application resources to HDFS in a safe and scalable manner. YARN applications +can leverage resources uploaded by other applications or previous +runs of the same application without having to reÂupload and localize identical files +multiple times. 
This will save network resources and reduce YARN application +startup time. + +Current Status and Future Plans + + +Currently the YARN Shared Cache is released and ready to use. The major +components are implemented and have been deployed in a large-scale +production setting. There are still some pieces missing (i.e. strong +authentication). These missing features will be implemented as part of a +follow-up phase 2 effort. Please see +[YARN-7282](https://issues.apache.org/jira/browse/YARN-7282) for more information. + +Architecture + + +The shared cache feature consists of 4 major components: + +1. The shared cache client. +2. The HDFS directory that acts as a cache. +3. The shared cache manager (aka. SCM). +4. The localization service and uploader. + +### The Shared Cache Client + +YARN application developers and users, should interact with the shared cache using +the shared cache client. This client is responsible for interacting with the +shared cache manager, computing the checksum of application resources, and +claiming application resources in the shared cache. Once an application has claimed +a resource, it is free to use that resource for the life-cycle of the application. +Please see the SharedCacheClient.java javadoc for further documentation. + +### The Shared Cache HDFS Directory + +The shared cache HDFS directory stores all of the shared cache resources. It is protected +by HDFS permissions and is globally readable, but writing is restricted to a trusted user. +This HDFS directory is only modified by the shared cache manager and the resource uploader +on the node manager. Resources are spread across a set of subdirectories using the resources's +checksum: +``` +/sharedcache/a/8/9/a896857d078/foo.jar +/sharedcache/5/0/f/50f11b09f87/bar.jar +/sharedcache/a/6/7/a678cb1aa8f/job.jar +``` + +### Shared Cache Manager (SCM) + +The shared cache manager is responsible for serving requests from the client and +managing the contents of the shared cache. 
It looks after both the meta data as +well as the persisted resources in HDFS. It is made up of two major components, +a back end store and a cleaner service. The SCM runs as a separate daemon +process that can be placed on any node in the cluster. This allows for +administrators to start/stop/upgrade the SCM without affecting other YARN +components (i.e. the resource manager or node managers). + +The back end store is responsible for maintaining and persisting metadata about +the shared cache. This includes the resources in the cache, when a
hadoop git commit: YARN-2960. Add documentation for the YARN shared cache.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 4b9b66f92 -> e5af16cf6 YARN-2960. Add documentation for the YARN shared cache. (cherry picked from commit 7e76f85bc68166b01b51fcf6ba4b3fd9281d4a03) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e5af16cf Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e5af16cf Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e5af16cf Branch: refs/heads/branch-3.0 Commit: e5af16cf6cd54c8358af066d1ec677378bc3029d Parents: 4b9b66f Author: Chris TrezzoAuthored: Thu Oct 5 10:38:41 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 5 11:15:39 2017 -0700 -- hadoop-project/src/site/site.xml| 1 + .../src/site/markdown/SharedCache.md| 168 +++ 2 files changed, 169 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5af16cf/hadoop-project/src/site/site.xml -- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 9806f02..0b1f6ab 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -141,6 +141,7 @@ + http://git-wip-us.apache.org/repos/asf/hadoop/blob/e5af16cf/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md new file mode 100644 index 000..ea50e91 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md @@ -0,0 +1,168 @@ + + +YARN Shared Cache +=== + + + +Overview + + +The YARN Shared Cache provides the facility to upload and manage shared +application resources to HDFS in a safe and scalable manner. YARN applications +can leverage resources uploaded by other applications or previous +runs of the same application without having to reÂupload and localize identical files +multiple times. 
This will save network resources and reduce YARN application +startup time. + +Current Status and Future Plans + + +Currently the YARN Shared Cache is released and ready to use. The major +components are implemented and have been deployed in a large-scale +production setting. There are still some pieces missing (i.e. strong +authentication). These missing features will be implemented as part of a +follow-up phase 2 effort. Please see +[YARN-7282](https://issues.apache.org/jira/browse/YARN-7282) for more information. + +Architecture + + +The shared cache feature consists of 4 major components: + +1. The shared cache client. +2. The HDFS directory that acts as a cache. +3. The shared cache manager (aka. SCM). +4. The localization service and uploader. + +### The Shared Cache Client + +YARN application developers and users, should interact with the shared cache using +the shared cache client. This client is responsible for interacting with the +shared cache manager, computing the checksum of application resources, and +claiming application resources in the shared cache. Once an application has claimed +a resource, it is free to use that resource for the life-cycle of the application. +Please see the SharedCacheClient.java javadoc for further documentation. + +### The Shared Cache HDFS Directory + +The shared cache HDFS directory stores all of the shared cache resources. It is protected +by HDFS permissions and is globally readable, but writing is restricted to a trusted user. +This HDFS directory is only modified by the shared cache manager and the resource uploader +on the node manager. Resources are spread across a set of subdirectories using the resources's +checksum: +``` +/sharedcache/a/8/9/a896857d078/foo.jar +/sharedcache/5/0/f/50f11b09f87/bar.jar +/sharedcache/a/6/7/a678cb1aa8f/job.jar +``` + +### Shared Cache Manager (SCM) + +The shared cache manager is responsible for serving requests from the client and +managing the contents of the shared cache. 
It looks after both the meta data as +well as the persisted resources in HDFS. It is made up of two major components, +a back end store and a cleaner service. The SCM runs as a separate daemon +process that can be placed on any node in the cluster. This allows for +administrators to start/stop/upgrade the SCM without affecting other YARN +components (i.e. the resource manager or node managers). + +The back end store is responsible for maintaining and persisting metadata about +the shared cache. This includes the resources in the cache,
hadoop git commit: Revert "YARN-2960. Add documentation for the YARN shared cache."
Repository: hadoop Updated Branches: refs/heads/branch-3.0 54a01c28c -> 4b9b66f92 Revert "YARN-2960. Add documentation for the YARN shared cache." This reverts commit 54a01c28cc153872aa7eed68000ab0ddf010054a. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/4b9b66f9 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/4b9b66f9 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/4b9b66f9 Branch: refs/heads/branch-3.0 Commit: 4b9b66f921c671b6426d1bc912cca056cb2532c4 Parents: 54a01c2 Author: Chris TrezzoAuthored: Thu Oct 5 11:11:02 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 5 11:11:02 2017 -0700 -- hadoop-project/src/site/site.xml| 2 - .../src/site/markdown/SharedCache.md| 168 --- 2 files changed, 170 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/4b9b66f9/hadoop-project/src/site/site.xml -- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 44551fe..9806f02 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -141,8 +141,6 @@ - - http://git-wip-us.apache.org/repos/asf/hadoop/blob/4b9b66f9/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md deleted file mode 100644 index ea50e91..000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md +++ /dev/null @@ -1,168 +0,0 @@ - - -YARN Shared Cache -=== - - - -Overview - - -The YARN Shared Cache provides the facility to upload and manage shared -application resources to HDFS in a safe and scalable manner. YARN applications -can leverage resources uploaded by other applications or previous -runs of the same application without having to reÂupload and localize identical files -multiple times. 
This will save network resources and reduce YARN application -startup time. - -Current Status and Future Plans - - -Currently the YARN Shared Cache is released and ready to use. The major -components are implemented and have been deployed in a large-scale -production setting. There are still some pieces missing (i.e. strong -authentication). These missing features will be implemented as part of a -follow-up phase 2 effort. Please see -[YARN-7282](https://issues.apache.org/jira/browse/YARN-7282) for more information. - -Architecture - - -The shared cache feature consists of 4 major components: - -1. The shared cache client. -2. The HDFS directory that acts as a cache. -3. The shared cache manager (aka. SCM). -4. The localization service and uploader. - -### The Shared Cache Client - -YARN application developers and users, should interact with the shared cache using -the shared cache client. This client is responsible for interacting with the -shared cache manager, computing the checksum of application resources, and -claiming application resources in the shared cache. Once an application has claimed -a resource, it is free to use that resource for the life-cycle of the application. -Please see the SharedCacheClient.java javadoc for further documentation. - -### The Shared Cache HDFS Directory - -The shared cache HDFS directory stores all of the shared cache resources. It is protected -by HDFS permissions and is globally readable, but writing is restricted to a trusted user. -This HDFS directory is only modified by the shared cache manager and the resource uploader -on the node manager. Resources are spread across a set of subdirectories using the resources's -checksum: -``` -/sharedcache/a/8/9/a896857d078/foo.jar -/sharedcache/5/0/f/50f11b09f87/bar.jar -/sharedcache/a/6/7/a678cb1aa8f/job.jar -``` - -### Shared Cache Manager (SCM) - -The shared cache manager is responsible for serving requests from the client and -managing the contents of the shared cache. 
It looks after both the meta data as -well as the persisted resources in HDFS. It is made up of two major components, -a back end store and a cleaner service. The SCM runs as a separate daemon -process that can be placed on any node in the cluster. This allows for -administrators to start/stop/upgrade the SCM without affecting other YARN -components (i.e. the resource manager or node managers). - -The back end store is responsible for maintaining and persisting metadata about -the shared cache. This includes the resources in
hadoop git commit: YARN-2960. Add documentation for the YARN shared cache.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 b61d12e30 -> 54a01c28c YARN-2960. Add documentation for the YARN shared cache. (cherry picked from commit 7e76f85bc68166b01b51fcf6ba4b3fd9281d4a03) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/54a01c28 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/54a01c28 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/54a01c28 Branch: refs/heads/branch-3.0 Commit: 54a01c28cc153872aa7eed68000ab0ddf010054a Parents: b61d12e Author: Chris TrezzoAuthored: Thu Oct 5 10:38:41 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 5 10:54:44 2017 -0700 -- hadoop-project/src/site/site.xml| 2 + .../src/site/markdown/SharedCache.md| 168 +++ 2 files changed, 170 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/54a01c28/hadoop-project/src/site/site.xml -- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 9806f02..44551fe 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -141,6 +141,8 @@ + + http://git-wip-us.apache.org/repos/asf/hadoop/blob/54a01c28/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md new file mode 100644 index 000..ea50e91 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md @@ -0,0 +1,168 @@ + + +YARN Shared Cache +=== + + + +Overview + + +The YARN Shared Cache provides the facility to upload and manage shared +application resources to HDFS in a safe and scalable manner. YARN applications +can leverage resources uploaded by other applications or previous +runs of the same application without having to reÂupload and localize identical files +multiple times. 
This will save network resources and reduce YARN application +startup time. + +Current Status and Future Plans + + +Currently the YARN Shared Cache is released and ready to use. The major +components are implemented and have been deployed in a large-scale +production setting. There are still some pieces missing (i.e. strong +authentication). These missing features will be implemented as part of a +follow-up phase 2 effort. Please see +[YARN-7282](https://issues.apache.org/jira/browse/YARN-7282) for more information. + +Architecture + + +The shared cache feature consists of 4 major components: + +1. The shared cache client. +2. The HDFS directory that acts as a cache. +3. The shared cache manager (aka. SCM). +4. The localization service and uploader. + +### The Shared Cache Client + +YARN application developers and users, should interact with the shared cache using +the shared cache client. This client is responsible for interacting with the +shared cache manager, computing the checksum of application resources, and +claiming application resources in the shared cache. Once an application has claimed +a resource, it is free to use that resource for the life-cycle of the application. +Please see the SharedCacheClient.java javadoc for further documentation. + +### The Shared Cache HDFS Directory + +The shared cache HDFS directory stores all of the shared cache resources. It is protected +by HDFS permissions and is globally readable, but writing is restricted to a trusted user. +This HDFS directory is only modified by the shared cache manager and the resource uploader +on the node manager. Resources are spread across a set of subdirectories using the resources's +checksum: +``` +/sharedcache/a/8/9/a896857d078/foo.jar +/sharedcache/5/0/f/50f11b09f87/bar.jar +/sharedcache/a/6/7/a678cb1aa8f/job.jar +``` + +### Shared Cache Manager (SCM) + +The shared cache manager is responsible for serving requests from the client and +managing the contents of the shared cache. 
It looks after both the meta data as +well as the persisted resources in HDFS. It is made up of two major components, +a back end store and a cleaner service. The SCM runs as a separate daemon +process that can be placed on any node in the cluster. This allows for +administrators to start/stop/upgrade the SCM without affecting other YARN +components (i.e. the resource manager or node managers). + +The back end store is responsible for maintaining and persisting metadata about +the shared cache. This includes the resources in the
hadoop git commit: YARN-2960. Add documentation for the YARN shared cache.
Repository: hadoop Updated Branches: refs/heads/trunk b733348dd -> 7e76f85bc YARN-2960. Add documentation for the YARN shared cache. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/7e76f85b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/7e76f85b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/7e76f85b Branch: refs/heads/trunk Commit: 7e76f85bc68166b01b51fcf6ba4b3fd9281d4a03 Parents: b733348 Author: Chris Trezzo Authored: Thu Oct 5 10:38:41 2017 -0700 Committer: Chris Trezzo Committed: Thu Oct 5 10:38:41 2017 -0700 -- hadoop-project/src/site/site.xml| 1 + .../src/site/markdown/SharedCache.md| 168 +++ 2 files changed, 169 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/7e76f85b/hadoop-project/src/site/site.xml -- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index a88f0e3..44551fe 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -142,6 +142,7 @@ + http://git-wip-us.apache.org/repos/asf/hadoop/blob/7e76f85b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md new file mode 100644 index 000..ea50e91 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/SharedCache.md @@ -0,0 +1,168 @@ + + +YARN Shared Cache +=== + + + +Overview + + +The YARN Shared Cache provides the facility to upload and manage shared +application resources to HDFS in a safe and scalable manner. YARN applications +can leverage resources uploaded by other applications or previous +runs of the same application without having to re-upload and localize identical files +multiple times. This will save network resources and reduce YARN application +startup time. 
+ +Current Status and Future Plans + + +Currently the YARN Shared Cache is released and ready to use. The major +components are implemented and have been deployed in a large-scale +production setting. There are still some pieces missing (i.e. strong +authentication). These missing features will be implemented as part of a +follow-up phase 2 effort. Please see +[YARN-7282](https://issues.apache.org/jira/browse/YARN-7282) for more information. + +Architecture + + +The shared cache feature consists of 4 major components: + +1. The shared cache client. +2. The HDFS directory that acts as a cache. +3. The shared cache manager (aka. SCM). +4. The localization service and uploader. + +### The Shared Cache Client + +YARN application developers and users should interact with the shared cache using +the shared cache client. This client is responsible for interacting with the +shared cache manager, computing the checksum of application resources, and +claiming application resources in the shared cache. Once an application has claimed +a resource, it is free to use that resource for the life-cycle of the application. +Please see the SharedCacheClient.java javadoc for further documentation. + +### The Shared Cache HDFS Directory + +The shared cache HDFS directory stores all of the shared cache resources. It is protected +by HDFS permissions and is globally readable, but writing is restricted to a trusted user. +This HDFS directory is only modified by the shared cache manager and the resource uploader +on the node manager. Resources are spread across a set of subdirectories using the resource's +checksum: +``` +/sharedcache/a/8/9/a896857d078/foo.jar +/sharedcache/5/0/f/50f11b09f87/bar.jar +/sharedcache/a/6/7/a678cb1aa8f/job.jar +``` + +### Shared Cache Manager (SCM) + +The shared cache manager is responsible for serving requests from the client and +managing the contents of the shared cache. It looks after both the meta data as +well as the persisted resources in HDFS. 
It is made up of two major components, +a back end store and a cleaner service. The SCM runs as a separate daemon +process that can be placed on any node in the cluster. This allows for +administrators to start/stop/upgrade the SCM without affecting other YARN +components (i.e. the resource manager or node managers). + +The back end store is responsible for maintaining and persisting metadata about +the shared cache. This includes the resources in the cache, when a resource was +last used and a list of applications that are currently using
hadoop git commit: YARN-7250. Update Shared cache client api to use URLs.
Repository: hadoop Updated Branches: refs/heads/branch-2 11ac10fe1 -> cba1891b6 YARN-7250. Update Shared cache client api to use URLs. (cherry picked from commit c114da5e64d14b1d9e614081c4171ea0391cb1aa) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/cba1891b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/cba1891b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/cba1891b Branch: refs/heads/branch-2 Commit: cba1891b61e8c049d79b110da9dd4832467a9f08 Parents: 11ac10f Author: Chris TrezzoAuthored: Thu Sep 28 15:28:06 2017 -0700 Committer: Chris Trezzo Committed: Thu Sep 28 15:32:18 2017 -0700 -- .../yarn/client/api/SharedCacheClient.java | 22 .../client/api/impl/SharedCacheClientImpl.java | 36 +--- .../api/impl/TestSharedCacheClientImpl.java | 31 + 3 files changed, 23 insertions(+), 66 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/cba1891b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java index 60c1bd98..a9c1a07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java @@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.client.api.impl.SharedCacheClientImpl; import 
org.apache.hadoop.yarn.exceptions.YarnException; @@ -58,34 +59,25 @@ public abstract class SharedCacheClient extends AbstractService { * * * The SharedCacheManager responds with whether or not the - * resource exists in the cache. If the resource exists, a Path - * to the resource in the shared cache is returned. If the resource does not + * resource exists in the cache. If the resource exists, a URL to + * the resource in the shared cache is returned. If the resource does not * exist, null is returned instead. * * * - * Once a path has been returned for a resource, that path is safe to use for + * Once a URL has been returned for a resource, that URL is safe to use for * the lifetime of the application that corresponds to the provided * ApplicationId. * * - * - * Additionally, a name for the resource should be specified. A fragment will - * be added to the path with the desired name if the desired name is different - * than the name of the provided path from the shared cache. This ensures that - * if the returned path is used to create a LocalResource, then the symlink - * created during YARN localization will match the name specified. - * - * * @param applicationId ApplicationId of the application using the resource * @param resourceKey the key (i.e. 
checksum) that identifies the resource - * @param resourceName the desired name of the resource - * @return Path to the resource, or null if it does not exist + * @return URL to the resource, or null if it does not exist */ @Public @Unstable - public abstract Path use(ApplicationId applicationId, String resourceKey, - String resourceName) throws YarnException; + public abstract URL use(ApplicationId applicationId, String resourceKey) + throws YarnException; /** * http://git-wip-us.apache.org/repos/asf/hadoop/blob/cba1891b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java index b910c28..3191d36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java +++
hadoop git commit: YARN-7250. Update Shared cache client api to use URLs.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 a6630f703 -> 3d2352211 YARN-7250. Update Shared cache client api to use URLs. (cherry picked from commit c114da5e64d14b1d9e614081c4171ea0391cb1aa) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3d235221 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3d235221 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3d235221 Branch: refs/heads/branch-3.0 Commit: 3d235221106bd1ff2d99f660e89c6f3306f578ad Parents: a6630f7 Author: Chris TrezzoAuthored: Thu Sep 28 15:28:06 2017 -0700 Committer: Chris Trezzo Committed: Thu Sep 28 15:31:23 2017 -0700 -- .../yarn/client/api/SharedCacheClient.java | 22 .../client/api/impl/SharedCacheClientImpl.java | 36 +--- .../api/impl/TestSharedCacheClientImpl.java | 31 + 3 files changed, 23 insertions(+), 66 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d235221/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java index 60c1bd98..a9c1a07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java @@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.client.api.impl.SharedCacheClientImpl; import 
org.apache.hadoop.yarn.exceptions.YarnException; @@ -58,34 +59,25 @@ public abstract class SharedCacheClient extends AbstractService { * * * The SharedCacheManager responds with whether or not the - * resource exists in the cache. If the resource exists, a Path - * to the resource in the shared cache is returned. If the resource does not + * resource exists in the cache. If the resource exists, a URL to + * the resource in the shared cache is returned. If the resource does not * exist, null is returned instead. * * * - * Once a path has been returned for a resource, that path is safe to use for + * Once a URL has been returned for a resource, that URL is safe to use for * the lifetime of the application that corresponds to the provided * ApplicationId. * * - * - * Additionally, a name for the resource should be specified. A fragment will - * be added to the path with the desired name if the desired name is different - * than the name of the provided path from the shared cache. This ensures that - * if the returned path is used to create a LocalResource, then the symlink - * created during YARN localization will match the name specified. - * - * * @param applicationId ApplicationId of the application using the resource * @param resourceKey the key (i.e. 
checksum) that identifies the resource - * @param resourceName the desired name of the resource - * @return Path to the resource, or null if it does not exist + * @return URL to the resource, or null if it does not exist */ @Public @Unstable - public abstract Path use(ApplicationId applicationId, String resourceKey, - String resourceName) throws YarnException; + public abstract URL use(ApplicationId applicationId, String resourceKey) + throws YarnException; /** * http://git-wip-us.apache.org/repos/asf/hadoop/blob/3d235221/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java index b910c28..3191d36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java +++
hadoop git commit: YARN-7250. Update Shared cache client api to use URLs.
Repository: hadoop Updated Branches: refs/heads/trunk 6f789fe05 -> c114da5e6 YARN-7250. Update Shared cache client api to use URLs. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c114da5e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c114da5e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c114da5e Branch: refs/heads/trunk Commit: c114da5e64d14b1d9e614081c4171ea0391cb1aa Parents: 6f789fe Author: Chris TrezzoAuthored: Thu Sep 28 15:28:06 2017 -0700 Committer: Chris Trezzo Committed: Thu Sep 28 15:28:06 2017 -0700 -- .../yarn/client/api/SharedCacheClient.java | 22 .../client/api/impl/SharedCacheClientImpl.java | 36 +--- .../api/impl/TestSharedCacheClientImpl.java | 31 + 3 files changed, 23 insertions(+), 66 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c114da5e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java index 60c1bd98..a9c1a07 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/SharedCacheClient.java @@ -27,6 +27,7 @@ import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.fs.Path; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.URL; import org.apache.hadoop.yarn.client.api.impl.SharedCacheClientImpl; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -58,34 +59,25 @@ public abstract class 
SharedCacheClient extends AbstractService { * * * The SharedCacheManager responds with whether or not the - * resource exists in the cache. If the resource exists, a Path - * to the resource in the shared cache is returned. If the resource does not + * resource exists in the cache. If the resource exists, a URL to + * the resource in the shared cache is returned. If the resource does not * exist, null is returned instead. * * * - * Once a path has been returned for a resource, that path is safe to use for + * Once a URL has been returned for a resource, that URL is safe to use for * the lifetime of the application that corresponds to the provided * ApplicationId. * * - * - * Additionally, a name for the resource should be specified. A fragment will - * be added to the path with the desired name if the desired name is different - * than the name of the provided path from the shared cache. This ensures that - * if the returned path is used to create a LocalResource, then the symlink - * created during YARN localization will match the name specified. - * - * * @param applicationId ApplicationId of the application using the resource * @param resourceKey the key (i.e. 
checksum) that identifies the resource - * @param resourceName the desired name of the resource - * @return Path to the resource, or null if it does not exist + * @return URL to the resource, or null if it does not exist */ @Public @Unstable - public abstract Path use(ApplicationId applicationId, String resourceKey, - String resourceName) throws YarnException; + public abstract URL use(ApplicationId applicationId, String resourceKey) + throws YarnException; /** * http://git-wip-us.apache.org/repos/asf/hadoop/blob/c114da5e/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java index b910c28..3191d36 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/SharedCacheClientImpl.java @@ -21,8 +21,6 @@ package
hadoop git commit: YARN-7253. Shared Cache Manager daemon command listed as admin subcmd in yarn script.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 e5629ee71 -> ad7ed9d04 YARN-7253. Shared Cache Manager daemon command listed as admin subcmd in yarn script. (cherry picked from commit c87db8d154ab2501e786b4f1669b205759ece5c3) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/ad7ed9d0 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/ad7ed9d0 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/ad7ed9d0 Branch: refs/heads/branch-3.0 Commit: ad7ed9d0446e8601eca931b64ae01cbfd85d4170 Parents: e5629ee Author: Chris TrezzoAuthored: Wed Sep 27 11:32:09 2017 -0700 Committer: Chris Trezzo Committed: Wed Sep 27 11:36:42 2017 -0700 -- hadoop-yarn-project/hadoop-yarn/bin/yarn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/ad7ed9d0/hadoop-yarn-project/hadoop-yarn/bin/yarn -- diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index dcde0dc..52138c5 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -48,7 +48,7 @@ function hadoop_usage hadoop_add_subcommand "rmadmin" admin "admin tools" hadoop_add_subcommand "router" daemon "run the Router daemon" hadoop_add_subcommand "scmadmin" admin "SharedCacheManager admin tools" - hadoop_add_subcommand "sharedcachemanager" admin "run the SharedCacheManager daemon" + hadoop_add_subcommand "sharedcachemanager" daemon "run the SharedCacheManager daemon" hadoop_add_subcommand "timelinereader" client "run the timeline reader server" hadoop_add_subcommand "timelineserver" daemon "run the timeline server" hadoop_add_subcommand "top" client "view cluster information" - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7253. Shared Cache Manager daemon command listed as admin subcmd in yarn script.
Repository: hadoop Updated Branches: refs/heads/trunk 8facf1f97 -> c87db8d15 YARN-7253. Shared Cache Manager daemon command listed as admin subcmd in yarn script. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c87db8d1 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c87db8d1 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c87db8d1 Branch: refs/heads/trunk Commit: c87db8d154ab2501e786b4f1669b205759ece5c3 Parents: 8facf1f Author: Chris TrezzoAuthored: Wed Sep 27 11:32:09 2017 -0700 Committer: Chris Trezzo Committed: Wed Sep 27 11:32:09 2017 -0700 -- hadoop-yarn-project/hadoop-yarn/bin/yarn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c87db8d1/hadoop-yarn-project/hadoop-yarn/bin/yarn -- diff --git a/hadoop-yarn-project/hadoop-yarn/bin/yarn b/hadoop-yarn-project/hadoop-yarn/bin/yarn index dcde0dc..52138c5 100755 --- a/hadoop-yarn-project/hadoop-yarn/bin/yarn +++ b/hadoop-yarn-project/hadoop-yarn/bin/yarn @@ -48,7 +48,7 @@ function hadoop_usage hadoop_add_subcommand "rmadmin" admin "admin tools" hadoop_add_subcommand "router" daemon "run the Router daemon" hadoop_add_subcommand "scmadmin" admin "SharedCacheManager admin tools" - hadoop_add_subcommand "sharedcachemanager" admin "run the SharedCacheManager daemon" + hadoop_add_subcommand "sharedcachemanager" daemon "run the SharedCacheManager daemon" hadoop_add_subcommand "timelinereader" client "run the timeline reader server" hadoop_add_subcommand "timelineserver" daemon "run the timeline server" hadoop_add_subcommand "top" client "view cluster information" - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
svn commit: r1792949 - in /hadoop/common/site/main: author/src/documentation/content/xdocs/ publish/
Author: ctrezzo Date: Thu Apr 27 19:42:35 2017 New Revision: 1792949 URL: http://svn.apache.org/viewvc?rev=1792949=rev Log: Add ctrezzo to Hadoop committers list and publish site. Modified: hadoop/common/site/main/author/src/documentation/content/xdocs/who.xml hadoop/common/site/main/publish/bylaws.pdf hadoop/common/site/main/publish/committer_criteria.pdf hadoop/common/site/main/publish/index.pdf hadoop/common/site/main/publish/issue_tracking.pdf hadoop/common/site/main/publish/linkmap.pdf hadoop/common/site/main/publish/mailing_lists.pdf hadoop/common/site/main/publish/privacy_policy.pdf hadoop/common/site/main/publish/releases.pdf hadoop/common/site/main/publish/version_control.pdf hadoop/common/site/main/publish/versioning.pdf hadoop/common/site/main/publish/who.html hadoop/common/site/main/publish/who.pdf Modified: hadoop/common/site/main/author/src/documentation/content/xdocs/who.xml URL: http://svn.apache.org/viewvc/hadoop/common/site/main/author/src/documentation/content/xdocs/who.xml?rev=1792949=1792948=1792949=diff == --- hadoop/common/site/main/author/src/documentation/content/xdocs/who.xml (original) +++ hadoop/common/site/main/author/src/documentation/content/xdocs/who.xml Thu Apr 27 19:42:35 2017 @@ -812,6 +812,14 @@ + ctrezzo + Chris Trezzo + Twitter + + -8 + + + curino Carlo Curino Microsoft Modified: hadoop/common/site/main/publish/bylaws.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/bylaws.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/committer_criteria.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/committer_criteria.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/index.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/index.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. 
Modified: hadoop/common/site/main/publish/issue_tracking.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/issue_tracking.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/linkmap.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/linkmap.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/mailing_lists.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/mailing_lists.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/privacy_policy.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/privacy_policy.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/releases.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/releases.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/version_control.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/version_control.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/versioning.pdf URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/versioning.pdf?rev=1792949=1792948=1792949=diff == Binary files - no diff available. Modified: hadoop/common/site/main/publish/who.html URL: http://svn.apache.org/viewvc/hadoop/common/site/main/publish/who.html?rev=1792949=1792948=1792949=diff == --- hadoop/common/site/main/publish/who.html (original) +++ hadoop/common/site/main/publish/who.html Thu Apr 27 19:42:35 2017 @@ -1359,6 +1359,17 @@ document.write("Last Published: " + docu +ctrezzo + Chris Trezzo + Twitter + +