hadoop git commit: HDFS-4210. Throw helpful exception when DNS entry for JournalNode cannot be resolved. Contributed by Charles Lamb and John Zhuge.
Repository: hadoop Updated Branches: refs/heads/branch-2.7 829959a8f -> 1f2ab8b74 HDFS-4210. Throw helpful exception when DNS entry for JournalNode cannot be resolved. Contributed by Charles Lamb and John Zhuge. (cherry picked from commit a291306510b76d1d3382c31bea7eeb54c89c4fb4) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/1f2ab8b7 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/1f2ab8b7 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/1f2ab8b7 Branch: refs/heads/branch-2.7 Commit: 1f2ab8b742bf1737881530c895fbba48462a9741 Parents: 829959a Author: Xiao ChenAuthored: Mon Aug 29 17:41:01 2016 -0700 Committer: Konstantin V Shvachko Committed: Wed Feb 21 16:01:42 2018 -0800 -- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../qjournal/client/QuorumJournalManager.java| 9 +++-- .../hdfs/qjournal/client/TestQJMWithFaults.java | 19 ++- 3 files changed, 28 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/1f2ab8b7/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index c93f24c..69c02b2f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -39,6 +39,9 @@ Release 2.7.6 - UNRELEASED HDFS-13112. Token expiration edits may cause log corruption or deadlock. (daryn via kihwal) +HDFS-4210. Throw helpful exception when DNS entry for JournalNode cannot be +resolved. (Charles Lamb and John Zhuge) + Release 2.7.5 - 2017-12-14 INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/1f2ab8b7/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java index 1b84964..4123204 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/client/QuorumJournalManager.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; import java.net.URL; +import java.net.UnknownHostException; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -387,8 +388,12 @@ public class QuorumJournalManager implements JournalManager { List addrs = Lists.newArrayList(); for (String addr : parts) { - addrs.add(NetUtils.createSocketAddr( - addr, DFSConfigKeys.DFS_JOURNALNODE_RPC_PORT_DEFAULT)); + InetSocketAddress isa = NetUtils.createSocketAddr( + addr, DFSConfigKeys.DFS_JOURNALNODE_RPC_PORT_DEFAULT); + if (isa.isUnresolved()) { +throw new UnknownHostException(addr); + } + addrs.add(isa); } return addrs; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/1f2ab8b7/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java index 2e38d5f..3a441b5 100644 --- 
a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/qjournal/client/TestQJMWithFaults.java @@ -27,7 +27,9 @@ import java.io.Closeable; import java.io.IOException; import java.lang.reflect.InvocationTargetException; import java.net.InetSocketAddress; +import java.net.URI; import java.net.URISyntaxException; +import java.net.UnknownHostException; import java.util.List; import java.util.Map; import java.util.Random; @@ -53,7 +55,9 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.ProtobufRpcEngine; import org.apache.hadoop.test.GenericTestUtils; import org.apache.log4j.Level; +import org.junit.Rule; import org.junit.Test; +import org.junit.rules.ExpectedException; import org.mockito.Mockito; import org.mockito.invocation.InvocationOnMock; import org.mockito.stubbing.Answer; @@ -125,7 +129,10 @@ public class
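A note on the pattern this patch applies: NetUtils.createSocketAddr returns an InetSocketAddress even when the host name cannot be resolved, so without the added isUnresolved() check a bad DNS entry for a JournalNode only surfaces later as an opaque RPC failure. Below is a minimal, self-contained sketch of the same validate-early idea; the address parsing, default port, and class name are illustrative assumptions, not the QuorumJournalManager code itself.

```java
import java.net.InetSocketAddress;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.List;

/**
 * Sketch of eager address validation: resolve each configured host:port up
 * front and fail with the offending entry, instead of carrying an unresolved
 * InetSocketAddress forward. Illustrative only; names and parsing are
 * simplified assumptions.
 */
public class JournalAddressCheck {
  private static final int DEFAULT_PORT = 8485; // JournalNode RPC default

  static List<InetSocketAddress> resolveAll(String[] hostPorts)
      throws UnknownHostException {
    List<InetSocketAddress> addrs = new ArrayList<>();
    for (String hostPort : hostPorts) {
      String host = hostPort;
      int port = DEFAULT_PORT;
      int idx = hostPort.lastIndexOf(':');
      if (idx >= 0) {
        host = hostPort.substring(0, idx);
        port = Integer.parseInt(hostPort.substring(idx + 1));
      }
      // InetSocketAddress does the DNS lookup in its constructor and records
      // failure instead of throwing, so isUnresolved() must be checked here.
      InetSocketAddress isa = new InetSocketAddress(host, port);
      if (isa.isUnresolved()) {
        throw new UnknownHostException(hostPort);
      }
      addrs.add(isa);
    }
    return addrs;
  }

  public static void main(String[] args) throws Exception {
    System.out.println(resolveAll(new String[] {"localhost:8485"}));
  }
}
```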
hadoop git commit: YARN-7916. Remove call to docker logs on failure in container-executor. Contributed by Shane Kumpf
Repository: hadoop Updated Branches: refs/heads/branch-3.1 958b44504 -> bfbd43f2f YARN-7916. Remove call to docker logs on failure in container-executor. Contributed by Shane Kumpf (cherry picked from commit 3132709b46a35f70cf5278f3ace677e6e18a1d03) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bfbd43f2 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bfbd43f2 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bfbd43f2 Branch: refs/heads/branch-3.1 Commit: bfbd43f2f7cf45e15f2769182fb758f5ad343b68 Parents: 958b445 Author: Jason LoweAuthored: Wed Feb 21 16:54:02 2018 -0600 Committer: Jason Lowe Committed: Wed Feb 21 16:55:06 2018 -0600 -- .../impl/container-executor.c | 35 1 file changed, 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/bfbd43f2/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 035c694..751949e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -1435,20 +1435,16 @@ int launch_docker_container_as_user(const char * user, const char *app_id, char *exit_code_file = NULL; char *docker_command_with_binary = NULL; char *docker_wait_command = NULL; - char *docker_logs_command = NULL; char *docker_inspect_command = NULL; char *docker_rm_command = NULL; char *docker_inspect_exitcode_command = NULL; int container_file_source =-1; int cred_file_source = -1; - int BUFFER_SIZE = 4096; - char buffer[BUFFER_SIZE]; size_t command_size = MIN(sysconf(_SC_ARG_MAX), 128*1024); docker_command_with_binary = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_wait_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); - docker_logs_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_inspect_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_rm_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_inspect_exitcode_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); @@ -1600,36 +1596,6 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } fprintf(LOGFILE, "Exit code from docker inspect: %d\n", exit_code); - if(exit_code != 0) { -fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n", -exit_code); -snprintf(docker_logs_command, command_size, "%s logs --tail=250 %s", - docker_binary, container_id); -FILE* logs = popen(docker_logs_command, "r"); -if(logs != NULL) { - clearerr(logs); - res = fread(buffer, BUFFER_SIZE, 1, logs); - if(res < 1) { -fprintf(ERRORFILE, "%s %d %d\n", - "Unable to read from docker logs(ferror, feof):", ferror(logs), feof(logs)); -fflush(ERRORFILE); - } - else { -fprintf(ERRORFILE, "%s\n", buffer); -fflush(ERRORFILE); - } -} -else { - fprintf(ERRORFILE, "%s\n", "Failed to get output of docker logs"); - fprintf(ERRORFILE, "Command was '%s'\n", 
docker_logs_command); - fprintf(ERRORFILE, "%s\n", strerror(errno)); - fflush(ERRORFILE); -} -if(pclose(logs) != 0) { - fprintf(ERRORFILE, "%s\n", "Failed to fetch docker logs"); - fflush(ERRORFILE); -} - } cleanup: @@ -1662,7 +1628,6 @@ cleanup: free(cred_file_dest); free(docker_command_with_binary); free(docker_wait_command); - free(docker_logs_command); free(docker_inspect_command); free(docker_rm_command); return exit_code; - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7916. Remove call to docker logs on failure in container-executor. Contributed by Shane Kumpf
Repository: hadoop Updated Branches: refs/heads/trunk 2bc3351ea -> 3132709b4 YARN-7916. Remove call to docker logs on failure in container-executor. Contributed by Shane Kumpf Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/3132709b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/3132709b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/3132709b Branch: refs/heads/trunk Commit: 3132709b46a35f70cf5278f3ace677e6e18a1d03 Parents: 2bc3351 Author: Jason LoweAuthored: Wed Feb 21 16:54:02 2018 -0600 Committer: Jason Lowe Committed: Wed Feb 21 16:54:02 2018 -0600 -- .../impl/container-executor.c | 35 1 file changed, 35 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/3132709b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c index 035c694..751949e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/native/container-executor/impl/container-executor.c @@ -1435,20 +1435,16 @@ int launch_docker_container_as_user(const char * user, const char *app_id, char *exit_code_file = NULL; char *docker_command_with_binary = NULL; char *docker_wait_command = NULL; - char *docker_logs_command = NULL; char *docker_inspect_command = NULL; char *docker_rm_command = NULL; char *docker_inspect_exitcode_command = NULL; int container_file_source =-1; int cred_file_source = -1; - int BUFFER_SIZE = 4096; - char buffer[BUFFER_SIZE]; size_t command_size = MIN(sysconf(_SC_ARG_MAX), 128*1024); docker_command_with_binary = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_wait_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); - docker_logs_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_inspect_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_rm_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); docker_inspect_exitcode_command = (char *) alloc_and_clear_memory(command_size, sizeof(char)); @@ -1600,36 +1596,6 @@ int launch_docker_container_as_user(const char * user, const char *app_id, goto cleanup; } fprintf(LOGFILE, "Exit code from docker inspect: %d\n", exit_code); - if(exit_code != 0) { -fprintf(ERRORFILE, "Docker container exit code was not zero: %d\n", -exit_code); -snprintf(docker_logs_command, command_size, "%s logs --tail=250 %s", - docker_binary, container_id); -FILE* logs = popen(docker_logs_command, "r"); -if(logs != NULL) { - clearerr(logs); - res = fread(buffer, BUFFER_SIZE, 1, logs); - if(res < 1) { -fprintf(ERRORFILE, "%s %d %d\n", - "Unable to read from docker logs(ferror, feof):", ferror(logs), feof(logs)); -fflush(ERRORFILE); - } - else { -fprintf(ERRORFILE, "%s\n", buffer); -fflush(ERRORFILE); - } -} -else { - fprintf(ERRORFILE, "%s\n", "Failed to get output of docker logs"); - fprintf(ERRORFILE, "Command was '%s'\n", docker_logs_command); - fprintf(ERRORFILE, "%s\n", strerror(errno)); - 
fflush(ERRORFILE); -} -if(pclose(logs) != 0) { - fprintf(ERRORFILE, "%s\n", "Failed to fetch docker logs"); - fflush(ERRORFILE); -} - } cleanup: @@ -1662,7 +1628,6 @@ cleanup: free(cred_file_dest); free(docker_command_with_binary); free(docker_wait_command); - free(docker_logs_command); free(docker_inspect_command); free(docker_rm_command); return exit_code; - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: HADOOP-6852. apparent bug in concatenated-bzip2 support (decoding). Contributed by Zsolt Venczel.
Repository: hadoop Updated Branches: refs/heads/trunk 92cbbfe79 -> 2bc3351ea HADOOP-6852. apparent bug in concatenated-bzip2 support (decoding). Contributed by Zsolt Venczel. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/2bc3351e Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/2bc3351e Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/2bc3351e Branch: refs/heads/trunk Commit: 2bc3351eaf240ea685bcf5042d79f1554bf89e00 Parents: 92cbbfe Author: Sean MackroryAuthored: Wed Feb 21 12:53:18 2018 -0700 Committer: Sean Mackrory Committed: Wed Feb 21 12:57:14 2018 -0700 -- .../hadoop-client-minicluster/pom.xml | 1 + .../apache/hadoop/io/compress/BZip2Codec.java | 3 +- .../mapred/TestConcatenatedCompressedInput.java | 84 +-- .../src/test/resources/testdata/concat.bz2 | Bin 0 -> 208 bytes .../src/test/resources/testdata/concat.gz | Bin 0 -> 148 bytes .../testdata/testCompressThenConcat.txt.bz2 | Bin 0 -> 3056 bytes .../testdata/testCompressThenConcat.txt.gz | Bin 0 -> 3413 bytes .../testdata/testConcatThenCompress.txt.bz2 | Bin 0 -> 2567 bytes .../testdata/testConcatThenCompress.txt.gz | Bin 0 -> 2734 bytes 9 files changed, 42 insertions(+), 46 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-client-modules/hadoop-client-minicluster/pom.xml -- diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 905d53a..a443648 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -615,6 +615,7 @@ testjar/* testshell/* +testdata/* http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java -- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java index 3c78cfc..99590ed 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/compress/BZip2Codec.java @@ -180,7 +180,8 @@ public class BZip2Codec implements Configurable, SplittableCompressionCodec { new DecompressorStream(in, decompressor, conf.getInt(IO_FILE_BUFFER_SIZE_KEY, IO_FILE_BUFFER_SIZE_DEFAULT)) : - new BZip2CompressionInputStream(in); + new BZip2CompressionInputStream( + in, 0L, Long.MAX_VALUE, READ_MODE.BYBLOCK); } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/2bc3351e/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java -- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java index 977d083..af6b952 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestConcatenatedCompressedInput.java @@ -18,18 +18,6 @@ package org.apache.hadoop.mapred; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - -import java.io.ByteArrayInputStream; -import java.io.FileInputStream; -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.List; -import java.util.zip.Inflater; - import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; @@ -42,16 +30,26 @@ import org.apache.hadoop.io.compress.zlib.ZlibFactory; import org.apache.hadoop.util.LineReader; import
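The codec change above routes the plain createInputStream(InputStream) path through BZip2CompressionInputStream in BYBLOCK mode, so input made of several back-to-back bzip2 streams keeps decoding past the first end-of-stream marker. A minimal sketch of exercising that path follows; the local file name and filesystem setup are assumptions for illustration, standing in for a multi-stream file such as the concat.bz2 fixture added by this change (e.g. produced with `cat a.bz2 b.bz2 > concat.bz2`).

```java
import java.io.BufferedReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.BZip2Codec;

/** Sketch: read a concatenated .bz2 file end to end through BZip2Codec. */
public class ReadConcatenatedBzip2 {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.getLocal(conf);
    BZip2Codec codec = new BZip2Codec();
    codec.setConf(conf);

    // "concat.bz2" is a placeholder path for a file containing more than one
    // bzip2 stream; every line of every stream should be printed.
    try (InputStream raw = fs.open(new Path("concat.bz2"));
         InputStream in = codec.createInputStream(raw);
         BufferedReader reader = new BufferedReader(
             new InputStreamReader(in, StandardCharsets.UTF_8))) {
      String line;
      while ((line = reader.readLine()) != null) {
        System.out.println(line);
      }
    }
  }
}
```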
hadoop git commit: YARN-5028. RMStateStore should trim down app state for completed applications. Contributed by Gergo Repas.
Repository: hadoop Updated Branches: refs/heads/trunk 004b72237 -> 92cbbfe79 YARN-5028. RMStateStore should trim down app state for completed applications. Contributed by Gergo Repas. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/92cbbfe7 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/92cbbfe7 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/92cbbfe7 Branch: refs/heads/trunk Commit: 92cbbfe79ec009a19a71a7f44329a4b2f9fa9be6 Parents: 004b722 Author: Yufei GuAuthored: Wed Feb 21 11:42:26 2018 -0800 Committer: Yufei Gu Committed: Wed Feb 21 11:42:51 2018 -0800 -- .../resourcemanager/recovery/RMStateStore.java | 34 +- .../recovery/RMStateStoreTestBase.java | 3 + .../recovery/TestZKRMStateStore.java| 66 3 files changed, 102 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/92cbbfe7/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index f0ab324..bbe208d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -47,6 +47,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; import org.apache.hadoop.yarn.api.records.ReservationId; import org.apache.hadoop.yarn.api.records.impl.pb.ApplicationSubmissionContextPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ContainerLaunchContextPBImpl; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.event.AsyncDispatcher; @@ -65,6 +66,7 @@ import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.Applicatio import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEvent; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppEventType; +import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.AggregateAppResourceUsage; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttempt; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.RMAppAttemptEvent; @@ -257,6 +259,9 @@ public abstract class RMStateStore extends AbstractService { appState.getApplicationSubmissionContext().getApplicationId(); LOG.info("Updating info for app: " + appId); try { +if (isAppStateFinal(appState)) { + pruneAppState(appState); +} store.updateApplicationStateInternal(appId, appState); if (((RMStateUpdateAppEvent) event).isNotifyApplication()) { store.notifyApplication(new RMAppEvent(appId, @@ -276,7 +281,34 @@ public abstract class RMStateStore extends AbstractService { } } return finalState(isFenced); -}; +} + 
+private boolean isAppStateFinal(ApplicationStateData appState) { + RMAppState state = appState.getState(); + return state == RMAppState.FINISHED || state == RMAppState.FAILED || + state == RMAppState.KILLED; +} + +private void pruneAppState(ApplicationStateData appState) { + ApplicationSubmissionContext srcCtx = + appState.getApplicationSubmissionContext(); + ApplicationSubmissionContextPBImpl context = + new ApplicationSubmissionContextPBImpl(); + // most fields in the ApplicationSubmissionContext are not needed, + // but the following few need to be present for recovery to succeed + context.setApplicationId(srcCtx.getApplicationId()); + context.setResource(srcCtx.getResource()); + context.setQueue(srcCtx.getQueue()); + context.setAMContainerResourceRequests( + srcCtx.getAMContainerResourceRequests()); + context.setApplicationType(srcCtx.getApplicationType()); +
[3/3] hadoop git commit: HADOOP-10571. Use Log.*(Object, Throwable) overload to log exceptions. Contributed by Andras Bokor.
HADOOP-10571. Use Log.*(Object, Throwable) overload to log exceptions. Contributed by Andras Bokor. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/c2bbe22c Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/c2bbe22c Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/c2bbe22c Branch: refs/heads/branch-3.0 Commit: c2bbe22c5a664121be2542c54eb6d2f2e376c26c Parents: e011650 Author: Steve LoughranAuthored: Wed Feb 21 11:11:28 2018 + Committer: Steve Loughran Committed: Wed Feb 21 11:11:28 2018 + -- .../org/apache/hadoop/fs/LocalFileSystem.java | 2 +- .../apache/hadoop/ha/ActiveStandbyElector.java | 30 +- .../apache/hadoop/ha/FailoverController.java| 20 +- .../org/apache/hadoop/ha/HealthMonitor.java | 9 +- .../org/apache/hadoop/io/retry/RetryUtils.java | 11 +- .../main/java/org/apache/hadoop/net/DNS.java| 39 +- .../apache/hadoop/service/AbstractService.java | 27 +- .../hadoop/service/ServiceOperations.java | 6 +- .../hadoop/service/TestServiceOperations.java | 3 +- .../hadoop/hdfs/nfs/nfs3/DFSClientCache.java| 21 +- .../hadoop/hdfs/nfs/nfs3/OpenFileCtx.java | 310 +++- .../hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java| 363 +-- .../hadoop/hdfs/server/datanode/DataNode.java | 211 +-- .../hdfs/server/datanode/DataXceiver.java | 172 - .../hdfs/server/namenode/FSNamesystem.java | 2 +- .../server/namenode/ha/StandbyCheckpointer.java | 34 +- .../org/apache/hadoop/hdfs/MiniDFSCluster.java | 2 +- .../hadoop/test/MiniDFSClusterManager.java | 26 +- .../apache/hadoop/mapred/gridmix/Gridmix.java | 22 +- .../swift/http/HttpInputStreamWithRelease.java | 29 +- 20 files changed, 584 insertions(+), 755 deletions(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/c2bbe22c/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java -- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java index 91b2315..538ccdf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java @@ -139,7 +139,7 @@ public class LocalFileSystem extends ChecksumFileSystem { LOG.warn("Ignoring failure of renameTo"); } } catch (IOException e) { - LOG.warn("Error moving bad file " + p + ": " + e); + LOG.warn("Error moving bad file " + p, e); } return false; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/c2bbe22c/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java -- diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java index 93fd2cf..a23fb71 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ha/ActiveStandbyElector.java @@ -888,9 +888,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { Stat oldBreadcrumbStat = fenceOldActive(); writeBreadCrumbNode(oldBreadcrumbStat); - if (LOG.isDebugEnabled()) { -LOG.debug("Becoming active for " + this); - } + LOG.debug("Becoming active for {}", this); + appClient.becomeActive(); state = State.ACTIVE; return true; @@ -910,8 +909,8 @@ public class 
ActiveStandbyElector implements StatCallback, StringCallback { throws KeeperException, InterruptedException { Preconditions.checkState(appData != null, "no appdata"); -LOG.info("Writing znode " + zkBreadCrumbPath + -" to indicate that the local node is the most recent active..."); +LOG.info("Writing znode {} to indicate that the local " + +"node is the most recent active...", zkBreadCrumbPath); if (oldBreadcrumbStat == null) { // No previous active, just create the node createWithRetries(zkBreadCrumbPath, appData, zkAcl, @@ -948,9 +947,8 @@ public class ActiveStandbyElector implements StatCallback, StringCallback { deleteWithRetries(zkBreadCrumbPath, stat.getVersion()); } catch (Exception e) { - LOG.warn("Unable to delete our own bread-crumb of being active
[2/3] hadoop git commit: HADOOP-10571. Use Log.*(Object, Throwable) overload to log exceptions. Contributed by Andras Bokor.
http://git-wip-us.apache.org/repos/asf/hadoop/blob/c2bbe22c/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java index 7a6aa89..db12c07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/RpcProgramNfs3.java @@ -28,8 +28,6 @@ import java.nio.ByteBuffer; import java.nio.charset.Charset; import java.util.EnumSet; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; @@ -137,6 +135,8 @@ import org.jboss.netty.channel.Channel; import org.jboss.netty.channel.ChannelHandlerContext; import com.google.common.annotations.VisibleForTesting; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; /** * RPC program corresponding to nfs daemon. See {@link Nfs3}. @@ -146,7 +146,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { public static final FsPermission umask = new FsPermission( (short) DEFAULT_UMASK); - static final Log LOG = LogFactory.getLog(RpcProgramNfs3.class); + static final Logger LOG = LoggerFactory.getLogger(RpcProgramNfs3.class); private final NfsConfiguration config; private final WriteManager writeManager; @@ -204,7 +204,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { NfsConfigKeys.DFS_NFS_KERBEROS_PRINCIPAL_KEY); superuser = config.get(NfsConfigKeys.NFS_SUPERUSER_KEY, NfsConfigKeys.NFS_SUPERUSER_DEFAULT); -LOG.info("Configured HDFS superuser is " + superuser); +LOG.info("Configured HDFS superuser is {}", superuser); if (!enableDump) { writeDumpDir = null; @@ -230,13 +230,13 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { private void clearDirectory(String writeDumpDir) throws IOException { File dumpDir = new File(writeDumpDir); if (dumpDir.exists()) { - LOG.info("Delete current dump directory " + writeDumpDir); + LOG.info("Delete current dump directory {}", writeDumpDir); if (!(FileUtil.fullyDelete(dumpDir))) { throw new IOException("Cannot remove current dump directory: " + dumpDir); } } -LOG.info("Create new dump directory " + writeDumpDir); +LOG.info("Create new dump directory {}", writeDumpDir); if (!dumpDir.mkdirs()) { throw new IOException("Cannot create dump directory " + dumpDir); } @@ -298,9 +298,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { @Override public NFS3Response nullProcedure() { -if (LOG.isDebugEnabled()) { - LOG.debug("NFS NULL"); -} +LOG.debug("NFS NULL"); return new NFS3Response(Nfs3Status.NFS3_OK); } @@ -336,15 +334,14 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { FileHandle handle = request.getHandle(); if (LOG.isDebugEnabled()) { - LOG.debug("GETATTR for fileId: " + handle.getFileId() + " client: " - + remoteAddress); + LOG.debug("GETATTR for fileId: {} client: {}", + handle.getFileId(), remoteAddress); } - Nfs3FileAttributes attrs = null; try { attrs = writeManager.getFileAttr(dfsClient, handle, iug); } catch (RemoteException r) { - LOG.warn("Exception ", r); + LOG.warn("Exception", r); 
IOException io = r.unwrapRemoteException(); /** * AuthorizationException can be thrown if the user can't be proxy'ed. @@ -355,13 +352,13 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { return new GETATTR3Response(Nfs3Status.NFS3ERR_IO); } } catch (IOException e) { - LOG.info("Can't get file attribute, fileId=" + handle.getFileId(), e); + LOG.info("Can't get file attribute, fileId={}", handle.getFileId(), e); int status = mapErrorStatus(e); response.setStatus(status); return response; } if (attrs == null) { - LOG.error("Can't get path for fileId: " + handle.getFileId()); + LOG.error("Can't get path for fileId: {}", handle.getFileId()); response.setStatus(Nfs3Status.NFS3ERR_STALE); return response; } @@ -376,9 +373,7 @@ public class RpcProgramNfs3 extends RpcProgram implements Nfs3Interface { EnumSet updateFields = newAttr.getUpdateFields(); if (setMode && updateFields.contains(SetAttrField.MODE)) { - if
[1/3] hadoop git commit: HADOOP-10571. Use Log.*(Object, Throwable) overload to log exceptions. Contributed by Andras Bokor.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 e01165009 -> c2bbe22c5 http://git-wip-us.apache.org/repos/asf/hadoop/blob/c2bbe22c/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java -- diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java index b78fc9c..d0ded89 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataXceiver.java @@ -148,10 +148,8 @@ class DataXceiver extends Receiver implements Runnable { (colonIdx < 0) ? remoteAddress : remoteAddress.substring(0, colonIdx); localAddress = peer.getLocalAddressString(); -if (LOG.isDebugEnabled()) { - LOG.debug("Number of active connections is: " - + datanode.getXceiverCount()); -} +LOG.debug("Number of active connections is: {}", +datanode.getXceiverCount()); } /** @@ -187,7 +185,7 @@ class DataXceiver extends Receiver implements Runnable { // This doesn't need to be in a critical section. Althogh the client // can resue the connection to issue a different request, trying sending // an OOB through the recently closed block receiver is harmless. -LOG.info("Sending OOB to peer: " + peer); +LOG.info("Sending OOB to peer: {}", peer); br.sendOOB(); } @@ -199,7 +197,7 @@ class DataXceiver extends Receiver implements Runnable { } xceiver.interrupt(); } -LOG.info("Stopped the writer: " + peer); +LOG.info("Stopped the writer: {}", peer); } /** @@ -239,14 +237,15 @@ class DataXceiver extends Receiver implements Runnable { } catch (InvalidMagicNumberException imne) { if (imne.isHandshake4Encryption()) { LOG.info("Failed to read expected encryption handshake from client " + - "at " + peer.getRemoteAddressString() + ". Perhaps the client " + + "at {}. Perhaps the client " + "is running an older version of Hadoop which does not support " + - "encryption", imne); + "encryption", peer.getRemoteAddressString(), imne); } else { LOG.info("Failed to read expected SASL data transfer protection " + - "handshake from client at " + peer.getRemoteAddressString() + + "handshake from client at {}" + ". Perhaps the client is running an older version of Hadoop " + - "which does not support SASL data transfer protection", imne); + "which does not support SASL data transfer protection", + peer.getRemoteAddressString(), imne); } return; } @@ -302,7 +301,7 @@ class DataXceiver extends Receiver implements Runnable { if (LOG.isTraceEnabled()) { LOG.trace(s, t); } else { - LOG.info(s + "; " + t); + LOG.info("{}; {}", s, t.toString()); } } else if (op == Op.READ_BLOCK && t instanceof SocketTimeoutException) { String s1 = @@ -311,23 +310,19 @@ class DataXceiver extends Receiver implements Runnable { if (LOG.isTraceEnabled()) { LOG.trace(s1, t); } else { - LOG.info(s1 + "; " + t); + LOG.info("{}; {}", s1, t.toString()); } } else if (t instanceof InvalidToken) { // The InvalidToken exception has already been logged in // checkAccess() method and this is not a server error. 
-if (LOG.isTraceEnabled()) { - LOG.trace(s, t); -} +LOG.trace(s, t); } else { LOG.error(s, t); } } finally { collectThreadLocalStates(); - if (LOG.isDebugEnabled()) { -LOG.debug(datanode.getDisplayName() + ":Number of active connections is: " -+ datanode.getXceiverCount()); - } + LOG.debug("{}:Number of active connections is: {}", + datanode.getDisplayName(), datanode.getXceiverCount()); updateCurrentThreadName("Cleaning up"); if (peer != null) { dataXceiverServer.closePeer(peer); @@ -405,21 +400,22 @@ class DataXceiver extends Receiver implements Runnable { DomainSocket sock = peer.getDomainSocket(); sock.sendFileDescriptors(fds, buf, 0, buf.length); if (supportsReceiptVerification) { - LOG.trace("Reading receipt verification byte for " + slotId); + LOG.trace("Reading receipt verification byte for {}", slotId); int val = sock.getInputStream().read(); if (val < 0) { throw new EOFException(); } } else { - LOG.trace("Receipt verification
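The bulk of this change replaces string-concatenated log calls (including the `+ e` form, which logs only e.toString() and drops the stack trace) with SLF4J parameterized logging, passing the exception as the final argument so the full trace is kept; `isDebugEnabled()` guards around simple messages also become unnecessary because `{}` placeholders defer formatting until the level is enabled. A minimal before/after sketch, with illustrative class and variable names:

```java
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class LoggingStyleExample {
  private static final Logger LOG =
      LoggerFactory.getLogger(LoggingStyleExample.class);

  void demo(String path, Exception e, int count) {
    // Before: concatenation formats eagerly, and "+ e" records only
    // e.toString(), losing the stack trace.
    LOG.warn("Error moving bad file " + path + ": " + e);

    // After: placeholders defer formatting, and a Throwable passed as the
    // last argument (with no matching placeholder) is logged with its
    // full stack trace.
    LOG.warn("Error moving bad file {}", path, e);

    // Guarded debug blocks are no longer needed for simple messages.
    LOG.debug("Number of active connections is: {}", count);
  }
}
```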
[1/3] hadoop git commit: HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 09bb378c1 -> e01165009 refs/heads/branch-3.1 e53da3914 -> 958b44504 refs/heads/trunk bdd2a184d -> 004b72237 HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/004b7223 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/004b7223 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/004b7223 Branch: refs/heads/trunk Commit: 004b722372de67635a24e71b264b3b604df4b693 Parents: bdd2a18 Author: Steve LoughranAuthored: Wed Feb 21 10:40:42 2018 + Committer: Steve Loughran Committed: Wed Feb 21 10:40:42 2018 + -- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/004b7223/hadoop-project/pom.xml -- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ce51c99..f4ac239 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -612,7 +612,7 @@ commons-net commons-net -3.1 +3.6 javax.servlet - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[3/3] hadoop git commit: HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran.
HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran. (cherry picked from commit 004b722372de67635a24e71b264b3b604df4b693) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e0116500 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e0116500 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e0116500 Branch: refs/heads/branch-3.0 Commit: e0116500904a1d7299ad4f7baa2125305dc6e316 Parents: 09bb378 Author: Steve LoughranAuthored: Wed Feb 21 10:42:07 2018 + Committer: Steve Loughran Committed: Wed Feb 21 10:42:07 2018 + -- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e0116500/hadoop-project/pom.xml -- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 616cf73..e9b5808 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -589,7 +589,7 @@ commons-net commons-net -3.1 +3.6 javax.servlet - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[2/3] hadoop git commit: HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran.
HADOOP-15247. Move commons-net up to 3.6. Contributed by Steve Loughran. (cherry picked from commit 004b722372de67635a24e71b264b3b604df4b693) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/958b4450 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/958b4450 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/958b4450 Branch: refs/heads/branch-3.1 Commit: 958b445047ba971859d15caefba70a23992c8440 Parents: e53da39 Author: Steve LoughranAuthored: Wed Feb 21 10:41:20 2018 + Committer: Steve Loughran Committed: Wed Feb 21 10:41:20 2018 + -- hadoop-project/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/958b4450/hadoop-project/pom.xml -- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index cbb9f89..74a2afd 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -600,7 +600,7 @@ commons-net commons-net -3.1 +3.6 javax.servlet - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/branch-2.8 0588fde3a -> db1ec739e YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. (cherry picked from commit bdd2a184d78379d99c802a43ebec7d2cef0bbaf7) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/db1ec739 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/db1ec739 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/db1ec739 Branch: refs/heads/branch-2.8 Commit: db1ec739e388b84effc140c134130b0c87cd7ee9 Parents: 0588fde Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 15:40:12 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/db1ec739/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/branch-2.9 14ddac317 -> d9d8fa1d3 YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. (cherry picked from commit bdd2a184d78379d99c802a43ebec7d2cef0bbaf7) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/d9d8fa1d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/d9d8fa1d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/d9d8fa1d Branch: refs/heads/branch-2.9 Commit: d9d8fa1d3aa65946f7fd0502ff8757a032d91fd3 Parents: 14ddac3 Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 15:32:54 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/d9d8fa1d/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/branch-2 14f5797ef -> f7e5e45b7 YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. (cherry picked from commit bdd2a184d78379d99c802a43ebec7d2cef0bbaf7) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f7e5e45b Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f7e5e45b Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f7e5e45b Branch: refs/heads/branch-2 Commit: f7e5e45b72eda41f83695500173c313f032be1e5 Parents: 14f5797 Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 15:26:20 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/f7e5e45b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/branch-3.0 c0d8103ba -> 09bb378c1 YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. (cherry picked from commit bdd2a184d78379d99c802a43ebec7d2cef0bbaf7) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/09bb378c Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/09bb378c Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/09bb378c Branch: refs/heads/branch-3.0 Commit: 09bb378c10f9dc0529450079e755ae7bbfde47d8 Parents: c0d8103 Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 15:06:28 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/09bb378c/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
[Hadoop Wiki] Update of "Books" by Packt Publishing
Dear Wiki user, You have subscribed to a wiki page or wiki category on "Hadoop Wiki" for change notification. The "Books" page has been changed by Packt Publishing: https://wiki.apache.org/hadoop/Books?action=diff=45=46 == Hadoop Videos == + === Learn By Example: Hadoop, MapReduce for Big Data problems (Video) === + + '''Name:''' [[https://www.packtpub.com/big-data-and-business-intelligence/learn-example-hadoop-mapreduce-big-data-problems-video|Learn By Example: Hadoop, MapReduce for Big Data problems (Video)]] + + '''Author:''' Loonycorn + + '''Publisher:''' Packt + + '''Date of Publishing:''' Jan 2018 + + A hands-on workout in Hadoop, MapReduce and the art of thinking "parallel" === The Ultimate Hands-on Hadoop (Video) === - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/branch-3.1 21af27995 -> e53da3914 YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. (cherry picked from commit bdd2a184d78379d99c802a43ebec7d2cef0bbaf7) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/e53da391 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/e53da391 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/e53da391 Branch: refs/heads/branch-3.1 Commit: e53da3914abd171e874a7475faeace987dd59328 Parents: 21af279 Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 14:57:43 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/e53da391/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne.
Repository: hadoop Updated Branches: refs/heads/trunk 86b227a1f -> bdd2a184d YARN-7947. Capacity Scheduler intra-queue preemption can NPE for non-schedulable apps. Contributed by Eric Payne. Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/bdd2a184 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/bdd2a184 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/bdd2a184 Branch: refs/heads/trunk Commit: bdd2a184d78379d99c802a43ebec7d2cef0bbaf7 Parents: 86b227a Author: Sunil GAuthored: Wed Feb 21 14:35:57 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 14:35:57 2018 +0530 -- .../monitor/capacity/FifoIntraQueuePreemptionPlugin.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/bdd2a184/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java index 3332f2a..1776bd4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/FifoIntraQueuePreemptionPlugin.java @@ -412,7 +412,7 @@ public class FifoIntraQueuePreemptionPlugin TempUserPerPartition tmpUser = new TempUserPerPartition( tq.leafQueue.getUser(userName), tq.queueName, Resources.clone(userResourceUsage.getUsed(partition)), -Resources.clone(userSpecificAmUsed), +Resources.clone(amUsed), Resources.clone(userResourceUsage.getReserved(partition)), Resources.none()); - To unsubscribe, e-mail: common-commits-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-commits-h...@hadoop.apache.org
hadoop git commit: YARN-7223. Document GPU isolation feature. Contributed by Wangda Tan.
Repository: hadoop Updated Branches: refs/heads/branch-3.1 4aa4bb46a -> 21af27995 YARN-7223. Document GPU isolation feature. Contributed by Wangda Tan. (cherry picked from commit 86b227a1fbe26b992c5498cfdd3b1691b4362ee9) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/21af2799 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/21af2799 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/21af2799 Branch: refs/heads/branch-3.1 Commit: 21af27995d383536cdb5d8c5bfa8c38fcc71e617 Parents: 4aa4bb4 Author: Sunil GAuthored: Wed Feb 21 14:16:45 2018 +0530 Committer: Sunil G Committed: Wed Feb 21 14:17:18 2018 +0530 -- .../src/site/markdown/UsingGpus.md | 230 +++ 1 file changed, 230 insertions(+) -- http://git-wip-us.apache.org/repos/asf/hadoop/blob/21af2799/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md -- diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md new file mode 100644 index 000..f6000e7 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/src/site/markdown/UsingGpus.md @@ -0,0 +1,230 @@ + + + +# Using GPU On YARN +# Prerequisites + +- As of now, only Nvidia GPUs are supported by YARN +- YARN node managers have to be pre-installed with Nvidia drivers. +- When Docker is used as container runtime context, nvidia-docker 1.0 needs to be installed (Current supported version in YARN for nvidia-docker). + +# Configs + +## GPU scheduling + +In `resource-types.xml` + +Add following properties + +``` + + + yarn.resource-types + yarn.io/gpu + + +``` + +In `yarn-site.xml` + +`DominantResourceCalculator` MUST be configured to enable GPU scheduling/isolation. + +For `Capacity Scheduler`, use following property to configure `DominantResourceCalculator` (In `capacity-scheduler.xml`): + +| Property | Default value | +| --- | --- | +| yarn.scheduler.capacity.resource-calculator | org.apache.hadoop.yarn.util.resource.DominantResourceCalculator | + + +## GPU Isolation + +### In `yarn-site.xml` + +``` + +yarn.nodemanager.resource-plugins +yarn.io/gpu + +``` + +This is to enable GPU isolation module on NodeManager side. + +By default, YARN will automatically detect and config GPUs when above config is set. Following configs need to be set in `yarn-site.xml` only if admin has specialized requirements. + +**1) Allowed GPU Devices** + +| Property | Default value | +| --- | --- | +| yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices | auto | + + Specify GPU devices which can be managed by YARN NodeManager (split by comma). + Number of GPU devices will be reported to RM to make scheduling decisions. + Set to auto (default) let YARN automatically discover GPU resource from + system. + + Manually specify GPU devices if auto detect GPU device failed or admin + only want subset of GPU devices managed by YARN. GPU device is identified + by their minor device number and index. A common approach to get minor + device number of GPUs is using `nvidia-smi -q` and search `Minor Number` + output. + + When minor numbers are specified manually, admin needs to include indice of GPUs + as well, format is `index:minor_number[,index:minor_number...]`. An example + of manual specification is `0:0,1:1,2:2,3:4"`to allow YARN NodeManager to + manage GPU devices with indices `0/1/2/3` and minor number `0/1/2/4`. + numbers . 
**2) Executable to discover GPUs**

| Property | Value |
| --- | --- |
| yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables | /absolute/path/to/nvidia-smi |

When `yarn.nodemanager.resource-plugins.gpu.allowed-gpu-devices` is set to `auto`, the YARN NodeManager needs to run a GPU discovery binary (currently only `nvidia-smi` is supported) to get GPU-related information. When the value is empty (the default), the YARN NodeManager will try to locate the discovery executable itself. An example of the config value is: `/usr/local/bin/nvidia-smi`

**3) Docker Plugin Related Configs**

The following configs can be customized when users need to run GPU applications inside Docker containers. They are not required if the admin follows the default installation/configuration of `nvidia-docker`.

| Property | Default value |
| --- | --- |
| yarn.nodemanager.resource-plugins.gpu.docker-plugin | nvidia-docker-v1 |

Specifies the Docker command plugin for GPU. By default it uses Nvidia Docker v1.0.

| Property | Default value |
| --- | --- |
| yarn.nodemanager.resource-plugins.gpu.docker-plugin.nvidia-docker-v1.endpoint | http://localhost:3476/v1.0/docker/cli |

Specifies the endpoint of `nvidia-docker-plugin`.
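Putting the tables above together, a hedged `yarn-site.xml` sketch for a non-default setup might look like the following (not part of the original patch). The path and endpoint are the example and default values quoted in this section, not required settings.

```
<!-- Sketch only: explicit discovery binary and nvidia-docker-plugin settings,
     using the example values from the tables above. All three properties are
     optional; omit them to keep the defaults (auto-located binary,
     nvidia-docker-v1, http://localhost:3476/v1.0/docker/cli). -->
<property>
  <name>yarn.nodemanager.resource-plugins.gpu.path-to-discovery-executables</name>
  <value>/usr/local/bin/nvidia-smi</value>
</property>
<property>
  <name>yarn.nodemanager.resource-plugins.gpu.docker-plugin</name>
  <value>nvidia-docker-v1</value>
</property>
<property>
  <name>yarn.nodemanager.resource-plugins.gpu.docker-plugin.nvidia-docker-v1.endpoint</name>
  <value>http://localhost:3476/v1.0/docker/cli</value>
</property>
```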
hadoop git commit: YARN-7223. Document GPU isolation feature. Contributed by Wangda Tan.
Repository: hadoop Updated Branches: refs/heads/trunk 121e1e128 -> 86b227a1f

YARN-7223. Document GPU isolation feature. Contributed by Wangda Tan.

Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo
Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/86b227a1
Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/86b227a1
Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/86b227a1

Branch: refs/heads/trunk
Commit: 86b227a1fbe26b992c5498cfdd3b1691b4362ee9
Parents: 121e1e1
Author: Sunil G
Authored: Wed Feb 21 14:16:45 2018 +0530
Committer: Sunil G
Committed: Wed Feb 21 14:16:45 2018 +0530
--
 .../src/site/markdown/UsingGpus.md | 230 +++
 1 file changed, 230 insertions(+)