Repository: hadoop Updated Branches: refs/heads/branch-2 f2009dc89 -> 492142097
MAPREDUCE-6495. Docs for archive-logs tool (rkanter) (cherry picked from commit 0c4af0f99811a7138954391df3761aef9ff09155) Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/49214209 Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/49214209 Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/49214209 Branch: refs/heads/branch-2 Commit: 492142097bc04ad9bd0abff2fc7176a9ad63fc73 Parents: f2009dc Author: Robert Kanter <[email protected]> Authored: Tue Oct 20 17:34:34 2015 -0700 Committer: Robert Kanter <[email protected]> Committed: Tue Oct 20 17:34:56 2015 -0700 ---------------------------------------------------------------------- hadoop-mapreduce-project/CHANGES.txt | 2 + .../src/site/markdown/MapredCommands.md | 7 ++ hadoop-project/src/site/site.xml | 1 + .../apache/hadoop/tools/HadoopArchiveLogs.java | 4 +- .../src/site/markdown/HadoopArchiveLogs.md | 85 ++++++++++++++++++++ .../src/site/resources/css/site.css | 30 +++++++ 6 files changed, 127 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-mapreduce-project/CHANGES.txt ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 2157e17..ea90466 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -331,6 +331,8 @@ Release 2.8.0 - UNRELEASED MAPREDUCE-6302. Preempt reducers after a configurable timeout irrespective of headroom. (kasha) + MAPREDUCE-6495. 
Docs for archive-logs tool (rkanter) + Release 2.7.2 - UNRELEASED INCOMPATIBLE CHANGES http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md index 1fba481..3249476 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/MapredCommands.md @@ -18,6 +18,7 @@ MapReduce Commands Guide * [Overview](#Overview) * [User Commands](#User_Commands) * [archive](#archive) + * [archive-logs](#archive-logs) * [classpath](#classpath) * [distcp](#distcp) * [job](#job) @@ -53,6 +54,12 @@ Commands useful for users of a hadoop cluster. Creates a hadoop archive. More information can be found at [Hadoop Archives Guide](../../hadoop-archives/HadoopArchives.html). +### `archive-logs` + +A tool to combine YARN aggregated logs into Hadoop archives to reduce the number +of files in HDFS. More information can be found at +[Hadoop Archive Logs Guide](../../hadoop-archive-logs/HadoopArchiveLogs.html). 
+ ### `classpath` Usage: `yarn classpath [--glob |--jar <path> |-h |--help]` http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-project/src/site/site.xml ---------------------------------------------------------------------- diff --git a/hadoop-project/src/site/site.xml b/hadoop-project/src/site/site.xml index 9b888de..d4fde56 100644 --- a/hadoop-project/src/site/site.xml +++ b/hadoop-project/src/site/site.xml @@ -154,6 +154,7 @@ <menu name="Tools" inherit="top"> <item name="Hadoop Streaming" href="hadoop-streaming/HadoopStreaming.html"/> <item name="Hadoop Archives" href="hadoop-archives/HadoopArchives.html"/> + <item name="Hadoop Archive Logs" href="hadoop-archive-logs/HadoopArchiveLogs.html"/> <item name="DistCp" href="hadoop-distcp/DistCp.html"/> <item name="GridMix" href="hadoop-gridmix/GridMix.html"/> <item name="Rumen" href="hadoop-rumen/Rumen.html"/> http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java index b633521..363e287 100644 --- a/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java +++ b/hadoop-tools/hadoop-archive-logs/src/main/java/org/apache/hadoop/tools/HadoopArchiveLogs.java @@ -221,7 +221,7 @@ public class HadoopArchiveLogs implements Tool { CommandLine commandLine = parser.parse(opts, args); if (commandLine.hasOption(HELP_OPTION)) { HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("yarn archive-logs", opts); + formatter.printHelp("mapred archive-logs", opts); System.exit(0); } if (commandLine.hasOption(MAX_ELIGIBLE_APPS_OPTION)) { @@ -254,7 +254,7 @@ public class HadoopArchiveLogs implements Tool { 
} } catch (ParseException pe) { HelpFormatter formatter = new HelpFormatter(); - formatter.printHelp("yarn archive-logs", opts); + formatter.printHelp("mapred archive-logs", opts); throw pe; } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md new file mode 100644 index 0000000..a54c9a9 --- /dev/null +++ b/hadoop-tools/hadoop-archive-logs/src/site/markdown/HadoopArchiveLogs.md @@ -0,0 +1,85 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +Hadoop Archive Logs Guide +========================= + + - [Overview](#Overview) + - [How to Archive Logs](#How_to_Archive_Logs) + +Overview +-------- + +For clusters with a lot of Yarn aggregated logs, it can be helpful to combine +them into hadoop archives in order to reduce the number of small files, and +hence the stress on the NameNode. This tool provides an easy way to do this. +Aggregated logs in hadoop archives can still be read by the Job History Server +and by the `yarn logs` command. + +For more on hadoop archives, see +[Hadoop Archives Guide](../hadoop-archives/HadoopArchives.html). 
+ +How to Archive Logs +------------------- + + usage: mapred archive-logs + -force Force recreating the working directory if + an existing one is found. This should + only be used if you know that another + instance is not currently running + -help Prints this message + -maxEligibleApps <n> The maximum number of eligible apps to + process (default: -1 (all)) + -maxTotalLogsSize <megabytes> The maximum total logs size (in + megabytes) required to be eligible + (default: 1024) + -memory <megabytes> The amount of memory (in megabytes) for + each container (default: 1024) + -minNumberLogFiles <n> The minimum number of log files required + to be eligible (default: 20) + -verbose Print more details. + +The tool only supports running one instance on a cluster at a time in order +to prevent conflicts. It does this by checking for the existence of a +directory named ``archive-logs-work`` under +``yarn.nodemanager.remote-app-log-dir`` in HDFS +(default: ``/tmp/logs/archive-logs-work``). If for some reason that +directory was not cleaned up properly, and the tool refuses to run, you can +force it with the ``-force`` option. + +The ``-help`` option prints out the usage information. + +The tool works by performing the following procedure: + + 1. Determine the list of eligible applications, based on the following + criteria: + - is not already archived + - its aggregation status has successfully completed + - has at least ``-minNumberLogFiles`` log files + - the sum of its log files size is less than ``-maxTotalLogsSize`` megabytes + 2. If there are more than ``-maxEligibleApps`` applications found, the + newest applications are dropped. They can be processed next time. + 3. A shell script is generated based on the eligible applications + 4. The Distributed Shell program is run with the aforementioned script. It + will run with ``-maxEligibleApps`` containers, one to process each + application, and with ``-memory`` megabytes of memory. 
Each container runs + the ``hadoop archives`` command for a single application and replaces + its aggregated log files with the resulting archive. + +The ``-verbose`` option makes the tool print more details about what it's +doing. + +The end result of running the tool is that the original aggregated log files for +a processed application will be replaced by a hadoop archive containing all of +those logs. http://git-wip-us.apache.org/repos/asf/hadoop/blob/49214209/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css ---------------------------------------------------------------------- diff --git a/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css b/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css new file mode 100644 index 0000000..f830baa --- /dev/null +++ b/hadoop-tools/hadoop-archive-logs/src/site/resources/css/site.css @@ -0,0 +1,30 @@ +/* +* Licensed to the Apache Software Foundation (ASF) under one or more +* contributor license agreements. See the NOTICE file distributed with +* this work for additional information regarding copyright ownership. +* The ASF licenses this file to You under the Apache License, Version 2.0 +* (the "License"); you may not use this file except in compliance with +* the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +#banner { + height: 93px; + background: none; +} + +#bannerLeft img { + margin-left: 30px; + margin-top: 10px; +} + +#bannerRight img { + margin: 17px; +} +
