Author: umamahesh Date: Mon May 12 12:43:59 2014 New Revision: 1593948 URL: http://svn.apache.org/r1593948 Log: Merge from trunk to HDFS-2006 branch
Added: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/HadoopStreaming.apt.vm - copied unchanged from r1593927, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/HadoopStreaming.apt.vm hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduceTutorial.apt.vm - copied unchanged from r1593927, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/apt/MapReduceTutorial.apt.vm hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/ (props changed) hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt (contents, props changed) hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm (props changed) hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJHSDelegationTokenSecretManager.java hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml 
hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/pom.xml Propchange: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project:r1588992-1593927 Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt Mon May 12 12:43:59 2014 @@ -175,6 +175,22 @@ Release 2.5.0 - UNRELEASED MAPREDUCE-5812. Make job context available to OutputCommitter.isRecoverySupported() (Mohammad Kamrul Islam via jlowe) + MAPREDUCE-5638. Port Hadoop Archives document to trunk (Akira AJISAKA via + jeagles) + + MAPREDUCE-5402. In DynamicInputFormat, change MAX_CHUNKS_TOLERABLE, + MAX_CHUNKS_IDEAL, MIN_RECORDS_PER_CHUNK and SPLIT_RATIO to be configurable. + (Tsuyoshi OZAWA via szetszwo) + + MAPREDUCE-5637. Convert Hadoop Streaming document to APT (Akira AJISAKA via + jeagles) + + MAPREDUCE-5636. Convert MapReduce Tutorial document to APT (Akira AJISAKA + via jeagles) + + MAPREDUCE-5774. Job overview in History UI should list reducer phases in + chronological order. (Gera Shegalov via kasha) + OPTIMIZATIONS BUG FIXES @@ -200,6 +216,9 @@ Release 2.5.0 - UNRELEASED MAPREDUCE-5749. TestRMContainerAllocator#testReportedAppProgress Failed (jlowe) + MAPREDUCE-5884. 
History server uses short user name when canceling tokens + (Mohammad Kamrul Islam via jlowe) + Release 2.4.1 - UNRELEASED INCOMPATIBLE CHANGES Propchange: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/CHANGES.txt ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt:r1588992-1593927 Propchange: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm ------------------------------------------------------------------------------ --- svn:mergeinfo (added) +++ svn:mergeinfo Mon May 12 12:43:59 2014 @@ -0,0 +1,13 @@ +/hadoop/common/branches/HDFS-1623/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1152502-1296519 +/hadoop/common/branches/HDFS-2802/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1360400-1480829 +/hadoop/common/branches/HDFS-2832/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1513717-1550362 +/hadoop/common/branches/HDFS-3042/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1306184-1342109 +/hadoop/common/branches/HDFS-3077/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1363593-1396941 +/hadoop/common/branches/HDFS-347/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1430995-1467533 +/hadoop/common/branches/HDFS-4685/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1547224-1569863 +/hadoop/common/branches/HDFS-4949/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1509426-1536569 
+/hadoop/common/branches/HDFS-5535/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1550130-1574256 +/hadoop/common/branches/YARN-321/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1556680-1561449 +/hadoop/common/branches/branch-2/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1526848,1541680,1541699 +/hadoop/common/branches/branch-2.0.4-alpha/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:1463804 +/hadoop/core/branches/branch-0.19/mapred/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm:713112 Added: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm?rev=1593948&view=auto ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm (added) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/HadoopArchives.md.vm Mon May 12 12:43:59 2014 @@ -0,0 +1,138 @@ +<!--- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +#set ( $H3 = '###' ) + +Hadoop Archives Guide +===================== + + - [Overview](#Overview) + - [How to Create an Archive](#How_to_Create_an_Archive) + - [How to Look Up Files in Archives](#How_to_Look_Up_Files_in_Archives) + - [Archives Examples](#Archives_Examples) + - [Creating an Archive](#Creating_an_Archive) + - [Looking Up Files](#Looking_Up_Files) + - [Hadoop Archives and MapReduce](#Hadoop_Archives_and_MapReduce) + +Overview +-------- + + Hadoop archives are special format archives. A Hadoop archive maps to a file + system directory. A Hadoop archive always has a \*.har extension. A Hadoop + archive directory contains metadata (in the form of _index and _masterindex) + and data (part-\*) files. The _index file contains the names of the files that + are part of the archive and the location within the part files. + +How to Create an Archive +------------------------ + + `Usage: hadoop archive -archiveName name -p <parent> <src>* <dest>` + + -archiveName is the name of the archive you would like to create. An example + would be foo.har. The name should have a \*.har extension. The parent argument + is to specify the relative path to which the files should be archived. + An example would be: + + `-p /foo/bar a/b/c e/f/g` + + Here /foo/bar is the parent path and a/b/c, e/f/g are relative paths to + parent. Note that this is a Map/Reduce job that creates the archives. You + would need a map reduce cluster to run this. For a detailed example, see the later + sections.
+ + If you just want to archive a single directory /foo/bar then you can just use + + `hadoop archive -archiveName zoo.har -p /foo/bar /outputdir` + +How to Look Up Files in Archives +-------------------------------- + + The archive exposes itself as a file system layer. So all the fs shell + commands in the archives work but with a different URI. Also, note that + archives are immutable. So, renames, deletes and creates return an error. + The URI for Hadoop Archives is + + `har://scheme-hostname:port/archivepath/fileinarchive` + + If no scheme is provided it assumes the underlying filesystem. In that case + the URI would look like + + `har:///archivepath/fileinarchive` + +Archives Examples +----------------- + +$H3 Creating an Archive + + `hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo` + + The above example is creating an archive using /user/hadoop as the relative + archive directory. The directories /user/hadoop/dir1 and /user/hadoop/dir2 + will be archived in the following file system directory -- /user/zoo/foo.har. + Archiving does not delete the input files. If you want to delete the input + files after creating the archives (to reduce namespace), you will have to do + it on your own. + +$H3 Looking Up Files + + Looking up files in hadoop archives is as easy as doing an ls on the + filesystem. After you have archived the directories /user/hadoop/dir1 and + /user/hadoop/dir2 as in the example above, to see all the files in the + archives you can just run: + + `hdfs dfs -ls -R har:///user/zoo/foo.har/` + + To understand the significance of the -p argument, let's go through the above + example again.
If you just do an ls (not ls -R) on the hadoop archive using + + `hdfs dfs -ls har:///user/zoo/foo.har` + + The output should be: + +``` +har:///user/zoo/foo.har/dir1 +har:///user/zoo/foo.har/dir2 +``` + + As you can recall the archives were created with the following command + + `hadoop archive -archiveName foo.har -p /user/hadoop dir1 dir2 /user/zoo` + + If we were to change the command to: + + `hadoop archive -archiveName foo.har -p /user/ hadoop/dir1 hadoop/dir2 /user/zoo` + + then an ls on the hadoop archive using + + `hdfs dfs -ls har:///user/zoo/foo.har` + + would give you + +``` +har:///user/zoo/foo.har/hadoop/dir1 +har:///user/zoo/foo.har/hadoop/dir2 +``` + + Notice that the archived files have been archived relative to /user/ rather + than /user/hadoop. + +Hadoop Archives and MapReduce +----------------------------- + + Using Hadoop Archives in MapReduce is as easy as specifying a different input + filesystem than the default file system. If you have a hadoop archive stored + in HDFS in /user/zoo/foo.har then for using this archive for MapReduce input, + all you need is to specify the input directory as har:///user/zoo/foo.har. Since + Hadoop Archives is exposed as a file system, MapReduce will be able to use all + the logical input files in Hadoop Archives as input.
Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/HistoryClientService.java Mon May 12 12:43:59 2014 @@ -396,7 +396,7 @@ public class HistoryClientService extend .array(), new Text(protoToken.getKind()), new Text( protoToken.getService())); - String user = UserGroupInformation.getCurrentUser().getShortUserName(); + String user = UserGroupInformation.getCurrentUser().getUserName(); jhsDTSecretManager.cancelToken(token, user); return Records.newRecord(CancelDelegationTokenResponse.class); } Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- 
hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/main/java/org/apache/hadoop/mapreduce/v2/hs/webapp/HsJobBlock.java Mon May 12 12:43:59 2014 @@ -107,9 +107,9 @@ public class HsJobBlock extends HtmlBloc infoBlock._("Average Map Time", StringUtils.formatTime(job.getAvgMapTime())); } if(job.getNumReduces() > 0) { - infoBlock._("Average Reduce Time", StringUtils.formatTime(job.getAvgReduceTime())); infoBlock._("Average Shuffle Time", StringUtils.formatTime(job.getAvgShuffleTime())); infoBlock._("Average Merge Time", StringUtils.formatTime(job.getAvgMergeTime())); + infoBlock._("Average Reduce Time", StringUtils.formatTime(job.getAvgReduceTime())); } for (ConfEntryInfo entry : job.getAcls()) { Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJHSDelegationTokenSecretManager.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJHSDelegationTokenSecretManager.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJHSDelegationTokenSecretManager.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJHSDelegationTokenSecretManager.java Mon May 12 12:43:59 2014 @@ -30,6 +30,8 @@ import java.util.Map; 
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.v2.api.MRDelegationTokenIdentifier; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.authentication.util.KerberosName; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.delegation.DelegationKey; import org.junit.Test; @@ -87,6 +89,24 @@ public class TestJHSDelegationTokenSecre assertEquals("sequence number restore", tokenId2.getSequenceNumber() + 1, tokenId3.getSequenceNumber()); mgr.cancelToken(token1, "tokenOwner"); + + // Testing with full principal name + MRDelegationTokenIdentifier tokenIdFull = new MRDelegationTokenIdentifier( + new Text("tokenOwner/localhost@LOCALHOST"), new Text("tokenRenewer"), + new Text("tokenUser")); + KerberosName.setRules("RULE:[1:$1]\nRULE:[2:$1]"); + Token<MRDelegationTokenIdentifier> tokenFull = new Token<MRDelegationTokenIdentifier>( + tokenIdFull, mgr); + // Negative test + try { + mgr.cancelToken(tokenFull, "tokenOwner"); + } catch (AccessControlException ace) { + assertTrue(ace.getMessage().contains( + "is not authorized to cancel the token")); + } + // Succeed to cancel with full principal + mgr.cancelToken(tokenFull, tokenIdFull.getOwner().toString()); + long tokenRenewDate3 = mgr.getAllTokens().get(tokenId3).getRenewDate(); mgr.stopThreads(); Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- 
hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapreduce/security/TestJHSSecurity.java Mon May 12 12:43:59 2014 @@ -198,6 +198,11 @@ public class TestJHSSecurity { fail("Unexpected exception" + e); } cancelDelegationToken(loggedInUser, hsService, token); + + // Testing the token with different renewer to cancel the token + Token tokenWithDifferentRenewer = getDelegationToken(loggedInUser, + hsService, "yarn"); + cancelDelegationToken(loggedInUser, hsService, tokenWithDifferentRenewer); if (clientUsingDT != null) { // RPC.stopProxy(clientUsingDT); clientUsingDT = null; Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml Mon May 12 12:43:59 2014 @@ -72,24 +72,6 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <scope>provided</scope> - <exclusions> - <exclusion> - <groupId>commons-el</groupId> - <artifactId>commons-el</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-runtime</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-compiler</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jsp-2.1-jetty</artifactId> - </exclusion> - 
</exclusions> </dependency> <dependency> Modified: hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/pom.xml?rev=1593948&r1=1593947&r2=1593948&view=diff ============================================================================== --- hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/pom.xml (original) +++ hadoop/common/branches/HDFS-2006/hadoop-mapreduce-project/pom.xml Mon May 12 12:43:59 2014 @@ -81,24 +81,6 @@ <groupId>org.apache.hadoop</groupId> <artifactId>hadoop-common</artifactId> <scope>provided</scope> - <exclusions> - <exclusion> - <groupId>commons-el</groupId> - <artifactId>commons-el</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-runtime</artifactId> - </exclusion> - <exclusion> - <groupId>tomcat</groupId> - <artifactId>jasper-compiler</artifactId> - </exclusion> - <exclusion> - <groupId>org.mortbay.jetty</groupId> - <artifactId>jsp-2.1-jetty</artifactId> - </exclusion> - </exclusions> </dependency> <dependency>