Author: clamb Date: Mon Jun 16 18:13:57 2014 New Revision: 1602947 URL: http://svn.apache.org/r1602947 Log: merge from trunk r1602933
Added: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/resources/testBOM.txt - copied unchanged from r1602933, hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/resources/testBOM.txt Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/ (props changed) hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt (contents, props changed) hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/conf/ (props changed) hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LineRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/WrappedRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/WrappedRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml (props changed) hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestLineRecordReader.java hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java Propchange: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project:r1598784-1602933 Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt Mon Jun 16 18:13:57 2014 @@ -77,6 +77,9 @@ Trunk (Unreleased) MAPREDUCE-5196. Add bookkeeping for managing checkpoints of task state. (Carlo Curino via cdouglas) + MAPREDUCE-5912. Task.calculateOutputSize does not handle Windows files after + MAPREDUCE-5196. (Remus Rusanu via cnauroth) + BUG FIXES MAPREDUCE-5714. Removed forceful JVM exit in shutDownJob. @@ -204,6 +207,12 @@ Release 2.5.0 - UNRELEASED MAPREDUCE-5899. Support incremental data copy in DistCp. (jing9) + MAPREDUCE-5886. Allow wordcount example job to accept multiple input paths. + (cnauroth) + + MAPREDUCE-5834. Increased test-timeouts in TestGridMixClasses to avoid + occassional failures. (Mit Desai via vinodkv) + OPTIMIZATIONS BUG FIXES @@ -247,7 +256,15 @@ Release 2.5.0 - UNRELEASED MAPREDUCE-5895. Close streams properly to avoid leakage in TaskLog. (Kousuke Saruta via devaraj) -Release 2.4.1 - UNRELEASED + MAPREDUCE-5777. Support utf-8 text with Byte Order Marker. + (Zhihai Xu via kasha) + + MAPREDUCE-5898. distcp to support preserving HDFS extended attributes(XAttrs) + (Yi Liu via umamahesh) + + MAPREDUCE-5920. Add Xattr option in DistCp docs. (Yi Liu via cnauroth) + +Release 2.4.1 - 2014-06-23 INCOMPATIBLE CHANGES Propchange: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/CHANGES.txt ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt:r1598784-1602933 Propchange: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/conf/ ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/conf:r1598784-1602933 Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml Mon Jun 16 18:13:57 2014 @@ -91,6 +91,7 @@ <configuration> <excludes> <exclude>src/test/resources/recordSpanningMultipleSplits.txt</exclude> + <exclude>src/test/resources/testBOM.txt</exclude> </excludes> </configuration> </plugin> Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/JobConf.java Mon Jun 16 18:13:57 2014 @@ -112,7 +112,7 @@ import org.apache.log4j.Level; @InterfaceAudience.Public @InterfaceStability.Stable public class JobConf extends Configuration { - + private static final Log LOG = LogFactory.getLog(JobConf.class); static{ @@ -882,7 +882,7 @@ public class JobConf extends Configurati JobContext.KEY_COMPARATOR, null, RawComparator.class); if (theClass != null) return ReflectionUtils.newInstance(theClass, this); - return WritableComparator.get(getMapOutputKeyClass().asSubclass(WritableComparable.class)); + return WritableComparator.get(getMapOutputKeyClass().asSubclass(WritableComparable.class), this); } /** Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LineRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LineRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LineRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/LineRecordReader.java Mon Jun 16 18:13:57 2014 @@ -197,6 +197,39 @@ public class LineRecordReader implements return retVal; } + private int skipUtfByteOrderMark(Text value) throws IOException { + // Strip BOM(Byte Order Mark) + // Text only support UTF-8, we only need to check UTF-8 BOM + // (0xEF,0xBB,0xBF) at the start of the text stream. + int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength, + Integer.MAX_VALUE); + int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos)); + // Even we read 3 extra bytes for the first line, + // we won't alter existing behavior (no backwards incompat issue). + // Because the newSize is less than maxLineLength and + // the number of bytes copied to Text is always no more than newSize. + // If the return size from readLine is not less than maxLineLength, + // we will discard the current line and read the next line. + pos += newSize; + int textLength = value.getLength(); + byte[] textBytes = value.getBytes(); + if ((textLength >= 3) && (textBytes[0] == (byte)0xEF) && + (textBytes[1] == (byte)0xBB) && (textBytes[2] == (byte)0xBF)) { + // find UTF-8 BOM, strip it. + LOG.info("Found UTF-8 BOM and skipped it"); + textLength -= 3; + newSize -= 3; + if (textLength > 0) { + // It may work to use the same buffer and not do the copyBytes + textBytes = value.copyBytes(); + value.set(textBytes, 3, textLength); + } else { + value.clear(); + } + } + return newSize; + } + /** Read a line. */ public synchronized boolean next(LongWritable key, Text value) throws IOException { @@ -206,11 +239,17 @@ public class LineRecordReader implements while (getFilePosition() <= end || in.needAdditionalRecordAfterSplit()) { key.set(pos); - int newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos)); + int newSize = 0; + if (pos == 0) { + newSize = skipUtfByteOrderMark(value); + } else { + newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos)); + pos += newSize; + } + if (newSize == 0) { return false; } - pos += newSize; if (newSize < maxLineLength) { return true; } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/Task.java Mon Jun 16 18:13:57 2014 @@ -1120,8 +1120,8 @@ abstract public class Task implements Wr if (isMapTask() && conf.getNumReduceTasks() > 0) { try { Path mapOutput = mapOutputFile.getOutputFile(); - FileSystem fs = mapOutput.getFileSystem(conf); - return fs.getFileStatus(mapOutput).getLen(); + FileSystem localFS = FileSystem.getLocal(conf); + return localFS.getFileStatus(mapOutput).getLen(); } catch (IOException e) { LOG.warn ("Could not find output size " , e); } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/CompositeRecordReader.java Mon Jun 16 18:13:57 2014 @@ -131,7 +131,7 @@ public abstract class CompositeRecordRea public void add(ComposableRecordReader<K,? extends V> rr) throws IOException { kids[rr.id()] = rr; if (null == q) { - cmp = WritableComparator.get(rr.createKey().getClass()); + cmp = WritableComparator.get(rr.createKey().getClass(), conf); q = new PriorityQueue<ComposableRecordReader<K,?>>(3, new Comparator<ComposableRecordReader<K,?>>() { public int compare(ComposableRecordReader<K,?> o1, Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/WrappedRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/WrappedRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/WrappedRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/join/WrappedRecordReader.java Mon Jun 16 18:13:57 2014 @@ -22,6 +22,8 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; @@ -38,7 +40,7 @@ import org.apache.hadoop.mapred.RecordRe @InterfaceStability.Stable public class WrappedRecordReader<K extends WritableComparable, U extends Writable> - implements ComposableRecordReader<K,U> { + implements ComposableRecordReader<K,U>, Configurable { private boolean empty = false; private RecordReader<K,U> rr; @@ -47,6 +49,7 @@ public class WrappedRecordReader<K exten private K khead; // key at the top of this RR private U vhead; // value assoc with khead private WritableComparator cmp; + private Configuration conf; private ResetableIterator<U> vjoin; @@ -55,13 +58,20 @@ public class WrappedRecordReader<K exten */ WrappedRecordReader(int id, RecordReader<K,U> rr, Class<? extends WritableComparator> cmpcl) throws IOException { + this(id, rr, cmpcl, null); + } + + WrappedRecordReader(int id, RecordReader<K,U> rr, + Class<? extends WritableComparator> cmpcl, + Configuration conf) throws IOException { this.id = id; this.rr = rr; + this.conf = (conf == null) ? new Configuration() : conf; khead = rr.createKey(); vhead = rr.createValue(); try { cmp = (null == cmpcl) - ? WritableComparator.get(khead.getClass()) + ? WritableComparator.get(khead.getClass(), this.conf) : cmpcl.newInstance(); } catch (InstantiationException e) { throw (IOException)new IOException().initCause(e); @@ -207,4 +217,13 @@ public class WrappedRecordReader<K exten return 42; } + @Override + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Override + public Configuration getConf() { + return conf; + } } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/input/LineRecordReader.java Mon Jun 16 18:13:57 2014 @@ -134,6 +134,39 @@ public class LineRecordReader extends Re return retVal; } + private int skipUtfByteOrderMark() throws IOException { + // Strip BOM(Byte Order Mark) + // Text only support UTF-8, we only need to check UTF-8 BOM + // (0xEF,0xBB,0xBF) at the start of the text stream. + int newMaxLineLength = (int) Math.min(3L + (long) maxLineLength, + Integer.MAX_VALUE); + int newSize = in.readLine(value, newMaxLineLength, maxBytesToConsume(pos)); + // Even we read 3 extra bytes for the first line, + // we won't alter existing behavior (no backwards incompat issue). + // Because the newSize is less than maxLineLength and + // the number of bytes copied to Text is always no more than newSize. + // If the return size from readLine is not less than maxLineLength, + // we will discard the current line and read the next line. + pos += newSize; + int textLength = value.getLength(); + byte[] textBytes = value.getBytes(); + if ((textLength >= 3) && (textBytes[0] == (byte)0xEF) && + (textBytes[1] == (byte)0xBB) && (textBytes[2] == (byte)0xBF)) { + // find UTF-8 BOM, strip it. + LOG.info("Found UTF-8 BOM and skipped it"); + textLength -= 3; + newSize -= 3; + if (textLength > 0) { + // It may work to use the same buffer and not do the copyBytes + textBytes = value.copyBytes(); + value.set(textBytes, 3, textLength); + } else { + value.clear(); + } + } + return newSize; + } + public boolean nextKeyValue() throws IOException { if (key == null) { key = new LongWritable(); @@ -146,9 +179,14 @@ public class LineRecordReader extends Re // We always read one extra line, which lies outside the upper // split limit i.e. (end - 1) while (getFilePosition() <= end || in.needAdditionalRecordAfterSplit()) { - newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos)); - pos += newSize; - if (newSize < maxLineLength) { + if (pos == 0) { + newSize = skipUtfByteOrderMark(); + } else { + newSize = in.readLine(value, maxLineLength, maxBytesToConsume(pos)); + pos += newSize; + } + + if ((newSize == 0) || (newSize < maxLineLength)) { break; } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/CompositeRecordReader.java Mon Jun 16 18:13:57 2014 @@ -109,7 +109,7 @@ public abstract class CompositeRecordRea } // create priority queue if (null == q) { - cmp = WritableComparator.get(keyclass); + cmp = WritableComparator.get(keyclass, conf); q = new PriorityQueue<ComposableRecordReader<K,?>>(3, new Comparator<ComposableRecordReader<K,?>>() { public int compare(ComposableRecordReader<K,?> o1, Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/WrappedRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/WrappedRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/WrappedRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/lib/join/WrappedRecordReader.java Mon Jun 16 18:13:57 2014 @@ -92,7 +92,7 @@ public class WrappedRecordReader<K exten keyclass = key.getClass().asSubclass(WritableComparable.class); valueclass = value.getClass(); if (cmp == null) { - cmp = WritableComparator.get(keyclass); + cmp = WritableComparator.get(keyclass, conf); } } } Propchange: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml ------------------------------------------------------------------------------ Merged /hadoop/common/trunk/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml:r1598784-1602933 Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/site/markdown/DistCp.md.vm Mon Jun 16 18:13:57 2014 @@ -196,7 +196,7 @@ Command Line Options Flag | Description | Notes ----------------- | ------------------------------------ | -------- -`-p[rbugpca]` | Preserve r: replication number b: block size u: user g: group p: permission c: checksum-type a: ACL | Modification times are not preserved. Also, when `-update` is specified, status updates will **not** be synchronized unless the file sizes also differ (i.e. unless the file is re-created). If -pa is specified, DistCp preserves the permissions also because ACLs are a super-set of permissions. +`-p[rbugpcax]` | Preserve r: replication number b: block size u: user g: group p: permission c: checksum-type a: ACL x: XAttr | Modification times are not preserved. Also, when `-update` is specified, status updates will **not** be synchronized unless the file sizes also differ (i.e. unless the file is re-created). If -pa is specified, DistCp preserves the permissions also because ACLs are a super-set of permissions. `-i` | Ignore failures | As explained in the Appendix, this option will keep more accurate statistics about the copy than the default case. It also preserves logs from failed copies, which can be valuable for debugging. Finally, a failing map will not cause the job to fail before all splits are attempted. `-log <logdir>` | Write logs to \<logdir\> | DistCp keeps logs of each file it attempts to copy as map output. If a map fails, the log output will not be retained if it is re-executed. `-m <num_maps>` | Maximum number of simultaneous copies | Specify the number of maps to copy data. Note that more maps may not necessarily improve throughput. Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapred/TestLineRecordReader.java Mon Jun 16 18:13:57 2014 @@ -188,4 +188,41 @@ public class TestLineRecordReader { checkRecordSpanningMultipleSplits("recordSpanningMultipleSplits.txt.bz2", 200 * 1000, true); } + + @Test + public void testStripBOM() throws IOException { + // the test data contains a BOM at the start of the file + // confirm the BOM is skipped by LineRecordReader + String UTF8_BOM = "\uFEFF"; + URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt"); + assertNotNull("Cannot find testBOM.txt", testFileUrl); + File testFile = new File(testFileUrl.getFile()); + Path testFilePath = new Path(testFile.getAbsolutePath()); + long testFileSize = testFile.length(); + Configuration conf = new Configuration(); + conf.setInt(org.apache.hadoop.mapreduce.lib.input. + LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); + + // read the data and check whether BOM is skipped + FileSplit split = new FileSplit(testFilePath, 0, testFileSize, + (String[])null); + LineRecordReader reader = new LineRecordReader(conf, split); + LongWritable key = new LongWritable(); + Text value = new Text(); + int numRecords = 0; + boolean firstLine = true; + boolean skipBOM = true; + while (reader.next(key, value)) { + if (firstLine) { + firstLine = false; + if (value.toString().startsWith(UTF8_BOM)) { + skipBOM = false; + } + } + ++numRecords; + } + reader.close(); + + assertTrue("BOM is not skipped", skipBOM); + } } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestLineRecordReader.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestLineRecordReader.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestLineRecordReader.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/lib/input/TestLineRecordReader.java Mon Jun 16 18:13:57 2014 @@ -193,4 +193,42 @@ public class TestLineRecordReader { 200 * 1000, true); } + + @Test + public void testStripBOM() throws IOException { + // the test data contains a BOM at the start of the file + // confirm the BOM is skipped by LineRecordReader + String UTF8_BOM = "\uFEFF"; + URL testFileUrl = getClass().getClassLoader().getResource("testBOM.txt"); + assertNotNull("Cannot find testBOM.txt", testFileUrl); + File testFile = new File(testFileUrl.getFile()); + Path testFilePath = new Path(testFile.getAbsolutePath()); + long testFileSize = testFile.length(); + Configuration conf = new Configuration(); + conf.setInt(org.apache.hadoop.mapreduce.lib.input. + LineRecordReader.MAX_LINE_LENGTH, Integer.MAX_VALUE); + + TaskAttemptContext context = new TaskAttemptContextImpl(conf, new TaskAttemptID()); + + // read the data and check whether BOM is skipped + FileSplit split = new FileSplit(testFilePath, 0, testFileSize, + (String[])null); + LineRecordReader reader = new LineRecordReader(); + reader.initialize(split, context); + int numRecords = 0; + boolean firstLine = true; + boolean skipBOM = true; + while (reader.nextKeyValue()) { + if (firstLine) { + firstLine = false; + if (reader.getCurrentValue().toString().startsWith(UTF8_BOM)) { + skipBOM = false; + } + } + ++numRecords; + } + reader.close(); + + assertTrue("BOM is not skipped", skipBOM); + } } Modified: hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java URL: http://svn.apache.org/viewvc/hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java?rev=1602947&r1=1602946&r2=1602947&view=diff ============================================================================== --- hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java (original) +++ hadoop/common/branches/fs-encryption/hadoop-mapreduce-project/hadoop-mapreduce-examples/src/main/java/org/apache/hadoop/examples/WordCount.java Mon Jun 16 18:13:57 2014 @@ -68,8 +68,8 @@ public class WordCount { public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); - if (otherArgs.length != 2) { - System.err.println("Usage: wordcount <in> <out>"); + if (otherArgs.length < 2) { + System.err.println("Usage: wordcount <in> [<in>...] <out>"); System.exit(2); } Job job = new Job(conf, "word count"); @@ -79,8 +79,11 @@ public class WordCount { job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); - FileInputFormat.addInputPath(job, new Path(otherArgs[0])); - FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); + for (int i = 0; i < otherArgs.length - 1; ++i) { + FileInputFormat.addInputPath(job, new Path(otherArgs[i])); + } + FileOutputFormat.setOutputPath(job, + new Path(otherArgs[otherArgs.length - 1])); System.exit(job.waitForCompletion(true) ? 0 : 1); } }