Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/compress/TestCodec.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/compress/TestCodec.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/compress/TestCodec.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/compress/TestCodec.java Sat Nov 28 19:53:33 2009
@@ -19,31 +19,42 @@
 
 import java.io.BufferedInputStream;
 import java.io.BufferedOutputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
 import java.io.DataInputStream;
 import java.io.DataOutputStream;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Random;
 
-import junit.framework.TestCase;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
+import org.apache.hadoop.io.IOUtils;
 import org.apache.hadoop.io.RandomDatum;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.compress.CompressionOutputStream;
+import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionLevel;
+import org.apache.hadoop.io.compress.zlib.ZlibCompressor.CompressionStrategy;
 import org.apache.hadoop.io.compress.zlib.ZlibFactory;
+import org.apache.hadoop.util.LineReader;
+import org.apache.hadoop.util.ReflectionUtils;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 
-public class TestCodec extends TestCase {
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+public class TestCodec {
 
   private static final Log LOG= LogFactory.getLog(TestCodec.class);
 
@@ -51,22 +62,34 @@
   private Configuration conf = new Configuration();
   private int count = 10000;
   private int seed = new Random().nextInt();
-  
+
+  @Test
   public void testDefaultCodec() throws IOException {
     codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.DefaultCodec");
     codecTest(conf, seed, count, "org.apache.hadoop.io.compress.DefaultCodec");
   }
-  
+
+  @Test
   public void testGzipCodec() throws IOException {
     codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
     codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
   }
-  
+
+  @Test
   public void testBZip2Codec() throws IOException {
     codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.BZip2Codec");
     codecTest(conf, seed, count, "org.apache.hadoop.io.compress.BZip2Codec");
   }
 
+  @Test
+  public void testGzipCodecWithParam() throws IOException {
+    Configuration conf = new Configuration(this.conf);
+    ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
+    ZlibFactory.setCompressionStrategy(conf, CompressionStrategy.HUFFMAN_ONLY);
+    codecTest(conf, seed, 0, "org.apache.hadoop.io.compress.GzipCodec");
+    codecTest(conf, seed, count, "org.apache.hadoop.io.compress.GzipCodec");
+  }
+
   private static void codecTest(Configuration conf, int seed, int count,
                                 String codecClass) throws IOException {
@@ -133,6 +156,109 @@
     LOG.info("SUCCESS! Completed checking " + count + " records");
   }
 
+  @Test
+  public void testSplitableCodecs() throws Exception {
+    testSplitableCodec(BZip2Codec.class);
+  }
+
+  private void testSplitableCodec(
+      Class<? extends SplittableCompressionCodec> codecClass)
+      throws IOException {
+    final long DEFLBYTES = 2 * 1024 * 1024;
+    final Configuration conf = new Configuration();
+    final Random rand = new Random();
+    final long seed = rand.nextLong();
+    LOG.info("seed: " + seed);
+    rand.setSeed(seed);
+    SplittableCompressionCodec codec =
+      ReflectionUtils.newInstance(codecClass, conf);
+    final FileSystem fs = FileSystem.getLocal(conf);
+    final FileStatus infile =
+      fs.getFileStatus(writeSplitTestFile(fs, rand, codec, DEFLBYTES));
+    if (infile.getLen() > Integer.MAX_VALUE) {
+      fail("Unexpected compression: " + DEFLBYTES + " -> " + infile.getLen());
+    }
+    final int flen = (int) infile.getLen();
+    final Text line = new Text();
+    final Decompressor dcmp = CodecPool.getDecompressor(codec);
+    try {
+      for (int pos = 0; pos < infile.getLen(); pos += rand.nextInt(flen / 8)) {
+        // read from random positions, verifying that there exist two sequential
+        // lines as written in writeSplitTestFile
+        final SplitCompressionInputStream in =
+          codec.createInputStream(fs.open(infile.getPath()), dcmp,
+              pos, flen, SplittableCompressionCodec.READ_MODE.BYBLOCK);
+        if (in.getAdjustedStart() >= flen) {
+          break;
+        }
+        LOG.info("SAMPLE " + in.getAdjustedStart() + "," + in.getAdjustedEnd());
+        final LineReader lreader = new LineReader(in);
+        lreader.readLine(line); // ignore; likely partial
+        if (in.getPos() >= flen) {
+          break;
+        }
+        lreader.readLine(line);
+        final int seq1 = readLeadingInt(line);
+        lreader.readLine(line);
+        if (in.getPos() >= flen) {
+          break;
+        }
+        final int seq2 = readLeadingInt(line);
+        assertEquals("Mismatched lines", seq1 + 1, seq2);
+      }
+    } finally {
+      CodecPool.returnDecompressor(dcmp);
+    }
+    // remove on success
+    fs.delete(infile.getPath().getParent(), true);
+  }
+
+  private static int readLeadingInt(Text txt) throws IOException {
+    DataInputStream in =
+      new DataInputStream(new ByteArrayInputStream(txt.getBytes()));
+    return in.readInt();
+  }
+
+  /** Write infLen bytes (deflated) to file in test dir using codec.
+   * Records are of the form
+   * <i><b64 rand><i+i><b64 rand>
+   */
+  private static Path writeSplitTestFile(FileSystem fs, Random rand,
+      CompressionCodec codec, long infLen) throws IOException {
+    final int REC_SIZE = 1024;
+    final Path wd = new Path(new Path(
+        System.getProperty("test.build.data", "/tmp")).makeQualified(fs),
+      codec.getClass().getSimpleName());
+    final Path file = new Path(wd, "test" + codec.getDefaultExtension());
+    final byte[] b = new byte[REC_SIZE];
+    final Base64 b64 = new Base64();
+    DataOutputStream fout = null;
+    Compressor cmp = CodecPool.getCompressor(codec);
+    try {
+      fout = new DataOutputStream(codec.createOutputStream(
+          fs.create(file, true), cmp));
+      final DataOutputBuffer dob = new DataOutputBuffer(REC_SIZE * 4 / 3 + 4);
+      int seq = 0;
+      while (infLen > 0) {
+        rand.nextBytes(b);
+        final byte[] b64enc = b64.encode(b); // ensures rand printable, no LF
+        dob.reset();
+        dob.writeInt(seq);
+        System.arraycopy(dob.getData(), 0, b64enc, 0, dob.getLength());
+        fout.write(b64enc);
+        fout.write('\n');
+        ++seq;
+        infLen -= b64enc.length;
+      }
+      LOG.info("Wrote " + seq + " records to " + file);
+    } finally {
+      IOUtils.cleanup(LOG, fout);
+      CodecPool.returnCompressor(cmp);
+    }
+    return file;
+  }
+
+  @Test
   public void testCodecPoolGzipReuse() throws Exception {
     Configuration conf = new Configuration();
     conf.setBoolean("hadoop.native.lib", true);
@@ -149,19 +275,69 @@
     assertTrue("Got mismatched ZlibCompressor", c2 != CodecPool.getCompressor(gzc));
   }
 
-  public void testSequenceFileDefaultCodec() throws IOException, ClassNotFoundException,
+  private static void gzipReinitTest(Configuration conf, CompressionCodec codec)
+      throws IOException {
+    // Add codec to cache
+    ZlibFactory.setCompressionLevel(conf, CompressionLevel.BEST_COMPRESSION);
+    ZlibFactory.setCompressionStrategy(conf,
+        CompressionStrategy.DEFAULT_STRATEGY);
+    Compressor c1 = CodecPool.getCompressor(codec);
+    CodecPool.returnCompressor(c1);
+    // reset compressor's compression level to perform no compression
+    ZlibFactory.setCompressionLevel(conf, CompressionLevel.NO_COMPRESSION);
+    Compressor c2 = CodecPool.getCompressor(codec, conf);
+    // ensure same compressor placed earlier
+    assertTrue("Got mismatched ZlibCompressor", c1 == c2);
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    CompressionOutputStream cos = null;
+    // write trivially compressable data
+    byte[] b = new byte[1 << 15];
+    Arrays.fill(b, (byte) 43);
+    try {
+      cos = codec.createOutputStream(bos, c2);
+      cos.write(b);
+    } finally {
+      if (cos != null) {
+        cos.close();
+      }
+      CodecPool.returnCompressor(c2);
+    }
+    byte[] outbytes = bos.toByteArray();
+    // verify data were not compressed
+    assertTrue("Compressed bytes contrary to configuration",
+        outbytes.length >= b.length);
+  }
+
+  @Test
+  public void testCodecPoolCompressorReinit() throws Exception {
+    Configuration conf = new Configuration();
+    conf.setBoolean("hadoop.native.lib", true);
+    if (ZlibFactory.isNativeZlibLoaded(conf)) {
+      GzipCodec gzc = ReflectionUtils.newInstance(GzipCodec.class, conf);
+      gzipReinitTest(conf, gzc);
+    } else {
+      LOG.warn("testCodecPoolCompressorReinit skipped: native libs not loaded");
+    }
+    conf.setBoolean("hadoop.native.lib", false);
+    DefaultCodec dfc = ReflectionUtils.newInstance(DefaultCodec.class, conf);
+    gzipReinitTest(conf, dfc);
+  }
+
+  @Test
+  public void testSequenceFileDefaultCodec() throws IOException, ClassNotFoundException,
       InstantiationException, IllegalAccessException {
     sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.DefaultCodec", 100);
     sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.DefaultCodec", 1000000);
   }
-  
-  public void testSequenceFileBZip2Codec() throws IOException, ClassNotFoundException,
+
+  @Test
+  public void testSequenceFileBZip2Codec() throws IOException, ClassNotFoundException,
       InstantiationException, IllegalAccessException {
     sequenceFileCodecTest(conf, 0, "org.apache.hadoop.io.compress.BZip2Codec", 100);
     sequenceFileCodecTest(conf, 100, "org.apache.hadoop.io.compress.BZip2Codec", 100);
     sequenceFileCodecTest(conf, 200000, "org.apache.hadoop.io.compress.BZip2Codec", 1000000);
   }
-  
+
   private static void sequenceFileCodecTest(Configuration conf, int lines,
                                 String codecClass, int blockSize)
     throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
@@ -242,8 +418,4 @@
 
   }
 
-  public TestCodec(String name) {
-    super(name);
-  }
-
 }
Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFile.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFile.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFile.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFile.java Sat Nov 28 19:53:33 2009
@@ -319,7 +319,7 @@
     scanner.close();
 
     // test for a range of scanner
-    scanner = reader.createScanner(getSomeKey(10), getSomeKey(60));
+    scanner = reader.createScannerByKey(getSomeKey(10), getSomeKey(60));
     readAndCheckbytes(scanner, 10, 50);
     assertFalse(scanner.advance());
     scanner.close();

Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileByteArrays.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileByteArrays.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileByteArrays.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileByteArrays.java Sat Nov 28 19:53:33 2009
@@ -673,7 +673,7 @@
     Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
     Scanner scanner =
-        reader.createScanner(composeSortedKey(KEY, count, recordIndex)
+        reader.createScannerByKey(composeSortedKey(KEY, count, recordIndex)
             .getBytes(), null);
 
     try {
@@ -698,7 +698,7 @@
       throws IOException {
     Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
     Scanner scanner =
-        reader.createScanner(composeSortedKey(KEY, count, recordIndex)
+        reader.createScannerByKey(composeSortedKey(KEY, count, recordIndex)
             .getBytes(), null);
 
     try {
@@ -729,7 +729,7 @@
     Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
     Scanner scanner =
-        reader.createScanner(composeSortedKey(KEY, count, recordIndex)
+        reader.createScannerByKey(composeSortedKey(KEY, count, recordIndex)
            .getBytes(), null);
 
     byte[] vbuf1 = new byte[BUF_SIZE];
@@ -753,7 +753,7 @@
     Reader reader = new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
     Scanner scanner =
-        reader.createScanner(composeSortedKey(KEY, count, recordIndex)
+        reader.createScannerByKey(composeSortedKey(KEY, count, recordIndex)
            .getBytes(), null);
 
     // read the indexed key
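For orientation, the renamed key-range API reads as follows in client code; a rough sketch modeled on these tests (the fs/path/conf setup is assumed to match the tests, and the begin/end keys are placeholders):

    // Sketch: key-range scan over a sorted TFile with the renamed method.
    Reader reader =
        new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
    Scanner scanner =
        reader.createScannerByKey("aaa".getBytes(), "zzz".getBytes());
    BytesWritable key = new BytesWritable();
    BytesWritable value = new BytesWritable();
    try {
      while (!scanner.atEnd()) {
        scanner.entry().get(key, value);  // same access pattern as the tests
        scanner.advance();
      }
    } finally {
      scanner.close();
      reader.close();
    }

Note that on an unsorted TFile this call is expected to throw, which is exactly what TestTFileUnsortedByteArrays (below) verifies.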
Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileSplit.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileSplit.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileSplit.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileSplit.java Sat Nov 28 19:53:33 2009
@@ -17,6 +17,7 @@
 package org.apache.hadoop.io.file.tfile;
 
 import java.io.IOException;
+import java.util.Random;
 
 import junit.framework.Assert;
 import junit.framework.TestCase;
@@ -42,6 +43,7 @@
   private FileSystem fs;
   private Configuration conf;
   private Path path;
+  private Random random = new Random();
 
   private String comparator = "memcmp";
   private String outputFile = "TestTFileSplit";
@@ -74,7 +76,7 @@
     long rowCount = 0;
     BytesWritable key, value;
     for (int i = 0; i < numSplit; ++i, offset += splitSize) {
-      Scanner scanner = reader.createScanner(offset, splitSize);
+      Scanner scanner = reader.createScannerByByteRange(offset, splitSize);
       int count = 0;
       key = new BytesWritable();
       value = new BytesWritable();
@@ -90,18 +92,101 @@
     Assert.assertEquals(rowCount, reader.getEntryCount());
     reader.close();
   }
+
+  /* Similar to readFile(), tests the scanner created
+   * by record numbers rather than the offsets.
+   */
+  void readRowSplits(int numSplits) throws IOException {
+
+    Reader reader =
+      new Reader(fs.open(path), fs.getFileStatus(path).getLen(), conf);
+
+    long totalRecords = reader.getEntryCount();
+    for (int i=0; i<numSplits; i++) {
+      long startRec = i*totalRecords/numSplits;
+      long endRec = (i+1)*totalRecords/numSplits;
+      if (i == numSplits-1) {
+        endRec = totalRecords;
+      }
+      Scanner scanner = reader.createScannerByRecordNum(startRec, endRec);
+      int count = 0;
+      BytesWritable key = new BytesWritable();
+      BytesWritable value = new BytesWritable();
+      long x=startRec;
+      while (!scanner.atEnd()) {
+        assertEquals("Incorrect RecNum returned by scanner", scanner.getRecordNum(), x);
+        scanner.entry().get(key, value);
+        ++count;
+        assertEquals("Incorrect RecNum returned by scanner", scanner.getRecordNum(), x);
+        scanner.advance();
+        ++x;
+      }
+      scanner.close();
+      Assert.assertTrue(count == (endRec - startRec));
+    }
+    // make sure specifying range at the end gives zero records.
+    Scanner scanner = reader.createScannerByRecordNum(totalRecords, -1);
+    Assert.assertTrue(scanner.atEnd());
+  }
 
   static String composeSortedKey(String prefix, int total, int value) {
     return String.format("%s%010d", prefix, value);
   }
 
+  void checkRecNums() throws IOException {
+    long fileLen = fs.getFileStatus(path).getLen();
+    Reader reader = new Reader(fs.open(path), fileLen, conf);
+    long totalRecs = reader.getEntryCount();
+    long begin = random.nextLong() % (totalRecs / 2);
+    if (begin < 0)
+      begin += (totalRecs / 2);
+    long end = random.nextLong() % (totalRecs / 2);
+    if (end < 0)
+      end += (totalRecs / 2);
+    end += (totalRecs / 2) + 1;
+
+    assertEquals("RecNum for offset=0 should be 0", 0, reader
+        .getRecordNumNear(0));
+    for (long x : new long[] { fileLen, fileLen + 1, 2 * fileLen }) {
+      assertEquals("RecNum for offset>=fileLen should be total entries",
+          totalRecs, reader.getRecordNumNear(x));
+    }
+
+    for (long i = 0; i < 100; ++i) {
+      assertEquals("Locaton to RecNum conversion not symmetric", i, reader
+          .getRecordNumByLocation(reader.getLocationByRecordNum(i)));
+    }
+
+    for (long i = 1; i < 100; ++i) {
+      long x = totalRecs - i;
+      assertEquals("Locaton to RecNum conversion not symmetric", x, reader
+          .getRecordNumByLocation(reader.getLocationByRecordNum(x)));
+    }
+
+    for (long i = begin; i < end; ++i) {
+      assertEquals("Locaton to RecNum conversion not symmetric", i, reader
+          .getRecordNumByLocation(reader.getLocationByRecordNum(i)));
+    }
+
+    for (int i = 0; i < 1000; ++i) {
+      long x = random.nextLong() % totalRecs;
+      if (x < 0) x += totalRecs;
+      assertEquals("Locaton to RecNum conversion not symmetric", x, reader
+          .getRecordNumByLocation(reader.getLocationByRecordNum(x)));
+    }
+  }
+
   public void testSplit() throws IOException {
     System.out.println("testSplit");
     createFile(100000, Compression.Algorithm.NONE.getName());
+    checkRecNums();
     readFile();
+    readRowSplits(10);
     fs.delete(path, true);
     createFile(500000, Compression.Algorithm.GZ.getName());
+    checkRecNums();
     readFile();
+    readRowSplits(83);
     fs.delete(path, true);
   }
 }
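readRowSplits() above is effectively a usage recipe for the new record-number scanners; the core split arithmetic, distilled (reader construction as in the test, numSplits assumed given):

    // Sketch: carve a TFile into numSplits record-balanced ranges.
    long total = reader.getEntryCount();
    for (int i = 0; i < numSplits; i++) {
      long startRec = i * total / numSplits;
      long endRec = (i == numSplits - 1) ? total : (i + 1) * total / numSplits;
      Scanner scanner = reader.createScannerByRecordNum(startRec, endRec);
      try {
        while (!scanner.atEnd()) {
          // scanner.getRecordNum() is the absolute record index here
          scanner.advance();
        }
      } finally {
        scanner.close();
      }
    }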
Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/file/tfile/TestTFileUnsortedByteArrays.java Sat Nov 28 19:53:33 2009
@@ -89,7 +89,7 @@
 
     try {
       Scanner scanner =
-          reader.createScanner("aaa".getBytes(), "zzz".getBytes());
+          reader.createScannerByKey("aaa".getBytes(), "zzz".getBytes());
       Assert
           .fail("Failed to catch creating scanner with keys on unsorted file.");
     }

Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/SerializationTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/SerializationTestUtil.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/SerializationTestUtil.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/SerializationTestUtil.java Sat Nov 28 19:53:33 2009
@@ -17,6 +17,8 @@
  */
 package org.apache.hadoop.io.serializer;
 
+import java.util.Map;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.DataOutputBuffer;
@@ -33,19 +35,33 @@
    * @return deserialized item
    */
   public static<K> K testSerialization(Configuration conf, K before)
-    throws Exception {
-
+      throws Exception {
+    Map<String, String> metadata =
+      SerializationBase.getMetadataFromClass(GenericsUtil.getClass(before));
+    return testSerialization(conf, metadata, before);
+  }
+
+  /**
+   * A utility that tests serialization/deserialization.
+   * @param conf configuration to use, "io.serializations" is read to
+   * determine the serialization
+   * @param metadata the metadata to pass to the serializer/deserializer
+   * @param <K> the class of the item
+   * @param before item to (de)serialize
+   * @return deserialized item
+   */
+  public static <K> K testSerialization(Configuration conf,
+      Map<String, String> metadata, K before) throws Exception {
+
     SerializationFactory factory = new SerializationFactory(conf);
-    Serializer<K> serializer
-      = factory.getSerializer(GenericsUtil.getClass(before));
-    Deserializer<K> deserializer
-      = factory.getDeserializer(GenericsUtil.getClass(before));
-
+    SerializerBase<K> serializer = factory.getSerializer(metadata);
+    DeserializerBase<K> deserializer = factory.getDeserializer(metadata);
+
     DataOutputBuffer out = new DataOutputBuffer();
     serializer.open(out);
     serializer.serialize(before);
     serializer.close();
-
+
     DataInputBuffer in = new DataInputBuffer();
     in.reset(out.getData(), out.getLength());
     deserializer.open(in);

Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/io/serializer/avro/TestAvroSerialization.java Sat Nov 28 19:53:33 2009
@@ -18,9 +18,14 @@
 
 package org.apache.hadoop.io.serializer.avro;
 
+import java.util.HashMap;
+import java.util.Map;
+
 import junit.framework.TestCase;
 
+import org.apache.avro.util.Utf8;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.serializer.SerializationBase;
 import org.apache.hadoop.io.serializer.SerializationTestUtil;
 
 public class TestAvroSerialization extends TestCase {
@@ -59,6 +64,16 @@
       SerializationTestUtil.testSerialization(conf, before);
     assertEquals(before, after);
   }
+
+  public void testGeneric() throws Exception {
+    Utf8 before = new Utf8("hadoop");
+    Map<String, String> metadata = new HashMap<String, String>();
+    metadata.put(SerializationBase.SERIALIZATION_KEY,
+        AvroGenericSerialization.class.getName());
+    metadata.put(AvroSerialization.AVRO_SCHEMA_KEY, "\"string\"");
+    Utf8 after = SerializationTestUtil.testSerialization(conf, metadata, before);
+    assertEquals(before, after);
+  }
 
   public static class InnerRecord {
     public int x = 7;
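The testGeneric() addition doubles as documentation for the new metadata-driven factory lookup. The essential round trip, extracted as a sketch (conf is assumed to register AvroGenericSerialization in io.serializations, as the test harness does):

    // Sketch: serialize/deserialize via a metadata map rather than a class.
    Map<String, String> metadata = new HashMap<String, String>();
    metadata.put(SerializationBase.SERIALIZATION_KEY,
        AvroGenericSerialization.class.getName());
    metadata.put(AvroSerialization.AVRO_SCHEMA_KEY, "\"string\"");

    SerializationFactory factory = new SerializationFactory(conf);
    SerializerBase<Utf8> serializer = factory.getSerializer(metadata);
    DataOutputBuffer out = new DataOutputBuffer();
    serializer.open(out);
    serializer.serialize(new Utf8("hadoop"));
    serializer.close();

    DeserializerBase<Utf8> deserializer = factory.getDeserializer(metadata);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    deserializer.open(in);
    Utf8 after = deserializer.deserialize(null);
    deserializer.close();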
Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestGenericsUtil.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestGenericsUtil.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestGenericsUtil.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestGenericsUtil.java Sat Nov 28 19:53:33 2009
@@ -103,6 +103,12 @@
     GenericOptionsParser parser = new GenericOptionsParser(
         new Configuration(), new String[] {"-jt"});
     assertEquals(parser.getRemainingArgs().length, 0);
+
+    // test if -D accepts -Dx=y=z
+    parser =
+      new GenericOptionsParser(new Configuration(),
+          new String[] {"-Dx=y=z"});
+    assertEquals(parser.getConfiguration().get("x"), "y=z");
   }
 
   public void testGetClass() {
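The new assertion pins down an easy-to-miss parsing rule: only the first '=' in a -D argument separates key from value. The behavior it implies, illustrated (this is not the parser's actual source, just the equivalent split):

    // Illustrative only: splitting with limit 2 keeps later '=' in the value.
    String[] kv = "x=y=z".split("=", 2);
    // kv[0] -> "x", kv[1] -> "y=z"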
Modified: hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestPureJavaCrc32.java
URL: http://svn.apache.org/viewvc/hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestPureJavaCrc32.java?rev=885142&r1=885141&r2=885142&view=diff
==============================================================================
--- hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestPureJavaCrc32.java (original)
+++ hadoop/common/branches/HADOOP-6194/src/test/core/org/apache/hadoop/util/TestPureJavaCrc32.java Sat Nov 28 19:53:33 2009
@@ -17,26 +17,27 @@
  */
 package org.apache.hadoop.util;
 
-import junit.framework.TestCase;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+import java.util.Random;
 import java.util.zip.CRC32;
 import java.util.zip.Checksum;
-import java.util.LinkedHashMap;
-import java.util.Map;
-import java.util.Random;
+
+import org.junit.Assert;
+import org.junit.Test;
 
 /**
  * Unit test to verify that the pure-Java CRC32 algorithm gives
 * the same results as the built-in implementation.
 */
-public class TestPureJavaCrc32 extends TestCase {
-  private CRC32 theirs;
-  private PureJavaCrc32 ours;
-
-  public void setUp() {
-    theirs = new CRC32();
-    ours = new PureJavaCrc32();
-  }
+public class TestPureJavaCrc32 {
+  private final CRC32 theirs = new CRC32();
+  private final PureJavaCrc32 ours = new PureJavaCrc32();
 
+  @Test
   public void testCorrectness() throws Exception {
     checkSame();
@@ -92,10 +93,93 @@
   }
 
   private void checkSame() {
-    assertEquals(theirs.getValue(), ours.getValue());
+    Assert.assertEquals(theirs.getValue(), ours.getValue());
   }
 
   /**
+   * Generate a table to perform checksums based on the same CRC-32 polynomial
+   * that java.util.zip.CRC32 uses.
+   */
+  public static class Table {
+    private static final int polynomial = 0xEDB88320;
+
+    private final int[][] tables;
+
+    private Table(final int nBits, final int nTables) {
+      tables = new int[nTables][];
+      final int size = 1 << nBits;
+      for(int i = 0; i < tables.length; i++) {
+        tables[i] = new int[size];
+      }
+
+      //compute the first table
+      final int[] first = tables[0];
+      for (int i = 0; i < first.length; i++) {
+        int crc = i;
+        for (int j = 0; j < nBits; j++) {
+          if ((crc & 1) == 1) {
+            crc >>>= 1;
+            crc ^= polynomial;
+          } else {
+            crc >>>= 1;
+          }
+        }
+        first[i] = crc;
+      }
+
+      //compute the remaining tables
+      final int mask = first.length - 1;
+      for(int j = 1; j < tables.length; j++) {
+        final int[] previous = tables[j-1];
+        final int[] current = tables[j];
+        for (int i = 0; i < current.length; i++) {
+          current[i] = (previous[i] >>> nBits) ^ first[previous[i] & mask];
+        }
+      }
+    }
+
+    String[] toStrings(String nameformat) {
+      final String[] s = new String[tables.length];
+      for (int j = 0; j < tables.length; j++) {
+        final int[] t = tables[j];
+        final StringBuilder b = new StringBuilder();
+        b.append(String.format("  static final int[] " + nameformat
+            + " = new int[] {", j));
+        for (int i = 0; i < t.length;) {
+          b.append("\n   ");
+          for(int k = 0; k < 4; k++) {
+            b.append(String.format("0x%08X, ", t[i++]));
+          }
+        }
+        b.setCharAt(b.length() - 2, '\n');
+        s[j] = b.toString() + "  };\n";
+      }
+      return s;
+    }
+
+    /** {@inheritDoc} */
+    public String toString() {
+      final StringBuilder b = new StringBuilder();
+      for(String s : toStrings(String.format("T%d_",
+          Integer.numberOfTrailingZeros(tables[0].length)) + "%d")) {
+        b.append(s);
+      }
+      return b.toString();
+    }
+
+    /** Generate CRC-32 lookup tables */
+    public static void main(String[] args) throws FileNotFoundException {
+      int i = 8;
+      final PrintStream out = new PrintStream(
+          new FileOutputStream("table" + i + ".txt"), true);
+      final Table t = new Table(i, 16);
+      final String s = t.toString();
+      System.out.println(s);
+      out.println(s);
+    }
+  }
+
+  /**
    * Performance tests to compare performance of the Pure Java implementation
    * to the built-in java.util.zip implementation. This can be run from the
    * command line with:
@@ -109,62 +193,108 @@
     public static final int MAX_LEN = 32*1024*1024; // up to 32MB chunks
     public static final int BYTES_PER_SIZE = MAX_LEN * 4;
 
-    public static LinkedHashMap<String, Checksum> getImplsToTest() {
-      LinkedHashMap<String, Checksum> impls =
-        new LinkedHashMap<String, Checksum>();
-      impls.put("BuiltIn", new CRC32());
-      impls.put("PureJava", new PureJavaCrc32());
-      return impls;
-    }
+    static final Checksum zip = new CRC32();
+    static final Checksum[] CRCS = {new PureJavaCrc32()};
 
     public static void main(String args[]) {
-      LinkedHashMap<String, Checksum> impls = getImplsToTest();
+      printSystemProperties(System.out);
+      doBench(CRCS, System.out);
+    }
+
+    private static void printCell(String s, int width, PrintStream out) {
+      final int w = s.length() > width? s.length(): width;
+      out.printf(" %" + w + "s |", s);
+    }
 
-      Random rand = new Random();
-      byte[] bytes = new byte[MAX_LEN];
-      rand.nextBytes(bytes);
+    private static void doBench(final Checksum[] crcs, final PrintStream out) {
+      final ArrayList<Checksum> a = new ArrayList<Checksum>();
+      a.add(zip);
+      for (Checksum c : crcs)
+        if(c.getClass() != zip.getClass())
+          a.add(c);
+      doBench(a, out);
+    }
 
+    private static void doBench(final List<Checksum> crcs, final PrintStream out
+        ) {
+      final byte[] bytes = new byte[MAX_LEN];
+      new Random().nextBytes(bytes);
+
       // Print header
-      System.out.printf("||num bytes||");
-      for (String entry : impls.keySet()) {
-        System.out.printf(entry + " MB/sec||");
+      out.printf("\nPerformance Table (The unit is MB/sec)\n||");
+      final String title = "Num Bytes";
+      printCell("Num Bytes", 0, out);
+      for (Checksum c : crcs) {
+        out.printf("|");
+        printCell(c.getClass().getSimpleName(), 8, out);
       }
-      System.out.printf("\n");
+      out.printf("|\n");
 
       // Warm up implementations to get jit going.
-      for (Map.Entry<String, Checksum> entry : impls.entrySet()) {
-        doBench("warmUp" + entry.getKey(),
-                entry.getValue(), bytes, 2, false);
-        doBench("warmUp" + entry.getKey(),
-                entry.getValue(), bytes, 2101, false);
+      for (Checksum c : crcs) {
+        doBench(c, bytes, 2, null);
+        doBench(c, bytes, 2101, null);
       }
 
       // Test on a variety of sizes
       for (int size = 1; size < MAX_LEN; size *= 2) {
-        System.out.printf("| %d\t|", size);
+        out.printf("|");
+        printCell(String.valueOf(size), title.length()+1, out);
 
-        for (Map.Entry<String, Checksum> entry : impls.entrySet()) {
+        Long expected = null;
+        for(Checksum c : crcs) {
           System.gc();
-          doBench(entry.getKey(), entry.getValue(), bytes, size, true);
+          final long result = doBench(c, bytes, size, out);
+          if(c.getClass() == zip.getClass()) {
+            expected = result;
+          } else if (result != expected) {
+            throw new RuntimeException(c.getClass() + " has bugs!");
+          }
         }
-        System.out.printf("\n");
+        out.printf("\n");
       }
     }
 
-    private static void doBench(String id, Checksum crc,
-                                byte[] bytes, int size, boolean printout) {
-      long st = System.nanoTime();
-      int trials = BYTES_PER_SIZE / size;
+    private static long doBench(Checksum crc, byte[] bytes, int size,
+        PrintStream out) {
+      final String name = crc.getClass().getSimpleName();
+      final int trials = BYTES_PER_SIZE / size;
+
+      final long st = System.nanoTime();
+      crc.reset();
       for (int i = 0; i < trials; i++) {
         crc.update(bytes, 0, size);
       }
-      long et = System.nanoTime();
+      final long result = crc.getValue();
+      final long et = System.nanoTime();
 
       double mbProcessed = trials * size / 1024.0 / 1024.0;
       double secsElapsed = (et - st) / 1000000000.0d;
-      if (printout) {
-        System.out.printf("%.3f \t|", mbProcessed / secsElapsed);
+      if (out != null) {
+        final String s = String.format("%9.3f", mbProcessed/secsElapsed);
+        printCell(s, name.length()+1, out);
+      }
+      return result;
+    }
+
+    private static void printSystemProperties(PrintStream out) {
+      final String[] names = {
+        "java.version",
+        "java.runtime.name",
+        "java.runtime.version",
+        "java.vm.version",
+        "java.vm.vendor",
+        "java.vm.name",
+        "java.vm.specification.version",
+        "java.specification.version",
+        "os.arch",
+        "os.name",
+        "os.version"
+      };
+      final Properties p = System.getProperties();
+      for(String n : names) {
+        out.println(n + " = " + p.getProperty(n));
      }
    }
  }
 }
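The Table generator above builds lookup tables from the reflected CRC-32 polynomial 0xEDB88320. For readers reconstructing the idea, here is a self-contained single-table (nBits = 8) variant checked against java.util.zip.CRC32. It illustrates the table-driven technique; the class name is hypothetical and this is not the committed PureJavaCrc32 source:

    import java.util.zip.CRC32;

    public class Crc32TableSketch {  // hypothetical class name
      private static final int POLY = 0xEDB88320;  // same polynomial as Table
      private static final int[] T = new int[256];
      static {
        // Identical to Table's first-table loop with nBits = 8.
        for (int i = 0; i < 256; i++) {
          int crc = i;
          for (int j = 0; j < 8; j++) {
            crc = ((crc & 1) == 1) ? (crc >>> 1) ^ POLY : crc >>> 1;
          }
          T[i] = crc;
        }
      }

      // Standard table-driven update: one table lookup per input byte.
      static int update(int crc, byte[] b, int off, int len) {
        crc = ~crc;
        for (int i = off; i < off + len; i++) {
          crc = (crc >>> 8) ^ T[(crc ^ b[i]) & 0xff];
        }
        return ~crc;
      }

      public static void main(String[] args) {
        byte[] data = "hello crc".getBytes();
        CRC32 builtin = new CRC32();
        builtin.update(data, 0, data.length);
        long ours = update(0, data, 0, data.length) & 0xffffffffL;
        System.out.println(builtin.getValue() == ours);  // prints: true
      }
    }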