Author: stack Date: Sun Jan 6 13:58:16 2008 New Revision: 609422 URL: http://svn.apache.org/viewvc?rev=609422&view=rev Log: HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation
Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java Modified: lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt?rev=609422&r1=609421&r2=609422&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt (original) +++ lucene/hadoop/trunk/src/contrib/hbase/CHANGES.txt Sun Jan 6 13:58:16 2008 @@ -152,6 +152,8 @@ HADOOP-2458 HStoreFile.writeSplitInfo should just call HStoreFile.Reference.write HADOOP-2471 Add reading/writing MapFile to PerformanceEvaluation suite + HADOOP-2522 Separate MapFile benchmark from PerformanceEvaluation + (Tom White via Stack) Added: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java?rev=609422&view=auto ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java (added) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MapFilePerformanceEvaluation.java Sun Jan 6 13:58:16 2008 @@ -0,0 +1,268 @@ +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase; + +import java.io.IOException; +import java.util.Random; + +import org.apache.commons.math.random.RandomData; +import org.apache.commons.math.random.RandomDataImpl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.MapFile; +import org.apache.hadoop.io.Text; +import org.apache.log4j.Logger; + +/** + * <p> + * This class runs performance benchmarks for {@link MapFile}. 
+ * </p> + */ +public class MapFilePerformanceEvaluation { + + private static final int ROW_LENGTH = 1000; + private static final int ROW_COUNT = 1000000; + + static final Logger LOG = + Logger.getLogger(MapFilePerformanceEvaluation.class.getName()); + + static Text format(final int i, final Text text) { + String v = Integer.toString(i); + text.set("0000000000".substring(v.length()) + v); + return text; + } + + private void runBenchmarks() throws Exception { + Configuration conf = new Configuration(); + FileSystem fs = FileSystem.get(conf); + Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile")); + if (fs.exists(mf)) { + fs.delete(mf); + } + + runBenchmark(new SequentialWriteBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + runBenchmark(new UniformRandomReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + runBenchmark(new GaussianRandomReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + runBenchmark(new SequentialReadBenchmark(conf, fs, mf, ROW_COUNT), + ROW_COUNT); + + } + + private void runBenchmark(RowOrientedBenchmark benchmark, int rowCount) + throws Exception { + LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " + + rowCount + " rows."); + long elapsedTime = benchmark.run(); + LOG.info("Running " + benchmark.getClass().getSimpleName() + " for " + + rowCount + " rows took " + elapsedTime + "ms."); + } + + static abstract class RowOrientedBenchmark { + + protected final Configuration conf; + protected final FileSystem fs; + protected final Path mf; + protected final int totalRows; + protected Text key; + protected Text val; + + public RowOrientedBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + this.conf = conf; + this.fs = fs; + this.mf = mf; + this.totalRows = totalRows; + this.key = new Text(); + this.val = new Text(); + } + + void setUp() throws Exception { + // do nothing + } + + abstract void doRow(int i) throws Exception; + + protected int getReportingPeriod() { + return this.totalRows 
/ 10; + } + + void tearDown() throws Exception { + // do nothing + } + + /** + * Run benchmark + * @return elapsed time. + * @throws Exception + */ + long run() throws Exception { + long elapsedTime; + setUp(); + long startTime = System.currentTimeMillis(); + try { + for (int i = 0; i < totalRows; i++) { + if (i > 0 && i % getReportingPeriod() == 0) { + LOG.info("Processed " + i + " rows."); + } + doRow(i); + } + elapsedTime = System.currentTimeMillis() - startTime; + } finally { + tearDown(); + } + return elapsedTime; + } + + } + + static class SequentialWriteBenchmark extends RowOrientedBenchmark { + + protected MapFile.Writer writer; + private Random random = new Random(); + private byte[] bytes = new byte[ROW_LENGTH]; + + public SequentialWriteBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void setUp() throws Exception { + writer = new MapFile.Writer(conf, fs, mf.toString(), + Text.class, Text.class); + } + + @Override + void doRow(int i) throws Exception { + val.set(generateValue()); + writer.append(format(i, key), val); + } + + private byte[] generateValue() { + random.nextBytes(bytes); + return bytes; + } + + @Override + protected int getReportingPeriod() { + return this.totalRows; // don't report progress + } + + @Override + void tearDown() throws Exception { + writer.close(); + } + + } + + static abstract class ReadBenchmark extends RowOrientedBenchmark { + + protected MapFile.Reader reader; + + public ReadBenchmark(Configuration conf, FileSystem fs, Path mf, + int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void setUp() throws Exception { + reader = new MapFile.Reader(fs, mf.toString(), conf); + } + + @Override + void tearDown() throws Exception { + reader.close(); + } + + } + + static class SequentialReadBenchmark extends ReadBenchmark { + + public SequentialReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) { + super(conf, fs, 
mf, totalRows); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + reader.next(key, val); + } + + @Override + protected int getReportingPeriod() { + return this.totalRows; // don't report progress + } + + } + + static class UniformRandomReadBenchmark extends ReadBenchmark { + + private Random random = new Random(); + + public UniformRandomReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + reader.get(getRandomRow(), val); + } + + private Text getRandomRow() { + return format(random.nextInt(totalRows), key); + } + + } + + static class GaussianRandomReadBenchmark extends ReadBenchmark { + + private RandomData randomData = new RandomDataImpl(); + + public GaussianRandomReadBenchmark(Configuration conf, FileSystem fs, + Path mf, int totalRows) { + super(conf, fs, mf, totalRows); + } + + @Override + void doRow(@SuppressWarnings("unused") int i) throws Exception { + reader.get(getGaussianRandomRow(), val); + } + + private Text getGaussianRandomRow() { + int r = (int) randomData.nextGaussian(totalRows / 2, totalRows / 10); + return format(r, key); + } + + } + + /** + * @param args + * @throws IOException + */ + public static void main(String[] args) throws Exception { + new MapFilePerformanceEvaluation().runBenchmarks(); + } + +} Modified: lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java?rev=609422&r1=609421&r2=609422&view=diff ============================================================================== --- lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java (original) +++ lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/PerformanceEvaluation.java 
Sun Jan 6 13:58:16 2008 @@ -34,11 +34,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.MapFile; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.MapFile.Writer; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; @@ -86,7 +84,6 @@ private static final String SEQUENTIAL_READ = "sequentialRead"; private static final String SEQUENTIAL_WRITE = "sequentialWrite"; private static final String SCAN = "scan"; - private static final String MAPFILE = "mapfile"; private static final List<String> COMMANDS = Arrays.asList(new String [] {RANDOM_READ, @@ -94,8 +91,7 @@ RANDOM_WRITE, SEQUENTIAL_READ, SEQUENTIAL_WRITE, - SCAN, - MAPFILE}); + SCAN}); volatile HBaseConfiguration conf; private boolean miniCluster = false; @@ -552,59 +548,6 @@ LOG.error("Failed", e); } } - - private void doMapFile() throws IOException { - final int ROW_COUNT = 1000000; - Random random = new Random(); - Configuration c = new Configuration(); - FileSystem fs = FileSystem.get(c); - Path mf = fs.makeQualified(new Path("performanceevaluation.mapfile")); - if (fs.exists(mf)) { - fs.delete(mf); - } - Writer writer = new MapFile.Writer(c, fs, mf.toString(), - Text.class, Text.class); - LOG.info("Writing " + ROW_COUNT + " rows to " + mf.toString()); - long startTime = System.currentTimeMillis(); - // Add 1M rows. - for (int i = 0; i < ROW_COUNT; i++) { - writer.append(PerformanceEvaluation.format(i), - new Text(PerformanceEvaluation.generateValue(random))); - } - writer.close(); - LOG.info("Writing " + ROW_COUNT + " records took " + - (System.currentTimeMillis() - startTime) + "ms (Note: generation of keys " + - "and values is done inline and has been seen to consume " + - "significant time: e.g. ~30% of cpu time"); - // Do random reads. 
- LOG.info("Reading " + ROW_COUNT + " random rows"); - MapFile.Reader reader = new MapFile.Reader(fs, mf.toString(), c); - startTime = System.currentTimeMillis(); - for (int i = 0; i < ROW_COUNT; i++) { - if (i > 0 && i % (ROW_COUNT / 10) == 0) { - LOG.info("Read " + i); - } - reader.get(PerformanceEvaluation.getRandomRow(random, ROW_COUNT), - new Text()); - } - reader.close(); - LOG.info("Reading " + ROW_COUNT + " random records took " + - (System.currentTimeMillis() - startTime) + "ms (Note: generation of " + - "random key is done in line and takes a significant amount of cpu " + - "time: e.g 10-15%"); - // Do random reads. - LOG.info("Reading " + ROW_COUNT + " rows sequentially"); - reader = new MapFile.Reader(fs, mf.toString(), c); - startTime = System.currentTimeMillis(); - Text key = new Text(); - Text val = new Text(); - for (int i = 0; reader.next(key, val); i++) { - continue; - } - reader.close(); - LOG.info("Reading " + ROW_COUNT + " records serially took " + - (System.currentTimeMillis() - startTime) + "ms"); - } private void runTest(final String cmd) throws IOException { if (cmd.equals(RANDOM_READ_MEM)) { @@ -619,9 +562,7 @@ } try { - if (cmd.equals(MAPFILE)) { - doMapFile(); - } else if (N == 1) { + if (N == 1) { // If there is only one client and one HRegionServer, we assume nothing // has been set up at all. runNIsOne(cmd); @@ -661,7 +602,6 @@ System.err.println(" sequentialRead Run sequential read test"); System.err.println(" sequentialWrite Run sequential write test"); System.err.println(" scan Run scan test"); - System.err.println(" mapfile Do read, write tests against mapfile"); System.err.println(); System.err.println("Args:"); System.err.println(" nclients Integer. Required. Total number of " +