Author: srowen
Date: Tue Feb 22 21:17:35 2011
New Revision: 1073511

URL: http://svn.apache.org/viewvc?rev=1073511&view=rev
Log:
MAHOUT-614 fix up overriding of Hadoop's FileOutputFormat

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleOutputFormat.java Tue Feb 22 21:17:35 2011 @@ -22,12 +22,10 @@ import java.io.IOException; import java.util.TreeMap; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.util.Progressable; /** * This abstract class extends the FileOutputFormat, allowing to write the @@ -52,23 +50,10 @@ public abstract class MultipleOutputForm /** * Create a composite record writer that can write key/value data to different * output files - * - * @param fs - * the file system to use - * @param job - * the job conf for the job - * @param name - * the leaf file name for the output file (such as part-00000") - * @param arg3 - * a progressable for reporting progress. 
* @return a composite record writer */ - public RecordWriter<K, V> getRecordWriter(FileSystem fs, Configuration job, String name, Progressable arg3) { - - final FileSystem myFS = fs; - final String myName = generateLeafFileName(name); - final Configuration myJob = job; - final Progressable myProgressable = arg3; + @Override + public RecordWriter<K, V> getRecordWriter(final TaskAttemptContext context) { return new RecordWriter<K, V>() { @@ -76,13 +61,13 @@ public abstract class MultipleOutputForm private final TreeMap<String, RecordWriter<K, V>> recordWriters = new TreeMap<String, RecordWriter<K, V>>(); @Override - public void write(K key, V value) throws IOException { + public void write(K key, V value) throws IOException, InterruptedException { // get the file name based on the key - String keyBasedPath = generateFileNameForKeyValue(key, value, myName); + String keyBasedPath = generateFileNameForKeyValue(key, value, generateLeafFileName(null)); // get the file name based on the input file name - String finalPath = getInputFileBasedOutputFileName(myJob, keyBasedPath); + String finalPath = getInputFileBasedOutputFileName(context.getConfiguration(), keyBasedPath); // get the actual key K actualKey = generateActualKey(key, value); @@ -93,7 +78,7 @@ public abstract class MultipleOutputForm // if we don't have the record writer yet for the final path, create // one // and add it to the cache - rw = getBaseRecordWriter(myFS, myJob, finalPath, myProgressable); + rw = getBaseRecordWriter(context.getConfiguration()); this.recordWriters.put(finalPath, rw); } try { @@ -177,19 +162,19 @@ public abstract class MultipleOutputForm * N trailing legs of the input file name where N is the config value for * "num.of.trailing.legs.to.use". * - * @param job + * @param conf * the job config * @param name * the output file name * @return the outfile name based on a given anme and the input file name. 
*/ - protected String getInputFileBasedOutputFileName(Configuration job, String name) { - String infilepath = job.get("map.input.file"); + protected String getInputFileBasedOutputFileName(Configuration conf, String name) { + String infilepath = conf.get("map.input.file"); if (infilepath == null) { // if the map input file does not exists, then return the given name return name; } - int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0); + int numOfTrailingLegsToUse = conf.getInt("mapred.outputformat.numOfTrailingLegs", 0); if (numOfTrailingLegsToUse <= 0) { return name; } @@ -212,21 +197,8 @@ public abstract class MultipleOutputForm } /** - * - * @param fs - * the file system to use - * @param job - * a job conf object - * @param name - * the name of the file over which a record writer object will be - * constructed - * @param arg3 - * a progressable object * @return A RecordWriter object over the given file - * @throws IOException */ - protected abstract RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, - Configuration job, - String name, - Progressable arg3) throws IOException; + protected abstract RecordWriter<K, V> getBaseRecordWriter(Configuration conf) + throws IOException, InterruptedException; } Modified: mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java?rev=1073511&r1=1073510&r2=1073511&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/MultipleTextOutputFormat.java Tue Feb 22 21:17:35 2011 @@ -20,12 +20,10 @@ package org.apache.mahout.classifier.bay import java.io.IOException; import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; -import org.apache.hadoop.util.Progressable; /** * This class extends the MultipleOutputFormat, allowing to write the output @@ -36,24 +34,11 @@ public class MultipleTextOutputFormat<K, private TextOutputFormat<K, V> theTextOutputFormat; @Override - protected RecordWriter<K, V> getBaseRecordWriter(FileSystem fs, Configuration conf, String name, Progressable arg3) - throws IOException { + protected RecordWriter<K, V> getBaseRecordWriter(Configuration conf) throws IOException, InterruptedException { if (theTextOutputFormat == null) { theTextOutputFormat = new TextOutputFormat<K, V>(); } - try { - return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID())); - } catch (InterruptedException e) { - // continue - } - return null; + return theTextOutputFormat.getRecordWriter(new TaskAttemptContext(conf, new TaskAttemptID())); } - @Override - public RecordWriter<K, V> getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { - if (theTextOutputFormat == null) { - theTextOutputFormat = new TextOutputFormat<K, V>(); - } - return theTextOutputFormat.getRecordWriter(job); - } }