Author: cutting Date: Mon Jun 26 10:48:11 2006 New Revision: 417256 URL: http://svn.apache.org/viewvc?rev=417256&view=rev Log: HADOOP-278. Check for the existence of input directories before starting MapReduce jobs, making it easier to debug this common error. Contributed by Owen.
Modified: lucene/hadoop/trunk/CHANGES.txt lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Modified: lucene/hadoop/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=417256&r1=417255&r2=417256&view=diff ============================================================================== --- lucene/hadoop/trunk/CHANGES.txt (original) +++ lucene/hadoop/trunk/CHANGES.txt Mon Jun 26 10:48:11 2006 @@ -47,6 +47,10 @@ 11. HADOOP-135. Fix potential deadlock in JobTracker by acquiring locks in a consistent order. (omalley via cutting) +12. HADOOP-278. Check for existence of input directories before + starting MapReduce jobs, making it easier to debug this common + error. (omalley via cutting) + Release 0.3.2 - 2006-06-09 Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java?rev=417256&r1=417255&r2=417256&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormat.java Mon Jun 26 10:48:11 2006 @@ -19,6 +19,7 @@ import java.io.IOException; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; /** An input data format. Input files are stored in a {@link FileSystem}. * The processing of an input file may be split across multiple machines. @@ -26,6 +27,18 @@ * RecordReader}. Files must thus be split on record boundaries. */ public interface InputFormat { + /** + * Are the input directories valid? 
This method is used to test the input + * directories when a job is submitted so that the framework can fail early + * with a useful error message when the input directory does not exist. + * @param fileSys the file system to check for the directories + * @param inputDirs the list of input directories + * @return is each inputDir valid? + * @throws IOException + */ + boolean[] areValidInputDirectories(FileSystem fileSys, + Path[] inputDirs) throws IOException; + /** Splits a set of input files. One split is created per map task. * * @param fs the filesystem containing the files to be split Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java?rev=417256&r1=417255&r2=417256&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java (original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/InputFormatBase.java Mon Jun 26 10:48:11 2006 @@ -98,6 +98,16 @@ return (Path[])result.toArray(new Path[result.size()]); } + public boolean[] areValidInputDirectories(FileSystem fileSys, + Path[] inputDirs + ) throws IOException { + boolean[] result = new boolean[inputDirs.length]; + for(int i=0; i < inputDirs.length; ++i) { + result[i] = fileSys.isDirectory(inputDirs[i]); + } + return result; + } + /** Splits files returned by {#listPaths(FileSystem,JobConf) when * they're too big.*/ public FileSplit[] getSplits(FileSystem fs, JobConf job, int numSplits) Modified: lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java?rev=417256&r1=417255&r2=417256&view=diff ============================================================================== --- lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java 
(original) +++ lucene/hadoop/trunk/src/java/org/apache/hadoop/mapred/JobClient.java Mon Jun 26 10:48:11 2006 @@ -260,6 +260,17 @@ job.setWorkingDirectory(fs.getWorkingDirectory()); } + Path[] inputDirs = job.getInputPaths(); + boolean[] validDirs = + job.getInputFormat().areValidInputDirectories(fs, inputDirs); + for(int i=0; i < validDirs.length; ++i) { + if (!validDirs[i]) { + String msg = "Input directory " + inputDirs[i] + " is invalid."; + LOG.error(msg); + throw new IOException(msg); + } + } + // Check the output specification job.getOutputFormat().checkOutputSpecs(fs, job);