Author: pradeepkth Date: Fri Nov 13 18:36:31 2009 New Revision: 835950 URL: http://svn.apache.org/viewvc?rev=835950&view=rev Log: PIG-1090: Update sources to reflect recent changes in load-store interfaces (pradeepkth)
Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java?rev=835950&r1=835949&r2=835950&view=diff 
============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java Fri Nov 13 18:36:31 2009 @@ -19,6 +19,7 @@ import java.io.IOException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.RecordReader; @@ -31,66 +32,76 @@ * from a dataset. */ public interface LoadFunc { + /** + * This method is called by the Pig runtime in the front end to convert the + * input location to an absolute path if the location is relative. The + * loadFunc implementation is free to choose how it converts a relative + * location to an absolute location since this may depend on what the location + * string represent (hdfs path or some other data source) + * + * @param location location as provided in the "load" statement of the script + * @param curDir the current working direction based on any "cd" statements + * in the script before the "load" statement. If there are no "cd" statements + * in the script, this would be the home directory - + * <pre>/user/<username> </pre> + * @return the absolute location based on the arguments passed + * @throws IOException if the conversion is not possible + */ + String relativeToAbsolutePath(String location, Path curDir) throws IOException; /** - * Communicate to the loader the load string used in Pig Latin to refer to the - * object(s) being loaded. That is, if the PL script is - * <b>A = load 'bla'</b> - * then 'bla' is the load string. In general Pig expects these to be - * a path name, a glob, or a URI. If there is no URI scheme present, - * Pig will assume it is a file name. This will be - * called during planning on the front end at which time an empty Job object - * will be passed as the second argument. 
+ * Communicate to the loader the location of the object(s) being loaded. + * The location string passed to the LoadFunc here is the return value of + * {@link LoadFunc#relativeToAbsolutePath(String, Path)} * - * This method will also be called in the backend multiple times and in those - * calls the Job object will actually have job information. Implementations + * This method will be called in the backend multiple times. Implementations * should bear in mind that this method is called multiple times and should * ensure there are no inconsistent side effects due to the multiple calls. * - * @param location Location indicated in load statement. + * @param location Location as returned by + * {@link LoadFunc#relativeToAbsolutePath(String, Path)} * @param job the {@link Job} object * @throws IOException if the location is not valid. */ void setLocation(String location, Job job) throws IOException; /** - * Return the InputFormat associated with this loader. This will be - * called during planning on the front end. The LoadFunc need not - * carry the InputFormat information to the backend, as it will - * be provided with the appropriate RecordReader there. This is the + * This will be called during planning on the front end. This is the * instance of InputFormat (rather than the class name) because the * load function may need to instantiate the InputFormat in order * to control how it is constructed. + * @return the InputFormat associated with this loader. + * @throws IOException if there is an exception during InputFormat + * construction */ - InputFormat getInputFormat(); + InputFormat getInputFormat() throws IOException; /** - * Return the LoadCaster associated with this loader. Returning - * null indicates that casts from byte array are not supported - * for this loader. This will be called on the front end during - * planning and not on the back end during execution. 
+ * This will be called on the front end during planning and not on the back + * end during execution. + * @return the {@link LoadCaster} associated with this loader. Returning null + * indicates that casts from byte array are not supported for this loader. + * construction + * @throws IOException if there is an exception during LoadCaster */ - LoadCaster getLoadCaster(); + LoadCaster getLoadCaster() throws IOException; /** * Initializes LoadFunc for reading data. This will be called during execution * before any calls to getNext. The RecordReader needs to be passed here because * it has been instantiated for a particular InputSplit. - * @param reader RecordReader to be used by this instance of the LoadFunc - * @param split The input split to process - */ - void prepareToRead(RecordReader reader, PigSplit split); - - /** - * Called after all reading is finished. + * @param reader {@link RecordReader} to be used by this instance of the LoadFunc + * @param split The input {@link PigSplit} to process + * @throws IOException if there is an exception during initialization */ - void doneReading(); + void prepareToRead(RecordReader reader, PigSplit split) throws IOException; /** * Retrieves the next tuple to be processed. * @return the next tuple to be processed or null if there are no more tuples * to be processed. 
- * @throws IOException + * @throws IOException if there is an exception while retrieving the next + * tuple */ Tuple getNext() throws IOException; Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java Fri Nov 13 18:36:31 2009 @@ -19,6 +19,7 @@ import java.io.IOException; +import org.apache.hadoop.conf.Configuration; import org.apache.pig.impl.plan.OperatorPlan; /** @@ -29,28 +30,42 @@ public interface LoadMetadata { /** - * Get a schema for the data to be loaded. This schema should represent + * Get a schema for the data to be loaded. + * @param location Location as returned by + * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)} + * @param conf The {@link Configuration} object + * @return schema for the data to be loaded. This schema should represent * all tuples of the returned data. If the schema is unknown or it is * not possible to return a schema that represents all returned data, * then null should be returned. - * This method will be called after a - * {@link LoadFunc#setLocation(String, org.apache.hadoop.mapreduce.Job)} - * call is made on the Loader implementing {@link LoadFunc} and {@link LoadMetadata} + * @throws IOException if an exception occurs while determining the schema */ - ResourceSchema getSchema(); + ResourceSchema getSchema(String location, Configuration conf) throws + IOException; /** * Get statistics about the data to be loaded. If no statistics are * available, then null should be returned. 
+ * @param location Location as returned by + * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)} + * @param conf The {@link Configuration} object + * @return statistics about the data to be loaded. If no statistics are + * available, then null should be returned. + * @throws IOException if an exception occurs while retrieving statistics */ - ResourceStatistics getStatistics(); + ResourceStatistics getStatistics(String location, Configuration conf) + throws IOException; /** * Find what columns are partition keys for this input. - * This function assumes that setLocation has already been called. + * @param location Location as returned by + * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)} + * @param conf The {@link Configuration} object * @return array of field names of the partition keys. + * @throws IOException if an exception occurs while retrieving partition keys */ - String[] getPartitionKeys(); + String[] getPartitionKeys(String location, Configuration conf) + throws IOException; /** * Set the filter for partitioning. It is assumed that this filter Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java Fri Nov 13 18:36:31 2009 @@ -42,8 +42,7 @@ } public ResourceFieldSchema[] fields; - public Map<String, Integer> byName; - + enum Order { ASCENDING, DESCENDING } public int[] sortKeys; // each entry is an offset into the fields array. 
public Order[] sortKeyOrders; Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java Fri Nov 13 18:36:31 2009 @@ -17,12 +17,10 @@ */ package org.apache.pig; -import java.io.IOException; -import java.io.OutputStream; +import java.io.IOException; -import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.OutputCommitter; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.pig.data.Tuple; @@ -38,12 +36,32 @@ public interface StoreFunc { /** + * This method is called by the Pig runtime in the front end to convert the + * output location to an absolute path if the location is relative. The + * StoreFunc implementation is free to choose how it converts a relative + * location to an absolute location since this may depend on what the location + * string represent (hdfs path or some other data source) + * + * @param location location as provided in the "store" statement of the script + * @param curDir the current working direction based on any "cd" statements + * in the script before the "store" statement. If there are no "cd" statements + * in the script, this would be the home directory - + * <pre>/user/<username> </pre> + * @return the absolute location based on the arguments passed + * @throws IOException if the conversion is not possible + */ + String relToAbsPathForStoreLocation(String location, Path curDir) throws IOException; + + /** * Return the OutputFormat associated with StoreFunc. 
This will be called * on the front end during planning and not on the backend during - * execution. OutputFormat information need not be carried to the back end - * as the appropriate RecordWriter will be provided to the StoreFunc. + * execution. + * @return the {...@link OutputFormat} associated with StoreFunc + * @throws IOException if an exception occurs while constructing the + * OutputFormat + * */ - OutputFormat getOutputFormat(); + OutputFormat getOutputFormat() throws IOException; /** * Communicate to the store function the location used in Pig Latin to refer @@ -62,56 +80,32 @@ /** * Set the schema for data to be stored. This will be called on the - * front end during planning. If the store function wishes to record - * the schema it will need to carry it to the backend. - * Even if a store function cannot - * record the schema, it may need to implement this function to + * front end during planning. A Store function should implement this function to * check that a given schema is acceptable to it. For example, it * can check that the correct partition keys are included; * a storage function to be written directly to an OutputFormat can * make sure the schema will translate in a well defined way. - * @param s to be checked/set + * @param s to be checked * @throws IOException if this schema is not acceptable. It should include * a detailed error message indicating what is wrong with the schema. */ - void setSchema(ResourceSchema s) throws IOException; + void checkSchema(ResourceSchema s) throws IOException; /** * Initialize StoreFunc to write data. This will be called during * execution before the call to putNext. * @param writer RecordWriter to use. + * @throws IOException if an exception occurs during initialization */ - void prepareToWrite(RecordWriter writer); - - /** - * XXX FIXME: do we really need this - there is already - * {...@link OutputCommitter#commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext)} - * Called when all writing is finished. 
This will be called on the backend, - * once for each writing task. - */ - void doneWriting(); + void prepareToWrite(RecordWriter writer) throws IOException; /** * Write a tuple the output stream to which this instance was * previously bound. * * @param t the tuple to store. - * @throws IOException + * @throws IOException if an exception occurs during the write */ void putNext(Tuple t) throws IOException; - - /** - * XXX FIXME: do we really need this - there is already - * {...@link OutputCommitter#cleanupJob(org.apache.hadoop.mapreduce.JobContext)} - * Called when writing all of the data is finished. This can be used - * to commit information to a metadata system, clean up tmp files, - * close connections, etc. This call will be made on the front end - * after all back end processing is finished. - * @param job The job object - */ - void allFinished(Job job); - - - } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java Fri Nov 13 18:36:31 2009 @@ -1767,7 +1767,17 @@ physOp.setResultType(op.getType()); FuncSpec lfSpec = op.getLoadFuncSpec(); if(null != lfSpec) { - ((POCast) physOp).setLoadFSpec(lfSpec); + try { + ((POCast) physOp).setLoadFSpec(lfSpec); + } catch (IOException e) { + int errCode = 1053; + String msg = "Cannot resolve load function to use for casting" + + " from " + 
DataType.findTypeName(op.getExpression(). + getType()) + " to " + DataType.findTypeName(op.getType()); + throw new LogicalToPhysicalTranslatorException(msg, errCode, + PigException.ERROR, e); + } + } try { currentPlan.connect(from, physOp); Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java Fri Nov 13 18:36:31 2009 @@ -67,7 +67,7 @@ // TODO Auto-generated constructor stub } - private void instantiateFunc() { + private void instantiateFunc() throws IOException { if (load != null) return; if (this.loadFSpec != null) { @@ -77,7 +77,7 @@ this.caster = load.getLoadCaster(); } - public void setLoadFSpec(FuncSpec lf) { + public void setLoadFSpec(FuncSpec lf) throws IOException { this.loadFSpec = lf; instantiateFunc(); } @@ -987,7 +987,13 @@ .getGenerator().getNextNodeId(mKey.scope))); clone.cloneHelper(this); clone.loadFSpec = loadFSpec; - clone.instantiateFunc(); + try { + clone.instantiateFunc(); + } catch (IOException e) { + CloneNotSupportedException cnse = new CloneNotSupportedException(); + cnse.initCause(e); + throw cnse; + } return clone; } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java Fri Nov 13 18:36:31 2009 @@ -21,6 +21,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.client.HTable; @@ -160,14 +161,6 @@ return null; } - /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() @@ -204,4 +197,14 @@ // TODO Auto-generated method stub } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java Fri Nov 13 18:36:31 2009 @@ -306,14 +306,6 @@ } /* (non-Javadoc) - * 
@see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // nothing to be done for now - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -345,53 +337,55 @@ } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#getOutputFormat() */ @Override - public void allFinished(Job job) { - // TODO Auto-generated method stub - + public OutputFormat getOutputFormat() { + return new BinStorageOutputFormat(); } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#doneWriting() + * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) */ @Override - public void doneWriting() { - // TODO Auto-generated method stub - + public void prepareToWrite(RecordWriter writer) { + this.recWriter = (BinStorageRecordWriter) writer; } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#getOutputFormat() + * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) */ @Override - public OutputFormat getOutputFormat() { - return new BinStorageOutputFormat(); + public void setStoreLocation(String location, Job job) throws IOException { + FileOutputFormat.setOutputPath(job, new Path(location)); } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void prepareToWrite(RecordWriter writer) { - this.recWriter = (BinStorageRecordWriter) writer; + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema) + * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema) */ @Override - public void setSchema(ResourceSchema s) 
throws IOException { + public void checkSchema(ResourceSchema s) throws IOException { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void setStoreLocation(String location, Job job) throws IOException { - FileOutputFormat.setOutputPath(job, new Path(location)); + public String relToAbsPathForStoreLocation(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java Fri Nov 13 18:36:31 2009 @@ -23,6 +23,7 @@ import java.net.URL; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; @@ -148,15 +149,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -193,56 +185,58 @@ } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#getOutputFormat() */ @Override - public void allFinished(Job job) { + public OutputFormat getOutputFormat() { // TODO 
Auto-generated method stub - + return null; } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#doneWriting() + * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) */ @Override - public void doneWriting() { + public void prepareToWrite(RecordWriter writer) { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#getOutputFormat() + * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema) */ @Override - public OutputFormat getOutputFormat() { + public void checkSchema(ResourceSchema s) throws IOException { // TODO Auto-generated method stub - return null; + } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) + * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) */ @Override - public void prepareToWrite(RecordWriter writer) { + public void setStoreLocation(String location, Job job) throws IOException { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void setSchema(ResourceSchema s) throws IOException { + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { // TODO Auto-generated method stub - + return null; } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void setStoreLocation(String location, Job job) throws IOException { + public String relToAbsPathForStoreLocation(String location, Path curDir) + throws IOException { // TODO Auto-generated method stub - + return null; } } Modified: 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java Fri Nov 13 18:36:31 2009 @@ -20,6 +20,7 @@ import java.io.IOException; import java.io.OutputStream; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.OutputFormat; import org.apache.hadoop.mapreduce.RecordWriter; @@ -50,57 +51,49 @@ } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#getOutputFormat() */ @Override - public void allFinished(Job job) { + public OutputFormat getOutputFormat() { // TODO Auto-generated method stub - + return null; } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#doneWriting() + * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) */ @Override - public void doneWriting() { + public void prepareToWrite(RecordWriter writer) { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#getOutputFormat() - */ - @Override - public OutputFormat getOutputFormat() { - // TODO Auto-generated method stub - return null; - } - - /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter) + * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) */ @Override - public void prepareToWrite(RecordWriter writer) { + public void setStoreLocation(String location, Job job) throws IOException { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see 
org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema) + * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema) */ @Override - public void setSchema(ResourceSchema s) throws IOException { + public void checkSchema(ResourceSchema s) throws IOException { // TODO Auto-generated method stub } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String, org.apache.hadoop.mapreduce.Job) + * @see org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void setStoreLocation(String location, Job job) throws IOException { + public String relToAbsPathForStoreLocation(String location, Path curDir) + throws IOException { // TODO Auto-generated method stub - + return null; } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java Fri Nov 13 18:36:31 2009 @@ -304,15 +304,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -348,24 +339,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.conf.Configuration) - */ - @Override - public void allFinished(Job job) { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#doneWriting() - */ - @Override - public void doneWriting() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see 
org.apache.pig.StoreFunc#getOutputFormat() */ @Override @@ -391,13 +364,33 @@ } /* (non-Javadoc) - * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) */ @Override - public void setSchema(ResourceSchema s) throws IOException { + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } + + /* (non-Javadoc) + * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema) + */ + @Override + public void checkSchema(ResourceSchema s) throws IOException { // TODO Auto-generated method stub } + /* (non-Javadoc) + * @see org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relToAbsPathForStoreLocation(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } + } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java Fri Nov 13 18:36:31 2009 @@ -25,6 +25,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; @@ -218,15 +219,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* 
(non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -261,4 +253,14 @@ // TODO Auto-generated method stub } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java Fri Nov 13 18:36:31 2009 @@ -26,6 +26,7 @@ import java.util.zip.GZIPInputStream; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.RecordReader; @@ -255,15 +256,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -298,5 +290,15 @@ // TODO Auto-generated method stub } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java Fri Nov 13 18:36:31 2009 @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; @@ -190,15 +191,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -233,4 +225,14 @@ // TODO Auto-generated method stub } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java (original) +++ 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java Fri Nov 13 18:36:31 2009 @@ -23,6 +23,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.RecordReader; @@ -147,15 +148,6 @@ } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getLoadCaster() */ @Override @@ -181,6 +173,16 @@ // TODO Auto-generated method stub } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java Fri Nov 13 18:36:31 2009 @@ -19,6 +19,7 @@ import java.io.IOException; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; @@ -56,17 +57,6 @@ super.setNumSamples(100); } - - /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - // TODO Auto-generated 
method stub - - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @@ -104,5 +94,16 @@ // TODO Auto-generated method stub } + + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java Fri Nov 13 18:36:31 2009 @@ -66,7 +66,7 @@ * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override - public InputFormat getInputFormat() { + public InputFormat getInputFormat() throws IOException { return loader.getInputFormat(); } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java Fri Nov 13 18:36:31 2009 @@ -22,6 +22,7 @@ import java.util.List; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; 
import org.apache.hadoop.mapreduce.Job; @@ -97,18 +98,18 @@ * @param wrappedLoadFunc * @param conf * @param inputLocation - * @param splitIndex + * @param startSplitIndex * @throws IOException * @throws InterruptedException */ public ReadToEndLoader(LoadFunc wrappedLoadFunc, Configuration conf, - String inputLocation, int splitIndex) throws IOException { + String inputLocation, int startSplitIndex) throws IOException { this.wrappedLoadFunc = wrappedLoadFunc; // make a copy so that if the underlying InputFormat writes to the // conf, we don't affect the caller's copy this.conf = new Configuration(conf); this.inputLocation = inputLocation; - this.startSplitIndex = splitIndex; + this.startSplitIndex = startSplitIndex; this.curSplitIndex = startSplitIndex; // let's initialize the wrappedLoadFunc @@ -165,6 +166,7 @@ } // if loadfunc returned null, we need to read next split // if there is one + reader.close(); curSplitIndex++; return getNextHelper(); } @@ -184,20 +186,10 @@ return t; } } - // we processed all splits - we are done - wrappedLoadFunc.doneReading(); return null; } /* (non-Javadoc) - * @see org.apache.pig.LoadFunc#doneReading() - */ - @Override - public void doneReading() { - throw new RuntimeException("Internal Error: Unimplemented method called!"); - } - - /* (non-Javadoc) * @see org.apache.pig.LoadFunc#getInputFormat() */ @Override @@ -228,5 +220,15 @@ public void setLocation(String location, Job job) throws IOException { throw new RuntimeException("Internal Error: Unimplemented method called!"); } + + /* (non-Javadoc) + * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path) + */ + @Override + public String relativeToAbsolutePath(String location, Path curDir) + throws IOException { + // TODO Auto-generated method stub + return null; + } } Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java?rev=835950&r1=835949&r2=835950&view=diff ============================================================================== --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java Fri Nov 13 18:36:31 2009 @@ -176,7 +176,9 @@ new Job(ConfigurationUtil.toConfiguration( mStorage.getConfiguration()))); LoadMetadata loadMetadata = (LoadMetadata)mLoadFunc; - ResourceSchema rSchema = loadMetadata.getSchema(); + ResourceSchema rSchema = loadMetadata.getSchema( + mInputFileSpec.getFileName(), + ConfigurationUtil.toConfiguration(mStorage.getConfiguration())); return Schema.getPigSchema(rSchema); } else { return null;