Author: pradeepkth
Date: Fri Nov 13 18:36:31 2009
New Revision: 835950
URL: http://svn.apache.org/viewvc?rev=835950&view=rev
Log:
PIG-1090: Update sources to reflect recent changes in load-store interfaces
(pradeepkth)
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java
(original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadFunc.java
Fri Nov 13 18:36:31 2009
@@ -19,6 +19,7 @@
import java.io.IOException;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
@@ -31,66 +32,76 @@
* from a dataset.
*/
public interface LoadFunc {
+ /**
+ * This method is called by the Pig runtime in the front end to convert the
+ * input location to an absolute path if the location is relative. The
+ * loadFunc implementation is free to choose how it converts a relative
+ * location to an absolute location since this may depend on what the location
+ * string represents (hdfs path or some other data source)
+ *
+ * @param location location as provided in the "load" statement of the script
+ * @param curDir the current working directory based on any "cd" statements
+ * in the script before the "load" statement. If there are no "cd" statements
+ * in the script, this would be the home directory -
+ * <pre>/user/&lt;username&gt; </pre>
+ * @return the absolute location based on the arguments passed
+ * @throws IOException if the conversion is not possible
+ */
+ String relativeToAbsolutePath(String location, Path curDir) throws
IOException;
/**
- * Communicate to the loader the load string used in Pig Latin to refer to
the
- * object(s) being loaded. That is, if the PL script is
- * <b>A = load 'bla'</b>
- * then 'bla' is the load string. In general Pig expects these to be
- * a path name, a glob, or a URI. If there is no URI scheme present,
- * Pig will assume it is a file name. This will be
- * called during planning on the front end at which time an empty Job
object
- * will be passed as the second argument.
+ * Communicate to the loader the location of the object(s) being loaded.
+ * The location string passed to the LoadFunc here is the return value of
+ * {@link LoadFunc#relativeToAbsolutePath(String, Path)}
*
- * This method will also be called in the backend multiple times and in
those
- * calls the Job object will actually have job information. Implementations
+ * This method will be called in the backend multiple times.
Implementations
* should bear in mind that this method is called multiple times and should
* ensure there are no inconsistent side effects due to the multiple calls.
*
- * @param location Location indicated in load statement.
+ * @param location Location as returned by
+ * {@link LoadFunc#relativeToAbsolutePath(String, Path)}
* @param job the {@link Job} object
* @throws IOException if the location is not valid.
*/
void setLocation(String location, Job job) throws IOException;
/**
- * Return the InputFormat associated with this loader. This will be
- * called during planning on the front end. The LoadFunc need not
- * carry the InputFormat information to the backend, as it will
- * be provided with the appropriate RecordReader there. This is the
+ * This will be called during planning on the front end. This is the
* instance of InputFormat (rather than the class name) because the
* load function may need to instantiate the InputFormat in order
* to control how it is constructed.
+ * @return the InputFormat associated with this loader.
+ * @throws IOException if there is an exception during InputFormat
+ * construction
*/
- InputFormat getInputFormat();
+ InputFormat getInputFormat() throws IOException;
/**
- * Return the LoadCaster associated with this loader. Returning
- * null indicates that casts from byte array are not supported
- * for this loader. This will be called on the front end during
- * planning and not on the back end during execution.
+ * This will be called on the front end during planning and not on the
back
+ * end during execution.
+ * @return the {@link LoadCaster} associated with this loader. Returning null
+ * indicates that casts from byte array are not supported for this loader.
+ * @throws IOException if there is an exception during LoadCaster
+ * construction
*/
- LoadCaster getLoadCaster();
+ LoadCaster getLoadCaster() throws IOException;
/**
* Initializes LoadFunc for reading data. This will be called during
execution
* before any calls to getNext. The RecordReader needs to be passed here
because
* it has been instantiated for a particular InputSplit.
- * @param reader RecordReader to be used by this instance of the LoadFunc
- * @param split The input split to process
- */
- void prepareToRead(RecordReader reader, PigSplit split);
-
- /**
- * Called after all reading is finished.
+ * @param reader {@link RecordReader} to be used by this instance of the LoadFunc
+ * @param split The input {@link PigSplit} to process
+ * @throws IOException if there is an exception during initialization
*/
- void doneReading();
+ void prepareToRead(RecordReader reader, PigSplit split) throws IOException;
/**
* Retrieves the next tuple to be processed.
* @return the next tuple to be processed or null if there are no more
tuples
* to be processed.
- * @throws IOException
+ * @throws IOException if there is an exception while retrieving the next
+ * tuple
*/
Tuple getNext() throws IOException;
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/LoadMetadata.java
Fri Nov 13 18:36:31 2009
@@ -19,6 +19,7 @@
import java.io.IOException;
+import org.apache.hadoop.conf.Configuration;
import org.apache.pig.impl.plan.OperatorPlan;
/**
@@ -29,28 +30,42 @@
public interface LoadMetadata {
/**
- * Get a schema for the data to be loaded. This schema should represent
+ * Get a schema for the data to be loaded.
+ * @param location Location as returned by
+ * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
+ * @param conf The {@link Configuration} object
+ * @return schema for the data to be loaded. This schema should represent
* all tuples of the returned data. If the schema is unknown or it is
* not possible to return a schema that represents all returned data,
* then null should be returned.
- * This method will be called after a
- * {@link LoadFunc#setLocation(String, org.apache.hadoop.mapreduce.Job)}
- * call is made on the Loader implementing {@link LoadFunc} and {@link LoadMetadata}
+ * @throws IOException if an exception occurs while determining the schema
*/
- ResourceSchema getSchema();
+ ResourceSchema getSchema(String location, Configuration conf) throws
+ IOException;
/**
* Get statistics about the data to be loaded. If no statistics are
* available, then null should be returned.
+ * @param location Location as returned by
+ * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
+ * @param conf The {@link Configuration} object
+ * @return statistics about the data to be loaded. If no statistics are
+ * available, then null should be returned.
+ * @throws IOException if an exception occurs while retrieving statistics
*/
- ResourceStatistics getStatistics();
+ ResourceStatistics getStatistics(String location, Configuration conf)
+ throws IOException;
/**
* Find what columns are partition keys for this input.
- * This function assumes that setLocation has already been called.
+ * @param location Location as returned by
+ * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
+ * @param conf The {@link Configuration} object
* @return array of field names of the partition keys.
+ * @throws IOException if an exception occurs while retrieving partition
keys
*/
- String[] getPartitionKeys();
+ String[] getPartitionKeys(String location, Configuration conf)
+ throws IOException;
/**
* Set the filter for partitioning. It is assumed that this filter
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/ResourceSchema.java
Fri Nov 13 18:36:31 2009
@@ -42,8 +42,7 @@
}
public ResourceFieldSchema[] fields;
- public Map<String, Integer> byName;
-
+
enum Order { ASCENDING, DESCENDING }
public int[] sortKeys; // each entry is an offset into the fields array.
public Order[] sortKeyOrders;
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
--- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java
(original)
+++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/StoreFunc.java
Fri Nov 13 18:36:31 2009
@@ -17,12 +17,10 @@
*/
package org.apache.pig;
-import java.io.IOException;
-import java.io.OutputStream;
+import java.io.IOException;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.pig.data.Tuple;
@@ -38,12 +36,32 @@
public interface StoreFunc {
/**
+ * This method is called by the Pig runtime in the front end to convert the
+ * output location to an absolute path if the location is relative. The
+ * StoreFunc implementation is free to choose how it converts a relative
+ * location to an absolute location since this may depend on what the location
+ * string represents (hdfs path or some other data source)
+ *
+ * @param location location as provided in the "store" statement of the script
+ * @param curDir the current working directory based on any "cd" statements
+ * in the script before the "store" statement. If there are no "cd" statements
+ * in the script, this would be the home directory -
+ * <pre>/user/&lt;username&gt; </pre>
+ * @return the absolute location based on the arguments passed
+ * @throws IOException if the conversion is not possible
+ */
+ String relToAbsPathForStoreLocation(String location, Path curDir) throws
IOException;
+
+ /**
* Return the OutputFormat associated with StoreFunc. This will be called
* on the front end during planning and not on the backend during
- * execution. OutputFormat information need not be carried to the back end
- * as the appropriate RecordWriter will be provided to the StoreFunc.
+ * execution.
+ * @return the {@link OutputFormat} associated with StoreFunc
+ * @throws IOException if an exception occurs while constructing the
+ * OutputFormat
+ *
*/
- OutputFormat getOutputFormat();
+ OutputFormat getOutputFormat() throws IOException;
/**
* Communicate to the store function the location used in Pig Latin to
refer
@@ -62,56 +80,32 @@
/**
* Set the schema for data to be stored. This will be called on the
- * front end during planning. If the store function wishes to record
- * the schema it will need to carry it to the backend.
- * Even if a store function cannot
- * record the schema, it may need to implement this function to
+ * front end during planning. A Store function should implement this
function to
* check that a given schema is acceptable to it. For example, it
* can check that the correct partition keys are included;
* a storage function to be written directly to an OutputFormat can
* make sure the schema will translate in a well defined way.
- * @param s to be checked/set
+ * @param s to be checked
* @throws IOException if this schema is not acceptable. It should include
* a detailed error message indicating what is wrong with the schema.
*/
- void setSchema(ResourceSchema s) throws IOException;
+ void checkSchema(ResourceSchema s) throws IOException;
/**
* Initialize StoreFunc to write data. This will be called during
* execution before the call to putNext.
* @param writer RecordWriter to use.
+ * @throws IOException if an exception occurs during initialization
*/
- void prepareToWrite(RecordWriter writer);
-
- /**
- * XXX FIXME: do we really need this - there is already
- * {@link OutputCommitter#commitTask(org.apache.hadoop.mapreduce.TaskAttemptContext)}
- * Called when all writing is finished. This will be called on the
backend,
- * once for each writing task.
- */
- void doneWriting();
+ void prepareToWrite(RecordWriter writer) throws IOException;
/**
* Write a tuple to the output stream to which this instance was
* previously bound.
*
* @param t the tuple to store.
- * @throws IOException
+ * @throws IOException if an exception occurs during the write
*/
void putNext(Tuple t) throws IOException;
-
- /**
- * XXX FIXME: do we really need this - there is already
- * {@link OutputCommitter#cleanupJob(org.apache.hadoop.mapreduce.JobContext)}
- * Called when writing all of the data is finished. This can be used
- * to commit information to a metadata system, clean up tmp files,
- * close connections, etc. This call will be made on the front end
- * after all back end processing is finished.
- * @param job The job object
- */
- void allFinished(Job job);
-
-
-
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java
Fri Nov 13 18:36:31 2009
@@ -1767,7 +1767,17 @@
physOp.setResultType(op.getType());
FuncSpec lfSpec = op.getLoadFuncSpec();
if(null != lfSpec) {
- ((POCast) physOp).setLoadFSpec(lfSpec);
+ try {
+ ((POCast) physOp).setLoadFSpec(lfSpec);
+ } catch (IOException e) {
+ int errCode = 1053;
+ String msg = "Cannot resolve load function to use for casting"
+
+ " from " +
DataType.findTypeName(op.getExpression().
+ getType()) + " to " +
DataType.findTypeName(op.getType());
+ throw new LogicalToPhysicalTranslatorException(msg, errCode,
+ PigException.ERROR, e);
+ }
+
}
try {
currentPlan.connect(from, physOp);
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
Fri Nov 13 18:36:31 2009
@@ -67,7 +67,7 @@
// TODO Auto-generated constructor stub
}
- private void instantiateFunc() {
+ private void instantiateFunc() throws IOException {
if (load != null)
return;
if (this.loadFSpec != null) {
@@ -77,7 +77,7 @@
this.caster = load.getLoadCaster();
}
- public void setLoadFSpec(FuncSpec lf) {
+ public void setLoadFSpec(FuncSpec lf) throws IOException {
this.loadFSpec = lf;
instantiateFunc();
}
@@ -987,7 +987,13 @@
.getGenerator().getNextNodeId(mKey.scope)));
clone.cloneHelper(this);
clone.loadFSpec = loadFSpec;
- clone.instantiateFunc();
+ try {
+ clone.instantiateFunc();
+ } catch (IOException e) {
+ CloneNotSupportedException cnse = new CloneNotSupportedException();
+ cnse.initCause(e);
+ throw cnse;
+ }
return clone;
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/hbase/HBaseStorage.java
Fri Nov 13 18:36:31 2009
@@ -21,6 +21,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.HTable;
@@ -160,14 +161,6 @@
return null;
}
- /* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
/* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
@@ -204,4 +197,14 @@
// TODO Auto-generated method stub
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String,
org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinStorage.java
Fri Nov 13 18:36:31 2009
@@ -306,14 +306,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // nothing to be done for now
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -345,53 +337,55 @@
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job)
+ * @see org.apache.pig.StoreFunc#getOutputFormat()
*/
@Override
- public void allFinished(Job job) {
- // TODO Auto-generated method stub
-
+ public OutputFormat getOutputFormat() {
+ return new BinStorageOutputFormat();
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#doneWriting()
+ * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
*/
@Override
- public void doneWriting() {
- // TODO Auto-generated method stub
-
+ public void prepareToWrite(RecordWriter writer) {
+ this.recWriter = (BinStorageRecordWriter) writer;
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#getOutputFormat()
+ * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
*/
@Override
- public OutputFormat getOutputFormat() {
- return new BinStorageOutputFormat();
+ public void setStoreLocation(String location, Job job) throws IOException {
+ FileOutputFormat.setOutputPath(job, new Path(location));
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void prepareToWrite(RecordWriter writer) {
- this.recWriter = (BinStorageRecordWriter) writer;
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema)
+ * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema)
*/
@Override
- public void setSchema(ResourceSchema s) throws IOException {
+ public void checkSchema(ResourceSchema s) throws IOException {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
+ * @see
org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void setStoreLocation(String location, Job job) throws IOException {
- FileOutputFormat.setOutputPath(job, new Path(location));
+ public String relToAbsPathForStoreLocation(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
}
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/BinaryStorage.java
Fri Nov 13 18:36:31 2009
@@ -23,6 +23,7 @@
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -148,15 +149,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -193,56 +185,58 @@
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job)
+ * @see org.apache.pig.StoreFunc#getOutputFormat()
*/
@Override
- public void allFinished(Job job) {
+ public OutputFormat getOutputFormat() {
// TODO Auto-generated method stub
-
+ return null;
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#doneWriting()
+ * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
*/
@Override
- public void doneWriting() {
+ public void prepareToWrite(RecordWriter writer) {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#getOutputFormat()
+ * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema)
*/
@Override
- public OutputFormat getOutputFormat() {
+ public void checkSchema(ResourceSchema s) throws IOException {
// TODO Auto-generated method stub
- return null;
+
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
+ * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
*/
@Override
- public void prepareToWrite(RecordWriter writer) {
+ public void setStoreLocation(String location, Job job) throws IOException {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void setSchema(ResourceSchema s) throws IOException {
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
// TODO Auto-generated method stub
-
+ return null;
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
+ * @see
org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void setStoreLocation(String location, Job job) throws IOException {
+ public String relToAbsPathForStoreLocation(String location, Path curDir)
+ throws IOException {
// TODO Auto-generated method stub
-
+ return null;
}
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigDump.java
Fri Nov 13 18:36:31 2009
@@ -20,6 +20,7 @@
import java.io.IOException;
import java.io.OutputStream;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordWriter;
@@ -50,57 +51,49 @@
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.mapreduce.Job)
+ * @see org.apache.pig.StoreFunc#getOutputFormat()
*/
@Override
- public void allFinished(Job job) {
+ public OutputFormat getOutputFormat() {
// TODO Auto-generated method stub
-
+ return null;
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#doneWriting()
+ * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
*/
@Override
- public void doneWriting() {
+ public void prepareToWrite(RecordWriter writer) {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#getOutputFormat()
- */
- @Override
- public OutputFormat getOutputFormat() {
- // TODO Auto-generated method stub
- return null;
- }
-
- /* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#prepareToWrite(org.apache.hadoop.mapreduce.RecordWriter)
+ * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
*/
@Override
- public void prepareToWrite(RecordWriter writer) {
+ public void setStoreLocation(String location, Job job) throws IOException {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema)
+ * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema)
*/
@Override
- public void setSchema(ResourceSchema s) throws IOException {
+ public void checkSchema(ResourceSchema s) throws IOException {
// TODO Auto-generated method stub
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setStoreLocation(java.lang.String,
org.apache.hadoop.mapreduce.Job)
+ * @see
org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void setStoreLocation(String location, Job job) throws IOException {
+ public String relToAbsPathForStoreLocation(String location, Path curDir)
+ throws IOException {
// TODO Auto-generated method stub
-
+ return null;
}
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/PigStorage.java
Fri Nov 13 18:36:31 2009
@@ -304,15 +304,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -348,24 +339,6 @@
}
/* (non-Javadoc)
- * @see
org.apache.pig.StoreFunc#allFinished(org.apache.hadoop.conf.Configuration)
- */
- @Override
- public void allFinished(Job job) {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#doneWriting()
- */
- @Override
- public void doneWriting() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.StoreFunc#getOutputFormat()
*/
@Override
@@ -391,13 +364,33 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.StoreFunc#setSchema(org.apache.pig.ResourceSchema)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String,
org.apache.hadoop.fs.Path)
*/
@Override
- public void setSchema(ResourceSchema s) throws IOException {
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.StoreFunc#checkSchema(org.apache.pig.ResourceSchema)
+ */
+ @Override
+ public void checkSchema(ResourceSchema s) throws IOException {
// TODO Auto-generated method stub
}
+ /* (non-Javadoc)
+ * @see
org.apache.pig.StoreFunc#relToAbsPathForStoreLocation(java.lang.String,
org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relToAbsPathForStoreLocation(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
+
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/builtin/TextLoader.java
Fri Nov 13 18:36:31 2009
@@ -25,6 +25,7 @@
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -218,15 +219,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -261,4 +253,14 @@
// TODO Auto-generated method stub
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/DefaultIndexableLoader.java
Fri Nov 13 18:36:31 2009
@@ -26,6 +26,7 @@
import java.util.zip.GZIPInputStream;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
@@ -255,15 +256,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -298,5 +290,15 @@
// TODO Auto-generated method stub
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/MergeJoinIndexer.java
Fri Nov 13 18:36:31 2009
@@ -22,6 +22,7 @@
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -190,15 +191,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -233,4 +225,14 @@
// TODO Auto-generated method stub
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/PoissonSampleLoader.java
Fri Nov 13 18:36:31 2009
@@ -23,6 +23,7 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
@@ -147,15 +148,6 @@
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getLoadCaster()
*/
@Override
@@ -181,6 +173,16 @@
// TODO Auto-generated method stub
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/RandomSampleLoader.java
Fri Nov 13 18:36:31 2009
@@ -19,6 +19,7 @@
import java.io.IOException;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -56,17 +57,6 @@
super.setNumSamples(100);
}
-
- /* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- // TODO Auto-generated method stub
-
- }
-
-
/* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@@ -104,5 +94,16 @@
// TODO Auto-generated method stub
}
+
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/SampleLoader.java
Fri Nov 13 18:36:31 2009
@@ -66,7 +66,7 @@
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
- public InputFormat getInputFormat() {
+ public InputFormat getInputFormat() throws IOException {
return loader.getInputFormat();
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/ReadToEndLoader.java
Fri Nov 13 18:36:31 2009
@@ -22,6 +22,7 @@
import java.util.List;
import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
@@ -97,18 +98,18 @@
* @param wrappedLoadFunc
* @param conf
* @param inputLocation
- * @param splitIndex
+ * @param startSplitIndex
* @throws IOException
* @throws InterruptedException
*/
public ReadToEndLoader(LoadFunc wrappedLoadFunc, Configuration conf,
- String inputLocation, int splitIndex) throws IOException {
+ String inputLocation, int startSplitIndex) throws IOException {
this.wrappedLoadFunc = wrappedLoadFunc;
// make a copy so that if the underlying InputFormat writes to the
// conf, we don't affect the caller's copy
this.conf = new Configuration(conf);
this.inputLocation = inputLocation;
- this.startSplitIndex = splitIndex;
+ this.startSplitIndex = startSplitIndex;
this.curSplitIndex = startSplitIndex;
// let's initialize the wrappedLoadFunc
@@ -165,6 +166,7 @@
}
// if loadfunc returned null, we need to read next split
// if there is one
+ reader.close();
curSplitIndex++;
return getNextHelper();
}
@@ -184,20 +186,10 @@
return t;
}
}
- // we processed all splits - we are done
- wrappedLoadFunc.doneReading();
return null;
}
/* (non-Javadoc)
- * @see org.apache.pig.LoadFunc#doneReading()
- */
- @Override
- public void doneReading() {
- throw new RuntimeException("Internal Error: Unimplemented method called!");
- }
-
- /* (non-Javadoc)
* @see org.apache.pig.LoadFunc#getInputFormat()
*/
@Override
@@ -228,5 +220,15 @@
public void setLocation(String location, Job job) throws IOException {
throw new RuntimeException("Internal Error: Unimplemented method called!");
}
+
+ /* (non-Javadoc)
+ * @see org.apache.pig.LoadFunc#relativeToAbsolutePath(java.lang.String, org.apache.hadoop.fs.Path)
+ */
+ @Override
+ public String relativeToAbsolutePath(String location, Path curDir)
+ throws IOException {
+ // TODO Auto-generated method stub
+ return null;
+ }
}
Modified:
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java?rev=835950&r1=835949&r2=835950&view=diff
==============================================================================
---
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java
(original)
+++
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/logicalLayer/LOLoad.java
Fri Nov 13 18:36:31 2009
@@ -176,7 +176,9 @@
new Job(ConfigurationUtil.toConfiguration(
mStorage.getConfiguration())));
LoadMetadata loadMetadata = (LoadMetadata)mLoadFunc;
- ResourceSchema rSchema = loadMetadata.getSchema();
+ ResourceSchema rSchema = loadMetadata.getSchema(
+ mInputFileSpec.getFileName(),
+ ConfigurationUtil.toConfiguration(mStorage.getConfiguration()));
return Schema.getPigSchema(rSchema);
} else {
return null;