Modified: hadoop/pig/trunk/src/org/apache/pig/StoreFuncInterface.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/StoreFuncInterface.java?rev=934242&r1=934241&r2=934242&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/StoreFuncInterface.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/StoreFuncInterface.java Wed Apr 14 23:44:16 2010
@@ -23,17 +23,19 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.OutputFormat;
 import org.apache.hadoop.mapreduce.RecordWriter;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.util.UDFContext;
 
 
 /**
-* This interface is used to implement functions to write records
-* from a dataset.
-* 
-*
+* StoreFuncs take records from Pig's processing and store them into a data store.  Most frequently
+* this is an HDFS file, but it could also be an HBase instance, RDBMS, etc.
 */
-
+@InterfaceAudience.Public
+@InterfaceStability.Stable
 public interface StoreFuncInterface {
 
     /**
@@ -74,6 +76,8 @@ public interface StoreFuncInterface {
      * This method will be called in the frontend and backend multiple times. Implementations
      * should bear in mind that this method is called multiple times and should
      * ensure there are no inconsistent side effects due to the multiple calls.
+     * {@link #checkSchema(ResourceSchema)} will be called before any call to
+     * {@link #setStoreLocation(String, Job)}.
      * 
 
      * @param location Location returned by 
@@ -106,9 +110,7 @@ public interface StoreFuncInterface {
     void prepareToWrite(RecordWriter writer) throws IOException;
 
     /**
-     * Write a tuple the output stream to which this instance was
-     * previously bound.
-     * 
+     * Write a tuple to the data store.
      * @param t the tuple to store.
      * @throws IOException if an exception occurs during the write
      */
@@ -118,7 +120,10 @@ public interface StoreFuncInterface {
      * This method will be called by Pig both in the front end and back end to
      * pass a unique signature to the {@link StoreFuncInterface} which it can use to store
      * information in the {@link UDFContext} which it needs to store between
-     * various method invocations in the front end and back end. 
+     * various method invocations in the front end and back end.  This is necessary
+     * because in a Pig Latin script with multiple stores, the different
+     * instances of store functions need to be able to find their (and only their)
+     * data in the UDFContext object.
      * @param signature a unique signature to identify this StoreFuncInterface
      */
     public void setStoreFuncUDFContextSignature(String signature);
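
For illustration, here is a minimal sketch (not part of this commit; the class
and property names are hypothetical) of how a store function can use the
signature to keep its state separate in the UDFContext:

    import java.io.IOException;
    import java.util.Properties;

    import org.apache.pig.ResourceSchema;
    import org.apache.pig.StoreFuncInterface;
    import org.apache.pig.impl.util.UDFContext;

    // Abstract so the remaining StoreFuncInterface methods can be elided here.
    public abstract class MyStoreFunc implements StoreFuncInterface {
        private String signature;

        @Override
        public void setStoreFuncUDFContextSignature(String signature) {
            // Remember the signature; it keys this instance's UDFContext data.
            this.signature = signature;
        }

        @Override
        public void checkSchema(ResourceSchema s) throws IOException {
            // Store the schema under this instance's signature so that other
            // stores in the same script cannot see or clobber it.
            Properties p = UDFContext.getUDFContext()
                .getUDFProperties(this.getClass(), new String[] { signature });
            p.setProperty("my.store.schema", s.toString());
        }
    }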

Modified: hadoop/pig/trunk/src/org/apache/pig/StoreMetadata.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/StoreMetadata.java?rev=934242&r1=934241&r2=934242&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/StoreMetadata.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/StoreMetadata.java Wed Apr 14 23:44:16 2010
@@ -22,19 +22,26 @@ import java.io.IOException;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.mapreduce.Job;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.ResourceStatistics;
 
 /**
- * This interface defines how to write metadata related to data to be loaded.
+ * This interface defines how to write metadata related to data to be stored.
  * If a given store function does not implement this interface, it will be assumed that it
  * is unable to record metadata about the associated data.
  */
-
+@InterfaceAudience.Public
+@InterfaceStability.Evolving
 public interface StoreMetadata {
 
     /**
      * Store statistics about the data being written.
+     * @param stats statistics to be recorded
+     * @param location Location as returned by 
+     * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
      * @param job The {@link Job} object - this should be used only to obtain 
      * cluster properties through {@link Job#getConfiguration()} and not to set/query
      * any runtime job information.  
@@ -44,6 +51,9 @@ public interface StoreMetadata {
 
     /**
      * Store schema of the data being written
+     * @param schema Schema to be recorded
+     * @param location Location as returned by 
+     * {@link LoadFunc#relativeToAbsolutePath(String, org.apache.hadoop.fs.Path)}
      * @param job The {@link Job} object - this should be used only to obtain 
      * cluster properties through {@link Job#getConfiguration()} and not to set/query
      * any runtime job information.  
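
As a rough sketch (assumed, not from this commit), an implementation might
record the schema in a side file next to the stored data:

    import java.io.IOException;

    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.mapreduce.Job;

    import org.apache.pig.ResourceSchema;
    import org.apache.pig.ResourceStatistics;
    import org.apache.pig.StoreMetadata;

    public class SchemaWritingStore implements StoreMetadata {
        @Override
        public void storeSchema(ResourceSchema schema, String location, Job job)
                throws IOException {
            // Per the contract above, use the Job only for cluster properties.
            FileSystem fs = FileSystem.get(job.getConfiguration());
            FSDataOutputStream out = fs.create(new Path(location, ".pig_schema"));
            out.writeBytes(schema.toString());
            out.close();
        }

        @Override
        public void storeStatistics(ResourceStatistics stats, String location,
                Job job) throws IOException {
            // This sketch does not record statistics.
        }
    }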

Modified: hadoop/pig/trunk/src/org/apache/pig/StreamToPig.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/StreamToPig.java?rev=934242&r1=934241&r2=934242&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/StreamToPig.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/StreamToPig.java Wed Apr 14 23:44:16 2010
@@ -19,10 +19,12 @@ package org.apache.pig;
 
 import java.io.IOException;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.data.Tuple;
 
 /**
- * The interface used for the custom mapping of a byte array, received from
+ * The interface is used for the custom mapping of a byte array, received from
  * the stdout of the streaming process, to a {@link Tuple}. 
  * 
  * This interface, together with {@link PigToStream}, is designed to provide
@@ -36,10 +38,15 @@ import org.apache.pig.data.Tuple;
  * Typically, user implements this interface for a particular type of 
  * stream command and specifies the implementation class in the Pig DEFINE
  * statement. 
+ * @since Pig 0.7
  */
+@InterfaceAudience.Public
+@InterfaceStability.Stable
 public interface StreamToPig {
     /**
      *  Given a byte array from a streaming executable, produce a tuple.
+     * @param bytes to deserialize.
+     * @return Data as a Pig Tuple.
      */
     public Tuple deserialize(byte[] bytes) throws IOException;
 
@@ -47,7 +54,8 @@ public interface StreamToPig {
      * This will be called on the front end during planning and not on the back 
      * end during execution.
      * 
-     * @return the {@link LoadCaster} associated with this object. 
+     * @return the {@link LoadCaster} associated with this object, or null if
+     * there is no such LoadCaster. 
      * @throws IOException if there is an exception during LoadCaster 
      */
     public LoadCaster getLoadCaster() throws IOException;
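
For example, a deserializer for tab-separated streaming output might look like
this (a hypothetical sketch, not part of this commit):

    import java.io.IOException;

    import org.apache.pig.LoadCaster;
    import org.apache.pig.StreamToPig;
    import org.apache.pig.data.Tuple;
    import org.apache.pig.data.TupleFactory;

    public class TabStreamToPig implements StreamToPig {
        private static final TupleFactory tupleFactory = TupleFactory.getInstance();

        @Override
        public Tuple deserialize(byte[] bytes) throws IOException {
            // Split one line of streaming output on tabs, keeping empty fields.
            String[] fields = new String(bytes, "UTF-8").split("\t", -1);
            Tuple t = tupleFactory.newTuple(fields.length);
            for (int i = 0; i < fields.length; i++) {
                t.set(i, fields[i]);
            }
            return t;
        }

        @Override
        public LoadCaster getLoadCaster() throws IOException {
            // No caster; the contract above now explicitly allows null.
            return null;
        }
    }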

Added: hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceAudience.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceAudience.java?rev=934242&view=auto
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceAudience.java (added)
+++ hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceAudience.java Wed Apr 14 23:44:16 2010
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.classification;
+
+import java.lang.annotation.Documented;
+
+/**
+ * Annotation to inform users of a package, class or method's intended audience.
+ */
+public class InterfaceAudience {
+  /**
+   * Intended for use by any project or application.
+   */
+  @Documented public @interface Public {};
+  
+  /**
+   * Intended only for the project(s) specified in the annotation
+   */
+  @Documented public @interface LimitedPrivate {
+    String[] value();
+  };
+  
+  /**
+   * Intended for use only within Pig itself.
+   */
+  @Documented public @interface Private {};
+
+  private InterfaceAudience() {} // Audience can't exist on its own
+}
+
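
A hypothetical usage example (not part of this commit); for LimitedPrivate the
consuming projects are named as the annotation's value:

    import org.apache.pig.classification.InterfaceAudience;

    // Intended only for the named projects, not for general use.
    @InterfaceAudience.LimitedPrivate({"Hive", "Zebra"})
    public interface SomeInternalHook {
        void onEvent(String eventName);
    }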

Added: hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceStability.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceStability.java?rev=934242&view=auto
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceStability.java (added)
+++ hadoop/pig/trunk/src/org/apache/pig/classification/InterfaceStability.java Wed Apr 14 23:44:16 2010
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.classification;
+
+import java.lang.annotation.Documented;
+
+/**
+ * Annotation to inform users of how much to rely on a particular package,
+ * class or method not changing over time.
+ */
+public class InterfaceStability {
+  /**
+   * Can evolve while retaining compatibility across minor release boundaries;
+   * can break compatibility only at a major release (i.e. at m.0).
+   */
+  @Documented
+  public @interface Stable {};
+  
+  /**
+   * Evolving, but can break compatibility at a minor release (i.e. at m.x).
+   */
+  @Documented
+  public @interface Evolving {};
+  
+  /**
+   * No guarantee is provided as to reliability or stability across any
+   * level of release granularity.
+   */
+  @Documented
+  public @interface Unstable {};
+}
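
Together with InterfaceAudience, classes are then labeled the way the
interfaces above are, e.g. (the class name here is hypothetical):

    import org.apache.pig.classification.InterfaceAudience;
    import org.apache.pig.classification.InterfaceStability;

    // A public API expected to stay compatible across minor releases.
    @InterfaceAudience.Public
    @InterfaceStability.Stable
    public interface ExampleUserFacingApi {
        void doSomething();
    }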

Modified: hadoop/pig/trunk/src/overview.html
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/overview.html?rev=934242&r1=934241&r2=934242&view=diff
==============================================================================
--- hadoop/pig/trunk/src/overview.html (original)
+++ hadoop/pig/trunk/src/overview.html Wed Apr 14 23:44:16 2010
@@ -5,9 +5,9 @@ environment.  It consists of a language 
 <a href="http://wiki.apache.org/pig/PigLatin">Pig Latin</a>,
 a compiler for this language, and an execution engine to execute the programs.
 <p>
-Pig currently runs on the <a href="http://hadoop.apache.org/core/">hadoop</a>
-platform, reading data from and writing data to hdfs, and doing processing via
-one or more map-reduce jobs.
+Pig runs on <a href="http://hadoop.apache.org/core/">Hadoop</a>
+MapReduce, reading data from and writing data to HDFS, and doing processing via
+one or more MapReduce jobs.
 
 <h2> Design </h2>
 This section gives a very high-level overview of the design of the Pig system.  
@@ -16,9 +16,8 @@ looking for the Design heading in the do
 
 <h3> Overview </h3>
 <p>
-Pig's design is guided by our <a href="http://incubator.apache.org/pig/philosophy.html">
-pig philosophy</a> and by our experience with similar data processing 
-systems.
+Pig's design is guided by our <a href="http://hadoop.apache.org/pig/philosophy.html">
+pig philosophy</a>.
 <p>
 Pig shares many similarities with a traditional RDBMS design.  It has a parser,
 type checker, optimizer, and operators that perform the data processing.  However,
@@ -28,18 +27,41 @@ transactions, pig does not directly mana
 execution framework.
 <p>
 <h3> High Level Architecture </h3>
-Pig is split between the front and back ends of the engine.  The front end handles
-parsing, checking, and doing initial optimization on a Pig Latin script.  The
-result is a {@link org.apache.pig.impl.logicalLayer.LogicalPlan} that defines how
-the script will be executed.
-<p>
-Once a LogicalPlan has been generated, the backend of Pig handles executing the
-script.  Pig supports multiple different
-backend implementations, in order to allow Pig to run on different systems.  
-Currently pig comes with two backends, Map-Reduce and local.  For a given run,
-pig selects the backend to use via configuration.
-
+Pig is split between the front and back ends of the engine.  In the front end,
+the parser transforms a Pig Latin script into a
+{@link org.apache.pig.impl.logicalLayer.LogicalPlan}.  Semantic checks (such
+as type checking) and some optimizations (such as determining which fields in the data need
+to be read to satisfy the script) are done on this Logical Plan.  The Logical
+Plan is then transformed into a
+{@link org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan}.
+This Physical Plan contains the operators that will be applied to the data.  It is then
+divided into a set of MapReduce jobs by the
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MRCompiler}, yielding an
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan}.  This
+MROperPlan (aka the map reduce plan) is then optimized (for example, the combiner is used where
+possible, jobs that scan the same input data are combined where possible, etc.).  Finally, a set of
+MapReduce jobs is generated by the
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.JobControlCompiler}.  These are
+submitted to Hadoop and monitored by the
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher}.
+<p>
+On the backend, each 
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce.Map},
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigCombiner.Combine}, and 
+{@link org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigMapReduce.Reduce}
+uses the pipeline of physical operators constructed in the front end to load, process, and store
+data.
 
+<h3> Programmatic Interface </h3>
+<p>
+In addition to the command line and grunt interfaces, users can connect to 
+{@link org.apache.pig.PigServer} from a Java program.
+<p>
+Pig makes it easy for users to extend its functionality by implementing User Defined Functions
+(UDFs).  There are interfaces for defining functions to load data
+{@link org.apache.pig.LoadFunc}, storing data {@link org.apache.pig.StoreFunc}, doing evaluations
+on fields (including collections of data, so user defined aggregates are possible) 
+{@link org.apache.pig.EvalFunc} and filtering data {@link org.apache.pig.FilterFunc}.
 </BODY>
 </HTML>
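
As a rough illustration of the programmatic interface described above (a
sketch, assuming local mode and an input file named input.txt):

    import java.io.IOException;

    import org.apache.pig.ExecType;
    import org.apache.pig.PigServer;

    public class PigServerExample {
        public static void main(String[] args) throws IOException {
            // Connect in local mode; ExecType.MAPREDUCE would target a cluster.
            PigServer pig = new PigServer(ExecType.LOCAL);
            pig.registerQuery("a = LOAD 'input.txt' AS (line:chararray);");
            pig.registerQuery("b = FILTER a BY line IS NOT NULL;");
            // store() triggers execution and writes the result to 'output'.
            pig.store("b", "output");
        }
    }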
 

