svn commit: r941005 - in /hadoop/pig/trunk: ./ src/org/apache/pig/data/

gates Tue, 04 May 2010 12:27:56 -0700

Author: gates
Date: Tue May  4 19:26:56 2010
New Revision: 941005

URL: http://svn.apache.org/viewvc?rev=941005&view=rev
Log:
PIG-1398: Marking Pig interfaces for org.apache.pig.data package.


Modified:
    hadoop/pig/trunk/CHANGES.txt
    hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java
    hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java
    hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java
    hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java
    hadoop/pig/trunk/src/org/apache/pig/data/DataType.java
    hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java
    hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java
    hadoop/pig/trunk/src/org/apache/pig/data/package.html

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue May  4 19:26:56 2010
@@ -23,6 +23,7 @@ Trunk (unreleased changes)
 INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
+PIG-1398: Marking Pig interfaces for org.apache.pig.data package (gates)
 
 PIG-1396: eclipse-files target in build.xml fails to generate necessary classes in src-gen
 

Modified: hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java Tue May  4 
19:26:56 2010
@@ -24,6 +24,8 @@ import java.net.URLClassLoader;
 import java.util.Comparator;
 import java.util.List;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.impl.util.SpillableMemoryManager;
 
 /**
@@ -33,16 +35,19 @@ import org.apache.pig.impl.util.Spillabl
  * returns their implementation of a bag.  If the property
  * pig.data.bag.factory.name is set to a class name and
  * pig.data.bag.factory.jar is set to a URL pointing to a jar that
- * contains the above named class, then getInstance() will create a
- * a instance of the named class using the indicatd jar.  Otherwise, it
+ * contains the above named class, then getInstance() will create 
+ * an instance of the named class using the indicated jar.  Otherwise, it
  * will create an instance of DefaultBagFactory.
  */
[email protected]
[email protected]
 public abstract class BagFactory {
     private static BagFactory gSelf = null;
     private static SpillableMemoryManager gMemMgr;
 
     /**
      * Get a reference to the singleton factory.
+     * @return BagFactory
      */
     public static BagFactory getInstance() {
         if (gSelf == null) {
@@ -81,31 +86,51 @@ public abstract class BagFactory {
     
     /**
      * Get a default (unordered, not distinct) data bag.
+     * @return default data bag.
      */
     public abstract DataBag newDefaultBag();
 
     /**
-     * Get a default (unordered, not distinct) data bag from
-     * an existing list of tuples.
+     * Get a default (unordered, not distinct) data bag with
+     * an existing list of tuples inserted into the bag.
+     * @param listOfTuples list of tuples to be placed in the bag.  This list may not be
+     * copied, it may be used directly by the created bag.
+     * @return default data bag.
      */
     public abstract DataBag newDefaultBag(List<Tuple> listOfTuples);
     
     /**
-     * Get a sorted data bag.
+     * Get a sorted data bag.  Sorted bags guarantee that when an iterator
+     * is opened on the bag the tuples will be returned in sorted order.
      * @param comp Comparator that controls how the data is sorted.
      * If null, default comparator will be used.
+     * @return a sorted data bag
      */
     public abstract DataBag newSortedBag(Comparator<Tuple> comp);
     
     /**
-     * Get a distinct data bag.
+     * Get a distinct data bag.  Distinct bags guarantee that when an
+     * iterator is opened on the bag, no two tuples returned from the
+     * iterator will be equal.
+     * @return distinct data bag
      */
     public abstract DataBag newDistinctBag();
 
+    /**
+     * Construct a new BagFactory
+     */
     protected BagFactory() {
         gMemMgr = new SpillableMemoryManager();
     }
 
+    /**
+     * Register a bag with the
+     * {@link org.apache.pig.impl.util.SpillableMemoryManager}.
+     * If the bags created by an implementation of BagFactory are managed by
+     * the {@link org.apache.pig.impl.util.SpillableMemoryManager} then this
+     * method should be called each time a new bag is created.
+     * @param b bag to be registered.
+     */
     protected void registerBag(DataBag b) {
         gMemMgr.registerSpillable(b);
     }

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java Tue May  4 19:26:56 
2010
@@ -31,6 +31,8 @@ import java.util.ArrayList;
 
 import org.apache.hadoop.io.WritableComparable;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.impl.util.Spillable;
 
 /**
@@ -41,18 +43,18 @@ import org.apache.pig.impl.util.Spillabl
  * spill()), it takes whatever it has in memory, opens a spill file, and
  * writes the contents out.  This may happen multiple times.  The bag
  * tracks all of the files it's spilled to.
- * 
+ * <p>
  * DataBag provides an Iterator interface, that allows callers to read
  * through the contents.  The iterators are aware of the data spilling.
  * They have to be able to handle reading from files, as well as the fact
  * that data they were reading from memory may have been spilled to disk
  * underneath them.
- *
+ * <p>
  * The DataBag interface assumes that all data is written before any is
  * read.  That is, a DataBag cannot be used as a queue.  If data is written
  * after data is read, the results are undefined.  This condition is not
  * checked on each add or read, for reasons of speed.  Caveat emptor.
- *
+ * <p>
  * Since spills are asynchronous (the memory manager requesting a spill
  * runs in a separate thread), all operations dealing with the mContents
  * Collection (which is the collection of tuples contained in the bag) have
@@ -63,23 +65,35 @@ import org.apache.pig.impl.util.Spillabl
  * If pig changes its execution model to be multithreaded, we may need to
  * return to this issue, as synchronizing reads will most likely defeat the
  * purpose of multi-threading execution.
- *
+ * <p>
  * DataBags come in several types, default, sorted, and distinct.  The type
  * must be chosen up front, there is no way to convert a bag on the fly.
+ * Default data bags do not guarantee any particular order of retrieval for 
+ * the tuples and may contain duplicate tuples.  Sorted data bags guarantee
+ * that tuples will be retrieved in order, where "in order" is defined either
+ * by the default comparator for Tuple or the comparator provided by the
+ * caller when the bag was created.  Sorted bags may contain duplicates.
+ * Distinct bags do not guarantee any particular order of retrieval, but do
+ * guarantee that they will not contain duplicate tuples.
  */
[email protected]
[email protected]
 public interface DataBag extends Spillable, WritableComparable, Iterable<Tuple>, Serializable {
     /**
      * Get the number of elements in the bag, both in memory and on disk.
+     * @return number of elements in the bag
      */
     long size();
 
     /**
      * Find out if the bag is sorted.
+     * @return true if this is a sorted data bag, false otherwise.
      */
     boolean isSorted();
     
     /**
      * Find out if the bag is distinct.
+     * @return true if the bag is a distinct bag, false otherwise.
      */
     boolean isDistinct();
     
@@ -88,6 +102,7 @@ public interface DataBag extends Spillab
      * no particular order is guaranteed. For sorted bags the order
      * is guaranteed to be sorted according
      * to the provided comparator.
+     * @return tuple iterator
      */
     Iterator<Tuple> iterator();
 
@@ -114,5 +129,6 @@ public interface DataBag extends Spillab
      * This is used by FuncEvalSpec.FakeDataBag.
      * @param stale Set stale state.
      */
+    @InterfaceAudience.Private
     void markStale(boolean stale);
 }

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java Tue May  4 
19:26:56 2010
@@ -24,11 +24,16 @@ import java.lang.StringBuilder;
 import java.util.ArrayList;
 import java.util.Collection;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
 /**
  * An implementation of byte array.  This is done as an object because we
  * need to be able to implement compareTo, toString, hashCode, and some
  * other methods.
  */
[email protected]
[email protected]
 public class DataByteArray implements Comparable, Serializable {
 
     private static final long serialVersionUID = 1L;
@@ -143,6 +148,9 @@ public class DataByteArray implements Co
         mData = s.getBytes();
     }
 
+    /**
+     * Convert the byte array to a string.  UTF8 encoding will be assumed.
+     */
     @Override
     public String toString() {
         String r="";
@@ -158,7 +166,7 @@ public class DataByteArray implements Co
      * Compare two byte arrays.  Comparison is done first using byte values
      * then length.  So "g" will be greater than "abcdefg", but "hello worlds"
      * is greater than "hello world".  If the other object is not a
-     * DataByteArray, DataType.compare will be called.
+     * DataByteArray, {@link DataType#compare} will be called.
      * @param other Other object to compare to.
      * @return -1 if less than, 1 if greater than, 0 if equal.
      */

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java Tue May  4 
19:26:56 2010
@@ -27,13 +27,19 @@ import java.util.Map;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+
 import org.apache.hadoop.io.Writable;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.backend.executionengine.ExecException;
 
 /**
  * A class to handle reading and writing of intermediate results of data
  * types.  This class could also be used for storing permanent results.
  */
[email protected]
[email protected]
 public class DataReaderWriter {
     private static TupleFactory mTupleFactory = TupleFactory.getInstance();
     private static BagFactory mBagFactory = BagFactory.getInstance();

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataType.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataType.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataType.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataType.java Tue May  4 19:26:56 
2010
@@ -27,6 +27,9 @@ import java.util.Map;
 import java.util.TreeMap;
 
 import org.apache.hadoop.io.WritableComparable;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.PigException;
 import org.apache.pig.ResourceSchema;
 import org.apache.pig.backend.executionengine.ExecException;
@@ -43,6 +46,8 @@ import org.apache.pig.impl.logicalLayer.
  * done as an enumeration, but it is done as byte codes instead to save
  * creating objects.
  */
[email protected]
[email protected]
 public class DataType {
     // IMPORTANT! This list can be used to record values of data on disk,
     // so do not change the values.  You may strand user data.
@@ -60,16 +65,24 @@ public class DataType {
     public static final byte DOUBLE    =  25;
     public static final byte BYTEARRAY =  50;
     public static final byte CHARARRAY =  55;
+    /**
+     * Internal use only.
+     */
     public static final byte BIGCHARARRAY =  60; //internal use only; for storing/loading chararray bigger than 64K characters in BinStorage
     public static final byte MAP       = 100;
     public static final byte TUPLE     = 110;
     public static final byte BAG       = 120;
     
-    // internal use only; used to store WriteableComparable objects 
-    // for creating ordered index in MergeJoin. Expecting a object that
-    // implements Writable interface and has default constructor
+    /**
+     * Internal use only; used to store WriteableComparable objects 
+     * for creating ordered index in MergeJoin. Expecting a object that
+     * implements Writable interface and has default constructor
+     */
     public static final byte GENERIC_WRITABLECOMPARABLE = 123; 
     
+    /**
+     * Internal use only.
+     */
     public static final byte INTERNALMAP = 127; // internal use only; for maps that are object->object.  Used by FindQuantiles.
     public static final byte ERROR     =  -1;
 
@@ -163,10 +176,19 @@ public class DataType {
                return ERROR;
        }
     
+    /**
+     * Return the number of types Pig knows about.
+     * @return number of types
+     */
     public static int numTypes(){
         byte[] types = genAllTypes();
         return types.length;
     }
+
+    /**
+     * Get an array of all type values.
+     * @return byte array with an entry for each type.
+     */
     public static byte[] genAllTypes(){
         byte[] types = { DataType.BAG, DataType.BIGCHARARRAY, DataType.BOOLEAN, DataType.BYTE, DataType.BYTEARRAY, 
                 DataType.CHARARRAY, DataType.DOUBLE, DataType.FLOAT, 
@@ -185,6 +207,10 @@ public class DataType {
         return names;
     }
     
+    /**
+     * Get a map of type values to type names.
+     * @return map
+     */
     public static Map<Byte, String> genTypeToNameMap(){
         byte[] types = genAllTypes();
         String[] names = genAllTypeNames();
@@ -195,6 +221,10 @@ public class DataType {
         return ret;
     }
 
+    /**
+     * Get a map of type names to type values.
+     * @return map
+     */
     public static Map<String, Byte> genNameToTypeMap(){
         byte[] types = genAllTypes();
         String[] names = genAllTypeNames();
@@ -326,12 +356,17 @@ public class DataType {
         return compare(o1, o2, dt1, dt2);
     }
 
-    /*
-     * Same as compare(Object o1, Object o2), but does not use reflection to determine the type 
+    /**
+     * Same as {@link #compare(Object, Object)}, but does not use reflection to determine the type 
      * of passed in objects, relying instead on the caller to provide the appropriate values, as
-     * determined by DataType.findType(Object o);
+     * determined by {@link DataType#findType(Object)}.
      * 
      * Use this version in cases where multiple objects of the same type have to be repeatedly compared.
+     * @param o1 first object
+     * @param o2 second object
+     * @param dt1 type, as byte value, of o1
+     * @param dt2 type, as byte value, of o2
+     * @return -1 if o1 is &lt; o2, 0 if they are equal, 1 if o1 &gt; o2
      */
     @SuppressWarnings("unchecked")
     public static int compare(Object o1, Object o2, byte dt1, byte dt2) {
@@ -430,7 +465,9 @@ public class DataType {
      * forced to an Integer.  This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is an Integer you
      * should just cast it.
-     * @return The object as a Integer.
+     * @param o object to cast
+     * @param type of the object you are casting
+     * @return The object as an Integer.
      * @throws ExecException if the type can't be forced to an Integer.
      */
     public static Integer toInteger(Object o,byte type) throws ExecException {
@@ -489,17 +526,24 @@ public class DataType {
                        throw new ExecException(msg, errCode, PigException.BUG);
                }
     }
+
     /**
-     * If type of object is not known, use this method, which internally calls
-     * toInteger(object,type)
-     * 
-     * @param o
-     * @return Object as Integer.
-     * @throws ExecException
+     * Force a data object to an Integer, if possible.  Any numeric type
+     * can be forced to an Integer (though precision may be lost), as well
+     * as CharArray, ByteArray, or Boolean.  Complex types cannot be
+     * forced to an Integer.  This isn't particularly efficient, so if you
+     * already <b>know</b> that the object you have is an Integer you
+     * should just cast it.  Unlike {@link #toInteger(Object, byte)} this
+     * method will first determine the type of o and then do the cast.  
+     * Use {@link #toInteger(Object, byte)} if you already know the type.
+     * @param o object to cast
+     * @return The object as an Integer.
+     * @throws ExecException if the type can't be forced to an Integer.
      */
     public static Integer toInteger(Object o) throws ExecException {
         return toInteger(o, findType(o));
     }
+
     /**
      * Force a data object to a Long, if possible.  Any numeric type
      * can be forced to a Long (though precision may be lost), as well
@@ -507,6 +551,8 @@ public class DataType {
      * forced to a Long.  This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a Long you
      * should just cast it.
+     * @param o object to cast
+     * @param type of the object you are casting
      * @return The object as a Long.
      * @throws ExecException if the type can't be forced to a Long.
      */
@@ -567,13 +613,19 @@ public class DataType {
                }
 
     }
+
     /**
-     * If type of object is not known, use this method which in turns call
-     * toLong(object,type) after finding type.
-     * 
-     * @param o
-     * @return Object as Long.
-     * @throws ExecException
+     * Force a data object to a Long, if possible.  Any numeric type
+     * can be forced to a Long (though precision may be lost), as well
+     * as CharArray, ByteArray, or Boolean.  Complex types cannot be
+     * forced to a Long.  This isn't particularly efficient, so if you
+     * already <b>know</b> that the object you have is a Long you
+     * should just cast it.  Unlike {@link #toLong(Object, byte)} this
+     * method will first determine the type of o and then do the cast.  
+     * Use {@link #toLong(Object, byte)} if you already know the type.
+     * @param o object to cast
+     * @return The object as a Long.
+     * @throws ExecException if the type can't be forced to a Long.
      */
     public static Long toLong(Object o) throws ExecException {
         return toLong(o, findType(o));
@@ -586,6 +638,8 @@ public class DataType {
      * forced to a Float.  This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a Float you
      * should just cast it.
+     * @param o object to cast
+     * @param type of the object you are casting
      * @return The object as a Float.
      * @throws ExecException if the type can't be forced to a Float.
      */
@@ -640,13 +694,19 @@ public class DataType {
                        throw new ExecException(msg, errCode, PigException.BUG);
                }
     }
+
     /**
-     * If type of object is not known, use this method which in turns call
-     * toFloat(object,type) after finding type.
-     * 
-     * @param o
-     * @return Object as Float.
-     * @throws ExecException
+     * Force a data object to a Float, if possible.  Any numeric type
+     * can be forced to a Float (though precision may be lost), as well
+     * as CharArray, ByteArray, or Boolean.  Complex types cannot be
+     * forced to a Float.  This isn't particularly efficient, so if you
+     * already <b>know</b> that the object you have is a Float you
+     * should just cast it.  Unlike {@link #toFloat(Object, byte)} this
+     * method will first determine the type of o and then do the cast.  
+     * Use {@link #toFloat(Object, byte)} if you already know the type.
+     * @param o object to cast
+     * @return The object as a Float.
+     * @throws ExecException if the type can't be forced to a Float.
      */
     public static Float toFloat(Object o) throws ExecException {
         return toFloat(o, findType(o));
@@ -659,6 +719,8 @@ public class DataType {
      * forced to a Double.  This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a Double you
      * should just cast it.
+     * @param o object to cast
+     * @param type of the object you are casting
      * @return The object as a Double.
      * @throws ExecException if the type can't be forced to a Double.
      */
@@ -713,13 +775,19 @@ public class DataType {
                        throw new ExecException(msg, errCode, PigException.BUG);
                }
     }
+
     /**
-     * If type of object is not known, use this method which in turns call
-     * toLong(object,type) after finding type.
-     * 
-     * @param o
-     * @return Object as Double.
-     * @throws ExecException
+     * Force a data object to a Double, if possible.  Any numeric type
+     * can be forced to a Double, as well
+     * as CharArray, ByteArray, or Boolean.  Complex types cannot be
+     * forced to a Double.  This isn't particularly efficient, so if you
+     * already <b>know</b> that the object you have is a Double you
+     * should just cast it.  Unlike {@link #toDouble(Object, byte)} this
+     * method will first determine the type of o and then do the cast.  
+     * Use {@link #toDouble(Object, byte)} if you already know the type.
+     * @param o object to cast
+     * @return The object as a Double.
+     * @throws ExecException if the type can't be forced to a Double.
      */
     public static Double toDouble(Object o) throws ExecException {
         return toDouble(o, findType(o));
@@ -731,6 +799,8 @@ public class DataType {
      * forced to a String.  This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a String you
      * should just cast it.
+     * @param o object to cast
+     * @param type of the object you are casting
      * @return The object as a String.
      * @throws ExecException if the type can't be forced to a String.
      */
@@ -785,22 +855,29 @@ public class DataType {
                        throw new ExecException(msg, errCode, PigException.BUG);
                }
     }
+
     /**
-     * If type of object is not known, use this method which in turns call
-     * toString(object,type) after finding type.
-     * 
-     * @param o
-     * @return Object as String.
-     * @throws ExecException
+     * Force a data object to a String, if possible.  Any simple (atomic) type
+     * can be forced to a String including ByteArray.  Complex types cannot be
+     * forced to a String.  This isn't particularly efficient, so if you
+     * already <b>know</b> that the object you have is a String you
+     * should just cast it.  Unlike {@link #toString(Object, byte)} this
+     * method will first determine the type of o and then do the cast.  
+     * Use {@link #toString(Object, byte)} if you already know the type.
+     * @param o object to cast
+     * @return The object as a String.
+     * @throws ExecException if the type can't be forced to a String.
      */
     public static String toString(Object o) throws ExecException {
         return toString(o, findType(o));
     }
+
     /**
      * If this object is a map, return it as a map.
      * This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a Map you
      * should just cast it.
+     * @param o object to cast
      * @return The object as a Map.
      * @throws ExecException if the type can't be forced to a Double.
      */
@@ -829,6 +906,7 @@ public class DataType {
      * This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a Tuple you
      * should just cast it.
+     * @param o object to cast
      * @return The object as a Double.
      * @throws ExecException if the type can't be forced to a Double.
      */
@@ -856,6 +934,7 @@ public class DataType {
      * This isn't particularly efficient, so if you
      * already <b>know</b> that the object you have is a bag you
      * should just cast it.
+     * @param o object to cast
      * @return The object as a Double.
      * @throws ExecException if the type can't be forced to a Double.
      */
@@ -890,6 +969,11 @@ public class DataType {
         System.out.println(t.toString());
     }
     
+    /**
+     * Determine if this type is a numeric type.
+     * @param t type (as byte value) to test
+     * @return true if this is a numeric type, false otherwise
+     */
     public static boolean isNumberType(byte t) {
         switch (t) {
             case INTEGER:   return true ;
@@ -900,6 +984,11 @@ public class DataType {
         }        
     }
     
+    /**
+     * Determine if this is a type that work can be done on.
+     * @param t type (as a byte value) to test
+     * @return false if the type is unknown, null, or error; true otherwise.
+     */
     public static boolean isUsableType(byte t) {
         switch (t) {
             case UNKNOWN:    return false ;
@@ -909,8 +998,9 @@ public class DataType {
         }
     }
 
-        /***
-     * Merge types if possible
+    /**
+     * Merge types if possible.  Merging types means finding a type that one 
+     * or both types can be upcast to.
      * @param type1
      * @param type2
      * @return the merged type, or DataType.ERROR if not successful
@@ -946,6 +1036,11 @@ public class DataType {
         return DataType.ERROR ;
     }
     
+    /**
+     * Given a map, turn it into a String.
+     * @param m map
+     * @return string representation of the map
+     */
     public static String mapToString(Map<String, Object> m) {
         boolean hasNext = false;
         StringBuilder sb = new StringBuilder();
@@ -967,6 +1062,14 @@ public class DataType {
         return sb.toString();
     }
 
+    /**
+     * Test whether two byte arrays (Java byte arrays not Pig byte arrays) are
+     * equal.  I have no idea why we have this function.
+     * @param lhs byte array 1
+     * @param rhs byte array 2
+     * @return true if both are null or the two are the same length and have
+     * the same bytes.
+     */
     public static boolean equalByteArrays(byte[] lhs, byte[] rhs) {
         if(lhs == null && rhs == null) return true;
         if(lhs == null || rhs == null) return false;

Modified: hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java Tue May  4 19:26:56 2010
@@ -22,24 +22,28 @@ import java.util.List;
 
 import org.apache.hadoop.io.WritableComparable;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
 import org.apache.pig.backend.executionengine.ExecException;
 
 /**
  * An ordered list of Data.  A tuple has fields, numbered 0 through
  * (number of fields - 1).  The entry in the field can be any datatype,
  * or it can be null.
- *
- * Tuples are constructed only by a TupleFactory.  A DefaultTupleFactory
- * is provided by the system.  If a user wishes to use their own type of
- * Tuple, they should also provide an implementation of TupleFactory to
+ * <p>
+ * Tuples are constructed only by a {@link TupleFactory}.  A
+ * {@link DefaultTupleFactory}
+ * is provided by the system.  If users wish to use their own type of
+ * Tuple, they should also provide an implementation of {@link TupleFactory} to
  * construct their types of Tuples.
  *
- * Fields are numbered from 0.
  */
 
 // Put in to make the compiler not complain about WritableComparable
 // being a generic type.
 @SuppressWarnings("unchecked")
[email protected]
[email protected]
 public interface Tuple extends WritableComparable, Serializable {
        
     /**
@@ -80,8 +84,8 @@ public interface Tuple extends WritableC
     /**
      * Find the type of a given field.
      * @param fieldNum Number of field to get the type for.
-     * @return type, encoded as a byte value.  The values are taken from
-     * the class DataType.  If the field is null, then DataType.UNKNOWN
+     * @return type, encoded as a byte value.  The values are defined in
+     * {@link DataType}.  If the field is null, then DataType.UNKNOWN
      * will be returned.
      * @throws ExecException if the field number is greater than or equal to
      * the number of fields in the tuple.
@@ -99,13 +103,18 @@ public interface Tuple extends WritableC
 
     /**
      * Get all of the fields in the tuple as a list.
-     * @return List&lt;Object&gt; containing the fields of the tuple
+     * @return a list of objects containing the fields of the tuple
      * in order.
      */
     List<Object> getAll();
 
     /**
-     * Set the value in a given field.
+     * Set the value in a given field.  This should not be called unless
+     * the tuple was constructed by {@link TupleFactory#newTuple(int)} with an
+     * argument greater than the fieldNum being passed here.  This call will
+     * not automatically expand the tuple size.  That is if you called 
+     * {@link TupleFactory#newTuple(int)} with a 2, it is okay to call
+     * this function with a 1, but not with a 2 or greater.
      * @param fieldNum Number of the field to set the value for.
      * @param val Object to put in the indicated field.
      * @throws ExecException if the field number is greater than or equal to
@@ -116,9 +125,10 @@ public interface Tuple extends WritableC
     /**
      * Append a field to a tuple.  This method is not efficient as it may
      * force copying of existing data in order to grow the data structure.
-     * Whenever possible you should construct your Tuple with the
-     * newTuple(int) method and then fill in the values with set(), rather
-     * than construct it with newTuple() and append values.
+     * Whenever possible you should construct your Tuple with 
+     * {@link TupleFactory#newTuple(int)} and then fill in the values with 
+     * {@link #set(int, Object)}, rather
+     * than construct it with {@link TupleFactory#newTuple()} and append values.
      * @param val Object to append to the tuple.
      */
     void append(Object val);
@@ -127,7 +137,7 @@ public interface Tuple extends WritableC
      * Determine the size of tuple in memory.  This is used by data bags
      * to determine their memory size.  This need not be exact, but it
      * should be a decent estimation.
-     * @return estimated memory size.
+     * @return estimated memory size, in bytes.
      */
     long getMemorySize();
 
@@ -141,11 +151,13 @@ public interface Tuple extends WritableC
     String toDelimitedString(String delim) throws ExecException;
     
     /**
+     * Determine if this entire tuple (not any particular field) is null.
      * @return true if this Tuple is null
      */
     public boolean isNull();
     
     /**
+     * Mark this entire tuple as null or not null.
      * @param isNull boolean indicating whether this tuple is null
      */
     public void setNull(boolean isNull);

Modified: hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java Tue May  4 19:26:56 2010
@@ -23,21 +23,27 @@ import java.net.URL;
 import java.net.URLClassLoader;
 import java.util.List;
 
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
 /**
  * A factory to construct tuples.  This class is abstract so that users can
  * override the tuple factory if they desire to provide their own that
  * returns their implementation of a tuple.  If the property
  * pig.data.tuple.factory.name is set to a class name and
  * pig.data.tuple.factory.jar is set to a URL pointing to a jar that
- * contains the above named class, then getInstance() will create a
- * a instance of the named class using the indicatd jar.  Otherwise, it
- * will create and instance of DefaultTupleFactory.
+ * contains the above named class, then {@link #getInstance()} will create a
+ * an instance of the named class using the indicated jar.  Otherwise, it
+ * will create an instance of {@link DefaultTupleFactory}.
  */
+@InterfaceAudience.Public
+@InterfaceStability.Stable
 public abstract class TupleFactory {
     private static TupleFactory gSelf = null;
 
     /**
      * Get a reference to the singleton factory.
+     * @return The TupleFactory to use to construct tuples.
      */
     public static TupleFactory getInstance() {
         if (gSelf == null) {
@@ -77,15 +83,17 @@ public abstract class TupleFactory {
     /**
      * Create an empty tuple.  This should be used as infrequently as
      * possible, use newTuple(int) instead.
+     * @return Empty new tuple.
      */
     public abstract Tuple newTuple();
 
     /**
-     * Create a tuple with size fields.  Whenever possible this is prefered
-     * over the nullary constructor, as the constructor can preallocate the
+     * Create a tuple with size fields.  Whenever possible this is preferred
+     * over the null constructor, as the constructor can preallocate the
      * size of the container holding the fields.  Once this is called, it
      * is legal to call Tuple.set(x, object), where x &lt; size.
      * @param size Number of fields in the tuple.
+     * @return Tuple with size fields
      */
     public abstract Tuple newTuple(int size);
     
@@ -93,6 +101,7 @@ public abstract class TupleFactory {
      * Create a tuple from the provided list of objects.  The underlying list
      * will be copied.
      * @param c List of objects to use as the fields of the tuple.
+     * @return A tuple with the list objects as its fields
      */
     public abstract Tuple newTuple(List c);
 
@@ -100,6 +109,7 @@ public abstract class TupleFactory {
      * Create a tuple from a provided list of objects, keeping the provided
      * list.  The new tuple will take over ownership of the provided list.
      * @param list List of objects that will become the fields of the tuple.
+     * @return A tuple with the list objects as its fields
      */
     public abstract Tuple newTupleNoCopy(List list);
 
@@ -108,14 +118,14 @@ public abstract class TupleFactory {
      * the fact that bags (currently) only take tuples, we often end up
      * sticking a single element in a tuple in order to put it in a bag.
      * @param datum Datum to put in the tuple.
+     * @return A tuple with one field
      */
     public abstract Tuple newTuple(Object datum);
 
     /**
      * Return the actual class representing a tuple that the implementing
-     * factory will be returning.  This is needed because hadoop (and
-     * possibly other systems) we use need to know the exact class we will
-     * be using for input and output.
+     * factory will be returning.  This is needed because hadoop needs
+     * to know the exact class we will be using for input and output.
      * @return Class that implements tuple.
      */
     public abstract Class tupleClass();

Modified: hadoop/pig/trunk/src/org/apache/pig/data/package.html
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/package.html?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/package.html (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/package.html Tue May  4 19:26:56 2010
@@ -2,8 +2,6 @@
 <body>
 
 <p>
-Data types for Pig.
-<p>
 This package contains implementations of Pig specific data types as well as
 support functions for reading, writing, and using all Pig data types.
 <p>

svn commit: r941005 - in /hadoop/pig/trunk: ./ src/org/apache/pig/data/

Reply via email to