Author: gates Date: Tue May 4 19:26:56 2010 New Revision: 941005 URL: http://svn.apache.org/viewvc?rev=941005&view=rev Log: PIG-1398: Marking Pig interfaces for org.apache.pig.data package.
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java hadoop/pig/trunk/src/org/apache/pig/data/DataType.java hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java hadoop/pig/trunk/src/org/apache/pig/data/package.html Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue May 4 19:26:56 2010 @@ -23,6 +23,7 @@ Trunk (unreleased changes) INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1398: Marking Pig interfaces for org.apache.pig.data package (gates) PIG-1396: eclipse-files target in build.xml fails to generate necessary classes in src-gen Modified: hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java Tue May 4 19:26:56 2010 @@ -24,6 +24,8 @@ import java.net.URLClassLoader; import java.util.Comparator; import java.util.List; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; import org.apache.pig.impl.util.SpillableMemoryManager; /** @@ -33,16 +35,19 @@ import org.apache.pig.impl.util.Spillabl * returns their implementation of a bag. 
If the property * pig.data.bag.factory.name is set to a class name and * pig.data.bag.factory.jar is set to a URL pointing to a jar that - * contains the above named class, then getInstance() will create a - * a instance of the named class using the indicatd jar. Otherwise, it + * contains the above named class, then getInstance() will create + * an instance of the named class using the indicated jar. Otherwise, it * will create an instance of DefaultBagFactory. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public abstract class BagFactory { private static BagFactory gSelf = null; private static SpillableMemoryManager gMemMgr; /** * Get a reference to the singleton factory. + * @return BagFactory */ public static BagFactory getInstance() { if (gSelf == null) { @@ -81,31 +86,51 @@ public abstract class BagFactory { /** * Get a default (unordered, not distinct) data bag. + * @return default data bag. */ public abstract DataBag newDefaultBag(); /** - * Get a default (unordered, not distinct) data bag from - * an existing list of tuples. + * Get a default (unordered, not distinct) data bag with + * an existing list of tuples inserted into the bag. + * @param listOfTuples list of tuples to be placed in the bag. This list may not be + * copied, it may be used directly by the created bag. + * @return default data bag. */ public abstract DataBag newDefaultBag(List<Tuple> listOfTuples); /** - * Get a sorted data bag. + * Get a sorted data bag. Sorted bags guarantee that when an iterator + * is opened on the bag the tuples will be returned in sorted order. * @param comp Comparator that controls how the data is sorted. * If null, default comparator will be used. + * @return a sorted data bag */ public abstract DataBag newSortedBag(Comparator<Tuple> comp); /** - * Get a distinct data bag. + * Get a distinct data bag. Distinct bags guarantee that when an + * iterator is opened on the bag, no two tuples returned from the + * iterator will be equal. 
+ * @return distinct data bag */ public abstract DataBag newDistinctBag(); + /** + * Construct a new BagFactory + */ protected BagFactory() { gMemMgr = new SpillableMemoryManager(); } + /** + * Register a bag with the + * {@link org.apache.pig.impl.util.SpillableMemoryManager}. + * If the bags created by an implementation of BagFactory are managed by + * the {@link org.apache.pig.impl.util.SpillableMemoryManager} then this + * method should be called each time a new bag is created. + * @param b bag to be registered. + */ protected void registerBag(DataBag b) { gMemMgr.registerSpillable(b); } Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java Tue May 4 19:26:56 2010 @@ -31,6 +31,8 @@ import java.util.ArrayList; import org.apache.hadoop.io.WritableComparable; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; import org.apache.pig.impl.util.Spillable; /** @@ -41,18 +43,18 @@ import org.apache.pig.impl.util.Spillabl * spill()), it takes whatever it has in memory, opens a spill file, and * writes the contents out. This may happen multiple times. The bag * tracks all of the files it's spilled to. - * + * <p> * DataBag provides an Iterator interface, that allows callers to read * through the contents. The iterators are aware of the data spilling. * They have to be able to handle reading from files, as well as the fact * that data they were reading from memory may have been spilled to disk * underneath them. - * + * <p> * The DataBag interface assumes that all data is written before any is * read. That is, a DataBag cannot be used as a queue. 
If data is written * after data is read, the results are undefined. This condition is not * checked on each add or read, for reasons of speed. Caveat emptor. - * + * <p> * Since spills are asynchronous (the memory manager requesting a spill * runs in a separate thread), all operations dealing with the mContents * Collection (which is the collection of tuples contained in the bag) have @@ -63,23 +65,35 @@ import org.apache.pig.impl.util.Spillabl * If pig changes its execution model to be multithreaded, we may need to * return to this issue, as synchronizing reads will most likely defeat the * purpose of multi-threading execution. - * + * <p> * DataBags come in several types, default, sorted, and distinct. The type * must be chosen up front, there is no way to convert a bag on the fly. + * Default data bags do not guarantee any particular order of retrieval for + * the tuples and may contain duplicate tuples. Sorted data bags guarantee + * that tuples will be retrieved in order, where "in order" is defined either + * by the default comparator for Tuple or the comparator provided by the + * caller when the bag was created. Sorted bags may contain duplicates. + * Distinct bags do not guarantee any particular order of retrieval, but do + * guarantee that they will not contain duplicate tuples. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public interface DataBag extends Spillable, WritableComparable, Iterable<Tuple>, Serializable { /** * Get the number of elements in the bag, both in memory and on disk. + * @return number of elements in the bag */ long size(); /** * Find out if the bag is sorted. + * @return true if this is a sorted data bag, false otherwise. */ boolean isSorted(); /** * Find out if the bag is distinct. + * @return true if the bag is a distinct bag, false otherwise. */ boolean isDistinct(); @@ -88,6 +102,7 @@ public interface DataBag extends Spillab * no particular order is guaranteed. 
For sorted bags the order * is guaranteed to be sorted according * to the provided comparator. + * @return tuple iterator */ Iterator<Tuple> iterator(); @@ -114,5 +129,6 @@ public interface DataBag extends Spillab * This is used by FuncEvalSpec.FakeDataBag. * @param stale Set stale state. */ + @InterfaceAudience.Private void markStale(boolean stale); } Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java Tue May 4 19:26:56 2010 @@ -24,11 +24,16 @@ import java.lang.StringBuilder; import java.util.ArrayList; import java.util.Collection; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + /** * An implementation of byte array. This is done as an object because we * need to be able to implement compareTo, toString, hashCode, and some * other methods. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public class DataByteArray implements Comparable, Serializable { private static final long serialVersionUID = 1L; @@ -143,6 +148,9 @@ public class DataByteArray implements Co mData = s.getBytes(); } + /** + * Convert the byte array to a string. UTF8 encoding will be assumed. + */ @Override public String toString() { String r=""; @@ -158,7 +166,7 @@ public class DataByteArray implements Co * Compare two byte arrays. Comparison is done first using byte values * then length. So "g" will be greater than "abcdefg", but "hello worlds" * is greater than "hello world". If the other object is not a - * DataByteArray, DataType.compare will be called. + * DataByteArray, {@link DataType#compare} will be called. 
* @param other Other object to compare to. * @return -1 if less than, 1 if greater than, 0 if equal. */ Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java Tue May 4 19:26:56 2010 @@ -27,13 +27,19 @@ import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; + import org.apache.hadoop.io.Writable; + +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; import org.apache.pig.backend.executionengine.ExecException; /** * A class to handle reading and writing of intermediate results of data * types. This class could also be used for storing permanent results. 
*/ +@InterfaceAudience.Private +@InterfaceStability.Stable public class DataReaderWriter { private static TupleFactory mTupleFactory = TupleFactory.getInstance(); private static BagFactory mBagFactory = BagFactory.getInstance(); Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataType.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataType.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/DataType.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DataType.java Tue May 4 19:26:56 2010 @@ -27,6 +27,9 @@ import java.util.Map; import java.util.TreeMap; import org.apache.hadoop.io.WritableComparable; + +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; import org.apache.pig.PigException; import org.apache.pig.ResourceSchema; import org.apache.pig.backend.executionengine.ExecException; @@ -43,6 +46,8 @@ import org.apache.pig.impl.logicalLayer. * done as an enumeration, but it is done as byte codes instead to save * creating objects. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public class DataType { // IMPORTANT! This list can be used to record values of data on disk, // so do not change the values. You may strand user data. @@ -60,16 +65,24 @@ public class DataType { public static final byte DOUBLE = 25; public static final byte BYTEARRAY = 50; public static final byte CHARARRAY = 55; + /** + * Internal use only. + */ public static final byte BIGCHARARRAY = 60; //internal use only; for storing/loading chararray bigger than 64K characters in BinStorage public static final byte MAP = 100; public static final byte TUPLE = 110; public static final byte BAG = 120; - // internal use only; used to store WriteableComparable objects - // for creating ordered index in MergeJoin. 
Expecting a object that - // implements Writable interface and has default constructor + /** + * Internal use only; used to store WriteableComparable objects + * for creating ordered index in MergeJoin. Expecting a object that + * implements Writable interface and has default constructor + */ public static final byte GENERIC_WRITABLECOMPARABLE = 123; + /** + * Internal use only. + */ public static final byte INTERNALMAP = 127; // internal use only; for maps that are object->object. Used by FindQuantiles. public static final byte ERROR = -1; @@ -163,10 +176,19 @@ public class DataType { return ERROR; } + /** + * Return the number of types Pig knows about. + * @return number of types + */ public static int numTypes(){ byte[] types = genAllTypes(); return types.length; } + + /** + * Get an array of all type values. + * @return byte array with an entry for each type. + */ public static byte[] genAllTypes(){ byte[] types = { DataType.BAG, DataType.BIGCHARARRAY, DataType.BOOLEAN, DataType.BYTE, DataType.BYTEARRAY, DataType.CHARARRAY, DataType.DOUBLE, DataType.FLOAT, @@ -185,6 +207,10 @@ public class DataType { return names; } + /** + * Get a map of type values to type names. + * @return map + */ public static Map<Byte, String> genTypeToNameMap(){ byte[] types = genAllTypes(); String[] names = genAllTypeNames(); @@ -195,6 +221,10 @@ public class DataType { return ret; } + /** + * Get a map of type names to type values. 
+ * @return map + */ public static Map<String, Byte> genNameToTypeMap(){ byte[] types = genAllTypes(); String[] names = genAllTypeNames(); @@ -326,12 +356,17 @@ public class DataType { return compare(o1, o2, dt1, dt2); } - /* - * Same as compare(Object o1, Object o2), but does not use reflection to determine the type + /** + * Same as {@link #compare(Object, Object)}, but does not use reflection to determine the type * of passed in objects, relying instead on the caller to provide the appropriate values, as - * determined by DataType.findType(Object o); + * determined by {@link DataType#findType(Object)}. * * Use this version in cases where multiple objects of the same type have to be repeatedly compared. + * @param o1 first object + * @param o2 second object + * @param dt1 type, as byte value, of o1 + * @param dt2 type, as byte value, of o2 + * @return -1 if o1 is < o2, 0 if they are equal, 1 if o1 > o2 */ @SuppressWarnings("unchecked") public static int compare(Object o1, Object o2, byte dt1, byte dt2) { @@ -430,7 +465,9 @@ public class DataType { * forced to an Integer. This isn't particularly efficient, so if you * already <b>know</b> that the object you have is an Integer you * should just cast it. - * @return The object as a Integer. + * @param o object to cast + * @param type of the object you are casting + * @return The object as an Integer. * @throws ExecException if the type can't be forced to an Integer. */ public static Integer toInteger(Object o,byte type) throws ExecException { @@ -489,17 +526,24 @@ public class DataType { throw new ExecException(msg, errCode, PigException.BUG); } } + /** - * If type of object is not known, use this method, which internally calls - * toInteger(object,type) - * - * @param o - * @return Object as Integer. - * @throws ExecException + * Force a data object to an Integer, if possible. Any numeric type + * can be forced to an Integer (though precision may be lost), as well + * as CharArray, ByteArray, or Boolean. 
Complex types cannot be + * forced to an Integer. This isn't particularly efficient, so if you + * already <b>know</b> that the object you have is an Integer you + * should just cast it. Unlike {@link #toInteger(Object, byte)} this + * method will first determine the type of o and then do the cast. + * Use {@link #toInteger(Object, byte)} if you already know the type. + * @param o object to cast + * @return The object as an Integer. + * @throws ExecException if the type can't be forced to an Integer. */ public static Integer toInteger(Object o) throws ExecException { return toInteger(o, findType(o)); } + /** * Force a data object to a Long, if possible. Any numeric type * can be forced to a Long (though precision may be lost), as well @@ -507,6 +551,8 @@ public class DataType { * forced to a Long. This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a Long you * should just cast it. + * @param o object to cast + * @param type of the object you are casting * @return The object as a Long. * @throws ExecException if the type can't be forced to a Long. */ @@ -567,13 +613,19 @@ public class DataType { } } + /** - * If type of object is not known, use this method which in turns call - * toLong(object,type) after finding type. - * - * @param o - * @return Object as Long. - * @throws ExecException + * Force a data object to a Long, if possible. Any numeric type + * can be forced to a Long (though precision may be lost), as well + * as CharArray, ByteArray, or Boolean. Complex types cannot be + * forced to an Long. This isn't particularly efficient, so if you + * already <b>know</b> that the object you have is a Long you + * should just cast it. Unlike {@link #toLong(Object, byte)} this + * method will first determine the type of o and then do the cast. + * Use {@link #toLong(Object, byte)} if you already know the type. + * @param o object to cast + * @return The object as a Long. 
+ * @throws ExecException if the type can't be forced to an Long. */ public static Long toLong(Object o) throws ExecException { return toLong(o, findType(o)); @@ -586,6 +638,8 @@ public class DataType { * forced to a Float. This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a Float you * should just cast it. + * @param o object to cast + * @param type of the object you are casting * @return The object as a Float. * @throws ExecException if the type can't be forced to a Float. */ @@ -640,13 +694,19 @@ public class DataType { throw new ExecException(msg, errCode, PigException.BUG); } } + /** - * If type of object is not known, use this method which in turns call - * toFloat(object,type) after finding type. - * - * @param o - * @return Object as Float. - * @throws ExecException + * Force a data object to a Float, if possible. Any numeric type + * can be forced to a Float (though precision may be lost), as well + * as CharArray, ByteArray, or Boolean. Complex types cannot be + * forced to an Float. This isn't particularly efficient, so if you + * already <b>know</b> that the object you have is a Float you + * should just cast it. Unlike {@link #toFloat(Object, byte)} this + * method will first determine the type of o and then do the cast. + * Use {@link #toFloat(Object, byte)} if you already know the type. + * @param o object to cast + * @return The object as a Float. + * @throws ExecException if the type can't be forced to an Float. */ public static Float toFloat(Object o) throws ExecException { return toFloat(o, findType(o)); @@ -659,6 +719,8 @@ public class DataType { * forced to a Double. This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a Double you * should just cast it. + * @param o object to cast + * @param type of the object you are casting * @return The object as a Double. * @throws ExecException if the type can't be forced to a Double. 
*/ @@ -713,13 +775,19 @@ public class DataType { throw new ExecException(msg, errCode, PigException.BUG); } } + /** - * If type of object is not known, use this method which in turns call - * toLong(object,type) after finding type. - * - * @param o - * @return Object as Double. - * @throws ExecException + * Force a data object to a Double, if possible. Any numeric type + * can be forced to a Double, as well + * as CharArray, ByteArray, or Boolean. Complex types cannot be + * forced to an Double. This isn't particularly efficient, so if you + * already <b>know</b> that the object you have is a Double you + * should just cast it. Unlike {@link #toDouble(Object, byte)} this + * method will first determine the type of o and then do the cast. + * Use {@link #toDouble(Object, byte)} if you already know the type. + * @param o object to cast + * @return The object as a Double. + * @throws ExecException if the type can't be forced to an Double. */ public static Double toDouble(Object o) throws ExecException { return toDouble(o, findType(o)); @@ -731,6 +799,8 @@ public class DataType { * forced to a String. This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a String you * should just cast it. + * @param o object to cast + * @param type of the object you are casting * @return The object as a String. * @throws ExecException if the type can't be forced to a String. */ @@ -785,22 +855,29 @@ public class DataType { throw new ExecException(msg, errCode, PigException.BUG); } } + /** - * If type of object is not known, use this method which in turns call - * toString(object,type) after finding type. - * - * @param o - * @return Object as String. - * @throws ExecException + * Force a data object to a String, if possible. Any simple (atomic) type + * can be forced to a String including ByteArray. Complex types cannot be + * forced to a String. 
This isn't particularly efficient, so if you + * already <b>know</b> that the object you have is a String you + * should just cast it. Unlike {@link #toString(Object, byte)} this + * method will first determine the type of o and then do the cast. + * Use {@link #toString(Object, byte)} if you already know the type. + * @param o object to cast + * @return The object as a String. + * @throws ExecException if the type can't be forced to a String. */ public static String toString(Object o) throws ExecException { return toString(o, findType(o)); } + /** * If this object is a map, return it as a map. * This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a Map you * should just cast it. + * @param o object to cast * @return The object as a Map. * @throws ExecException if the type can't be forced to a Double. */ @@ -829,6 +906,7 @@ public class DataType { * This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a Tuple you * should just cast it. + * @param o object to cast * @return The object as a Double. * @throws ExecException if the type can't be forced to a Double. */ @@ -856,6 +934,7 @@ public class DataType { * This isn't particularly efficient, so if you * already <b>know</b> that the object you have is a bag you * should just cast it. + * @param o object to cast * @return The object as a Double. * @throws ExecException if the type can't be forced to a Double. */ @@ -890,6 +969,11 @@ public class DataType { System.out.println(t.toString()); } + /** + * Determine if this type is a numeric type. + * @param t type (as byte value) to test + * @return true if this is a numeric type, false otherwise + */ public static boolean isNumberType(byte t) { switch (t) { case INTEGER: return true ; @@ -900,6 +984,11 @@ public class DataType { } } + /** + * Determine if this is a type that can work can be done on. 
+ * @param t type (as a byte value) to test + * @return false if the type is unknown, null, or error; true otherwise. + */ public static boolean isUsableType(byte t) { switch (t) { case UNKNOWN: return false ; @@ -909,8 +998,9 @@ public class DataType { } } - /*** - * Merge types if possible + /** + * Merge types if possible. Merging types means finding a type that one + * or both types can be upcast to. * @param type1 * @param type2 * @return the merged type, or DataType.ERROR if not successful @@ -946,6 +1036,11 @@ public class DataType { return DataType.ERROR ; } + /** + * Given a map, turn it into a String. + * @param m map + * @return string representation of the map + */ public static String mapToString(Map<String, Object> m) { boolean hasNext = false; StringBuilder sb = new StringBuilder(); @@ -967,6 +1062,14 @@ public class DataType { return sb.toString(); } + /** + * Test whether two byte arrays (Java byte arrays not Pig byte arrays) are + * equal. I have no idea why we have this function. + * @param lhs byte array 1 + * @param rhs byte array 2 + * @return true if both are null or the two are the same length and have + * the same bytes. 
+ */ public static boolean equalByteArrays(byte[] lhs, byte[] rhs) { if(lhs == null && rhs == null) return true; if(lhs == null || rhs == null) return false; Modified: hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java Tue May 4 19:26:56 2010 @@ -22,24 +22,28 @@ import java.util.List; import org.apache.hadoop.io.WritableComparable; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; import org.apache.pig.backend.executionengine.ExecException; /** * An ordered list of Data. A tuple has fields, numbered 0 through * (number of fields - 1). The entry in the field can be any datatype, * or it can be null. - * - * Tuples are constructed only by a TupleFactory. A DefaultTupleFactory - * is provided by the system. If a user wishes to use their own type of - * Tuple, they should also provide an implementation of TupleFactory to + * <p> + * Tuples are constructed only by a {@link TupleFactory}. A + * {@link DefaultTupleFactory} + * is provided by the system. If users wish to use their own type of + * Tuple, they should also provide an implementation of {@link TupleFactory} to * construct their types of Tuples. * - * Fields are numbered from 0. */ // Put in to make the compiler not complain about WritableComparable // being a generic type. @SuppressWarnings("unchecked") +@InterfaceAudience.Public +@InterfaceStability.Stable public interface Tuple extends WritableComparable, Serializable { /** @@ -80,8 +84,8 @@ public interface Tuple extends WritableC /** * Find the type of a given field. * @param fieldNum Number of field to get the type for. 
- * @return type, encoded as a byte value. The values are taken from - * the class DataType. If the field is null, then DataType.UNKNOWN + * @return type, encoded as a byte value. The values are defined in + * {@link DataType}. If the field is null, then DataType.UNKNOWN * will be returned. * @throws ExecException if the field number is greater than or equal to * the number of fields in the tuple. @@ -99,13 +103,18 @@ public interface Tuple extends WritableC /** * Get all of the fields in the tuple as a list. - * @return List<Object> containing the fields of the tuple + * @return a list of objects containing the fields of the tuple * in order. */ List<Object> getAll(); /** - * Set the value in a given field. + * Set the value in a given field. This should not be called unless + * the tuple was constructed by {@link TupleFactory#newTuple(int)} with an + * argument greater than the fieldNum being passed here. This call will + * not automatically expand the tuple size. That is if you called + * {@link TupleFactory#newTuple(int)} with a 2, it is okay to call + * this function with a 1, but not with a 2 or greater. * @param fieldNum Number of the field to set the value for. * @param val Object to put in the indicated field. * @throws ExecException if the field number is greater than or equal to @@ -116,9 +125,10 @@ public interface Tuple extends WritableC /** * Append a field to a tuple. This method is not efficient as it may * force copying of existing data in order to grow the data structure. - * Whenever possible you should construct your Tuple with the - * newTuple(int) method and then fill in the values with set(), rather - * than construct it with newTuple() and append values. + * Whenever possible you should construct your Tuple with + * {@link TupleFactory#newTuple(int)} and then fill in the values with + * {@link #set(int, Object)}, rather + * than construct it with {@link TupleFactory#newTuple()} and append values. 
* @param val Object to append to the tuple. */ void append(Object val); @@ -127,7 +137,7 @@ public interface Tuple extends WritableC * Determine the size of tuple in memory. This is used by data bags * to determine their memory size. This need not be exact, but it * should be a decent estimation. - * @return estimated memory size. + * @return estimated memory size, in bytes. */ long getMemorySize(); @@ -141,11 +151,13 @@ public interface Tuple extends WritableC String toDelimitedString(String delim) throws ExecException; /** + * Determine if this entire tuple (not any particular field) is null. * @return true if this Tuple is null */ public boolean isNull(); /** + * Mark this entire tuple as null or not null. * @param isNull boolean indicating whether this tuple is null */ public void setNull(boolean isNull); Modified: hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java Tue May 4 19:26:56 2010 @@ -23,21 +23,27 @@ import java.net.URL; import java.net.URLClassLoader; import java.util.List; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + /** * A factory to construct tuples. This class is abstract so that users can * override the tuple factory if they desire to provide their own that * returns their implementation of a tuple. If the property * pig.data.tuple.factory.name is set to a class name and * pig.data.tuple.factory.jar is set to a URL pointing to a jar that - * contains the above named class, then getInstance() will create a - * a instance of the named class using the indicatd jar. 
Otherwise, it - * will create and instance of DefaultTupleFactory. + * contains the above named class, then {@link #getInstance()} will create a + * an instance of the named class using the indicated jar. Otherwise, it + * will create an instance of {@link DefaultTupleFactory}. */ +@InterfaceAudience.Public +@InterfaceStability.Stable public abstract class TupleFactory { private static TupleFactory gSelf = null; /** * Get a reference to the singleton factory. + * @return The TupleFactory to use to construct tuples. */ public static TupleFactory getInstance() { if (gSelf == null) { @@ -77,15 +83,17 @@ public abstract class TupleFactory { /** * Create an empty tuple. This should be used as infrequently as * possible, use newTuple(int) instead. + * @return Empty new tuple. */ public abstract Tuple newTuple(); /** - * Create a tuple with size fields. Whenever possible this is prefered - * over the nullary constructor, as the constructor can preallocate the + * Create a tuple with size fields. Whenever possible this is preferred + * over the null constructor, as the constructor can preallocate the * size of the container holding the fields. Once this is called, it * is legal to call Tuple.set(x, object), where x < size. * @param size Number of fields in the tuple. + * @return Tuple with size fields */ public abstract Tuple newTuple(int size); @@ -93,6 +101,7 @@ public abstract class TupleFactory { * Create a tuple from the provided list of objects. The underlying list * will be copied. * @param c List of objects to use as the fields of the tuple. + * @return A tuple with the list objects as its fields */ public abstract Tuple newTuple(List c); @@ -100,6 +109,7 @@ public abstract class TupleFactory { * Create a tuple from a provided list of objects, keeping the provided * list. The new tuple will take over ownership of the provided list. * @param list List of objects that will become the fields of the tuple. 
+ * @return A tuple with the list objects as its fields */ public abstract Tuple newTupleNoCopy(List list); @@ -108,14 +118,14 @@ public abstract class TupleFactory { * the fact that bags (currently) only take tuples, we often end up * sticking a single element in a tuple in order to put it in a bag. * @param datum Datum to put in the tuple. + * @return A tuple with one field */ public abstract Tuple newTuple(Object datum); /** * Return the actual class representing a tuple that the implementing - * factory will be returning. This is needed because hadoop (and - * possibly other systems) we use need to know the exact class we will - * be using for input and output. + * factory will be returning. This is needed because hadoop needs + * to know the exact class we will be using for input and output. * @return Class that implements tuple. */ public abstract Class tupleClass(); Modified: hadoop/pig/trunk/src/org/apache/pig/data/package.html URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/package.html?rev=941005&r1=941004&r2=941005&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/data/package.html (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/package.html Tue May 4 19:26:56 2010 @@ -2,8 +2,6 @@ <body> <p> -Data types for Pig. -<p> This package contains implementations of Pig specific data types as well as support functions for reading, writing, and using all Pig data types. <p>