Author: gates
Date: Tue May 4 19:26:56 2010
New Revision: 941005
URL: http://svn.apache.org/viewvc?rev=941005&view=rev
Log:
PIG-1398: Marking Pig interfaces for org.apache.pig.data package.
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java
hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java
hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java
hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java
hadoop/pig/trunk/src/org/apache/pig/data/DataType.java
hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java
hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java
hadoop/pig/trunk/src/org/apache/pig/data/package.html
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue May 4 19:26:56 2010
@@ -23,6 +23,7 @@ Trunk (unreleased changes)
INCOMPATIBLE CHANGES
IMPROVEMENTS
+PIG-1398: Marking Pig interfaces for org.apache.pig.data package (gates)
PIG-1396: eclipse-files target in build.xml fails to generate necessary
classes in src-gen
Modified: hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/BagFactory.java Tue May 4
19:26:56 2010
@@ -24,6 +24,8 @@ import java.net.URLClassLoader;
import java.util.Comparator;
import java.util.List;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.util.SpillableMemoryManager;
/**
@@ -33,16 +35,19 @@ import org.apache.pig.impl.util.Spillabl
* returns their implementation of a bag. If the property
* pig.data.bag.factory.name is set to a class name and
* pig.data.bag.factory.jar is set to a URL pointing to a jar that
- * contains the above named class, then getInstance() will create a
- * a instance of the named class using the indicatd jar. Otherwise, it
+ * contains the above named class, then getInstance() will create
+ * an instance of the named class using the indicated jar. Otherwise, it
* will create an instance of DefaultBagFactory.
*/
[email protected]
[email protected]
public abstract class BagFactory {
private static BagFactory gSelf = null;
private static SpillableMemoryManager gMemMgr;
/**
* Get a reference to the singleton factory.
+ * @return BagFactory
*/
public static BagFactory getInstance() {
if (gSelf == null) {
@@ -81,31 +86,51 @@ public abstract class BagFactory {
/**
* Get a default (unordered, not distinct) data bag.
+ * @return default data bag.
*/
public abstract DataBag newDefaultBag();
/**
- * Get a default (unordered, not distinct) data bag from
- * an existing list of tuples.
+ * Get a default (unordered, not distinct) data bag with
+ * an existing list of tuples inserted into the bag.
+ * @param listOfTuples list of tuples to be placed in the bag. This list
may not be
+ * copied, it may be used directly by the created bag.
+ * @return default data bag.
*/
public abstract DataBag newDefaultBag(List<Tuple> listOfTuples);
/**
- * Get a sorted data bag.
+ * Get a sorted data bag. Sorted bags guarantee that when an iterator
+ * is opened on the bag the tuples will be returned in sorted order.
* @param comp Comparator that controls how the data is sorted.
* If null, default comparator will be used.
+ * @return a sorted data bag
*/
public abstract DataBag newSortedBag(Comparator<Tuple> comp);
/**
- * Get a distinct data bag.
+ * Get a distinct data bag. Distinct bags guarantee that when an
+ * iterator is opened on the bag, no two tuples returned from the
+ * iterator will be equal.
+ * @return distinct data bag
*/
public abstract DataBag newDistinctBag();
+ /**
+ * Construct a new BagFactory
+ */
protected BagFactory() {
gMemMgr = new SpillableMemoryManager();
}
+ /**
+ * Register a bag with the
+ * {@link org.apache.pig.impl.util.SpillableMemoryManager}.
+ * If the bags created by an implementation of BagFactory are managed by
+ * the {@link org.apache.pig.impl.util.SpillableMemoryManager} then this
+ * method should be called each time a new bag is created.
+ * @param b bag to be registered.
+ */
protected void registerBag(DataBag b) {
gMemMgr.registerSpillable(b);
}
Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java Tue May 4 19:26:56
2010
@@ -31,6 +31,8 @@ import java.util.ArrayList;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.impl.util.Spillable;
/**
@@ -41,18 +43,18 @@ import org.apache.pig.impl.util.Spillabl
* spill()), it takes whatever it has in memory, opens a spill file, and
* writes the contents out. This may happen multiple times. The bag
* tracks all of the files it's spilled to.
- *
+ * <p>
* DataBag provides an Iterator interface, that allows callers to read
* through the contents. The iterators are aware of the data spilling.
* They have to be able to handle reading from files, as well as the fact
* that data they were reading from memory may have been spilled to disk
* underneath them.
- *
+ * <p>
* The DataBag interface assumes that all data is written before any is
* read. That is, a DataBag cannot be used as a queue. If data is written
* after data is read, the results are undefined. This condition is not
* checked on each add or read, for reasons of speed. Caveat emptor.
- *
+ * <p>
* Since spills are asynchronous (the memory manager requesting a spill
* runs in a separate thread), all operations dealing with the mContents
* Collection (which is the collection of tuples contained in the bag) have
@@ -63,23 +65,35 @@ import org.apache.pig.impl.util.Spillabl
* If pig changes its execution model to be multithreaded, we may need to
* return to this issue, as synchronizing reads will most likely defeat the
* purpose of multi-threading execution.
- *
+ * <p>
* DataBags come in several types, default, sorted, and distinct. The type
* must be chosen up front, there is no way to convert a bag on the fly.
+ * Default data bags do not guarantee any particular order of retrieval for
+ * the tuples and may contain duplicate tuples. Sorted data bags guarantee
+ * that tuples will be retrieved in order, where "in order" is defined either
+ * by the default comparator for Tuple or the comparator provided by the
+ * caller when the bag was created. Sorted bags may contain duplicates.
+ * Distinct bags do not guarantee any particular order of retrieval, but do
+ * guarantee that they will not contain duplicate tuples.
*/
[email protected]
[email protected]
public interface DataBag extends Spillable, WritableComparable,
Iterable<Tuple>, Serializable {
/**
* Get the number of elements in the bag, both in memory and on disk.
+ * @return number of elements in the bag
*/
long size();
/**
* Find out if the bag is sorted.
+ * @return true if this is a sorted data bag, false otherwise.
*/
boolean isSorted();
/**
* Find out if the bag is distinct.
+ * @return true if the bag is a distinct bag, false otherwise.
*/
boolean isDistinct();
@@ -88,6 +102,7 @@ public interface DataBag extends Spillab
* no particular order is guaranteed. For sorted bags the order
* is guaranteed to be sorted according
* to the provided comparator.
+ * @return tuple iterator
*/
Iterator<Tuple> iterator();
@@ -114,5 +129,6 @@ public interface DataBag extends Spillab
* This is used by FuncEvalSpec.FakeDataBag.
* @param stale Set stale state.
*/
+ @InterfaceAudience.Private
void markStale(boolean stale);
}
Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataByteArray.java Tue May 4
19:26:56 2010
@@ -24,11 +24,16 @@ import java.lang.StringBuilder;
import java.util.ArrayList;
import java.util.Collection;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
/**
* An implementation of byte array. This is done as an object because we
* need to be able to implement compareTo, toString, hashCode, and some
* other methods.
*/
[email protected]
[email protected]
public class DataByteArray implements Comparable, Serializable {
private static final long serialVersionUID = 1L;
@@ -143,6 +148,9 @@ public class DataByteArray implements Co
mData = s.getBytes();
}
+ /**
+ * Convert the byte array to a string. UTF8 encoding will be assumed.
+ */
@Override
public String toString() {
String r="";
@@ -158,7 +166,7 @@ public class DataByteArray implements Co
* Compare two byte arrays. Comparison is done first using byte values
* then length. So "g" will be greater than "abcdefg", but "hello worlds"
* is greater than "hello world". If the other object is not a
- * DataByteArray, DataType.compare will be called.
+ * DataByteArray, {@link DataType#compare} will be called.
* @param other Other object to compare to.
* @return -1 if less than, 1 if greater than, 0 if equal.
*/
Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataReaderWriter.java Tue May 4
19:26:56 2010
@@ -27,13 +27,19 @@ import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+
import org.apache.hadoop.io.Writable;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.backend.executionengine.ExecException;
/**
* A class to handle reading and writing of intermediate results of data
* types. This class could also be used for storing permanent results.
*/
[email protected]
[email protected]
public class DataReaderWriter {
private static TupleFactory mTupleFactory = TupleFactory.getInstance();
private static BagFactory mBagFactory = BagFactory.getInstance();
Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataType.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataType.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/DataType.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DataType.java Tue May 4 19:26:56
2010
@@ -27,6 +27,9 @@ import java.util.Map;
import java.util.TreeMap;
import org.apache.hadoop.io.WritableComparable;
+
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.PigException;
import org.apache.pig.ResourceSchema;
import org.apache.pig.backend.executionengine.ExecException;
@@ -43,6 +46,8 @@ import org.apache.pig.impl.logicalLayer.
* done as an enumeration, but it is done as byte codes instead to save
* creating objects.
*/
[email protected]
[email protected]
public class DataType {
// IMPORTANT! This list can be used to record values of data on disk,
// so do not change the values. You may strand user data.
@@ -60,16 +65,24 @@ public class DataType {
public static final byte DOUBLE = 25;
public static final byte BYTEARRAY = 50;
public static final byte CHARARRAY = 55;
+ /**
+ * Internal use only.
+ */
public static final byte BIGCHARARRAY = 60; //internal use only; for
storing/loading chararray bigger than 64K characters in BinStorage
public static final byte MAP = 100;
public static final byte TUPLE = 110;
public static final byte BAG = 120;
- // internal use only; used to store WriteableComparable objects
- // for creating ordered index in MergeJoin. Expecting a object that
- // implements Writable interface and has default constructor
+ /**
+ * Internal use only; used to store WriteableComparable objects
+ * for creating ordered index in MergeJoin. Expecting a object that
+ * implements Writable interface and has default constructor
+ */
public static final byte GENERIC_WRITABLECOMPARABLE = 123;
+ /**
+ * Internal use only.
+ */
public static final byte INTERNALMAP = 127; // internal use only; for maps
that are object->object. Used by FindQuantiles.
public static final byte ERROR = -1;
@@ -163,10 +176,19 @@ public class DataType {
return ERROR;
}
+ /**
+ * Return the number of types Pig knows about.
+ * @return number of types
+ */
public static int numTypes(){
byte[] types = genAllTypes();
return types.length;
}
+
+ /**
+ * Get an array of all type values.
+ * @return byte array with an entry for each type.
+ */
public static byte[] genAllTypes(){
byte[] types = { DataType.BAG, DataType.BIGCHARARRAY,
DataType.BOOLEAN, DataType.BYTE, DataType.BYTEARRAY,
DataType.CHARARRAY, DataType.DOUBLE, DataType.FLOAT,
@@ -185,6 +207,10 @@ public class DataType {
return names;
}
+ /**
+ * Get a map of type values to type names.
+ * @return map
+ */
public static Map<Byte, String> genTypeToNameMap(){
byte[] types = genAllTypes();
String[] names = genAllTypeNames();
@@ -195,6 +221,10 @@ public class DataType {
return ret;
}
+ /**
+ * Get a map of type names to type values.
+ * @return map
+ */
public static Map<String, Byte> genNameToTypeMap(){
byte[] types = genAllTypes();
String[] names = genAllTypeNames();
@@ -326,12 +356,17 @@ public class DataType {
return compare(o1, o2, dt1, dt2);
}
- /*
- * Same as compare(Object o1, Object o2), but does not use reflection to
determine the type
+ /**
+ * Same as {@link #compare(Object, Object)}, but does not use
reflection to determine the type
* of passed in objects, relying instead on the caller to provide the
appropriate values, as
- * determined by DataType.findType(Object o);
+ * determined by {@link DataType#findType(Object)}.
*
* Use this version in cases where multiple objects of the same type have
to be repeatedly compared.
+ * @param o1 first object
+ * @param o2 second object
+ * @param dt1 type, as byte value, of o1
+ * @param dt2 type, as byte value, of o2
+ * @return -1 if o1 is < o2, 0 if they are equal, 1 if o1 > o2
*/
@SuppressWarnings("unchecked")
public static int compare(Object o1, Object o2, byte dt1, byte dt2) {
@@ -430,7 +465,9 @@ public class DataType {
* forced to an Integer. This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is an Integer you
* should just cast it.
- * @return The object as a Integer.
+ * @param o object to cast
+ * @param type of the object you are casting
+ * @return The object as an Integer.
* @throws ExecException if the type can't be forced to an Integer.
*/
public static Integer toInteger(Object o,byte type) throws ExecException {
@@ -489,17 +526,24 @@ public class DataType {
throw new ExecException(msg, errCode, PigException.BUG);
}
}
+
/**
- * If type of object is not known, use this method, which internally calls
- * toInteger(object,type)
- *
- * @param o
- * @return Object as Integer.
- * @throws ExecException
+ * Force a data object to an Integer, if possible. Any numeric type
+ * can be forced to an Integer (though precision may be lost), as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to an Integer. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is an Integer you
+ * should just cast it. Unlike {@link #toInteger(Object, byte)} this
+ * method will first determine the type of o and then do the cast.
+ * Use {@link #toInteger(Object, byte)} if you already know the type.
+ * @param o object to cast
+ * @return The object as an Integer.
+ * @throws ExecException if the type can't be forced to an Integer.
*/
public static Integer toInteger(Object o) throws ExecException {
return toInteger(o, findType(o));
}
+
/**
* Force a data object to a Long, if possible. Any numeric type
* can be forced to a Long (though precision may be lost), as well
@@ -507,6 +551,8 @@ public class DataType {
* forced to a Long. This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a Long you
* should just cast it.
+ * @param o object to cast
+ * @param type of the object you are casting
* @return The object as a Long.
* @throws ExecException if the type can't be forced to a Long.
*/
@@ -567,13 +613,19 @@ public class DataType {
}
}
+
/**
- * If type of object is not known, use this method which in turns call
- * toLong(object,type) after finding type.
- *
- * @param o
- * @return Object as Long.
- * @throws ExecException
+ * Force a data object to a Long, if possible. Any numeric type
+ * can be forced to a Long (though precision may be lost), as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to a Long. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Long you
+ * should just cast it. Unlike {@link #toLong(Object, byte)} this
+ * method will first determine the type of o and then do the cast.
+ * Use {@link #toLong(Object, byte)} if you already know the type.
+ * @param o object to cast
+ * @return The object as a Long.
+ * @throws ExecException if the type can't be forced to a Long.
*/
public static Long toLong(Object o) throws ExecException {
return toLong(o, findType(o));
@@ -586,6 +638,8 @@ public class DataType {
* forced to a Float. This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a Float you
* should just cast it.
+ * @param o object to cast
+ * @param type of the object you are casting
* @return The object as a Float.
* @throws ExecException if the type can't be forced to a Float.
*/
@@ -640,13 +694,19 @@ public class DataType {
throw new ExecException(msg, errCode, PigException.BUG);
}
}
+
/**
- * If type of object is not known, use this method which in turns call
- * toFloat(object,type) after finding type.
- *
- * @param o
- * @return Object as Float.
- * @throws ExecException
+ * Force a data object to a Float, if possible. Any numeric type
+ * can be forced to a Float (though precision may be lost), as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to a Float. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Float you
+ * should just cast it. Unlike {@link #toFloat(Object, byte)} this
+ * method will first determine the type of o and then do the cast.
+ * Use {@link #toFloat(Object, byte)} if you already know the type.
+ * @param o object to cast
+ * @return The object as a Float.
+ * @throws ExecException if the type can't be forced to a Float.
*/
public static Float toFloat(Object o) throws ExecException {
return toFloat(o, findType(o));
@@ -659,6 +719,8 @@ public class DataType {
* forced to a Double. This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a Double you
* should just cast it.
+ * @param o object to cast
+ * @param type of the object you are casting
* @return The object as a Double.
* @throws ExecException if the type can't be forced to a Double.
*/
@@ -713,13 +775,19 @@ public class DataType {
throw new ExecException(msg, errCode, PigException.BUG);
}
}
+
/**
- * If type of object is not known, use this method which in turns call
- * toLong(object,type) after finding type.
- *
- * @param o
- * @return Object as Double.
- * @throws ExecException
+ * Force a data object to a Double, if possible. Any numeric type
+ * can be forced to a Double, as well
+ * as CharArray, ByteArray, or Boolean. Complex types cannot be
+ * forced to a Double. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a Double you
+ * should just cast it. Unlike {@link #toDouble(Object, byte)} this
+ * method will first determine the type of o and then do the cast.
+ * Use {@link #toDouble(Object, byte)} if you already know the type.
+ * @param o object to cast
+ * @return The object as a Double.
+ * @throws ExecException if the type can't be forced to a Double.
*/
public static Double toDouble(Object o) throws ExecException {
return toDouble(o, findType(o));
@@ -731,6 +799,8 @@ public class DataType {
* forced to a String. This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a String you
* should just cast it.
+ * @param o object to cast
+ * @param type of the object you are casting
* @return The object as a String.
* @throws ExecException if the type can't be forced to a String.
*/
@@ -785,22 +855,29 @@ public class DataType {
throw new ExecException(msg, errCode, PigException.BUG);
}
}
+
/**
- * If type of object is not known, use this method which in turns call
- * toString(object,type) after finding type.
- *
- * @param o
- * @return Object as String.
- * @throws ExecException
+ * Force a data object to a String, if possible. Any simple (atomic) type
+ * can be forced to a String including ByteArray. Complex types cannot be
+ * forced to a String. This isn't particularly efficient, so if you
+ * already <b>know</b> that the object you have is a String you
+ * should just cast it. Unlike {@link #toString(Object, byte)} this
+ * method will first determine the type of o and then do the cast.
+ * Use {@link #toString(Object, byte)} if you already know the type.
+ * @param o object to cast
+ * @return The object as a String.
+ * @throws ExecException if the type can't be forced to a String.
*/
public static String toString(Object o) throws ExecException {
return toString(o, findType(o));
}
+
/**
* If this object is a map, return it as a map.
* This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a Map you
* should just cast it.
+ * @param o object to cast
* @return The object as a Map.
* @throws ExecException if the type can't be forced to a Double.
*/
@@ -829,6 +906,7 @@ public class DataType {
* This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a Tuple you
* should just cast it.
+ * @param o object to cast
* @return The object as a Double.
* @throws ExecException if the type can't be forced to a Double.
*/
@@ -856,6 +934,7 @@ public class DataType {
* This isn't particularly efficient, so if you
* already <b>know</b> that the object you have is a bag you
* should just cast it.
+ * @param o object to cast
* @return The object as a Double.
* @throws ExecException if the type can't be forced to a Double.
*/
@@ -890,6 +969,11 @@ public class DataType {
System.out.println(t.toString());
}
+ /**
+ * Determine if this type is a numeric type.
+ * @param t type (as byte value) to test
+ * @return true if this is a numeric type, false otherwise
+ */
public static boolean isNumberType(byte t) {
switch (t) {
case INTEGER: return true ;
@@ -900,6 +984,11 @@ public class DataType {
}
}
+ /**
+ * Determine if this is a type that work can be done on.
+ * @param t type (as a byte value) to test
+ * @return false if the type is unknown, null, or error; true otherwise.
+ */
public static boolean isUsableType(byte t) {
switch (t) {
case UNKNOWN: return false ;
@@ -909,8 +998,9 @@ public class DataType {
}
}
- /***
- * Merge types if possible
+ /**
+ * Merge types if possible. Merging types means finding a type that one
+ * or both types can be upcast to.
* @param type1
* @param type2
* @return the merged type, or DataType.ERROR if not successful
@@ -946,6 +1036,11 @@ public class DataType {
return DataType.ERROR ;
}
+ /**
+ * Given a map, turn it into a String.
+ * @param m map
+ * @return string representation of the map
+ */
public static String mapToString(Map<String, Object> m) {
boolean hasNext = false;
StringBuilder sb = new StringBuilder();
@@ -967,6 +1062,14 @@ public class DataType {
return sb.toString();
}
+ /**
+ * Test whether two byte arrays (Java byte arrays not Pig byte arrays) are
+ * equal. I have no idea why we have this function.
+ * @param lhs byte array 1
+ * @param rhs byte array 2
+ * @return true if both are null or the two are the same length and have
+ * the same bytes.
+ */
public static boolean equalByteArrays(byte[] lhs, byte[] rhs) {
if(lhs == null && rhs == null) return true;
if(lhs == null || rhs == null) return false;
Modified: hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java Tue May 4 19:26:56 2010
@@ -22,24 +22,28 @@ import java.util.List;
import org.apache.hadoop.io.WritableComparable;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
import org.apache.pig.backend.executionengine.ExecException;
/**
* An ordered list of Data. A tuple has fields, numbered 0 through
* (number of fields - 1). The entry in the field can be any datatype,
* or it can be null.
- *
- * Tuples are constructed only by a TupleFactory. A DefaultTupleFactory
- * is provided by the system. If a user wishes to use their own type of
- * Tuple, they should also provide an implementation of TupleFactory to
+ * <p>
+ * Tuples are constructed only by a {@link TupleFactory}. A
+ * {@link DefaultTupleFactory}
+ * is provided by the system. If users wish to use their own type of
+ * Tuple, they should also provide an implementation of {@link
TupleFactory} to
* construct their types of Tuples.
*
- * Fields are numbered from 0.
*/
// Put in to make the compiler not complain about WritableComparable
// being a generic type.
@SuppressWarnings("unchecked")
[email protected]
[email protected]
public interface Tuple extends WritableComparable, Serializable {
/**
@@ -80,8 +84,8 @@ public interface Tuple extends WritableC
/**
* Find the type of a given field.
* @param fieldNum Number of field to get the type for.
- * @return type, encoded as a byte value. The values are taken from
- * the class DataType. If the field is null, then DataType.UNKNOWN
+ * @return type, encoded as a byte value. The values are defined in
+ * {@link DataType}. If the field is null, then DataType.UNKNOWN
* will be returned.
* @throws ExecException if the field number is greater than or equal to
* the number of fields in the tuple.
@@ -99,13 +103,18 @@ public interface Tuple extends WritableC
/**
* Get all of the fields in the tuple as a list.
- * @return List<Object> containing the fields of the tuple
+ * @return a list of objects containing the fields of the tuple
* in order.
*/
List<Object> getAll();
/**
- * Set the value in a given field.
+ * Set the value in a given field. This should not be called unless
+ * the tuple was constructed by {@link TupleFactory#newTuple(int)} with
an
+ * argument greater than the fieldNum being passed here. This call will
+ * not automatically expand the tuple size. That is if you called
+ * {@link TupleFactory#newTuple(int)} with a 2, it is okay to call
+ * this function with a 1, but not with a 2 or greater.
* @param fieldNum Number of the field to set the value for.
* @param val Object to put in the indicated field.
* @throws ExecException if the field number is greater than or equal to
@@ -116,9 +125,10 @@ public interface Tuple extends WritableC
/**
* Append a field to a tuple. This method is not efficient as it may
* force copying of existing data in order to grow the data structure.
- * Whenever possible you should construct your Tuple with the
- * newTuple(int) method and then fill in the values with set(), rather
- * than construct it with newTuple() and append values.
+ * Whenever possible you should construct your Tuple with
+ * {@link TupleFactory#newTuple(int)} and then fill in the values with
+ * {@link #set(int, Object)}, rather
+ * than construct it with {@link TupleFactory#newTuple()} and append
values.
* @param val Object to append to the tuple.
*/
void append(Object val);
@@ -127,7 +137,7 @@ public interface Tuple extends WritableC
* Determine the size of tuple in memory. This is used by data bags
* to determine their memory size. This need not be exact, but it
* should be a decent estimation.
- * @return estimated memory size.
+ * @return estimated memory size, in bytes.
*/
long getMemorySize();
@@ -141,11 +151,13 @@ public interface Tuple extends WritableC
String toDelimitedString(String delim) throws ExecException;
/**
+ * Determine if this entire tuple (not any particular field) is null.
* @return true if this Tuple is null
*/
public boolean isNull();
/**
+ * Mark this entire tuple as null or not null.
* @param isNull boolean indicating whether this tuple is null
*/
public void setNull(boolean isNull);
Modified: hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/TupleFactory.java Tue May 4
19:26:56 2010
@@ -23,21 +23,27 @@ import java.net.URL;
import java.net.URLClassLoader;
import java.util.List;
+import org.apache.pig.classification.InterfaceAudience;
+import org.apache.pig.classification.InterfaceStability;
+
/**
* A factory to construct tuples. This class is abstract so that users can
* override the tuple factory if they desire to provide their own that
* returns their implementation of a tuple. If the property
* pig.data.tuple.factory.name is set to a class name and
* pig.data.tuple.factory.jar is set to a URL pointing to a jar that
- * contains the above named class, then getInstance() will create a
- * a instance of the named class using the indicatd jar. Otherwise, it
- * will create and instance of DefaultTupleFactory.
+ * contains the above named class, then {@link #getInstance()} will create
+ * an instance of the named class using the indicated jar. Otherwise, it
+ * will create an instance of {@link DefaultTupleFactory}.
*/
[email protected]
[email protected]
public abstract class TupleFactory {
private static TupleFactory gSelf = null;
/**
* Get a reference to the singleton factory.
+ * @return The TupleFactory to use to construct tuples.
*/
public static TupleFactory getInstance() {
if (gSelf == null) {
@@ -77,15 +83,17 @@ public abstract class TupleFactory {
/**
* Create an empty tuple. This should be used as infrequently as
* possible, use newTuple(int) instead.
+ * @return Empty new tuple.
*/
public abstract Tuple newTuple();
/**
- * Create a tuple with size fields. Whenever possible this is prefered
- * over the nullary constructor, as the constructor can preallocate the
+ * Create a tuple with size fields. Whenever possible this is preferred
+ * over the nullary constructor, as the constructor can preallocate the
* size of the container holding the fields. Once this is called, it
* is legal to call Tuple.set(x, object), where x < size.
* @param size Number of fields in the tuple.
+ * @return Tuple with size fields
*/
public abstract Tuple newTuple(int size);
@@ -93,6 +101,7 @@ public abstract class TupleFactory {
* Create a tuple from the provided list of objects. The underlying list
* will be copied.
* @param c List of objects to use as the fields of the tuple.
+ * @return A tuple with the list objects as its fields
*/
public abstract Tuple newTuple(List c);
@@ -100,6 +109,7 @@ public abstract class TupleFactory {
* Create a tuple from a provided list of objects, keeping the provided
* list. The new tuple will take over ownership of the provided list.
* @param list List of objects that will become the fields of the tuple.
+ * @return A tuple with the list objects as its fields
*/
public abstract Tuple newTupleNoCopy(List list);
@@ -108,14 +118,14 @@ public abstract class TupleFactory {
* the fact that bags (currently) only take tuples, we often end up
* sticking a single element in a tuple in order to put it in a bag.
* @param datum Datum to put in the tuple.
+ * @return A tuple with one field
*/
public abstract Tuple newTuple(Object datum);
/**
* Return the actual class representing a tuple that the implementing
- * factory will be returning. This is needed because hadoop (and
- * possibly other systems) we use need to know the exact class we will
- * be using for input and output.
+ * factory will be returning. This is needed because hadoop needs
+ * to know the exact class we will be using for input and output.
* @return Class that implements tuple.
*/
public abstract Class tupleClass();
Modified: hadoop/pig/trunk/src/org/apache/pig/data/package.html
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/package.html?rev=941005&r1=941004&r2=941005&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/data/package.html (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/package.html Tue May 4 19:26:56
2010
@@ -2,8 +2,6 @@
<body>
<p>
-Data types for Pig.
-<p>
This package contains implementations of Pig specific data types as well as
support functions for reading, writing, and using all Pig data types.
<p>