svn commit: r988611 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/ src/org/apache/pig/data/ src/org/apache/pig/impl/ src/org/apache/pig/newplan/ src/org/ap
Author: gates Date: Tue Aug 24 16:26:05 2010 New Revision: 988611 URL: http://svn.apache.org/viewvc?rev=988611view=rev Log: PIG-1311 Document audience and stability for remaining interfaces. Removed: hadoop/pig/trunk/src/org/apache/pig/impl/FunctionInstantiator.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java hadoop/pig/trunk/src/org/apache/pig/newplan/OperatorPlan.java hadoop/pig/trunk/src/org/apache/pig/newplan/optimizer/PlanTransformListener.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Aug 24 16:26:05 2010 @@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1311: Document audience and stability for remaining interfaces (gates) + PIG-506: Does pig need a NATIVE keyword? (aniket486 via thejas) PIG-1510: Add `deepCopy` for LogicalExpressions (swati.j via daijy) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java Tue Aug 24 16:26:05 2010 @@ -17,11 +17,28 @@ */ package org.apache.pig.backend.hadoop.executionengine.physicalLayer; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + + +/** + * Pig's progress indicator. 
An implementation of this interface is passed to + * UDFs to allow them to send heartbeats. By default Hadoop will kill a task + * if it does not receive a heartbeat every 600 seconds. Any operation that + * may take more than this should call progress on a regular basis. + */ +@InterfaceAudience.Public +@InterfaceStability.Stable public interface PigProgressable { -//Use to just inform that you are -//alive + +/** + * Report progress. + */ public void progress(); -//If you have a status to report +/** + * Report progress with a message. + * @param msg message to send with progress report. + */ public void progress(String msg); } Modified: hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java?rev=988611&r1=988610&r2=988611&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java Tue Aug 24 16:26:05 2010 @@ -20,6 +20,9 @@ package org.apache.pig.data; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.io.RawComparator; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + /** * This interface is intended to compare Tuples. The semantics of Tuple comparison must take into account null values in * different ways. According to SQL semantics nulls are not equal. But for other Pig/Latin statements nulls must be @@ -28,6 +31,8 @@ import org.apache.hadoop.io.RawComparato * {@link #compare(byte[],int,int,byte[],int,int)} method. 
* */ +@InterfaceAudience.Public +@InterfaceStability.Evolving @SuppressWarnings("rawtypes") public interface TupleRawComparator extends RawComparator, Configurable { /** Modified: hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java?rev=988611&r1=988610&r2=988611&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java Tue Aug 24 16:26:05 2010 @@ -55,7 +55,7 @@ import org.apache.pig.impl.streaming.Exe import org.apache.pig.impl.streaming.StreamingCommand; import org.apache.pig.impl.util.JarManager; -public class PigContext implements Serializable, FunctionInstantiator { +public class PigContext implements Serializable { private static final long serialVersionUID = 1L; private transient final Log log =
[Pig Wiki] Update of HowlJournal by AlanGates
Dear Wiki user, You have subscribed to a wiki page or wiki category on Pig Wiki for change notification. The HowlJournal page has been changed by AlanGates. http://wiki.apache.org/pig/HowlJournal?action=diff&rev1=1&rev2=2 -- '''Authorization'''<<BR>> The initial proposal is to use HDFS permissions to determine whether Howl operations can be executed. For example, it would not be possible to drop a table unless the user had write permissions on the directory holding that table. We need to determine how to extend this model to data not stored in HDFS (e.g. HBase) and objects that do not exist in HDFS (e.g. views). See HowlSecurity for more information. + '''Dynamic Partitioning'''<<BR>> Currently Howl can only store data into one partition at a time. It needs to support + spraying to multiple partitions in one write. + '''Non-partition Predicate Pushdown'''<<BR>> Since in the future storage formats (such as RCFile) should support predicate pushdown, Howl needs to be able to push predicates into the storage layer when appropriate. '''Notification'''<<BR>> Add the ability for systems such as workflow to be notified when new data arrives in Howl. This will be designed around a few systems receiving notification, not large numbers of users receiving notifications (i.e. we will not be building a general-purpose publish/subscribe system). One solution to this might be an RSS feed or similar simple service.
svn commit: r988625 - in /hadoop/pig/site: ./ author/ publish/ publish/skin/images/
Author: gates Date: Tue Aug 24 17:01:29 2010 New Revision: 988625 URL: http://svn.apache.org/viewvc?rev=988625view=rev Log: PIG-1558 Make forrest work with Java 1.6 by turning off validation. Modified: hadoop/pig/site/author/forrest.properties hadoop/pig/site/build.xml hadoop/pig/site/publish/about.pdf hadoop/pig/site/publish/index.pdf hadoop/pig/site/publish/issue_tracking.pdf hadoop/pig/site/publish/linkmap.pdf hadoop/pig/site/publish/mailing_lists.pdf hadoop/pig/site/publish/philosophy.pdf hadoop/pig/site/publish/releases.pdf hadoop/pig/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/version_control.pdf hadoop/pig/site/publish/whoweare.pdf Modified: hadoop/pig/site/author/forrest.properties URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/forrest.properties?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/author/forrest.properties (original) +++ hadoop/pig/site/author/forrest.properties Tue Aug 24 17:01:29 2010 @@ -66,10 +66,11 @@ project.skin=hadoop-pelt # This set of properties determine if validation is performed # Values are inherited unless overridden. # e.g. if forrest.validate=false then all others are false unless set to true. 
-#forrest.validate=true +forrest.validate=false #forrest.validate.xdocs=${forrest.validate} #forrest.validate.skinconf=${forrest.validate} -#forrest.validate.sitemap=${forrest.validate} +# Make forrest work with Java 1.6 +forrest.validate.sitemap=false #forrest.validate.stylesheets=${forrest.validate} #forrest.validate.skins=${forrest.validate} #forrest.validate.skins.stylesheets=${forrest.validate.skins} Modified: hadoop/pig/site/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/build.xml?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/build.xml (original) +++ hadoop/pig/site/build.xml Tue Aug 24 17:01:29 2010 @@ -2,8 +2,13 @@ project name=site default=update basedir=. - target name=update depends=clean -exec dir=author executable=forrest failonerror=true / +target name=forrest.check unless=forrest.home +fail message='forrest.home' is not defined. +Please pass -Dforrest.home=lt;base of Apache Forrest installationgt; to Ant on the command-line. / +/target + + target name=update depends=clean, forrest.check +exec dir=author executable=${forrest.home}/bin/forrest failonerror=true / copy todir=publish/ fileset dir=author/build/site/ / /copy Modified: hadoop/pig/site/publish/about.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/about.pdf?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/publish/about.pdf (original) +++ hadoop/pig/site/publish/about.pdf Tue Aug 24 17:01:29 2010 @@ -76,8 +76,8 @@ endobj 16 0 obj /Type /Font /Subtype /Type1 -/Name /F3 -/BaseFont /Helvetica-Bold +/Name /F1 +/BaseFont /Helvetica /Encoding /WinAnsiEncoding endobj 17 0 obj @@ -90,8 +90,8 @@ endobj 18 0 obj /Type /Font /Subtype /Type1 -/Name /F1 -/BaseFont /Helvetica +/Name /F3 +/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding endobj 19 0 obj @@ -122,7 +122,7 @@ endobj endobj 3 0 obj -/Font /F3 16 0 R /F5 17 0 R /F1 18 0 R /F2 19 0 R /F7 20 0 R +/Font /F1 16 0 R /F5 17 0 R /F3 18 0 R /F2 19 0 R /F7 20 0 R /ProcSet [ /PDF /ImageC /Text ] 
endobj 9 0 obj @@ -155,8 +155,8 @@ xref 003268 0 n 002223 0 n 002369 0 n -002482 0 n -002592 0 n +002477 0 n +002587 0 n 002700 0 n 002816 0 n trailer Modified: hadoop/pig/site/publish/index.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/index.pdf?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/publish/index.pdf (original) +++ hadoop/pig/site/publish/index.pdf Tue Aug 24 17:01:29 2010 @@ -206,8 +206,8 @@ endobj 31 0 obj /Type /Font
svn commit: r988628 - in /hadoop/pig/site: author/src/documentation/content/xdocs/philosophy.xml publish/philosophy.html publish/philosophy.pdf
Author: gates Date: Tue Aug 24 17:14:12 2010 New Revision: 988628 URL: http://svn.apache.org/viewvc?rev=988628view=rev Log: PIG-1559 Updates to Pig philosophy. Modified: hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml hadoop/pig/site/publish/philosophy.html hadoop/pig/site/publish/philosophy.pdf Modified: hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml?rev=988628r1=988627r2=988628view=diff == --- hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml (original) +++ hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml Tue Aug 24 17:14:12 2010 @@ -21,10 +21,8 @@ section titlePigs Eat Anything/title p -Pig can operate on data whether it has metadata or not. - /p - p - It can operate on data that is relational, nested, or unstructured. +Pig can operate on data whether it has metadata or not. It can operate on data that is relational, nested, or unstructured. And it can easily be +extended to operate on data beyond files, including key/value stores, databases, etc. /p /section @@ -32,7 +30,7 @@ titlePigs Live Anywhere/title p Pig is intended to be a language for parallel data processing. It is not tied to one particular parallel framework. It has been implemented first - on hadoop, but we do not intend that to be only on hadoop. + on Hadoop, but we do not intend that to be only on Hadoop. /p /section @@ -44,12 +42,14 @@ p Pig allows integration of user code where ever possible, so it currently supports user defined field transformation functions, user defined - aggregates, user defined grouping functions, and user defined conditionals. In the future we want to support all the above in non-java languages, - as well as streaming, user defined types, and user defined splits. + aggregates, and user defined conditionals. 
These functions can be written in Java or scripting languages that can compile down to Java (e.g. Jython). + Pig supports user provided load and store functions. It supports external executables via its stream command and Map Reduce jars via its mapreduce + command. It allows users to provide a custom partitioner for their jobs in some circumstances and to set the level of reduce parallelism for their jobs. + command. It allows users to set the level of reduce parallelism for their jobs and in some circumstances to provide a custom partitioner. /p p - Currently pig has no optimizer, so it does not do any operation rearranging. When we add that in the future, it will always be possible for users to - turn code rearranging off, so that pig does exactly what they say in the order they say it. + Pig has an optimizer that rearranges some operations in Pig Latin scripts to give better performance, combines Map Reduce jobs together, etc. However, users + can easily turn this optimizer off to prevent it from making changes that do not make sense in their situation. /p /section Modified: hadoop/pig/site/publish/philosophy.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/philosophy.html?rev=988628r1=988627r2=988628view=diff == --- hadoop/pig/site/publish/philosophy.html (original) +++ hadoop/pig/site/publish/philosophy.html Tue Aug 24 17:14:12 2010 @@ -199,25 +199,23 @@ document.write(Last Published: + docu h2 class=h3Pigs Eat Anything/h2 div class=section p -Pig can operate on data whether it has metadata or not. - /p -p - It can operate on data that is relational, nested, or unstructured. +Pig can operate on data whether it has metadata or not. It can operate on data that is relational, nested, or unstructured. And it can easily be +extended to operate on data beyond files, including key/value stores, databases, etc. 
/p /div -a name=N10023/aa name=Pigs+Live+Anywhere/a +a name=N10020/aa name=Pigs+Live+Anywhere/a h2 class=h3Pigs Live Anywhere/h2 div class=section p Pig is intended to be a language for parallel data processing. It is not tied to one particular parallel framework. It has been implemented first - on hadoop, but we do not intend that to be only on hadoop. + on Hadoop, but we do not intend that to be only on Hadoop. /p /div -a name=N1002D/aa name=Pigs+Are+Domestic+Animals/a +a name=N1002A/aa name=Pigs+Are+Domestic+Animals/a h2 class=h3Pigs Are Domestic Animals/h2 div class=section p @@ -225,17 +223,19 @@ document.write(Last
svn commit: r988730 - in /hadoop/pig/trunk: ./ src/org/apache/pig/builtin/ test/org/apache/pig/test/
Author: dvryaboy Date: Tue Aug 24 21:11:16 2010 New Revision: 988730 URL: http://svn.apache.org/viewvc?rev=988730view=rev Log: PIG-1551 Improve dynamic invokers to deal with no-arg methods and array parameters Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/builtin/GenericInvoker.java hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForDouble.java hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForFloat.java hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForInt.java hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForLong.java hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForString.java hadoop/pig/trunk/src/org/apache/pig/builtin/Invoker.java hadoop/pig/trunk/test/org/apache/pig/test/TestInvoker.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=988730r1=988729r2=988730view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Aug 24 21:11:16 2010 @@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1551: Improve dynamic invokers to deal with no-arg methods and array parameters (dvryaboy) + PIG-1311: Document audience and stability for remaining interfaces (gates) PIG-506: Does pig need a NATIVE keyword? (aniket486 via thejas) Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/GenericInvoker.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/GenericInvoker.java?rev=988730r1=988729r2=988730view=diff == --- hadoop/pig/trunk/src/org/apache/pig/builtin/GenericInvoker.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/GenericInvoker.java Tue Aug 24 21:11:16 2010 @@ -32,9 +32,18 @@ import org.apache.pig.impl.logicalLayer. * Class-specific non-generic extensions of this class are needed for Pig to know what type * of return to expect from exec, and to find the appropriate classes through reflection. 
* All they have to do is implement the constructors that call into super(). Note that the - * no-parameter constructor is <b>required</b>, if nonsensical, for Pig to do its work. + * no-parameter constructor is <b>required</b>, if seemingly nonsensical, for Pig to do its work. + * <p> + * The Invoker family of udfs understand the following class names (all case-independent): + * <li>String + * <li>Long + * <li>Float + * <li>Double + * <li>Int + * <p> + * Invokers can also work with array arguments, represented in Pig as DataBags of single-tuple + * elements. Simply refer to <code>string[]</code>, for example. * <p> - * * This UDF allows one to dynamically invoke Java methods that return a <code>T</code> * <p> * Usage of the Invoker family of UDFs (adjust as appropriate): @@ -54,6 +63,7 @@ import org.apache.pig.impl.logicalLayer. * The first argument to the constructor is the full path to desired method.<br> * The second argument is a list of classes of the method parameters.<br> * If the method is not static, the first element in this list is the object to invoke the method on.<br> + * The second argument is optional (a no-argument static method is assumed if it is not supplied).<br> * The third argument is the keyword "static" (or "true") to signify that the method is static. 
<br> * The third argument is optional, and true by default.<br> * <p> @@ -65,6 +75,11 @@ public abstract class GenericInvoker<T> public GenericInvoker() {} +public GenericInvoker(String fullName) +throws ClassNotFoundException, FrontendException, SecurityException, NoSuchMethodException { + invoker_ = new Invoker<T>(fullName, ""); +} + public GenericInvoker(String fullName, String paramSpecsStr) throws ClassNotFoundException, FrontendException, SecurityException, NoSuchMethodException { invoker_ = new Invoker<T>(fullName, paramSpecsStr); Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForDouble.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForDouble.java?rev=988730&r1=988729&r2=988730&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForDouble.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/InvokeForDouble.java Tue Aug 24 21:11:16 2010 @@ -26,6 +26,10 @@ public class InvokeForDouble extends Gen public InvokeForDouble() {} + public InvokeForDouble(String fullName) throws FrontendException, SecurityException, ClassNotFoundException, NoSuchMethodException { + super(fullName); + } + public InvokeForDouble(String fullName, String paramSpecsStr) throws FrontendException, SecurityException, ClassNotFoundException, NoSuchMethodException { super(fullName, paramSpecsStr); } @@ -34,4 +38,6 @@ public class
svn commit: r988770 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/tools/pigstats/ScriptState.java test/org/apache/pig/test/TestPigStats.java
Author: rding Date: Wed Aug 25 00:31:38 2010 New Revision: 988770 URL: http://svn.apache.org/viewvc?rev=988770view=rev Log: PIG-1557: couple of issue mapping aliases to jobs Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=988770r1=988769r2=988770view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Aug 25 00:31:38 2010 @@ -150,6 +150,8 @@ PIG-1309: Map-side Cogroup (ashutoshc) BUG FIXES +PIG-1557: couple of issue mapping aliases to jobs (rding) + PIG-1552: Nested describe failed when the alias is not referred in the first foreach inner plan (aniket486 via daijy) PIG-1486: update ant eclipse-files target to include new jar and remove contrib dirs from build path (thejas) Modified: hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java?rev=988770r1=988769r2=988770view=diff == --- hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/ScriptState.java Wed Aug 25 00:31:38 2010 @@ -687,6 +687,11 @@ public class ScriptState { } @Override +public void visitLoad(POLoad load) throws VisitorException { +setAlias(load); +} + +@Override public void visitFRJoin(POFRJoin join) throws VisitorException { setAlias(join); } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java?rev=988770r1=988769r2=988770view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java Wed Aug 25 00:31:38 2010 @@ -18,18 +18,33 @@ package 
org.apache.pig.test; +import static org.junit.Assert.*; + import java.io.File; import java.io.IOException; -import junit.framework.TestCase; +import junit.framework.Assert; import org.apache.pig.ExecType; +import org.apache.pig.PigException; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecJob; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceOper; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.backend.hadoop.executionengine.util.MapRedUtil; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.util.LogUtils; +import org.apache.pig.tools.pigscript.parser.ParseException; import org.apache.pig.tools.pigstats.PigStats; +import org.apache.pig.tools.pigstats.ScriptState; +import org.junit.Test; -public class TestPigStats extends TestCase { +public class TestPigStats { +@Test public void testBytesWritten_JIRA_1027() { File outputFile = null; @@ -56,6 +71,33 @@ public class TestPigStats extends TestCa } } +@Test +public void testPigStatsAlias() throws Exception { +PigServer pig = new PigServer(ExecType.LOCAL); +pig.registerQuery(A = load 'input' as (name, age, gpa);); +pig.registerQuery(B = group A by name;); +pig.registerQuery(C = foreach B generate group, COUNT(A);); +pig.registerQuery(D = order C by $1;); +pig.registerQuery(E = limit D 10;); +pig.registerQuery(store E into 'output';); + +LogicalPlan lp = getLogicalPlan(pig); +PhysicalPlan pp = pig.getPigContext().getExecutionEngine().compile(lp, +null); +MROperPlan mp = getMRPlan(pp, pig.getPigContext()); + +assertEquals(3, mp.getKeys().size()); + +MapReduceOper mro = mp.getRoots().get(0); +assertEquals(A,B,C, getAlias(mro)); + +mro = 
mp.getSuccessors(mro).get(0); +assertEquals(D, getAlias(mro)); + +mro = mp.getSuccessors(mro).get(0); +assertEquals(D,E, getAlias(mro)); +} + private void deleteDirectory( File dir ) { File[] files = dir.listFiles(); for( File file : files ) { @@ -67,4 +109,31 @@ public class TestPigStats extends TestCa } dir.delete(); } + +public static