svn commit: r998353 - in /hadoop/pig/trunk: CHANGES.txt build.xml src/docs/forrest.properties test/bin/test-patch.sh
Author: gates Date: Fri Sep 17 22:18:44 2010 New Revision: 998353 URL: http://svn.apache.org/viewvc?rev=998353view=rev Log: PIG-1508 Make 'docs' target (forrest) work with Java 1.6 Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/src/docs/forrest.properties hadoop/pig/trunk/test/bin/test-patch.sh Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=998353r1=998352r2=998353view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Sep 17 22:18:44 2010 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1508: Make 'docs' target (forrest) work with Java 1.6 (cwsteinbach via gates) + PIG-1608: pig should always include pig-default.properties and pig.properties in the pig.jar (nrai via daijy) OPTIMIZATIONS Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=998353r1=998352r2=998353view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Fri Sep 17 22:18:44 2010 @@ -397,25 +397,18 @@ !-- == -- target name=docs depends=forrest.check, javadoc-all description=Generate forrest-based documentation. To use, specify -Dforrest.home=lt;base of Apache Forrest installationgt; on the command line. if=forrest.home - exec dir=${docs.dir} executable=${forrest.home}/bin/forrest - failonerror=true - env key=JAVA_HOME value=${java5.home}/ + exec dir=${docs.dir} executable=${forrest.home}/bin/forrest failonerror=true /exec copy todir=${build.docs} fileset dir=${docs.dir}/build/site/ / /copy /target -target name=forrest.check unless=forrest.home depends=java5.check +target name=forrest.check unless=forrest.home fail message='forrest.home' is not defined. Please pass -Dforrest.home=lt;base of Apache Forrest installationgt; to Ant on the command-line. / /target -target name=java5.check unless=java5.home - fail message='java5.home' is not defined. Forrest requires Java 5. 
- Please pass -Djava5.home=lt;base of Java 5 distributiongt; to Ant on the command-line. / -/target - target name=source-jar depends=cc-compile jar duplicate=preserve jarfile=${output.jarfile.sources} basedir=${src.dir} manifest @@ -943,7 +936,6 @@ arg value=${findbugs.home}/ arg value=${forrest.home}/ arg value=${basedir}/ - arg value=${java5.home}/ arg value=${ant.project.name}/ /exec /target @@ -967,7 +959,6 @@ arg value=${basedir}/ arg value=${trigger.url}/ arg value=${jira.passwd}/ - arg value=${java5.home}/ arg value=${curl.cmd}/ arg value=${defect}/ arg value=${ant.project.name}/ Modified: hadoop/pig/trunk/src/docs/forrest.properties URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/forrest.properties?rev=998353r1=998352r2=998353view=diff == --- hadoop/pig/trunk/src/docs/forrest.properties (original) +++ hadoop/pig/trunk/src/docs/forrest.properties Fri Sep 17 22:18:44 2010 @@ -92,7 +92,11 @@ #forrest.validate=true #forrest.validate.xdocs=${forrest.validate} #forrest.validate.skinconf=${forrest.validate} -#forrest.validate.sitemap=${forrest.validate} + +# PIG-1508: Workaround for http://issues.apache.org/jira/browse/FOR-984 +# Remove when forrest-0.9 is available +forrest.validate.sitemap=false + #forrest.validate.stylesheets=${forrest.validate} #forrest.validate.skins=${forrest.validate} #forrest.validate.skins.stylesheets=${forrest.validate.skins} Modified: hadoop/pig/trunk/test/bin/test-patch.sh URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/bin/test-patch.sh?rev=998353r1=998352r2=998353view=diff == --- hadoop/pig/trunk/test/bin/test-patch.sh (original) +++ hadoop/pig/trunk/test/bin/test-patch.sh Fri Sep 17 22:18:44 2010 @@ -12,8 +12,8 @@ parseArgs() { HUDSON) ### Set HUDSON to true to indicate that this script is being run by Hudson HUDSON=true - if [[ $# != 20 ]] ; then -echo ERROR: usage $0 HUDSON PATCH_DIR SUPPORT_DIR PS_CMD WGET_CMD JIRACLI SVN_CMD GREP_CMD PATCH_CMD FINDBUGS_HOME FORREST_HOME ECLIPSE_HOME PYTHON_HOME 
WORKSPACE_BASEDIR TRIGGER_BUILD JIRA_PASSWD JAVA5_HOME CURL_CMD DEFECT PROJECT NAME + if [[ $# != 19 ]] ; then +echo ERROR: usage $0 HUDSON PATCH_DIR
svn commit: r991693 [1/3] - in /hadoop/pig/trunk: ./ src/org/apache/pig/newplan/logical/expression/ src/org/apache/pig/newplan/logical/optimizer/ src/org/apache/pig/newplan/logical/rules/ test/org/apa
Author: gates Date: Wed Sep 1 20:30:50 2010 New Revision: 991693 URL: http://svn.apache.org/viewvc?rev=991693view=rev Log: PIG-1399: Filter expression optimizations. Added: hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/ConstExpEvaluator.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/DNFExpression.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/DNFPlan.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/DNFPlanGenerator.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionProxy.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/LogicalExpressionSimplifier.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/NotConversionVisitor.java hadoop/pig/trunk/test/org/apache/pig/test/TestFilterSimplification.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/LogicalExpression.java hadoop/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java hadoop/pig/trunk/test/org/apache/pig/test/TestFilterOpNumeric.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=991693r1=991692r2=991693view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Sep 1 20:30:50 2010 @@ -36,6 +36,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1399: Filter expression optimizations (yanz via gates) + PIG-1531: Pig gobbles up error messages (nrai via hashutosh) PIG-1458: aggregate files for replicated join (rding) Modified: hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/LogicalExpression.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/LogicalExpression.java?rev=991693r1=991692r2=991693view=diff == --- hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/LogicalExpression.java (original) +++ 
hadoop/pig/trunk/src/org/apache/pig/newplan/logical/expression/LogicalExpression.java Wed Sep 1 20:30:50 2010 @@ -99,6 +99,7 @@ public abstract class LogicalExpression uidOnlyFieldSchema = fieldSchema.mergeUid(uidOnlyFieldSchema); } + /** * Create the deep copy of this expression and add that into the passed * LogicalExpressionPlan Return the copy of this expression with updated Modified: hadoop/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java?rev=991693r1=991692r2=991693view=diff == --- hadoop/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/newplan/logical/optimizer/LogicalPlanOptimizer.java Wed Sep 1 20:30:50 2010 @@ -33,6 +33,7 @@ import org.apache.pig.newplan.logical.ru import org.apache.pig.newplan.logical.rules.PushUpFilter; import org.apache.pig.newplan.logical.rules.SplitFilter; import org.apache.pig.newplan.logical.rules.StreamTypeCastInserter; +import org.apache.pig.newplan.logical.rules.LogicalExpressionSimplifier; import org.apache.pig.newplan.optimizer.PlanOptimizer; import org.apache.pig.newplan.optimizer.Rule; @@ -53,11 +54,17 @@ public class LogicalPlanOptimizer extend protected ListSetRule buildRuleSets() { ListSetRule ls = new ArrayListSetRule(); +// Logical expression simplifier +SetRule s = new HashSetRule(); +// add logical expression simplification rule +Rule r = new LogicalExpressionSimplifier(FilterLogicExpressionSimplifier); +checkAndAddRule(s, r); +ls.add(s); + // ImplicitSplitInserter set // This set of rules Insert Foreach dedicated for casting after load -SetRule s = new HashSetRule(); -// add split filter rule -Rule r = new ImplicitSplitInserter(ImplicitSplitInserter); +s = new HashSetRule(); +r = new ImplicitSplitInserter(ImplicitSplitInserter); checkAndAddRule(s, r); if (!s.isEmpty()) ls.add(s); @@ 
-186,7 +193,7 @@ public class LogicalPlanOptimizer extend ruleSet.add(rule); } - + private void addListeners() { addPlanTransformListener(new SchemaPatcher()); addPlanTransformListener(new ProjectionPatcher()); Added: hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/ConstExpEvaluator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/newplan/logical/rules/ConstExpEvaluator.java?rev=991693view=auto
svn commit: r988611 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/ src/org/apache/pig/data/ src/org/apache/pig/impl/ src/org/apache/pig/newplan/ src/org/ap
Author: gates Date: Tue Aug 24 16:26:05 2010 New Revision: 988611 URL: http://svn.apache.org/viewvc?rev=988611view=rev Log: PIG-1311 Document audience and stability for remaining interfaces. Removed: hadoop/pig/trunk/src/org/apache/pig/impl/FunctionInstantiator.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java hadoop/pig/trunk/src/org/apache/pig/newplan/OperatorPlan.java hadoop/pig/trunk/src/org/apache/pig/newplan/optimizer/PlanTransformListener.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Aug 24 16:26:05 2010 @@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1311: Document audience and stability for remaining interfaces (gates) + PIG-506: Does pig need a NATIVE keyword? (aniket486 via thejas) PIG-1510: Add `deepCopy` for LogicalExpressions (swati.j via daijy) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PigProgressable.java Tue Aug 24 16:26:05 2010 @@ -17,11 +17,28 @@ */ package org.apache.pig.backend.hadoop.executionengine.physicalLayer; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + + +/** + * Pig's progress indicator. 
An implemenation of this interface is passed to + * UDFs to allow them to send heartbeats. By default Hadoop will kill a task + * if it does not receive a heartbeat every 600 seconds. Any operation that + * may take more than this should call progress on a regular basis. + */ +...@interfaceaudience.public +...@interfacestability.stable public interface PigProgressable { -//Use to just inform that you are -//alive + +/** + * Report progress. + */ public void progress(); -//If you have a status to report +/** + * Report progress with a message. + * @param msg message to send with progress report. + */ public void progress(String msg); } Modified: hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/TupleRawComparator.java Tue Aug 24 16:26:05 2010 @@ -20,6 +20,9 @@ package org.apache.pig.data; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.io.RawComparator; +import org.apache.pig.classification.InterfaceAudience; +import org.apache.pig.classification.InterfaceStability; + /** * This interface is intended to compare Tuples. The semantics of Tuple comparison must take into account null values in * different ways. According to SQL semantics nulls are not equal. But for other Pig/Latin statements nulls must be @@ -28,6 +31,8 @@ import org.apache.hadoop.io.RawComparato * {...@link #compare(byte[],int,int,byte[],int,int)} method. 
* */ +...@interfaceaudience.public +...@interfacestability.evolving @SuppressWarnings(rawtypes) public interface TupleRawComparator extends RawComparator, Configurable { /** Modified: hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java?rev=988611r1=988610r2=988611view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java Tue Aug 24 16:26:05 2010 @@ -55,7 +55,7 @@ import org.apache.pig.impl.streaming.Exe import org.apache.pig.impl.streaming.StreamingCommand; import org.apache.pig.impl.util.JarManager; -public class PigContext implements Serializable, FunctionInstantiator { +public class PigContext implements Serializable { private static final long serialVersionUID = 1L; private transient final Log log
svn commit: r988625 - in /hadoop/pig/site: ./ author/ publish/ publish/skin/images/
Author: gates Date: Tue Aug 24 17:01:29 2010 New Revision: 988625 URL: http://svn.apache.org/viewvc?rev=988625view=rev Log: PIG-1558 Make forrest work with Java 1.6 by turning off validation. Modified: hadoop/pig/site/author/forrest.properties hadoop/pig/site/build.xml hadoop/pig/site/publish/about.pdf hadoop/pig/site/publish/index.pdf hadoop/pig/site/publish/issue_tracking.pdf hadoop/pig/site/publish/linkmap.pdf hadoop/pig/site/publish/mailing_lists.pdf hadoop/pig/site/publish/philosophy.pdf hadoop/pig/site/publish/releases.pdf hadoop/pig/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/version_control.pdf hadoop/pig/site/publish/whoweare.pdf Modified: hadoop/pig/site/author/forrest.properties URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/forrest.properties?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/author/forrest.properties (original) +++ hadoop/pig/site/author/forrest.properties Tue Aug 24 17:01:29 2010 @@ -66,10 +66,11 @@ project.skin=hadoop-pelt # This set of properties determine if validation is performed # Values are inherited unless overridden. # e.g. if forrest.validate=false then all others are false unless set to true. 
-#forrest.validate=true +forrest.validate=false #forrest.validate.xdocs=${forrest.validate} #forrest.validate.skinconf=${forrest.validate} -#forrest.validate.sitemap=${forrest.validate} +# Make forrest work with Java 1.6 +forrest.validate.sitemap=false #forrest.validate.stylesheets=${forrest.validate} #forrest.validate.skins=${forrest.validate} #forrest.validate.skins.stylesheets=${forrest.validate.skins} Modified: hadoop/pig/site/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/build.xml?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/build.xml (original) +++ hadoop/pig/site/build.xml Tue Aug 24 17:01:29 2010 @@ -2,8 +2,13 @@ project name=site default=update basedir=. - target name=update depends=clean -exec dir=author executable=forrest failonerror=true / +target name=forrest.check unless=forrest.home +fail message='forrest.home' is not defined. +Please pass -Dforrest.home=lt;base of Apache Forrest installationgt; to Ant on the command-line. / +/target + + target name=update depends=clean, forrest.check +exec dir=author executable=${forrest.home}/bin/forrest failonerror=true / copy todir=publish/ fileset dir=author/build/site/ / /copy Modified: hadoop/pig/site/publish/about.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/about.pdf?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/publish/about.pdf (original) +++ hadoop/pig/site/publish/about.pdf Tue Aug 24 17:01:29 2010 @@ -76,8 +76,8 @@ endobj 16 0 obj /Type /Font /Subtype /Type1 -/Name /F3 -/BaseFont /Helvetica-Bold +/Name /F1 +/BaseFont /Helvetica /Encoding /WinAnsiEncoding endobj 17 0 obj @@ -90,8 +90,8 @@ endobj 18 0 obj /Type /Font /Subtype /Type1 -/Name /F1 -/BaseFont /Helvetica +/Name /F3 +/BaseFont /Helvetica-Bold /Encoding /WinAnsiEncoding endobj 19 0 obj @@ -122,7 +122,7 @@ endobj endobj 3 0 obj -/Font /F3 16 0 R /F5 17 0 R /F1 18 0 R /F2 19 0 R /F7 20 0 R +/Font /F1 16 0 R /F5 17 0 R /F3 18 0 R /F2 19 0 R /F7 20 0 R /ProcSet [ /PDF /ImageC /Text ] 
endobj 9 0 obj @@ -155,8 +155,8 @@ xref 003268 0 n 002223 0 n 002369 0 n -002482 0 n -002592 0 n +002477 0 n +002587 0 n 002700 0 n 002816 0 n trailer Modified: hadoop/pig/site/publish/index.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/index.pdf?rev=988625r1=988624r2=988625view=diff == --- hadoop/pig/site/publish/index.pdf (original) +++ hadoop/pig/site/publish/index.pdf Tue Aug 24 17:01:29 2010 @@ -206,8 +206,8 @@ endobj 31 0 obj /Type /Font
svn commit: r988628 - in /hadoop/pig/site: author/src/documentation/content/xdocs/philosophy.xml publish/philosophy.html publish/philosophy.pdf
Author: gates Date: Tue Aug 24 17:14:12 2010 New Revision: 988628 URL: http://svn.apache.org/viewvc?rev=988628view=rev Log: PIG-1559 Updates to Pig philosophy. Modified: hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml hadoop/pig/site/publish/philosophy.html hadoop/pig/site/publish/philosophy.pdf Modified: hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml?rev=988628r1=988627r2=988628view=diff == --- hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml (original) +++ hadoop/pig/site/author/src/documentation/content/xdocs/philosophy.xml Tue Aug 24 17:14:12 2010 @@ -21,10 +21,8 @@ section titlePigs Eat Anything/title p -Pig can operate on data whether it has metadata or not. - /p - p - It can operate on data that is relational, nested, or unstructured. +Pig can operate on data whether it has metadata or not. It can operate on data that is relational, nested, or unstructured. And it can easily be +extended to operate on data beyond files, including key/value stores, databases, etc. /p /section @@ -32,7 +30,7 @@ titlePigs Live Anywhere/title p Pig is intended to be a language for parallel data processing. It is not tied to one particular parallel framework. It has been implemented first - on hadoop, but we do not intend that to be only on hadoop. + on Hadoop, but we do not intend that to be only on Hadoop. /p /section @@ -44,12 +42,14 @@ p Pig allows integration of user code where ever possible, so it currently supports user defined field transformation functions, user defined - aggregates, user defined grouping functions, and user defined conditionals. In the future we want to support all the above in non-java languages, - as well as streaming, user defined types, and user defined splits. + aggregates, and user defined conditionals. 
These functions can be written in Java or scripting languages that can compile down to Java (e.g. Jython). + Pig supports user provided load and store functions. It supports external executables via its stream command and Map Reduce jars via its mapreduce + command. It allows users to provide a custom partitioner for their jobs in some circumstances and to set the level of reduce parallelism for their jobs. + command. It allows users to set the level of reduce parallelism for their jobs and in some circumstances to provide a custom partitioner. /p p - Currently pig has no optimizer, so it does not do any operation rearranging. When we add that in the future, it will always be possible for users to - turn code rearranging off, so that pig does exactly what they say in the order they say it. + Pig has an optimizer that rearranges some operations in Pig Latin scripts to give better performance, combines Map Reduce jobs together, etc. However, users + can easily turn this optimizer off to prevent it from making changes that do not make sense in their situation. /p /section Modified: hadoop/pig/site/publish/philosophy.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/philosophy.html?rev=988628r1=988627r2=988628view=diff == --- hadoop/pig/site/publish/philosophy.html (original) +++ hadoop/pig/site/publish/philosophy.html Tue Aug 24 17:14:12 2010 @@ -199,25 +199,23 @@ document.write(Last Published: + docu h2 class=h3Pigs Eat Anything/h2 div class=section p -Pig can operate on data whether it has metadata or not. - /p -p - It can operate on data that is relational, nested, or unstructured. +Pig can operate on data whether it has metadata or not. It can operate on data that is relational, nested, or unstructured. And it can easily be +extended to operate on data beyond files, including key/value stores, databases, etc. 
/p /div -a name=N10023/aa name=Pigs+Live+Anywhere/a +a name=N10020/aa name=Pigs+Live+Anywhere/a h2 class=h3Pigs Live Anywhere/h2 div class=section p Pig is intended to be a language for parallel data processing. It is not tied to one particular parallel framework. It has been implemented first - on hadoop, but we do not intend that to be only on hadoop. + on Hadoop, but we do not intend that to be only on Hadoop. /p /div -a name=N1002D/aa name=Pigs+Are+Domestic+Animals/a +a name=N1002A/aa name=Pigs+Are+Domestic+Animals/a h2 class=h3Pigs Are Domestic Animals/h2 div class=section p @@ -225,17 +223,19 @@ document.write(Last
svn commit: r987014 - in /hadoop/pig/trunk: ./ ivy/ src/docs/src/documentation/content/xdocs/ test/ test/data/ test/data/pigunit/ test/org/apache/pig/pigunit/ test/org/apache/pig/pigunit/pig/ test/org
Author: gates Date: Thu Aug 19 04:33:33 2010 New Revision: 987014 URL: http://svn.apache.org/viewvc?rev=987014view=rev Log: PIG-1404: added PigUnit, a framework fo building unit tests of Pig Latin scripts. Added: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/pigunit.xml hadoop/pig/trunk/test/data/ hadoop/pig/trunk/test/data/pigunit/ hadoop/pig/trunk/test/data/pigunit/top_queries.pig hadoop/pig/trunk/test/data/pigunit/top_queries_expected_top_3.txt hadoop/pig/trunk/test/data/pigunit/top_queries_input_data.txt hadoop/pig/trunk/test/data/pigunit/top_queries_params.txt hadoop/pig/trunk/test/org/apache/pig/pigunit/ hadoop/pig/trunk/test/org/apache/pig/pigunit/Cluster.java hadoop/pig/trunk/test/org/apache/pig/pigunit/MiniClusterRunner.java hadoop/pig/trunk/test/org/apache/pig/pigunit/PigTest.java hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/ hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/GruntParser.java hadoop/pig/trunk/test/org/apache/pig/pigunit/pig/PigServer.java hadoop/pig/trunk/test/org/apache/pig/test/pigunit/ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/TestPigTest.java hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/ hadoop/pig/trunk/test/org/apache/pig/test/pigunit/pig/TestGruntParser.java hadoop/pig/trunk/test/pigunit-tests Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/ivy.xml hadoop/pig/trunk/ivy/libraries.properties hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/site.xml Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=987014r1=987013r2=987014view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Aug 19 04:33:33 2010 @@ -26,6 +26,8 @@ PIG-1249: Safe-guards against misconfigu IMPROVEMENTS +PIG-1404: added PigUnit, a framework fo building unit tests of Pig Latin scripts (romainr via gates) + PIG-1452: to remove hadoop20.jar from lib and use hadoop from the apache maven repo. 
(rding) Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=987014r1=987013r2=987014view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Thu Aug 19 04:33:33 2010 @@ -84,6 +84,10 @@ property name=test.unit.file value=${test.src.dir}/unit-tests/ property name=test.smoke.file value=${test.src.dir}/smoke-tests/ property name=test.all.file value=${test.src.dir}/all-tests/ +property name=pigunit.jarfile value=pigunit.jar / +property name=test.pigunit.src.dir value=${test.src.dir}/org/apache/pig/test/pigunit / +property name=commons-lang.jarfile value=commons-lang-2.4.jar / +property name=test.pigunit.file value=${test.src.dir}/pigunit-tests/ !-- test configuration, use ${user.home}/build.properties to configure values -- @@ -607,6 +611,7 @@ exclude name=**/TestOrderBy2.java / exclude name=**/TestPi.java / exclude name=**/nightly/** / +!-- exclude name=**/pigunit/** / -- exclude name=**/${exclude.testcase}.java if=exclude.testcase / /fileset /batchtest @@ -638,6 +643,22 @@ /target !-- == -- +!-- Pigunit-- +!-- == -- + +target depends=compile-test name=pigunit-jar description=create the pigunit jar file +echo *** Creating pigunit.jar ***/echo + jar destfile=${pigunit.jarfile} +fileset dir=${test.build.classes}/org/apache/pig/pigunit// + zipfileset src=${ivy.lib.dir}/${commons-lang.jarfile} / + /jar +/target + +target name=test-pigunit depends=compile-test,jar-withouthadoop, pigunit-jar description=Run tests that test PigUnit +macro-test-runner test.file=${test.pigunit.file} / +/target + +!-- == -- !-- D I S T R I B U T I O N-- !-- == -- target name=package depends=docs, api-report description=Create a Pig release Modified: hadoop/pig/trunk/ivy.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/ivy.xml?rev=987014r1=987013r2=987014view=diff == --- hadoop/pig/trunk/ivy.xml (original) +++ hadoop/pig/trunk/ivy.xml Thu Aug 19 04:33:33 2010 @@ -84,8 +84,11 @@ dependency org=org.codehaus.jackson 
name=jackson-core-asl
svn commit: r985332 - in /hadoop/pig/trunk: build.xml contrib/CHANGES.txt contrib/owl/
Author: gates Date: Fri Aug 13 19:35:31 2010 New Revision: 985332 URL: http://svn.apache.org/viewvc?rev=985332view=rev Log: PIG-1502 Remove Owl as a contrib project. Removed: hadoop/pig/trunk/contrib/owl/ Modified: hadoop/pig/trunk/build.xml hadoop/pig/trunk/contrib/CHANGES.txt Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=985332r1=985331r2=985332view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Fri Aug 13 19:35:31 2010 @@ -359,11 +359,10 @@ target name=javadoc-all depends=jar, ivy-javadoc description=Create documentation including all contrib projects mkdir dir=${build.javadoc} / -javadoc overview=${src.dir}/overview.html packagenames=org.apache.pig*,org.apache.hadoop.zebra*,org.apache.hadoop.owl* destdir=${build.javadoc} author=true version=true use=true windowtitle=${Name} ${version} API doctitle=${Name} ${version} API bottom=Copyright amp;copy; ${year} The Apache Software Foundation +javadoc overview=${src.dir}/overview.html packagenames=org.apache.pig*,org.apache.hadoop.zebra* destdir=${build.javadoc} author=true version=true use=true windowtitle=${Name} ${version} API doctitle=${Name} ${version} API bottom=Copyright amp;copy; ${year} The Apache Software Foundation packageset dir=${src.dir} / packageset dir=contrib/piggybank/java/src/main/java/ packageset dir=contrib/zebra/src/java/ -packageset dir=contrib/owl/java/main/ link href=${javadoc.link.java} / classpath path refid=javadoc-classpath / @@ -371,14 +370,10 @@ fileset dir=build include name=**/zebra-*-dev.jar/ /fileset -fileset dir=contrib/owl -include name=**/owl-*-dev.jar/ -/fileset /classpath group title=pig packages=org.apache.pig* / group title=contrib: Piggybank packages=org.apache.pig.piggybank* / group title=contrib: Zebra packages=org.apache.hadoop.zebra*/ -group title=contrib: Owl packages=org.apache.hadoop.owl*/ /javadoc /target !-- == -- Modified: hadoop/pig/trunk/contrib/CHANGES.txt URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=985332r1=985331r2=985332view=diff == --- hadoop/pig/trunk/contrib/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/CHANGES.txt Fri Aug 13 19:35:31 2010 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1502 Remove Owl as a contrib project (gates) + PIG-1386 UDF to extend functionalities of MaxTupleBy1stField (hcbusy via olgan) PIG-1526 improvements to HiveColumnarLoader - Partitioning Support (gerritjvv via olgan)
svn commit: r966413 - in /hadoop/pig/trunk: .gitignore CHANGES.txt
Author: gates Date: Wed Jul 21 20:49:03 2010 New Revision: 966413 URL: http://svn.apache.org/viewvc?rev=966413view=rev Log: PIG-1509: Add .gitignore file Added: hadoop/pig/trunk/.gitignore Modified: hadoop/pig/trunk/CHANGES.txt Added: hadoop/pig/trunk/.gitignore URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/.gitignore?rev=966413view=auto == --- hadoop/pig/trunk/.gitignore (added) +++ hadoop/pig/trunk/.gitignore Wed Jul 21 20:49:03 2010 @@ -0,0 +1,7 @@ +*~ +build/ +src-gen/ +test/org/apache/pig/test/utils/dotGraph/parser/ +ivy/*.jar +pig.jar + Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=966413r1=966412r2=966413view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Jul 21 20:49:03 2010 @@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1509: Add .gitignore file (cwsteinbach via gates) + PIG-1478: Add progress notification listener to PigRunner API (rding) PIG-1472: Optimize serialization/deserialization between Map and Reduce and between MR jobs (thejas)
svn commit: r964177 - in /hadoop/pig/trunk: ./ src/org/apache/pig/builtin/
Author: gates Date: Wed Jul 14 20:21:26 2010 New Revision: 964177 URL: http://svn.apache.org/viewvc?rev=964177view=rev Log: Javadoc improvements for org.apache.pig.builtin package. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/builtin/ARITY.java hadoop/pig/trunk/src/org/apache/pig/builtin/AVG.java hadoop/pig/trunk/src/org/apache/pig/builtin/BagSize.java hadoop/pig/trunk/src/org/apache/pig/builtin/BinStorage.java hadoop/pig/trunk/src/org/apache/pig/builtin/CONCAT.java hadoop/pig/trunk/src/org/apache/pig/builtin/COUNT.java hadoop/pig/trunk/src/org/apache/pig/builtin/COUNT_STAR.java hadoop/pig/trunk/src/org/apache/pig/builtin/ConstantSize.java hadoop/pig/trunk/src/org/apache/pig/builtin/DIFF.java hadoop/pig/trunk/src/org/apache/pig/builtin/Distinct.java hadoop/pig/trunk/src/org/apache/pig/builtin/DoubleAvg.java hadoop/pig/trunk/src/org/apache/pig/builtin/DoubleMax.java hadoop/pig/trunk/src/org/apache/pig/builtin/DoubleMin.java hadoop/pig/trunk/src/org/apache/pig/builtin/DoubleSum.java hadoop/pig/trunk/src/org/apache/pig/builtin/FloatAvg.java hadoop/pig/trunk/src/org/apache/pig/builtin/FloatMax.java hadoop/pig/trunk/src/org/apache/pig/builtin/FloatMin.java hadoop/pig/trunk/src/org/apache/pig/builtin/FloatSum.java hadoop/pig/trunk/src/org/apache/pig/builtin/IntAvg.java hadoop/pig/trunk/src/org/apache/pig/builtin/IntMax.java hadoop/pig/trunk/src/org/apache/pig/builtin/IntMin.java hadoop/pig/trunk/src/org/apache/pig/builtin/IntSum.java hadoop/pig/trunk/src/org/apache/pig/builtin/IsEmpty.java hadoop/pig/trunk/src/org/apache/pig/builtin/LongAvg.java hadoop/pig/trunk/src/org/apache/pig/builtin/LongMax.java hadoop/pig/trunk/src/org/apache/pig/builtin/LongMin.java hadoop/pig/trunk/src/org/apache/pig/builtin/LongSum.java hadoop/pig/trunk/src/org/apache/pig/builtin/MAX.java hadoop/pig/trunk/src/org/apache/pig/builtin/MIN.java hadoop/pig/trunk/src/org/apache/pig/builtin/MapSize.java hadoop/pig/trunk/src/org/apache/pig/builtin/PigStorage.java 
hadoop/pig/trunk/src/org/apache/pig/builtin/RANDOM.java hadoop/pig/trunk/src/org/apache/pig/builtin/SIZE.java hadoop/pig/trunk/src/org/apache/pig/builtin/SUM.java hadoop/pig/trunk/src/org/apache/pig/builtin/StringConcat.java hadoop/pig/trunk/src/org/apache/pig/builtin/StringMax.java hadoop/pig/trunk/src/org/apache/pig/builtin/StringMin.java hadoop/pig/trunk/src/org/apache/pig/builtin/StringSize.java hadoop/pig/trunk/src/org/apache/pig/builtin/TOKENIZE.java hadoop/pig/trunk/src/org/apache/pig/builtin/TextLoader.java hadoop/pig/trunk/src/org/apache/pig/builtin/TupleSize.java hadoop/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=964177r1=964176r2=964177view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Jul 14 20:21:26 2010 @@ -100,6 +100,8 @@ PIG-1309: Map-side Cogroup (ashutoshc) BUG FIXES +PIG-1409: Fix up javadocs for org.apache.pig.builtin (gates) + PIG-1490: Make Pig storers work with remote HDFS in secure mode (rding) PIG-1469: DefaultDataBag assumes ArrayList as default List type (azaroth via dvryaboy) Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/ARITY.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/ARITY.java?rev=964177r1=964176r2=964177view=diff == --- hadoop/pig/trunk/src/org/apache/pig/builtin/ARITY.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/ARITY.java Wed Jul 14 20:21:26 2010 @@ -26,6 +26,11 @@ import org.apache.pig.data.DataType; import org.apache.pig.data.Tuple; import org.apache.pig.impl.logicalLayer.schema.Schema; +/** + * Find the number of fields in a tuple. Expected input is a tuple, + * output is an integer. + * @deprecated Use {...@link SIZE} instead. 
+ */ public class ARITY extends EvalFuncInteger { @Override Modified: hadoop/pig/trunk/src/org/apache/pig/builtin/AVG.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/AVG.java?rev=964177r1=964176r2=964177view=diff == --- hadoop/pig/trunk/src/org/apache/pig/builtin/AVG.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/AVG.java Wed Jul 14 20:21:26 2010 @@ -38,8 +38,21 @@ import org.apache.pig.backend.executione /** - * Generates the average of the values of the first field of a tuple. This class is Algebraic in - * implemenation, so if possible the execution will be split into a local and global application + * Generates the average of a set of values. This class implements + * {...@link
svn commit: r964182 - /hadoop/pig/trunk/src/org/apache/pig/builtin/package.html
Author: gates Date: Wed Jul 14 20:23:57 2010 New Revision: 964182 URL: http://svn.apache.org/viewvc?rev=964182view=rev Log: PIG-1409: File I forgot to add in the last checkin. Added: hadoop/pig/trunk/src/org/apache/pig/builtin/package.html Added: hadoop/pig/trunk/src/org/apache/pig/builtin/package.html URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/builtin/package.html?rev=964182view=auto == --- hadoop/pig/trunk/src/org/apache/pig/builtin/package.html (added) +++ hadoop/pig/trunk/src/org/apache/pig/builtin/package.html Wed Jul 14 20:23:57 2010 @@ -0,0 +1,11 @@ +html +body + +p +This package contains builtin Pig UDFs. This includes +...@link org.apache.pig.EvalFunc}s, +...@link org.apache.pig.LoadFunc}s and +...@link org.apache.pig.StoreFunc}s. + +/body +/html
svn commit: r954989 - in /hadoop/pig/site: author/src/documentation/content/xdocs/ publish/ publish/skin/images/
Author: gates Date: Tue Jun 15 18:03:50 2010 New Revision: 954989 URL: http://svn.apache.org/viewvc?rev=954989view=rev Log: Moved Owen, Nigel, Pi, and Utkarsh to emeriti. Modified: hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml hadoop/pig/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/skin/images/rc-t-r-15-1body-2menu-3menu.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2searchbox-3searchbox.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-selected-3tab-selected.png hadoop/pig/site/publish/skin/images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png hadoop/pig/site/publish/whoweare.html hadoop/pig/site/publish/whoweare.pdf Modified: hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml?rev=954989r1=954988r2=954989view=diff == --- hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml (original) +++ hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml Tue Jun 15 18:03:50 2010 @@ -60,14 +60,6 @@ /tr tr -tdnigel/td -tda href=http://people.apache.org/~nigel;Nigel Daley/a/td -tdYahoo!/td -tdQA/td -td-8/td - /tr - - tr tdolga/td tda href=http://people.apache.org/~olga;Olga Natkovich/a/td tdYahoo!/td @@ -76,22 +68,6 @@ /tr tr -tdomalley/td -tda href=http://people.apache.org/~omalley;Owen O'Malley/a/td -tdYahoo!/td -td/td -td-8/td - /tr - - tr -tdpisong/td -tda href=http://people.apache.org/~pisong;Pi Song/a/td -td/td -td/td -td+10/td - /tr - - tr 
tdpradeepkth/td tda href=http://people.apache.org/~pradeepkth;Pradeep Kamath/a/td tdYahoo!/td @@ -108,14 +84,6 @@ /tr tr -tdutkarsh/td -tda href=http://people.apache.org/~utkarsh;Utkarsh Srivastava/a/td -tdTwitter/td -td/td -td-8/td - /tr - - tr tdyanz/td tda href=http://people.apache.org/~yanz;Yan Zhou/a/td tdYahoo!/td @@ -147,6 +115,14 @@ td-8/td /tr + tr +tddvryaboy/td +tda href=http://squarecog.com;Dmitriy Ryaboy/a/td +tdTwitter/td +td/td +td-8/td + /tr + /table /section @@ -156,7 +132,11 @@ pCommitters who are no longer active on Pig are:/p ul +liNigel Daley/li +liOwen O'Malley/li liChris Olston/li +liPi Song/li +liUtkarsh Srivastava/li /ul /section Modified: hadoop/pig/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-b-l-15-1body-2menu-3menu.png?rev=954989r1=954988r2=954989view=diff == Binary files - no diff available. Modified: hadoop/pig/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-b-r-15-1body-2menu-3menu.png?rev=954989r1=954988r2=954989view=diff == Binary files - no diff available. Modified: hadoop/pig/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-b-r-5-1header-2tab-selected-3tab-selected.png?rev=954989r1=954988r2=954989view=diff == Binary files - no diff available. Modified: hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2searchbox-3searchbox.png?rev=954989r1=954988r2=954989view=diff == Binary files - no diff available. 
Modified: hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-selected-3tab-selected.png?rev=954989r1=954988r2=954989view=diff == Binary files - no diff available. Modified: hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/skin/images/rc-t-l-5-1header-2tab-unselected-3tab
svn commit: r945807 - in /hadoop/pig/site/publish: whoweare.html whoweare.pdf
Author: gates Date: Tue May 18 18:29:04 2010 New Revision: 945807 URL: http://svn.apache.org/viewvc?rev=945807view=rev Log: Checking in changes to Who We Are for Dmitriy since forrest was giving his Mac fits. Modified: hadoop/pig/site/publish/whoweare.html hadoop/pig/site/publish/whoweare.pdf Modified: hadoop/pig/site/publish/whoweare.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/whoweare.html?rev=945807r1=945806r2=945807view=diff == --- hadoop/pig/site/publish/whoweare.html (original) +++ hadoop/pig/site/publish/whoweare.html Tue May 18 18:29:04 2010 @@ -359,12 +359,23 @@ document.write(Last Published: + docu /tr + +tr + +td colspan=1 rowspan=1dvryaboy/td +td colspan=1 rowspan=1a href=http://squarecog.com;Dmitriy Ryaboy/a/td +td colspan=1 rowspan=1Twitter/td +td colspan=1 rowspan=1/td +td colspan=1 rowspan=1-8/td + +/tr + /table /div -a name=N101EA/aa name=Emeriti/a +a name=N10207/aa name=Emeriti/a h2 class=h3Emeriti/h2 div class=section pCommitters who are no longer active on Pig are:/p Modified: hadoop/pig/site/publish/whoweare.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/whoweare.pdf?rev=945807r1=945806r2=945807view=diff == --- hadoop/pig/site/publish/whoweare.pdf (original) +++ hadoop/pig/site/publish/whoweare.pdf Tue May 18 18:29:04 2010 @@ -47,10 +47,10 @@ endobj endobj 12 0 obj - /Length 4052 /Filter [ /ASCII85Decode /FlateDecode ] + /Length 4250 /Filter [ /ASCII85Decode /FlateDecode ] stream 
-GatV$Ar:T(2.C;lFOb$+Qg%_;-37:dPK$o[g?h'P[QLs0%lYtNTAPA,uP@)OZMZad7\P]U_9Jr]cO.8$?j6?G7(M/X!SF+9HCl`2j7eDE)d-0c=#ni4mFL-o)o.5lnn*^%r3w...@cn;`kN[g1g1o?5\=S#cRg\kW.t=1W9U`00_jBPY9%e\Ndtmg*2hlg?FrN[irFMD^c=W/)CGPSLMs)8m/t...@l8yj[78q13eaat@nL3uW'$-h,!G5=N,QRi7o(28`'gEQ5*=NTs8SV!=R%Id\kk9!F,RZ?VZ*[\SdP2tDl$=\^44C*6YG[A*WUGV5b*G.0/Op_ll,`,UNbj30G3pt0sf[e[Z?*^Zau9Aad16#q4CSPprPe'm$4gaHR:9V*`?16J?W)-'ahPt?j8n$Dj1=$thog(hfL']...@s1l#)b,ek/k_k%m01X0MY6m/NO^,rE[Q6]82)\(Va\ubO-Z[ijV[CV/3YeBF768l]jn]]LOBncDY09V9]RhC[!%$3N#t?^4IDe.IS.86ZC-X/o!:E;k6dW]Qh,*lGn+2I7;bw5...@s/m*?,i9qXcsL'j...@^`%om;3ka4[dB,:-W!6r23Q-Z3/]\;gbmb)+3C/(`($r^Q96*O7X:i=;ap39=-BRb3'%...@.b`q\'Z^H=X5hQ[cNWmh)KR5NO!'r...@r@$\-+6DX(pkMWdO,N$pKU7H6XmoqRSfDgOFPu)+PEhaQ*U8sKt+QT+DCm#5\A=+]eK/f;buW+];jWlITlDi*fha7,Ddb(b*f!!ls3c...@97'NL7/Ih4D?rR$5aVOCe9TdsZtn7sZKMf187IMTZar?'[NV$Jr6u8QtPAAV[YWIIM%gVCWd1tK2eh\kNA-;N9./?$.j9DO:7sf\1j,Tu_k-mKLuV-RS'!0dk/#-'XKWk?]'x...@0o4el,;ZCj%p=c...@gq[ C,F7Cn]?8$tCFHUa-6g3;C$85d/`d/+3tAa+f^-6-DS)e(*q8-...@#%jbuacvkh...@#6k+x)/_\Q8fmA4c4ER''aJjJ5a0*8_u'm...@hm`m+kke17G\e5=p-$n-eGjY+ajKT%3Chfl@:FZaFeR%jn59-KrQ+-d.;lP`S2o?5eB'#sfej1etq$3uda...@q4'+D[ldrJK/EXOsK9n?E,Gp/C^)H.UYM43m^lYl`u3gGc/c])$n:^(Rg(WU76#-Xd)KF*/SQ*!B4/#RA:qZ?t_fHS)6O7h+49fe7l6pH:p8Eszlnhs0^...@gultgwn8[$hsl2]ao#d\0hj%YcPi'\F'U4AmGc?Cqe^u9V99(i[S548,J],eD:0@;H8:G/(3Sh[q*j`+wm...@8l2i!cq^/ek1AnAQHHot8-A^J'd...@pdbmh)d4%Mbp)f:KJTh80R*!524\)GL'hg;hr.bJ$)4m84Q@,p80(bmPu39qpd5?1nNm91,;D(P72'M?RqEF[\BF-VQ;bKu?b9_]CgboJ1)6Y!Z*g^H)kAI7P73Xks1hGD9J1AO8BN)@^u15I+bB_kAseQ,AkP9Xp4AseppK'/`Tu[9Rr6cuNE@k'Ub!EEN[utZ(8jG4E'PKa!Coj3Cfb6'''N#D'eSY_YAWf4C=.,Z0ZkIX.0tOU+!?5aMIh\EN[][!.uAQ%cLIi)l^\u0Fgd.F9b2;I/t^0OuDB3Ca[Wt/EA\sbLthobPW_de,,$0-c)%A:6bSQo[?K_,jA2L'1N.g4gc,S+SV9;=...@69[ju5:)4WlUl[n?5eA\#sfejoM'rN3udy...@q0*1$QmXPc-i=O6mQ0kjb2+A)ul4...@[t2_NtEUNJej(\W%f9DPS0')5a-eD@;/lMn;9TSY8lLSsVd?^!BO*TdWTan!nmG\RL;YturigLlp$QTnBYK#G`3J2'6H-o-eJ 
,D+ajKT%3Ctj!eE2%aFj*Pjn5A-KrQ+-d.;mP`S3?5eBg#sfejZq`t%3udq...@q0+$QmXR],9:CImdkjb2+AFUGm(RtO:P\mpst$eOOei*SAoIQ8kM@:!),.cDJZ,L4O.%,!Pr2Psin...@r9n,9#7C^*HAZH77s:*gd*HY+detlTcqE9Gg6F=NAbep\gG6B6-7Wb;VN:Z$4K#F4BZ)CH]/\2#B\J#Rh^Z)d...@fqg-'i_EJ-pVb%^;G,mSj5fVh.,%%hc2bo...@9:H5suu-fZj;/FTV(l^+r%cbe_H85,\a+igV-KVG=He-TM,iSE=FXPFFCT*^adLS$)%]qd^/t+CqSB,ZAe+ui8B%`kW.`)Ye=4LSFCT,6...@*;^*h,U(!hBk_l.O%8K\pK4i_Cb6+n...Z;$HpP-nPpq$bipnB$7060`Kse\O+]+oTkD`Qj4+JT$GjpZeUIIZ1U0gu1T@JPf:t(+eIam$sS%Uh9:Me'l...@h`i;t(_bcho+...@nelkrj$i)$:Dubq28)NWQJ)2/?S(U@Eb/G'4]i!i6eZ3Y[KP?IB2...@somihaappt^=a%+qjfu;A%:,1\NG?f52E9$r4BscGV(pfj+eYiWTd8iV3i_nG4i7iFr9OGq]I']0R,2+F=t,+QkoI:]NbIf:t(5l`SsXK`hl?cqDCAi2jd_kOaLM8jT0;AYl$2CdJRVi3t`iJ%u!T^Ls-d[bmBan2LSls2bQ3M#758'-jD($sbffko*l`g`a%...@ezk^5/D2!R.Hs!VSq:4LO'1]mc13oC#4BQ*fGVM3*fgcSr`!`%%_XSQNi:UMLdk_Q'4...@pg(Z`Q-.$!g'M(7OTdW524W[NCj?Q`S=Su4f^X#/q+...@09=q6$pdaiks!cg/FOZXC;384$__SXmmJh^Cf\:hg\KpYet/W:AJMt,%?7[?j!/W.WNK(kN ;7f!t\'WJ%bC5o8,P,M_i*hF'M820(+qMu*(IW%4R%Pfs)7V/f?KfMoB9a%HR5e59K4;iMU]f;Ls#+1igJUK!l(:PlZsueJs,+#sWpN6MN[UQU6Wa6mnTkY!%%HFJ+L+qtiIYmID*M'0/XjaPC;3[;q1'%M,0USu9Uj^Uh4$ZmXn-qrI'bn:'XLSbp-!%]B!eid)Ui=Leq+C3;3je#_mcHGW'g...@3j[p(.Cg[R:CI?p/#Nq/BE+^A9e+qWu59B4Pt9Mq$acBSu7-m=5g]eff8rK-eBq0o(G!8#!Zc$PhZ_I[Be-IAc,(QtoKeKW@uVra^dT:m5F'?UXt34Y;OB8E7ED:uaKZ+XNe?+!d8uqHjb7BtrpDi'HA$r5p8TZr*u2orT5^nt-ZZ=-\tc),m...@bh^gbbu2pe$yrmi,c]=i0uku...@l07o
svn commit: r934649 [17/24] - in /hadoop/pig/trunk/contrib: ./ owl/ owl/bin/ owl/ci/ owl/ci/test_results/ owl/ci/test_scripts/ owl/docs/ owl/ivy/ owl/java/ owl/java/lib/ owl/java/main/ owl/java/main/M
Added: hadoop/pig/trunk/contrib/owl/java/test/org/apache/hadoop/owl/client/MultiplePartitionIntervalTest.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/owl/java/test/org/apache/hadoop/owl/client/MultiplePartitionIntervalTest.java?rev=934649view=auto == --- hadoop/pig/trunk/contrib/owl/java/test/org/apache/hadoop/owl/client/MultiplePartitionIntervalTest.java (added) +++ hadoop/pig/trunk/contrib/owl/java/test/org/apache/hadoop/owl/client/MultiplePartitionIntervalTest.java Thu Apr 15 23:56:44 2010 @@ -0,0 +1,750 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.owl.client; + +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.List; +import java.util.Date; +import org.apache.hadoop.owl.OwlTestCase; +import org.apache.hadoop.owl.common.OwlException; +import org.apache.hadoop.owl.common.OwlUtil; +import org.apache.hadoop.owl.client.CompleteSanityTest; +import org.apache.hadoop.owl.protocol.OwlDataElement; +import org.apache.hadoop.owl.protocol.OwlKeyValue; +import org.apache.hadoop.owl.protocol.OwlObject; +import org.apache.hadoop.owl.protocol.OwlPartitionKey; +import org.apache.hadoop.owl.protocol.OwlPartitionProperty; +import org.apache.hadoop.owl.protocol.OwlPropertyKey; +import org.apache.hadoop.owl.protocol.OwlResultObject; +import org.apache.hadoop.owl.protocol.OwlTable; +import org.junit.Before; +import org.junit.Test; + +public class MultiplePartitionIntervalTest extends OwlTestCase { +// private int counter = 0; +private static OwlClient client; +private CompleteSanityTest csit; +private MultiplePartitionTest mpit; + +public MultiplePartitionIntervalTest() { +client = new OwlClient(getUri()); +this.csit = new CompleteSanityTest(); +this.mpit = new MultiplePartitionTest(); +} + +@Before +public void testInitialize() { +} + +public void createMultiplePartitionedIntervalOwlTable(String owlTableName, +String databaseName, String propertyKeyName1, String type1, +String propertyKeyName2, String type2, String ptnType, +String partitionKey1, String partitionKey1Type1, +String partitionKey2, String partitionKey2Type2, +String part1PropertyKey1, String part1PropertyKey1Type1, +String part1PropertyKey2, String part1PropertyKey2Type2, +String part2PropertyKey1, String part2PropertyKey1Type1, +String part2PropertyKey2, String part2PropertyKey2Type2) +throws OwlException { + +System.out.println(Owl Table name + owlTableName ++ within owldatabase + databaseName); + +String testCmd = create owltable type basic + owlTableName ++ within owldatabase + databaseName + 
define property key ++ propertyKeyName1 + : + type1 + , + propertyKeyName2 ++ : + type2 + partitioned by + ptnType ++ with partition key + partitionKey1 ++ define property key + part1PropertyKey1 + : ++ part1PropertyKey1Type1 + , + part1PropertyKey2 + : ++ part1PropertyKey2Type2 ++ partitioned by LIST with partition key + partitionKey2 ++ : + partitionKey2Type2 + define property key ++ part2PropertyKey1 + : + part2PropertyKey1Type1 + , ++ part2PropertyKey2 + : + part2PropertyKey2Type2 ++ schema \f1:int\; + +System.out.println(testCmd); +client.execute(testCmd); +mpit.verifyCreateMultiplePartitionedOwlTable(owlTableName, databaseName, +propertyKeyName1, type1, propertyKeyName2, type2, +partitionKey1, partitionKey1Type1, partitionKey2, +partitionKey2Type2, part1PropertyKey1, part1PropertyKey1Type1, +part1PropertyKey2, part1PropertyKey2Type2, part2PropertyKey1, +part2PropertyKey1Type1, part2PropertyKey2, +part2PropertyKey2Type2); +} + +public void publishDataElementToMultiplePartitionedIntervalOwlTable( +String owlTableName, String databaseName, String propertyKeyName1, +int
svn commit: r934649 [24/24] - in /hadoop/pig/trunk/contrib: ./ owl/ owl/bin/ owl/ci/ owl/ci/test_results/ owl/ci/test_scripts/ owl/docs/ owl/ivy/ owl/java/ owl/java/lib/ owl/java/main/ owl/java/main/M
Added: hadoop/pig/trunk/contrib/owl/setup/oracle/orm.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/owl/setup/oracle/orm.xml?rev=934649view=auto == --- hadoop/pig/trunk/contrib/owl/setup/oracle/orm.xml (added) +++ hadoop/pig/trunk/contrib/owl/setup/oracle/orm.xml Thu Apr 15 23:56:44 2010 @@ -0,0 +1,464 @@ +?xml version=1.0 encoding=UTF-8 ? + +!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the License); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an AS IS BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+-- + !-- This orm is for oracle -- + +entity-mappings xmlns=http://java.sun.com/xml/ns/persistence/orm; +xmlns:xsi=http://www.w3.org/2001/XMLSchema-instance; +xsi:schemaLocation=http://java.sun.com/xml/ns/persistence/orm http://java.sun.com/xml/ns/persistence/orm_2_0.xsd; +version=2.0 +descriptionJPA Mapping file for Owl with JPA/description +packageorg.apache.hadoop.owl.orm/package + +entity class=org.apache.hadoop.owl.entity.DatabaseEntity name=DatabaseEntity +table name=owl_database/ +attributes +id name=id +column name=odb_id/ +generated-value strategy=AUTO/ +/id +basic name=name +column name=odb_name length=255/ +/basic +basic name=description +column name=odb_description length=255/ +/basic +basic name=owner +column name=odb_owner length=255/ +/basic +basic name=location +column name=odb_location length=750/ +/basic +basic name=createdAt +column name=odb_createdat/ +/basic +basic name=lastModifiedAt +column name=odb_lastmodified/ +/basic +basic name=version +column name=odb_version/ +/basic +/attributes +/entity + +entity class=org.apache.hadoop.owl.entity.OwlTableEntity name=OwlTableEntity +table name=owl_table/ +attributes +id name=id +column name=ot_id/ +generated-value strategy=AUTO/ +/id +basic name=databaseId +column name=ot_database_id/ +/basic +basic name=name +column name=ot_name length=255/ +/basic +basic name=description +column name=ot_description length=255/ +/basic +basic name=location +column name=ot_location length=750/ +/basic +basic name=owner +column name=ot_owner length=255/ +/basic +basic name=createdAt +column name=ot_createdat/ +/basic +basic name=lastModifiedAt +column name=ot_lastmodified/ +/basic +basic name=version +column name=ot_version/ +/basic +basic name=schemaId +column name=ot_schemaid/ +/basic +basic name=loader +column name=ot_loader/ +/basic +one-to-many name=partitionKeys target-entity=org.apache.hadoop.owl.entity.PartitionKeyEntity mapped-by=owlTable +join-column name=pak_owltable_id/ +cascade +cascade-all/ +/cascade 
+/one-to-many + +one-to-many name=propertyKeys target-entity=org.apache.hadoop.owl.entity.PropertyKeyEntity mapped-by=owlTable +join-column name=prk_owltable_id/ +cascade +cascade-all/ +/cascade +/one-to-many + +one-to-many name=keyValues target-entity=org.apache.hadoop.owl.entity.OwlTableKeyValueEntity mapped-by=owlTable +join-column name=otkv_owltable_id/ +cascade +cascade-all/ +/cascade +/one-to-many + +/attributes +/entity + +entity class=org.apache.hadoop.owl.entity.PartitionEntity name=PartitionEntity +table name=owl_partition/ +attributes +id name=id
svn commit: r933437 - in /hadoop/pig/branches/branch-0.5: CHANGES.txt src/docs/src/documentation/content/xdocs/site.xml
Author: gates Date: Mon Apr 12 22:23:41 2010 New Revision: 933437 URL: http://svn.apache.org/viewvc?rev=933437view=rev Log: PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for each version release. Modified: hadoop/pig/branches/branch-0.5/CHANGES.txt hadoop/pig/branches/branch-0.5/src/docs/src/documentation/content/xdocs/site.xml Modified: hadoop/pig/branches/branch-0.5/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.5/CHANGES.txt?rev=933437r1=933436r2=933437view=diff == --- hadoop/pig/branches/branch-0.5/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.5/CHANGES.txt Mon Apr 12 22:23:41 2010 @@ -18,7 +18,7 @@ Pig Change Log -Release 0.5.0 - Unreleased +Release 0.5.0 - 29 October 2009 INCOMPATIBLE CHANGES @@ -30,6 +30,8 @@ OPTIMIZATIONS BUG FIXES +PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for each version release (gates) + PIG-963: Join in local mode matches null keys (pradeepkth) PIG-660: Integration with Hadoop 20 (sms via olgan) PIG-956: 10 minute commit tests (olgan) Modified: hadoop/pig/branches/branch-0.5/src/docs/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.5/src/docs/src/documentation/content/xdocs/site.xml?rev=933437r1=933436r2=933437view=diff == --- hadoop/pig/branches/branch-0.5/src/docs/src/documentation/content/xdocs/site.xml (original) +++ hadoop/pig/branches/branch-0.5/src/docs/src/documentation/content/xdocs/site.xml Mon Apr 12 22:23:41 2010 @@ -58,7 +58,7 @@ See http://forrest.apache.org/docs/linki /docs external-refs -api href=http://hadoop.apache.org/pig/javadoc/docs/api/; / +api href=http://hadoop.apache.org/pig/docs/r0.5.0/api/; / wiki href=http://wiki.apache.org/pig/; / faq href=http://wiki.apache.org/pig/FAQ; / relnotes href=http://hadoop.apache.org/pig/releases.html; /
svn commit: r933442 - in /hadoop/pig/branches/branch-0.4: CHANGES.txt src/docs/src/documentation/content/xdocs/site.xml
Author: gates Date: Mon Apr 12 23:10:31 2010 New Revision: 933442 URL: http://svn.apache.org/viewvc?rev=933442view=rev Log: PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for each version release. Modified: hadoop/pig/branches/branch-0.4/CHANGES.txt hadoop/pig/branches/branch-0.4/src/docs/src/documentation/content/xdocs/site.xml Modified: hadoop/pig/branches/branch-0.4/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.4/CHANGES.txt?rev=933442r1=933441r2=933442view=diff == --- hadoop/pig/branches/branch-0.4/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.4/CHANGES.txt Mon Apr 12 23:10:31 2010 @@ -73,6 +73,8 @@ PIG-792: skew join implementation (srira BUG FIXES +PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for each version release (gates) + PIG-964: Handling null in skewed join (sriranjan via olgan) PIG-962: Skewed join creates 3 map reduce jobs (sriranjan via olgan) Modified: hadoop/pig/branches/branch-0.4/src/docs/src/documentation/content/xdocs/site.xml URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.4/src/docs/src/documentation/content/xdocs/site.xml?rev=933442r1=933441r2=933442view=diff == --- hadoop/pig/branches/branch-0.4/src/docs/src/documentation/content/xdocs/site.xml (original) +++ hadoop/pig/branches/branch-0.4/src/docs/src/documentation/content/xdocs/site.xml Mon Apr 12 23:10:31 2010 @@ -58,7 +58,7 @@ See http://forrest.apache.org/docs/linki /docs external-refs -api href=http://hadoop.apache.org/pig/javadoc/docs/api/; / +api href=http://hadoop.apache.org/pig/docs/r0.4.0/api/; / wiki href=http://wiki.apache.org/pig/; / faq href=http://wiki.apache.org/pig/FAQ; / relnotes href=http://hadoop.apache.org/pig/releases.html; /
svn commit: r933445 - in /hadoop/pig/site/publish/docs: r0.4.0/ r0.4.0/api/ r0.4.0/api/org/apache/pig/ r0.4.0/api/org/apache/pig/backend/ r0.4.0/api/org/apache/pig/backend/class-use/ r0.4.0/api/org/ap
Author: gates Date: Mon Apr 12 23:27:13 2010 New Revision: 933445 URL: http://svn.apache.org/viewvc?rev=933445view=rev Log: PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for each version release. [This commit notification would consist of 92 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r929330 [2/2] - in /hadoop/pig/branches/branch-0.7/contrib: ./ piggybank/java/src/main/java/org/apache/pig/piggybank/evaluation/datetime/convert/ piggybank/java/src/main/java/org/apache/pi
Added: hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/convert/TestConvertDateTime.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/convert/TestConvertDateTime.java?rev=929330view=auto == --- hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/convert/TestConvertDateTime.java (added) +++ hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/convert/TestConvertDateTime.java Tue Mar 30 22:53:20 2010 @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pig.piggybank.test.evaluation.datetime.convert; + +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.piggybank.evaluation.datetime.convert.*; +import org.junit.Test; + +import junit.framework.TestCase; + +public class TestConvertDateTime extends TestCase { +@Test +public void testUnixToISO() throws Exception { + +// Verify that (long) unix datetimes convert to ISO datetimes +Tuple t1 = TupleFactory.getInstance().newTuple(1); +t1.set(0, 1231290421000L); + +UnixToISO func = new UnixToISO(); +String iso = func.exec(t1); + +assertTrue(iso.equals(2009-01-07T01:07:01.000Z)); +} + +@Test +public void testISOToUnix() throws Exception { + +// Verify that ISO string datetimes convert to Unix (long) datetimes +Tuple t2 = TupleFactory.getInstance().newTuple(1); +t2.set(0, 2009-01-07T01:07:01.000Z); +ISOToUnix func2 = new ISOToUnix(); +Long unix = func2.exec(t2); + +assertTrue(unix == 1231290421000L); + +} + +@Test +public void testCustomFormatToISO() throws Exception { + +Tuple t = TupleFactory.getInstance().newTuple(2); +t.set(0, 10/10/2010); +t.set(1, dd/MM/); +CustomFormatToISO func = new CustomFormatToISO(); +String iso = func.exec(t); + +assertTrue(iso.equals(2010-10-10T00:00:00.000Z)); +} +} Added: hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/diff/TestDiffDateTime.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/diff/TestDiffDateTime.java?rev=929330view=auto == --- hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/diff/TestDiffDateTime.java (added) +++ hadoop/pig/branches/branch-0.7/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/evaluation/datetime/diff/TestDiffDateTime.java Tue Mar 30 22:53:20 2010 @@ -0,0 +1,104 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pig.piggybank.test.evaluation.datetime.diff; + +import org.apache.pig.data.Tuple; +import org.apache.pig.data.TupleFactory; +import org.apache.pig.piggybank.evaluation.datetime.diff.*; +import org.junit.Assert; +import org.junit.Test; + +import junit.framework.TestCase; + +public class
svn commit: r924347 - /hadoop/pig/trunk/CHANGES.txt
Author: gates Date: Wed Mar 17 16:14:33 2010 New Revision: 924347 URL: http://svn.apache.org/viewvc?rev=924347view=rev Log: Updated CHANGES.txt to note that 0.6.0 has been released. Modified: hadoop/pig/trunk/CHANGES.txt Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=924347r1=924346r2=924347view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Mar 17 16:14:33 2010 @@ -288,7 +288,7 @@ PIG-1255: Tiny code cleanup for serializ PIG-613: Casting elements inside a tuple does not take effect (daijy) -Release 0.6.0 - Unreleased +Release 0.6.0 INCOMPATIBLE CHANGES
svn commit: r924355 - in /hadoop/pig/trunk/contrib: CHANGES.txt piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java piggybank/java/src/test/java/org/apache/pig/piggybank/test/
Author: gates Date: Wed Mar 17 16:26:59 2010 New Revision: 924355 URL: http://svn.apache.org/viewvc?rev=924355view=rev Log: PIG-1284 Added XMLLoader to piggybank. Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestXMLLoader.java Modified: hadoop/pig/trunk/contrib/CHANGES.txt Modified: hadoop/pig/trunk/contrib/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/CHANGES.txt?rev=924355r1=924354r2=924355view=diff == --- hadoop/pig/trunk/contrib/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/CHANGES.txt Wed Mar 17 16:26:59 2010 @@ -1,10 +1,111 @@ -PIG-1126: updated fieldsToRead function (olgan) +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +Pig Change Log + +Trunk (unreleased changes) + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + +PIG-1284 Added XMLLoader to piggybank (aloknsingh via gates) + +OPTIMIZATIONS + +BUG FIXES + +Release 0.6.0 + +INCOMPATIBLE CHANGES + +PIG-1126: updated fieldsToRead function for piggybank loaders (olgan) + +IMPROVEMENTS + PIG-1015: [piggybank] DateExtractor should take into account timezones (dryaboy via olgan) -PIG-911: Added SequenceFileLoader (dryaboy via gates) + +OPTIMIZATIONS + +BUG FIXES + +Release 0.5.0 - Released + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + +PIG-911: Added SequenceFileLoader to piggybank (dryaboy via gates) + +OPTIMIZATIONS + +BUG FIXES + +Release 0.4.0 - Released + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + PIG-885: New UDFs for piggybank (Bin, Decode, LookupInFiles, RegexExtract, RegexMatch, HashFVN, DiffDate) (daijy) -PIG-868: added strin manipulation functions (bennies via olgan) -PIG-273: addition of Top and SearchQuery UDFs (ankur via olgan) + +PIG-868: added strin manipulation functions to piggybank (bennies via olgan) + +OPTIMIZATIONS + +BUG FIXES + +Release 0.3.0 - Released + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + +PIG-732: addition of Top and SearchQuery UDFs to piggybank (ankur via olgan) + +OPTIMIZATIONS + +BUG FIXES + +Release 0.2.0 - Released + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + +OPTIMIZATIONS + +BUG FIXES + +Release 0.1.0 - Released + +INCOMPATIBLE CHANGES + +IMPROVEMENTS + PIG-246: created UDF repository (olgan) -PIG-245: UDF wrappers for Java Math functions (ajaygarg via olgan) -PIG-277: UDF for computing correlation and covariance between data sets (ajaygarg via olgan) + +PIG-245: UDF wrappers for Java Math functions added to piggybank (ajaygarg via olgan) + +PIG-277: UDF for computing correlation and covariance between data sets added to piggybank (ajaygarg via olgan) + +OPTIMIZATIONS + +BUG FIXES + Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java?rev=924355view=auto == --- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java (added) +++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/XMLLoader.java Wed Mar 17 16:26:59 2010 @@ -0,0 +1,610 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License
svn commit: r924558 - in /hadoop/pig/trunk: CHANGES.txt bin/pig
Author: gates Date: Wed Mar 17 23:41:29 2010 New Revision: 924558 URL: http://svn.apache.org/viewvc?rev=924558&view=rev Log: PIG-1293: pig wrapper script tends to fail if pig is in the path and PIG_HOME isn't set. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/bin/pig Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=924558&r1=924557&r2=924558&view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Wed Mar 17 23:41:29 2010 @@ -157,6 +157,9 @@ OPTIMIZATIONS BUG FIXES +PIG-1293: pig wrapper script tends to fail if pig is in the path and PIG_HOME +isn't set (aw via gates) + PIG-1272: Column pruner causes wrong results (daijy) PIG-1275: empty bag in PigStorage read as null (daijy) Modified: hadoop/pig/trunk/bin/pig URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/bin/pig?rev=924558&r1=924557&r2=924558&view=diff == --- hadoop/pig/trunk/bin/pig (original) +++ hadoop/pig/trunk/bin/pig Wed Mar 17 23:41:29 2010 @@ -52,7 +52,7 @@ for f in $@; do done # resolve links - $0 may be a softlink -this=$0 +this=${BASH_SOURCE-$0} while [ -h $this ]; do ls=`ls -ld $this` link=`expr $ls : '.*-> \(.*\)$'`
svn commit: r919634 [3/3] - in /hadoop/pig/trunk: src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/ src/org/apache/pig/experimental/logical/ src/org/apache/pig/experimental/logica
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalLogToPhyTranslationVisitor.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalLogToPhyTranslationVisitor.java?rev=919634r1=919633r2=919634view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalLogToPhyTranslationVisitor.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalLogToPhyTranslationVisitor.java Fri Mar 5 21:55:19 2010 @@ -17,14 +17,22 @@ */ package org.apache.pig.test; +import java.io.ByteArrayOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.util.List; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.Add; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.Divide; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.EqualToExpr; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.GreaterThanExpr; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.LessThanExpr; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.Mod; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.Multiply; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POCast; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.PONegative; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.POProject; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.Subtract; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POFilter; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POForEach; @@ -36,15 +44,29 @@ import org.apache.pig.backend.hadoop.executionengine.physicalLayer.expressionOperators.ConstantExpression; import org.apache.pig.data.DataType; import org.apache.pig.experimental.logical.LogicalPlanMigrationVistor; +import org.apache.pig.experimental.logical.expression.AddExpression; +import org.apache.pig.experimental.logical.expression.DivideExpression; +import org.apache.pig.experimental.logical.expression.IsNullExpression; import org.apache.pig.experimental.logical.expression.LogicalExpression; +import org.apache.pig.experimental.logical.expression.LogicalExpressionPlan; +import org.apache.pig.experimental.logical.expression.ModExpression; +import org.apache.pig.experimental.logical.expression.MultiplyExpression; +import org.apache.pig.experimental.logical.expression.NegativeExpression; +import org.apache.pig.experimental.logical.expression.NotExpression; +import org.apache.pig.experimental.logical.expression.ProjectExpression; +import org.apache.pig.experimental.logical.expression.SubtractExpression; +import org.apache.pig.experimental.logical.optimizer.PlanPrinter; import org.apache.pig.experimental.logical.optimizer.UidStamper; +import org.apache.pig.experimental.logical.relational.LOFilter; +import org.apache.pig.experimental.logical.relational.LOForEach; +import org.apache.pig.experimental.logical.relational.LOGenerate; +import org.apache.pig.experimental.logical.relational.LOLoad; import org.apache.pig.experimental.logical.relational.LogToPhyTranslationVisitor; import org.apache.pig.experimental.logical.relational.LogicalRelationalOperator; import org.apache.pig.experimental.logical.relational.LogicalSchema; import org.apache.pig.experimental.logical.relational.LogicalSchema.LogicalFieldSchema; -import org.apache.pig.experimental.plan.Operator; import 
org.apache.pig.experimental.plan.OperatorPlan; -import org.apache.pig.experimental.plan.PlanVisitor; +import org.apache.pig.impl.logicalLayer.LOIsNull; import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.test.utils.LogicalPlanTester; @@ -466,7 +488,6 @@ PhysicalPlan phyPlan = translatePlan(newLogicalPlan); assertEquals(phyPlan.size(), 3); -POLoad load = (POLoad)phyPlan.getRoots().get(0); assertEquals(phyPlan.getLeaves().get(0).getClass(), POStore.class); POForEach foreach = (POForEach)phyPlan.getSuccessors(phyPlan.getRoots().get(0)).get(0); @@ -476,13 +497,13 @@ assertEquals(inner.size(), 1); POProject prj = (POProject)inner.getRoots().get(0); assertEquals(prj.getColumn(), 0); -assertEquals(prj.getInputs().get(0), load); - +
svn commit: r919264 - in /hadoop/pig/trunk: CHANGES.txt build.xml conf/pig.properties
Author: gates Date: Fri Mar 5 01:34:16 2010 New Revision: 919264 URL: http://svn.apache.org/viewvc?rev=919264view=rev Log: PIG-1053: Put pig.properties back into release distribution. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/conf/pig.properties Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=919264r1=919263r2=919264view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Mar 5 01:34:16 2010 @@ -143,6 +143,8 @@ BUG FIXES +PIG-1053: Put pig.properties back into release distribution (gates). + PIG-1273: Skewed join throws error (rding) PIG-1267: Problems with partition filter optimizer (rding) Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=919264r1=919263r2=919264view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Fri Mar 5 01:34:16 2010 @@ -569,6 +569,7 @@ target name=package depends=docs, api-report description=Create a Pig release mkdir dir=${dist.dir} / mkdir dir=${dist.dir}/lib / +mkdir dir=${dist.dir}/conf / mkdir dir=${dist.dir}/scripts / mkdir dir=${dist.dir}/docs / mkdir dir=${dist.dir}/docs/api / @@ -596,6 +597,8 @@ fileset dir=${build.docs} / /copy + copy todir=${dist.dir}/conf file=conf/pig.properties/ + copy todir=${dist.dir}/src includeEmptyDirs=true fileset dir=${src.dir} / /copy Modified: hadoop/pig/trunk/conf/pig.properties URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/conf/pig.properties?rev=919264r1=919263r2=919264view=diff == --- hadoop/pig/trunk/conf/pig.properties (original) +++ hadoop/pig/trunk/conf/pig.properties Fri Mar 5 01:34:16 2010 @@ -23,7 +23,7 @@ verbose=false #exectype local|mapreduce, mapreduce is default -#exectype=mapreduce +exectype=mapreduce # hod realted properties #ssh.gateway #hod.expect.root @@ -32,31 +32,11 @@ #hod.config.dir #hod.param +#pig.logfile= + #Do not spill temp files smaller than this 
size (bytes) pig.spill.size.threshold=500 #EXPERIMENT: Activate garbage collection when spilling a file bigger than this size (bytes) #This should help reduce the number of files being spilled. pig.spill.gc.activation.size=4000 - - -## -# Everything below this line is Yahoo specific. Note that I've made -# (almost) no changes to the lines above to make merging in from Apache -# easier. Any values I don't want from above I override below. -# -# This file is configured for use with HOD on the production clusters. If you -# want to run pig with a static cluster you will need to remove everything -# below this line and set the cluster value (above) to the -# hostname and port of your job tracker. - -exectype=mapreduce - -hod.config.dir=/export/crawlspace/kryptonite/hod/current/conf -hod.server=local - -cluster.domain=inktomisearch.com - -log.file= - -yinst.cluster=kryptonite
svn commit: r910628 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt RELEASE_NOTES.txt build.xml
Author: gates Date: Tue Feb 16 18:05:02 2010 New Revision: 910628 URL: http://svn.apache.org/viewvc?rev=910628view=rev Log: Preparing for release 0.6.0. Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/RELEASE_NOTES.txt hadoop/pig/branches/branch-0.6/build.xml Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=910628r1=910627r2=910628view=diff == --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Tue Feb 16 18:05:02 2010 @@ -18,7 +18,7 @@ Pig Change Log -Release 0.6.0 - 2010-01-08 +Release 0.6.0 - 2010-02-10 INCOMPATIBLE CHANGES Modified: hadoop/pig/branches/branch-0.6/RELEASE_NOTES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/RELEASE_NOTES.txt?rev=910628r1=910627r2=910628view=diff == --- hadoop/pig/branches/branch-0.6/RELEASE_NOTES.txt (original) +++ hadoop/pig/branches/branch-0.6/RELEASE_NOTES.txt Tue Feb 16 18:05:02 2010 @@ -1,10 +1,26 @@ -These notes are for Pig 0.3.0 release. +These notes are for Pig 0.6.0 release. Highlights == -The main focus of this release is multiquery support that allows to optimize -multiple queries within the same script that share a computation. +- Added Zebra as a contrib project. See http://wiki.apache.org/pig/zebra +- Added UDFContext, gives UDFs a way to pass info from front to back end and + gives UDFS access to JobConf in the backend. PIG-1085 +- Added left outer join for fragment replicate join. PIG-1036 +- Added ability to set job priority from Pig Latin. PIG-1025 +- Enhanced multi-query to work with joins in some cases. PIG-983 +- Reworked memory manager to significantly reduce GC Overhead and Out of Heap + failures. PIG-975 +- Added Accumulator interface for UDFs. PIG-979 +- Over 100 bug fixes and improvements. + +Incompatibilities += +PIG-922 changed LoadFunc.fieldsToRead's signature . 
Compiles LoadFuncs will +work as is, since the Pig code checks if the method conforms to the new +signature and only invokes it in the case that it does. If users wish to +compile their LoadFunc, they will need to change this method to match the new +signature. System Requirements === @@ -13,14 +29,14 @@ Java installation 2. Ant build tool: http://ant.apache.org - to build source only 3. Cygwin: http://www.cygwin.com/ - to run under Windows -4. This release is compatible with Hadoop 0.18.x releases +4. This release is compatible with Hadoop 0.20.x releases Trying the Release == -1. Download pig-0.3.0.tar.gz -2. Unpack the file: tar -xzvf pig-0.3.0.tar.gz -3. Move into the installation directory: cd pig-0.3.0 +1. Download pig-0.6.0.tar.gz +2. Unpack the file: tar -xzvf pig-0.6.0.tar.gz +3. Move into the installation directory: cd pig-0.6.0 4. To run pig without Hadoop cluster, execute the command below. This will take you into an interactive shell called grunt that allows you to navigate the local file system and execute Pig commands against the local files @@ -42,14 +58,10 @@ 9. To build the tutorial: cd tutorial ant -10. To run tutorial follow instructions in http://wiki.apache.org/pig/PigTutorial +10. 
To run tutorial follow instructions in Relevant Documentation == -Pig Language Manual(including Grunt commands): -http://wiki.apache.org/pig-data/attachments/FrontPage/attachments/plrm.htm -UDF Manual: http://wiki.apache.org/pig/UDFManual -Piggy Bank: http://wiki.apache.org/pig/PiggyBank -Pig Tutorial: http://wiki.apache.org/pig/PigTutorial -Pig Eclipse Plugin (PigPen): http://wiki.apache.org/pig/PigPen +http://hadoop.apache.org/pig/docs/r0.6.0/ +http://wiki.apache.org/pig/ Modified: hadoop/pig/branches/branch-0.6/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/build.xml?rev=910628r1=910627r2=910628view=diff == --- hadoop/pig/branches/branch-0.6/build.xml (original) +++ hadoop/pig/branches/branch-0.6/build.xml Tue Feb 16 18:05:02 2010 @@ -24,7 +24,7 @@ !-- name and version properties -- property name=name value=pig / property name=Name value=Pig / -property name=version value=0.6.0-dev / +property name=version value=0.6.1-dev / property name=final.name value=${name}-${version} / condition property=isWindows os family=windows/
svn commit: r910632 - /hadoop/pig/tags/release-0.6.0-rc1/
Author: gates Date: Tue Feb 16 18:08:03 2010 New Revision: 910632 URL: http://svn.apache.org/viewvc?rev=910632&view=rev Log: Pig 0.6.0-rc1 release. Added: hadoop/pig/tags/release-0.6.0-rc1/ (props changed) - copied from r910631, hadoop/pig/branches/branch-0.6/ Propchange: hadoop/pig/tags/release-0.6.0-rc1/ -- --- svn:ignore (added) +++ svn:ignore Tue Feb 16 18:08:03 2010 @@ -0,0 +1,4 @@ + +dist +depend +pig.jar Propchange: hadoop/pig/tags/release-0.6.0-rc1/ -- svn:mergeinfo = /hadoop/pig/branches/multiquery:741727-770826
svn commit: r908688 - in /hadoop/pig/branches/branch-0.6: src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/util/ src/org/apache/pig/impl/logicalLayer/schema/ test/org/apache/pig/t
Author: gates Date: Wed Feb 10 22:21:56 2010 New Revision: 908688 URL: http://svn.apache.org/viewvc?rev=908688view=rev Log: Added license header for Java files that were missing it. Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/StoreConfig.java hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestDataBagAccess.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestNullConstant.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestSchemaUtil.java Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/StoreConfig.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/StoreConfig.java?rev=908688r1=908687r2=908688view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/StoreConfig.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/StoreConfig.java Wed Feb 10 22:21:56 2010 @@ -1,5 +1,19 @@ -/** - * +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.apache.pig; Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java?rev=908688r1=908687r2=908688view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/backend/hadoop/executionengine/util/MapRedUtil.java Wed Feb 10 22:21:56 2010 @@ -1,5 +1,19 @@ -/** - * +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.apache.pig.backend.hadoop.executionengine.util; Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java?rev=908688r1=908687r2=908688view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/schema/SchemaUtil.java Wed Feb 10 22:21:56 2010 @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under
svn commit: r901828 - in /hadoop/pig/trunk: ./ src/org/apache/pig/data/ src/org/apache/pig/pen/util/ test/org/apache/pig/test/
Author: gates Date: Thu Jan 21 19:06:02 2010 New Revision: 901828 URL: http://svn.apache.org/viewvc?rev=901828view=rev Log: PIG-1166 Reverting this change pending further discussion of when we want to break UDF interfaces. Removed: hadoop/pig/trunk/test/org/apache/pig/test/TestTupleBagInterface.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/data/AccumulativeBag.java hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java hadoop/pig/trunk/src/org/apache/pig/data/DistinctDataBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalCachedBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalDistinctBag.java hadoop/pig/trunk/src/org/apache/pig/data/InternalSortedBag.java hadoop/pig/trunk/src/org/apache/pig/data/NonSpillableDataBag.java hadoop/pig/trunk/src/org/apache/pig/data/ReadOnceBag.java hadoop/pig/trunk/src/org/apache/pig/data/SingleTupleBag.java hadoop/pig/trunk/src/org/apache/pig/data/TargetedTuple.java hadoop/pig/trunk/src/org/apache/pig/data/Tuple.java hadoop/pig/trunk/src/org/apache/pig/pen/util/ExampleTuple.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=901828r1=901827r2=901828view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Jan 21 19:06:02 2010 @@ -24,8 +24,6 @@ IMPROVEMENTS -PIG-1166: A bit change of the interface of Tuple DataBag:make the set and append method return this (zjffdu) - PIG-1177: Pig 0.6 Docs - Zebra docs (chandec via olgan) PIG-1175: Pig 0.6 Docs - Store v. 
Dump (chandec via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/data/AccumulativeBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/AccumulativeBag.java?rev=901828r1=901827r2=901828view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/AccumulativeBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/AccumulativeBag.java Thu Jan 21 19:06:02 2010 @@ -35,11 +35,11 @@ this.index = index; } -public DataBag add(Tuple t) { +public void add(Tuple t) { throw new RuntimeException(AccumulativeBag does not support add operation); } -public DataBag addAll(DataBag b) { +public void addAll(DataBag b) { throw new RuntimeException(AccumulativeBag does not support add operation); } Modified: hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java?rev=901828r1=901827r2=901828view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DataBag.java Thu Jan 21 19:06:02 2010 @@ -94,16 +94,14 @@ /** * Add a tuple to the bag. * @param t tuple to add. - * @return The DataBag itself */ -DataBag add(Tuple t); +void add(Tuple t); /** * Add contents of a bag to the bag. * @param b bag to add contents of. - * @return The DataBag itself */ -DataBag addAll(DataBag b); +void addAll(DataBag b); /** * Clear out the contents of the bag, both on disk and in memory. Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java?rev=901828r1=901827r2=901828view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultAbstractBag.java Thu Jan 21 19:06:02 2010 @@ -76,12 +76,11 @@ * Add a tuple to the bag. * @param t tuple to add. 
*/ -public DataBag add(Tuple t) { +public void add(Tuple t) { synchronized (mContents) { mMemSizeChanged = true; mSize++; mContents.add(t); -return this; } } @@ -89,13 +88,12 @@ * Add contents of a bag to the bag. * @param b bag to add contents of. */ -public DataBag addAll(DataBag b) { +public void addAll(DataBag b) { synchronized (mContents) { mMemSizeChanged = true; mSize += b.size(); IteratorTuple i = b.iterator(); while (i.hasNext()) mContents.add(i.next()); -return this; } } Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data
svn commit: r901317 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOCast.java src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java test/org/apache
Author: gates Date: Wed Jan 20 18:34:41 2010 New Revision: 901317 URL: http://svn.apache.org/viewvc?rev=901317view=rev Log: PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER, FORACH. Checking in for Pradeep since he is out. Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestTypeCheckingValidator.java Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=901317r1=901316r2=901317view=diff == --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Wed Jan 20 18:34:41 2010 @@ -145,6 +145,9 @@ BUG FIXES +PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER, +FORACH (pradeepkth via gates) + PIG-1143: Poisson Sample Loader should compute the number of samples required only once (sriranjan via olgan) Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java?rev=901317r1=901316r2=901317view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/LOCast.java Wed Jan 20 18:34:41 2010 @@ -26,6 +26,7 @@ import org.apache.pig.impl.plan.PlanVisitor; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; import org.apache.pig.data.DataType; public class LOCast extends ExpressionOperator { @@ -34,6 +35,11 @@ private static final long serialVersionUID = 2L; private FuncSpec mLoadFuncSpec = null; +// store 
field schema representing the schema +// in user specified casts -this is so that if +// field schema is unset and then getFieldSchema is called we still +// rebuild the fieldschema correctly as specified by the user in the script +private FieldSchema userSpecifiedFieldSchema; /** * @@ -65,11 +71,22 @@ public Schema getSchema() { return mSchema; } + + +@Override +public void setFieldSchema(FieldSchema fs) throws FrontendException { +super.setFieldSchema(fs); +userSpecifiedFieldSchema = new Schema.FieldSchema(fs); +} @Override public Schema.FieldSchema getFieldSchema() throws FrontendException { if(!mIsFieldSchemaComputed) { -mFieldSchema = new Schema.FieldSchema(null, mType); +if(userSpecifiedFieldSchema != null) { +mFieldSchema = userSpecifiedFieldSchema; +} else { +mFieldSchema = new Schema.FieldSchema(null, mType); +} Schema.FieldSchema parFs = getExpression().getFieldSchema(); String canonicalName = (parFs != null ? parFs.canonicalName : null); mFieldSchema.setParent(canonicalName, getExpression()); Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java?rev=901317r1=901316r2=901317view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java Wed Jan 20 18:34:41 2010 @@ -37,6 +37,7 @@ *the logical binary expression operator that has to be visited * @throws VisitorException */ +@Override protected void visit(BinaryExpressionOperator binOp) throws VisitorException { binOp.unsetFieldSchema(); @@ -49,6 +50,7 @@ *the logical unary operator that has to be visited * @throws VisitorException */ +@Override protected void visit(UnaryExpressionOperator uniOp) throws VisitorException { uniOp.unsetFieldSchema(); super.visit(uniOp); @@ 
-60,6 +62,7 @@ *the logical cogroup operator that has to be visited * @throws VisitorException */ +@Override protected void visit(LOCogroup cg) throws VisitorException { cg.unsetSchema(); super.visit(cg); @@ -71,6 +74,7 @@ *the logical sort operator that has to be visited
svn commit: r900926 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOCast.java src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java test/org/apache/pig/test/Test
Author: gates Date: Tue Jan 19 19:39:51 2010 New Revision: 900926 URL: http://svn.apache.org/viewvc?rev=900926view=rev Log: PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER, FORACH. Checking in for Pradeep since he's out. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=900926r1=900925r2=900926view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Jan 19 19:39:51 2010 @@ -78,6 +78,9 @@ BUG FIXES +PIG-1191: POCast throws exception for certain sequences of LOAD, FILTER, + FORACH (pradeepkth via gates) + PIG-1171: Top-N queries produce incorrect results when followed by a cross statement (rding via olgan) PIG-1159: merge join right side table does not support comma seperated paths Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java?rev=900926r1=900925r2=900926view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java Tue Jan 19 19:39:51 2010 @@ -26,6 +26,7 @@ import org.apache.pig.impl.plan.PlanVisitor; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.logicalLayer.schema.Schema; +import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; import org.apache.pig.data.DataType; public class LOCast extends ExpressionOperator { @@ -34,6 +35,11 @@ private static final long serialVersionUID = 2L; private FuncSpec mLoadFuncSpec = null; +// store field schema representing the schema +// in user specified casts -this is so that if +// field 
schema is unset and then getFieldSchema is called we still +// rebuild the fieldschema correctly as specified by the user in the script +private FieldSchema userSpecifiedFieldSchema; /** * @@ -65,11 +71,22 @@ public Schema getSchema() { return mSchema; } + + +@Override +public void setFieldSchema(FieldSchema fs) throws FrontendException { +super.setFieldSchema(fs); +userSpecifiedFieldSchema = new Schema.FieldSchema(fs); +} @Override public Schema.FieldSchema getFieldSchema() throws FrontendException { if(!mIsFieldSchemaComputed) { -mFieldSchema = new Schema.FieldSchema(null, mType); +if(userSpecifiedFieldSchema != null) { +mFieldSchema = userSpecifiedFieldSchema; +} else { +mFieldSchema = new Schema.FieldSchema(null, mType); +} Schema.FieldSchema parFs = getExpression().getFieldSchema(); String canonicalName = (parFs != null ? parFs.canonicalName : null); mFieldSchema.setParent(canonicalName, getExpression()); Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java?rev=900926r1=900925r2=900926view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/SchemaRemover.java Tue Jan 19 19:39:51 2010 @@ -37,6 +37,7 @@ *the logical binary expression operator that has to be visited * @throws VisitorException */ +@Override protected void visit(BinaryExpressionOperator binOp) throws VisitorException { binOp.unsetFieldSchema(); @@ -49,6 +50,7 @@ *the logical unary operator that has to be visited * @throws VisitorException */ +@Override protected void visit(UnaryExpressionOperator uniOp) throws VisitorException { uniOp.unsetFieldSchema(); super.visit(uniOp); @@ -60,6 +62,7 @@ *the logical cogroup operator that has to be visited * @throws VisitorException */ +@Override protected void visit(LOCogroup cg) throws 
VisitorException { cg.unsetSchema(); super.visit(cg); @@ -71,6 +74,7 @@ *the logical sort operator that has to be visited * @throws VisitorException */ +@Override protected void visit(LOSort s) throws VisitorException
svn commit: r899502 - in /hadoop/pig/trunk: src/org/apache/pig/experimental/logical/expression/ src/org/apache/pig/experimental/logical/relational/ src/org/apache/pig/experimental/plan/ test/org/apache/pig/test/
Author: gates Date: Fri Jan 15 00:53:47 2010 New Revision: 899502 URL: http://svn.apache.org/viewvc?rev=899502view=rev Log: PIG-1178. Commit expressions-2.patch. Added: hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/AndExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/BinaryExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/ColumnExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/ConstantExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/EqualExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/LogicalExpressionVisitor.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/ProjectExpression.java Modified: hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/LogicalExpression.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/relational/LogicalPlanVisitor.java hadoop/pig/trunk/src/org/apache/pig/experimental/logical/relational/LogicalSchema.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/DependencyOrderWalker.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/DepthFirstWalker.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/Operator.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/PlanVisitor.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/PlanWalker.java hadoop/pig/trunk/src/org/apache/pig/experimental/plan/ReverseDependencyOrderWalker.java hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalOperatorPlan.java Added: hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/AndExpression.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/AndExpression.java?rev=899502view=auto == --- hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/AndExpression.java (added) +++ 
hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/AndExpression.java Fri Jan 15 00:53:47 2010 @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pig.experimental.logical.expression; + +import org.apache.pig.data.DataType; +import org.apache.pig.experimental.plan.OperatorPlan; +import org.apache.pig.experimental.plan.PlanVisitor; + +/** + * Boolean and expression. + */ +public class AndExpression extends BinaryExpression { + +/** + * Will add this operator to the plan and connect it to the + * left and right hand side operators. 
+ * @param plan plan this operator is part of + * @param lhs expression on its left hand side + * @param rhs expression on its right hand side + */ +public AndExpression(OperatorPlan plan, + LogicalExpression lhs, + LogicalExpression rhs) { +super(And, plan, DataType.BOOLEAN, lhs, rhs); +} + +/** + * @link org.apache.pig.experimental.plan.Operator#accept(org.apache.pig.experimental.plan.PlanVisitor) + */ +@Override +public void accept(PlanVisitor v) { +if (!(v instanceof LogicalExpressionVisitor)) { +throw new RuntimeException(Expected LogicalExpressionVisitor); +} +((LogicalExpressionVisitor)v).visitAnd(this); +} + +} Added: hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/BinaryExpression.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/BinaryExpression.java?rev=899502view=auto == --- hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/BinaryExpression.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/experimental/logical/expression/BinaryExpression.java Fri Jan 15 00:53:47 2010 @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work
svn commit: r898497 [2/2] - in /hadoop/pig/trunk: src/org/apache/pig/experimental/logical/ src/org/apache/pig/experimental/logical/expression/ src/org/apache/pig/experimental/logical/relational/ src/o
Added: hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalOperatorPlan.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalOperatorPlan.java?rev=898497view=auto == --- hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalOperatorPlan.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestExperimentalOperatorPlan.java Tue Jan 12 20:32:29 2010 @@ -0,0 +1,646 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pig.test; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; + +import org.apache.pig.experimental.plan.BaseOperatorPlan; +import org.apache.pig.experimental.plan.DependencyOrderWalker; +import org.apache.pig.experimental.plan.DepthFirstWalker; +import org.apache.pig.experimental.plan.Operator; +import org.apache.pig.experimental.plan.OperatorPlan; +import org.apache.pig.experimental.plan.PlanEdge; +import org.apache.pig.experimental.plan.PlanVisitor; +import org.apache.pig.experimental.plan.PlanWalker; +import org.apache.pig.experimental.plan.ReverseDependencyOrderWalker; +import org.apache.pig.impl.util.Pair; +import org.junit.Test; + +import junit.framework.TestCase; + +public class TestExperimentalOperatorPlan extends TestCase { + +private static class SillyPlan extends BaseOperatorPlan { + +SillyPlan() { +super(); +} + +} + +private static class SillyOperator extends Operator { +private String name; + +SillyOperator(String n, SillyPlan p) { +super(n, p); +name = n; +} + +public boolean equals(SillyOperator other) { +return other.name == name; +} + +@Override +public void accept(PlanVisitor v) { +if (v instanceof SillyVisitor) { +((SillyVisitor)v).visitSillyOperator(this); +} +} +} + +private static class SillyVisitor extends PlanVisitor { + +StringBuffer buf; + +protected SillyVisitor(OperatorPlan plan, PlanWalker walker) { +super(plan, walker); +buf = new StringBuffer(); +} + +public void visitSillyOperator(SillyOperator so) { +buf.append(so.getName()); +} + +public String getVisitPattern() { +return buf.toString(); +} + +} + +// Tests for PlanEdge + +@Test +public void testPlanEdgeInsert() { +SillyPlan plan = new SillyPlan(); +SillyOperator fred = new SillyOperator(fred, plan); +SillyOperator joe = new SillyOperator(joe, plan); +PlanEdge edges = new PlanEdge(); + +// Test initial entry +edges.put(fred, joe, 0); +CollectionOperator c = edges.get(fred); +assertEquals(1, c.size()); +Operator[] a = new 
Operator[1]; +Operator[] b = c.toArray(a); +assertEquals(joe, b[0]); + +// Test entry with no position +SillyOperator bob = new SillyOperator(bob, plan); +edges.put(fred, bob); +c = edges.get(fred); +assertEquals(2, c.size()); +a = new Operator[2]; +b = c.toArray(a); +assertEquals(joe, b[0]); +assertEquals(bob, b[1]); + +// Test entry with position +SillyOperator jill = new SillyOperator(jill, plan); +edges.put(fred, jill, 1); +c = edges.get(fred); +assertEquals(3, c.size()); +a = new Operator[3]; +b = c.toArray(a); +assertEquals(joe, b[0]); +assertEquals(jill, b[1]); +assertEquals(bob, b[2]); +} + +// Test that entry with invalid position cannot be made. +@Test +public void testPlanEdgeInsertFirstIndexBad() { +SillyPlan plan = new SillyPlan(); +SillyOperator fred = new SillyOperator(fred, plan); +SillyOperator joe = new SillyOperator(joe, plan); +PlanEdge edges = new PlanEdge(); +boolean caught = false; +try { +edges.put(fred, joe,
svn commit: r892396 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/executionengine/ src/org/apache/pig/backend/hadoop/executionengine/ src/org/apache/pig/backend/hadoop/executionengine/physical
Author: gates Date: Fri Dec 18 23:15:57 2009 New Revision: 892396 URL: http://svn.apache.org/viewvc?rev=892396view=rev Log: PIG-1156 Add aliases to ExecJobs and PhysicalOperators. Added: hadoop/pig/trunk/test/org/apache/pig/test/TestBatchAliases.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HJob.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/PhysicalOperator.java hadoop/pig/trunk/src/org/apache/pig/backend/local/executionengine/LocalJob.java hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=892396r1=892395r2=892396view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Dec 18 23:15:57 2009 @@ -24,6 +24,8 @@ IMPROVEMENTS +PIG-1156: Add aliases to ExecJobs and PhysicalOperators (dvryaboy via gates) + PIG-1161: add missing license headers (dvryaboy via olgan) PIG-965: PERFORMANCE: optimize common case in matches (PORegex) (ankit.modi Modified: hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java?rev=892396r1=892395r2=892396view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/ExecJob.java Fri Dec 18 23:15:57 2009 @@ -66,6 +66,11 @@ public IteratorTuple getResults() throws ExecException; /** + * Returns the alias of relation generated by this job + */ +public String getAlias() throws ExecException; + 
+/** * Get configuration information * * @return configuration information for the execution engine Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java?rev=892396r1=892395r2=892396view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/HExecutionEngine.java Fri Dec 18 23:15:57 2009 @@ -51,9 +51,11 @@ import org.apache.pig.impl.io.FileSpec; import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.LogToPhyTranslationVisitor; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.PhysicalOperator; import org.apache.pig.impl.plan.OperatorKey; import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.MapReduceLauncher; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.plans.PhysicalPlan; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.tools.pigstats.PigStats; @@ -238,15 +240,26 @@ MapReduceLauncher launcher = new MapReduceLauncher(); ListExecJob jobs = new ArrayListExecJob(); +MapString, PhysicalOperator leafMap = new HashMapString, PhysicalOperator(); +for (PhysicalOperator physOp : plan.getLeaves()) { +log.info(physOp); +if (physOp instanceof POStore) { +FileSpec spec = ((POStore) physOp).getSFile(); +if (spec != null) +leafMap.put(spec.toString(), physOp); +} +} try { PigStats stats = launcher.launchPig(plan, jobName, pigContext); for (FileSpec spec: launcher.getSucceededFiles()) { -jobs.add(new HJob(ExecJob.JOB_STATUS.COMPLETED, pigContext, spec, stats)); +String alias = leafMap.containsKey(spec.toString()) ? 
leafMap.get(spec.toString()).getAlias() : null; +jobs.add(new HJob(ExecJob.JOB_STATUS.COMPLETED, pigContext, spec, alias, stats)); } for (FileSpec spec: launcher.getFailedFiles()) { -HJob j = new HJob(ExecJob.JOB_STATUS.FAILED, pigContext, spec, stats); +String alias = leafMap.containsKey(spec.toString()) ? leafMap.get(spec.toString()).getAlias() : null; +HJob j = new HJob
svn commit: r886937 - in /hadoop/pig/site: author/src/documentation/content/xdocs/whoweare.xml publish/whoweare.html publish/whoweare.pdf
Author: gates Date: Thu Dec 3 21:56:50 2009 New Revision: 886937 URL: http://svn.apache.org/viewvc?rev=886937view=rev Log: Added Jeff and Yan as committers. Modified: hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml hadoop/pig/site/publish/whoweare.html hadoop/pig/site/publish/whoweare.pdf Modified: hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml?rev=886937r1=886936r2=886937view=diff == --- hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml (original) +++ hadoop/pig/site/author/src/documentation/content/xdocs/whoweare.xml Thu Dec 3 21:56:50 2009 @@ -115,6 +115,22 @@ td-8/td /tr + tr +tdyanz/td +tda href=http://people.apache.org/~yanz;Yan Zhou/a/td +tdYahoo!/td +tdZebra contrib/td +td-8/td + /tr + + tr +tdzjffdu/td +tda href=http://people.apache.org/~zjffdu;Jeff Zhang/a/td +tdAutodesk/td +td/td +td+8/td + /tr + /table /section Modified: hadoop/pig/site/publish/whoweare.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/whoweare.html?rev=886937r1=886936r2=886937view=diff == --- hadoop/pig/site/publish/whoweare.html (original) +++ hadoop/pig/site/publish/whoweare.html Thu Dec 3 21:56:50 2009 @@ -315,12 +315,34 @@ /tr + +tr + +td colspan=1 rowspan=1yanz/td +td colspan=1 rowspan=1a href=http://people.apache.org/~yanz;Yan Zhou/a/td +td colspan=1 rowspan=1Yahoo!/td +td colspan=1 rowspan=1Zebra contrib/td +td colspan=1 rowspan=1-8/td + +/tr + + +tr + +td colspan=1 rowspan=1zjffdu/td +td colspan=1 rowspan=1a href=http://people.apache.org/~zjffdu;Jeff Zhang/a/td +td colspan=1 rowspan=1Autodesk/td +td colspan=1 rowspan=1/td +td colspan=1 rowspan=1+8/td + +/tr + /table /div -a name=N10175/aa name=Emeriti/a +a name=N101B0/aa name=Emeriti/a h2 class=h3Emeriti/h2 div class=section pCommitters who are no longer active on Pig are:/p Modified: hadoop/pig/site/publish/whoweare.pdf URL: 
http://svn.apache.org/viewvc/hadoop/pig/site/publish/whoweare.pdf?rev=886937r1=886936r2=886937view=diff == --- hadoop/pig/site/publish/whoweare.pdf (original) +++ hadoop/pig/site/publish/whoweare.pdf Thu Dec 3 21:56:50 2009 @@ -47,10 +47,10 @@ endobj 12 0 obj - /Length 3223 /Filter [ /ASCII85Decode /FlateDecode ] + /Length 3655 /Filter [ /ASCII85Decode /FlateDecode ] stream -GatU8D3*J\cR6o6N%T'JRsZN?T-g.e2G[HZ/`Hj.%5T]I.EloICX+EG=[12.?Tca/z...@fe@BB?6KrUZYd#H$j[WoBMl?nhSIQ].Z*GZGAD9`NA$9r?Z5=epdP.Q/5mh^9GII`QkY5Q=8a8N_4TBl4CIIa#kmVb%mr-I67K'2o`T;^Fh0p)Ym?7]%,rZuts:c*''E\q,-i1Gc7.*L(;r...@#bg[@ndYgqoXo2JN3T:IZ*S$r=D%4YNVZ5l,L7e;BGp7:S.8A4](khjb1uJu^6]nOrhn#UO4=X:\...@rpTO3[`C)fKPWiLH_G4qQ@GBo?a1lsgjr/7h'9Qkj7bNXsG,j:a[BCb'E^b^7i3J.X)$_le81o...@an..wr^v:g...@opk:)AJ$80echu2.^:ZVR7]2dl.BmS6h=r5DN4r-Y!S6/$FhY(7FUuWV?fGfESi6-nJ(O1Y5kiRIBGZ:0,#74+GsMV,B52EREBo3e%+/`Ii],C6uur\UF:O6PTZeqQ3opOFM*6_a4^dXn^P,9Cc.L6(NLDlhWdJ9RuVOXGr!iSt[(!gj;pa4U.=,b...@g7_#-#m('*:KYV7[M^$-M2!W9PNVM!4R/h\fn\l2p$*[81GaGdIC]!I:-Upje\OGkS]NVUE^U#9s7R7lS^hO]i$p3_l.5[6Ue9?r4MOb=O)abrag=7pMprZq\CJAlM+KSPQXlX'pSc\M`$n2De)5etZPfXeF?YbgJWrZ)i:6;T72)m1?0B4+RjK^4\dU]ghV_8KjN(EbG)U2-VG;+...@rqj`fp#+t@o(M`j.PjWW;pM8tt4t%\\c/l:Z^5Yr(0jM1!+a7V3-N4S4Qol*753?D]TXQba;-k)(-T[b%,QekY$3be81?/\;Y7fq3Pck7-6:eN5+i0\j[[[)aVGRD[1=Hk.arkc/c+n'#...@*um:9,[Y2(WHugEBb+GuV 
%8\t7'*aVM/%ON64%r+\(V//u[D06D*q7VW5J(35k0D?rKW!18S.kaH%*OeA?Z%8Po0$(5?i...@s/%FC(_qfPMI`6/WQqbB_?IT[L?s8j57lSfhZaQk0N(QjeNf^2bH?NhO[(c,[)Be.N*N39FP0X^trCSc\Mhbd%d1A6d1U(%L^ka4ldqX_D%80maG5/B,RK+df(htBD=Z3FqZmVq#BR:DP2$\h'chFsB\,BLW[FG%Am.Md\i...@r$bo.uc,M$Q\UHA=[cI6$X%0Rho6%7^uc2C#[7chSbBPJX_W!`L/N=A`)#Vmjmf#41T)g,jqFQIQ1JM=O6:.R*U9s\.\m8!F,Zmp;WeV,9@@!_EHI\A73)]rS24rroW+g?HT2_mVSTFW5o^^qKeJ+CDGh$OE/)n9YlA#bt%m8rn`_/mX[XFNng1*]KLX\K+uo5b%/,d=PY+[9ZU;(lpKV;;69tcHg3-93/Rd5!e:T`H)]l/8^r0!=(;=6p-Q\R=dV/9dk5,mYlaR%sb^jjk4jpf5Fb[4.%q7XJ9ep[WZe4UGG0maQ3]=*6I%BUMa_hN.%J%jH22D;E8G[,.2,m8c;]oemaH-YOYHJc7Lq`Rn8#?eMF54B,@Nij2%*AFV]K#nG$d1VqJf8_X.f31+rl4X`9V9oHOADHk*QL'.jHh65/Q1(jUXlL$@)^388.)XlLJY4qS`ZJe0cb%#3\P+jVf61dbcq6m[U+BgjRoQ12]jK^JsCa/A(k6X4MeGtd0g$`*9ILth'rog_F61+i+8DqqjJOXJR$YdYlQjpENdX8dPMCNoWnb-#PRk8Vu1F:.cXdRRr?2P]U`.i*bQLp7RY7dQ(49YBH%hWrXD6WX0R\Eh\k=#Z)unry...@b:b*qldMb#(i;R0Rn-+7\n,kXI*aSrX*l0k'bc6intir7et`...@iewsm2$a^pw@$hD+A] 0%=$l^rtgdlhiw97_...@igh]*=e\rsjju(C1C-kK0jR0Dn=_70`0P`e\nSE,rK[(`2O2;):)WI?I6'-'\.efId(-E#\oa6%J+C_t$C!Xk1YKt\g%09$(+;5#-L;nfq...@c.g8m',Tbk:$...@^\_n2oled_hiqq`ia]lp+r9wtn,ilz_rt...@a`o8qjz!o#t9t-.isnpep*7-n42dgxvh6u,q9$.0d7b-g...@uu*=*l%+d(=JlN*N(/ruiY(5DMN\=%G^Pr2B1A:,m8ueTuJ4LeLRUHlfb?e:?GcbVc!5g!l!f[XL=$n;K1_kO96maZDI/C_U%3bUB:^Is%Ns=h#Wl
svn commit: r886973 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/ test/org/apache/pig/test/
Author: gates Date: Thu Dec 3 22:56:45 2009 New Revision: 886973 URL: http://svn.apache.org/viewvc?rev=886973view=rev Log: PIG-1068: COGROUP fails with 'Type mismatch in key from map: expected org.apache.pig.impl.io.NullableText, recieved org.apache.pig.impl.io.NullableTuple' Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODemux.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POMultiQueryPackage.java hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=886973r1=886972r2=886973view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Dec 3 22:56:45 2009 @@ -45,6 +45,10 @@ BUG FIXES +PIG-1068: COGROUP fails with 'Type mismatch in key from map: expected + org.apache.pig.impl.io.NullableText, recieved + org.apache.pig.impl.io.NullableTuple' (rding via gates) + PIG-1113: Diamond query optimization throws error in JOIN (rding via olgan) PIG-1116: Remove redundant map-reduce job for merge join (pradeepkth) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java?rev=886973r1=886972r2=886973view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java Thu Dec 3 22:56:45 2009 @@ -662,9 +662,10 @@ pkg.addPackage(p); pkCount++; } + 
pkg.addIsKeyWrappedList(((POMultiQueryPackage)pk).getIsKeyWrappedList()); addShiftedKeyInfoIndex(initial, current, (POMultiQueryPackage)pk); } else { -pkg.addPackage(pk); +pkg.addPackage(pk, mapKeyType); pkCount = 1; } @@ -673,8 +674,6 @@ String msg = Internal Error. Inconsistency in key index found during optimization.; throw new OptimizerException(msg, errCode, PigException.BUG); } - -boolean[] keyPos = pk.getKeyPositionsInTuple(); PODemux demux = (PODemux)to.getLeaves().get(0); int plCount = 0; @@ -685,12 +684,11 @@ // operator, then it's the only operator in the plan. ListPhysicalPlan pls = ((PODemux)root).getPlans(); for (PhysicalPlan pl : pls) { -demux.addPlan(pl, keyPos); +demux.addPlan(pl); plCount++; } -demux.addIsKeyWrappedList(((PODemux)root).getIsKeyWrappedList()); } else { -demux.addPlan(from, mapKeyType, keyPos); +demux.addPlan(from); plCount = 1; } @@ -700,11 +698,11 @@ throw new OptimizerException(msg, errCode, PigException.BUG); } -if (demux.isSameMapKeyType()) { +if (pkg.isSameMapKeyType()) { pkg.setKeyType(pk.getKeyType()); } else { pkg.setKeyType(DataType.TUPLE); -} +} } private void addShiftedKeyInfoIndex(int index, POPackage pkg) throws OptimizerException { @@ -785,11 +783,11 @@ from.remove(cpk); PODemux demux = (PODemux)to.getLeaves().get(0); - -boolean isSameKeyType = demux.isSameMapKeyType(); - + POMultiQueryPackage pkg = (POMultiQueryPackage)to.getRoots().get(0); +boolean isSameKeyType = pkg.isSameMapKeyType(); + // if current initial + 1, it means we had // a split in the map of the MROper we are trying to // merge. In that case we would have changed the indices @@ -818,6 +816,8 @@ pkCount = 1; } +pkg.setSameMapKeyType(isSameKeyType); + if (pkCount != total) { int errCode = 2146; String msg = Internal Error. 
Inconsistency in key index found during optimization.; @@ -831,8 +831,6 @@ pkg.setKeyType(cpk.getKeyType()); -boolean[] keyPos = cpk.getKeyPositionsInTuple(); - // See comment above for why we flatten the Packages // in the from plan - for the same reason, we flatten
svn commit: r885772 - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/java/org/apache/hadoop/zebra/types/ src/test/org/apache/hadoop/zebra/pig/ src/test/org/apache/hadoop/zebra/types/
Author: gates Date: Tue Dec 1 14:38:11 2009 New Revision: 885772 URL: http://svn.apache.org/viewvc?rev=885772view=rev Log: PIG-1074 Zebra store function should allow '::' in column names in output schema. Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaPrimitive.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=885772r1=885771r2=885772view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Tue Dec 1 14:38:11 2009 @@ -8,6 +8,9 @@ IMPROVEMENTS + PIG-1074 Zebra store function should allow '::' in column names in output + schema (yanz via gates) + PIG-1077 Support record(row)-based file split in Zebra's TableInputFormat (chaow via gates) Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt?rev=885772r1=885771r2=885772view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt Tue Dec 1 14:38:11 2009 @@ -68,7 +68,8 @@ #LETTER : [a-z, A-Z] | #DIGIT : [0-9] | #SPECIALCHAR : [_, ., #] -| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* +| #SCOPEOP : :: +| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* ( SCOPEOP ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )*)* } ColumnType Type() : Modified: 
hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt?rev=885772r1=885771r2=885772view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt Tue Dec 1 14:38:11 2009 @@ -73,7 +73,8 @@ | #OCTAL : [0 - 7] | #SPECIALCHAR : [_] | #FSSPECIALCHAR: [-, :, /] -| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* +| #SCOPEOP : :: +| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* ( SCOPEOP ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )*)* | SHORT : (OCTAL){3} } Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java?rev=885772view=auto == --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java (added) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java Tue Dec 1 14:38:11 2009 @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.zebra.pig; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.StringTokenizer; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import
svn commit: r885858 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java test/org/apache/pig/test/TestPushUpFilter.java
Author: gates Date: Tue Dec 1 18:31:25 2009 New Revision: 885858 URL: http://svn.apache.org/viewvc?rev=885858view=rev Log: PIG-1022: optimizer pushes filter before the foreach that generates column used by filter. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java hadoop/pig/trunk/test/org/apache/pig/test/TestPushUpFilter.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=885858r1=885857r2=885858view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Dec 1 18:31:25 2009 @@ -41,6 +41,9 @@ BUG FIXES +PIG-1022: optimizer pushes filter before the foreach that generates column +used by filter (daijy via gates) + PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java?rev=885858r1=885857r2=885858view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java Tue Dec 1 18:31:25 2009 @@ -36,6 +36,7 @@ import org.apache.pig.impl.logicalLayer.LOForEach; import org.apache.pig.impl.logicalLayer.LOLimit; import org.apache.pig.impl.logicalLayer.LOLoad; +import org.apache.pig.impl.logicalLayer.LOProject; import org.apache.pig.impl.logicalLayer.LOSplit; import org.apache.pig.impl.logicalLayer.LOStore; import org.apache.pig.impl.logicalLayer.LOStream; @@ -43,10 +44,12 @@ import org.apache.pig.impl.logicalLayer.LOUnion; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.logicalLayer.TopLevelProjectFinder; import org.apache.pig.impl.logicalLayer.UDFFinder; 
import org.apache.pig.impl.plan.DepthFirstWalker; import org.apache.pig.impl.plan.ProjectionMap; import org.apache.pig.impl.plan.RequiredFields; +import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.plan.optimizer.OptimizerException; import org.apache.pig.PigException; import org.apache.pig.impl.util.MultiMap; @@ -256,6 +259,18 @@ PairBoolean, SetInteger mappingResult = isRequiredFieldMapped(requiredField, predecessor.getProjectionMap()); boolean mapped = mappingResult.first; + +// Check if it is a direct mapping, that is, project optionally followed by cast, so if project-project, it is not +// considered as a mapping +for (PairInteger, Integer pair : requiredField.getFields()) +{ +if (!isFieldSimple(loForEach.getForEachPlans().get(pair.second))) +{ +mapped = false; +break; +} +} + if (!mapped) { return false; } @@ -420,4 +435,44 @@ return new PairBoolean, SetInteger(true, grandParentIndexes); } + +/** + * Check if the inner plan is simple + * + * @param lp + *logical plan to check + * @return Whether if the logical plan is a simple project optionally followed by cast + */ +boolean isFieldSimple(LogicalPlan lp) throws OptimizerException +{ +TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(lp); + +try { +projectFinder.visit(); +} catch (VisitorException ve) { +throw new OptimizerException(); +} +if (projectFinder.getProjectSet()!=null projectFinder.getProjectSet().size()==1) +{ +LOProject project = projectFinder.getProjectSet().iterator().next(); +if (lp.getPredecessors(project)==null) +{ +LogicalOperator pred = project; +while (lp.getSuccessors(pred)!=null) +{ +if (lp.getSuccessors(pred).size()!=1) +return false; +if (!(lp.getSuccessors(pred).get(0) instanceof LOCast)) +{ +return false; +} +pred = lp.getSuccessors(pred).get(0); +} +return true; +} +return false; +} +else +return true; +} } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPushUpFilter.java URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache
svn commit: r885859 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/java/org/apache/hadoop/zebra/types/ src/test/org/apache/hadoop/zebra/pig/ src/te
Author: gates Date: Tue Dec 1 18:33:01 2009 New Revision: 885859 URL: http://svn.apache.org/viewvc?rev=885859view=rev Log: PIG-1074 Zebra store function should allow '::' in column names in output schema. Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaPrimitive.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt?rev=885859r1=885858r2=885859view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt Tue Dec 1 18:33:01 2009 @@ -26,6 +26,9 @@ OPTIMIZATIONS BUG FIXES +PIG-1074 Zebra store function should allow '::' in column names in output + schema (yanz via gates) + PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via gates) Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt?rev=885859r1=885858r2=885859view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt Tue Dec 1 18:33:01 2009 @@ -68,7 +68,8 @@ #LETTER : [a-z, A-Z] | #DIGIT : [0-9] | #SPECIALCHAR : [_, ., #] -| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* +| #SCOPEOP : :: +| IDENTIFIER: ( LETTER 
)+ ( DIGIT | LETTER | SPECIALCHAR )* ( SCOPEOP ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )*)* } ColumnType Type() : Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt?rev=885859r1=885858r2=885859view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TableStorageParser.jjt Tue Dec 1 18:33:01 2009 @@ -73,7 +73,8 @@ | #OCTAL : [0 - 7] | #SPECIALCHAR : [_] | #FSSPECIALCHAR: [-, :, /] -| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* +| #SCOPEOP : :: +| IDENTIFIER: ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )* ( SCOPEOP ( LETTER )+ ( DIGIT | LETTER | SPECIALCHAR )*)* | SHORT : (OCTAL){3} } Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java?rev=885859view=auto == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java (added) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCogroup.java Tue Dec 1 18:33:01 2009 @@ -0,0 +1,241 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.zebra.pig; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import
svn commit: r885929 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/ src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/logicalLayer/parser/ test/
Author: gates Date: Tue Dec 1 21:03:37 2009 New Revision: 885929 URL: http://svn.apache.org/viewvc?rev=885929view=rev Log: PIG-990 Provide a way to pin LogicalOperator Options. Added: hadoop/pig/trunk/test/org/apache/pig/test/TestPinOptions.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCogroup.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=885929r1=885928r2=885929view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Dec 1 21:03:37 2009 @@ -24,6 +24,8 @@ IMPROVEMENTS +PIG-990: Provide a way to pin LogicalOperator Options (dvryaboy via gates) + PIG-1103: refactoring of commit tests (olgan) PIG-1101: Allow arugment to limit to be long in addition to int (ashutoshc via Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java?rev=885929r1=885928r2=885929view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/LogToPhyTranslationVisitor.java Tue Dec 1 21:03:37 2009 @@ -969,7 +969,7 @@ logToPhyMap.put(loj, smj); return; } - else if (loj.getJoinType() == LOJoin.JOINTYPE.REGULAR){ + else if (loj.getJoinType() == LOJoin.JOINTYPE.HASH){ POGlobalRearrange poGlobal = new POGlobalRearrange(new OperatorKey( scope, 
nodeGen.getNextNodeId(scope)), loj .getRequestedParallelism()); Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCogroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCogroup.java?rev=885929r1=885928r2=885929view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCogroup.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCogroup.java Tue Dec 1 21:03:37 2009 @@ -64,6 +64,11 @@ private MultiMapLogicalOperator, LogicalPlan mGroupByPlans; private GROUPTYPE mGroupType; +/** + * static constant to refer to the option of selecting a group type + */ +public final static Integer OPTION_GROUPTYPE = 1; + /** * * @param plan Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java?rev=885929r1=885928r2=885929view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOJoin.java Tue Dec 1 21:03:37 2009 @@ -49,12 +49,13 @@ * Enum for the type of join */ public static enum JOINTYPE { -REGULAR, // Regular join +HASH,// Hash Join REPLICATED, // Fragment Replicated join SKEWED, // Skewed Join MERGE // Sort Merge Join }; + /** * LOJoin contains a list of logical operators corresponding to the * relational operators and a list of generates for each relational @@ -66,7 +67,12 @@ private boolean[] mInnerFlags; private JOINTYPE mJoinType; // Retains the type of the join -/** + /** +* static constant to refer to the option of selecting a join type +*/ + public final static Integer OPTION_JOIN = 1; + + /** * * @param plan *LogicalPlan this operator is a part of. 
Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java?rev=885929r1=885928r2=885929view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LogicalOperator.java Tue Dec 1 21
svn commit: r886015 - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/ src/java/org/apache/hadoop/zebra/schema/ src/java/org/apache/
Author: gates Date: Tue Dec 1 23:55:24 2009 New Revision: 886015 URL: http://svn.apache.org/viewvc?rev=886015view=rev Log: PIG-1098 Zebra Performance Optimizations. Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BlockDistribution.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableRecordReader.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SubColumnExtraction.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TypesUtils.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=886015r1=886014r2=886015view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Tue Dec 1 23:55:24 2009 @@ -8,6 +8,8 @@ IMPROVEMENTS +PIG-1098 Zebra Performance Optimizations (yanz via gates) + PIG-1074 Zebra store function should allow '::' in column names in output schema (yanz via gates) Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java?rev=886015r1=886014r2=886015view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java Tue Dec 1 23:55:24 2009 @@ -52,6 +52,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGRangeSplit; import 
org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGRowSplit; +import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGScanner; import org.apache.hadoop.zebra.types.CGSchema; import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.types.Partition; @@ -874,7 +875,7 @@ */ private class BTScanner implements TableScanner { private Projection schema; - private TableScanner[] cgScanners; + private CGScanner[] cgScanners; private int opCount = 0; Random random = new Random(System.nanoTime()); // checking for consistency once every 1000 times. @@ -936,7 +937,7 @@ } // Helper function for initialization. - private TableScanner createCGScanner(int cgIndex, CGRowSplit cgRowSplit, + private CGScanner createCGScanner(int cgIndex, CGRowSplit cgRowSplit, RangeSplit rangeSplit, BytesWritable beginKey, BytesWritable endKey) @@ -972,7 +973,7 @@ try { schema = partition.getProjection(); - cgScanners = new TableScanner[colGroups.length]; + cgScanners = new CGScanner[colGroups.length]; for (int i = 0; i colGroups.length; ++i) { if (!isCGDeleted(i) partition.isCGNeeded(i)) { @@ -1020,7 +1021,7 @@ for (int nx = 0; nx cgScanners.length; nx++) { if (cgScanners[nx] != null) { -cur = cgScanners[nx].advance(); +cur = cgScanners[nx].advanceCG(); if (!firstAdvance) { if (cur != first) { throw new IOException( @@ -1038,9 +1039,6 @@ @Override public boolean atEnd() throws IOException { -if (cgScanners.length == 0) { - return true; -} boolean ret = true; int i; for (i = 0; i cgScanners.length; i++) @@ -1077,16 +1075,12 @@ @Override public void getKey(BytesWritable key) throws IOException { -if (cgScanners.length == 0) { - return; -} - int i; for (i = 0; i cgScanners.length; i++) { if (cgScanners[i] != null) { -cgScanners[i].getKey(key); +cgScanners[i].getCGKey(key); break; } } @@ -1104,7 +1098,7 @@ if (cgScanners[index] != null) { BytesWritable key2 = new BytesWritable(); -cgScanners[index].getKey(key2); +cgScanners[index].getCGKey(key2); if (key.equals(key2)) { return; } 
@@ -1129,7 +1123,7
svn commit: r886018 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/ src/java/org/apache/hadoop/zebra/schema/ src/ja
Author: gates Date: Wed Dec 2 00:30:41 2009 New Revision: 886018 URL: http://svn.apache.org/viewvc?rev=886018view=rev Log: PIG-1098 Zebra Performance Optimizations. Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BlockDistribution.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableRecordReader.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SubColumnExtraction.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/TypesUtils.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt?rev=886018r1=886017r2=886018view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt Wed Dec 2 00:30:41 2009 @@ -6,6 +6,8 @@ IMPROVEMENTS +PIG-1098 Zebra Performance Optimizations (yanz via gates) + PIG-1077 Support record(row)-based file split in Zebra's TableInputFormat (chaow via gates) Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java?rev=886018r1=886017r2=886018view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java (original) +++ 
hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/BasicTable.java Wed Dec 2 00:30:41 2009 @@ -52,6 +52,7 @@ import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGRangeSplit; import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGRowSplit; +import org.apache.hadoop.zebra.io.ColumnGroup.Reader.CGScanner; import org.apache.hadoop.zebra.types.CGSchema; import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.types.Partition; @@ -874,7 +875,7 @@ */ private class BTScanner implements TableScanner { private Projection schema; - private TableScanner[] cgScanners; + private CGScanner[] cgScanners; private int opCount = 0; Random random = new Random(System.nanoTime()); // checking for consistency once every 1000 times. @@ -936,7 +937,7 @@ } // Helper function for initialization. - private TableScanner createCGScanner(int cgIndex, CGRowSplit cgRowSplit, + private CGScanner createCGScanner(int cgIndex, CGRowSplit cgRowSplit, RangeSplit rangeSplit, BytesWritable beginKey, BytesWritable endKey) @@ -972,7 +973,7 @@ try { schema = partition.getProjection(); - cgScanners = new TableScanner[colGroups.length]; + cgScanners = new CGScanner[colGroups.length]; for (int i = 0; i colGroups.length; ++i) { if (!isCGDeleted(i) partition.isCGNeeded(i)) { @@ -1020,7 +1021,7 @@ for (int nx = 0; nx cgScanners.length; nx++) { if (cgScanners[nx] != null) { -cur = cgScanners[nx].advance(); +cur = cgScanners[nx].advanceCG(); if (!firstAdvance) { if (cur != first) { throw new IOException( @@ -1038,9 +1039,6 @@ @Override public boolean atEnd() throws IOException { -if (cgScanners.length == 0) { - return true; -} boolean ret = true; int i; for (i = 0; i cgScanners.length; i++) @@ -1077,16 +1075,12 @@ @Override public void getKey(BytesWritable key) throws IOException { -if (cgScanners.length == 0) { - return; -} - int i; for (i = 0; i cgScanners.length; i++) { if (cgScanners[i] != null) { 
-cgScanners[i].getKey(key); +cgScanners[i].getCGKey(key); break; } } @@ -1104,7 +1098,7 @@ if (cgScanners[index] != null
svn commit: r886030 - in /hadoop/pig/branches/branch-0.6: CHANGES.txt src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java test/org/apache/pig/test/TestPushUpFilter.java
Author: gates Date: Wed Dec 2 01:47:44 2009 New Revision: 886030 URL: http://svn.apache.org/viewvc?rev=886030view=rev Log: PIG-1022: optimizer pushes filter before the foreach that generates column used by filter. Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java hadoop/pig/branches/branch-0.6/test/org/apache/pig/test/TestPushUpFilter.java Modified: hadoop/pig/branches/branch-0.6/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/CHANGES.txt?rev=886030r1=886029r2=886030view=diff == --- hadoop/pig/branches/branch-0.6/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/CHANGES.txt Wed Dec 2 01:47:44 2009 @@ -127,6 +127,9 @@ BUG FIXES +PIG-1022: optimizer pushes filter before the foreach that generates column +used by filter (daijy via gates) + PIG-1108: Incorrect map output key type in MultiQuery optimization (rding via olgan) PIG-1107: PigLineRecordReader bails out on an empty line for compressed data (ankit.modi via olgan) Modified: hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java?rev=886030r1=886029r2=886030view=diff == --- hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java (original) +++ hadoop/pig/branches/branch-0.6/src/org/apache/pig/impl/logicalLayer/optimizer/PushUpFilter.java Wed Dec 2 01:47:44 2009 @@ -36,6 +36,7 @@ import org.apache.pig.impl.logicalLayer.LOForEach; import org.apache.pig.impl.logicalLayer.LOLimit; import org.apache.pig.impl.logicalLayer.LOLoad; +import org.apache.pig.impl.logicalLayer.LOProject; import org.apache.pig.impl.logicalLayer.LOSplit; import org.apache.pig.impl.logicalLayer.LOStore; import org.apache.pig.impl.logicalLayer.LOStream; @@ -43,10 +44,12 @@ import 
org.apache.pig.impl.logicalLayer.LOUnion; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; +import org.apache.pig.impl.logicalLayer.TopLevelProjectFinder; import org.apache.pig.impl.logicalLayer.UDFFinder; import org.apache.pig.impl.plan.DepthFirstWalker; import org.apache.pig.impl.plan.ProjectionMap; import org.apache.pig.impl.plan.RequiredFields; +import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.plan.optimizer.OptimizerException; import org.apache.pig.PigException; import org.apache.pig.impl.util.MultiMap; @@ -256,6 +259,18 @@ PairBoolean, SetInteger mappingResult = isRequiredFieldMapped(requiredField, predecessor.getProjectionMap()); boolean mapped = mappingResult.first; + +// Check if it is a direct mapping, that is, project optionally followed by cast, so if project-project, it is not +// considered as a mapping +for (PairInteger, Integer pair : requiredField.getFields()) +{ +if (!isFieldSimple(loForEach.getForEachPlans().get(pair.second))) +{ +mapped = false; +break; +} +} + if (!mapped) { return false; } @@ -420,4 +435,44 @@ return new PairBoolean, SetInteger(true, grandParentIndexes); } + +/** + * Check if the inner plan is simple + * + * @param lp + *logical plan to check + * @return Whether if the logical plan is a simple project optionally followed by cast + */ +boolean isFieldSimple(LogicalPlan lp) throws OptimizerException +{ +TopLevelProjectFinder projectFinder = new TopLevelProjectFinder(lp); + +try { +projectFinder.visit(); +} catch (VisitorException ve) { +throw new OptimizerException(); +} +if (projectFinder.getProjectSet()!=null projectFinder.getProjectSet().size()==1) +{ +LOProject project = projectFinder.getProjectSet().iterator().next(); +if (lp.getPredecessors(project)==null) +{ +LogicalOperator pred = project; +while (lp.getSuccessors(pred)!=null) +{ +if (lp.getSuccessors(pred).size()!=1) +return false; +if (!(lp.getSuccessors(pred).get(0) instanceof 
LOCast)) +{ +return false; +} +pred = lp.getSuccessors(pred).get(0); +} +return true
svn commit: r884235 - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/schema/ src/test/org/apache/hadoop/zebra/types/
Author: gates Date: Wed Nov 25 19:37:41 2009 New Revision: 884235 URL: http://svn.apache.org/viewvc?rev=884235view=rev Log: PIG-1095: Schema support of anonymous fields in COLECTION fails. Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaAnonymousCollection.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/SchemaParser.jjt hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestSchemaMap.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=884235r1=884234r2=884235view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Nov 25 19:37:41 2009 @@ -28,7 +28,11 @@ OPTIMIZATIONS BUG FIXES -PIG_1078: merge join with empty table failed (yanz via gates) + +PIG-1095: Schema support of anonymous fields in COLECTION fails (yanz via + gates) + +PIG-1078: merge join with empty table failed (yanz via gates) PIG-1091: Exception when load with projection of map keys on a map column that is not map split (yanz via gates). Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java?rev=884235r1=884234r2=884235view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/schema/Schema.java Wed Nov 25 19:37:41 2009 @@ -332,7 +332,7 @@ private ArrayListColumnSchema mFields; private HashMapString, ColumnSchema mNames; - private boolean dupColNameAllowed; + private boolean projection; /** * Constructor - schema for empty schema (zero-column) . 
@@ -342,6 +342,17 @@ } /** + * Constructor - schema for empty projection/schema (zero-column) . + * + * @param projection + * A projection schema or not + */ + public Schema(boolean projection) { +this.projection = projection; +init(); + } + + /** * Constructor - create a schema from a string representation. * * @param schema @@ -355,10 +366,9 @@ init(schema, false); } - public Schema(String schema, boolean dupAllowed) throws ParseException { -dupColNameAllowed = dupAllowed; -// suppose if duplicate is allowed, then it's from projection and hence virtual column is allowed -init(schema, dupAllowed); + public Schema(String schema, boolean projection) throws ParseException { +this.projection = projection; +init(schema, projection); } public Schema(ColumnSchema fs) throws ParseException { @@ -384,18 +394,16 @@ * Column to be added to the schema */ public void add(ColumnSchema f) throws ParseException { -add(f, false); - } - - private void add(ColumnSchema f, boolean dupAllowed) throws ParseException { if (f == null) { + if (!projection) +throw new ParseException(Empty column schema is not allowed); mFields.add(null); return; } f.index = mFields.size(); mFields.add(f); if (null != f null != f.name) { - if (mNames.put(f.name, f) != null !dupAllowed !dupColNameAllowed) + if (mNames.put(f.name, f) != null !projection) throw new ParseException(Duplicate field name: + f.name); } } @@ -684,7 +692,7 @@ org.apache.hadoop.zebra.tfile.Utils.writeString(out, toString()); } - private void init(String[] columnNames, boolean virtualColAllowed) throws ParseException { + private void init(String[] columnNames, boolean projection) throws ParseException { // the arg must be of type or they will be treated as the default type mFields = new ArrayListColumnSchema(); mNames = new HashMapString, ColumnSchema(); @@ -698,7 +706,10 @@ } TableSchemaParser parser = new TableSchemaParser(new StringReader(sb.toString())); -parser.RecordSchema(this, virtualColAllowed); +if (projection) + 
parser.ProjectionSchema(this); +else + parser.RecordSchema(this); } private void init() { @@ -706,7 +717,7 @@ mNames = new HashMapString, ColumnSchema(); } - private void init(String columnString, boolean virtualColAllowed) throws ParseException { + private void init(String columnString, boolean projection) throws ParseException { String trimmedColumnStr; if (columnString == null || (trimmedColumnStr = columnString.trim()).isEmpty()) { init(); @@ -717,7 +728,7
svn commit: r883800 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt src/java/org/apache/hadoop/zebra/io/ColumnGroup.java src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java src/test/org/ap
Author: gates Date: Tue Nov 24 17:50:58 2009 New Revision: 883800 URL: http://svn.apache.org/viewvc?rev=883800view=rev Log: PIG_1078: merge join with empty table failed. Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoinEmpty.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=883800r1=883799r2=883800view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Tue Nov 24 17:50:58 2009 @@ -28,6 +28,7 @@ OPTIMIZATIONS BUG FIXES +PIG_1078: merge join with empty table failed (yanz via gates) PIG-1091: Exception when load with projection of map keys on a map column that is not map split (yanz via gates). 
Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java?rev=883800r1=883799r2=883800view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java Tue Nov 24 17:50:58 2009 @@ -509,14 +509,16 @@ } BlockDistribution ret = new BlockDistribution(); - CGIndexEntry entry = cgindex.get(split.fileIndex); - FileStatus tfileStatus = fs.getFileStatus(new Path(path, entry.getName())); - - BlockLocation[] locations = fs.getFileBlockLocations(tfileStatus, split.startByte, split.numBytes); - for (BlockLocation l : locations) { -ret.add(l); - } - + if (split.fileIndex = 0) + { +CGIndexEntry entry = cgindex.get(split.fileIndex); +FileStatus tfileStatus = fs.getFileStatus(new Path(path, entry.getName())); + +BlockLocation[] locations = fs.getFileBlockLocations(tfileStatus, split.startByte, split.numBytes); +for (BlockLocation l : locations) { + ret.add(l); +} + } return ret; } @@ -530,6 +532,9 @@ void fillRowSplit(CGRowSplit rowSplit, long startOffset, long length) throws IOException { + if (rowSplit.fileIndex 0) +return; + Path tfPath = new Path(path, cgindex.get(rowSplit.fileIndex).getName()); FileStatus tfile = fs.getFileStatus(tfPath); @@ -748,7 +753,6 @@ long length = lengths[i]; Path path = paths[i]; int idx = cgindex.getFileIndex(path); - lst.add(new CGRowSplit(idx, start, length)); } @@ -1061,6 +1065,10 @@ if (!isSorted()) { throw new IOException(Cannot seek in unsorted Column Gruop); } +if (atEnd()) +{ + return false; +} int index = cgindex.lowerBound(new ByteArray(key.get(), 0, key.getSize()), comparator); @@ -1764,6 +1772,8 @@ int getFileIndex(Path path) throws IOException { String filename = path.getName(); + if (index.isEmpty()) +return -1; for (CGIndexEntry cgie : index) { if 
(cgie.getName().equals(filename)) { return cgie.getIndex(); Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java?rev=883800r1=883799r2=883800view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java Tue Nov 24 17:50:58 2009 @@ -486,11 +486,6 @@ bd = BlockDistribution.sum(bd, reader.getBlockDistribution((RangeSplit)null)); } - if (bd == null) { -// should never happen. -return new InputSplit[0]; - } - SortedTableSplit split = new SortedTableSplit(null, null, bd, conf); return new InputSplit[] { split }; } @@ -509,7 +504,8 @@ if (keyDistri == null) { // should never happen. - return new InputSplit[0]; + SortedTableSplit split = new SortedTableSplit(null, null, null, conf); + return new InputSplit[] { split }; } if (numSplits 0) { @@ -571,13 +567,20 @@ ArrayListInputSplit
svn commit: r883877 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt test/org/apache/pig/test/TestLogicalPlanBuilder.java
Author: gates Date: Tue Nov 24 21:07:28 2009 New Revision: 883877 URL: http://svn.apache.org/viewvc?rev=883877view=rev Log: PIG-1101: Allow argument to limit to be long in addition to int. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=883877r1=883876r2=883877view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Tue Nov 24 21:07:28 2009 @@ -24,6 +24,9 @@ IMPROVEMENTS +PIG-1101: Allow argument to limit to be long in addition to int (ashutoshc via + gates) + PIG-872: use distributed cache for the replicated data set in FR join (sriranjan via olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=883877r1=883876r2=883877view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Tue Nov 24 21:07:28 2009 @@ -1470,15 +1470,18 @@ { LogicalOperator input; Token t; +long l; log.trace(Entering LimitClause); } { ( input = NestedExpr(lp) {log.debug(Limit input: + input);} - t = INTEGER +( + t = INTEGER { l = Long.parseLong(t.image); } + | t = LONGINTEGER { l = Long.parseLong(t.image.substring(0, t.image.length() - 1)); } ) +) { -long l = Integer.parseInt(t.image); LogicalOperator limit = new LOLimit(lp, new OperatorKey(scope, getNextId()), l); addAlias(input.getAlias(), input); lp.add(limit); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java?rev=883877r1=883876r2=883877view=diff == --- 
hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java Tue Nov 24 21:07:28 2009 @@ -999,6 +999,11 @@ } @Test +public void testLimitWithLong() { +buildPlan(limit (load 'a') 100L;); +} + +@Test public void testQuery75() { buildPlan(a = union (load 'a'), (load 'b'), (load 'c');); buildPlan(b = foreach a {generate $0;} parallel 10;);
svn commit: r883926 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/ src/test/org/apache/hadoop/zebra/pig/
Author: gates Date: Tue Nov 24 23:33:02 2009 New Revision: 883926 URL: http://svn.apache.org/viewvc?rev=883926view=rev Log: PIG-1078: merge join with empty table failed. Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMergeJoinEmpty.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt?rev=883926r1=883925r2=883926view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt Tue Nov 24 23:33:02 2009 @@ -27,6 +27,8 @@ BUG FIXES +PIG-1078: merge join with empty table failed (yanz via gates) + PIG-1091: Exception when load with projection of map keys on a map column that is not map split (yanz via gates) Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java?rev=883926r1=883925r2=883926view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/io/ColumnGroup.java Tue Nov 24 23:33:02 2009 @@ -509,14 +509,16 @@ } BlockDistribution ret = new BlockDistribution(); - CGIndexEntry entry = cgindex.get(split.fileIndex); - FileStatus tfileStatus = fs.getFileStatus(new Path(path, entry.getName())); - - BlockLocation[] locations = fs.getFileBlockLocations(tfileStatus, split.startByte, split.numBytes); - for (BlockLocation l : locations) { -ret.add(l); - 
} - + if (split.fileIndex = 0) + { +CGIndexEntry entry = cgindex.get(split.fileIndex); +FileStatus tfileStatus = fs.getFileStatus(new Path(path, entry.getName())); + +BlockLocation[] locations = fs.getFileBlockLocations(tfileStatus, split.startByte, split.numBytes); +for (BlockLocation l : locations) { + ret.add(l); +} + } return ret; } @@ -530,6 +532,9 @@ void fillRowSplit(CGRowSplit rowSplit, long startOffset, long length) throws IOException { + if (rowSplit.fileIndex 0) +return; + Path tfPath = new Path(path, cgindex.get(rowSplit.fileIndex).getName()); FileStatus tfile = fs.getFileStatus(tfPath); @@ -748,7 +753,6 @@ long length = lengths[i]; Path path = paths[i]; int idx = cgindex.getFileIndex(path); - lst.add(new CGRowSplit(idx, start, length)); } @@ -1061,6 +1065,10 @@ if (!isSorted()) { throw new IOException(Cannot seek in unsorted Column Gruop); } +if (atEnd()) +{ + return false; +} int index = cgindex.lowerBound(new ByteArray(key.get(), 0, key.getSize()), comparator); @@ -1764,6 +1772,8 @@ int getFileIndex(Path path) throws IOException { String filename = path.getName(); + if (index.isEmpty()) +return -1; for (CGIndexEntry cgie : index) { if (cgie.getName().equals(filename)) { return cgie.getIndex(); Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java?rev=883926r1=883925r2=883926view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/TableInputFormat.java Tue Nov 24 23:33:02 2009 @@ -486,11 +486,6 @@ bd = BlockDistribution.sum(bd, reader.getBlockDistribution((RangeSplit)null)); } - if (bd == null) { -// should never happen. 
-return new InputSplit[0]; - } - SortedTableSplit split = new SortedTableSplit(null, null, bd, conf); return new InputSplit[] { split }; } @@ -509,7 +504,8 @@ if (keyDistri == null) { // should never happen. - return new InputSplit
svn commit: r883466 - in /hadoop/pig/branches/branch-0.6/contrib/zebra: CHANGES.txt src/java/org/apache/hadoop/zebra/types/Partition.java src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.ja
Author: gates Date: Mon Nov 23 19:11:00 2009 New Revision: 883466 URL: http://svn.apache.org/viewvc?rev=883466view=rev Log: PIG-1091: Exception when load with projection of map keys on a map column that is not map split. Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt?rev=883466r1=883465r2=883466view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/CHANGES.txt Mon Nov 23 19:11:00 2009 @@ -27,6 +27,9 @@ BUG FIXES +PIG-1091: Exception when load with projection of map keys on a map column that is + not map split (yanz via gates) + PIG-1026: [zebra] map split returns null (yanz via pradeepkth) PIG-1057 Zebra does not support concurrent deletions of column groups now Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=883466r1=883465r2=883466view=diff == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java (original) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Mon Nov 23 19:11:00 2009 @@ -726,8 +726,6 @@ cgindex = mapentry.getKey(); if (cgindex == null) throw new AssertionError( Internal Logical Error: RECORD does not have a CG index.); - if (mapentry.getValue() != null) -throw new AssertionError( Internal Logical Error: RECORD should not have a split key map.); cgentry = getCGEntry(cgindex.getCGIndex()); 
parCol = new PartitionedColumn(i, false); cgentry.addUser(parCol, name); Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java?rev=883466view=auto == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java (added) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Mon Nov 23 19:11:00 2009 @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.hadoop.zebra.io; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.zebra.io.BasicTable; +import org.apache.hadoop.zebra.io.TableInserter; +import org.apache.hadoop.zebra.io.TableScanner; +import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit; +import org.apache.hadoop.zebra.parser.ParseException; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.types.TypesUtils; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * Test projections on complicated column types. + * + */ +public class
svn commit: r882929 [7/7] - in /hadoop/pig/branches/branch-0.6/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/ src/java/org/apache/hadoop/zebra/schema/
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageGrammar.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageGrammar.java?rev=882929view=auto == --- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageGrammar.java (added) +++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageGrammar.java Sat Nov 21 15:36:12 2009 @@ -0,0 +1,833 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.hadoop.zebra.types; + +import java.io.BufferedReader; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.Map; +import javax.security.auth.login.LoginException; + +import junit.framework.Assert; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.zebra.io.BasicTable; +import org.apache.hadoop.zebra.io.TableInserter; +import org.apache.hadoop.zebra.parser.ParseException; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.types.TypesUtils; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * Test projections on complicated column types. 
+ * + */ +public class TestStorageGrammar { + + final static String STR_SCHEMA = s1:bool, s2:int, s3:long, s4:float, s5:string, s6:bytes, r1:record(f1:int, f2:long), r2:record(r3:record(f3:float, f4)), m1:map(string),m2:map(map(int)), c:collection(f13:double, f14:float, f15:bytes),s7:string, s8:string, s9:string, s10:string, s11:string, s12:string, s13:string, s14:string, s15:string, s16:string, s17:string, s18:string, s19:string, s20:string, s21:string, s22:string, s23:string; + static String STR_STORAGE = null; + final private static Configuration conf = new Configuration(); + private static Path path; + private static FileSystem fs; + private static String user; + private static String group; + private static String defaultUser; + + static{ +try{ +String command = whoami; +Process process = new ProcessBuilder(command).start(); +InputStream is = process.getInputStream(); +InputStreamReader isr = new InputStreamReader(is); +BufferedReader br = new BufferedReader(isr); +System.out.printf(Output of running %s is:, + command); +String line; +while ((line = br.readLine()) != null) { + System.out.println(default user0: +line); + defaultUser = line; + +} +}catch(Exception e){ + e.printStackTrace(); +} + } + + @BeforeClass + public static void setUpOnce() throws IOException, LoginException { + if (System.getProperty(user) == null) { + System.setProperty(user, defaultUser); +} +user = System.getProperty(user); +if (System.getProperty(group) == null) { + System.setProperty(group, users); +} +group = System.getProperty(group); +System.out.println(user: + user + group: + group); +STR_STORAGE = [s1, s2] COMPRESS BY gz SECURE BY user: ++ user ++ group: ++ group ++ perm:777 SERIALIZE BY pig; [m1#{a}] SERIALIZE BY pig COMPRESS BY gz SECURE BY user: ++ user ++ group: ++ group ++ perm:777 ; [r1.f1] SECURE BY user: ++ user ++ group: ++ group ++ perm:777 SERIALIZE BY pig COMPRESS BY gz ; [s3, s4, r2.r3.f3] SERIALIZE BY pig SECURE BY user: ++ user ++ group: ++ group ++ perm:777 
COMPRESS BY gz ; [s5, s6, m2#{x|y}] compREss by gz secURe by user: ++ user ++ group: ++ group ++
svn commit: r882818 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt src/java/org/apache/hadoop/zebra/types/Partition.java src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java
Author: gates Date: Sat Nov 21 01:30:22 2009 New Revision: 882818 URL: http://svn.apache.org/viewvc?rev=882818view=rev Log: PIG-1091 Exception when load with projection of map keys on a map column that is not map split. Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=882818r1=882817r2=882818view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Sat Nov 21 01:30:22 2009 @@ -29,6 +29,9 @@ BUG FIXES +PIG-1091: Exception when load with projection of map keys on a map column + that is not map split (yanz via gates). + PIG-1026: [zebra] map split returns null (yanz via pradeepkth) PIG-1057 Zebra does not support concurrent deletions of column groups now Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java?rev=882818r1=882817r2=882818view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/types/Partition.java Sat Nov 21 01:30:22 2009 @@ -726,8 +726,6 @@ cgindex = mapentry.getKey(); if (cgindex == null) throw new AssertionError( Internal Logical Error: RECORD does not have a CG index.); - if (mapentry.getValue() != null) -throw new AssertionError( Internal Logical Error: RECORD should not have a split key map.); cgentry = getCGEntry(cgindex.getCGIndex()); parCol = new PartitionedColumn(i, false); cgentry.addUser(parCol, name); Added: 
hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java?rev=882818view=auto == --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java (added) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestProjectionOnFullMap.java Sat Nov 21 01:30:22 2009 @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with this + * work for additional information regarding copyright ownership. The ASF + * licenses this file to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, WITHOUT + * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the + * License for the specific language governing permissions and limitations under + * the License. 
+ */ +package org.apache.hadoop.zebra.io; + +import java.io.IOException; +import java.io.PrintStream; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalFileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RawLocalFileSystem; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.zebra.io.BasicTable; +import org.apache.hadoop.zebra.io.TableInserter; +import org.apache.hadoop.zebra.io.TableScanner; +import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit; +import org.apache.hadoop.zebra.parser.ParseException; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.types.TypesUtils; +import org.apache.pig.data.DataBag; +import org.apache.pig.data.DataByteArray; +import org.apache.pig.data.Tuple; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * + * Test projections on complicated column types. + * + */ +public class TestProjectionOnFullMap { + final static String STR_SCHEMA = f1:string, f2:map; + final static String STR_STORAGE = [f1]; [f2]; + private static Configuration conf; + private static Path path; + private static FileSystem fs
svn commit: r882340 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt build-contrib.xml
Author: gates Date: Thu Nov 19 22:37:10 2009 New Revision: 882340 URL: http://svn.apache.org/viewvc?rev=882340view=rev Log: Changed version number to be 0.7.0 to match Pig version number change. Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/build-contrib.xml Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=882340r1=882339r2=882340view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Thu Nov 19 22:37:10 2009 @@ -3,6 +3,8 @@ Trunk (unreleased changes) INCOMPATIBLE CHANGES +PIG-1099 Changed version number to be 0.7.0 to match Pig version number + change (yanz via gates) IMPROVEMENTS Modified: hadoop/pig/trunk/contrib/zebra/build-contrib.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/build-contrib.xml?rev=882340r1=882339r2=882340view=diff == --- hadoop/pig/trunk/contrib/zebra/build-contrib.xml (original) +++ hadoop/pig/trunk/contrib/zebra/build-contrib.xml Thu Nov 19 22:37:10 2009 @@ -23,7 +23,7 @@ property name=name value=${ant.project.name}/ property name=root value=${basedir}/ - property name=version value=0.6.0-dev/ + property name=version value=0.7.0-dev/ !-- Load all the default properties, and any the user wants-- !-- to contribute (without having to type -D or edit this file --
svn commit: r880975 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/
Author: gates Date: Mon Nov 16 21:43:41 2009 New Revision: 880975 URL: http://svn.apache.org/viewvc?rev=880975view=rev Log: PIG-1085: Pass JobConf and UDF specific configuration information to UDFs. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapBase.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapReduce.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=880975r1=880974r2=880975view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon Nov 16 21:43:41 2009 @@ -23,6 +23,8 @@ INCOMPATIBLE CHANGES IMPROVEMENTS +PIG-1085: Pass JobConf and UDF specific configuration information to UDFs + (gates) OPTIMIZATIONS Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=880975r1=880974r2=880975view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Mon Nov 16 21:43:41 2009 @@ -81,6 +81,7 @@ import org.apache.pig.impl.util.JarManager; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.Pair; +import org.apache.pig.impl.util.UDFContext; /** * This is compiler class that takes an MROperPlan and converts @@ -596,6 +597,9 @@ jobConf.setOutputCommitter(PigOutputCommitter.class); Job job = new Job(jobConf); jobStoreMap.put(job,new PairListPOStore, Path(storeLocations, tmpLocation)); + +// Serialize the UDF specific context info. 
+UDFContext.getUDFContext().serialize(jobConf); return job; } catch (JobCreationException jce) { throw jce; Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapBase.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapBase.java?rev=880975r1=880974r2=880975view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapBase.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapBase.java Mon Nov 16 21:43:41 2009 @@ -49,6 +49,7 @@ import org.apache.pig.backend.hadoop.executionengine.physicalLayer.util.PlanHelper; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.SpillableMemoryManager; +import org.apache.pig.impl.util.UDFContext; public abstract class PigMapBase extends MapReduceBase{ private static final Tuple DUMMYTUPLE = null; @@ -166,6 +167,12 @@ keyType = ((byte[])ObjectSerializer.deserialize(job.get(pig.map.keytype)))[0]; pigReporter = new ProgressableReporter(); + +// Get the UDF specific context +UDFContext udfc = UDFContext.getUDFContext(); +udfc.addJobConf(job); +udfc.deserialize(); + if(!(mp.isEmpty())) { ListOperatorKey targetOpKeys = (ArrayListOperatorKey)ObjectSerializer.deserialize(job.get(map.target.ops)); @@ -178,7 +185,6 @@ } - } catch (IOException ioe) { String msg = Problem while configuring map plan.; throw new RuntimeException(msg, ioe); Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapReduce.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapReduce.java?rev=880975r1=880974r2=880975view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapReduce.java (original) +++ 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigMapReduce.java Mon Nov 16 21:43:41 2009 @@ -57,6 +57,7 @@ import org.apache.pig.impl.plan.VisitorException; import org.apache.pig.impl.util.ObjectSerializer; import org.apache.pig.impl.util.SpillableMemoryManager; +import
svn commit: r881008 - in /hadoop/pig/trunk: src/org/apache/pig/impl/util/ test/org/apache/pig/test/ test/org/apache/pig/test/utils/
Author: gates Date: Mon Nov 16 22:18:46 2009 New Revision: 881008 URL: http://svn.apache.org/viewvc?rev=881008view=rev Log: PIG-1085 checking in files I missed in the last checkin. Added: hadoop/pig/trunk/src/org/apache/pig/impl/util/UDFContext.java hadoop/pig/trunk/test/org/apache/pig/test/TestUDFContext.java hadoop/pig/trunk/test/org/apache/pig/test/utils/UDFContextTestEvalFunc.java hadoop/pig/trunk/test/org/apache/pig/test/utils/UDFContextTestEvalFunc2.java hadoop/pig/trunk/test/org/apache/pig/test/utils/UDFContextTestLoader.java Added: hadoop/pig/trunk/src/org/apache/pig/impl/util/UDFContext.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/util/UDFContext.java?rev=881008view=auto == --- hadoop/pig/trunk/src/org/apache/pig/impl/util/UDFContext.java (added) +++ hadoop/pig/trunk/src/org/apache/pig/impl/util/UDFContext.java Mon Nov 16 22:18:46 2009 @@ -0,0 +1,180 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.pig.impl.util; + +import java.io.IOException; +//import java.io.Serializable; +import java.util.HashMap; +import java.util.Properties; + +import org.apache.hadoop.mapred.JobConf; + +import org.apache.pig.impl.util.ObjectSerializer; + +public class UDFContext { + +@SuppressWarnings(deprecation) +private JobConf jconf = null; +private HashMapInteger, Properties udfConfs; + +private static UDFContext self = null; + +private UDFContext() { +udfConfs = new HashMapInteger, Properties(); +} + +public static UDFContext getUDFContext() { +if (self == null) { +self = new UDFContext(); +} +return self; +} + +/** + * Adds the JobConf to this singleton. Will be + * called on the backend by the Map and Reduce + * functions so that UDFs can obtain the JobConf + * on the backend. + */ +@SuppressWarnings(deprecation) +public void addJobConf(JobConf conf) { +jconf = conf; +} + +/** + * Get the JobConf. This should only be called on + * the backend. It will return null on the frontend. + * @return JobConf for this job. This is a copy of the + * JobConf. Nothing written here will be kept by the system. + * getUDFConf should be used for recording UDF specific + * information. + */ +@SuppressWarnings(deprecation) +public JobConf getJobConf() { +if (jconf != null) return new JobConf(jconf); +else return null; +} + +/** + * Get a properties object that is specific to this UDF. + * Note that if a given UDF is called multiple times in a script, + * and each instance passes different arguments, then each will + * be provided with different configuration object. + * This can be used by loaders to pass their input object path + * or URI and separate themselves from other instances of the + * same loader. Constructor arguments could also be used, + * as they are available on both the front and back end. + * + * Note that this can only be used to share information + * across instantiations of the same function in the front end + * and between front end and back end. 
It cannot be used to + * share information between instantiations (that is, between + * map and/or reduce instances) on the back end at runtime. + * @param c of the UDF obtaining the properties object. + * @param args String arguments that make this instance of + * the UDF unique. + * @return A reference to the properties object specific to + * the calling UDF. This is a reference, not a copy. + * Any changes to this object will automatically be + * propogated to other instances of the UDF calling this + * function. + */ + +@SuppressWarnings(unchecked) +public Properties getUDFProperties(Class c, String[] args) { +Integer k = generateKey(c, args); +Properties p = udfConfs.get(k); +if (p == null) { +p = new Properties(); +udfConfs.put(k, p); +} +return p; +} + + /** + * Get
svn commit: r833126 - /hadoop/pig/trunk/test/hbase-site.xml
Author: gates Date: Thu Nov 5 18:57:17 2009 New Revision: 833126 URL: http://svn.apache.org/viewvc?rev=833126view=rev Log: PIG-970 Added a file I missed in the previous checkin. Added: hadoop/pig/trunk/test/hbase-site.xml Added: hadoop/pig/trunk/test/hbase-site.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/hbase-site.xml?rev=833126view=auto == --- hadoop/pig/trunk/test/hbase-site.xml (added) +++ hadoop/pig/trunk/test/hbase-site.xml Thu Nov 5 18:57:17 2009 @@ -0,0 +1,137 @@ +?xml version=1.0? +?xml-stylesheet type=text/xsl href=configuration.xsl? +!-- +/** + * Copyright 2007 The Apache Software Foundation + * + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +-- +configuration + property +namehbase.regionserver.msginterval/name +value1000/value +descriptionInterval between messages from the RegionServer to HMaster +in milliseconds. Default is 15. Set this value low if you want unit +tests to be responsive. +/description + /property + property +namehbase.client.pause/name +value5000/value +descriptionGeneral client pause value. 
Used mostly as value to wait +before running a retry of a failed get, region lookup, etc./description + /property + property +namehbase.master.meta.thread.rescanfrequency/name +value1/value +descriptionHow long the HMaster sleeps (in milliseconds) between scans of +the root and meta tables. +/description + /property + property +namehbase.server.thread.wakefrequency/name +value1000/value +descriptionTime to sleep in between searches for work (in milliseconds). +Used as sleep interval by service threads such as META scanner and log roller. +/description + /property + property +namehbase.regionserver.handler.count/name +value5/value +descriptionCount of RPC Server instances spun up on RegionServers +Same property is used by the HMaster for count of master handlers. +Default is 10. +/description + /property + property +namehbase.master.lease.period/name +value6000/value +descriptionLength of time the master will wait before timing out a region +server lease. Since region servers report in every second (see above), this +value has been reduced so that the master will notice a dead region server +sooner. The default is 30 seconds. +/description + /property + property +namehbase.master.info.port/name +value-1/value +descriptionThe port for the hbase master web UI +Set to -1 if you do not want the info server to run. +/description + /property + property +namehbase.regionserver.info.port/name +value-1/value +descriptionThe port for the hbase regionserver web UI +Set to -1 if you do not want the info server to run. +/description + /property + property +namehbase.regionserver.info.port.auto/name +valuetrue/value +descriptionInfo server auto port bind. Enables automatic port +search if hbase.regionserver.info.port is already in use. +Enabled for testing to run multiple tests on one machine. +/description + /property + property +namehbase.master.lease.thread.wakefrequency/name +value3000/value +descriptionThe interval between checks for expired region server leases. 
+This value has been reduced due to the other reduced values above so that +the master will notice a dead region server sooner. The default is 15 seconds. +/description + /property + property +namehbase.regionserver.optionalcacheflushinterval/name +value1/value +description +Amount of time to wait since the last time a region was flushed before +invoking an optional cache flush. Default 60,000. +/description + /property + property +namehbase.regionserver.safemode/name +valuefalse/value +description +Turn on/off safe mode in region server. Always on for production, always off +for tests. +/description + /property + property +namehbase.hregion.max.filesize/name +value67108864/value +description +Maximum desired file size for an HRegion. If filesize exceeds +value
svn commit: r833166 [5/5] - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/ src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/ src/java/org/apache/h
Added: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestTableMergeJoinInteger.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestTableMergeJoinInteger.java?rev=833166view=auto == --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestTableMergeJoinInteger.java (added) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestTableMergeJoinInteger.java Thu Nov 5 21:02:57 2009 @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.zebra.pig; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.PrintWriter; +import java.util.Iterator; +import java.util.StringTokenizer; + +import junit.framework.Assert; +import junit.framework.TestCase; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.zebra.io.BasicTable; +import org.apache.hadoop.zebra.io.TableInserter; +import org.apache.hadoop.zebra.pig.TableStorer; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.types.TypesUtils; +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.data.Tuple; +import org.apache.pig.test.MiniCluster; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Note: + * + * Make sure you add the build/pig-0.1.0-dev-core.jar to the Classpath of the + * app/debug configuration, when run this from inside the Eclipse. 
+ * + */ +public class TestTableMergeJoinInteger { + protected static ExecType execType = ExecType.MAPREDUCE; + private static MiniCluster cluster; + protected static PigServer pigServer; + private static Path pathTable; + + @BeforeClass + public static void setUp() throws Exception { +if (System.getProperty(hadoop.log.dir) == null) { + String base = new File(.).getPath(); // getAbsolutePath(); + System + .setProperty(hadoop.log.dir, new Path(base).toString() + ./logs); +} + +if (execType == ExecType.MAPREDUCE) { + cluster = MiniCluster.buildCluster(); + pigServer = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); +} else { + pigServer = new PigServer(ExecType.LOCAL); +} + +Configuration conf = new Configuration(); +FileSystem fs = cluster.getFileSystem(); +Path pathWorking = fs.getWorkingDirectory(); +pathTable = new Path(pathWorking, TestTableStorer); +System.out.println(pathTable = + pathTable); +BasicTable.Writer writer = new BasicTable.Writer(pathTable, +SF_a:int,SF_b:string,SF_c,SF_d,SF_e,SF_f,SF_g, +[SF_a, SF_b, SF_c]; [SF_e, SF_f, SF_g], conf); +Schema schema = writer.getSchema(); +System.out.println(typeName + schema.getColumn(SF_a).getType().pigDataType()); +Tuple tuple = TypesUtils.createTuple(schema); + +final int numsBatch = 10; +final int numsInserters = 1; +TableInserter[] inserters = new TableInserter[numsInserters]; +for (int i = 0; i numsInserters; i++) { + inserters[i] = writer.getInserter(ins + i, false); +} + +for (int b = 0; b numsBatch; b++) { + for (int i = 0; i numsInserters; i++) { +TypesUtils.resetTuple(tuple); +for (int k = 0; k tuple.size(); ++k) { + try { + if(k==0) { + tuple.set(0, k+b); + } else { + tuple.set(k, b + _ + i + + k); + } + } catch (ExecException e) { +e.printStackTrace(); + } +} +inserters[i].insert(new BytesWritable((key + i).getBytes()), tuple); + } +} +for (int i = 0; i numsInserters; i++) { + inserters[i].close(); +} +writer.close(); + } + + @AfterClass + public static void tearDown() throws Exception { 
+pigServer.shutdown();
svn commit: r829126 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/tools/pigstats/PigStats.java test/org/apache/pig/test/TestPigStats.java
Author: gates Date: Fri Oct 23 17:01:24 2009 New Revision: 829126 URL: http://svn.apache.org/viewvc?rev=829126view=rev Log: PIG-1027: Number of bytes written are always zero in local mode. Added: hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=829126r1=829125r2=829126view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Oct 23 17:01:24 2009 @@ -78,6 +78,8 @@ BUG FIXES +PIG-1027: Number of bytes written are always zero in local mode (zjffdu via gates). + PIG-976: Multi-query optimization throws ClassCastException (rding via pradeepkth) Modified: hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java?rev=829126r1=829125r2=829126view=diff == --- hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java Fri Oct 23 17:01:24 2009 @@ -102,11 +102,19 @@ stats.put(op.toString(), jobStats); POCounter counter = (POCounter) php.getPredecessors(op).get(0); jobStats.put(PIG_STATS_LOCAL_OUTPUT_RECORDS, (Long.valueOf(counter.getCount())).toString()); -jobStats.put(PIG_STATS_LOCAL_BYTES_WRITTEN, (Long.valueOf((new File(((POStore)op).getSFile().getFileName())).length())).toString()); +String localFilePath=normalizeToLocalFilePath(((POStore)op).getSFile().getFileName()); +jobStats.put(PIG_STATS_LOCAL_BYTES_WRITTEN, (Long.valueOf(new File(localFilePath).length())).toString()); } return stats; } +private String normalizeToLocalFilePath(String fileName) { +if (fileName.startsWith(file:)){ +return fileName.substring(5); +} +return fileName; +} + private MapString, MapString, String accumulateMRStats() throws ExecException { for(Job job : 
jc.getSuccessfulJobs()) { Added: hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java?rev=829126view=auto == --- hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java (added) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestPigStats.java Fri Oct 23 17:01:24 2009 @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * License); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an AS IS BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.pig.test; + +import java.io.File; +import java.io.IOException; + +import junit.framework.TestCase; + +import org.apache.pig.ExecType; +import org.apache.pig.PigServer; +import org.apache.pig.tools.pigstats.PigStats; + +public class TestPigStats extends TestCase { + +public void testBytesWritten_JIRA_1027() { + +File outputFile = null; +try { +outputFile = File.createTempFile(JIAR_1027, .out); +PigServer pig = new PigServer(ExecType.LOCAL); +pig +.registerQuery(A = load 'test/org/apache/pig/test/data/passwd';); +PigStats stats = pig.store(A, outputFile.getAbsolutePath()) +.getStatistics(); +assertEquals(outputFile.length(), stats.getBytesWritten()); +} catch (IOException e) { +fail(IOException happened); +} finally { +if (outputFile != null) { +outputFile.delete(); +} +} + +} +}
svn commit: r829156 - /hadoop/pig/trunk/CHANGES.txt
Author: gates Date: Fri Oct 23 18:05:23 2009 New Revision: 829156 URL: http://svn.apache.org/viewvc?rev=829156view=rev Log: Minor syntax cleanup in CHANGES.txt to make it easier to automate contributor counting. Modified: hadoop/pig/trunk/CHANGES.txt Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=829156r1=829155r2=829156view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Oct 23 18:05:23 2009 @@ -30,7 +30,7 @@ String(String) constructor (olgan) PIG-984: Add map side grouping for data that is already collected when -it is read into the map (rding via gates). +it is read into the map (rding via gates) PIG-1025: Add ability to set job priority from Pig Latin script (kevinweil via gates) @@ -64,7 +64,7 @@ PIG-975: Need a databag that does not register with SpillableMemoryManager and spill data pro-actively (yinghe via olgan) -PIG-891: Fixing dfs statement for Pig (zjffdu via daijy). +PIG-891: Fixing dfs statement for Pig (zjffdu via daijy PIG-956: 10 minute commit tests (olgan) @@ -78,7 +78,7 @@ BUG FIXES -PIG-1027: Number of bytes written are always zero in local mode (zjffdu via gates). +PIG-1027: Number of bytes written are always zero in local mode (zjffdu via gates) PIG-976: Multi-query optimization throws ClassCastException (rding via pradeepkth) @@ -87,17 +87,17 @@ from physical plan (ashutoshc via gates) PIG-968: Fix findContainingJar to work properly when there is a + in the jar - path (tlipcon via gates). + path (tlipcon via gates) PIG-738: Regexp passed from pigscript fails in UDF (pradeepkth) PIG-942: Maps are not implicitly casted (pradeepkth) PIG-513: Removed unecessary bounds check in DefaultTuple (ashutoshc via - gates). + gates) PIG-951: Set parallelism explicitly to 1 for indexing job in merge join - (ashutoc via gates). 
+ (ashutoshc via gates) PIG-592: schema inferred incorrectly (daijy) @@ -136,7 +136,7 @@ PIG-892: Make COUNT and AVG deal with nulls accordingly with SQL standart (olgan) -PIG-734: Changed maps to only take strings as keys (gates). +PIG-734: Changed maps to only take strings as keys (gates) IMPROVEMENTS @@ -159,7 +159,7 @@ PIG-845: PERFORMANCE: Merge Join (ashutoshc via pradeepkth) -PIG-893: Added string - integer, long, float, and double casts (zjffdu via gates). +PIG-893: Added string - integer, long, float, and double casts (zjffdu via gates) PIG-833: Added Zebra, new columnar storage mechanism for HDFS (rangadi plus many others via gates) @@ -169,15 +169,15 @@ PIG-820: Change RandomSampleLoader to take a LoadFunc instead of extending BinStorage. Added new Samplable interface for loaders to implement - allowing them to be used by RandomSampleLoader (ashutoshc via gates). + allowing them to be used by RandomSampleLoader (ashutoshc via gates) PIG-832: Make import list configurable (daijy) PIG-697: Proposed improvements to pig's optimizer (sms) -PIG-753: Allow UDFs with no parameters (zjffdu via gates). +PIG-753: Allow UDFs with no parameters (zjffdu via gates) -PIG-765: jdiff for pig ( gkesavan ). +PIG-765: jdiff for pig ( gkesavan OPTIMIZATIONS @@ -227,7 +227,7 @@ PIG-695: Pig should not fail when error logs cannot be created (sms) - PIG-878: Pig is returning too many blocks in the input split. (arunc via gates). + PIG-878: Pig is returning too many blocks in the input split. (arunc via gates) PIG-888: Pig do not pass udf to the backend in some situation (daijy) @@ -267,29 +267,30 @@ PIG-817: documentation update (chandec via olgan) -PIG-830: Add RegExLoader and apache log utils to piggybank (dvryaboy via gates). +PIG-830: Add RegExLoader and apache log utils to piggybank (dvryaboy via gates) PIG-831: Turned off reporting of records and bytes written for mutli-store queries as the returned results are confusing and wrong. 
(gates) PIG-813: documentation updates (chandec via olgan) -PIG-825: PIG_HADOOP_VERSION should be set to 18 (dvryaboy via gates). +PIG-825: PIG_HADOOP_VERSION should be set to 18 (dvryaboy via gates) PIG-795: support for SAMPLE command (ericg via olgan) PIG-619: Create one InputSplit even when the input file is zero length so that hadoop runs maps and creates output for the next -job (gates). +job (gates) PIG-697: Proposed improvements to pig's optimizer (sms) PIG-700: To automate the pig patch test process (gkesavan via sms) PIG-712: Added utility functions to create schemas for tuples and bags (zjffdu -via gates). +via gates) -PIG-652: Adapt changes in store interface to multi-query changes (hagleitn via gates). +PIG-652: Adapt changes in store interface to multi-query
svn commit: r829195 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt build-contrib.xml build.xml
Author: gates Date: Fri Oct 23 19:59:53 2009 New Revision: 829195 URL: http://svn.apache.org/viewvc?rev=829195view=rev Log: PIG-996 Add findbugs, checkstyle, and clover to zebra build file. Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/build-contrib.xml hadoop/pig/trunk/contrib/zebra/build.xml Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=829195r1=829194r2=829195view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Fri Oct 23 19:59:53 2009 @@ -6,6 +6,9 @@ IMPROVEMENTS + PIG-996 Add findbugs, checkstyle, and clover to zebra build file (chaow via + gates) + PIG-993 Ability to drop a column group in a table (yanz and rangadi via gates) PIG-992 Separate schema related files into a schema package (yanz via Modified: hadoop/pig/trunk/contrib/zebra/build-contrib.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/build-contrib.xml?rev=829195r1=829194r2=829195view=diff == --- hadoop/pig/trunk/contrib/zebra/build-contrib.xml (original) +++ hadoop/pig/trunk/contrib/zebra/build-contrib.xml Fri Oct 23 19:59:53 2009 @@ -61,6 +61,18 @@ property name=build.ivy.lib.dir location=${build.ivy.dir}/lib / property name=ivy.lib.dir location=${build.ivy.lib.dir}/Pig/ + property name=clover.db.dir location=${build.dir}/test/clover/db/ +property name=clover.report.dir location=${build.dir}/test/clover/reports/ +property name=clover.jar location=${clover.home}/lib/clover.jar/ +available property=clover.present file=${clover.jar} / +!-- check if clover reports should be generated -- +condition property=clover.enabled + and +isset property=run.clover/ +isset property=clover.present/ + /and +/condition + !-- javacc properties -- property name=src.gen.dir value=${basedir}/src-gen/ / property name=src.gen.zebra.parser.dir value=${src.gen.dir}/org/apache/hadoop/zebra/parser / @@ -107,6 +119,7 
@@ pathelement location=${conf.dir}/ pathelement location=${pig.root}/build/ pathelement location=${build.examples}/ + pathelement path=${clover.jar}/ path refid=classpath/ /path @@ -235,48 +248,6 @@ /fileset /copy /target - - !-- == -- - !-- Run unit tests -- - !-- == -- - target name=test depends=compile-test, compile if=test.available -echo message=contrib: ${name}/ -delete dir=${pig.log.dir}/ -mkdir dir=${pig.log.dir}/ -junit - printsummary=yes showoutput=${test.output} - haltonfailure=no fork=yes maxmemory=256m - errorProperty=tests.failed failureProperty=tests.failed - timeout=${test.timeout} - - sysproperty key=test.build.data value=${build.test}/data/ - sysproperty key=build.test value=${build.test}/ - sysproperty key=contrib.name value=${name}/ - sysproperty key=hadoop.log.dir value=${pig.log.dir}/ - - !-- requires fork=yes for: -relative File paths to use the specified user.dir -classpath to use build/contrib/*.jar - -- - sysproperty key=user.dir value=${build.test}/data/ - - sysproperty key=fs.default.name value=${fs.default.name}/ - sysproperty key=pig.test.localoutputfile value=${pig.test.localoutputfile}/ - sysproperty key=pig.log.dir value=${pig.log.dir}/ - classpath refid=test.classpath/ - formatter type=${test.junit.output.format} / - - batchtest todir=${build.test} unless=testcase -fileset dir=${src.test} - includes=**/Test*.java excludes=**/${test.exclude}.java / - /batchtest - batchtest todir=${build.test} if=testcase -fileset dir=${src.test} includes=**/${testcase}.java/ - /batchtest -/junit -fail if=tests.failedTests failed!/fail - - /target !-- == -- !-- Clean. 
Delete the build files, and their directories -- Modified: hadoop/pig/trunk/contrib/zebra/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/build.xml?rev=829195r1=829194r2=829195view=diff == --- hadoop/pig/trunk/contrib/zebra/build.xml (original) +++ hadoop/pig/trunk/contrib/zebra/build.xml Fri Oct 23 19:59:53 2009 @@ -24,10 +24,10 @@ project name=zebra default=jar import file=build-contrib.xml/ - property name=zebraVersion value=0.1.0 / + property name=zebraVersion
svn commit: r828773 - in /hadoop/pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/impl/ src/org/apache/pig/tools/grunt/ test/org/a
Author: gates Date: Thu Oct 22 16:23:20 2009 New Revision: 828773 URL: http://svn.apache.org/viewvc?rev=828773view=rev Log: PIG-1025: Add ability to set job priority from Pig Latin script. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/PigServer.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java hadoop/pig/trunk/src/org/apache/pig/impl/PigContext.java hadoop/pig/trunk/src/org/apache/pig/tools/grunt/GruntParser.java hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=828773r1=828772r2=828773view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Oct 22 16:23:20 2009 @@ -26,6 +26,9 @@ IMPROVEMENTS +PIG-1025: Add ability to set job priority from Pig Latin script (kevinweil via +gates) + PIG-1028: FINDBUGS: DM_NUMBER_CTOR: Method invokes inefficient Number constructor; use static valueOf instead (olgan) Modified: hadoop/pig/trunk/src/org/apache/pig/PigServer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/PigServer.java?rev=828773r1=828772r2=828773view=diff == --- hadoop/pig/trunk/src/org/apache/pig/PigServer.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/PigServer.java Thu Oct 22 16:23:20 2009 @@ -439,6 +439,10 @@ currDAG.setJobName(name); } +public void setJobPriority(String priority){ +currDAG.setJobPriority(priority); +} + /** * Forces execution of query (and all queries from which it reads), in order to materialize * result @@ -903,6 +907,8 @@ private String jobName; +private String jobPriority; + private boolean batchMode; private int processedStores; @@ -934,6 +940,10 @@ ListExecJob execute() throws ExecException, FrontendException { pigContext.getProperties().setProperty(PigContext.JOB_NAME, jobName); +if (jobPriority != null) { + pigContext.getProperties().setProperty(PigContext.JOB_PRIORITY, 
jobPriority); +} + ListExecJob jobs = PigServer.this.execute(null); processedStores = storeOpTable.keySet().size(); return jobs; @@ -947,6 +957,10 @@ jobName = PigContext.JOB_NAME_PREFIX+:+name; } +public void setJobPriority(String priority){ +jobPriority = priority; +} + LogicalPlan getPlan(String alias) throws IOException { LogicalPlan plan = lp; Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java?rev=828773r1=828772r2=828773view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/JobControlCompiler.java Thu Oct 22 16:23:20 2009 @@ -36,6 +36,7 @@ import org.apache.hadoop.io.WritableComparator; import org.apache.hadoop.mapred.FileOutputFormat; import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.JobPriority; import org.apache.hadoop.mapred.OutputFormat; import org.apache.hadoop.mapred.jobcontrol.Job; import org.apache.hadoop.mapred.jobcontrol.JobControl; @@ -334,6 +335,25 @@ if (pigContext.getProperties().getProperty(PigContext.JOB_NAME) != null) jobConf.setJobName(pigContext.getProperties().getProperty(PigContext.JOB_NAME)); +if (pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY) != null) { +// If the job priority was set, attempt to get the corresponding enum value +// and set the hadoop job priority. +String jobPriority = pigContext.getProperties().getProperty(PigContext.JOB_PRIORITY).toUpperCase(); +try { + // Allow arbitrary case; the Hadoop job priorities are all upper case. 
+ jobConf.setJobPriority(JobPriority.valueOf(jobPriority)); +} catch (IllegalArgumentException e) { + StringBuffer sb = new StringBuffer(The job priority must be one of [); + JobPriority[] priorities = JobPriority.values(); + for (int i = 0; i priorities.length; ++i) { +if (i 0) sb.append(, ); +sb.append(priorities[i
svn commit: r828027 - in /hadoop/pig/trunk/contrib/zebra: ./ src/java/org/apache/hadoop/zebra/pig/ src/test/org/apache/hadoop/zebra/pig/
Author: gates Date: Wed Oct 21 14:17:54 2009 New Revision: 828027 URL: http://svn.apache.org/viewvc?rev=828027view=rev Log: PIG-944 Change schema to be taken from StoreConfig instead of TableStorer's constructor. Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/SchemaConverter.java hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/TableStorer.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestCollectionTableStorer.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestMapTableStorer.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestRealCluster.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestTableStorer.java Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=828027r1=828026r2=828027view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Wed Oct 21 14:17:54 2009 @@ -15,6 +15,9 @@ BUG FIXES + PIG-944 Change schema to be taken from StoreConfig instead of + TableStorer's constructor (yanz via gates). + PIG-918. Fix infinite loop only columns in first column group are specified. 
(Yan Zhou via rangadi) Modified: hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/SchemaConverter.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/SchemaConverter.java?rev=828027r1=828026r2=828027view=diff == --- hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/SchemaConverter.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/java/org/apache/hadoop/zebra/pig/SchemaConverter.java Wed Oct 21 14:17:54 2009 @@ -21,72 +21,52 @@ import java.util.Iterator; import org.apache.hadoop.zebra.parser.ParseException; +import org.apache.hadoop.zebra.schema.ColumnType; import org.apache.pig.data.DataType; import org.apache.pig.impl.logicalLayer.FrontendException; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.logicalLayer.schema.Schema.FieldSchema; -/** - * A simple schema converter that only understands three field types. - */ class SchemaConverter { - enum FieldSchemaMaker { -SimpleField(SF_) { - @Override - FieldSchema toFieldSchema(String name) { -return new FieldSchema(name, DataType.BYTEARRAY); - } -}, - -MapField(MF_) { - @Override - FieldSchema toFieldSchema(String name) { -// TODO: how to convey key and value types? 
-return new FieldSchema(name, DataType.MAP); - } -}, - -MapListField(MLF_) { - @Override - FieldSchema toFieldSchema(String name) throws FrontendException { -Schema tupleSchema = new Schema(); -tupleSchema.add(MapField.toFieldSchema(null)); -tupleSchema.setTwoLevelAccessRequired(true); -return new FieldSchema(name, tupleSchema, DataType.BAG); - } -}; - -private String prefix; - -FieldSchemaMaker(String prefix) { - this.prefix = prefix; -} - -abstract FieldSchema toFieldSchema(String name) throws FrontendException; - -public static FieldSchema makeFieldSchema(String colname) -throws FrontendException { - for (FieldSchemaMaker e : FieldSchemaMaker.values()) { -if (colname.startsWith(e.prefix)) { - return e.toFieldSchema(colname.substring(e.prefix.length())); -} - } - throw new FrontendException(Cannot determine type from column name); -} - -public static String makeColumnName(FieldSchema fs) -throws FrontendException { - if (fs.alias == null) { -throw new FrontendException(No alias provided for field schema); - } - for (FieldSchemaMaker e : FieldSchemaMaker.values()) { -FieldSchema expected = e.toFieldSchema(dummy); -if (FieldSchema.equals(fs, expected, false, true)) { - return e.prefix + fs.alias; -} - } - throw new FrontendException(Unsupported field schema); + public static ColumnType toTableType(byte ptype) + { +ColumnType ret; +switch (ptype) { + case DataType.INTEGER: +ret = ColumnType.INT; +break; + case DataType.LONG: +ret = ColumnType.LONG; +break; + case DataType.FLOAT: +ret = ColumnType.FLOAT; +break; + case DataType.DOUBLE: +ret = ColumnType.DOUBLE; +break; + case DataType.BOOLEAN: +ret = ColumnType.BOOL; +break; + case
svn commit: r825641 [3/3] - in /hadoop/pig/trunk: ./ contrib/zebra/ contrib/zebra/src/java/org/apache/hadoop/zebra/io/ contrib/zebra/src/java/org/apache/hadoop/zebra/mapred/ contrib/zebra/src/java/org
Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageCollection.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageCollection.java?rev=825641r1=825640r2=825641view=diff == --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageCollection.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageCollection.java Thu Oct 15 20:38:08 2009 @@ -25,12 +25,12 @@ import junit.framework.Assert; import org.apache.hadoop.zebra.types.CGSchema; -import org.apache.hadoop.zebra.types.ColumnType; -import org.apache.hadoop.zebra.types.ParseException; +import org.apache.hadoop.zebra.schema.ColumnType; +import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.types.Partition; -import org.apache.hadoop.zebra.types.Schema; -import org.apache.hadoop.zebra.types.TableSchemaParser; -import org.apache.hadoop.zebra.types.Schema.ColumnSchema; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.parser.TableSchemaParser; +import org.apache.hadoop.zebra.schema.Schema.ColumnSchema; import org.junit.Before; import org.junit.Test; @@ -64,11 +64,11 @@ CGSchema cgs2 = cgschemas[1]; ColumnSchema f11 = cgs1.getSchema().getColumn(0); - Assert.assertEquals(c1, f11.name); - Assert.assertEquals(ColumnType.COLLECTION, f11.type); + Assert.assertEquals(c1, f11.getName()); + Assert.assertEquals(ColumnType.COLLECTION, f11.getType()); ColumnSchema f21 = cgs2.getSchema().getColumn(0); - Assert.assertEquals(c2, f21.name); - Assert.assertEquals(ColumnType.COLLECTION, f21.type); + Assert.assertEquals(c2, f21.getName()); + Assert.assertEquals(ColumnType.COLLECTION, f21.getType()); System.out.println(*** Column Map **); MapString, HashSetPartition.PartitionInfo.ColumnMappingEntry colmap = p @@ -125,14 +125,14 @@ CGSchema cgs2 = cgschemas[1]; ColumnSchema f11 = 
cgs1.getSchema().getColumn(0); - Assert.assertEquals(c1.f1, f11.name); - Assert.assertEquals(ColumnType.INT, f11.type); + Assert.assertEquals(c1.f1, f11.getName()); + Assert.assertEquals(ColumnType.INT, f11.getType()); ColumnSchema f21 = cgs2.getSchema().getColumn(0); - Assert.assertEquals(c1.f2, f21.name); - Assert.assertEquals(ColumnType.INT, f21.type); + Assert.assertEquals(c1.f2, f21.getName()); + Assert.assertEquals(ColumnType.INT, f21.getType()); ColumnSchema f22 = cgs2.getSchema().getColumn(1); - Assert.assertEquals(c2, f22.name); - Assert.assertEquals(ColumnType.COLLECTION, f22.type); + Assert.assertEquals(c2, f22.getName()); + Assert.assertEquals(ColumnType.COLLECTION, f22.getType()); System.out.println(*** Column Map **); MapString, HashSetPartition.PartitionInfo.ColumnMappingEntry colmap = p Modified: hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageMap.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageMap.java?rev=825641r1=825640r2=825641view=diff == --- hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageMap.java (original) +++ hadoop/pig/trunk/contrib/zebra/src/test/org/apache/hadoop/zebra/types/TestStorageMap.java Thu Oct 15 20:38:08 2009 @@ -26,12 +26,12 @@ import junit.framework.Assert; import org.apache.hadoop.zebra.types.CGSchema; -import org.apache.hadoop.zebra.types.ColumnType; -import org.apache.hadoop.zebra.types.ParseException; +import org.apache.hadoop.zebra.schema.ColumnType; +import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.types.Partition; -import org.apache.hadoop.zebra.types.Schema; -import org.apache.hadoop.zebra.types.TableSchemaParser; -import org.apache.hadoop.zebra.types.Schema.ColumnSchema; +import org.apache.hadoop.zebra.schema.Schema; +import org.apache.hadoop.zebra.parser.TableSchemaParser; +import org.apache.hadoop.zebra.schema.Schema.ColumnSchema; import 
org.junit.Before; import org.junit.Test; @@ -67,23 +67,23 @@ CGSchema cgs3 = cgschemas[2]; ColumnSchema f11 = cgs1.getSchema().getColumn(0); - Assert.assertEquals(f11.name, m1); - Assert.assertEquals(ColumnType.MAP, f11.type); + Assert.assertEquals(f11.getName(), m1); + Assert.assertEquals(ColumnType.MAP, f11.getType()); ColumnSchema f21 = cgs2.getSchema().getColumn(0); - Assert.assertEquals(f21.name, m2); + Assert.assertEquals(f21.getName(), m2); // TODO: type should be MAP! - Assert.assertEquals(ColumnType.MAP, f21.type); +
svn commit: r816723 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java test/org/apache/pig/test/TestMergeJoin.java test/org/apache/pig
Author: gates Date: Fri Sep 18 17:41:38 2009 New Revision: 816723 URL: http://svn.apache.org/viewvc?rev=816723view=rev Log: PIG-951: Set parallelism explicitly to 1 for indexing job in merge join Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Sep 18 17:41:38 2009 @@ -30,6 +30,9 @@ BUG FIXES +PIG-951: Set parallelism explicitly to 1 for indexing job in merge join + (ashutoc via gates). + Release 0.5.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Fri Sep 18 17:41:38 2009 @@ -1115,7 +1115,8 @@ } joinOp.setupRightPipeline(rightPipelinePlan); - + rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job. + // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad. 
POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0); joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec()); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Fri Sep 18 17:41:38 2009 @@ -26,11 +26,15 @@ import org.apache.pig.PigException; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.LogUtils; +import org.apache.pig.test.utils.LogicalPlanTester; import org.apache.pig.test.utils.TestHelper; import org.junit.After; import org.junit.Before; @@ -407,6 +411,20 @@ } @Test +public void testParallelism() throws Exception{ + +LogicalPlanTester tester = new LogicalPlanTester(); +tester.buildPlan(A = LOAD ' + INPUT_FILE + ';); +tester.buildPlan(B = LOAD ' + INPUT_FILE + ';); +tester.buildPlan(C = join A by $0, B by $0 using \merge\ parallel 50;); +LogicalPlan lp = tester.buildPlan(store C into 'out';); + PigContext pc = new PigContext(ExecType.MAPREDUCE,cluster.getProperties()); +pc.connect(); + MROperPlan mro = Util.buildMRPlan(Util.buildPhysicalPlan(lp, pc),pc); +Assert.assertEquals(1,mro.getRoots().get(0).getRequestedParallelism()); +} + +@Test public void testIndexer() throws IOException{ Util.createInputFile(cluster, temp_file1, new String[]{1+}); Util.createInputFile(cluster, temp_file2, new String[]{2+}); Modified: 
hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Fri Sep 18 17:41:38 2009 @@ -6,7 +6,7 @@ | | | |---Load(file:/tmp/input1:org.apache.pig.builtin.PigStorage) - scope-117 | -|---MapReduce(-1,PigStorage) - scope-126: +|---MapReduce(1,PigStorage) - scope-126: | Store(file:/tmp/temp-1456742965/tmp-1456742965:org.apache.pig.builtin.BinStorage) - scope-133
svn commit: r816820 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/data/DefaultTuple.java
Author: gates Date: Sat Sep 19 00:04:47 2009 New Revision: 816820 URL: http://svn.apache.org/viewvc?rev=816820view=rev Log: PIG-513: Removed unecessary bounds check in DefaultTuple. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816820r1=816819r2=816820view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Sep 19 00:04:47 2009 @@ -30,6 +30,9 @@ BUG FIXES +PIG-513: Removed unecessary bounds check in DefaultTuple (ashutoshc via + gates). + PIG-951: Set parallelism explicitly to 1 for indexing job in merge join (ashutoc via gates). Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java?rev=816820r1=816819r2=816820view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java Sat Sep 19 00:04:47 2009 @@ -116,7 +116,6 @@ * than or equal to the number of fields in the tuple. */ public boolean isNull(int fieldNum) throws ExecException { -checkBounds(fieldNum); return (mFields.get(fieldNum) == null); } @@ -130,7 +129,6 @@ * the number of fields in the tuple. */ public byte getType(int fieldNum) throws ExecException { -checkBounds(fieldNum); return DataType.findType(mFields.get(fieldNum)); } @@ -142,7 +140,6 @@ * the number of fields in the tuple. */ public Object get(int fieldNum) throws ExecException { -checkBounds(fieldNum); return mFields.get(fieldNum); } @@ -163,7 +160,6 @@ * the number of fields in the tuple. 
*/ public void set(int fieldNum, Object val) throws ExecException { -checkBounds(fieldNum); mFields.set(fieldNum, val); } @@ -352,15 +348,6 @@ } } -private void checkBounds(int fieldNum) throws ExecException { -if (fieldNum >= mFields.size()) { -int errCode = 1072; -String msg = "Out of bounds access: Request for field number " + fieldNum + - " exceeds tuple size of " + mFields.size(); -throw new ExecException(msg, errCode, PigException.INPUT); -} -} - /** * @return true if this Tuple is null */ @@ -376,3 +363,4 @@ } } +
svn commit: r799694 - in /hadoop/pig/branches/branch-0.3: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java
Author: gates Date: Fri Jul 31 18:49:21 2009 New Revision: 799694 URL: http://svn.apache.org/viewvc?rev=799694view=rev Log: PIG-878: Pig is returning too many blocks in the input split. (arunc via gates). Modified: hadoop/pig/branches/branch-0.3/CHANGES.txt hadoop/pig/branches/branch-0.3/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java Modified: hadoop/pig/branches/branch-0.3/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.3/CHANGES.txt?rev=799694r1=799693r2=799694view=diff == --- hadoop/pig/branches/branch-0.3/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.3/CHANGES.txt Fri Jul 31 18:49:21 2009 @@ -20,6 +20,10 @@ Pig Change Log +BUG FIXES + +PIG-878: Pig is returning too many blocks in the input split. (arunc via gates). + Release 0.3.0 - 06/16/09 INCOMPATIBLE CHANGES Modified: hadoop/pig/branches/branch-0.3/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.3/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java?rev=799694r1=799693r2=799694view=diff == --- hadoop/pig/branches/branch-0.3/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java (original) +++ hadoop/pig/branches/branch-0.3/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java Fri Jul 31 18:49:21 2009 @@ -92,7 +92,7 @@ for (String loc : wrapped.getLocations()) { Path path = new Path(loc); FileStatus status = fs.getFileStatus(path); -BlockLocation[] b = fs.getFileBlockLocations(status, 0, status.getLen()); + BlockLocation[] b = fs.getFileBlockLocations(status, wrapped.getStart(), wrapped.getLength()); int total = 0; for (int i = 0; i b.length; i++) { total += b[i].getHosts().length;
svn commit: r795931 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java
Author: gates Date: Mon Jul 20 17:41:01 2009 New Revision: 795931 URL: http://svn.apache.org/viewvc?rev=795931&view=rev Log: PIG-878: Pig is returning too many blocks in the input split. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=795931&r1=795930&r2=795931&view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon Jul 20 17:41:01 2009 @@ -40,6 +40,8 @@ BUG FIXES + PIG-878: Pig is returning too many blocks in the input split. (arunc via gates). + PIG-888: Pig do not pass udf to the backend in some situation (daijy) PIG-728: All backend error messages must be logged to preserve the Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java?rev=795931&r1=795930&r2=795931&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/SliceWrapper.java Mon Jul 20 17:41:01 2009 @@ -92,7 +92,7 @@ for (String loc : wrapped.getLocations()) { Path path = new Path(loc); FileStatus status = fs.getFileStatus(path); -BlockLocation[] b = fs.getFileBlockLocations(status, 0, status.getLen()); + BlockLocation[] b = fs.getFileBlockLocations(status, wrapped.getStart(), wrapped.getLength()); int total = 0; for (int i = 0; i < b.length; i++) { total += b[i].getHosts().length;
svn commit: r775340 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/executionengine/PigSlicer.java
Author: gates Date: Fri May 15 21:08:27 2009 New Revision: 775340 URL: http://svn.apache.org/viewvc?rev=775340&view=rev Log: PIG-619: Create one InputSplit even when the input file is zero length so that hadoop runs maps and creates output for the next job . Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/PigSlicer.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=775340&r1=775339&r2=775340&view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri May 15 21:08:27 2009 @@ -24,6 +24,10 @@ IMPROVEMENTS +PIG-619: Create one InputSplit even when the input file is zero length + so that hadoop runs maps and creates output for the next +job (gates). + PIG-693: Proposed improvements to pig's optimizer (sms) PIG-700: To automate the pig patch test process (gkesavan via sms) Modified: hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/PigSlicer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/PigSlicer.java?rev=775340&r1=775339&r2=775340&view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/PigSlicer.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/executionengine/PigSlicer.java Fri May 15 21:08:27 2009 @@ -100,6 +100,12 @@ // Anything that ends with a .gz we must process as a complete // file slices.add(new PigSlice(name, funcSpec, 0, size)); +} else if (size == 0) { +// add one empty slice. This is a total hack to deal with the + // case where hadoop isn't starting maps for empty arrays of + // InputSplits. See PIG-619. This should be removed + // once we determine why this is. +slices.add(new PigSlice(name, funcSpec, 0, bs)); } else { while (pos < size) { if (pos + bs > size) {
svn commit: r774989 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/tools/pigstats/PigStats.java
Author: gates Date: Fri May 15 02:43:52 2009 New Revision: 774989 URL: http://svn.apache.org/viewvc?rev=774989view=rev Log: PIG-810: Fixed NPE in PigStats (gates) Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=774989r1=774988r2=774989view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri May 15 02:43:52 2009 @@ -44,6 +44,8 @@ BUG FIXES +PIG-810: Fixed NPE in PigStats (gates) + PIG-804: problem with lineage with double map redirection (pradeepkth) PIG-733: Order by sampling dumps entire sample to hdfs which causes dfs Modified: hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java?rev=774989r1=774988r2=774989view=diff == --- hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/tools/pigstats/PigStats.java Fri May 15 02:43:52 2009 @@ -172,7 +172,7 @@ } -lastJobID = lastJob.getAssignedJobID().toString(); +if (lastJob != null) lastJobID = lastJob.getAssignedJobID().toString(); return stats; }
svn commit: r772750 - in /hadoop/pig/trunk: ./ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/ test/org/apache/pig/test/
Author: gates Date: Thu May 7 19:23:25 2009 New Revision: 772750 URL: http://svn.apache.org/viewvc?rev=772750view=rev Log: PIG-800: Fix distinct and order in local mode to not go into an infinite loop. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=772750r1=772749r2=772750view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu May 7 19:23:25 2009 @@ -63,6 +63,9 @@ PIG-774: Pig does not handle Chinese characters (in both the parameter subsitution using -param_file or embedded in the Pig script) correctly (daijy) +PIG-800: Fix distinct and order in local mode to not go into an infinite loop +(gates). + Release 0.2.0 INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java?rev=772750r1=772749r2=772750view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/PODistinct.java Thu May 7 19:23:25 2009 @@ -79,8 +79,14 @@ while (in.returnStatus != POStatus.STATUS_EOP) { if (in.returnStatus == POStatus.STATUS_ERR) { log.error(Error in reading from inputs); -continue; +return in; +//continue; } else if (in.returnStatus == POStatus.STATUS_NULL) { +// Ignore the null, read the next tuple. 
It's not clear +// to me that we should ever get this, or if we should, +// how it differs from EOP. But ignoring it here seems +// to work. +in = processInput(); continue; } distinctBag.add((Tuple) in.result); Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java?rev=772750r1=772749r2=772750view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POSort.java Thu May 7 19:23:25 2009 @@ -258,8 +258,11 @@ while (res.returnStatus != POStatus.STATUS_EOP) { if (res.returnStatus == POStatus.STATUS_ERR) { log.error(Error in reading from the inputs); - continue; + return res; + //continue; } else if (res.returnStatus == POStatus.STATUS_NULL) { +// ignore the null, read the next tuple. 
+res = processInput(); continue; } sortedBag.add((Tuple) res.result); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java?rev=772750r1=772749r2=772750view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestLocal2.java Thu May 7 19:23:25 2009 @@ -20,13 +20,19 @@ import junit.framework.Assert; import junit.framework.TestCase; + +import org.apache.pig.EvalFunc; import org.apache.pig.PigServer; +import org.apache.pig.data.BagFactory; +import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; import org.apache.pig.test.utils.TestHelper; import org.junit.Test; import java.io.File; +import java.io.FileOutputStream; import java.io.IOException; +import java.io.PrintStream; import java.text.DecimalFormat; import
svn commit: r771495 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/PigServer.java test/org/apache/pig/test/TestGrunt.java
Author: gates Date: Mon May 4 23:23:20 2009 New Revision: 771495 URL: http://svn.apache.org/viewvc?rev=771495view=rev Log: PIG-789: Fix dump and illustrate to work with new multi-query feature. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/PigServer.java hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=771495r1=771494r2=771495view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Mon May 4 23:23:20 2009 @@ -55,7 +55,11 @@ PIG-514: COUNT returns no results as a result of two filter statements in FOREACH (pradeepkth) -Release 0.2.0 - Unreleased +PIG-789: Fix dump and illustrate to work with new multi-query feature +(hagleitn via gates). + + +Release 0.2.0 INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/PigServer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/PigServer.java?rev=771495r1=771494r2=771495view=diff == --- hadoop/pig/trunk/src/org/apache/pig/PigServer.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/PigServer.java Mon May 4 23:23:20 2009 @@ -445,6 +445,9 @@ throw new FrontendException(msg, errCode, PigException.INPUT); } +if (currDAG.isBatchOn()) { +currDAG.execute(); +} ExecJob job = store(id, FileLocalizer.getTemporaryPath(null, pigContext).toString(), BinStorage.class.getName() + ()); // invocation of execute is synchronous! 
@@ -475,32 +478,18 @@ /** * forces execution of query (and all queries from which it reads), in order to store result in file - */ + */ public ExecJob store( String id, String filename, -String func) throws IOException{ +String func) throws IOException { + if (!currDAG.getAliasOp().containsKey(id)) throw new IOException(Invalid alias: + id); - -try { -LogicalPlan readFrom = getPlanFromAlias(id, store); -return store(id, readFrom, filename, func); -} catch (FrontendException fe) { -int errCode = 1002; -String msg = Unable to store alias + id; -throw new FrontendException(msg, errCode, PigException.INPUT, fe); -} -} - -public ExecJob store( -String id, -LogicalPlan readFrom, -String filename, -String func) throws IOException { + try { LogicalPlan lp = compileLp(id); - + // MRCompiler needs a store to be the leaf - hence // add a store to the plan to explain @@ -701,7 +690,12 @@ public MapLogicalOperator, DataBag getExamples(String alias) { LogicalPlan plan = null; -try { + +try { +if (currDAG.isBatchOn()) { +currDAG.execute(); +} + plan = clonePlan(alias); } catch (IOException e) { //Since the original script is parsed anyway, there should not be an Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java?rev=771495r1=771494r2=771495view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestGrunt.java Mon May 4 23:23:20 2009 @@ -543,4 +543,50 @@ grunt.exec(); } + +@Test +public void testDump() throws Throwable { +PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); +PigContext context = server.getPigContext(); + +String strCmd = +rmf bla; ++a = load 'file:test/org/apache/pig/test/data/passwd'; ++e = group a by $0; ++f = foreach e generate group, COUNT($1); ++store f into 'bla'; ++f1 = load 'bla'; ++g = order f1 by $1; ++dump g;; + +ByteArrayInputStream cmd = new 
ByteArrayInputStream(strCmd.getBytes()); +InputStreamReader reader = new InputStreamReader(cmd); + +Grunt grunt = new Grunt(new BufferedReader(reader), context); + +grunt.exec(); +} + +@Test +public void testIllustrate() throws Throwable { +PigServer server = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); +PigContext context = server.getPigContext(); + +String strCmd = +rmf bla
svn commit: r768110 - in /hadoop/pig/site: author/src/documentation/content/xdocs/index.xml publish/index.html publish/index.pdf
Author: gates Date: Fri Apr 24 00:47:05 2009 New Revision: 768110 URL: http://svn.apache.org/viewvc?rev=768110view=rev Log: Added plug for pig training available from cloudera. Modified: hadoop/pig/site/author/src/documentation/content/xdocs/index.xml hadoop/pig/site/publish/index.html hadoop/pig/site/publish/index.pdf Modified: hadoop/pig/site/author/src/documentation/content/xdocs/index.xml URL: http://svn.apache.org/viewvc/hadoop/pig/site/author/src/documentation/content/xdocs/index.xml?rev=768110r1=768109r2=768110view=diff == --- hadoop/pig/site/author/src/documentation/content/xdocs/index.xml (original) +++ hadoop/pig/site/author/src/documentation/content/xdocs/index.xml Fri Apr 24 00:47:05 2009 @@ -47,8 +47,9 @@ section title Getting Started /title ol - lia href=deployment.htmlHow to deploy it/a./li -lia href=http://wiki.apache.org/pig/;Learn about/a Pig by reading the wiki documentation strong-- still under construction./strong/li +lia href=deployment.htmlHow to deploy it/a./li +lia href=http://hadoop.apache.org/pig/docs/r0.2.0/;Read the documentation/a and a href=http://wiki.apache.org/pig/;the wiki/a./li +lia href=http://www.cloudera.com/hadoop-training-pig-introduction;Watch the training/a./li lia href=mailing_lists.htmlDiscuss it/a on the mailing list./li /ol /section Modified: hadoop/pig/site/publish/index.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/index.html?rev=768110r1=768109r2=768110view=diff == --- hadoop/pig/site/publish/index.html (original) +++ hadoop/pig/site/publish/index.html Fri Apr 24 00:47:05 2009 @@ -208,13 +208,15 @@ h2 class=h3 Getting Started /h2 div class=section ol - + li a href=deployment.htmlHow to deploy it/a./li li -a href=http://wiki.apache.org/pig/;Learn about/a Pig by reading the wiki documentation strong-- still under construction./strong -/li +a href=http://hadoop.apache.org/pig/docs/r0.2.0/;Read the documentation/a and a href=http://wiki.apache.org/pig/;the wiki/a./li + +li +a 
href=http://www.cloudera.com/hadoop-training-pig-introduction;Watch the training/a./li li a href=mailing_lists.htmlDiscuss it/a on the mailing list./li @@ -222,7 +224,7 @@ /ol /div -a name=N1005A/aa name=Getting+Involved/a +a name=N10062/aa name=Getting+Involved/a h2 class=h3 Getting Involved /h2 div class=section p Modified: hadoop/pig/site/publish/index.pdf URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/index.pdf?rev=768110r1=768109r2=768110view=diff == --- hadoop/pig/site/publish/index.pdf (original) +++ hadoop/pig/site/publish/index.pdf Fri Apr 24 00:47:05 2009 @@ -69,10 +69,10 @@ endobj 16 0 obj - /Length 2777 /Filter [ /ASCII85Decode /FlateDecode ] + /Length 2947 /Filter [ /ASCII85Decode /FlateDecode ] stream -Gat=-Edi!(4Ml_@`rpap;4co0NHsh%.3a_^loib_=]]=$...@dq%obz.l[d55cm[g5q$...@6dmcq[(eDELGIJm[BGa,_...@r(%X4N)_LJ/@]!*!)PXKEI4IhrA^A=0U]QX9r*eiiH.P=n$sm(U;T/g=o...@*knd#w'?ufU3Vg/MJkf],(QbPt7R(!FR*;I1jR7-%/s.3#A9)!fS6Fs1R,jD*!Qln\3pW]YI^o6cZFjhTe:hAt8d`PRShW94%jrkpho!...@y[0%6lt9yit.rn9j(P$[J7DU.]/PtdUeY$r,(]9+)*Ku4,o#P%LekE/od1Wdfa...@ckvg*.1j9pdrekyke;5+.ZlNVq#bC9Eld7E;.Ct[Z`%\.P#VNgU330;R%'XQL6u4'T%G'a68/h[mN%7!Ud77:tT].I3E8=Yn]\AhfduLNRZ.Vpc6'8f_rRU%`WCITgcZ`tpEAF1,E$D,nM`r^;?;l.(b(:36K^.gFfO%[4(NA1]L9%H'dJ4[f!=hQi35hk;\mW#HRcfeb?h-0f6o);VX?FJ9a2Ie^D\kZ(T:=-FGLUQQb7#=cUqQT-6VooD5q%ciWXXK_0`CSm5Q.('e2Xu25G#uXYK:Alf+i=?e7-m6?2g[PY1p+4*D?.1DR7Ol7%ZC.oUqjqi_]gO4WAu=T(.E@(0^t33N5N-$]=31uGsrHlK0Y!tlrUFWW%AVZX28+=@DE!DT:$mRD8hcPa`Y``6/ZCm?T1bbd*KPJR.dRH*S-KH^/3=1UG1R(h5P(sT2/G4^m0eFY+sS_6-knqZssQL)PoYU9rr?Toh`CEHb[f06WM!)_^-G210`FBVZE9[cZC=UWg-X;NU6HX$!f.5MAXY+gjmn;X?%FJ-.%92AA[O^^pN2JoNN79(l*)(,/.9EFOuWZ!1H?;A2RI1ubUMfLt]#`0o...@.mk/A(1;jCi 
#+C+Dsh!1d[nOWRLr=gL_+9#E\YSnlG8oS';NU0925DtmKg#OWC5=];cC(Afi'G,nSjh%pl!C5Yi[4l(0Iu2\?sq/Wimk$e_sJK#dfCU/%;lXZjW$cc8k...@l3['?N+uq^Uq$Caa:cSeUM5d%;6...@[c?F=do6li8ekS)F33X6jT\Qr=-/PPe)eTDkA2EOQU8OI'`?j=.s.Rt\D6,FIl?-%NuaR)Z$BCtT*/$fDBi/$q`...@gxm:sNVQL^,4rZ+!@jT=Ka_jW?2R5GR.i0j(D)phEK`nfKj-n_N#Gb(G%JDLf8q^RQkW95tG'Eu#qgQ1bqA+4S._K1+K3Q$M=$#eB#!?MZnbe;lhhU$o6bBqN:4QdD4T4=I9Zb`t...@?3`3ry3drqtjr=+o)G+uoe9X2/-13CQ2jfS:8M/;hZ@(?,=43u!H.ar+!:?jb00m)ooi=(o...@l-^0j;S.eHG.OI--r$bjn...@6,)AB[J?!dlCOnr;P!Po-r0U!_3#3!@(C%-D,r?...@\,tSq8BBR0oi:1c-E$dds+ir+]VfoHU/P9r'?11rbZkT3g:69:MR-^B(,(j[0**YY2*cQN%M]\b4.ec1%Z8M]q%WGEoQ/G:3%T=`p(uE.a!Yg;%-c7YF81$nBg%gYiD4%XgUNotc)F-CN'cl)?Q6^6kRb4/qn8^uG3'*C8ptYarE;Un'ENK=U0q3DAEn7N)*:RkQ(it7V'rpXBItE3M?p9lL3KF;3Dso=CkQP9[;K/+kQ:Q+WJ.e!=C$%=3f'jPEjQOiMAD#^]1F\i!^fZu2SDu?WnGG8$:tT37.5M8@!u5P\_?9t=E\VE/9REnu8RLh%qY$:tBI[4,Q;d1*+dbb?l...@sq;UNMh_LCY`*rI90J2'JU(/E:%'fBV6He,`Sd;_q...@8[g'Ib`h5bkI8!n
svn commit: r763381 - in /hadoop/pig/trunk/src/docs/src/documentation/content/xdocs: cookbook.xml tabs.xml
Author: gates Date: Wed Apr 8 20:36:51 2009 New Revision: 763381 URL: http://svn.apache.org/viewvc?rev=763381view=rev Log: Fixed references to 1.0 to 0.2 to reflect release version name change. Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/cookbook.xml hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/tabs.xml Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/cookbook.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/cookbook.xml?rev=763381r1=763380r2=763381view=diff == --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/cookbook.xml (original) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/cookbook.xml Wed Apr 8 20:36:51 2009 @@ -295,7 +295,7 @@ section titleDrop Nulls Before a Join/title -pThis comment only applies to pig on the types branch, as pig 0.1.0 does not have nulls. /p +pThis comment only applies to pig 0.2.0 branch, as pig 0.1.0 does not have nulls. /p pWith the introduction of nulls, join and cogroup semantics were altered to work with nulls. The semantic for cogrouping with nulls is that nulls from a given input are grouped together, but nulls across inputs are not grouped together. This preserves the semantics of grouping (nulls are collected together from a single input to be passed to aggregate functions like COUNT) and the semantics of join (nulls are not joined across inputs). Since flattening an empty bag results in an empty row, in a standard join the rows with a null key will always be dropped. 
The join: /p source Modified: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/tabs.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/tabs.xml?rev=763381r1=763380r2=763381view=diff == --- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/tabs.xml (original) +++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/tabs.xml Wed Apr 8 20:36:51 2009 @@ -32,6 +32,6 @@ -- tab label=Project href=http://hadoop.apache.org/pig/; type=visible / tab label=Wiki href=http://wiki.apache.org/pig/; type=visible / - tab label=Pig 1.0.0 Documentation dir= type=visible / + tab label=Pig 0.2.0 Documentation dir= type=visible / /tabs
svn commit: r763388 - in /hadoop/pig/site: author/src/documentation/content/xdocs/ publish/ publish/docs/ publish/docs/r0.2.0/ publish/docs/r0.2.0/api/ publish/docs/r0.2.0/api/org/ publish/docs/r0.2.0
Author: gates Date: Wed Apr 8 20:49:42 2009 New Revision: 763388 URL: http://svn.apache.org/viewvc?rev=763388view=rev Log: Updated site for release 0.2.0 [This commit notification would consist of 313 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r763392 - /hadoop/pig/site/publish/docs/r0.2.0/api/
Author: gates Date: Wed Apr 8 21:04:00 2009 New Revision: 763392 URL: http://svn.apache.org/viewvc?rev=763392view=rev Log: Fixed wrong version numbers in java doc. Modified: hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-frame.html hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-noframe.html hadoop/pig/site/publish/docs/r0.2.0/api/constant-values.html hadoop/pig/site/publish/docs/r0.2.0/api/deprecated-list.html hadoop/pig/site/publish/docs/r0.2.0/api/help-doc.html hadoop/pig/site/publish/docs/r0.2.0/api/index-all.html hadoop/pig/site/publish/docs/r0.2.0/api/index.html hadoop/pig/site/publish/docs/r0.2.0/api/overview-frame.html hadoop/pig/site/publish/docs/r0.2.0/api/overview-summary.html hadoop/pig/site/publish/docs/r0.2.0/api/overview-tree.html hadoop/pig/site/publish/docs/r0.2.0/api/serialized-form.html Modified: hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-frame.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-frame.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-frame.html (original) +++ hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-frame.html Wed Apr 8 21:04:00 2009 @@ -4,7 +4,7 @@ HEAD !-- Generated by javadoc (build 1.5.0_16) on Wed Apr 08 13:40:14 PDT 2009 -- TITLE -All Classes (Pig 0.3.0-dev API) +All Classes (Pig 0.2.0 API) /TITLE Modified: hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-noframe.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-noframe.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-noframe.html (original) +++ hadoop/pig/site/publish/docs/r0.2.0/api/allclasses-noframe.html Wed Apr 8 21:04:00 2009 @@ -4,7 +4,7 @@ HEAD !-- Generated by javadoc (build 1.5.0_16) on Wed Apr 08 13:40:14 PDT 2009 -- TITLE -All Classes (Pig 0.3.0-dev API) +All Classes (Pig 0.2.0 API) /TITLE Modified: 
hadoop/pig/site/publish/docs/r0.2.0/api/constant-values.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/constant-values.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site/publish/docs/r0.2.0/api/constant-values.html (original) +++ hadoop/pig/site/publish/docs/r0.2.0/api/constant-values.html Wed Apr 8 21:04:00 2009 @@ -4,7 +4,7 @@ HEAD !-- Generated by javadoc (build 1.5.0_16) on Wed Apr 08 13:40:09 PDT 2009 -- TITLE -Constant Field Values (Pig 0.3.0-dev API) +Constant Field Values (Pig 0.2.0 API) /TITLE @@ -13,7 +13,7 @@ SCRIPT type=text/javascript function windowTitle() { -parent.document.title=Constant Field Values (Pig 0.3.0-dev API); +parent.document.title=Constant Field Values (Pig 0.2.0 API); } /SCRIPT NOSCRIPT Modified: hadoop/pig/site/publish/docs/r0.2.0/api/deprecated-list.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/deprecated-list.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site/publish/docs/r0.2.0/api/deprecated-list.html (original) +++ hadoop/pig/site/publish/docs/r0.2.0/api/deprecated-list.html Wed Apr 8 21:04:00 2009 @@ -4,7 +4,7 @@ HEAD !-- Generated by javadoc (build 1.5.0_16) on Wed Apr 08 13:40:14 PDT 2009 -- TITLE -Deprecated List (Pig 0.3.0-dev API) +Deprecated List (Pig 0.2.0 API) /TITLE @@ -13,7 +13,7 @@ SCRIPT type=text/javascript function windowTitle() { -parent.document.title=Deprecated List (Pig 0.3.0-dev API); +parent.document.title=Deprecated List (Pig 0.2.0 API); } /SCRIPT NOSCRIPT Modified: hadoop/pig/site/publish/docs/r0.2.0/api/help-doc.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/help-doc.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site/publish/docs/r0.2.0/api/help-doc.html (original) +++ hadoop/pig/site/publish/docs/r0.2.0/api/help-doc.html Wed Apr 8 21:04:00 2009 @@ -4,7 +4,7 @@ HEAD !-- Generated by javadoc (build 1.5.0_16) on Wed Apr 08 13:40:14 PDT 2009 -- TITLE -API Help (Pig 
0.3.0-dev API) +API Help (Pig 0.2.0 API) /TITLE @@ -13,7 +13,7 @@ SCRIPT type=text/javascript function windowTitle() { -parent.document.title=API Help (Pig 0.3.0-dev API); +parent.document.title=API Help (Pig 0.2.0 API); } /SCRIPT NOSCRIPT Modified: hadoop/pig/site/publish/docs/r0.2.0/api/index-all.html URL: http://svn.apache.org/viewvc/hadoop/pig/site/publish/docs/r0.2.0/api/index-all.html?rev=763392r1=763391r2=763392view=diff == --- hadoop/pig/site
svn commit: r762803 - in /hadoop/pig/tags: release-0.2.0-rc2/ release-0.2.0/
Author: gates Date: Tue Apr 7 15:02:45 2009 New Revision: 762803 URL: http://svn.apache.org/viewvc?rev=762803view=rev Log: Pig 0.2.0 release Added: hadoop/pig/tags/release-0.2.0/ - copied from r762802, hadoop/pig/tags/release-0.2.0-rc2/ Removed: hadoop/pig/tags/release-0.2.0-rc2/
svn commit: r759193 - in /hadoop/pig/trunk: CHANGES.txt README.txt RELEASE_NOTES.txt build.xml
Author: gates Date: Fri Mar 27 16:15:38 2009 New Revision: 759193 URL: http://svn.apache.org/viewvc?rev=759193view=rev Log: Preparing for 0.3.0 development. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/README.txt hadoop/pig/trunk/RELEASE_NOTES.txt hadoop/pig/trunk/build.xml Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=759193r1=759192r2=759193view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Mar 27 16:15:38 2009 @@ -34,7 +34,7 @@ PIG-725: javadoc: warning - Multiple sources of package comments found for package org.apache.commons.logging (gkesavan via sms) -Release 1.0.0 - Unreleased +Release 0.2.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/README.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/README.txt?rev=759193r1=759192r2=759193view=diff == --- hadoop/pig/trunk/README.txt (original) +++ hadoop/pig/trunk/README.txt Fri Mar 27 16:15:38 2009 @@ -17,7 +17,7 @@ For the latest information about Pig, please visit our website at: - http://incubator.apache.org/pig/ + http://hadoop.apache.org/pig/ and our wiki, at: @@ -37,14 +37,3 @@ We welcome all contributions. For the details, please, visit http://wiki.apache.org/pig/HowToContribute. -Incubator Disclaimer -= - -Apache Pig is an effort undergoing incubation at The Apache Software -Foundation (ASF). Incubation is required of all newly accepted projects -until a further review indicates that the infrastructure, communications, -and decision making process have stabilized in a manner consistent with -other successful ASF projects. While incubation status is not necessarily -a reflection of the completeness or stability of the code, it does indicate -that the project has yet to be fully endorsed by the ASF. 
- Modified: hadoop/pig/trunk/RELEASE_NOTES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/RELEASE_NOTES.txt?rev=759193r1=759192r2=759193view=diff == --- hadoop/pig/trunk/RELEASE_NOTES.txt (original) +++ hadoop/pig/trunk/RELEASE_NOTES.txt Fri Mar 27 16:15:38 2009 @@ -1,4 +1,4 @@ -This notes are for Pig 1.0.0 release. +These notes are for Pig 0.2.0 release. Highlights == @@ -12,15 +12,15 @@ 1. Java 1.6.x or newer, preferably from Sun. Set JAVA_HOME to the root of your Java installation 2. Ant build tool: http://ant.apache.org - to build source only -3. Sigwin: http://www.cygwin.com/ - to run under windows +3. Cygwin: http://www.cygwin.com/ - to run under Windows 4. This release is compatible with Hadoop 0.18.x releases Trying the Release == -1. Download pig-1.0.0.tar.gz -2. Unpack the file: tar -xzvf pig-1.0.0.tar.gz -3. Move into the installation directory: cd pig-1.0.0 +1. Download pig-0.2.0.tar.gz +2. Unpack the file: tar -xzvf pig-0.2.0.tar.gz +3. Move into the installation directory: cd pig-0.2.0 4. To run pig without Hadoop cluster, execute the command below. 
This will take you into an interactive shell called grunt that allows you to navigate the local file system and execute Pig commands against the local files @@ -47,8 +47,9 @@ Relevant Documentation == -Pig Language Manual(including Grant commands): +Pig Language Manual(including Grunt commands): http://wiki.apache.org/pig-data/attachments/FrontPage/attachments/plrm.htm UDF Manual: http://wiki.apache.org/pig/UDFManual Piggy Bank: http://wiki.apache.org/pig/PiggyBank Pig Tutorial: http://wiki.apache.org/pig/PigTutorial +Pig Eclipse Plugin (PigPen): http://wiki.apache.org/pig/PigPen Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=759193r1=759192r2=759193view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Fri Mar 27 16:15:38 2009 @@ -25,7 +25,7 @@ !-- name and version properties -- property name=name value=pig / property name=Name value=Pig / -property name=version value=1.0.1-dev / +property name=version value=0.3.0-dev / property name=final.name value=${name}-${version} / condition property=isWindows os family=windows/
svn commit: r759197 - in /hadoop/pig/branches/branch-0.2: CHANGES.txt README.txt RELEASE_NOTES.txt build.xml
Author: gates Date: Fri Mar 27 16:25:49 2009 New Revision: 759197 URL: http://svn.apache.org/viewvc?rev=759197view=rev Log: Preparing for 0.2.0 release. Modified: hadoop/pig/branches/branch-0.2/CHANGES.txt hadoop/pig/branches/branch-0.2/README.txt hadoop/pig/branches/branch-0.2/RELEASE_NOTES.txt hadoop/pig/branches/branch-0.2/build.xml Modified: hadoop/pig/branches/branch-0.2/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.2/CHANGES.txt?rev=759197r1=759196r2=759197view=diff == --- hadoop/pig/branches/branch-0.2/CHANGES.txt (original) +++ hadoop/pig/branches/branch-0.2/CHANGES.txt Fri Mar 27 16:25:49 2009 @@ -18,7 +18,7 @@ Pig Change Log -Release 1.0.0 - 03/17/09 +Release 0.2.0 - 03/26/09 INCOMPATIBLE CHANGES Modified: hadoop/pig/branches/branch-0.2/README.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.2/README.txt?rev=759197r1=759196r2=759197view=diff == --- hadoop/pig/branches/branch-0.2/README.txt (original) +++ hadoop/pig/branches/branch-0.2/README.txt Fri Mar 27 16:25:49 2009 @@ -17,7 +17,7 @@ For the latest information about Pig, please visit our website at: - http://incubator.apache.org/pig/ + http://hadoop.apache.org/pig/ and our wiki, at: @@ -37,14 +37,3 @@ We welcome all contributions. For the details, please, visit http://wiki.apache.org/pig/HowToContribute. -Incubator Disclaimer -= - -Apache Pig is an effort undergoing incubation at The Apache Software -Foundation (ASF). Incubation is required of all newly accepted projects -until a further review indicates that the infrastructure, communications, -and decision making process have stabilized in a manner consistent with -other successful ASF projects. While incubation status is not necessarily -a reflection of the completeness or stability of the code, it does indicate -that the project has yet to be fully endorsed by the ASF. 
- Modified: hadoop/pig/branches/branch-0.2/RELEASE_NOTES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.2/RELEASE_NOTES.txt?rev=759197r1=759196r2=759197view=diff == --- hadoop/pig/branches/branch-0.2/RELEASE_NOTES.txt (original) +++ hadoop/pig/branches/branch-0.2/RELEASE_NOTES.txt Fri Mar 27 16:25:49 2009 @@ -1,4 +1,4 @@ -This notes are for Pig 1.0.0 release. +These notes are for Pig 0.2.0 release. Highlights == @@ -12,15 +12,15 @@ 1. Java 1.6.x or newer, preferably from Sun. Set JAVA_HOME to the root of your Java installation 2. Ant build tool: http://ant.apache.org - to build source only -3. Sigwin: http://www.cygwin.com/ - to run under windows +3. Cygwin: http://www.cygwin.com/ - to run under Windows 4. This release is compatible with Hadoop 0.18.x releases Trying the Release == -1. Download pig-1.0.0.tar.gz -2. Unpack the file: tar -xzvf pig-1.0.0.tar.gz -3. Move into the installation directory: cd pig-1.0.0 +1. Download pig-0.2.0.tar.gz +2. Unpack the file: tar -xzvf pig-0.2.0.tar.gz +3. Move into the installation directory: cd pig-0.2.0 4. To run pig without Hadoop cluster, execute the command below. 
This will take you into an interactive shell called grunt that allows you to navigate the local file system and execute Pig commands against the local files @@ -47,8 +47,9 @@ Relevant Documentation == -Pig Language Manual(including Grant commands): +Pig Language Manual(including Grunt commands): http://wiki.apache.org/pig-data/attachments/FrontPage/attachments/plrm.htm UDF Manual: http://wiki.apache.org/pig/UDFManual Piggy Bank: http://wiki.apache.org/pig/PiggyBank Pig Tutorial: http://wiki.apache.org/pig/PigTutorial +Pig Eclipse Plugin (PigPen): http://wiki.apache.org/pig/PigPen Modified: hadoop/pig/branches/branch-0.2/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.2/build.xml?rev=759197r1=759196r2=759197view=diff == --- hadoop/pig/branches/branch-0.2/build.xml (original) +++ hadoop/pig/branches/branch-0.2/build.xml Fri Mar 27 16:25:49 2009 @@ -25,7 +25,7 @@ !-- name and version properties -- property name=name value=pig / property name=Name value=Pig / -property name=version value=1.0.1-dev / +property name=version value=0.2.0-dev / property name=final.name value=${name}-${version} / condition property=isWindows os family=windows/
svn commit: r759375 - /hadoop/pig/branches/branch-0.2/KEYS
Author: gates Date: Fri Mar 27 22:24:57 2009 New Revision: 759375 URL: http://svn.apache.org/viewvc?rev=759375view=rev Log: Added my (gates) key to the KEYS file. Modified: hadoop/pig/branches/branch-0.2/KEYS Modified: hadoop/pig/branches/branch-0.2/KEYS URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.2/KEYS?rev=759375r1=759374r2=759375view=diff == --- hadoop/pig/branches/branch-0.2/KEYS (original) +++ hadoop/pig/branches/branch-0.2/KEYS Fri Mar 27 22:24:57 2009 @@ -27,3 +27,27 @@ LMYTpbXVonowdEk4YL45jQCfY2/fXDhD2IwYB+PGmdcsWeEv10Y= =WLZG -END PGP PUBLIC KEY BLOCK- +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.6 (GNU/Linux) + +mQGiBEnNUFYRBACdB2/nTuzObFu/B6dqTo301jF0BiD0J4Nl1qz9cP5IwrJArCzw +BQGSzN5UX0gKTpi9FWzXPWrc/On3jAk65q9FgkUTbKQqwtRK2UUjD7GpsMiryIBx +3+f1d8MEkOsFVg+bOzKqIY2VbvksM3GElCusKaWTZpgdsY27UkdHZIj/3wCg4Qq4 +ftOTc25XTEPNgAiAE8kO5qcD/27pjtQu3nzA47VdAvWGNAiNjdwsan/bWUUAvl81 +XmQ0GHlo9D0iyOd2GX9WHuguZ4/tf203f0oOHmgvYFllsF6OttBTIfd57HXyrdQd +VuI8JuKxqM0FYaQkDKNseJZH1X80d604IOWWPVcxPs0Aqdcw1F9e6e5XUPqkBXye +6IKeA/9YxJpY5QFb4EP/AyyGjIj1CsmukwBHJ+fNKribdyY5YgaX0THAjXlYLRmC +HZugmtVIF71EiDutHfq9RPiLP1O13nh3zapo9MD9CrqJqPE7SavGSQ+l1Tnedp6Y +UGWffmL7e0XBCSvB3QKf+ZxOLIK2s72Wl5Mwd6gfBEdcBxBqsLQtQWxhbiBHYXRl +cyAoTm8gY29tbWVudCkgPGdhdGVzQHlhaG9vLWluYy5jb20+iF4EExECAB4FAknN +UFYCGwMGCwkIBwMCAxUCAwMWAgECHgECF4AACgkQiL0/VwTZuDLWUACfWFyJwm6D +oAW88ITpvypdOtRakYsAoM22YGm4jla+y9lryous9eIHNu1VuQENBEnNUFgQBADK +2OL+zY/4V80Bans/v0sRf8cUzB82eW84vMgxRmMS+Kwty8CBwEV4sgWbv0vJwifo +9ZhlMLjqmBwGTR3wIXqtRQAyk8rLYod31KWFyt64vZaubbxZNDxiM5CMFO+q3xjL +hbsMnIC/QliKT2d0K2radTp+jNz7lOkmSvZ9iQ7/0wADBgP/TWsRYmViLcUcOa1N +4Cij8Y3c1tD2qYI5b9eDY5GiOeECss0CudJN/cIvDNstLtLa4JbX5INRpskVTVsx +Duermrsj5/tONUb9GwBnhUuzA0GW1WCkpZXJG2Z2iwKcJ8wQ5KaPj9TNdahF5h7q +outJyNeVe9TC2PytS0tfCzd2lnOISQQYEQIACQUCSc1QWAIbDAAKCRCIvT9XBNm4 +MrvDAJ4ySDj+5CSCB+DQ8PotTK5oX7fDcQCfc0d5dQrqc2ul8/4WDB/LmNcXuhI= +=B5eJ +-END PGP PUBLIC KEY BLOCK-
svn commit: r759376 - /hadoop/pig/trunk/KEYS
Author: gates Date: Fri Mar 27 22:26:35 2009 New Revision: 759376 URL: http://svn.apache.org/viewvc?rev=759376view=rev Log: Added my (gates) key to the KEYS file in trunk. Modified: hadoop/pig/trunk/KEYS Modified: hadoop/pig/trunk/KEYS URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/KEYS?rev=759376r1=759375r2=759376view=diff == --- hadoop/pig/trunk/KEYS (original) +++ hadoop/pig/trunk/KEYS Fri Mar 27 22:26:35 2009 @@ -27,3 +27,27 @@ LMYTpbXVonowdEk4YL45jQCfY2/fXDhD2IwYB+PGmdcsWeEv10Y= =WLZG -END PGP PUBLIC KEY BLOCK- +-BEGIN PGP PUBLIC KEY BLOCK- +Version: GnuPG v1.2.6 (GNU/Linux) + +mQGiBEnNUFYRBACdB2/nTuzObFu/B6dqTo301jF0BiD0J4Nl1qz9cP5IwrJArCzw +BQGSzN5UX0gKTpi9FWzXPWrc/On3jAk65q9FgkUTbKQqwtRK2UUjD7GpsMiryIBx +3+f1d8MEkOsFVg+bOzKqIY2VbvksM3GElCusKaWTZpgdsY27UkdHZIj/3wCg4Qq4 +ftOTc25XTEPNgAiAE8kO5qcD/27pjtQu3nzA47VdAvWGNAiNjdwsan/bWUUAvl81 +XmQ0GHlo9D0iyOd2GX9WHuguZ4/tf203f0oOHmgvYFllsF6OttBTIfd57HXyrdQd +VuI8JuKxqM0FYaQkDKNseJZH1X80d604IOWWPVcxPs0Aqdcw1F9e6e5XUPqkBXye +6IKeA/9YxJpY5QFb4EP/AyyGjIj1CsmukwBHJ+fNKribdyY5YgaX0THAjXlYLRmC +HZugmtVIF71EiDutHfq9RPiLP1O13nh3zapo9MD9CrqJqPE7SavGSQ+l1Tnedp6Y +UGWffmL7e0XBCSvB3QKf+ZxOLIK2s72Wl5Mwd6gfBEdcBxBqsLQtQWxhbiBHYXRl +cyAoTm8gY29tbWVudCkgPGdhdGVzQHlhaG9vLWluYy5jb20+iF4EExECAB4FAknN +UFYCGwMGCwkIBwMCAxUCAwMWAgECHgECF4AACgkQiL0/VwTZuDLWUACfWFyJwm6D +oAW88ITpvypdOtRakYsAoM22YGm4jla+y9lryous9eIHNu1VuQENBEnNUFgQBADK +2OL+zY/4V80Bans/v0sRf8cUzB82eW84vMgxRmMS+Kwty8CBwEV4sgWbv0vJwifo +9ZhlMLjqmBwGTR3wIXqtRQAyk8rLYod31KWFyt64vZaubbxZNDxiM5CMFO+q3xjL +hbsMnIC/QliKT2d0K2radTp+jNz7lOkmSvZ9iQ7/0wADBgP/TWsRYmViLcUcOa1N +4Cij8Y3c1tD2qYI5b9eDY5GiOeECss0CudJN/cIvDNstLtLa4JbX5INRpskVTVsx +Duermrsj5/tONUb9GwBnhUuzA0GW1WCkpZXJG2Z2iwKcJ8wQ5KaPj9TNdahF5h7q +outJyNeVe9TC2PytS0tfCzd2lnOISQQYEQIACQUCSc1QWAIbDAAKCRCIvT9XBNm4 +MrvDAJ4ySDj+5CSCB+DQ8PotTK5oX7fDcQCfc0d5dQrqc2ul8/4WDB/LmNcXuhI= +=B5eJ +-END PGP PUBLIC KEY BLOCK-
svn commit: r759377 - /hadoop/pig/tags/release-0.2.0-rc2/
Author: gates Date: Fri Mar 27 22:28:43 2009 New Revision: 759377 URL: http://svn.apache.org/viewvc?rev=759377view=rev Log: Pig 0.2.0-rc2 release. Added: hadoop/pig/tags/release-0.2.0-rc2/ - copied from r759376, hadoop/pig/branches/branch-0.2/
svn commit: r758687 - in /hadoop/pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/tools/grunt/
Author: gates Date: Thu Mar 26 15:34:21 2009 New Revision: 758687 URL: http://svn.apache.org/viewvc?rev=758687view=rev Log: PIG-713 Added alias completion as part of tab completion in grunt. Added: hadoop/pig/trunk/src/org/apache/pig/tools/grunt/PigCompletorAliases.java hadoop/pig/trunk/src/org/apache/pig/tools/grunt/autocomplete_aliases Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/build.xml hadoop/pig/trunk/src/org/apache/pig/Main.java hadoop/pig/trunk/src/org/apache/pig/PigServer.java hadoop/pig/trunk/src/org/apache/pig/tools/grunt/Grunt.java hadoop/pig/trunk/src/org/apache/pig/tools/grunt/PigCompletor.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=758687r1=758686r2=758687view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Mar 26 15:34:21 2009 @@ -53,6 +53,9 @@ Changed semantics of DEFINE to define last used alias if no argument is given (ericg via gates). + PIG-713 Added alias completion as part of tab completion in grunt (ericg + via gates). 
+ IMPROVEMENTS PIG-270: proper line number for parse errors (daijy via olgan) Modified: hadoop/pig/trunk/build.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/build.xml?rev=758687r1=758686r2=758687view=diff == --- hadoop/pig/trunk/build.xml (original) +++ hadoop/pig/trunk/build.xml Thu Mar 26 15:34:21 2009 @@ -255,6 +255,7 @@ classpath refid=${cp} / /javac copy file=${src.dir}/org/apache/pig/tools/grunt/autocomplete todir=${build.classes}/org/apache/pig/tools/grunt/ +copy file=${src.dir}/org/apache/pig/tools/grunt/autocomplete_aliases todir=${build.classes}/org/apache/pig/tools/grunt/ /target !-- this target is for compilation with all warnings enabled -- Modified: hadoop/pig/trunk/src/org/apache/pig/Main.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/Main.java?rev=758687r1=758686r2=758687view=diff == --- hadoop/pig/trunk/src/org/apache/pig/Main.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/Main.java Thu Mar 26 15:34:21 2009 @@ -39,7 +39,6 @@ import org.apache.pig.impl.util.PropertiesUtil; import org.apache.pig.tools.cmdline.CmdLineParser; import org.apache.pig.tools.grunt.Grunt; -import org.apache.pig.tools.grunt.PigCompletor; import org.apache.pig.impl.util.LogUtils; import org.apache.pig.tools.timer.PerformanceTimerFactory; import org.apache.pig.tools.parameters.ParameterSubstitutionPreprocessor; @@ -309,7 +308,6 @@ // Interactive mode = ExecMode.SHELL; ConsoleReader reader = new ConsoleReader(System.in, new OutputStreamWriter(System.out)); -reader.addCompletor(new PigCompletor()); reader.setDefaultPrompt(grunt ); final String HISTORYFILE = .pig_history; String historyFile = System.getProperty(user.home) + File.separator + HISTORYFILE; Modified: hadoop/pig/trunk/src/org/apache/pig/PigServer.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/PigServer.java?rev=758687r1=758686r2=758687view=diff == --- hadoop/pig/trunk/src/org/apache/pig/PigServer.java (original) +++ 
hadoop/pig/trunk/src/org/apache/pig/PigServer.java Thu Mar 26 15:34:21 2009 @@ -32,6 +32,7 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -663,6 +664,10 @@ // pigContext.getExecutionEngine().reclaimScope(this.scope); } +public SetString getAliasKeySet() { +return aliasOp.keySet(); +} + public MapLogicalOperator, DataBag getExamples(String alias) { //LogicalPlan plan = aliases.get(aliasOp.get(alias)); LogicalPlan plan = null; Modified: hadoop/pig/trunk/src/org/apache/pig/tools/grunt/Grunt.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/tools/grunt/Grunt.java?rev=758687r1=758686r2=758687view=diff == --- hadoop/pig/trunk/src/org/apache/pig/tools/grunt/Grunt.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/tools/grunt/Grunt.java Thu Mar 26 15:34:21 2009 @@ -25,12 +25,15 @@ import java.io.FileOutputStream; import jline.ConsoleReader; +import jline.Completor; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.PigServer; import org.apache.pig.impl.PigContext; import org.apache.pig.tools.grunt.GruntParser
svn commit: r758861 - in /hadoop/pig/branches: branch-0.2/ branch-1.0/
Author: gates Date: Thu Mar 26 21:24:16 2009 New Revision: 758861 URL: http://svn.apache.org/viewvc?rev=758861view=rev Log: Renamed branch-1.0 to branch-0.2 to match new release number Added: hadoop/pig/branches/branch-0.2/ - copied from r758860, hadoop/pig/branches/branch-1.0/ Removed: hadoop/pig/branches/branch-1.0/
svn commit: r758862 - in /hadoop/pig/tags: release-0.2.0-rc0/ release-1.0.0-rc0/
Author: gates Date: Thu Mar 26 21:26:45 2009 New Revision: 758862 URL: http://svn.apache.org/viewvc?rev=758862&view=rev Log: Renamed release-1.0.0-rc0/ to release-0.2.0-rc0/ to match new release number Added: hadoop/pig/tags/release-0.2.0-rc0/ - copied from r758861, hadoop/pig/tags/release-1.0.0-rc0/ Removed: hadoop/pig/tags/release-1.0.0-rc0/
svn commit: r750271 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/Main.java
Author: gates Date: Thu Mar 5 01:13:52 2009 New Revision: 750271 URL: http://svn.apache.org/viewvc?rev=750271view=rev Log: PIG-692 When running a job from a script, use that script name as the default job name. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/Main.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=750271r1=750270r2=750271view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Mar 5 01:13:52 2009 @@ -10,6 +10,9 @@ PIG-620: Added MaxTupleBy1stField UDF to piggybank (vzaliva via gates) + PIG-692: When running a job from a script, use the name of that script as + the default name for the job (vzaliva via gates) + OPTIMIZATIONS BUG FIXES Modified: hadoop/pig/trunk/src/org/apache/pig/Main.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/Main.java?rev=750271r1=750270r2=750271view=diff == --- hadoop/pig/trunk/src/org/apache/pig/Main.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/Main.java Thu Mar 5 01:13:52 2009 @@ -264,6 +264,11 @@ logFileName = validateLogFile(logFileName, file); pigContext.getProperties().setProperty(pig.logfile, logFileName); + +// Set job name based on name of the script +pigContext.getProperties().setProperty(PigContext.JOB_NAME, + PigLatin: +new File(file).getName() +); if (!debug) new File(substFile).deleteOnExit(); @@ -339,6 +344,11 @@ if (!debug) new File(substFile).deleteOnExit(); +// Set job name based on name of the script +pigContext.getProperties().setProperty(PigContext.JOB_NAME, + PigLatin: +new File(remainders[0]).getName() +); + grunt = new Grunt(pin, pigContext); gruntCalled = true; grunt.exec();
svn commit: r727093 - in /hadoop/pig/branches/types/lib: hbase-0.18.1-test.jar hbase-0.18.1.jar
Author: gates Date: Tue Dec 16 09:21:55 2008 New Revision: 727093 URL: http://svn.apache.org/viewvc?rev=727093view=rev Log: PIG-6 Libraries I should have added in the last commit. Added: hadoop/pig/branches/types/lib/hbase-0.18.1-test.jar (with props) hadoop/pig/branches/types/lib/hbase-0.18.1.jar (with props) Added: hadoop/pig/branches/types/lib/hbase-0.18.1-test.jar URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/lib/hbase-0.18.1-test.jar?rev=727093view=auto == Binary file - no diff available. Propchange: hadoop/pig/branches/types/lib/hbase-0.18.1-test.jar -- svn:mime-type = application/octet-stream Added: hadoop/pig/branches/types/lib/hbase-0.18.1.jar URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/lib/hbase-0.18.1.jar?rev=727093view=auto == Binary file - no diff available. Propchange: hadoop/pig/branches/types/lib/hbase-0.18.1.jar -- svn:mime-type = application/octet-stream
svn commit: r725925 - in /hadoop/pig/branches/types: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/executionengine/physical
Author: gates Date: Thu Dec 11 22:48:23 2008 New Revision: 725925 URL: http://svn.apache.org/viewvc?rev=725925view=rev Log: PIG-556. Changed FindQuantiles to call progress(). Fixed issue with reporter passed to EvalFunc being null. Fixed issue with sampling phase of order by query running more than one reduce. Modified: hadoop/pig/branches/types/CHANGES.txt hadoop/pig/branches/types/src/org/apache/pig/EvalFunc.java hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POUserFunc.java hadoop/pig/branches/types/src/org/apache/pig/impl/builtin/FindQuantiles.java hadoop/pig/branches/types/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java hadoop/pig/branches/types/test/org/apache/pig/test/data/GoldenFiles/MRC15.gld Modified: hadoop/pig/branches/types/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/CHANGES.txt?rev=725925r1=725924r2=725925view=diff == --- hadoop/pig/branches/types/CHANGES.txt (original) +++ hadoop/pig/branches/types/CHANGES.txt Thu Dec 11 22:48:23 2008 @@ -331,3 +331,9 @@ PIG-449: Schemas for bags should contain tuples all the time (pradeepk via olgan) + + PIG-543: Restore local mode to truly run locally instead of use map + reduce. (shubhamc via gates) + + PIG-556: Changed FindQuantiles to report progress. Fixed issue with null + reporter being passed to EvalFuncs. 
(gates) Modified: hadoop/pig/branches/types/src/org/apache/pig/EvalFunc.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/EvalFunc.java?rev=725925r1=725924r2=725925view=diff == --- hadoop/pig/branches/types/src/org/apache/pig/EvalFunc.java (original) +++ hadoop/pig/branches/types/src/org/apache/pig/EvalFunc.java Thu Dec 11 22:48:23 2008 @@ -23,6 +23,9 @@ import java.lang.reflect.Type; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; import org.apache.pig.impl.logicalLayer.FrontendException; @@ -36,19 +39,15 @@ * The programmer should not make assumptions about state maintained * between invocations of the invoke() method since the Pig runtime * will schedule and localize invocations based on information provided - * at runtime. - * - * The programmer should not directly extend this class but instead - * extend one of the subclasses where we have bound the parameter T - * to a specific Datum - * - * @author database-syst...@yahoo.research - * + * at runtime. The programmer also should not make assumptions about when or + * how many times the class will be instantiated, since it may be instantiated + * multiple times in both the front and back end. 
*/ public abstract class EvalFuncT { // UDFs must use this to report progress // if the exec is taking more that 300 ms protected PigProgressable reporter; +protected Log log = LogFactory.getLog(getClass()); private static int nextSchemaId; // for assigning unique ids to UDF columns protected String getSchemaName(String name, Schema input) { @@ -118,7 +117,8 @@ // report that progress is being made (otherwise hadoop times out after 600 seconds working on one outer tuple) protected void progress() { -if(reporter!=null) reporter.progress(); +if (reporter != null) reporter.progress(); +else log.warn(No reporter object provided to UDF + this.getClass().getName()); } /** @@ -166,7 +166,7 @@ } -public void setReporter(PigProgressable reporter) { +public final void setReporter(PigProgressable reporter) { this.reporter = reporter; } Modified: hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=725925r1=725924r2=725925view=diff == --- hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original) +++ hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Thu Dec 11 22:48:23 2008 @@ -1172,7 +1172,7 @@ mro.reducePlan.connect(nfe3, str); mro.setReduceDone(true); -//mro.requestedParallelism = rp; +mro.requestedParallelism = 1; return mro; } Modified: hadoop/pig/branches/types/src/org/apache/pig/backend/hadoop