svn commit: r882208 - in /hadoop/pig/branches/load-store-redesign: src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/impl/io/ test/org/apache/pig/test/
Author: pradeepkth Date: Thu Nov 19 17:16:21 2009 New Revision: 882208 URL: http://svn.apache.org/viewvc?rev=882208view=rev Log: Fixes for a couple of more unit tests Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestEvalPipeline.java hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPigContext.java Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java?rev=882208r1=882207r2=882208view=diff == --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java Thu Nov 19 17:16:21 2009 @@ -28,6 +28,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.pig.StoreFunc; import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore; +import org.apache.pig.impl.PigContext; import org.apache.pig.impl.util.ObjectSerializer; /** @@ -78,9 +79,19 @@ * @return * @throws IOException */ +@SuppressWarnings(unchecked) private ListOutputCommitter getCommitters(TaskAttemptContext context, String storeLookupKey) throws IOException { Configuration conf = context.getConfiguration(); + +// if there is a udf in the plan we would need to know the import +// path so we can instantiate the udf. This is required because +// we will be deserializing the POStores out of the plan in the next +// line below. 
The POStore inturn has a member reference to the Physical +// plan it is part of - so the deserialization goes deep and while +// deserializing the plan, the udf.import.list may be needed. +PigContext.setPackageImportList((ArrayListString)ObjectSerializer. +deserialize(conf.get(udf.import.list))); LinkedListPOStore stores = (LinkedListPOStore) ObjectSerializer. deserialize(conf.get(storeLookupKey)); ListOutputCommitter committers = new ArrayListOutputCommitter(); Modified: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java?rev=882208r1=882207r2=882208view=diff == --- hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java (original) +++ hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java Thu Nov 19 17:16:21 2009 @@ -17,21 +17,27 @@ */ package org.apache.pig.impl.io; -import java.io.BufferedOutputStream; import java.io.IOException; -import java.io.InputStream; import java.util.Iterator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.JobContext; +import org.apache.hadoop.mapreduce.JobID; +import org.apache.hadoop.mapreduce.OutputCommitter; +import org.apache.hadoop.mapreduce.OutputFormat; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.pig.LoadFunc; import org.apache.pig.StoreFunc; +import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; -import org.apache.pig.impl.io.FileLocalizer; -// XXX: FIXME: make this work with load store redesign public class PigFile { private String 
file = null; @@ -48,11 +54,10 @@ public DataBag load(LoadFunc lfunc, PigContext pigContext) throws IOException { DataBag content = BagFactory.getInstance().newDefaultBag(); -InputStream is = FileLocalizer.open(file, pigContext); -//XXX FIXME: make this work with new load-store redesign -//lfunc.bindTo(file, new BufferedPositionedInputStream(is), 0, Long.MAX_VALUE); +ReadToEndLoader loader = new ReadToEndLoader(lfunc, +ConfigurationUtil.toConfiguration(pigContext.getProperties()), file, 0); Tuple f = null; -while ((f = lfunc.getNext()) != null) { +
svn commit: r882221 - in /hadoop/pig/trunk: ./ src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/logicalLayer/parser/ test/org/apache/pig/test/
Author: pradeepkth Date: Thu Nov 19 17:55:36 2009 New Revision: 882221 URL: http://svn.apache.org/viewvc?rev=882221view=rev Log: PIG-1064: Behaviour of COGROUP with and without schema when using * operator (pradeepkth) Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=882221r1=882220r2=882221view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Nov 19 17:55:36 2009 @@ -33,6 +33,9 @@ BUG FIXES +PIG-1064: Behaviour of COGROUP with and without schema when using * operator +(pradeepkth) + Release 0.6.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java?rev=882221r1=882220r2=882221view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java Thu Nov 19 17:55:36 2009 @@ -57,7 +57,6 @@ //get the attributes of cogroup that are modified during the trnalsation MultiMapLogicalOperator, LogicalPlan mapGByPlans = cg.getGroupByPlans(); - for(LogicalOperator op: cg.getInputs()) { ArrayListLogicalPlan newGByPlans = new ArrayListLogicalPlan(); for(LogicalPlan lp: mapGByPlans.get(op)) { @@ -70,9 +69,41 @@ newGByPlans.add(lp); } } + + mapGByPlans.removeKey(op); mapGByPlans.put(op, newGByPlans); } + +// check if after translation none of group by plans in a cogroup +// have a project(*) - if they still do it's because the input +// for the 
project(*) did not have a schema - in this case, we should +// error out since we could have different number/types of +// cogroup keys +if(cg.getInputs().size() 1) { // only for cogroups +for(LogicalOperator op: cg.getInputs()) { +for(LogicalPlan lp: mapGByPlans.get(op)) { +if(checkPlanForProjectStar(lp)) { +// not following Error handling guidelines to give error code +// and error source since this will get swallowed by the parser +// which will just return a ParseException +throw new VisitorException(Cogroup/Group by * is only allowed if + + the input has a schema); +} +} +} +// check if after translation all group by plans have same arity +int arity = mapGByPlans.get(cg.getInputs().get(0)).size(); +for(LogicalOperator op: cg.getInputs()) { +if(arity != mapGByPlans.get(op).size()) { +// not following Error handling guidelines to give error code +// and error source since this will get swallowed by the parser +// which will just return a ParseException +throw new VisitorException(The arity of cogroup/group by columns + + do not match); +} +} +} } /* (non-Javadoc) Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=882221r1=882220r2=882221view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt Thu Nov 19 17:55:36 2009 @@ -1029,19 +1029,19 @@ ) { if(null != root) { -log.debug(Adding + root.getAlias() + + root + to the lookup table + aliases); - -//Translate all the project(*) leaves in the plan to a sequence of projections -ProjectStarTranslator translate = new ProjectStarTranslator(lp); -translate.visit(); - -addLogicalPlan(root, lp); - try { - log.debug(Root: + root.getClass().getName() +
svn commit: r882282 - /hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/TupleSize.java
Author: pradeepkth Date: Thu Nov 19 20:29:27 2009 New Revision: 882282 URL: http://svn.apache.org/viewvc?rev=882282&view=rev Log: Removing src/org/apache/pig/impl/builtin/TupleSize.java as part of PIG-1062 changes Removed: hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/TupleSize.java
svn commit: r882340 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt build-contrib.xml
Author: gates Date: Thu Nov 19 22:37:10 2009 New Revision: 882340 URL: http://svn.apache.org/viewvc?rev=882340view=rev Log: Changed version number to be 0.7.0 to match Pig version number change. Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt hadoop/pig/trunk/contrib/zebra/build-contrib.xml Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=882340r1=882339r2=882340view=diff == --- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original) +++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Thu Nov 19 22:37:10 2009 @@ -3,6 +3,8 @@ Trunk (unreleased changes) INCOMPATIBLE CHANGES +PIG-1099 Changed version number to be 0.7.0 to match Pig version number + change (yanz via gates) IMPROVEMENTS Modified: hadoop/pig/trunk/contrib/zebra/build-contrib.xml URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/build-contrib.xml?rev=882340r1=882339r2=882340view=diff == --- hadoop/pig/trunk/contrib/zebra/build-contrib.xml (original) +++ hadoop/pig/trunk/contrib/zebra/build-contrib.xml Thu Nov 19 22:37:10 2009 @@ -23,7 +23,7 @@ property name=name value=${ant.project.name}/ property name=root value=${basedir}/ - property name=version value=0.6.0-dev/ + property name=version value=0.7.0-dev/ !-- Load all the default properties, and any the user wants-- !-- to contribute (without having to type -D or edit this file --
svn commit: r882362 [2/2] - in /hadoop/pig/branches/load-store-redesign: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/hbas
Modified: hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java URL: http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java?rev=882362r1=882361r2=882362view=diff == --- hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java (original) +++ hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java Thu Nov 19 23:38:22 2009 @@ -17,17 +17,25 @@ */ package org.apache.pig.test; -import java.util.*; - import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; import java.io.FileReader; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; import junit.framework.Assert; import org.apache.pig.ExecType; import org.apache.pig.FuncSpec; +import org.apache.pig.PigServer; +import org.apache.pig.backend.datastorage.DataStorage; import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; +import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; import org.apache.pig.builtin.PigStorage; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; @@ -35,54 +43,54 @@ import org.apache.pig.data.DefaultTuple; import org.apache.pig.data.Tuple; import org.apache.pig.impl.PigContext; -import org.apache.pig.impl.io.FileSpec; -import org.apache.pig.impl.plan.OperatorKey; -import org.apache.pig.PigServer; import org.apache.pig.impl.io.FileLocalizer; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result; -import org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad; -import org.apache.pig.test.utils.GenPhyOp; -import 
org.apache.pig.test.utils.TestHelper; +import org.apache.pig.impl.io.FileSpec; import org.apache.pig.impl.logicalLayer.LOLoad; import org.apache.pig.impl.logicalLayer.LogicalOperator; import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder; -import org.apache.pig.backend.datastorage.ContainerDescriptor; -import org.apache.pig.backend.datastorage.DataStorage; -import org.apache.pig.backend.datastorage.DataStorageException; -import org.apache.pig.backend.datastorage.ElementDescriptor; +import org.apache.pig.impl.plan.OperatorKey; +import org.apache.pig.test.utils.GenPhyOp; +import org.apache.pig.test.utils.TestHelper; import org.junit.After; import org.junit.Before; import org.junit.Test; public class TestLoad extends junit.framework.TestCase { -FileSpec inpFSpec; -POLoad ld; + PigContext pc; -DataBag inpDB; -String curDir; -String inpDir; -PigServer pig; +PigServer[] servers; static MiniCluster cluster = MiniCluster.buildCluster(); + @Before public void setUp() throws Exception { -curDir = System.getProperty(user.dir); -inpDir = curDir + File.separatorChar + test/org/apache/pig/test/data/InputFiles/; -if ((System.getProperty(os.name).toUpperCase().startsWith(WINDOWS))) -inpDir=/+FileLocalizer.parseCygPath(inpDir, FileLocalizer.STYLE_WINDOWS); -inpFSpec = new FileSpec(file: + inpDir + passwd, new FuncSpec(PigStorage.class.getName(), new String[]{:})); - FileLocalizer.deleteTempFiles(); -pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties()); -pc = pig.getPigContext(); +servers = new PigServer[] { +new PigServer(ExecType.MAPREDUCE, cluster.getProperties()), +new PigServer(ExecType.LOCAL, new Properties()) +}; +} -ld = GenPhyOp.topLoadOp(); +@After +public void tearDown() throws Exception { +} + +@Test +public void testGetNextTuple() throws IOException { +pc = servers[0].getPigContext(); +String curDir = System.getProperty(user.dir); +String inpDir = curDir + File.separatorChar + 
test/org/apache/pig/test/data/InputFiles/; +if ((System.getProperty(os.name).toUpperCase().startsWith(WINDOWS))) +inpDir=/+FileLocalizer.parseCygPath(inpDir, FileLocalizer.STYLE_WINDOWS); +// copy passwd file to cluster and set that as the input location for the load +Util.copyFromLocalToCluster(cluster, inpDir + passwd, passwd); +FileSpec inpFSpec = new FileSpec(passwd, new FuncSpec(PigStorage.class.getName(), new String[]{:})); +POLoad ld = GenPhyOp.topLoadOp(); ld.setLFile(inpFSpec); ld.setPc(pc); -inpDB = DefaultBagFactory.getInstance().newDefaultBag(); +DataBag inpDB