svn commit: r882208 - in /hadoop/pig/branches/load-store-redesign: src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/impl/io/ test/org/apache/pig/test/

2009-11-19 Thread pradeepkth
Author: pradeepkth
Date: Thu Nov 19 17:16:21 2009
New Revision: 882208

URL: http://svn.apache.org/viewvc?rev=882208&view=rev
Log:
Fixes for a couple of more unit tests

Modified:

hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java

hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java

hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestEvalPipeline.java

hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestPigContext.java

Modified: 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java?rev=882208&r1=882207&r2=882208&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java
 (original)
+++ 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigOutputCommitter.java
 Thu Nov 19 17:16:21 2009
@@ -28,6 +28,7 @@
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.pig.StoreFunc;
 import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POStore;
+import org.apache.pig.impl.PigContext;
 import org.apache.pig.impl.util.ObjectSerializer;
 
 /**
@@ -78,9 +79,19 @@
  * @return
  * @throws IOException 
  */
+@SuppressWarnings("unchecked")
 private List<OutputCommitter> getCommitters(TaskAttemptContext context,
 String storeLookupKey) throws IOException {
 Configuration conf = context.getConfiguration();
+
+// if there is a udf in the plan we would need to know the import
+// path so we can instantiate the udf. This is required because
+// we will be deserializing the POStores out of the plan in the next
+// line below. The POStore inturn has a member reference to the 
Physical
+// plan it is part of - so the deserialization goes deep and while
+// deserializing the plan, the udf.import.list may be needed.
+PigContext.setPackageImportList((ArrayList<String>)ObjectSerializer.
+deserialize(conf.get("udf.import.list")));
 LinkedList<POStore> stores = (LinkedList<POStore>) ObjectSerializer.
 deserialize(conf.get(storeLookupKey));
 List<OutputCommitter> committers = new ArrayList<OutputCommitter>();

Modified: 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java?rev=882208&r1=882207&r2=882208&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java 
(original)
+++ 
hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/io/PigFile.java 
Thu Nov 19 17:16:21 2009
@@ -17,21 +17,27 @@
  */
 package org.apache.pig.impl.io;
 
-import java.io.BufferedOutputStream;
 import java.io.IOException;
-import java.io.InputStream;
 import java.util.Iterator;
 
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.OutputCommitter;
+import org.apache.hadoop.mapreduce.OutputFormat;
+import org.apache.hadoop.mapreduce.RecordWriter;
+import org.apache.hadoop.mapreduce.TaskAttemptContext;
+import org.apache.hadoop.mapreduce.TaskAttemptID;
 import org.apache.pig.LoadFunc;
 import org.apache.pig.StoreFunc;
+import org.apache.pig.backend.hadoop.datastorage.ConfigurationUtil;
+import 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigOutputFormat;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
-import org.apache.pig.impl.io.FileLocalizer;
 
 
-// XXX: FIXME: make this work with load store redesign
 
 public class PigFile {
 private String file = null;
@@ -48,11 +54,10 @@
 
 public DataBag load(LoadFunc lfunc, PigContext pigContext) throws 
IOException {
 DataBag content = BagFactory.getInstance().newDefaultBag();
-InputStream is = FileLocalizer.open(file, pigContext);
-//XXX FIXME: make this work with new load-store redesign
-//lfunc.bindTo(file, new BufferedPositionedInputStream(is), 0, 
Long.MAX_VALUE);
+ReadToEndLoader loader = new ReadToEndLoader(lfunc, 
+ConfigurationUtil.toConfiguration(pigContext.getProperties()), 
file, 0);
 Tuple f = null;
-while ((f = lfunc.getNext()) != null) {
+

svn commit: r882221 - in /hadoop/pig/trunk: ./ src/org/apache/pig/impl/logicalLayer/ src/org/apache/pig/impl/logicalLayer/parser/ test/org/apache/pig/test/

2009-11-19 Thread pradeepkth
Author: pradeepkth
Date: Thu Nov 19 17:55:36 2009
New Revision: 882221

URL: http://svn.apache.org/viewvc?rev=882221&view=rev
Log:
PIG-1064: Behaviour of COGROUP with and without schema when using * operator 
(pradeepkth)

Modified:
hadoop/pig/trunk/CHANGES.txt

hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
hadoop/pig/trunk/test/org/apache/pig/test/TestLogicalPlanBuilder.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=882221&r1=882220&r2=882221&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Thu Nov 19 17:55:36 2009
@@ -33,6 +33,9 @@
 
 BUG FIXES
 
+PIG-1064: Behaviour of COGROUP with and without schema when using * operator
+(pradeepkth)
+
 Release 0.6.0 - Unreleased
 
 INCOMPATIBLE CHANGES

Modified: 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java?rev=882221&r1=882220&r2=882221&view=diff
==
--- 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java
 (original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ProjectStarTranslator.java
 Thu Nov 19 17:55:36 2009
@@ -57,7 +57,6 @@
 //get the attributes of cogroup that are modified during the 
translation
 
 MultiMap<LogicalOperator, LogicalPlan> mapGByPlans = 
cg.getGroupByPlans();
-
 for(LogicalOperator op: cg.getInputs()) {
 ArrayList<LogicalPlan> newGByPlans = new ArrayList<LogicalPlan>();
 for(LogicalPlan lp: mapGByPlans.get(op)) {
@@ -70,9 +69,41 @@
 newGByPlans.add(lp);
 }
 }
+
+
 mapGByPlans.removeKey(op);
 mapGByPlans.put(op, newGByPlans);
 }
+
+// check if after translation none of group by plans in a cogroup
+// have a project(*) - if they still do it's because the input
+// for the project(*) did not have a schema - in this case, we should
+// error out since we could have different number/types of 
+// cogroup keys
+if(cg.getInputs().size() > 1) { // only for cogroups
+for(LogicalOperator op: cg.getInputs()) {
+for(LogicalPlan lp: mapGByPlans.get(op)) {
+if(checkPlanForProjectStar(lp)) {
+// not following Error handling guidelines to give 
error code
+// and error source since this will get swallowed by 
the parser
+// which will just return a ParseException
+throw new VisitorException("Cogroup/Group by * is only 
allowed if " +
+   "the input has a schema");
+}
+}
+}
+// check if after translation all group by plans have same arity
+int arity = mapGByPlans.get(cg.getInputs().get(0)).size();
+for(LogicalOperator op: cg.getInputs()) {
+if(arity != mapGByPlans.get(op).size()) {
+// not following Error handling guidelines to give error 
code
+// and error source since this will get swallowed by the 
parser
+// which will just return a ParseException
+throw new VisitorException("The arity of cogroup/group by 
columns " +
+   "do not match");
+}
+}
+}
 }
 
 /* (non-Javadoc)

Modified: 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt?rev=882221&r1=882220&r2=882221&view=diff
==
--- 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
(original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/parser/QueryParser.jjt 
Thu Nov 19 17:55:36 2009
@@ -1029,19 +1029,19 @@
)
{ 
if(null != root) {
-log.debug(Adding  + root.getAlias() +   + root +  to the 
lookup table  + aliases);
-
-//Translate all the project(*) leaves in the plan to a sequence of 
projections
-ProjectStarTranslator translate = new ProjectStarTranslator(lp);
-translate.visit();
-
-addLogicalPlan(root, lp);
-
 try {
-   log.debug(Root:  + root.getClass().getName() +  

svn commit: r882282 - /hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/TupleSize.java

2009-11-19 Thread pradeepkth
Author: pradeepkth
Date: Thu Nov 19 20:29:27 2009
New Revision: 882282

URL: http://svn.apache.org/viewvc?rev=882282&view=rev
Log:
Removing src/org/apache/pig/impl/builtin/TupleSize.java as part of PIG-1062 
changes

Removed:

hadoop/pig/branches/load-store-redesign/src/org/apache/pig/impl/builtin/TupleSize.java



svn commit: r882340 - in /hadoop/pig/trunk/contrib/zebra: CHANGES.txt build-contrib.xml

2009-11-19 Thread gates
Author: gates
Date: Thu Nov 19 22:37:10 2009
New Revision: 882340

URL: http://svn.apache.org/viewvc?rev=882340&view=rev
Log:
Changed version number to be 0.7.0 to match Pig version number change.

Modified:
hadoop/pig/trunk/contrib/zebra/CHANGES.txt
hadoop/pig/trunk/contrib/zebra/build-contrib.xml

Modified: hadoop/pig/trunk/contrib/zebra/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/CHANGES.txt?rev=882340&r1=882339&r2=882340&view=diff
==
--- hadoop/pig/trunk/contrib/zebra/CHANGES.txt (original)
+++ hadoop/pig/trunk/contrib/zebra/CHANGES.txt Thu Nov 19 22:37:10 2009
@@ -3,6 +3,8 @@
 Trunk (unreleased changes)
 
   INCOMPATIBLE CHANGES
+PIG-1099 Changed version number to be 0.7.0 to match Pig version number
+   change (yanz via gates)
 
   IMPROVEMENTS
 

Modified: hadoop/pig/trunk/contrib/zebra/build-contrib.xml
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/zebra/build-contrib.xml?rev=882340&r1=882339&r2=882340&view=diff
==
--- hadoop/pig/trunk/contrib/zebra/build-contrib.xml (original)
+++ hadoop/pig/trunk/contrib/zebra/build-contrib.xml Thu Nov 19 22:37:10 2009
@@ -23,7 +23,7 @@
 
   <property name="name" value="${ant.project.name}"/>
   <property name="root" value="${basedir}"/>
-  <property name="version" value="0.6.0-dev"/>
+  <property name="version" value="0.7.0-dev"/>
 
   <!-- Load all the default properties, and any the user wants-->
   <!-- to contribute (without having to type -D or edit this file -->




svn commit: r882362 [2/2] - in /hadoop/pig/branches/load-store-redesign: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/ src/org/apache/pig/backend/hadoop/hbas

2009-11-19 Thread pradeepkth
Modified: 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java?rev=882362&r1=882361&r2=882362&view=diff
==
--- 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java 
(original)
+++ 
hadoop/pig/branches/load-store-redesign/test/org/apache/pig/test/TestLoad.java 
Thu Nov 19 23:38:22 2009
@@ -17,17 +17,25 @@
  */
 package org.apache.pig.test;
 
-import java.util.*;
-
 import java.io.BufferedReader;
 import java.io.File;
+import java.io.FileNotFoundException;
 import java.io.FileReader;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Properties;
 
 import junit.framework.Assert;
 
 import org.apache.pig.ExecType;
 import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
+import org.apache.pig.backend.datastorage.DataStorage;
 import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
+import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
+import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
 import org.apache.pig.builtin.PigStorage;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.DataByteArray;
@@ -35,54 +43,54 @@
 import org.apache.pig.data.DefaultTuple;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.impl.PigContext;
-import org.apache.pig.impl.io.FileSpec;
-import org.apache.pig.impl.plan.OperatorKey;
-import org.apache.pig.PigServer;
 import org.apache.pig.impl.io.FileLocalizer;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.POStatus;
-import org.apache.pig.backend.hadoop.executionengine.physicalLayer.Result;
-import 
org.apache.pig.backend.hadoop.executionengine.physicalLayer.relationalOperators.POLoad;
-import org.apache.pig.test.utils.GenPhyOp;
-import org.apache.pig.test.utils.TestHelper;
+import org.apache.pig.impl.io.FileSpec;
 import org.apache.pig.impl.logicalLayer.LOLoad;
 import org.apache.pig.impl.logicalLayer.LogicalOperator;
 import org.apache.pig.impl.logicalLayer.LogicalPlan;
 import org.apache.pig.impl.logicalLayer.LogicalPlanBuilder;
-import org.apache.pig.backend.datastorage.ContainerDescriptor;
-import org.apache.pig.backend.datastorage.DataStorage;
-import org.apache.pig.backend.datastorage.DataStorageException;
-import org.apache.pig.backend.datastorage.ElementDescriptor;
+import org.apache.pig.impl.plan.OperatorKey;
+import org.apache.pig.test.utils.GenPhyOp;
+import org.apache.pig.test.utils.TestHelper;
 import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
 public class TestLoad extends junit.framework.TestCase {
-FileSpec inpFSpec;
-POLoad ld;
+
 PigContext pc;
-DataBag inpDB;
-String curDir;
-String inpDir;
-PigServer pig;
+PigServer[] servers;
 
 static MiniCluster cluster = MiniCluster.buildCluster();
+
 @Before
 public void setUp() throws Exception {
-curDir = System.getProperty(user.dir);
-inpDir = curDir + File.separatorChar + 
test/org/apache/pig/test/data/InputFiles/;
-if 
((System.getProperty(os.name).toUpperCase().startsWith(WINDOWS)))
-inpDir=/+FileLocalizer.parseCygPath(inpDir, 
FileLocalizer.STYLE_WINDOWS);
-inpFSpec = new FileSpec(file: + inpDir + passwd, new 
FuncSpec(PigStorage.class.getName(), new String[]{:}));
-
 FileLocalizer.deleteTempFiles();
-pig = new PigServer(ExecType.MAPREDUCE, cluster.getProperties());
-pc = pig.getPigContext();
+servers = new PigServer[] { 
+new PigServer(ExecType.MAPREDUCE, cluster.getProperties()),
+new PigServer(ExecType.LOCAL, new Properties())
+};   
+}
 
-ld = GenPhyOp.topLoadOp();
+@After
+public void tearDown() throws Exception {
+}
+
+@Test
+public void testGetNextTuple() throws IOException {
+pc = servers[0].getPigContext();
+String curDir = System.getProperty("user.dir");
+String inpDir = curDir + File.separatorChar + 
"test/org/apache/pig/test/data/InputFiles/";
+if 
((System.getProperty("os.name").toUpperCase().startsWith("WINDOWS")))
+inpDir="/"+FileLocalizer.parseCygPath(inpDir, 
FileLocalizer.STYLE_WINDOWS);
+// copy passwd file to cluster and set that as the input location for 
the load
+Util.copyFromLocalToCluster(cluster, inpDir + "passwd", "passwd");
+FileSpec inpFSpec = new FileSpec("passwd", new 
FuncSpec(PigStorage.class.getName(), new String[]{":"}));
+POLoad ld = GenPhyOp.topLoadOp();
 ld.setLFile(inpFSpec);
 ld.setPc(pc);
 
-inpDB = DefaultBagFactory.getInstance().newDefaultBag();
+DataBag inpDB