svn commit: r816699 - in /hadoop/pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/ src/org/apache/pig/impl/builtin/ test/org/apache/pig/t

2009-09-18 Thread olga
Author: olga
Date: Fri Sep 18 16:20:49 2009
New Revision: 816699

URL: http://svn.apache.org/viewvc?rev=816699&view=rev
Log:
PIG-964: Handling null in skewed join (sriranjan via olgan)

Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/PigWarning.java

hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java
hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java
hadoop/pig/trunk/src/org/apache/pig/impl/builtin/SampleLoader.java
hadoop/pig/trunk/test/org/apache/pig/test/TestSkewedJoin.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816699&r1=816698&r2=816699&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Sep 18 16:20:49 2009
@@ -98,6 +98,8 @@
 
 BUG FIXES
 
+PIG-964: Handling null in skewed join (sriranjan via olgan)
+
 PIG-962: Skewed join creates 3 map reduce jobs (sriranjan via olgan)
 
 PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth)

Modified: hadoop/pig/trunk/src/org/apache/pig/PigWarning.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/PigWarning.java?rev=816699&r1=816698&r2=816699&view=diff
==
--- hadoop/pig/trunk/src/org/apache/pig/PigWarning.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/PigWarning.java Fri Sep 18 16:20:49 2009
@@ -59,5 +59,6 @@
 UNABLE_TO_CLOSE_SPILL_FILE,
 UNREACHABLE_CODE_BOTH_MAP_AND_REDUCE_PLANS_PROCESSED,
 USING_OVERLOADED_FUNCTION,
+REDUCER_COUNT_LOW,
 NULL_COUNTER_COUNT;
 }

Modified: 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java?rev=816699&r1=816698&r2=816699&view=diff
==
--- 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java
 (original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java
 Fri Sep 18 16:20:49 2009
@@ -77,20 +77,20 @@
// for partition table, compute the index based on the sampler 
output
Pair Integer, Integer indexes;
Integer curIndex = -1;
-   Tuple keyTuple = null;
+   Tuple keyTuple = DefaultTupleFactory.getInstance().newTuple(1);

// extract the key from nullablepartitionwritable
PigNullableWritable key = ((NullablePartitionWritable) 
wrappedKey).getKey();
 
-   if (key instanceof NullableTuple) {
+   try {
+   keyTuple.set(0, key.getValueAsPigType());
+   } catch (ExecException e) {
+   return -1;
+   }
+   
+   // if the key is not null and key 
+   if (key instanceof NullableTuple  key.getValueAsPigType() != 
null) {
keyTuple = (Tuple)key.getValueAsPigType();
-   } else {
-   keyTuple = 
DefaultTupleFactory.getInstance().newTuple(1);
-   try {
-   keyTuple.set(0, key.getValueAsPigType());
-   } catch (ExecException e) {
-   return -1;
-   }
}
 
indexes = reducerMap.get(keyTuple);

Modified: 
hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java?rev=816699&r1=816698&r2=816699&view=diff
==
--- hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java 
(original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java 
Fri Sep 18 16:20:49 2009
@@ -26,6 +26,7 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pig.EvalFunc;
+import org.apache.pig.PigWarning;
 import org.apache.pig.backend.executionengine.ExecException;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
@@ -198,8 +199,13 @@
}
 
if (maxReducers  totalReducers_) {
-   throw new RuntimeException(You need at least  
+ maxReducers
-   +  reducers to run this job.);
+   if(pigLogger != null) {
+

svn commit: r816723 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java test/org/apache/pig/test/TestMergeJoin.java test/org/apache/pig

2009-09-18 Thread gates
Author: gates
Date: Fri Sep 18 17:41:38 2009
New Revision: 816723

URL: http://svn.apache.org/viewvc?rev=816723&view=rev
Log:
PIG-951:  Set parallelism explicitly to 1 for indexing job in merge join


Modified:
hadoop/pig/trunk/CHANGES.txt

hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816723&r1=816722&r2=816723&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Fri Sep 18 17:41:38 2009
@@ -30,6 +30,9 @@
 
 BUG FIXES
 
+PIG-951:  Set parallelism explicitly to 1 for indexing job in merge join
+  (ashutoc via gates).
+
 Release 0.5.0 - Unreleased
 
 INCOMPATIBLE CHANGES

Modified: 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=816723&r1=816722&r2=816723&view=diff
==
--- 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
 (original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java
 Fri Sep 18 17:41:38 2009
@@ -1115,7 +1115,8 @@
 }
 
 joinOp.setupRightPipeline(rightPipelinePlan);
-
+   rightMROpr.requestedParallelism = 1; // we need exactly one reducer 
for indexing job.
+
 // At this point, we must be operating on map plan of right input 
and it would contain nothing else other then a POLoad.
 POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0);
 
joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec());

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=816723&r1=816722&r2=816723&view=diff
==
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Fri Sep 18 
17:41:38 2009
@@ -26,11 +26,15 @@
 import org.apache.pig.PigException;
 import org.apache.pig.PigServer;
 import org.apache.pig.backend.executionengine.ExecException;
+import 
org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan;
 import org.apache.pig.data.BagFactory;
 import org.apache.pig.data.DataBag;
 import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.PigContext;
+import org.apache.pig.impl.logicalLayer.LogicalPlan;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
 import org.apache.pig.impl.util.LogUtils;
+import org.apache.pig.test.utils.LogicalPlanTester;
 import org.apache.pig.test.utils.TestHelper;
 import org.junit.After;
 import org.junit.Before;
@@ -407,6 +411,20 @@
 }   
 
 @Test
+public void testParallelism() throws Exception{
+
+LogicalPlanTester tester = new LogicalPlanTester();
+tester.buildPlan(A = LOAD ' + INPUT_FILE + ';);
+tester.buildPlan(B = LOAD ' + INPUT_FILE + ';);
+tester.buildPlan(C = join A by $0, B by $0 using \merge\ parallel 
50;);
+LogicalPlan lp = tester.buildPlan(store C into 'out';);
+   PigContext pc = new 
PigContext(ExecType.MAPREDUCE,cluster.getProperties());
+pc.connect();
+   MROperPlan mro = Util.buildMRPlan(Util.buildPhysicalPlan(lp, pc),pc);
+Assert.assertEquals(1,mro.getRoots().get(0).getRequestedParallelism());
+}
+
+@Test
 public void testIndexer() throws IOException{
 Util.createInputFile(cluster, temp_file1, new String[]{1+});
 Util.createInputFile(cluster, temp_file2, new String[]{2+});

Modified: hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld?rev=816723&r1=816722&r2=816723&view=diff
==
--- hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld 
(original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Fri 
Sep 18 17:41:38 2009
@@ -6,7 +6,7 @@
 |   |
 |   |---Load(file:/tmp/input1:org.apache.pig.builtin.PigStorage) - 
scope-117
 |
-|---MapReduce(-1,PigStorage) - scope-126:
+|---MapReduce(1,PigStorage) - scope-126:
 |   
Store(file:/tmp/temp-1456742965/tmp-1456742965:org.apache.pig.builtin.BinStorage)
 - scope-133
 

[Pig Wiki] Update of MetadataInterfaceProposal by AlanGates

2009-09-18 Thread Apache Wiki
Dear Wiki user,

You have subscribed to a wiki page or wiki category on Pig Wiki for change 
notification.

The following page has been changed by AlanGates:
http://wiki.apache.org/pig/MetadataInterfaceProposal

--
  = Proposed Design for Pig Metadata Interface =
- With the introduction of SQL, Pig needs to be able to communicate with 
external metadata services.  These communications will includes such
+ With the introduction of SQL, Pig needs to be able to communicate with 
external metadata services.  These communications will include such
  operations as creating, altering, and dropping databases, tables, etc.  It 
will also include metadata queries, such as requests to show available
  tables, etc.  DDL operations of these sorts will be beyond the scope of the 
proposed metadata
  interfaces for load and storage functions.  However, Pig should not be 
tightly tied to a single metadata implementation.  It should be able to
- work with Owl, Hive's metastore, or any other metadata source that is added 
to Hadoop.  To this end this document proposes an interface for
+ work with Owl, Hive's metastore, or any other metadata source that is added 
to Hadoop.  To this end, this document proposes an interface for
  operating with metadata systems.  Different metadata connectors can then be 
implemented, one for each metadata system.
  
  == Interface ==
- 
  This interface will allow users to find information about tables, databases, 
etc. in the metadata store.  For each call, it will pass the portion
  of the syntax tree relevant to the operation to the metadata connector.  
These structures will be versioned.
  
@@ -39, +38 @@

  configuration file.  It will specify the URI of the server to use and the 
implementation of !MetadataDDL to use with this server.
  
  == Accessing Global Metadata from Pig Latin ==
- Pig Latin will not support a call to metadata within the language itself.  
Instead, it will support the ability to invoke a SQL DDL command.
+ Pig Latin will not support a call to metadata within the language itself.  
Instead, it will support the ability to invoke a Pig SQL DDL command.
  This SQL will then be sent to the SQL parser and dispatched through the 
metadata service as before.
  
  {{{


svn commit: r816820 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/data/DefaultTuple.java

2009-09-18 Thread gates
Author: gates
Date: Sat Sep 19 00:04:47 2009
New Revision: 816820

URL: http://svn.apache.org/viewvc?rev=816820&view=rev
Log:
PIG-513:  Removed unecessary bounds check in DefaultTuple.


Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816820&r1=816819&r2=816820&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Sep 19 00:04:47 2009
@@ -30,6 +30,9 @@
 
 BUG FIXES
 
+PIG-513:  Removed unecessary bounds check in DefaultTuple (ashutoshc via
+  gates).
+
 PIG-951:  Set parallelism explicitly to 1 for indexing job in merge join
   (ashutoc via gates).
 

Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java?rev=816820&r1=816819&r2=816820&view=diff
==
--- hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java Sat Sep 19 
00:04:47 2009
@@ -116,7 +116,6 @@
  * than or equal to the number of fields in the tuple.
  */
 public boolean isNull(int fieldNum) throws ExecException {
-checkBounds(fieldNum);
 return (mFields.get(fieldNum) == null);
 }
 
@@ -130,7 +129,6 @@
  * the number of fields in the tuple.
  */
 public byte getType(int fieldNum) throws ExecException {
-checkBounds(fieldNum);
 return DataType.findType(mFields.get(fieldNum));
 }
 
@@ -142,7 +140,6 @@
  * the number of fields in the tuple.
  */
 public Object get(int fieldNum) throws ExecException {
-checkBounds(fieldNum);
 return mFields.get(fieldNum);
 }
 
@@ -163,7 +160,6 @@
  * the number of fields in the tuple.
  */
 public void set(int fieldNum, Object val) throws ExecException {
-checkBounds(fieldNum);
 mFields.set(fieldNum, val);
 }
 
@@ -352,15 +348,6 @@
 }
 }
 
-private void checkBounds(int fieldNum) throws ExecException {
-if (fieldNum = mFields.size()) {
-int errCode = 1072;
-String msg = Out of bounds access: Request for field number  + 
fieldNum +
- exceeds tuple size of  + mFields.size();
-throw new ExecException(msg, errCode, PigException.INPUT);
-}
-}
-
 /**
  * @return true if this Tuple is null
  */
@@ -376,3 +363,4 @@
 }
 
 }
+




svn commit: r816832 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOCast.java src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java test/org/apache/pig/te

2009-09-18 Thread pradeepkth
Author: pradeepkth
Date: Sat Sep 19 00:26:37 2009
New Revision: 816832

URL: http://svn.apache.org/viewvc?rev=816832&view=rev
Log:
Maps are not implicitly casted (pradeepkth)

Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java

hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816832&r1=816831&r2=816832&view=diff
==
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Sep 19 00:26:37 2009
@@ -30,6 +30,8 @@
 
 BUG FIXES
 
+PIG-942: Maps are not implicitly casted (pradeepkth)
+
 PIG-513:  Removed unecessary bounds check in DefaultTuple (ashutoshc via
   gates).
 

Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java?rev=816832&r1=816831&r2=816832&view=diff
==
--- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java Sat Sep 
19 00:26:37 2009
@@ -70,6 +70,7 @@
 public Schema.FieldSchema getFieldSchema() throws FrontendException {
 if(!mIsFieldSchemaComputed) {
 mFieldSchema = new Schema.FieldSchema(null, mType);
+mFieldSchema.setParent(getExpression().mFieldSchema.canonicalName, 
getExpression());
 mIsFieldSchemaComputed = true;
 }
 return mFieldSchema;

Modified: 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java?rev=816832&r1=816831&r2=816832&view=diff
==
--- 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
 (original)
+++ 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java
 Sat Sep 19 00:26:37 2009
@@ -239,6 +239,11 @@
 }
 
 map.setType(map.getValueType());
+if(map.getMap().getType() != DataType.MAP) {
+// insert cast if the predecessor does not
+// return map
+insertCast(map, DataType.MAP, map.getMap());
+}
 
 }
 

Modified: 
hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=816832&r1=816831&r2=816832&view=diff
==
--- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java 
(original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java 
Sat Sep 19 00:26:37 2009
@@ -20,13 +20,16 @@
 
 import java.io.File;
 import java.io.IOException;
+import java.util.Iterator;
 import java.util.List;
 import java.util.ArrayList;
 
 import junit.framework.TestCase;
 
 import org.apache.pig.EvalFunc;
+import org.apache.pig.ExecType;
 import org.apache.pig.FuncSpec;
+import org.apache.pig.PigServer;
 import org.apache.pig.impl.logicalLayer.validators.*;
 import org.apache.pig.impl.logicalLayer.* ;
 import org.apache.pig.impl.logicalLayer.schema.Schema;
@@ -3144,14 +3147,16 @@
 
 if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
 
-LOMapLookup map = (LOMapLookup)foreachPlan.getSuccessors(exOp).get(0);
-LOCast cast = (LOCast)foreachPlan.getSuccessors(map).get(0);
-
assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(BinStorage));
+LOCast cast1 = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
+LOMapLookup map = (LOMapLookup)foreachPlan.getSuccessors(cast1).get(0);
+
assertTrue(cast1.getLoadFuncSpec().getClassName().startsWith(BinStorage));
+LOCast cast2 = (LOCast)foreachPlan.getSuccessors(map).get(0);
+
assertTrue(cast2.getLoadFuncSpec().getClassName().startsWith(BinStorage));
 
 foreachPlan = foreach.getForEachPlans().get(2);
 exOp = foreachPlan.getRoots().get(0);
 if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1);
-cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
+LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0);
 
assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(PigStorage));
 
 }
@@ -5456,10 +5461,11 @@
 LogicalPlan foreachPlan = foreach.getForEachPlans().get(0);
 
 LogicalOperator exOp = foreachPlan.getRoots().get(0);
-
-if(!