svn commit: r816699 - in /hadoop/pig/trunk: ./ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/ src/org/apache/pig/impl/builtin/ test/org/apache/pig/t
Author: olga Date: Fri Sep 18 16:20:49 2009 New Revision: 816699 URL: http://svn.apache.org/viewvc?rev=816699view=rev Log: PIG-964: Handling null in skewed join (sriranjan via olgan) Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/PigWarning.java hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java hadoop/pig/trunk/src/org/apache/pig/impl/builtin/SampleLoader.java hadoop/pig/trunk/test/org/apache/pig/test/TestSkewedJoin.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816699r1=816698r2=816699view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Sep 18 16:20:49 2009 @@ -98,6 +98,8 @@ BUG FIXES +PIG-964: Handling null in skewed join (sriranjan via olgan) + PIG-962: Skewed join creates 3 map reduce jobs (sriranjan via olgan) PIG-957: Tutorial is broken with 0.4 branch and trunk (pradeepkth) Modified: hadoop/pig/trunk/src/org/apache/pig/PigWarning.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/PigWarning.java?rev=816699r1=816698r2=816699view=diff == --- hadoop/pig/trunk/src/org/apache/pig/PigWarning.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/PigWarning.java Fri Sep 18 16:20:49 2009 @@ -59,5 +59,6 @@ UNABLE_TO_CLOSE_SPILL_FILE, UNREACHABLE_CODE_BOTH_MAP_AND_REDUCE_PLANS_PROCESSED, USING_OVERLOADED_FUNCTION, +REDUCER_COUNT_LOW, NULL_COUNTER_COUNT; } Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java?rev=816699r1=816698r2=816699view=diff == --- 
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/partitioners/SkewedPartitioner.java Fri Sep 18 16:20:49 2009 @@ -77,20 +77,20 @@ // for partition table, compute the index based on the sampler output Pair Integer, Integer indexes; Integer curIndex = -1; - Tuple keyTuple = null; + Tuple keyTuple = DefaultTupleFactory.getInstance().newTuple(1); // extract the key from nullablepartitionwritable PigNullableWritable key = ((NullablePartitionWritable) wrappedKey).getKey(); - if (key instanceof NullableTuple) { + try { + keyTuple.set(0, key.getValueAsPigType()); + } catch (ExecException e) { + return -1; + } + + // if the key is not null and key + if (key instanceof NullableTuple key.getValueAsPigType() != null) { keyTuple = (Tuple)key.getValueAsPigType(); - } else { - keyTuple = DefaultTupleFactory.getInstance().newTuple(1); - try { - keyTuple.set(0, key.getValueAsPigType()); - } catch (ExecException e) { - return -1; - } } indexes = reducerMap.get(keyTuple); Modified: hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java?rev=816699r1=816698r2=816699view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/builtin/PartitionSkewedKeys.java Fri Sep 18 16:20:49 2009 @@ -26,6 +26,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pig.EvalFunc; +import org.apache.pig.PigWarning; import org.apache.pig.backend.executionengine.ExecException; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; @@ -198,8 +199,13 @@ } if (maxReducers totalReducers_) { - throw new RuntimeException(You need at least + maxReducers - + reducers to 
run this job.); + if(pigLogger != null) { +
svn commit: r816723 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java test/org/apache/pig/test/TestMergeJoin.java test/org/apache/pig
Author: gates Date: Fri Sep 18 17:41:38 2009 New Revision: 816723 URL: http://svn.apache.org/viewvc?rev=816723view=rev Log: PIG-951: Set parallelism explicitly to 1 for indexing job in merge join Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Fri Sep 18 17:41:38 2009 @@ -30,6 +30,9 @@ BUG FIXES +PIG-951: Set parallelism explicitly to 1 for indexing job in merge join + (ashutoc via gates). + Release 0.5.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MRCompiler.java Fri Sep 18 17:41:38 2009 @@ -1115,7 +1115,8 @@ } joinOp.setupRightPipeline(rightPipelinePlan); - + rightMROpr.requestedParallelism = 1; // we need exactly one reducer for indexing job. + // At this point, we must be operating on map plan of right input and it would contain nothing else other then a POLoad. 
POLoad rightLoader = (POLoad)rightMROpr.mapPlan.getRoots().get(0); joinOp.setRightLoaderFuncSpec(rightLoader.getLFile().getFuncSpec()); Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestMergeJoin.java Fri Sep 18 17:41:38 2009 @@ -26,11 +26,15 @@ import org.apache.pig.PigException; import org.apache.pig.PigServer; import org.apache.pig.backend.executionengine.ExecException; +import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.plans.MROperPlan; import org.apache.pig.data.BagFactory; import org.apache.pig.data.DataBag; import org.apache.pig.data.Tuple; +import org.apache.pig.impl.PigContext; +import org.apache.pig.impl.logicalLayer.LogicalPlan; import org.apache.pig.impl.logicalLayer.schema.Schema; import org.apache.pig.impl.util.LogUtils; +import org.apache.pig.test.utils.LogicalPlanTester; import org.apache.pig.test.utils.TestHelper; import org.junit.After; import org.junit.Before; @@ -407,6 +411,20 @@ } @Test +public void testParallelism() throws Exception{ + +LogicalPlanTester tester = new LogicalPlanTester(); +tester.buildPlan(A = LOAD ' + INPUT_FILE + ';); +tester.buildPlan(B = LOAD ' + INPUT_FILE + ';); +tester.buildPlan(C = join A by $0, B by $0 using \merge\ parallel 50;); +LogicalPlan lp = tester.buildPlan(store C into 'out';); + PigContext pc = new PigContext(ExecType.MAPREDUCE,cluster.getProperties()); +pc.connect(); + MROperPlan mro = Util.buildMRPlan(Util.buildPhysicalPlan(lp, pc),pc); +Assert.assertEquals(1,mro.getRoots().get(0).getRequestedParallelism()); +} + +@Test public void testIndexer() throws IOException{ Util.createInputFile(cluster, temp_file1, new String[]{1+}); Util.createInputFile(cluster, temp_file2, new String[]{2+}); Modified: 
hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld?rev=816723r1=816722r2=816723view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/data/GoldenFiles/MRC18.gld Fri Sep 18 17:41:38 2009 @@ -6,7 +6,7 @@ | | | |---Load(file:/tmp/input1:org.apache.pig.builtin.PigStorage) - scope-117 | -|---MapReduce(-1,PigStorage) - scope-126: +|---MapReduce(1,PigStorage) - scope-126: | Store(file:/tmp/temp-1456742965/tmp-1456742965:org.apache.pig.builtin.BinStorage) - scope-133
[Pig Wiki] Update of MetadataInterfaceProposal by AlanGates
Dear Wiki user, You have subscribed to a wiki page or wiki category on Pig Wiki for change notification. The following page has been changed by AlanGates: http://wiki.apache.org/pig/MetadataInterfaceProposal -- = Proposed Design for Pig Metadata Interface = - With the introduction of SQL, Pig needs to be able to communicate with external metadata services. These communications will includes such + With the introduction of SQL, Pig needs to be able to communicate with external metadata services. These communications will include such operations as creating, altering, and dropping databases, tables, etc. It will also include metadata queries, such as requests to show available tables, etc. DDL operations of these sorts will be beyond the scope of the proposed metadata interfaces for load and storage functions. However, Pig should not be tightly tied to a single metadata implementation. It should be able to - work with Owl, Hive's metastore, or any other metadata source that is added to Hadoop. To this end this document proposes an interface for + work with Owl, Hive's metastore, or any other metadata source that is added to Hadoop. To this end, this document proposes an interface for operating with metadata systems. Different metadata connectors can then be implemented, one for each metadata system. == Interface == - This interface will allow users to find information about tables, databases, etc. in the metadata store. For each call, it will pass the portion of the syntax tree relevant to the operation to the metadata connector. These structures will be versioned. @@ -39, +38 @@ configuration file. It will specify the URI of the server to use and the implementation of !MetadataDDL to use with this server. == Accessing Global Metadata from Pig Latin == - Pig Latin will not support a call to metadata within the language itself. Instead, it will support the ability to invoke a SQL DDL command. + Pig Latin will not support a call to metadata within the language itself. 
Instead, it will support the ability to invoke a Pig SQL DDL command. This SQL will then be sent to the SQL parser and dispatched through the metadata service as before. {{{
svn commit: r816820 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/data/DefaultTuple.java
Author: gates Date: Sat Sep 19 00:04:47 2009 New Revision: 816820 URL: http://svn.apache.org/viewvc?rev=816820view=rev Log: PIG-513: Removed unecessary bounds check in DefaultTuple. Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816820r1=816819r2=816820view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Sep 19 00:04:47 2009 @@ -30,6 +30,9 @@ BUG FIXES +PIG-513: Removed unecessary bounds check in DefaultTuple (ashutoshc via + gates). + PIG-951: Set parallelism explicitly to 1 for indexing job in merge join (ashutoc via gates). Modified: hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java?rev=816820r1=816819r2=816820view=diff == --- hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/data/DefaultTuple.java Sat Sep 19 00:04:47 2009 @@ -116,7 +116,6 @@ * than or equal to the number of fields in the tuple. */ public boolean isNull(int fieldNum) throws ExecException { -checkBounds(fieldNum); return (mFields.get(fieldNum) == null); } @@ -130,7 +129,6 @@ * the number of fields in the tuple. */ public byte getType(int fieldNum) throws ExecException { -checkBounds(fieldNum); return DataType.findType(mFields.get(fieldNum)); } @@ -142,7 +140,6 @@ * the number of fields in the tuple. */ public Object get(int fieldNum) throws ExecException { -checkBounds(fieldNum); return mFields.get(fieldNum); } @@ -163,7 +160,6 @@ * the number of fields in the tuple. 
*/ public void set(int fieldNum, Object val) throws ExecException { -checkBounds(fieldNum); mFields.set(fieldNum, val); } @@ -352,15 +348,6 @@ } } -private void checkBounds(int fieldNum) throws ExecException { -if (fieldNum = mFields.size()) { -int errCode = 1072; -String msg = Out of bounds access: Request for field number + fieldNum + - exceeds tuple size of + mFields.size(); -throw new ExecException(msg, errCode, PigException.INPUT); -} -} - /** * @return true if this Tuple is null */ @@ -376,3 +363,4 @@ } } +
svn commit: r816832 - in /hadoop/pig/trunk: CHANGES.txt src/org/apache/pig/impl/logicalLayer/LOCast.java src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java test/org/apache/pig/te
Author: pradeepkth Date: Sat Sep 19 00:26:37 2009 New Revision: 816832 URL: http://svn.apache.org/viewvc?rev=816832view=rev Log: Maps are not implicitly casted (pradeepkth) Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=816832r1=816831r2=816832view=diff == --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Sat Sep 19 00:26:37 2009 @@ -30,6 +30,8 @@ BUG FIXES +PIG-942: Maps are not implicitly casted (pradeepkth) + PIG-513: Removed unecessary bounds check in DefaultTuple (ashutoshc via gates). Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java?rev=816832r1=816831r2=816832view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/LOCast.java Sat Sep 19 00:26:37 2009 @@ -70,6 +70,7 @@ public Schema.FieldSchema getFieldSchema() throws FrontendException { if(!mIsFieldSchemaComputed) { mFieldSchema = new Schema.FieldSchema(null, mType); +mFieldSchema.setParent(getExpression().mFieldSchema.canonicalName, getExpression()); mIsFieldSchemaComputed = true; } return mFieldSchema; Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java?rev=816832r1=816831r2=816832view=diff == --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java (original) +++ 
hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/validators/TypeCheckingVisitor.java Sat Sep 19 00:26:37 2009 @@ -239,6 +239,11 @@ } map.setType(map.getValueType()); +if(map.getMap().getType() != DataType.MAP) { +// insert cast if the predecessor does not +// return map +insertCast(map, DataType.MAP, map.getMap()); +} } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java?rev=816832r1=816831r2=816832view=diff == --- hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestTypeCheckingValidator.java Sat Sep 19 00:26:37 2009 @@ -20,13 +20,16 @@ import java.io.File; import java.io.IOException; +import java.util.Iterator; import java.util.List; import java.util.ArrayList; import junit.framework.TestCase; import org.apache.pig.EvalFunc; +import org.apache.pig.ExecType; import org.apache.pig.FuncSpec; +import org.apache.pig.PigServer; import org.apache.pig.impl.logicalLayer.validators.*; import org.apache.pig.impl.logicalLayer.* ; import org.apache.pig.impl.logicalLayer.schema.Schema; @@ -3144,14 +3147,16 @@ if(! (exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1); -LOMapLookup map = (LOMapLookup)foreachPlan.getSuccessors(exOp).get(0); -LOCast cast = (LOCast)foreachPlan.getSuccessors(map).get(0); - assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(BinStorage)); +LOCast cast1 = (LOCast)foreachPlan.getSuccessors(exOp).get(0); +LOMapLookup map = (LOMapLookup)foreachPlan.getSuccessors(cast1).get(0); + assertTrue(cast1.getLoadFuncSpec().getClassName().startsWith(BinStorage)); +LOCast cast2 = (LOCast)foreachPlan.getSuccessors(map).get(0); + assertTrue(cast2.getLoadFuncSpec().getClassName().startsWith(BinStorage)); foreachPlan = foreach.getForEachPlans().get(2); exOp = foreachPlan.getRoots().get(0); if(! 
(exOp instanceof LOProject)) exOp = foreachPlan.getRoots().get(1); -cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0); +LOCast cast = (LOCast)foreachPlan.getSuccessors(exOp).get(0); assertTrue(cast.getLoadFuncSpec().getClassName().startsWith(PigStorage)); } @@ -5456,10 +5461,11 @@ LogicalPlan foreachPlan = foreach.getForEachPlans().get(0); LogicalOperator exOp = foreachPlan.getRoots().get(0); - -if(!