Author: olga
Date: Tue Dec 1 20:07:53 2009
New Revision: 885902
URL: http://svn.apache.org/viewvc?rev=885902&view=rev
Log:
PIG-1108: Incorrect map output key type in MultiQuery optimization (rding via
olgan)
Modified:
hadoop/pig/trunk/CHANGES.txt
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java
hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java
Modified: hadoop/pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Tue Dec 1 20:07:53 2009
@@ -41,6 +41,9 @@
BUG FIXES
+PIG-1108: Incorrect map output key type in MultiQuery optimiza (rding via
+olgan)
+
PIG-1022: optimizer pushes filter before the foreach that generates column
used by filter (daijy via gates)
Modified:
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/MultiQueryOptimizer.java Tue Dec 1 20:07:53 2009
@@ -107,6 +107,11 @@
List<MapReduceOper> successors = getPlan().getSuccessors(mr);
for (MapReduceOper successor : successors) {
+ if (successor.getUseSecondaryKey()) {
+ log.debug("Splittee " + successor.getOperatorKey().getId()
+ + " uses secondary key, do not merge it");
+ continue;
+ }
if (isMapOnly(successor)) {
if (isSingleLoadMapperPlan(successor.mapPlan)) {
mappers.add(successor);
@@ -121,10 +126,11 @@
}
}
}
-
+
+ int numSplittees = successors.size();
+
// case 1: exactly one splittee and it's map-only
- if (mappers.size() == 1 && mapReducers.size() == 0
- && multiLoadMROpers.size() == 0 ) {
+ if (mappers.size() == 1 && numSplittees == 1) {
mergeOnlyMapperSplittee(mappers.get(0), mr);
log.info("Merged the only map-only splittee.");
@@ -133,16 +139,14 @@
}
// case 2: exactly one splittee and it has reducer
- if (isMapOnly(mr) && mapReducers.size() == 1
- && mappers.size() == 0 && multiLoadMROpers.size() == 0) {
+ if (isMapOnly(mr) && mapReducers.size() == 1 && numSplittees == 1) {
mergeOnlyMapReduceSplittee(mapReducers.get(0), mr);
log.info("Merged the only map-reduce splittee.");
return;
}
-
- int numSplittees = successors.size();
+
int numMerges = 0;
PhysicalPlan splitterPl = isMapOnly(mr) ? mr.mapPlan : mr.reducePlan;
Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java
URL:
http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java?rev=885902&r1=885901&r2=885902&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestMultiQuery.java Tue Dec 1 20:07:53 2009
@@ -89,6 +89,33 @@
}
@Test
+ public void testMultiQueryJiraPig1108() {
+
+ try {
+ myPig.setBatchOn();
+
+ myPig.registerQuery("a = load 'file:test/org/apache/pig/test/data/passwd' "
+ + "using PigStorage(':') as (uname:chararray, passwd:chararray, uid:int, gid:int);");
+ myPig.registerQuery("split a into plan1 if (uid > 5), plan2 if ( uid < 5);");
+ myPig.registerQuery("b = group plan1 by uname;");
+ myPig.registerQuery("c = foreach b { tmp = order plan1 by uid desc; "
+ + "generate flatten(group) as foo, tmp; };");
+ myPig.registerQuery("d = filter c BY foo is not null;");
+ myPig.registerQuery("store d into '/tmp/output1';");
+ myPig.registerQuery("store plan2 into '/tmp/output2';");
+
+ List<ExecJob> jobs = myPig.executeBatch();
+ for (ExecJob job : jobs) {
+ assertTrue(job.getStatus() == ExecJob.JOB_STATUS.COMPLETED);
+ }
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ Assert.fail();
+ }
+ }
+
+ @Test
public void testMultiQueryJiraPig1060() {
// test case: