Author: daijy Date: Thu Jan 7 18:18:58 2010 New Revision: 896951 URL: http://svn.apache.org/viewvc?rev=896951&view=rev Log: PIG-1176: Column Pruner issues in union of loader with and without schema
Modified: hadoop/pig/trunk/CHANGES.txt hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Modified: hadoop/pig/trunk/CHANGES.txt URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=896951&r1=896950&r2=896951&view=diff ============================================================================== --- hadoop/pig/trunk/CHANGES.txt (original) +++ hadoop/pig/trunk/CHANGES.txt Thu Jan 7 18:18:58 2010 @@ -136,6 +136,9 @@ PIG-1146: Inconsistent column pruning in LOUnion (daijy) +PIG-1176: Column Pruner issues in union of loader with and without schema +(daijy) + Release 0.6.0 - Unreleased INCOMPATIBLE CHANGES Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java?rev=896951&r1=896950&r2=896951&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/ColumnPruner.java Thu Jan 7 18:18:58 2010 @@ -169,7 +169,7 @@ currentOp = lOp.insertPlainForEachAfter(columnsToProject); } - if (lOp.pruneColumns(columnsPruned)) { + if (!columnsPruned.isEmpty()&&lOp.pruneColumns(columnsPruned)) { prunedColumnsMap.put(currentOp, columnsToPrune); } } catch (FrontendException e) { Modified: hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java?rev=896951&r1=896950&r2=896951&view=diff ============================================================================== --- hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java (original) +++ hadoop/pig/trunk/src/org/apache/pig/impl/logicalLayer/optimizer/PruneColumns.java Thu Jan 7 18:18:58 2010 @@ -73,9 +73,10 @@ } public class PruneColumns extends LogicalTransformer { - + private boolean safeToPrune = true; private static Log log = LogFactory.getLog(PruneColumns.class); Map<RelationalOperator, RequiredInfo> cachedRequiredInfo = new HashMap<RelationalOperator, RequiredInfo>(); + private Map<LOLoad, RequiredFields> prunedLoaderColumnsMap = new HashMap<LOLoad, RequiredFields>(); ColumnPruner pruner; public PruneColumns(LogicalPlan plan) { super(plan); @@ -175,6 +176,8 @@ { try { + if (!safeToPrune) + return; if (!(lo instanceof RelationalOperator)) { int errCode = 2182; @@ -183,6 +186,7 @@ } if (lo.getSchema()==null) { + safeToPrune = false; return; } RelationalOperator rlo = (RelationalOperator)lo; @@ -195,7 +199,7 @@ { // LOLoad has only one output RequiredFields loaderRequiredFields = requiredOutputInfo.requiredFieldsList.get(0); - pruneLoader((LOLoad)rlo, loaderRequiredFields); + prunedLoaderColumnsMap.put((LOLoad)rlo, loaderRequiredFields); return; } @@ -767,6 +771,12 @@ public void prune() throws OptimizerException { try { + if (!safeToPrune) + return; + + for (LOLoad load : prunedLoaderColumnsMap.keySet()) + pruneLoader(load, prunedLoaderColumnsMap.get(load)); + if (!pruner.isEmpty()) pruner.visit(); } Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java?rev=896951&r1=896950&r2=896951&view=diff ============================================================================== --- hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java (original) +++ hadoop/pig/trunk/test/org/apache/pig/test/TestPruneColumn.java Thu Jan 7 18:18:58 2010 @@ -1723,4 +1723,48 @@ "No map keys pruned for A", "No column pruned for B", "No map keys pruned for B"})); } + + // See PIG-1176 + @Test + public void testUnionMixedSchemaPruning() throws Exception { + pigServer.registerQuery("A = load '"+ Util.generateURI(tmpFile1.toString()) + "' AS (a0, a1, a2);"); + pigServer.registerQuery("B = foreach A generate a0;;"); + pigServer.registerQuery("C = load '"+ Util.generateURI(tmpFile2.toString()) + "';"); + pigServer.registerQuery("D = foreach C generate $0;"); + pigServer.registerQuery("E = union B, D;"); + Iterator<Tuple> iter = pigServer.openIterator("E"); + Collection<String> results = new HashSet<String>(); + results.add("(1)"); + results.add("(2)"); + results.add("(1)"); + results.add("(2)"); + + assertTrue(iter.hasNext()); + Tuple t = iter.next(); + + assertTrue(t.size()==1); + assertTrue(results.contains(t.toString())); + + assertTrue(iter.hasNext()); + t = iter.next(); + + assertTrue(t.size()==1); + assertTrue(results.contains(t.toString())); + + assertTrue(iter.hasNext()); + t = iter.next(); + + assertTrue(t.size()==1); + assertTrue(results.contains(t.toString())); + + assertTrue(iter.hasNext()); + t = iter.next(); + + assertTrue(t.size()==1); + assertTrue(results.contains(t.toString())); + + assertFalse(iter.hasNext()); + + assertTrue(emptyLogFileMessage()); + } }