Author: hashutosh
Date: Sat Feb 13 21:46:49 2010
New Revision: 909921

URL: http://svn.apache.org/viewvc?rev=909921&view=rev
Log:
PIG-1131: Pig simple join does not work when it contains empty lines

Modified:
    hadoop/pig/trunk/CHANGES.txt
    
hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
    hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java

Modified: hadoop/pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/CHANGES.txt?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/CHANGES.txt (original)
+++ hadoop/pig/trunk/CHANGES.txt Sat Feb 13 21:46:49 2010
@@ -93,6 +93,8 @@
 
 BUG FIXES
 
+PIG-1131: Pig simple join does not work when it contains empty lines (ashutoshc)
+
 PIG-834: incorrect plan when algebraic functions are nested (ashutoshc)
 
 PIG-1217: Fix argToFuncMapping in Piggybank Top function (dvryaboy via gates)

Modified: hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java (original)
+++ hadoop/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/POLocalRearrange.java Sat Feb 13 21:46:49 2010
@@ -118,9 +118,6 @@
     private int mProjectedColsMapSize = 0;
     private int mSecondaryProjectedColsMapSize = 0;
 
-    private ArrayList<Integer> minValuePositions;
-    private int minValuePositionsSize = 0;
-
     private Tuple lrOutput;
     
     private boolean useSecondaryKey = false;
@@ -459,27 +456,14 @@
 
                 Tuple minimalValue = null;
                 if(!mProjectStar) {
-                    if(minValuePositions == null) {
-                        // the very first time, we will have to build
-                        // the "value" tuple piecemeal but we can
-                        // do better next time round
-                        minValuePositions = new ArrayList<Integer>();
-                        minimalValue = mTupleFactory.newTuple();
-                        // look for individual columns that we are
-                        // projecting
-                        for (int i = 0; i < value.size(); i++) {
-                            if(mProjectedColsMap.get(i) == null) {
-                                // this column was not found in the "key"
-                                // so send it in the "value"
-                                minimalValue.append(value.get(i));
-                                minValuePositions.add(i);
-                            }
-                        }
-                        minValuePositionsSize = minValuePositions.size();
-                    } else {
-                        minimalValue = mTupleFactory.newTuple(minValuePositionsSize);
-                        for(int i = 0; i < minValuePositionsSize; i++) {
-                            minimalValue.set(i, value.get(minValuePositions.get(i)));
+                    minimalValue = mTupleFactory.newTuple();
+                    // look for individual columns that we are
+                    // projecting
+                    for (int i = 0; i < value.size(); i++) {
+                        if(mProjectedColsMap.get(i) == null) {
+                            // this column was not found in the "key"
+                            // so send it in the "value"
+                            minimalValue.append(value.get(i));
                         }
                     }
                 } else {
@@ -487,7 +471,7 @@
                     // we would send out an empty tuple as
                     // the "value" since all elements are in the
                     // "key"
-                    minimalValue = mTupleFactory.newTuple();
+                    minimalValue = mTupleFactory.newTuple(0);
     
                 }
                 lrOutput.set(2, minimalValue);

Modified: hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java?rev=909921&r1=909920&r2=909921&view=diff
==============================================================================
--- hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java (original)
+++ hadoop/pig/trunk/test/org/apache/pig/test/TestJoin.java Sat Feb 13 21:46:49 2010
@@ -98,6 +98,40 @@
     }
 
     @Test
+    public void testJoinWithMissingFieldsInTuples() throws IOException{
+        
+        setUp(ExecType.MAPREDUCE);
+        String[] input1 = {
+                "ff ff ff",
+                "",
+                "",
+                "",
+                "",
+                "ff ff ff",
+                "",
+                ""
+                };
+        String[] input2 = {
+                "",
+                "",
+                "",
+                "",
+                ""
+                };
+        
+        String firstInput = createInputFile(ExecType.MAPREDUCE, "a.txt", input1);
+        String secondInput = createInputFile(ExecType.MAPREDUCE, "b.txt", input2);
+        String script = "a = load 'a.txt'  using PigStorage(' ');" +
+        "b = load 'b.txt'  using PigStorage('\u0001');" +
+        "c = join a by $0, b by $0;";
+        Util.registerMultiLineQuery(pigServer, script);
+        Iterator<Tuple> it = pigServer.openIterator("c");
+        assertFalse(it.hasNext());
+        deleteInputFile(ExecType.MAPREDUCE, firstInput);
+        deleteInputFile(ExecType.MAPREDUCE, secondInput);
+    }
+    
+    @Test
     public void testJoinUnkownSchema() throws Exception {
         // If any of the input schema is unknown, the resulting schema should be unknown as well
         for (ExecType execType : execTypes) {


Reply via email to