Author: pradeepkth
Date: Tue Apr 13 18:34:53 2010
New Revision: 933738

URL: http://svn.apache.org/viewvc?rev=933738&view=rev
Log:
PIG-1369: POProject does not handle null tuples and non existent fields in some 
cases (pradeepkth)

Modified:
    hadoop/pig/branches/branch-0.7/CHANGES.txt
    
hadoop/pig/branches/branch-0.7/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java
    hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestProject.java

Modified: hadoop/pig/branches/branch-0.7/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/CHANGES.txt?rev=933738&r1=933737&r2=933738&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.7/CHANGES.txt (original)
+++ hadoop/pig/branches/branch-0.7/CHANGES.txt Tue Apr 13 18:34:53 2010
@@ -183,6 +183,9 @@ OPTIMIZATIONS
 
 BUG FIXES
 
+PIG-1369: POProject does not handle null tuples and non existent fields in
+some cases (pradeepkth)
+
 PIG-1364: Public javadoc on apache site still on 0.2, needs to be updated for 
each version release (gates)
 
 PIG-1366: PigStorage's pushProjection implementation results in NPE under

Modified: 
hadoop/pig/branches/branch-0.7/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java?rev=933738&r1=933737&r2=933738&view=diff
==============================================================================
--- 
hadoop/pig/branches/branch-0.7/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java
 (original)
+++ 
hadoop/pig/branches/branch-0.7/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POProject.java
 Tue Apr 13 18:34:53 2010
@@ -147,18 +147,19 @@ public class POProject extends Expressio
         } else if(columns.size() == 1) {
             try {
                 ret = inpValue.get(columns.get(0));
-            } catch (ExecException ee) {
+            } catch (IndexOutOfBoundsException ie) {
                 if(pigLogger != null) {
                     pigLogger.warn(this,"Attempt to access field " + 
                             "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
                 }
                 res.returnStatus = POStatus.STATUS_OK;
                 ret = null;
-            } catch (IndexOutOfBoundsException ie) {
-                if(pigLogger != null) {
-                    pigLogger.warn(this,"Attempt to access field " + 
-                            "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
-                }
+            } catch (NullPointerException npe) {
+                // the tuple is null, so a dereference should also produce a 
null
+                // there is a slight danger here that the Tuple implementation 
+                // may have given the exception for a different reason but if 
we
+                // don't catch it, we will die and the most common case for the
+                // exception would be because the tuple is null
                 res.returnStatus = POStatus.STATUS_OK;
                 ret = null;
             }
@@ -168,13 +169,21 @@ public class POProject extends Expressio
             for(int i: columns) {
                 try { 
                     objList.add(inpValue.get(i)); 
-                } catch (ExecException ee) {
+                } catch (IndexOutOfBoundsException ie) {
                     if(pigLogger != null) {
-                        pigLogger.warn(this,"Attempt to access field " + i +
+                        pigLogger.warn(this,"Attempt to access field " + i + 
                                 " which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
                     }
                     objList.add(null);
                 }
+                catch (NullPointerException npe) {
+                    // the tuple is null, so a dereference should also produce 
a null
+                    // there is a slight danger here that the Tuple 
implementation 
+                    // may have given the exception for a different reason but 
if we
+                    // don't catch it, we will die and the most common case 
for the
+                    // exception would be because the tuple is null
+                    objList.add(null);
+                }
             }
             ret = tupleFactory.newTuple(objList);
         }
@@ -212,15 +221,48 @@ public class POProject extends Expressio
             // appropriately from the input bag
             Tuple tuple = inpBag.iterator().next();
             Tuple tmpTuple = tupleFactory.newTuple(columns.size());
-            for (int i = 0; i < columns.size(); i++)
-                tmpTuple.set(i, tuple.get(columns.get(i)));
+            
+            for (int i = 0; i < columns.size(); i++) {
+                try {
+                    tmpTuple.set(i, tuple.get(columns.get(i)));
+                } catch (IndexOutOfBoundsException ie) {
+                    if(pigLogger != null) {
+                        pigLogger.warn(this,"Attempt to access field " + 
+                                "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
+                    }
+                    tmpTuple.set(i, null);
+                } catch (NullPointerException npe) {
+                    // the tuple is null, so a dereference should also produce 
a null
+                    // there is a slight danger here that the Tuple 
implementation 
+                    // may have given the exception for a different reason but 
if we
+                    // don't catch it, we will die and the most common case 
for the
+                    // exception would be because the tuple is null
+                    tmpTuple.set(i, null);
+                }
+            } 
             outBag = new SingleTupleBag(tmpTuple);
         } else {
             outBag = bagFactory.newDefaultBag();
             for (Tuple tuple : inpBag) {
                 Tuple tmpTuple = tupleFactory.newTuple(columns.size());
-                for (int i = 0; i < columns.size(); i++)
-                    tmpTuple.set(i, tuple.get(columns.get(i)));
+                for (int i = 0; i < columns.size(); i++) {
+                    try {
+                        tmpTuple.set(i, tuple.get(columns.get(i)));
+                    } catch (IndexOutOfBoundsException ie) {
+                        if(pigLogger != null) {
+                            pigLogger.warn(this,"Attempt to access field " + 
+                                    "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
+                        }
+                        tmpTuple.set(i, null);
+                    } catch (NullPointerException npe) {
+                        // the tuple is null, so a dereference should also 
produce a null
+                        // there is a slight danger here that the Tuple 
implementation 
+                        // may have given the exception for a different reason 
but if we
+                        // don't catch it, we will die and the most common 
case for the
+                        // exception would be because the tuple is null
+                        tmpTuple.set(i, null);
+                    }
+                }
                 outBag.add(tmpTuple);
             }
         }
@@ -293,12 +335,42 @@ public class POProject extends Expressio
             Object ret;
 
             if(columns.size() == 1) {
-                ret = inpValue.get(columns.get(0));
+                try{
+                    ret = inpValue.get(columns.get(0));
+                } catch (IndexOutOfBoundsException ie) {
+                    if(pigLogger != null) {
+                        pigLogger.warn(this,"Attempt to access field " + 
+                                "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
+                    }
+                    ret = null;
+                } catch (NullPointerException npe) {
+                    // the tuple is null, so a dereference should also produce 
a null
+                    // there is a slight danger here that the Tuple 
implementation 
+                    // may have given the exception for a different reason but 
if we
+                    // don't catch it, we will die and the most common case 
for the
+                    // exception would be because the tuple is null
+                    ret = null;
+                }
             } else {
                 ArrayList<Object> objList = new 
ArrayList<Object>(columns.size()); 
                 
                 for(int i: columns) {
-                   objList.add(inpValue.get(i)); 
+                    try {
+                        objList.add(inpValue.get(i));
+                    } catch (IndexOutOfBoundsException ie) {
+                        if(pigLogger != null) {
+                            pigLogger.warn(this,"Attempt to access field " + 
+                                    "which was not found in the input", 
PigWarning.ACCESSING_NON_EXISTENT_FIELD);
+                        }
+                        objList.add(null);
+                    } catch (NullPointerException npe) {
+                        // the tuple is null, so a dereference should also 
produce a null
+                        // there is a slight danger here that the Tuple 
implementation 
+                        // may have given the exception for a different reason 
but if we
+                        // don't catch it, we will die and the most common 
case for the
+                        // exception would be because the tuple is null
+                        objList.add(null);
+                    }
                 }
                 ret = tupleFactory.newTuple(objList);
                 res.result = (Tuple)ret;

Modified: 
hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestProject.java
URL: 
http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestProject.java?rev=933738&r1=933737&r2=933738&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestProject.java 
(original)
+++ hadoop/pig/branches/branch-0.7/test/org/apache/pig/test/TestProject.java 
Tue Apr 13 18:34:53 2010
@@ -50,6 +50,7 @@ public class TestProject extends  junit.
 
     POProject proj;
 
+    @Override
     @Before
     public void setUp() throws Exception {
         r = new Random();
@@ -59,6 +60,7 @@ public class TestProject extends  junit.
         proj = GenPhyOp.exprProject();
     }
 
+    @Override
     @After
     public void tearDown() throws Exception {
     }
@@ -281,14 +283,14 @@ public class TestProject extends  junit.
     }
     
     @Test
-    public void testMissingCols() throws Exception {
-        MiniCluster cluster = MiniCluster.buildCluster();
+    public void testMissingCols1() throws Exception {
+        String inputFileName = "TestProject-testMissingCols1-input.txt";
         String input[] = { "hello\tworld", "good\tbye" };
-        Util.createInputFile(cluster, "input.txt", input);
-        String query = "a = load 'input.txt' as (s1:chararray, s2:chararray, 
extra:chararray);" +
+        Util.createLocalInputFile(inputFileName, input);
+        String query = "a = load '" + inputFileName + "' as (s1:chararray, 
s2:chararray, extra:chararray);" +
                        "b = foreach a generate s1, s2, extra;";
         
-        PigServer ps = new PigServer(ExecType.MAPREDUCE, 
cluster.getProperties());
+        PigServer ps = new PigServer(ExecType.LOCAL);
         Util.registerMultiLineQuery(ps, query);
         Iterator<Tuple> it = ps.openIterator("b");
         Tuple[] expectedResults = new Tuple[] {
@@ -300,5 +302,78 @@ public class TestProject extends  junit.
             assertEquals(expectedResults[i++], it.next());
         }
     }
+    
+    
+    @Test
+    public void testMissingCols2() throws Exception {
+        String inputFileName = "TestProject-testMissingCols2-input.txt";
+        String input[] = { "1\t(hello,world)", "2\t(good,bye)" };
+        Util.createLocalInputFile(inputFileName, input);
+        // in the script, PigStorage will return a null for the tuple field
+        // since it does not comply with the schema
+        String query = "a = load '" + inputFileName + "' as (i:int, " +
+                       "t:tuple(s1:chararray, s2:chararray, s3:chararray));" +
+                "b = foreach a generate t.(s2,s3);";
+        
+        PigServer ps = new PigServer(ExecType.LOCAL);
+        Util.registerMultiLineQuery(ps, query);
+        Iterator<Tuple> it = ps.openIterator("b");
+        Tuple[] expectedResults = new Tuple[] {
+                (Tuple) Util.getPigConstant("((null, null))"),
+                (Tuple) Util.getPigConstant("((null, null))")
+        };
+        int i = 0;
+        while(it.hasNext()) {
+            assertEquals(expectedResults[i++], it.next());
+        }
+    }
+    
+    @Test
+    public void testMissingCols3() throws Exception {
+        String inputFileName = "TestProject-testMissingCols3-input.txt";
+        String input[] = { "hello\tworld", "good\tbye" };
+        Util.createLocalInputFile(inputFileName, input);
+        String query = "a = load '" + inputFileName + "';" +
+                "b = group a all;" +
+                "c = foreach b generate flatten(a.($1, $2)),a.$2;";
+        
+        PigServer ps = new PigServer(ExecType.LOCAL);
+        Util.registerMultiLineQuery(ps, query);
+        Iterator<Tuple> it = ps.openIterator("c");
+        Tuple[] expectedResults = new Tuple[] {
+                (Tuple) Util.getPigConstant("('world', null, 
{(null),(null)})"),
+                (Tuple) Util.getPigConstant("('bye', null, {(null),(null)})")
+        };
+        int i = 0;
+        while(it.hasNext()) {
+            assertEquals(expectedResults[i++].toString(), 
it.next().toString());
+        }
+    }
+
+    @Test
+    public void testNullTupleCols() throws Exception {
+        String inputFileName = "TestProject-testNullTupleCols-input.txt";
+        String input[] = { "1\t(hello,world)", "2\t(good)", "3" };
+        Util.createLocalInputFile(inputFileName, input);
+        // PigStorage will return null as the value for the tuple field in the
+        // second record since it does not comply with the schema and in the
+        // third record since the field is absent
+        String query = "a = load '" + inputFileName + "' as (i:int, " +
+                       "t:tuple(s1:chararray, s2:chararray));" +
+                "b = foreach a generate t.s1, t.s2;";
+        
+        PigServer ps = new PigServer(ExecType.LOCAL);
+        Util.registerMultiLineQuery(ps, query);
+        Iterator<Tuple> it = ps.openIterator("b");
+        Tuple[] expectedResults = new Tuple[] {
+                (Tuple) Util.getPigConstant("('hello', 'world')"),
+                (Tuple) Util.getPigConstant("(null, null)"),
+                (Tuple) Util.getPigConstant("(null, null)")
+        };
+        int i = 0;
+        while(it.hasNext()) {
+            assertEquals(expectedResults[i++], it.next());
+        }
+    }
 
 }


Reply via email to