Author: knoguchi
Date: Fri Dec  2 15:53:19 2016
New Revision: 1772368

URL: http://svn.apache.org/viewvc?rev=1772368&view=rev
Log:
PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)

Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml
    
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
    
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
    
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
    pig/trunk/test/e2e/pig/tests/nightly.conf
    pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java

Modified: pig/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Dec  2 15:53:19 2016
@@ -21,6 +21,8 @@ Pig Change Log
 Trunk (unreleased changes)
  
 INCOMPATIBLE CHANGES
+
+PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)
  
 IMPROVEMENTS
 

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml Fri Dec  2 
15:53:19 2016
@@ -713,7 +713,7 @@ This will cause an error …</source>
    <p></p>
    <ul>
       <li>
-         <p>If Pig cannot resolve incompatible types through implicit casts, 
an error will occur. For example, you cannot add chararray and float (see the 
Types Table for addition and subtraction).</p>
+         <p>If Pig cannot resolve incompatible types through implicit casts, 
an error will occur. For example, you cannot add chararray and float (see the 
<a href="#types-table-add">Types Table for addition and subtraction</a>).</p>
       <source>
 A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
 B = FOREACH A GENERATE name + gpa;
@@ -8503,11 +8503,11 @@ B: (b1:long, b2:long, b3:long)
 A union B: null 
 </source>
   
-<p>Union columns with incompatible types result in a bytearray type: </p>
+<p>Union columns with incompatible types result in a failure. (See <a 
href="#types-table-add">Types Table for addition and subtraction</a> for 
incompatible types.)</p>
 <source>
-A: (a1:long, a2:long) 
-B: (b1:(b11:long, b12:long), b2:long) 
-A union B: (a1:bytearray, a2:long) 
+A: (a1:long)
+B: (a1:chararray)
+A union B: ERROR: Cannot cast from long to bytearray
 </source>
 
 <p>Union columns of compatible type will produce an "escalate" type. 

Modified: 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
 Fri Dec  2 15:53:19 2016
@@ -491,7 +491,7 @@ public class TypeCheckingExpVisitor exte
         byte outType = cast.getType();
         if(outType == DataType.BYTEARRAY && inType != outType) {
             int errCode = 1051;
-            String msg = "Cannot cast to bytearray";
+            String msg = "Cannot cast from " + DataType.findTypeName(inType) + 
" to bytearray";
             msgCollector.collect(msg, MessageType.Error) ;
             throw new TypeCheckerException(cast, msg, errCode, 
PigException.INPUT) ;
         }

Modified: 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
 (original)
+++ 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
 Fri Dec  2 15:53:19 2016
@@ -351,7 +351,8 @@ public class TypeCheckingRelVisitor exte
 
             if (outFieldSchema.type != fs.type) {
                 castNeededCounter++ ;
-                new CastExpression(genPlan, project, outFieldSchema);
+                CastExpression castexp = new CastExpression(genPlan, project, 
outFieldSchema);
+                castexp.setLocation(toOp.getLocation());
             }
 
             generatePlans.add(genPlan) ;

Modified: 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java 
(original)
+++ 
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java 
Fri Dec  2 15:53:19 2016
@@ -21,6 +21,7 @@ package org.apache.pig.newplan.logical.v
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.pig.PigException;
 import org.apache.pig.data.DataType;
 import org.apache.pig.impl.logicalLayer.FrontendException;
 import org.apache.pig.impl.util.Pair;
@@ -110,9 +111,20 @@ public class UnionOnSchemaSetter extends
                 } else {
                     ProjectExpression projExpr = 
                         new ProjectExpression( exprPlan, genInputs.size(), 0, 
gen );
-                    if( fs.type != DataType.BYTEARRAY
-                        && opSchema.getField( pos ).type != fs.type ) {
-                        new CastExpression( exprPlan, projExpr, fs );
+                    if( opSchema.getField( pos ).type != fs.type ) {
+                        if( fs.type != DataType.BYTEARRAY ) {
+                            CastExpression castexpr = new CastExpression( 
exprPlan, projExpr, fs );
+                            castexpr.setLocation(union.getLocation());
+                        } else {
+                            int errCode = 1056;
+                            String msg = "Union of incompatible types not 
allowed. "
+                                         + "Cannot cast from "
+                                         + 
DataType.findTypeName(opSchema.getField( pos ).type)
+                                         + " to bytearray for '"
+                                         + opSchema.getField( pos ).alias
+                                         + "'. Please typecast to compatible 
types before union." ;
+                            throw new FrontendException(union, msg, errCode, 
PigException.INPUT) ;
+                        }
                     }
                     genInputs.add( new LOInnerLoad( innerPlan, foreach, pos ) 
);
                 }

Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Fri Dec  2 15:53:19 2016
@@ -4872,21 +4872,6 @@ a = load ':INPATH:/singlefile/allscalar1
 b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as 
(name:chararray, age:int, gpa:double, instate:chararray);
 C = union a, b;
 store C into ':OUTPATH:';\, 
-                },
-                {
-                    # Test Union using merge with incompatible types.  
float->bytearray and chararray->bytearray
-                    'num' => 8,
-                    'delimiter' => '   ',
-                    'pig' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:int);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:chararray);
-C = union onschema A, B;
-store C into ':OUTPATH:';\,
-                    'verify_pig_script' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:bytearray);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as 
(name:chararray, age:bytearray);
-C = union A, B;
-store C into ':OUTPATH:';\,
                 }
               ]
 

Modified: pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java
URL: 
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java Fri Dec  2 
15:53:19 2016
@@ -96,8 +96,6 @@ public class TestUnionOnSchema  {
 
     /**
      * Test UNION ONSCHEMA on two inputs with same schema
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaSameSchema() throws Exception {
@@ -128,8 +126,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test UNION ONSCHEMA with operations after the union
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaFilter() throws Exception {
@@ -161,8 +157,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test UNION ONSCHEMA with operations after the union
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaSuccOps() throws Exception {
@@ -194,8 +188,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test UNION ONSCHEMA with cast from bytearray to another type
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaCastOnByteArray() throws Exception {
@@ -223,8 +215,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA where a common column has additional 'namespace' 
part
      *  in the column name in one of the inputs
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaScopedColumnName() throws Exception {
@@ -266,8 +256,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA where a common column has additional 'namespace' 
part
      *  in the column name in both the inputs
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaScopedColumnNameBothInp1() throws Exception {
@@ -302,8 +290,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA where a common column has additional 'namespace' 
part
      *  in the column name in both the inputs
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaScopedColumnNameBothInp2() throws Exception {
@@ -340,8 +326,6 @@ public class TestUnionOnSchema  {
      * Test UNION ONSCHEMA where a common column has additional 'namespace' 
part
      *  in the column name in one of the inputs.
      *  Negative test case
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaScopedColumnNameNeg() throws Exception {
@@ -366,8 +350,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA on two inputs with same column names, but different
      * numeric types - test type promotion
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaDiffNumType() throws Exception {
@@ -396,8 +378,6 @@ public class TestUnionOnSchema  {
 
     /**
      * Test UNION ONSCHEMA on two inputs with no common columns
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaNoCommonCols() throws Exception {
@@ -424,8 +404,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test UNION ONSCHEMA on two inputs , one input with additional columns
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaAdditionalColumn() throws Exception {
@@ -498,8 +476,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test UNION ONSCHEMA on 3 inputs 
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchema3Inputs() throws Exception {
@@ -533,8 +509,6 @@ public class TestUnionOnSchema  {
 
     /**
      * Test UNION ONSCHEMA with bytearray type 
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaByteArrayConversions() throws Exception {
@@ -572,8 +546,6 @@ public class TestUnionOnSchema  {
     
     /**
      * negative test - test error on no schema
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaNoSchema() throws Exception {
@@ -597,8 +569,6 @@ public class TestUnionOnSchema  {
     
     /**
      * negative test - test error on null alias in one of the FieldSchema
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaNullAliasInFieldSchema() throws Exception {
@@ -640,8 +610,6 @@ public class TestUnionOnSchema  {
 
     /**
      * test union with incompatible types in schema
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaIncompatibleTypes() throws Exception {
@@ -650,7 +618,15 @@ public class TestUnionOnSchema  {
             + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
             + "u = union onschema l1, l2;";
 
-        checkSchemaEquals(query, "x : long, y : bytearray");
+        checkSchemaEx(query, "Cannot cast from chararray to bytearray");
+
+        //without "onschema"
+        query =
+            "  l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : 
chararray);"
+            + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
+            + "u = union l1, l2;";
+
+        checkSchemaEx(query, "Cannot cast from chararray to bytearray");
 
 
         
@@ -659,8 +635,15 @@ public class TestUnionOnSchema  {
             + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : 
chararray);"
             + "u = union onschema l1, l2;"
         ; 
-        checkSchemaEquals(query, "x : bytearray, y : chararray");
+        checkSchemaEx(query, "Cannot cast from long to bytearray");
                
+        query =
+            "  l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y : 
chararray);"
+            + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y : 
chararray);"
+            + "u = union l1, l2;"
+        ;
+        checkSchemaEx(query, "Cannot cast from long to bytearray");
+
         // bag column with different internal column types
         query =
             "  l1 = load '" + INP_FILE_2NUMS 
@@ -708,8 +691,6 @@ public class TestUnionOnSchema  {
 
     /**
      * Test UNION ONSCHEMA with input relation having udfs
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaInputUdfs() throws Exception {
@@ -745,8 +726,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA with udf whose default type is different from
      * final type
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaUdfTypeEvolution() throws Exception {
@@ -797,8 +776,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA with udf whose default type is different from
      * final type - where udf is not in immediate input of union
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaUdfTypeEvolution2() throws Exception {
@@ -869,8 +846,6 @@ public class TestUnionOnSchema  {
     /**
      * Test UNION ONSCHEMA with input relation having column names with 
multiple
      * level of namespace in their names
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testUnionOnSchemaScopeMulti() throws Exception {
@@ -916,8 +891,6 @@ public class TestUnionOnSchema  {
     
     /**
      * Test query with a union-onschema having another as input 
-     * @throws IOException
-     * @throws ParserException
      */
     @Test
     public void testTwoUnions() throws Exception {


Reply via email to