Author: knoguchi
Date: Fri Dec 2 15:53:19 2016
New Revision: 1772368
URL: http://svn.apache.org/viewvc?rev=1772368&view=rev
Log:
PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
pig/trunk/test/e2e/pig/tests/nightly.conf
pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java
Modified: pig/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Fri Dec 2 15:53:19 2016
@@ -21,6 +21,8 @@ Pig Change Log
Trunk (unreleased changes)

INCOMPATIBLE CHANGES
+
+PIG-5067: Revisit union on numeric type and chararray to bytearray (knoguchi)

IMPROVEMENTS
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml
URL:
http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml Fri Dec 2
15:53:19 2016
@@ -713,7 +713,7 @@ This will cause an error …</source>
<p></p>
<ul>
<li>
- <p>If Pig cannot resolve incompatible types through implicit casts,
an error will occur. For example, you cannot add chararray and float (see the
Types Table for addition and subtraction).</p>
+ <p>If Pig cannot resolve incompatible types through implicit casts,
an error will occur. For example, you cannot add chararray and float (see the
<a href="#types-table-add">Types Table for addition and subtraction</a>).</p>
<source>
A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
B = FOREACH A GENERATE name + gpa;
@@ -8503,11 +8503,11 @@ B: (b1:long, b2:long, b3:long)
A union B: null
</source>
-<p>Union columns with incompatible types result in a bytearray type: </p>
+<p>Union columns with incompatible types result in a failure. (See <a
href="#types-table-add">Types Table for addition and subtraction</a> for
incompatible types.)</p>
<source>
-A: (a1:long, a2:long)
-B: (b1:(b11:long, b12:long), b2:long)
-A union B: (a1:bytearray, a2:long)
+A: (a1:long)
+B: (a1:chararray)
+A union B: ERROR: Cannot cast from long to bytearray
</source>
<p>Union columns of compatible type will produce an "escalate" type.
Modified:
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
(original)
+++
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingExpVisitor.java
Fri Dec 2 15:53:19 2016
@@ -491,7 +491,7 @@ public class TypeCheckingExpVisitor exte
byte outType = cast.getType();
if(outType == DataType.BYTEARRAY && inType != outType) {
int errCode = 1051;
- String msg = "Cannot cast to bytearray";
+ String msg = "Cannot cast from " + DataType.findTypeName(inType) +
" to bytearray";
msgCollector.collect(msg, MessageType.Error) ;
throw new TypeCheckerException(cast, msg, errCode,
PigException.INPUT) ;
}
Modified:
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
(original)
+++
pig/trunk/src/org/apache/pig/newplan/logical/visitor/TypeCheckingRelVisitor.java
Fri Dec 2 15:53:19 2016
@@ -351,7 +351,8 @@ public class TypeCheckingRelVisitor exte
if (outFieldSchema.type != fs.type) {
castNeededCounter++ ;
- new CastExpression(genPlan, project, outFieldSchema);
+ CastExpression castexp = new CastExpression(genPlan, project,
outFieldSchema);
+ castexp.setLocation(toOp.getLocation());
}
generatePlans.add(genPlan) ;
Modified:
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
URL:
http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
---
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
(original)
+++
pig/trunk/src/org/apache/pig/newplan/logical/visitor/UnionOnSchemaSetter.java
Fri Dec 2 15:53:19 2016
@@ -21,6 +21,7 @@ package org.apache.pig.newplan.logical.v
import java.util.ArrayList;
import java.util.List;
+import org.apache.pig.PigException;
import org.apache.pig.data.DataType;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.impl.util.Pair;
@@ -110,9 +111,20 @@ public class UnionOnSchemaSetter extends
} else {
ProjectExpression projExpr =
new ProjectExpression( exprPlan, genInputs.size(), 0,
gen );
- if( fs.type != DataType.BYTEARRAY
- && opSchema.getField( pos ).type != fs.type ) {
- new CastExpression( exprPlan, projExpr, fs );
+ if( opSchema.getField( pos ).type != fs.type ) {
+ if( fs.type != DataType.BYTEARRAY ) {
+ CastExpression castexpr = new CastExpression(
exprPlan, projExpr, fs );
+ castexpr.setLocation(union.getLocation());
+ } else {
+ int errCode = 1056;
+ String msg = "Union of incompatible types not
allowed. "
+ + "Cannot cast from "
+ +
DataType.findTypeName(opSchema.getField( pos ).type)
+ + " to bytearray for '"
+ + opSchema.getField( pos ).alias
+ + "'. Please typecast to compatible
types before union." ;
+ throw new FrontendException(union, msg, errCode,
PigException.INPUT) ;
+ }
}
genInputs.add( new LOInnerLoad( innerPlan, foreach, pos )
);
}
Modified: pig/trunk/test/e2e/pig/tests/nightly.conf
URL:
http://svn.apache.org/viewvc/pig/trunk/test/e2e/pig/tests/nightly.conf?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/test/e2e/pig/tests/nightly.conf (original)
+++ pig/trunk/test/e2e/pig/tests/nightly.conf Fri Dec 2 15:53:19 2016
@@ -4872,21 +4872,6 @@ a = load ':INPATH:/singlefile/allscalar1
b = load ':INPATH:/singlefile/allscalar10k' using PigStorage() as
(name:chararray, age:int, gpa:double, instate:chararray);
C = union a, b;
store C into ':OUTPATH:';\,
- },
- {
- # Test Union using merge with incompatible types.
float->bytearray and chararray->bytearray
- 'num' => 8,
- 'delimiter' => ' ',
- 'pig' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as
(name:chararray, age:int);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as
(name:chararray, age:chararray);
-C = union onschema A, B;
-store C into ':OUTPATH:';\,
- 'verify_pig_script' => q\
-A = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as
(name:chararray, age:bytearray);
-B = load ':INPATH:/singlefile/studenttab10k' using PigStorage() as
(name:chararray, age:bytearray);
-C = union A, B;
-store C into ':OUTPATH:';\,
}
]
Modified: pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java
URL:
http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java?rev=1772368&r1=1772367&r2=1772368&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestUnionOnSchema.java Fri Dec 2
15:53:19 2016
@@ -96,8 +96,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA on two inputs with same schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaSameSchema() throws Exception {
@@ -128,8 +126,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with operations after the union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaFilter() throws Exception {
@@ -161,8 +157,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with operations after the union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaSuccOps() throws Exception {
@@ -194,8 +188,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with cast from bytearray to another type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaCastOnByteArray() throws Exception {
@@ -223,8 +215,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace'
part
* in the column name in one of the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnName() throws Exception {
@@ -266,8 +256,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace'
part
* in the column name in both the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameBothInp1() throws Exception {
@@ -302,8 +290,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA where a common column has additional 'namespace'
part
* in the column name in both the inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameBothInp2() throws Exception {
@@ -340,8 +326,6 @@ public class TestUnionOnSchema {
* Test UNION ONSCHEMA where a common column has additional 'namespace'
part
* in the column name in one of the inputs.
* Negative test case
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopedColumnNameNeg() throws Exception {
@@ -366,8 +350,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA on two inputs with same column names, but different
* numeric types - test type promotion
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaDiffNumType() throws Exception {
@@ -396,8 +378,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA on two inputs with no common columns
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNoCommonCols() throws Exception {
@@ -424,8 +404,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA on two inputs , one input with additional columns
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaAdditionalColumn() throws Exception {
@@ -498,8 +476,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA on 3 inputs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchema3Inputs() throws Exception {
@@ -533,8 +509,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with bytearray type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaByteArrayConversions() throws Exception {
@@ -572,8 +546,6 @@ public class TestUnionOnSchema {
/**
* negative test - test error on no schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNoSchema() throws Exception {
@@ -597,8 +569,6 @@ public class TestUnionOnSchema {
/**
* negative test - test error on null alias in one of the FieldSchema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaNullAliasInFieldSchema() throws Exception {
@@ -640,8 +610,6 @@ public class TestUnionOnSchema {
/**
* test union with incompatible types in schema
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaIncompatibleTypes() throws Exception {
@@ -650,7 +618,15 @@ public class TestUnionOnSchema {
+ "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
+ "u = union onschema l1, l2;";
- checkSchemaEquals(query, "x : long, y : bytearray");
+ checkSchemaEx(query, "Cannot cast from chararray to bytearray");
+
+ //without "onschema"
+ query =
+ " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y :
chararray);"
+ + "l2 = load '" + INP_FILE_2NUMS + "' as (x : long, y : float);"
+ + "u = union l1, l2;";
+
+ checkSchemaEx(query, "Cannot cast from chararray to bytearray");
@@ -659,8 +635,15 @@ public class TestUnionOnSchema {
+ "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y :
chararray);"
+ "u = union onschema l1, l2;"
;
- checkSchemaEquals(query, "x : bytearray, y : chararray");
+ checkSchemaEx(query, "Cannot cast from long to bytearray");
+ query =
+ " l1 = load '" + INP_FILE_2NUMS + "' as (x : long, y :
chararray);"
+ + "l2 = load '" + INP_FILE_2NUMS + "' as (x : map[ ], y :
chararray);"
+ + "u = union l1, l2;"
+ ;
+ checkSchemaEx(query, "Cannot cast from long to bytearray");
+
// bag column with different internal column types
query =
" l1 = load '" + INP_FILE_2NUMS
@@ -708,8 +691,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with input relation having udfs
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaInputUdfs() throws Exception {
@@ -745,8 +726,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with udf whose default type is different from
* final type
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaUdfTypeEvolution() throws Exception {
@@ -797,8 +776,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with udf whose default type is different from
* final type - where udf is not in immediate input of union
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaUdfTypeEvolution2() throws Exception {
@@ -869,8 +846,6 @@ public class TestUnionOnSchema {
/**
* Test UNION ONSCHEMA with input relation having column names with
multiple
* level of namespace in their names
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testUnionOnSchemaScopeMulti() throws Exception {
@@ -916,8 +891,6 @@ public class TestUnionOnSchema {
/**
* Test query with a union-onschema having another as input
- * @throws IOException
- * @throws ParserException
*/
@Test
public void testTwoUnions() throws Exception {