[
https://issues.apache.org/jira/browse/HIVE-18524?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Matt McCline updated HIVE-18524:
--------------------------------
Description:
{noformat}
insert overwrite table insert_10_1
select cast(gpa as float),
age,
IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
IF(LENGTH(name)>10,cast(name as binary),NULL)
from studentnull10k
vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double]
ExprNodeDescs:
UDFToFloat(gpa) (type: float),
age (type: int),
if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp),
if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
selectExpressions:
VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))
(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) ->
5:timestamp,
VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))
(children: LongColGreaterLongScalar(col 4:int, val 10)(children:
StringLength(col 0:string) -> 4:int) -> 6:boolean,
VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
{noformat}
*// Notice there is no vector expression shown for the last IF stmt.* It has
been magically embedded inside the VectorUDFAdaptor object...
Execution results in this call stack.
{noformat}
Caused by: java.lang.NullPointerException
at java.util.Arrays.copyOfRange(Arrays.java:3521)
at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$9.writeValue(VectorExpressionWriterFactory.java:1101)
at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$VectorExpressionWriterBytes.writeValue(VectorExpressionWriterFactory.java:343)
at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc.getDeferredJavaObject(VectorUDFArgDesc.java:123)
at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.setResult(VectorUDFAdaptor.java:211)
at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.evaluate(VectorUDFAdaptor.java:177)
at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145)
... 22 more
{noformat}
This behavior was introduced by:
HIVE-17139: Conditional expressions optimization: skip the expression
evaluation if the condition is not satisfied for vectorization engine. (Jia Ke,
reviewed by Ferdinand Xu)
Embedding a raw vector expression outside of VectorizationContext is quite
non-standard and evidently buggy.
[~Ferd] [~Ke Jia] I am inclined to revert this change. Comments? CC:
[~ashutoshc] [~hagleitn]
was:
{nocode}
insert overwrite table insert_10_1
select cast(gpa as float),
age,
IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
IF(LENGTH(name)>10,cast(name as binary),NULL)
from studentnull10k
vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double]
ExprNodeDescs:
UDFToFloat(gpa) (type: float),
age (type: int),
if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp),
if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
selectExpressions:
VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))
(children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean) ->
5:timestamp,
VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))
(children: LongColGreaterLongScalar(col 4:int, val 10)(children:
StringLength(col 0:string) -> 4:int) -> 6:boolean,
VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
{nocode}
*// Notice there is no vector expression shown for the last IF stmt.* It has
been magically embedded inside the VectorUDFAdaptor object...
Execution results in this call stack.
{nocode}
Caused by: java.lang.NullPointerException
at java.util.Arrays.copyOfRange(Arrays.java:3521)
at
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$9.writeValue(VectorExpressionWriterFactory.java:1101)
at
org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$VectorExpressionWriterBytes.writeValue(VectorExpressionWriterFactory.java:343)
at
org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc.getDeferredJavaObject(VectorUDFArgDesc.java:123)
at
org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.setResult(VectorUDFAdaptor.java:211)
at
org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.evaluate(VectorUDFAdaptor.java:177)
at
org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145)
... 22 more
{nocode}
Change is due to:
HIVE-17139: Conditional expressions optimization: skip the expression
evaluation if the condition is not satisfied for vectorization engine. (Jia Ke,
reviewed by Ferdinand Xu)
Embedding a raw vector expression outside of VectorizationContext is quite
non-standard and evidently buggy.
[~Ferd] [~Ke Jia] I am inclined to revert this change. Comments? CC:
[~ashutoshc] [~hagleitn]
> Vectorization: Execution failure related to non-standard embedding of
> IfExprConditionalFilter inside VectorUDFAdaptor (HIVE-17139)
> ----------------------------------------------------------------------------------------------------------------------------------
>
> Key: HIVE-18524
> URL: https://issues.apache.org/jira/browse/HIVE-18524
> Project: Hive
> Issue Type: Bug
> Components: Hive
> Reporter: Matt McCline
> Priority: Critical
>
> {noformat}
> insert overwrite table insert_10_1
> select cast(gpa as float),
> age,
> IF(age>40,cast('2011-01-01 01:01:01' as timestamp),NULL),
> IF(LENGTH(name)>10,cast(name as binary),NULL)
> from studentnull10k
> vectorizationSchemaColumns: [0:name:string, 1:age:int, 2:gpa:double]
> ExprNodeDescs:
> UDFToFloat(gpa) (type: float),
> age (type: int),
> if((age > 40), 2011-01-01 01:01:01.0, null) (type: timestamp),
> if((length(name) > 10), CAST( name AS BINARY), null) (type: binary)
> selectExpressions:
> VectorUDFAdaptor(if((age > 40), 2011-01-01 01:01:01.0, null))
> (children: LongColGreaterLongScalar(col 1:int, val 40) -> 4:boolean)
> -> 5:timestamp,
> VectorUDFAdaptor(if((length(name) > 10), CAST( name AS BINARY), null))
> (children: LongColGreaterLongScalar(col 4:int, val 10)(children:
> StringLength(col 0:string) -> 4:int) -> 6:boolean,
> VectorUDFAdaptor(CAST( name AS BINARY)) -> 7:binary) -> 8:binary
> {noformat}
> *// Notice there is no vector expression shown for the last IF stmt.* It has
> been magically embedded inside the VectorUDFAdaptor object...
> Execution results in this call stack.
> {noformat}
> Caused by: java.lang.NullPointerException
> at java.util.Arrays.copyOfRange(Arrays.java:3521)
> at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$9.writeValue(VectorExpressionWriterFactory.java:1101)
> at org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory$VectorExpressionWriterBytes.writeValue(VectorExpressionWriterFactory.java:343)
> at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFArgDesc.getDeferredJavaObject(VectorUDFArgDesc.java:123)
> at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.setResult(VectorUDFAdaptor.java:211)
> at org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.evaluate(VectorUDFAdaptor.java:177)
> at org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:145)
> ... 22 more
> {noformat}
> This behavior was introduced by:
> HIVE-17139: Conditional expressions optimization: skip the expression
> evaluation if the condition is not satisfied for vectorization engine. (Jia
> Ke, reviewed by Ferdinand Xu)
> Embedding a raw vector expression outside of VectorizationContext is quite
> non-standard and evidently buggy.
> [~Ferd] [~Ke Jia] I am inclined to revert this change. Comments? CC:
> [~ashutoshc] [~hagleitn]
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)