[
https://issues.apache.org/jira/browse/HIVE-20563?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16637254#comment-16637254
]
Matt McCline commented on HIVE-20563:
-------------------------------------
Things to look at:
1) If a THEN/ELSE expression is NULL, the new data type conversion code inserts
a "ConstantVectorExpression(val null)", so the resulting expression will be, for
example, IfExprCondExprCondExpr instead of the optimized IfExprCondExprNull.
2) A query now does not vectorize due to "notVectorizedReason: SELECT operator:
Unsupported type timestamp for cast to String", whereas it used to vectorize with
VectorUDFAdaptor. Suppress the exception and fall back to VectorUDFAdaptor.
Also, consider supporting that conversion.
> Vectorization: CASE WHEN expression fails when THEN/ELSE type and result type
> are different
> -------------------------------------------------------------------------------------------
>
> Key: HIVE-20563
> URL: https://issues.apache.org/jira/browse/HIVE-20563
> Project: Hive
> Issue Type: Bug
> Affects Versions: 4.0.0
> Reporter: Jesus Camacho Rodriguez
> Assignee: Matt McCline
> Priority: Major
> Attachments: HIVE-20563.01.patch, HIVE-20563.02.patch,
> HIVE-20563.03.patch, HIVE-20563.04.patch, HIVE-20563.05.patch,
> HIVE-20563.06.patch, HIVE-20563.07.patch
>
>
> With the following stacktrace:
> {code}
> java.lang.Exception: java.lang.RuntimeException:
> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
> processing row
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job.runTasks(LocalJobRunner.java:492)
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job.run(LocalJobRunner.java:552)
> [hadoop-mapreduce-client-common-3.1.0.jar:?]
> Caused by: java.lang.RuntimeException:
> org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error while
> processing row
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:163)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271)
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_181]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> ~[?:1.8.0_181]
> at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime
> Error while processing row
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:973)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271)
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_181]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> ~[?:1.8.0_181]
> at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Error evaluating
> cstring1
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:149)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:136)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:125)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:812)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:845)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271)
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_181]
> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> ~[?:1.8.0_181]
> at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> ~[?:1.8.0_181]
> at java.lang.Thread.run(Thread.java:748) ~[?:1.8.0_181]
> Caused by: java.lang.ClassCastException: org.apache.hadoop.io.FloatWritable
> cannot be cast to org.apache.hadoop.io.Text
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow.assignRowColumn(VectorAssignRow.java:471)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorAssignRow.assignRowColumn(VectorAssignRow.java:350)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.setResult(VectorUDFAdaptor.java:205)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.udf.VectorUDFAdaptor.evaluate(VectorUDFAdaptor.java:146)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprCondExprBase.conditionalEvaluate(IfExprCondExprBase.java:68)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.expressions.IfExprColumnCondExpr.evaluate(IfExprColumnCondExpr.java:113)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorSelectOperator.process(VectorSelectOperator.java:146)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator.process(VectorFilterOperator.java:136)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.Operator.vectorForward(Operator.java:965)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:938)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.TableScanOperator.process(TableScanOperator.java:125)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.deliverVectorizedRowBatch(VectorMapOperator.java:812)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator.process(VectorMapOperator.java:845)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapper.map(ExecMapper.java:154)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapRunner.run(MapRunner.java:54)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.hive.ql.exec.mr.ExecMapRunner.run(ExecMapRunner.java:37)
> ~[hive-exec-4.0.0-SNAPSHOT.jar:4.0.0-SNAPSHOT]
> at org.apache.hadoop.mapred.MapTask.runOldMapper(MapTask.java:465)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at org.apache.hadoop.mapred.MapTask.run(MapTask.java:349)
> ~[hadoop-mapreduce-client-core-3.1.0.jar:?]
> at
> org.apache.hadoop.mapred.LocalJobRunner$Job$MapTaskRunnable.run(LocalJobRunner.java:271)
> ~[hadoop-mapreduce-client-common-3.1.0.jar:?]
> at
> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
> ~[?:1.8.0_181]
> ...
> {code}
> To repro:
> {code:sql}
> --! qt:dataset:alltypesorc
> set hive.stats.fetch.column.stats=true;
> set hive.explain.user=false;
> SET hive.vectorized.execution.enabled=true;
> set hive.fetch.task.conversion=none;
> -- SORT_QUERY_RESULTS
> EXPLAIN VECTORIZATION ONLY EXPRESSION SELECT cdouble, cstring1, cint, cfloat,
> csmallint,
> case
> when (cdouble is not null) then cdouble
> when (cstring1 is not null) then cstring1
> when (cint is not null) then cint
> when (cfloat is not null) then cfloat
> when (csmallint is not null) then csmallint
> else null
> end as c
> FROM alltypesorc
> WHERE (cdouble IS NULL)
> ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
> LIMIT 10;
> SELECT cdouble, cstring1, cint, cfloat, csmallint,
> case
> when (cdouble is not null) then cdouble
> when (cstring1 is not null) then cstring1
> when (cint is not null) then cint
> when (cfloat is not null) then cfloat
> when (csmallint is not null) then csmallint
> else null
> end as c
> FROM alltypesorc
> WHERE (cdouble IS NULL)
> ORDER BY cdouble, cstring1, cint, cfloat, csmallint, c
> LIMIT 10;
> {code}
--
This message was sent by Atlassian JIRA
(v7.6.3#76005)