This is an automated email from the ASF dual-hosted git repository.
rameshkumar pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new fdea1bd1ba3 HIVE-26269 Class cast exception when vectorization is
enabled for certain case when cases (#3329)
fdea1bd1ba3 is described below
commit fdea1bd1ba3c4b2b27ef2bf0a463ca91d4d44653
Author: Ramesh Kumar <[email protected]>
AuthorDate: Wed Jun 15 10:31:44 2022 -0700
HIVE-26269 Class cast exception when vectorization is enabled for certain
case when cases (#3329)
---
.../hive/ql/exec/vector/VectorizationContext.java | 19 +-
.../queries/clientpositive/vector_case_when_3.q | 9 +
.../clientpositive/llap/vector_case_when_3.q.out | 288 +++++++++++++++++++++
3 files changed, 312 insertions(+), 4 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 6a897939819..6d0e4899e68 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -2407,7 +2407,7 @@ import com.google.common.annotations.VisibleForTesting;
} else if (udf instanceof GenericUDFToString) {
ve = getCastToString(childExpr, returnType);
} else if (udf instanceof GenericUDFToDecimal) {
- ve = getCastToDecimal(childExpr, returnType);
+ ve = getCastToDecimal(childExpr, mode, returnType);
} else if (udf instanceof GenericUDFToChar) {
ve = getCastToChar(childExpr, returnType);
} else if (udf instanceof GenericUDFToVarchar) {
@@ -3232,8 +3232,8 @@ import com.google.common.annotations.VisibleForTesting;
return null;
}
- private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, TypeInfo returnType)
- throws HiveException {
+ private VectorExpression getCastToDecimal(List<ExprNodeDesc> childExpr, VectorExpressionDescriptor.Mode mode,
+ TypeInfo returnType) throws HiveException {
ExprNodeDesc child = childExpr.get(0);
String inputType = childExpr.get(0).getTypeString();
if (child instanceof ExprNodeConstantDesc) {
@@ -3278,7 +3278,18 @@ import com.google.common.annotations.VisibleForTesting;
int colIndex = getInputColumnIndex((ExprNodeColumnDesc) child);
DataTypePhysicalVariation dataTypePhysicalVariation = getDataTypePhysicalVariation(colIndex);
if (dataTypePhysicalVariation == DataTypePhysicalVariation.DECIMAL_64) {
-
+ // try to scale up the expression so we can match the return type scale
+ if (tryDecimal64Cast && ((DecimalTypeInfo)returnType).precision() <= 18) {
+ List<ExprNodeDesc> children = new ArrayList<>();
+ int scaleDiff = ((DecimalTypeInfo)returnType).scale() - ((DecimalTypeInfo)childExpr.get(0).getTypeInfo()).scale();
+ ExprNodeDesc newConstant = new ExprNodeConstantDesc(new DecimalTypeInfo(scaleDiff, 0),
+ HiveDecimal.create(POWEROFTENTABLE[scaleDiff]));
+ children.add(child);
+ children.add(newConstant);
+ ExprNodeGenericFuncDesc newScaledExpr = new ExprNodeGenericFuncDesc(returnType,
+ new GenericUDFOPScaleUpDecimal64(), " ScaleUp ", children);
+ return getVectorExpression(newScaledExpr, mode);
+ }
// Do Decimal64 conversion instead.
return createDecimal64ToDecimalConversion(colIndex, returnType);
} else {
diff --git a/ql/src/test/queries/clientpositive/vector_case_when_3.q b/ql/src/test/queries/clientpositive/vector_case_when_3.q
new file mode 100644
index 00000000000..35a157a9941
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_case_when_3.q
@@ -0,0 +1,9 @@
+set hive.explain.user=false;
+set hive.fetch.task.conversion=none;
+set hive.vectorized.execution.enabled=true;
+create external table test_decimal(rattag string, newclt_all decimal(15,2)) stored as orc;
+insert into test_decimal values('a', '10.20');
+explain vectorization detail select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal;
+select sum(case when rattag='a' then newclt_all*0.3 else newclt_all end) from test_decimal;
+explain vectorization detail select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal;
+select sum(case when rattag='Y' then newclt_all*0.3 else newclt_all end) from test_decimal;
diff --git a/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out b/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out
new file mode 100644
index 00000000000..ddfe1d85aa9
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/vector_case_when_3.q.out
@@ -0,0 +1,288 @@
+PREHOOK: query: create external table test_decimal(rattag string, newclt_all
decimal(15,2)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_decimal
+POSTHOOK: query: create external table test_decimal(rattag string, newclt_all
decimal(15,2)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_decimal
+PREHOOK: query: insert into test_decimal values('a', '10.20')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_decimal
+POSTHOOK: query: insert into test_decimal values('a', '10.20')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_decimal
+POSTHOOK: Lineage: test_decimal.newclt_all SCRIPT []
+POSTHOOK: Lineage: test_decimal.rattag SCRIPT []
+PREHOOK: query: explain vectorization detail select sum(case when rattag='a'
then newclt_all*0.3 else newclt_all end) from test_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select sum(case when rattag='a'
then newclt_all*0.3 else newclt_all end) from test_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test_decimal
+ Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:rattag:string,
1:newclt_all:decimal(15,2)/DECIMAL_64,
2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>,
3:ROW__IS__DELETED:boolean]
+ Select Operator
+ expressions: if((rattag = 'a'), (newclt_all * 0.3),
newclt_all) (type: decimal(17,3))
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [10]
+ selectExpressions:
IfExprDecimal64ColumnDecimal64Column(col 7:boolean, col
8:decimal(17,3)/DECIMAL_64, col 9:decimal(17,3)/DECIMAL_64)(children:
StringGroupColEqualStringScalar(col 0:string, val a) -> 7:boolean,
Decimal64ColMultiplyDecimal64ScalarUnscaled(col 1:decimal(15,2)/DECIMAL_64,
decimal64Val 3, decimalVal 0.3) -> 8:decimal(17,3)/DECIMAL_64,
Decimal64ColScaleUp(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 10, decimalVal
10) -> 9:decimal(17,3)/DECIMAL_64) -> [...]
+ Statistics: Num rows: 1 Data size: 197 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal64ToDecimal(col
10:decimal(17,3)/DECIMAL_64) -> decimal(27,3)
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:decimal(27,3)
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(27,3))
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: rattag:string,
newclt_all:decimal(15,2)/DECIMAL_64
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, decimal(17,3)/DECIMAL_64,
decimal(17,3), bigint, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64,
decimal(17,3)/DECIMAL_64]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:decimal(27,3)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(27,3)) ->
decimal(27,3)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(case when rattag='a' then newclt_all*0.3 else
newclt_all end) from test_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(case when rattag='a' then newclt_all*0.3 else
newclt_all end) from test_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+3.060
+PREHOOK: query: explain vectorization detail select sum(case when rattag='Y'
then newclt_all*0.3 else newclt_all end) from test_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: explain vectorization detail select sum(case when rattag='Y'
then newclt_all*0.3 else newclt_all end) from test_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: test_decimal
+ Statistics: Num rows: 1 Data size: 197 Basic stats: COMPLETE
Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:rattag:string,
1:newclt_all:decimal(15,2)/DECIMAL_64,
2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>,
3:ROW__IS__DELETED:boolean]
+ Select Operator
+ expressions: if((rattag = 'Y'), (newclt_all * 0.3),
newclt_all) (type: decimal(17,3))
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [10]
+ selectExpressions:
IfExprDecimal64ColumnDecimal64Column(col 7:boolean, col
8:decimal(17,3)/DECIMAL_64, col 9:decimal(17,3)/DECIMAL_64)(children:
StringGroupColEqualStringScalar(col 0:string, val Y) -> 7:boolean,
Decimal64ColMultiplyDecimal64ScalarUnscaled(col 1:decimal(15,2)/DECIMAL_64,
decimal64Val 3, decimalVal 0.3) -> 8:decimal(17,3)/DECIMAL_64,
Decimal64ColScaleUp(col 1:decimal(15,2)/DECIMAL_64, decimal64Val 10, decimalVal
10) -> 9:decimal(17,3)/DECIMAL_64) -> [...]
+ Statistics: Num rows: 1 Data size: 197 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: sum(_col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal64ToDecimal(col
10:decimal(17,3)/DECIMAL_64) -> decimal(27,3)
+ className: VectorGroupByOperator
+ groupByMode: HASH
+ native: false
+ vectorProcessingMode: HASH
+ projectedOutputColumnNums: [0]
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet:
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine
tez IN [tez] IS true, No PTF TopN IS true, No DISTINCT columns IS true,
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:decimal(27,3)
+ Statistics: Num rows: 1 Data size: 112 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: decimal(27,3))
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet:
hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ featureSupportInUse: [DECIMAL_64]
+ inputFileFormats:
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: rattag:string,
newclt_all:decimal(15,2)/DECIMAL_64
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, decimal(17,3)/DECIMAL_64,
decimal(17,3), bigint, decimal(17,3)/DECIMAL_64, decimal(17,3)/DECIMAL_64,
decimal(17,3)/DECIMAL_64]
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled
IS true, hive.execution.engine tez IN [tez] IS true
+ reduceColumnNullOrder:
+ reduceColumnSortOrder:
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: VALUE._col0:decimal(27,3)
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: sum(VALUE._col0)
+ Group By Vectorization:
+ aggregators: VectorUDAFSumDecimal(col 0:decimal(27,3)) ->
decimal(27,3)
+ className: VectorGroupByOperator
+ groupByMode: MERGEPARTIAL
+ native: false
+ vectorProcessingMode: GLOBAL
+ projectedOutputColumnNums: [0]
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
+ Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select sum(case when rattag='Y' then newclt_all*0.3 else
newclt_all end) from test_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+POSTHOOK: query: select sum(case when rattag='Y' then newclt_all*0.3 else
newclt_all end) from test_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_decimal
+#### A masked pattern was here ####
+10.200