This is an automated email from the ASF dual-hosted git repository. vgarg pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new fcfc71b HIVE-10362: Support Type check/conversion in dynamic partition column(Karen Coppage, reviewed by Vineet Garg, Zoltan Haindrich) fcfc71b is described below commit fcfc71b089662ec655125eb4373e1c5be42ec671 Author: Karen Coppage <karen.copp...@cloudera.com> AuthorDate: Wed Feb 12 09:37:58 2020 -0800 HIVE-10362: Support Type check/conversion in dynamic partition column(Karen Coppage, reviewed by Vineet Garg, Zoltan Haindrich) --- .../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 + .../test/resources/testconfiguration.properties | 1 + .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 61 ++++++-- ql/src/test/queries/clientpositive/dynpart_cast.q | 14 ++ .../results/clientpositive/autoColumnStats_6.q.out | 16 +- .../test/results/clientpositive/dynpart_cast.q.out | 165 +++++++++++++++++++++ .../dynpart_sort_optimization_acid2.q.out | 10 +- .../infer_bucket_sort_num_buckets.q.out | 16 +- .../results/clientpositive/llap/dynpart_cast.q.out | 135 +++++++++++++++++ .../llap/dynpart_sort_opt_bucketing.q.out | 8 +- .../results/clientpositive/llap/orc_merge1.q.out | 24 +-- .../results/clientpositive/llap/orc_merge10.q.out | 48 +++--- .../results/clientpositive/llap/orc_merge2.q.out | 16 +- .../clientpositive/llap/orc_merge_diff_fs.q.out | 48 +++--- .../clientpositive/llap/rcfile_merge2.q.out | 16 +- .../test/results/clientpositive/llap/tez_dml.q.out | 8 +- .../test/results/clientpositive/orc_merge1.q.out | 24 +-- .../test/results/clientpositive/orc_merge10.q.out | 48 +++--- .../test/results/clientpositive/orc_merge2.q.out | 16 +- .../results/clientpositive/orc_merge_diff_fs.q.out | 48 +++--- .../clientpositive/smb_join_partition_key.q.out | 10 +- .../spark/infer_bucket_sort_num_buckets.q.out | 8 +- .../results/clientpositive/spark/orc_merge1.q.out | 6 +- .../results/clientpositive/spark/orc_merge2.q.out | 8 +- .../clientpositive/spark/orc_merge_diff_fs.q.out | 24 +-- 25 files changed, 568 insertions(+), 214 deletions(-) diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index a120b45..2f695d4 100644 --- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -601,6 +601,10 @@ public class HiveConf extends Configuration { "Maximum number of dynamic partitions allowed to be created in total."), DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100, "Maximum number of dynamic partitions allowed to be created in each mapper/reducer node."), + DYNAMICPARTITIONCONVERT("hive.exec.dynamic.partition.type.conversion", true, + "Whether to check and cast a dynamic partition column before creating the partition " + + "directory. For example, if partition p is type int and we insert string '001', then if " + + "this value is true, directory p=1 will be created; if false, p=001"), MAXCREATEDFILES("hive.exec.max.created.files", 100000L, "Maximum number of HDFS files created by all mappers/reducers in a MapReduce job."), DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__", diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index 608ec04..dc4bf41 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -430,6 +430,7 @@ minillap.query.files=acid_bucket_pruning.q,\ orc_ppd_schema_evol_3a.q,\ global_limit.q,\ dynamic_partition_pruning_2.q,\ + dynpart_cast.q,\ results_cache_diff_fs.q,\ tez_union_dynamic_partition.q,\ tez_union_dynamic_partition_2.q,\ diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index c2514ee..33d3beb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hive.ql.parse; import static java.util.Objects.nonNull; +import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.DYNAMICPARTITIONCONVERT; import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS; import java.io.FileNotFoundException; @@ -7406,7 +7407,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { input = genConstraintsPlan(dest, qb, input); if (!qb.getIsQuery()) { - input = genConversionSelectOperator(dest, qb, input, tableDescriptor, dpCtx); + input = genConversionSelectOperator(dest, qb, input, tableDescriptor, dpCtx, parts); } if (destinationTable.isMaterializedView() && @@ -7535,7 +7536,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { input = genConstraintsPlan(dest, qb, input); if (!qb.getIsQuery()) { - input = genConversionSelectOperator(dest, qb, input, tableDescriptor, dpCtx); + input = genConversionSelectOperator(dest, qb, input, tableDescriptor, dpCtx, null); } if (destinationTable.isMaterializedView() && @@ -8401,7 +8402,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { * types that are expected by the table_desc. */ private Operator genConversionSelectOperator(String dest, QB qb, Operator input, - TableDesc table_desc, DynamicPartitionCtx dpCtx) throws SemanticException { + TableDesc table_desc, DynamicPartitionCtx dpCtx, List<FieldSchema> parts) + throws SemanticException { StructObjectInspector oi = null; try { Deserializer deserializer = table_desc.getDeserializerClass() @@ -8483,18 +8485,51 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } expressions.add(column); } - } - // deal with dynamic partition columns: convert ExprNodeDesc type to String?? - if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) { - // DP columns starts with tableFields.size() - for (int i = tableFields.size() + (updating(dest) ? 1 : 0); i < rowFields.size(); ++i) { - TypeInfo rowFieldTypeInfo = rowFields.get(i).getType(); - ExprNodeDesc column = new ExprNodeColumnDesc( - rowFieldTypeInfo, rowFields.get(i).getInternalName(), "", true); - expressions.add(column); + // deal with dynamic partition columns + if (dynPart && dpCtx != null && dpCtx.getNumDPCols() > 0) { + // rowFields contains non-partitioned columns (tableFields) followed by DP columns + int rowFieldsOffset = tableFields.size() + (updating(dest) ? 1 : 0); + for (int dpColIdx = 0; dpColIdx < rowFields.size() - rowFieldsOffset; ++dpColIdx) { + + // create ExprNodeDesc + ColumnInfo inputColumn = rowFields.get(dpColIdx + rowFieldsOffset); + TypeInfo inputTypeInfo = inputColumn.getType(); + ExprNodeDesc column = + new ExprNodeColumnDesc(inputTypeInfo, inputColumn.getInternalName(), "", true); + + // Cast input column to destination column type if necessary. + if (conf.getBoolVar(DYNAMICPARTITIONCONVERT)) { + if (parts != null && !parts.isEmpty()) { + String destPartitionName = dpCtx.getDPColNames().get(dpColIdx); + FieldSchema destPartitionFieldSchema = parts.stream() + .filter(dynamicPartition -> dynamicPartition.getName().equals(destPartitionName)) + .findFirst().orElse(null); + if (destPartitionFieldSchema == null) { + throw new IllegalStateException("Partition schema for dynamic partition " + + destPartitionName + " not found in DynamicPartitionCtx."); + } + String partitionType = destPartitionFieldSchema.getType(); + if (partitionType == null) { + throw new IllegalStateException("Couldn't get FieldSchema for partition" + + destPartitionFieldSchema.getName()); + } + PrimitiveTypeInfo partitionTypeInfo = + TypeInfoFactory.getPrimitiveTypeInfo(partitionType); + if (!partitionTypeInfo.equals(inputTypeInfo)) { + column = ExprNodeTypeCheck.getExprNodeDefaultExprProcessor() + .createConversionCast(column, partitionTypeInfo); + converted = true; + } + } else { + LOG.warn("Partition schema for dynamic partition " + inputColumn.getAlias() + " (" + + inputColumn.getInternalName() + ") not found in DynamicPartitionCtx. " + + "This is expected with a CTAS."); + } + } + expressions.add(column); + } } - // converted = true; // [TODO]: should we check & convert type to String and set it to true? } if (converted) { diff --git a/ql/src/test/queries/clientpositive/dynpart_cast.q b/ql/src/test/queries/clientpositive/dynpart_cast.q new file mode 100644 index 0000000..d281525 --- /dev/null +++ b/ql/src/test/queries/clientpositive/dynpart_cast.q @@ -0,0 +1,14 @@ +set hive.stats.autogather=true; + +drop table dynpart_cast; +create table dynpart_cast (i int) PARTITIONED BY (`static_part` int, `dyn_part` int); + +EXPLAIN +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002'; + +-- stats task will fail here if dynamic partition not cast to integer and creates "dyn_part=002" +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002'; diff --git a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out index da3be3e..ff708cb 100644 --- a/ql/src/test/results/clientpositive/autoColumnStats_6.q.out +++ b/ql/src/test/results/clientpositive/autoColumnStats_6.q.out @@ -41,11 +41,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 10) AS STRING) (type: string), CAST( (hash(value) pmod 10) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -92,21 +92,21 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/dynpart_cast.q.out b/ql/src/test/results/clientpositive/dynpart_cast.q.out new file mode 100644 index 0000000..146a08f --- /dev/null +++ b/ql/src/test/results/clientpositive/dynpart_cast.q.out @@ -0,0 +1,165 @@ +PREHOOK: query: drop table dynpart_cast +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dynpart_cast +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dynpart_cast (i int) PARTITIONED BY (`static_part` int, `dyn_part` int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynpart_cast +POSTHOOK: query: create table dynpart_cast (i int) PARTITIONED BY (`static_part` int, `dyn_part` int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynpart_cast +PREHOOK: query: EXPLAIN +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_cast@static_part=3 +POSTHOOK: query: EXPLAIN +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + Select Operator + expressions: _col0 (type: int), UDFToInteger('3') (type: int), _col1 (type: int) + outputColumnNames: i, static_part, dyn_part + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: static_part (type: int), dyn_part (type: int) + minReductionHashAggr: 0.99 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-7 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + + Stage: Stage-0 + Move Operator + tables: + partition: + dyn_part + static_part 3 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + + Stage: Stage-2 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.dynpart_cast + + Stage: Stage-3 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + + Stage: Stage-5 + Map Reduce + Map Operator Tree: + TableScan + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true +#### A masked pattern was here #### + +PREHOOK: query: INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_cast@static_part=3 +POSTHOOK: query: INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dynpart_cast@static_part=3/dyn_part=2 +POSTHOOK: Lineage: dynpart_cast PARTITION(static_part=3,dyn_part=2).i SIMPLE [] diff --git a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out index 43bb789..7811f7c 100644 --- a/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out +++ b/ql/src/test/results/clientpositive/dynpart_sort_optimization_acid2.q.out @@ -46,21 +46,21 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col2 (type: string), _col3 (type: string), _bucket_number (type: string), _col1 (type: string) + key expressions: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int), _bucket_number (type: string), _col1 (type: string) null sort order: aaaa sort order: ++++ - Map-reduce partition columns: _col2 (type: string), _col3 (type: string) - Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string), UDFToInteger(UDFToInteger(_col3)) (type: int) + Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string), KEY._bucket_number (type: string) + expressions: VALUE._col0 (type: string), KEY._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: int), KEY._bucket_number (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _bucket_number File Output Operator compressed: false Dp Sort State: PARTITION_BUCKET_SORTED - Statistics: Num rows: 2000 Data size: 1092000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 732000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out index f745b46..52fd083 100644 --- a/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/infer_bucket_sort_num_buckets.q.out @@ -57,11 +57,11 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), CAST( VALUE._col2 AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string) outputColumnNames: key, value, hr Statistics: Num rows: 1000 Data size: 373000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -128,21 +128,21 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 1000 Data size: 99000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 279000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out new file mode 100644 index 0000000..f571961 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/dynpart_cast.q.out @@ -0,0 +1,135 @@ +PREHOOK: query: drop table dynpart_cast +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table dynpart_cast +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table dynpart_cast (i int) PARTITIONED BY (`static_part` int, `dyn_part` int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@dynpart_cast +POSTHOOK: query: create table dynpart_cast (i int) PARTITIONED BY (`static_part` int, `dyn_part` int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@dynpart_cast +PREHOOK: query: EXPLAIN +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_cast@static_part=3 +POSTHOOK: query: EXPLAIN +INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + Stage-3 depends on stages: Stage-0 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: _dummy_table + Row Limit Per Split: 1 + Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 2 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + Select Operator + expressions: _col0 (type: int), UDFToInteger('3') (type: int), _col1 (type: int) + outputColumnNames: i, static_part, dyn_part + Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: compute_stats(i, 'hll') + keys: static_part (type: int), dyn_part (type: int) + minReductionHashAggr: 0.0 + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + null sort order: zz + sort order: ++ + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Statistics: Num rows: 1 Data size: 432 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: compute_stats(VALUE._col0) + keys: KEY._col0 (type: int), KEY._col1 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,numdistinctvalues:bigint,ndvbitvector:binary>), _col0 (type: int), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-2 + Dependency Collection + + Stage: Stage-0 + Move Operator + tables: + partition: + dyn_part + static_part 3 + replace: false + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: default.dynpart_cast + + Stage: Stage-3 + Stats Work + Basic Stats Work: + Column Stats Desc: + Columns: i + Column Types: int + Table: default.dynpart_cast + +PREHOOK: query: INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@dynpart_cast@static_part=3 +POSTHOOK: query: INSERT INTO TABLE dynpart_cast PARTITION (static_part=03, dyn_part) +SELECT 1, +'002' +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@dynpart_cast@static_part=3/dyn_part=2 +POSTHOOK: Lineage: dynpart_cast PARTITION(static_part=3,dyn_part=2).i SIMPLE [] diff --git a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out index 453d245..39c7ae6 100644 --- a/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out +++ b/ql/src/test/results/clientpositive/llap/dynpart_sort_opt_bucketing.q.out @@ -604,14 +604,14 @@ STAGE PLANS: alias: t1_staging Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL Select Operator - expressions: a (type: string), CAST( b AS decimal(6,0)) (type: decimal(6,0)), c (type: int), d (type: string), e (type: decimal(18,0)) + expressions: a (type: string), CAST( b AS decimal(6,0)) (type: decimal(6,0)), c (type: int), d (type: string), CAST( e AS decimal(3,0)) (type: decimal(3,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4 Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL Reduce Output Operator - key expressions: _col4 (type: decimal(18,0)), _bucket_number (type: string), _col0 (type: string), _col1 (type: decimal(6,0)) + key expressions: _col4 (type: decimal(3,0)), _bucket_number (type: string), _col0 (type: string), _col1 (type: decimal(6,0)) null sort order: aaaa sort order: ++++ - Map-reduce partition columns: _col4 (type: decimal(18,0)) + Map-reduce partition columns: _col4 (type: decimal(3,0)) Statistics: Num rows: 51 Data size: 23008 Basic stats: PARTIAL Column stats: PARTIAL value expressions: _col2 (type: int), _col3 (type: string) Execution mode: llap @@ -620,7 +620,7 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: KEY._col0 (type: string), KEY._col1 (type: decimal(6,0)), VALUE._col2 (type: int), VALUE._col3 (type: string), KEY._col4 (type: decimal(18,0)), KEY._bucket_number (type: string) + expressions: KEY._col0 (type: string), KEY._col1 (type: decimal(6,0)), VALUE._col2 (type: int), VALUE._col3 (type: string), KEY._col4 (type: decimal(3,0)), KEY._bucket_number (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _bucket_number File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out index 9da73e6..44a5d6f 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge1.q.out @@ -73,19 +73,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -211,19 +211,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -389,19 +389,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out index a6ea334..3bf3710 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge10.q.out @@ -74,11 +74,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -96,11 +96,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -128,12 +128,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -222,11 +222,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -244,11 +244,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -276,12 +276,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -415,11 +415,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -437,11 +437,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -469,12 +469,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out index 9b0d3b4..19ca90c 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge2.q.out @@ -48,11 +48,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 10) AS STRING) (type: string), CAST( (hash(value) pmod 10) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -70,11 +70,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -102,12 +102,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out index d35f44b..3215f09 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_diff_fs.q.out @@ -74,11 +74,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -96,11 +96,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -128,12 +128,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -222,11 +222,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -244,11 +244,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -276,12 +276,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -415,11 +415,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -437,11 +437,11 @@ STAGE PLANS: Statistics: Num rows: 250 Data size: 283250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col3 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -469,12 +469,12 @@ STAGE PLANS: Execution mode: llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out index fcff20a..59f4acd 100644 --- a/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out +++ b/ql/src/test/results/clientpositive/llap/rcfile_merge2.q.out @@ -48,11 +48,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 10) AS STRING) (type: string), CAST( (hash(value) pmod 10) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -70,11 +70,11 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 658500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: struct<columntype:string,min:bigint,max:bigint,countnulls:bigint,bitvector:binary>), _col4 (type: struct<columntype:string,maxlength:bigint,sumlength:bigint,count:bigint,countnulls:bigint,bitvector:binary>) Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: llap LLAP IO: no inputs @@ -102,12 +102,12 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dml.q.out b/ql/src/test/results/clientpositive/llap/tez_dml.q.out index 4ad78d8..d716b63 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dml.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dml.q.out @@ -489,19 +489,19 @@ STAGE PLANS: alias: tmp_src Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: value (type: string), cnt (type: bigint) + expressions: value (type: string), UDFToInteger(cnt) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 309 Data size: 30591 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.tmp_src_part Select Operator - expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) + expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: c, d Statistics: Num rows: 309 Data size: 29355 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/orc_merge1.q.out b/ql/src/test/results/clientpositive/orc_merge1.q.out index 9c07816..8a4aade 100644 --- a/ql/src/test/results/clientpositive/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge1.q.out @@ -66,19 +66,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -185,19 +185,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1b_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -343,19 +343,19 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orcfile_merge1c_n1 Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator diff --git a/ql/src/test/results/clientpositive/orc_merge10.q.out b/ql/src/test/results/clientpositive/orc_merge10.q.out index 4a5f03c..0b9b664 100644 --- a/ql/src/test/results/clientpositive/orc_merge10.q.out +++ b/ql/src/test/results/clientpositive/orc_merge10.q.out @@ -67,11 +67,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -118,20 +118,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -210,11 +210,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -261,20 +261,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -392,11 +392,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -443,20 +443,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge2.q.out b/ql/src/test/results/clientpositive/orc_merge2.q.out index d132d62..2997a4f 100644 --- a/ql/src/test/results/clientpositive/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge2.q.out @@ -41,11 +41,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 10) AS STRING) (type: string), CAST( (hash(value) pmod 10) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string), CAST( _col3 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string), _col3 (type: string) outputColumnNames: key, value, one, two, three Statistics: Num rows: 500 Data size: 274000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -92,20 +92,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 51500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 231500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out index 7f9a04b..3806c36 100644 --- a/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_diff_fs.q.out @@ -67,11 +67,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -118,20 +118,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -210,11 +210,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -261,20 +261,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -392,11 +392,11 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator - expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), CAST( _col2 AS STRING) (type: string) + expressions: _col0 (type: int), _col1 (type: string), '1' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, part Statistics: Num rows: 500 Data size: 182000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator @@ -443,20 +443,20 @@ STAGE PLANS: Map Operator Tree: TableScan Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Map-reduce partition columns: _col2 (type: string) + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 500 Data size: 49500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 139500 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat diff --git a/ql/src/test/results/clientpositive/smb_join_partition_key.q.out b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out index c18d01d..34246d1 100644 --- a/ql/src/test/results/clientpositive/smb_join_partition_key.q.out +++ b/ql/src/test/results/clientpositive/smb_join_partition_key.q.out @@ -109,20 +109,20 @@ PREHOOK: Output: default@smb_table_part POSTHOOK: query: INSERT OVERWRITE TABLE smb_table_part PARTITION (p1) SELECT key, value, 100 as p1 FROM data_table POSTHOOK: type: QUERY POSTHOOK: Input: default@data_table -POSTHOOK: Output: default@smb_table_part@p1=100 -POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ] -POSTHOOK: Lineage: smb_table_part PARTITION(p1=100).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ] +POSTHOOK: Output: default@smb_table_part@p1=100.0 +POSTHOOK: Lineage: smb_table_part PARTITION(p1=100.0).key SIMPLE [(data_table)data_table.FieldSchema(name:key, type:int, comment:null), ] +POSTHOOK: Lineage: smb_table_part PARTITION(p1=100.0).value SIMPLE [(data_table)data_table.FieldSchema(name:value, type:string, comment:null), ] PREHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key PREHOOK: type: QUERY PREHOOK: Input: default@smb_table PREHOOK: Input: default@smb_table_part -PREHOOK: Input: default@smb_table_part@p1=100 +PREHOOK: Input: default@smb_table_part@p1=100.0 #### A masked pattern was here #### POSTHOOK: query: SELECT s1.key, s2.p1 FROM smb_table s1 INNER JOIN smb_table_part s2 ON s1.key = s2.key ORDER BY s1.key POSTHOOK: type: QUERY POSTHOOK: Input: default@smb_table POSTHOOK: Input: default@smb_table_part -POSTHOOK: Input: default@smb_table_part@p1=100 +POSTHOOK: Input: default@smb_table_part@p1=100.0 #### A masked pattern was here #### 1 100.0 2 100.0 diff --git a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out index 56d5ed9..9efcf98 100644 --- a/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out +++ b/ql/src/test/results/clientpositive/spark/infer_bucket_sort_num_buckets.q.out @@ -63,21 +63,21 @@ STAGE PLANS: Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), CAST( VALUE._col2 AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Reducer 3 Execution mode: vectorized Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out index 977c4cb..5c95429 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge1.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge1.q.out @@ -69,7 +69,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -152,7 +152,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -279,7 +279,7 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out index 4647b86..089be29 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge2.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge2.q.out @@ -50,20 +50,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 10) (type: int), (hash(value) pmod 10) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 10) AS STRING) (type: string), CAST( (hash(value) pmod 10) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int), _col3 (type: int) + key expressions: _col2 (type: string), _col3 (type: string) null sort order: aa sort order: ++ - Map-reduce partition columns: _col2 (type: int), _col3 (type: int) + Map-reduce partition columns: _col2 (type: string), _col3 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int), KEY._col3 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string), KEY._col3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 File Output Operator compressed: false diff --git a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out index b7d3dd7..d58642d 100644 --- a/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out +++ b/ql/src/test/results/clientpositive/spark/orc_merge_diff_fs.q.out @@ -71,20 +71,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false @@ -168,20 +168,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false @@ -310,20 +310,20 @@ STAGE PLANS: alias: src Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: UDFToInteger(key) (type: int), value (type: string), (hash(key) pmod 2) (type: int) + expressions: UDFToInteger(key) (type: int), value (type: string), CAST( (hash(key) pmod 2) AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col2 (type: int) + key expressions: _col2 (type: string) null sort order: a sort order: + - Map-reduce partition columns: _col2 (type: int) + Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string) Reducer 2 Reduce Operator Tree: Select Operator - expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: int) + expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), KEY._col2 (type: string) outputColumnNames: _col0, _col1, _col2 File Output Operator compressed: false