Repository: hive Updated Branches: refs/heads/master 5cd6bb954 -> 5b82e5e9f
HIVE-14076: Vectorization is not supported for datatype:VOID error while inserting data into specific columns (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan) Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5b82e5e9 Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5b82e5e9 Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5b82e5e9 Branch: refs/heads/master Commit: 5b82e5e9f2982f64e78efe69bea7bb28128bdb8a Parents: 5cd6bb9 Author: Jesus Camacho Rodriguez <jcama...@apache.org> Authored: Thu Jun 23 08:15:41 2016 -0700 Committer: Jesus Camacho Rodriguez <jcama...@apache.org> Committed: Thu Jun 23 09:48:57 2016 -0700 ---------------------------------------------------------------------- .../test/resources/testconfiguration.properties | 3 +- .../hadoop/hive/ql/parse/SemanticAnalyzer.java | 12 ++--- .../acid_vectorization_missing_cols.q | 21 ++++++++ .../results/clientpositive/cbo_rp_insert.q.out | 2 +- .../insert_into_with_schema.q.out | 18 +++---- .../insert_into_with_schema2.q.out | 4 +- .../tez/acid_vectorization_missing_cols.q.out | 56 ++++++++++++++++++++ 7 files changed, 97 insertions(+), 19 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/itests/src/test/resources/testconfiguration.properties ---------------------------------------------------------------------- diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties index e2de63e..8ef978e 100644 --- a/itests/src/test/resources/testconfiguration.properties +++ b/itests/src/test/resources/testconfiguration.properties @@ -412,7 +412,8 @@ minitez.query.files.shared=acid_globallimit.q,\ union_type_chk.q -minitez.query.files=bucket_map_join_tez1.q,\ +minitez.query.files=acid_vectorization_missing_cols.q,\ + bucket_map_join_tez1.q,\ smb_cache.q,\ bucket_map_join_tez2.q,\ constprog_dpp.q,\ http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java ---------------------------------------------------------------------- diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index 774cc2b..53f3b05 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -4282,12 +4282,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { "No table/partition found in QB metadata for dest='" + dest + "'")); } ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>(); - ArrayList<ColumnInfo> newSchema = new ArrayList<ColumnInfo>(); colListPos = 0; List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols(); List<String> targetTableColNames = new ArrayList<String>(); + List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>(); for(FieldSchema fs : targetTableCols) { targetTableColNames.add(fs.getName()); + targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType())); } Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest); if(partSpec != null) { @@ -4296,13 +4297,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { for(Map.Entry<String, String> partKeyVal : partSpec.entrySet()) { if (partKeyVal.getValue() == null) { targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols + targetTableColTypes.add(TypeInfoFactory.stringTypeInfo); } } } RowResolver newOutputRR = new RowResolver(); //now make the select produce <regular columns>,<dynamic partition columns> with //where missing columns are NULL-filled - for(String f : targetTableColNames) { + for (int i = 0; i < targetTableColNames.size(); i++) { + String f = targetTableColNames.get(i); if(targetCol2Projection.containsKey(f)) { //put existing column in new list to make sure it is in the right position new_col_list.add(targetCol2Projection.get(f)); @@ -4312,10 +4315,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer { } else { //add new 'synthetic' columns for projections not provided by Select - TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR); - CommonToken t = new CommonToken(HiveParser.TOK_NULL); - t.setText("TOK_NULL"); - ExprNodeDesc exp = genExprNodeDesc(new ASTNode(t), inputRR, tcCtx); + ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null); new_col_list.add(exp); final String tableAlias = null;//this column doesn't come from any table ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos), http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q ---------------------------------------------------------------------- diff --git a/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q b/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q new file mode 100644 index 0000000..85e0bb1 --- /dev/null +++ b/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q @@ -0,0 +1,21 @@ +set hive.vectorized.execution.enabled=true; +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; + +drop table if exists newtable; +create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile; + +drop table if exists newtable_acid; +create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true'); + +insert into newtable_acid(a,b,c) +select * from newtable; http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/cbo_rp_insert.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out index 6428a4b..b5fcded 100644 --- a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out +++ b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out @@ -60,7 +60,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@source POSTHOOK: Output: x314@target1 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] -POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.y SIMPLE [] POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1) select * from target1 http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/insert_into_with_schema.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/insert_into_with_schema.q.out b/ql/src/test/results/clientpositive/insert_into_with_schema.q.out index a44dd91..53e6c99 100644 --- a/ql/src/test/results/clientpositive/insert_into_with_schema.q.out +++ b/ql/src/test/results/clientpositive/insert_into_with_schema.q.out @@ -72,7 +72,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@source POSTHOOK: Output: x314@target1 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] -POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.y SIMPLE [] POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1) select * from target1 @@ -99,9 +99,9 @@ POSTHOOK: Output: x314@target1 POSTHOOK: Output: x314@target2 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] POSTHOOK: Lineage: target1.y SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] -POSTHOOK: Lineage: target1.z EXPRESSION [] +POSTHOOK: Lineage: target1.z SIMPLE [] POSTHOOK: Lineage: target2.x SIMPLE [(source)source.FieldSchema(name:s2, type:int, comment:null), ] -POSTHOOK: Lineage: target2.y EXPRESSION [] +POSTHOOK: Lineage: target2.y SIMPLE [] POSTHOOK: Lineage: target2.z SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] PREHOOK: query: --expect target1 to have 2rows (2,NULL,1), (1,2,NULL) select * from target1 order by x,y,z @@ -192,7 +192,7 @@ POSTHOOK: Input: x314@source POSTHOOK: Input: x314@source2 POSTHOOK: Output: x314@target1 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, type:int, comment:null), ] -POSTHOOK: Lineage: target1.y EXPRESSION [] +POSTHOOK: Lineage: target1.y SIMPLE [] POSTHOOK: Lineage: target1.z SIMPLE [(source2)source2.FieldSchema(name:s2, type:int, comment:null), ] PREHOOK: query: --expect target1 to have 1 row (1,NULL,NULL) select * from target1 @@ -224,7 +224,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@values__tmp__table__2 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=1 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).link SIMPLE [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, type:string, comment:), ] -POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).source SIMPLE [] POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).userid EXPRESSION [(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- expect 1 row: ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' select * from pageviews @@ -256,7 +256,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@values__tmp__table__3 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=7 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).link SIMPLE [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).source SIMPLE [] POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).userid EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 17, '17mail.com', '2014-09-23') PREHOOK: type: QUERY @@ -267,7 +267,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@values__tmp__table__4 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=17 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).link SIMPLE [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).source SIMPLE [] POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).userid EXPRESSION [(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 19, '19mail.com', '2014-09-24') PREHOOK: type: QUERY @@ -278,7 +278,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@values__tmp__table__5 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-24/i=19 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).link SIMPLE [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).source SIMPLE [] POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).userid EXPRESSION [(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- here the 'datestamp' partition column is not provided and will be NULL-filled INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link) VALUES ('jsmith', 23, '23mail.com') @@ -291,7 +291,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: x314@values__tmp__table__6 POSTHOOK: Output: x314@pageviews@datestamp=__HIVE_DEFAULT_PARTITION__/i=23 POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).link SIMPLE [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, type:string, comment:), ] -POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).source EXPRESSION [] +POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).source SIMPLE [] POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).userid EXPRESSION [(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, type:string, comment:), ] PREHOOK: query: -- expect 5 rows: -- expect ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1' http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out index 5d44d27..fa48963 100644 --- a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out +++ b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out @@ -42,7 +42,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@studenttab10k POSTHOOK: Output: default@student_acid POSTHOOK: Lineage: student_acid.age SIMPLE [(studenttab10k)studenttab10k.FieldSchema(name:age2, type:int, comment:null), ] -POSTHOOK: Lineage: student_acid.grade EXPRESSION [] +POSTHOOK: Lineage: student_acid.grade SIMPLE [] PREHOOK: query: select * from student_acid PREHOOK: type: QUERY PREHOOK: Input: default@student_acid @@ -91,7 +91,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@values__tmp__table__3 POSTHOOK: Output: default@student_acid POSTHOOK: Lineage: student_acid.age EXPRESSION [(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, type:string, comment:), ] -POSTHOOK: Lineage: student_acid.grade EXPRESSION [] +POSTHOOK: Lineage: student_acid.grade SIMPLE [] PREHOOK: query: select * from student_acid PREHOOK: type: QUERY PREHOOK: Input: default@student_acid http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out new file mode 100644 index 0000000..6acc6e3 --- /dev/null +++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out @@ -0,0 +1,56 @@ +PREHOOK: query: drop table if exists newtable +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists newtable +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@newtable +POSTHOOK: query: create table newtable( + a string, + b int, + c double) +row format delimited +fields terminated by '\t' +stored as textfile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@newtable +PREHOOK: query: drop table if exists newtable_acid +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists newtable_acid +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@newtable_acid +POSTHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int) +clustered by (b) into 2 buckets +stored as orc +tblproperties ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@newtable_acid +PREHOOK: query: insert into newtable_acid(a,b,c) +select * from newtable +PREHOOK: type: QUERY +PREHOOK: Input: default@newtable +PREHOOK: Output: default@newtable_acid +POSTHOOK: query: insert into newtable_acid(a,b,c) +select * from newtable +POSTHOOK: type: QUERY +POSTHOOK: Input: default@newtable +POSTHOOK: Output: default@newtable_acid +POSTHOOK: Lineage: newtable_acid.a EXPRESSION [(newtable)newtable.FieldSchema(name:a, type:string, comment:null), ] +POSTHOOK: Lineage: newtable_acid.b SIMPLE [(newtable)newtable.FieldSchema(name:b, type:int, comment:null), ] +POSTHOOK: Lineage: newtable_acid.c EXPRESSION [(newtable)newtable.FieldSchema(name:c, type:double, comment:null), ] +POSTHOOK: Lineage: newtable_acid.d SIMPLE []