hive git commit: HIVE-14076: Vectorization is not supported for datatype:VOID error while inserting data into specific columns (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

jcamacho Thu, 23 Jun 2016 09:50:28 -0700

Repository: hive
Updated Branches:
  refs/heads/master 5cd6bb954 -> 5b82e5e9f



HIVE-14076: Vectorization is not supported for datatype:VOID error while 
inserting data into specific columns (Jesus Camacho Rodriguez, reviewed by 
Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/5b82e5e9
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/5b82e5e9
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/5b82e5e9

Branch: refs/heads/master
Commit: 5b82e5e9f2982f64e78efe69bea7bb28128bdb8a
Parents: 5cd6bb9
Author: Jesus Camacho Rodriguez <jcama...@apache.org>
Authored: Thu Jun 23 08:15:41 2016 -0700
Committer: Jesus Camacho Rodriguez <jcama...@apache.org>
Committed: Thu Jun 23 09:48:57 2016 -0700

----------------------------------------------------------------------
 .../test/resources/testconfiguration.properties |  3 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  | 12 ++---
 .../acid_vectorization_missing_cols.q           | 21 ++++++++
 .../results/clientpositive/cbo_rp_insert.q.out  |  2 +-
 .../insert_into_with_schema.q.out               | 18 +++----
 .../insert_into_with_schema2.q.out              |  4 +-
 .../tez/acid_vectorization_missing_cols.q.out   | 56 ++++++++++++++++++++
 7 files changed, 97 insertions(+), 19 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index e2de63e..8ef978e 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -412,7 +412,8 @@ minitez.query.files.shared=acid_globallimit.q,\
   union_type_chk.q
 
 
-minitez.query.files=bucket_map_join_tez1.q,\
+minitez.query.files=acid_vectorization_missing_cols.q,\
+  bucket_map_join_tez1.q,\
   smb_cache.q,\
   bucket_map_join_tez2.q,\
   constprog_dpp.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 774cc2b..53f3b05 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -4282,12 +4282,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
         "No table/partition found in QB metadata for dest='" + dest + "'"));
     }
     ArrayList<ExprNodeDesc> new_col_list = new ArrayList<ExprNodeDesc>();
-    ArrayList<ColumnInfo> newSchema = new ArrayList<ColumnInfo>();
     colListPos = 0;
     List<FieldSchema> targetTableCols = target != null ? target.getCols() : partition.getCols();
     List<String> targetTableColNames = new ArrayList<String>();
+    List<TypeInfo> targetTableColTypes = new ArrayList<TypeInfo>();
     for(FieldSchema fs : targetTableCols) {
       targetTableColNames.add(fs.getName());
+      targetTableColTypes.add(TypeInfoUtils.getTypeInfoFromTypeString(fs.getType()));
     }
     Map<String, String> partSpec = qb.getMetaData().getPartSpecForAlias(dest);
     if(partSpec != null) {
@@ -4296,13 +4297,15 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       for(Map.Entry<String, String> partKeyVal : partSpec.entrySet()) {
         if (partKeyVal.getValue() == null) {
           targetTableColNames.add(partKeyVal.getKey());//these must be after non-partition cols
+          targetTableColTypes.add(TypeInfoFactory.stringTypeInfo);
         }
       }
     }
     RowResolver newOutputRR = new RowResolver();
     //now make the select produce <regular columns>,<dynamic partition columns> with
     //where missing columns are NULL-filled
-    for(String f : targetTableColNames) {
+    for (int i = 0; i < targetTableColNames.size(); i++) {
+      String f = targetTableColNames.get(i);
       if(targetCol2Projection.containsKey(f)) {
         //put existing column in new list to make sure it is in the right position
         new_col_list.add(targetCol2Projection.get(f));
@@ -4312,10 +4315,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
       }
       else {
         //add new 'synthetic' columns for projections not provided by Select
-        TypeCheckCtx tcCtx = new TypeCheckCtx(inputRR);
-        CommonToken t = new CommonToken(HiveParser.TOK_NULL);
-        t.setText("TOK_NULL");
-        ExprNodeDesc exp = genExprNodeDesc(new ASTNode(t), inputRR, tcCtx);
+        ExprNodeDesc exp = new ExprNodeConstantDesc(targetTableColTypes.get(i), null);
         new_col_list.add(exp);
         final String tableAlias = null;//this column doesn't come from any table
         ColumnInfo colInfo = new ColumnInfo(getColumnInternalName(colListPos),

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q b/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q
new file mode 100644
index 0000000..85e0bb1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_vectorization_missing_cols.q
@@ -0,0 +1,21 @@
+set hive.vectorized.execution.enabled=true;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+
+drop table if exists newtable;
+create table newtable(
+            a string,
+            b int,
+            c double)
+row format delimited
+fields terminated by '\t'
+stored as textfile;
+
+drop table if exists newtable_acid;
+create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int)
+clustered by (b) into 2 buckets
+stored as orc
+tblproperties ('transactional'='true');
+
+insert into newtable_acid(a,b,c)
+select * from newtable;

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
index 6428a4b..b5fcded 100644
--- a/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
+++ b/ql/src/test/results/clientpositive/cbo_rp_insert.q.out
@@ -60,7 +60,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@source
 POSTHOOK: Output: x314@target1
 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, 
type:int, comment:null), ]
-POSTHOOK: Lineage: target1.y EXPRESSION []
+POSTHOOK: Lineage: target1.y SIMPLE []
 POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, 
type:int, comment:null), ]
 PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
 select * from target1

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/insert_into_with_schema.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_into_with_schema.q.out b/ql/src/test/results/clientpositive/insert_into_with_schema.q.out
index a44dd91..53e6c99 100644
--- a/ql/src/test/results/clientpositive/insert_into_with_schema.q.out
+++ b/ql/src/test/results/clientpositive/insert_into_with_schema.q.out
@@ -72,7 +72,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@source
 POSTHOOK: Output: x314@target1
 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s2, 
type:int, comment:null), ]
-POSTHOOK: Lineage: target1.y EXPRESSION []
+POSTHOOK: Lineage: target1.y SIMPLE []
 POSTHOOK: Lineage: target1.z SIMPLE [(source)source.FieldSchema(name:s1, 
type:int, comment:null), ]
 PREHOOK: query: -- expect target1 to contain 1 row (2,NULL,1)
 select * from target1
@@ -99,9 +99,9 @@ POSTHOOK: Output: x314@target1
 POSTHOOK: Output: x314@target2
 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, 
type:int, comment:null), ]
 POSTHOOK: Lineage: target1.y SIMPLE [(source)source.FieldSchema(name:s2, 
type:int, comment:null), ]
-POSTHOOK: Lineage: target1.z EXPRESSION []
+POSTHOOK: Lineage: target1.z SIMPLE []
 POSTHOOK: Lineage: target2.x SIMPLE [(source)source.FieldSchema(name:s2, 
type:int, comment:null), ]
-POSTHOOK: Lineage: target2.y EXPRESSION []
+POSTHOOK: Lineage: target2.y SIMPLE []
 POSTHOOK: Lineage: target2.z SIMPLE [(source)source.FieldSchema(name:s1, 
type:int, comment:null), ]
 PREHOOK: query: --expect target1 to have 2rows (2,NULL,1), (1,2,NULL)
 select * from target1 order by x,y,z
@@ -192,7 +192,7 @@ POSTHOOK: Input: x314@source
 POSTHOOK: Input: x314@source2
 POSTHOOK: Output: x314@target1
 POSTHOOK: Lineage: target1.x SIMPLE [(source)source.FieldSchema(name:s1, 
type:int, comment:null), ]
-POSTHOOK: Lineage: target1.y EXPRESSION []
+POSTHOOK: Lineage: target1.y SIMPLE []
 POSTHOOK: Lineage: target1.z SIMPLE [(source2)source2.FieldSchema(name:s2, 
type:int, comment:null), ]
 PREHOOK: query: --expect target1 to have 1 row (1,NULL,NULL)
 select * from target1
@@ -224,7 +224,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@values__tmp__table__2
 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=1
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).link SIMPLE 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col2, 
type:string, comment:), ]
-POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).source EXPRESSION []
+POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).source SIMPLE []
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=1).userid 
EXPRESSION 
[(values__tmp__table__2)values__tmp__table__2.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: -- expect 1 row: ('jsmith', 'mail.com', NULL) in partition 
'2014-09-23'/'1'
 select * from pageviews
@@ -256,7 +256,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@values__tmp__table__3
 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=7
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).link SIMPLE 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
-POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).source EXPRESSION []
+POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).source SIMPLE []
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=7).userid 
EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: INSERT INTO TABLE pageviews PARTITION 
(datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 17, '17mail.com', 
'2014-09-23')
 PREHOOK: type: QUERY
@@ -267,7 +267,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@values__tmp__table__4
 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-23/i=17
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).link SIMPLE 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
-POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).source EXPRESSION []
+POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).source SIMPLE []
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-23,i=17).userid 
EXPRESSION 
[(values__tmp__table__4)values__tmp__table__4.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: INSERT INTO TABLE pageviews PARTITION 
(datestamp,i)(userid,i,link,datestamp) VALUES ('jsmith', 19, '19mail.com', 
'2014-09-24')
 PREHOOK: type: QUERY
@@ -278,7 +278,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@values__tmp__table__5
 POSTHOOK: Output: x314@pageviews@datestamp=2014-09-24/i=19
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).link SIMPLE 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
-POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).source EXPRESSION []
+POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).source SIMPLE []
 POSTHOOK: Lineage: pageviews PARTITION(datestamp=2014-09-24,i=19).userid 
EXPRESSION 
[(values__tmp__table__5)values__tmp__table__5.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: -- here the 'datestamp' partition column is not provided and 
will be NULL-filled
 INSERT INTO TABLE pageviews PARTITION (datestamp,i)(userid,i,link) VALUES 
('jsmith', 23, '23mail.com')
@@ -291,7 +291,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: x314@values__tmp__table__6
 POSTHOOK: Output: x314@pageviews@datestamp=__HIVE_DEFAULT_PARTITION__/i=23
 POSTHOOK: Lineage: pageviews 
PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).link SIMPLE 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col3, 
type:string, comment:), ]
-POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).source EXPRESSION []
+POSTHOOK: Lineage: pageviews PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).source SIMPLE []
 POSTHOOK: Lineage: pageviews 
PARTITION(datestamp=__HIVE_DEFAULT_PARTITION__,i=23).userid EXPRESSION 
[(values__tmp__table__6)values__tmp__table__6.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
 PREHOOK: query: -- expect 5 rows:
 -- expect ('jsmith', 'mail.com', NULL) in partition '2014-09-23'/'1'

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out
index 5d44d27..fa48963 100644
--- a/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out
+++ b/ql/src/test/results/clientpositive/insert_into_with_schema2.q.out
@@ -42,7 +42,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@studenttab10k
 POSTHOOK: Output: default@student_acid
 POSTHOOK: Lineage: student_acid.age SIMPLE 
[(studenttab10k)studenttab10k.FieldSchema(name:age2, type:int, comment:null), ]
-POSTHOOK: Lineage: student_acid.grade EXPRESSION []
+POSTHOOK: Lineage: student_acid.grade SIMPLE []
 PREHOOK: query: select * from student_acid
 PREHOOK: type: QUERY
 PREHOOK: Input: default@student_acid
@@ -91,7 +91,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@values__tmp__table__3
 POSTHOOK: Output: default@student_acid
 POSTHOOK: Lineage: student_acid.age EXPRESSION 
[(values__tmp__table__3)values__tmp__table__3.FieldSchema(name:tmp_values_col1, 
type:string, comment:), ]
-POSTHOOK: Lineage: student_acid.grade EXPRESSION []
+POSTHOOK: Lineage: student_acid.grade SIMPLE []
 PREHOOK: query: select * from student_acid
 PREHOOK: type: QUERY
 PREHOOK: Input: default@student_acid

http://git-wip-us.apache.org/repos/asf/hive/blob/5b82e5e9/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out b/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out
new file mode 100644
index 0000000..6acc6e3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/tez/acid_vectorization_missing_cols.q.out
@@ -0,0 +1,56 @@
+PREHOOK: query: drop table if exists newtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists newtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table newtable(
+            a string,
+            b int,
+            c double)
+row format delimited
+fields terminated by '\t'
+stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtable
+POSTHOOK: query: create table newtable(
+            a string,
+            b int,
+            c double)
+row format delimited
+fields terminated by '\t'
+stored as textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtable
+PREHOOK: query: drop table if exists newtable_acid
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists newtable_acid
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int)
+clustered by (b) into 2 buckets
+stored as orc
+tblproperties ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@newtable_acid
+POSTHOOK: query: create table newtable_acid (b int, a varchar(50),c decimal(3,2), d int)
+clustered by (b) into 2 buckets
+stored as orc
+tblproperties ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@newtable_acid
+PREHOOK: query: insert into newtable_acid(a,b,c)
+select * from newtable
+PREHOOK: type: QUERY
+PREHOOK: Input: default@newtable
+PREHOOK: Output: default@newtable_acid
+POSTHOOK: query: insert into newtable_acid(a,b,c)
+select * from newtable
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@newtable
+POSTHOOK: Output: default@newtable_acid
+POSTHOOK: Lineage: newtable_acid.a EXPRESSION [(newtable)newtable.FieldSchema(name:a, type:string, comment:null), ]
+POSTHOOK: Lineage: newtable_acid.b SIMPLE [(newtable)newtable.FieldSchema(name:b, type:int, comment:null), ]
+POSTHOOK: Lineage: newtable_acid.c EXPRESSION [(newtable)newtable.FieldSchema(name:c, type:double, comment:null), ]
+POSTHOOK: Lineage: newtable_acid.d SIMPLE []

hive git commit: HIVE-14076: Vectorization is not supported for datatype:VOID error while inserting data into specific columns (Jesus Camacho Rodriguez, reviewed by Ashutosh Chauhan)

Reply via email to