Author: xuefu
Date: Sat Mar 1 16:38:27 2014
New Revision: 1573193
URL: http://svn.apache.org/r1573193
Log:
HIVE-6375: Fix CTAS for parquet (Szehon via Xuefu)
Added:
hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q
hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hive/trunk/ql/src/test/results/clientpositive/ctas.q.out
hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out
hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Sat Mar 1 16:38:27 2014
@@ -5703,14 +5703,13 @@ public class SemanticAnalyzer extends Ba
colInfo.setAlias(nm[1]);
}
+ String colName = colInfo.getInternalName(); //default column name
if (field_schemas != null) {
FieldSchema col = new FieldSchema();
- if ("".equals(nm[0]) || nm[1] == null) {
- // ast expression is not a valid column name for table
- col.setName(colInfo.getInternalName());
- } else {
- col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); // remove ``
+ if (!("".equals(nm[0])) && nm[1] != null) {
+ colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // remove ``
}
+ col.setName(colName);;
col.setType(colInfo.getType().getTypeName());
field_schemas.add(col);
}
@@ -5721,7 +5720,7 @@ public class SemanticAnalyzer extends Ba
}
first = false;
- cols = cols.concat(colInfo.getInternalName());
+ cols = cols.concat(colName);
// Replace VOID type with string when the output is a temp table or
// local files.
Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q?rev=1573193&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q Sat Mar 1 16:38:27 2014
@@ -0,0 +1,24 @@
+drop table staging;
+drop table parquet_ctas;
+drop table parquet_ctas_advanced;
+drop table parquet_ctas_alias;
+drop table parquet_ctas_mixed;
+
+create table staging (key int, value string) stored as textfile;
+insert into table staging select * from src order by key limit 10;
+
+create table parquet_ctas stored as parquet as select * from staging;
+describe parquet_ctas;
+select * from parquet_ctas;
+
+create table parquet_ctas_advanced stored as parquet as select key+1,concat(value,"value") from staging;
+describe parquet_ctas_advanced;
+select * from parquet_ctas_advanced;
+
+create table parquet_ctas_alias stored as parquet as select key+1 as mykey,concat(value,"value") as myvalue from staging;
+describe parquet_ctas_alias;
+select * from parquet_ctas_alias;
+
+create table parquet_ctas_mixed stored as parquet as select key,key+1,concat(value,"value") as myvalue from staging;
+describe parquet_ctas_mixed;
+select * from parquet_ctas_mixed;
\ No newline at end of file
Modified: hive/trunk/ql/src/test/results/clientpositive/ctas.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas.q.out Sat Mar 1
16:38:27 2014
@@ -836,7 +836,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
field.delim ,
line.delim
Modified: hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out Sat Mar
1 16:38:27 2014
@@ -838,7 +838,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
field.delim ,
line.delim
Modified: hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/merge3.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/merge3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/merge3.q.out Sat Mar 1
16:38:27 2014
@@ -113,7 +113,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -212,7 +212,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -231,7 +231,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -241,7 +241,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -266,7 +266,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -285,7 +285,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
@@ -295,7 +295,7 @@ STAGE PLANS:
input format: org.apache.hadoop.mapred.TextInputFormat
output format:
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
properties:
- columns _col0,_col1
+ columns key,value
columns.types string:string
name default.merge_src2
serialization.format 1
Added: hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out
URL:
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out?rev=1573193&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out Sat Mar 1
16:38:27 2014
@@ -0,0 +1,186 @@
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_advanced
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_advanced
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_alias
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_alias
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_mixed
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_mixed
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table staging (key int, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table staging (key int, value string) stored as
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: insert into table staging select * from src order by key limit
10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@staging
+POSTHOOK: query: insert into table staging select * from src order by key
limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@staging
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: create table parquet_ctas stored as parquet as select * from
staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas stored as parquet as select * from
staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+key int from deserializer
+value string from deserializer
+PREHOOK: query: select * from parquet_ctas
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+0 val_0
+0 val_0
+0 val_0
+10 val_10
+100 val_100
+100 val_100
+103 val_103
+103 val_103
+104 val_104
+104 val_104
+PREHOOK: query: create table parquet_ctas_advanced stored as parquet as select
key+1,concat(value,"value") from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_advanced stored as parquet as
select key+1,concat(value,"value") from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_advanced
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_advanced
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_advanced
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+_c0 int from deserializer
+_c1 string from deserializer
+PREHOOK: query: select * from parquet_ctas_advanced
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_advanced
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_advanced
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_advanced
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+1 val_0value
+1 val_0value
+1 val_0value
+11 val_10value
+101 val_100value
+101 val_100value
+104 val_103value
+104 val_103value
+105 val_104value
+105 val_104value
+PREHOOK: query: create table parquet_ctas_alias stored as parquet as select
key+1 as mykey,concat(value,"value") as myvalue from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_alias stored as parquet as select
key+1 as mykey,concat(value,"value") as myvalue from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_alias
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_alias
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_alias
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+mykey int from deserializer
+myvalue string from deserializer
+PREHOOK: query: select * from parquet_ctas_alias
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_alias
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_alias
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_alias
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+1 val_0value
+1 val_0value
+1 val_0value
+11 val_10value
+101 val_100value
+101 val_100value
+104 val_103value
+104 val_103value
+105 val_104value
+105 val_104value
+PREHOOK: query: create table parquet_ctas_mixed stored as parquet as select
key,key+1,concat(value,"value") as myvalue from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_mixed stored as parquet as select
key,key+1,concat(value,"value") as myvalue from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_mixed
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_mixed
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_mixed
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+key int from deserializer
+_c1 int from deserializer
+myvalue string from deserializer
+PREHOOK: query: select * from parquet_ctas_mixed
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_mixed
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_mixed
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_mixed
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key,
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value,
type:string, comment:default), ]
+0 1 val_0value
+0 1 val_0value
+0 1 val_0value
+10 11 val_10value
+100 101 val_100value
+100 101 val_100value
+103 104 val_103value
+103 104 val_103value
+104 105 val_104value
+104 105 val_104value