Author: xuefu
Date: Sat Mar  1 16:38:27 2014
New Revision: 1573193

URL: http://svn.apache.org/r1573193
Log:
HIVE-6375: Fix CTAS for parquet (Szehon via Xuefu)

Added:
    hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/trunk/ql/src/test/results/clientpositive/ctas.q.out
    hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out
    hive/trunk/ql/src/test/results/clientpositive/merge3.q.out

Modified: 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
(original)
+++ 
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java 
Sat Mar  1 16:38:27 2014
@@ -5703,14 +5703,13 @@ public class SemanticAnalyzer extends Ba
           colInfo.setAlias(nm[1]);
         }
 
+        String colName = colInfo.getInternalName();  //default column name
         if (field_schemas != null) {
           FieldSchema col = new FieldSchema();
-          if ("".equals(nm[0]) || nm[1] == null) {
-            // ast expression is not a valid column name for table
-            col.setName(colInfo.getInternalName());
-          } else {
-            col.setName(unescapeIdentifier(colInfo.getAlias()).toLowerCase()); 
// remove ``
+          if (!("".equals(nm[0])) && nm[1] != null) {
+            colName = unescapeIdentifier(colInfo.getAlias()).toLowerCase(); // 
remove ``
           }
+          col.setName(colName);
           col.setType(colInfo.getType().getTypeName());
           field_schemas.add(col);
         }
@@ -5721,7 +5720,7 @@ public class SemanticAnalyzer extends Ba
         }
 
         first = false;
-        cols = cols.concat(colInfo.getInternalName());
+        cols = cols.concat(colName);
 
         // Replace VOID type with string when the output is a temp table or
         // local files.

Added: hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q?rev=1573193&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_ctas.q Sat Mar  1 
16:38:27 2014
@@ -0,0 +1,24 @@
+drop table staging;
+drop table parquet_ctas;
+drop table parquet_ctas_advanced;
+drop table parquet_ctas_alias;
+drop table parquet_ctas_mixed;
+
+create table staging (key int, value string) stored as textfile;
+insert into table staging select * from src order by key limit 10;
+
+create table parquet_ctas stored as parquet as select * from staging;
+describe parquet_ctas;
+select * from parquet_ctas;
+
+create table parquet_ctas_advanced stored as parquet as select 
key+1,concat(value,"value") from staging;
+describe parquet_ctas_advanced;
+select * from parquet_ctas_advanced;
+
+create table parquet_ctas_alias stored as parquet as select key+1 as 
mykey,concat(value,"value") as myvalue from staging;
+describe parquet_ctas_alias;
+select * from parquet_ctas_alias;
+
+create table parquet_ctas_mixed stored as parquet as select 
key,key+1,concat(value,"value") as myvalue from staging;
+describe parquet_ctas_mixed;
+select * from parquet_ctas_mixed;
\ No newline at end of file

Modified: hive/trunk/ql/src/test/results/clientpositive/ctas.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas.q.out Sat Mar  1 
16:38:27 2014
@@ -836,7 +836,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    columns _col0,_col1
+                    columns key,value
                     columns.types string:string
                     field.delim ,
                     line.delim 

Modified: hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/ctas_hadoop20.q.out Sat Mar  
1 16:38:27 2014
@@ -838,7 +838,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    columns _col0,_col1
+                    columns key,value
                     columns.types string:string
                     field.delim ,
                     line.delim 

Modified: hive/trunk/ql/src/test/results/clientpositive/merge3.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/merge3.q.out?rev=1573193&r1=1573192&r2=1573193&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/merge3.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/merge3.q.out Sat Mar  1 
16:38:27 2014
@@ -113,7 +113,7 @@ STAGE PLANS:
                     input format: org.apache.hadoop.mapred.TextInputFormat
                     output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                     properties:
-                      columns _col0,_col1
+                      columns key,value
                       columns.types string:string
                       name default.merge_src2
                       serialization.format 1
@@ -212,7 +212,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    columns _col0,_col1
+                    columns key,value
                     columns.types string:string
                     name default.merge_src2
                     serialization.format 1
@@ -231,7 +231,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              columns _col0,_col1
+              columns key,value
               columns.types string:string
               name default.merge_src2
               serialization.format 1
@@ -241,7 +241,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                columns _col0,_col1
+                columns key,value
                 columns.types string:string
                 name default.merge_src2
                 serialization.format 1
@@ -266,7 +266,7 @@ STAGE PLANS:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
                   properties:
-                    columns _col0,_col1
+                    columns key,value
                     columns.types string:string
                     name default.merge_src2
                     serialization.format 1
@@ -285,7 +285,7 @@ STAGE PLANS:
             input format: org.apache.hadoop.mapred.TextInputFormat
             output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
             properties:
-              columns _col0,_col1
+              columns key,value
               columns.types string:string
               name default.merge_src2
               serialization.format 1
@@ -295,7 +295,7 @@ STAGE PLANS:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
               properties:
-                columns _col0,_col1
+                columns key,value
                 columns.types string:string
                 name default.merge_src2
                 serialization.format 1

Added: hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out
URL: 
http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out?rev=1573193&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_ctas.q.out Sat Mar  1 
16:38:27 2014
@@ -0,0 +1,186 @@
+PREHOOK: query: drop table staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_advanced
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_advanced
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_alias
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_alias
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: drop table parquet_ctas_mixed
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table parquet_ctas_mixed
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table staging (key int, value string) stored as textfile
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: create table staging (key int, value string) stored as 
textfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@staging
+PREHOOK: query: insert into table staging select * from src order by key limit 
10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@staging
+POSTHOOK: query: insert into table staging select * from src order by key 
limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@staging
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: create table parquet_ctas stored as parquet as select * from 
staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas stored as parquet as select * from 
staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+key                    int                     from deserializer   
+value                  string                  from deserializer   
+PREHOOK: query: select * from parquet_ctas
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+0      val_0
+0      val_0
+0      val_0
+10     val_10
+100    val_100
+100    val_100
+103    val_103
+103    val_103
+104    val_104
+104    val_104
+PREHOOK: query: create table parquet_ctas_advanced stored as parquet as select 
key+1,concat(value,"value") from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_advanced stored as parquet as 
select key+1,concat(value,"value") from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_advanced
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_advanced
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_advanced
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+_c0                    int                     from deserializer   
+_c1                    string                  from deserializer   
+PREHOOK: query: select * from parquet_ctas_advanced
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_advanced
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_advanced
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_advanced
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+1      val_0value
+1      val_0value
+1      val_0value
+11     val_10value
+101    val_100value
+101    val_100value
+104    val_103value
+104    val_103value
+105    val_104value
+105    val_104value
+PREHOOK: query: create table parquet_ctas_alias stored as parquet as select 
key+1 as mykey,concat(value,"value") as myvalue from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_alias stored as parquet as select 
key+1 as mykey,concat(value,"value") as myvalue from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_alias
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_alias
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_alias
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+mykey                  int                     from deserializer   
+myvalue                string                  from deserializer   
+PREHOOK: query: select * from parquet_ctas_alias
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_alias
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_alias
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_alias
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+1      val_0value
+1      val_0value
+1      val_0value
+11     val_10value
+101    val_100value
+101    val_100value
+104    val_103value
+104    val_103value
+105    val_104value
+105    val_104value
+PREHOOK: query: create table parquet_ctas_mixed stored as parquet as select 
key,key+1,concat(value,"value") as myvalue from staging
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@staging
+POSTHOOK: query: create table parquet_ctas_mixed stored as parquet as select 
key,key+1,concat(value,"value") as myvalue from staging
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@staging
+POSTHOOK: Output: default@parquet_ctas_mixed
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+PREHOOK: query: describe parquet_ctas_mixed
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: describe parquet_ctas_mixed
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+key                    int                     from deserializer   
+_c1                    int                     from deserializer   
+myvalue                string                  from deserializer   
+PREHOOK: query: select * from parquet_ctas_mixed
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_ctas_mixed
+#### A masked pattern was here ####
+POSTHOOK: query: select * from parquet_ctas_mixed
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_ctas_mixed
+#### A masked pattern was here ####
+POSTHOOK: Lineage: staging.key EXPRESSION [(src)src.FieldSchema(name:key, 
type:string, comment:default), ]
+POSTHOOK: Lineage: staging.value SIMPLE [(src)src.FieldSchema(name:value, 
type:string, comment:default), ]
+0      1       val_0value
+0      1       val_0value
+0      1       val_0value
+10     11      val_10value
+100    101     val_100value
+100    101     val_100value
+103    104     val_103value
+103    104     val_103value
+104    105     val_104value
+104    105     val_104value


Reply via email to