This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 47c784b46a9 HIVE-27243: Iceberg: Implement Load data via temp table. (#4289). (Ayush Saxena, reviewed by Denys Kuzmenko)
47c784b46a9 is described below

commit 47c784b46a951d4ee9ce2712f62aa6af92b71395
Author: Ayush Saxena <[email protected]>
AuthorDate: Wed May 10 23:03:52 2023 +0530

    HIVE-27243: Iceberg: Implement Load data via temp table. (#4289). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  13 +
 .../src/test/queries/positive/iceberg_load_data.q  |  43 ++++
 .../test/results/positive/iceberg_load_data.q.out  | 279 +++++++++++++++++++++
 .../hive/ql/metadata/HiveStorageHandler.java       |  11 +-
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |  10 +
 .../apache/hadoop/hive/ql/parse/StorageFormat.java |  21 +-
 6 files changed, 367 insertions(+), 10 deletions(-)

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 78611aa47ca..9ee6874dcbd 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -67,6 +67,8 @@ import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetProperti
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
 import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
 import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
 import org.apache.hadoop.hive.ql.parse.PartitionTransform;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.StorageFormat;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -333,6 +336,16 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return true;
   }
 
+  @Override
+  public StorageFormatDescriptor getStorageFormatDescriptor(org.apache.hadoop.hive.metastore.api.Table table)
+      throws SemanticException {
+    if (table.getParameters() != null) {
+      String format = table.getParameters().getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, IOConstants.PARQUET);
+      return StorageFormat.getDescriptor(format, TableProperties.DEFAULT_FILE_FORMAT);
+    }
+    return null;
+  }
+
   @Override
   public Map<String, String> getBasicStatistics(Partish partish) {
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
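
The new override resolves the temp table's staging format from the Iceberg table's write-format property, falling back to Parquet. A minimal standalone sketch of that lookup, assuming TableProperties.DEFAULT_FILE_FORMAT resolves to "write.format.default" and IOConstants.PARQUET to "PARQUET" (hypothetical class, not part of the patch):

    import java.util.Map;

    // Hypothetical illustration of the lookup done by the override above; the
    // string constants are assumptions standing in for the Iceberg/Hive ones.
    public class IcebergLoadFormatSketch {
      static final String DEFAULT_FILE_FORMAT = "write.format.default"; // TableProperties.DEFAULT_FILE_FORMAT
      static final String PARQUET = "PARQUET";                          // IOConstants.PARQUET

      // Null parameters yield no descriptor, so LOAD DATA is still rejected
      // for such tables, matching the override's null return.
      static String resolveFormat(Map<String, String> tableParameters) {
        if (tableParameters == null) {
          return null;
        }
        return tableParameters.getOrDefault(DEFAULT_FILE_FORMAT, PARQUET);
      }

      public static void main(String[] args) {
        System.out.println(resolveFormat(Map.of()));                           // PARQUET
        System.out.println(resolveFormat(Map.of(DEFAULT_FILE_FORMAT, "orc"))); // orc
      }
    }
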
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
new file mode 100644
index 00000000000..442357a706e
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
@@ -0,0 +1,43 @@
+create external table ice_parquet(
+  strcol string,
+  intcol integer
+) partitioned by (pcol int)
+stored by iceberg;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
+
+select * from ice_parquet order by intcol;
+
+CREATE TABLE ice_avro (
+  number int,
+  first_name string)
+stored by iceberg
+STORED AS AVRO;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+select * from ice_avro order by number;
+
+CREATE TABLE ice_orc (
+  p_partkey int,
+  p_name string,
+  p_mfgr string,
+  p_brand string,
+  p_type string,
+  p_size int,
+  p_container string,
+  p_retailprice double,
+  p_comment string
+)
+stored by iceberg
+STORED AS ORC;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc;
+
+LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc;
+
+select * from ice_orc order by p_partkey;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
new file mode 100644
index 00000000000..098455ab8d0
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
@@ -0,0 +1,279 @@
+PREHOOK: query: create external table ice_parquet(
+  strcol string,
+  intcol integer
+) partitioned by (pcol int)
+stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: create external table ice_parquet(
+  strcol string,
+  intcol integer
+) partitioned by (pcol int)
+stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.ice_parquet"}
+        Stage-2
+          Dependency Collection{}
+            Stage-1
+              Reducer 2 vectorized
+              File Output Operator [FS_18]
+                table:{"name:":"default.ice_parquet"}
+                Select Operator [SEL_17]
+                  Output:["_col0","_col1","_col2","_col2"]
+                <-Map 1 [SIMPLE_EDGE] vectorized
+                  PARTITION_ONLY_SHUFFLE [RS_13]
+                    PartitionCols:_col2
+                    Select Operator [SEL_12] (rows=77 width=187)
+                      Output:["_col0","_col1","_col2"]
+                      TableScan [TS_0] (rows=77 width=187)
+                          default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"]
+              Reducer 3 vectorized
+              File Output Operator [FS_21]
+                Select Operator [SEL_20] (rows=1 width=752)
+                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"]
+                  Group By Operator [GBY_19] (rows=1 width=752)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"]
+                  <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+                    PARTITION_ONLY_SHUFFLE [RS_16]
+                      Group By Operator [GBY_15] (rows=1 width=752)
+                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"]
+                        Select Operator [SEL_14] (rows=77 width=187)
+                          Output:["strcol","intcol","pcol"]
+                           Please refer to the previous Select Operator [SEL_12]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: select * from ice_parquet order by intcol
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_parquet order by intcol
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+a      1       100
+b      2       100
+c      3       200
+d      4       200
+e      5       300
+f      6       300
+PREHOOK: query: CREATE TABLE ice_avro (
+  number int,
+  first_name string)
+stored by iceberg
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: CREATE TABLE ice_avro (
+  number int,
+  first_name string)
+stored by iceberg
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_avro
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_avro
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.ice_avro"}
+        Stage-2
+          Dependency Collection{}
+            Stage-1
+              Reducer 2
+              File Output Operator [FS_9]
+                Select Operator [SEL_8] (rows=1 width=588)
+                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
+                  Group By Operator [GBY_7] (rows=1 width=588)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)"]
+                  <-Map 1 [CUSTOM_SIMPLE_EDGE]
+                    File Output Operator [FS_2]
+                      table:{"name:":"default.ice_avro"}
+                      Select Operator [SEL_1] (rows=17 width=188)
+                        Output:["_col0","_col1"]
+                        TableScan [TS_0] (rows=17 width=188)
+                          default@ice_avro__temp_table_for_load_data__,ice_avro__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["number","first_name"]
+                    PARTITION_ONLY_SHUFFLE [RS_6]
+                      Group By Operator [GBY_5] (rows=1 width=588)
+                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(number)","max(number)","count(1)","count(number)","compute_bit_vector_hll(number)","max(length(first_name))","avg(COALESCE(length(first_name),0))","count(first_name)","compute_bit_vector_hll(first_name)"]
+                        Select Operator [SEL_4] (rows=17 width=188)
+                          Output:["number","first_name"]
+                           Please refer to the previous Select Operator [SEL_1]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_avro
+PREHOOK: query: select * from ice_avro order by number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_avro order by number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      William
+2      Patrick
+3      Jon
+4      Tom
+5      Peter
+6      Colin
+7      Sylvester
+8      Paul
+9      Christopher
+10     David
+11     Matt
+PREHOOK: query: CREATE TABLE ice_orc (
+  p_partkey int,
+  p_name string,
+  p_mfgr string,
+  p_brand string,
+  p_type string,
+  p_size int,
+  p_container string,
+  p_retailprice double,
+  p_comment string
+)
+stored by iceberg
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: CREATE TABLE ice_orc (
+  p_partkey int,
+  p_name string,
+  p_mfgr string,
+  p_brand string,
+  p_type string,
+  p_size int,
+  p_container string,
+  p_retailprice double,
+  p_comment string
+)
+stored by iceberg
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_orc
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.ice_orc"}
+        Stage-2
+          Dependency Collection{}
+            Stage-1
+              Reducer 2 vectorized
+              File Output Operator [FS_17]
+                Select Operator [SEL_16] (rows=1 width=3008)
+                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52" [...]
+                  Group By Operator [GBY_15] (rows=1 width=3008)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VAL [...]
+                  <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+                    File Output Operator [FS_11]
+                      table:{"name:":"default.ice_orc"}
+                      Select Operator [SEL_10] (rows=33 width=1120)
+                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+                        TableScan [TS_0] (rows=33 width=1120)
+                          default@ice_orc__temp_table_for_load_data__,ice_orc__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
+                    PARTITION_ONLY_SHUFFLE [RS_14]
+                      Group By Operator [GBY_13] (rows=1 width=3008)
+                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(p_partkey)","max(p_partkey)","count(1)","count(p_partkey)","compute_bit_vector_hll(p_partkey)","max(length(p_name))", [...]
+                        Select Operator [SEL_12] (rows=33 width=1120)
+                          Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
+                           Please refer to the previous Select Operator [SEL_10]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: select * from ice_orc order by p_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_orc order by p_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+15103  almond aquamarine dodger light gainsboro        Manufacturer#5  Brand#53        ECONOMY BURNISHED STEEL 46      LG PACK 1018.1  packages hinder carefu
+17273  almond antique forest lavender goldenrod        Manufacturer#3  Brand#35        PROMO ANODIZED TIN      14      JUMBO CASE      1190.27 along the
+17927  almond aquamarine yellow dodger mint    Manufacturer#4  Brand#41        ECONOMY BRUSHED COPPER  7       SM PKG  1844.92 ites. eve
+33357  almond azure aquamarine papaya violet   Manufacturer#4  Brand#41        STANDARD ANODIZED TIN   12      WRAP CASE       1290.35 reful
+40982  almond antique misty red olive  Manufacturer#3  Brand#32        ECONOMY PLATED COPPER   1       LG PKG  1922.98 c foxes can s
+42669  almond antique medium spring khaki      Manufacturer#5  Brand#51        STANDARD BURNISHED TIN  6       MED CAN 1611.66 sits haggl
+45261  almond aquamarine floral ivory bisque   Manufacturer#4  Brand#42        SMALL PLATED STEEL      27      WRAP CASE       1206.26 careful
+48427  almond antique violet mint lemon        Manufacturer#4  Brand#42        PROMO POLISHED STEEL    39      SM CASE 1375.42 hely ironic i
+49671  almond antique gainsboro frosted violet Manufacturer#4  Brand#41        SMALL BRUSHED BRASS     10      SM BOX  1620.67 ccounts run quick
+65667  almond aquamarine pink moccasin thistle Manufacturer#1  Brand#12        LARGE BURNISHED STEEL   42      JUMBO CASE      1632.66 e across the expr
+78486  almond azure blanched chiffon midnight  Manufacturer#5  Brand#52        LARGE BRUSHED BRASS     23      MED BAG 1464.48 hely blith
+85768  almond antique chartreuse lavender yellow       Manufacturer#1  Brand#12        LARGE BRUSHED STEEL     34      SM BAG  1753.76 refull
+86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12        STANDARD ANODIZED STEEL 28      WRAP BAG        1414.42 arefully
+90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31        MEDIUM BURNISHED TIN    17      SM CASE 1671.68 are slyly after the sl
+105685 almond antique violet chocolate turquoise       Manufacturer#2  Brand#22        MEDIUM ANODIZED COPPER  14      MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood      Manufacturer#1  Brand#15        PROMO BURNISHED NICKEL  6       JUMBO PKG       1602.59  to the furiously
+112398 almond antique metallic orange dim      Manufacturer#3  Brand#32        MEDIUM BURNISHED BRASS  19      JUMBO JAR       1410.39 ole car
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
+132666 almond aquamarine rose maroon antique   Manufacturer#2  Brand#24        SMALL POLISHED NICKEL   25      MED BOX 1698.66 even
+144293 almond antique olive coral navajo       Manufacturer#3  Brand#34        STANDARD POLISHED STEEL 45      JUMBO CAN       1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2  Brand#23        MEDIUM BURNISHED COPPER 2       SM CASE 2031.98 s cajole caref
+155733 almond antique sky peru orange  Manufacturer#5  Brand#53        SMALL PLATED BRASS      2       WRAP DRUM       1788.73 furiously. bra
+191709 almond antique violet turquoise frosted Manufacturer#2  Brand#22        ECONOMY POLISHED STEEL  40      MED BOX 1800.7   haggle
+192697 almond antique blue firebrick mint      Manufacturer#5  Brand#52        MEDIUM BURNISHED TIN    31      LG DRUM 1789.69 ickly ir
+195606 almond aquamarine sandy cyan gainsboro  Manufacturer#2  Brand#25        STANDARD PLATED TIN     18      SM PKG  1701.6  ic de
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 65e14af478a..ef976feb54c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -41,10 +41,10 @@ import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
 import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -286,6 +286,15 @@ public interface HiveStorageHandler extends Configurable {
     return false;
   }
 
+  /**
+   *
+   * Gets the storage format descriptor to be used for temp table for LOAD data.
+   * @param table table object
+   * @return StorageFormatDescriptor if the storage handler can support load data
+   */
+  default StorageFormatDescriptor getStorageFormatDescriptor(Table table) throws SemanticException {
+    return null;
+  }
   /**
   * Check if CTAS and CMV operations should behave in a direct-insert manner (i.e. no move task).
    * <p>
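
Since the new interface method is a default that returns null, existing storage handlers are untouched; a handler opts in to LOAD DATA by returning a non-null descriptor. A sketch of a hypothetical handler that always stages through ORC (the class and its base are assumptions, not part of the patch):

    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.ql.io.IOConstants;
    import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
    import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.parse.StorageFormat;

    // Hypothetical handler: a non-null descriptor signals LoadSemanticAnalyzer
    // to rewrite LOAD DATA through a temp table instead of raising
    // LOAD_INTO_NON_NATIVE.
    public class OrcBackedStorageHandler extends DefaultStorageHandler {
      @Override
      public StorageFormatDescriptor getStorageFormatDescriptor(Table table) throws SemanticException {
        // The second argument only labels the error message for a bad format.
        return StorageFormat.getDescriptor(IOConstants.ORC, "LOAD DATA");
      }
    }
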
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 7ed6d1b2d12..8b8cc6b7f42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -294,6 +295,15 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
       throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
     }
     if (ts.tableHandle.isNonNative()) {
+      // launch a tez job
+      StorageFormatDescriptor ss =
+          ts.tableHandle.getStorageHandler().getStorageFormatDescriptor(ts.tableHandle.getTTable());
+      if (ss != null) {
+        inputFormatClassName = ss.getInputFormat();
+        serDeClassName = ss.getSerde();
+        reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+        return;
+      }
       throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
     }
 
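
For reference, the patched branch reads as below once unwrapped; the comments are illustrative. A handler that supplies a descriptor gets the temp-table rewrite (the __temp_table_for_load_data__ scans in the q.out above), while one that returns null keeps the old error:

    // Condensed restatement of the new non-native branch in LoadSemanticAnalyzer.
    if (ts.tableHandle.isNonNative()) {
      StorageFormatDescriptor ss =
          ts.tableHandle.getStorageHandler().getStorageFormatDescriptor(ts.tableHandle.getTTable());
      if (ss != null) {
        // Build the temp table with classes matching the staged files, then
        // reparse the LOAD as an insert over it; this launches a Tez job.
        inputFormatClassName = ss.getInputFormat();
        serDeClassName = ss.getSerde();
        reparseAndSuperAnalyze(ts.tableHandle, fromURI);
        return;
      }
      // Pre-patch behavior for handlers without a descriptor.
      throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
    }
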
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
index 3e9b854d7ce..c1fe01ad54b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse;
 
 import static org.apache.hadoop.hive.ql.parse.ParseUtils.ensureClassExists;
 
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -165,14 +164,7 @@ public class StorageFormat {
   }
 
   protected void processStorageFormat(String name) throws SemanticException {
-    if (name.isEmpty()) {
-      throw new SemanticException("File format in STORED AS clause cannot be empty");
-    }
-    StorageFormatDescriptor descriptor = storageFormatFactory.get(name);
-    if (descriptor == null) {
-      throw new SemanticException("Unrecognized file format in STORED AS clause:" +
-          " '" + name + "'");
-    }
+    StorageFormatDescriptor descriptor = getDescriptor(name, "STORED AS clause");
     inputFormat = ensureClassExists(descriptor.getInputFormat());
     outputFormat = ensureClassExists(descriptor.getOutputFormat());
     if (serde == null) {
@@ -245,4 +237,15 @@ public class StorageFormat {
  public void setStorageHandler(String storageHandlerClass) throws SemanticException {
     storageHandler = ensureClassExists(storageHandlerClass);
   }
+
+  public static StorageFormatDescriptor getDescriptor(String format, String clause) throws SemanticException {
+    if (format.isEmpty()) {
+      throw new SemanticException("File format in " + clause + " cannot be empty");
+    }
+    StorageFormatDescriptor descriptor = storageFormatFactory.get(format);
+    if (descriptor == null) {
+      throw new SemanticException("Unrecognized file format in " + clause + ":" + " '" + format + "'");
+    }
+    }
+    return descriptor;
+  }
 }
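
With the validation hoisted into the public static getDescriptor, callers outside STORED AS parsing can resolve a format name with the same error handling. A minimal sketch of a hypothetical caller, assuming Hive on the classpath (the clause string only decorates error messages):

    import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
    import org.apache.hadoop.hive.ql.parse.SemanticException;
    import org.apache.hadoop.hive.ql.parse.StorageFormat;

    // Hypothetical caller: resolve a format once, then reuse the descriptor's
    // classes; empty or unrecognized names surface as SemanticExceptions.
    public class DescriptorLookupSketch {
      public static void main(String[] args) throws SemanticException {
        StorageFormatDescriptor d = StorageFormat.getDescriptor("ORC", "LOAD DATA");
        System.out.println(d.getInputFormat());   // input format class name
        System.out.println(d.getOutputFormat());  // output format class name
        System.out.println(d.getSerde());         // serde class name (may be null)
      }
    }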
