This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 47c784b46a9 HIVE-27243: Iceberg: Implement Load data via temp table. (#4289). (Ayush Saxena, reviewed by Denys Kuzmenko)
47c784b46a9 is described below
commit 47c784b46a951d4ee9ce2712f62aa6af92b71395
Author: Ayush Saxena <[email protected]>
AuthorDate: Wed May 10 23:03:52 2023 +0530
HIVE-27243: Iceberg: Implement Load data via temp table. (#4289). (Ayush Saxena, reviewed by Denys Kuzmenko)
---
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 13 +
.../src/test/queries/positive/iceberg_load_data.q | 43 ++++
.../test/results/positive/iceberg_load_data.q.out | 279 +++++++++++++++++++++
.../hive/ql/metadata/HiveStorageHandler.java | 11 +-
.../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 10 +
.../apache/hadoop/hive/ql/parse/StorageFormat.java | 21 +-
6 files changed, 367 insertions(+), 10 deletions(-)
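In effect, LOAD DATA against an Iceberg table is no longer rejected as a load into a non-native table: when the storage handler supplies a StorageFormatDescriptor, the statement is re-analyzed as an insert that goes through a temporary table, with the file format taken from the table's default file format property (falling back to Parquet). A minimal usage sketch in the spirit of the new q-test (the table name and input path below are placeholders, not part of the commit):

    create external table ice_t (strcol string, intcol int) partitioned by (pcol int) stored by iceberg;
    -- Previously this failed for non-native tables; it is now rewritten into an insert from a temp table.
    LOAD DATA LOCAL INPATH '/tmp/parquet_files' OVERWRITE INTO TABLE ice_t;
    select * from ice_t order by intcol;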
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index 78611aa47ca..9ee6874dcbd 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -67,6 +67,8 @@ import org.apache.hadoop.hive.ql.ddl.table.misc.properties.AlterTableSetProperti
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
@@ -77,6 +79,7 @@ import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
import org.apache.hadoop.hive.ql.parse.PartitionTransform;
import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.parse.StorageFormat;
import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
@@ -333,6 +336,16 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
return true;
}
+  @Override
+  public StorageFormatDescriptor getStorageFormatDescriptor(org.apache.hadoop.hive.metastore.api.Table table)
+      throws SemanticException {
+    if (table.getParameters() != null) {
+      String format = table.getParameters().getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, IOConstants.PARQUET);
+      return StorageFormat.getDescriptor(format, TableProperties.DEFAULT_FILE_FORMAT);
+    }
+    return null;
+  }
+
@Override
public Map<String, String> getBasicStatistics(Partish partish) {
org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
new file mode 100644
index 00000000000..442357a706e
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
@@ -0,0 +1,43 @@
+create external table ice_parquet(
+ strcol string,
+ intcol integer
+) partitioned by (pcol int)
+stored by iceberg;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
+
+LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
+
+select * from ice_parquet order by intcol;
+
+CREATE TABLE ice_avro (
+ number int,
+ first_name string)
+stored by iceberg
+STORED AS AVRO;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+select * from ice_avro order by number;
+
+CREATE TABLE ice_orc (
+ p_partkey int,
+ p_name string,
+ p_mfgr string,
+ p_brand string,
+ p_type string,
+ p_size int,
+ p_container string,
+ p_retailprice double,
+ p_comment string
+)
+stored by iceberg
+STORED AS ORC;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc;
+
+LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc;
+
+select * from ice_orc order by p_partkey;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
new file mode 100644
index 00000000000..098455ab8d0
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
@@ -0,0 +1,279 @@
+PREHOOK: query: create external table ice_parquet(
+ strcol string,
+ intcol integer
+) partitioned by (pcol int)
+stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: create external table ice_parquet(
+ strcol string,
+ intcol integer
+) partitioned by (pcol int)
+stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.ice_parquet"}
+ Stage-2
+ Dependency Collection{}
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_18]
+ table:{"name:":"default.ice_parquet"}
+ Select Operator [SEL_17]
+ Output:["_col0","_col1","_col2","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_13]
+ PartitionCols:_col2
+ Select Operator [SEL_12] (rows=77 width=187)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=77 width=187)
+ default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"]
+ Reducer 3 vectorized
+ File Output Operator [FS_21]
+ Select Operator [SEL_20] (rows=1 width=752)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"]
+ Group By Operator [GBY_19] (rows=1 width=752)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_16]
+ Group By Operator [GBY_15] (rows=1 width=752)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"]
+ Select Operator [SEL_14] (rows=77 width=187)
+ Output:["strcol","intcol","pcol"]
+ Please refer to the previous Select Operator [SEL_12]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: select * from ice_parquet order by intcol
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_parquet order by intcol
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+a 1 100
+b 2 100
+c 3 200
+d 4 200
+e 5 300
+f 6 300
+PREHOOK: query: CREATE TABLE ice_avro (
+ number int,
+ first_name string)
+stored by iceberg
+STORED AS AVRO
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: CREATE TABLE ice_avro (
+ number int,
+ first_name string)
+stored by iceberg
+STORED AS AVRO
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_avro
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_avro
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.ice_avro"}
+ Stage-2
+ Dependency Collection{}
+ Stage-1
+ Reducer 2
+ File Output Operator [FS_9]
+ Select Operator [SEL_8] (rows=1 width=588)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"]
+ Group By Operator [GBY_7] (rows=1 width=588)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VALUE._col5)","avg(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE]
+ File Output Operator [FS_2]
+ table:{"name:":"default.ice_avro"}
+ Select Operator [SEL_1] (rows=17 width=188)
+ Output:["_col0","_col1"]
+ TableScan [TS_0] (rows=17 width=188)
+ default@ice_avro__temp_table_for_load_data__,ice_avro__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["number","first_name"]
+ PARTITION_ONLY_SHUFFLE [RS_6]
+ Group By Operator [GBY_5] (rows=1 width=588)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(number)","max(number)","count(1)","count(number)","compute_bit_vector_hll(number)","max(length(first_name))","avg(COALESCE(length(first_name),0))","count(first_name)","compute_bit_vector_hll(first_name)"]
+ Select Operator [SEL_4] (rows=17 width=188)
+ Output:["number","first_name"]
+ Please refer to the previous Select Operator [SEL_1]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
+PREHOOK: Output: default@ice_avro
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_avro
+PREHOOK: query: select * from ice_avro order by number
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_avro
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_avro order by number
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_avro
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1 William
+2 Patrick
+3 Jon
+4 Tom
+5 Peter
+6 Colin
+7 Sylvester
+8 Paul
+9 Christopher
+10 David
+11 Matt
+PREHOOK: query: CREATE TABLE ice_orc (
+ p_partkey int,
+ p_name string,
+ p_mfgr string,
+ p_brand string,
+ p_type string,
+ p_size int,
+ p_container string,
+ p_retailprice double,
+ p_comment string
+)
+stored by iceberg
+STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: CREATE TABLE ice_orc (
+ p_partkey int,
+ p_name string,
+ p_mfgr string,
+ p_brand string,
+ p_type string,
+ p_size int,
+ p_container string,
+ p_retailprice double,
+ p_comment string
+)
+stored by iceberg
+STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_orc
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.ice_orc"}
+ Stage-2
+ Dependency Collection{}
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_17]
+ Select Operator [SEL_16] (rows=1 width=3008)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52" [...]
+ Group By Operator [GBY_15] (rows=1 width=3008)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VAL [...]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+ File Output Operator [FS_11]
+ table:{"name:":"default.ice_orc"}
+ Select Operator [SEL_10] (rows=33 width=1120)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
+ TableScan [TS_0] (rows=33 width=1120)
+ default@ice_orc__temp_table_for_load_data__,ice_orc__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
+ PARTITION_ONLY_SHUFFLE [RS_14]
+ Group By Operator [GBY_13] (rows=1 width=3008)
+ Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(p_partkey)","max(p_partkey)","count(1)","count(p_partkey)","compute_bit_vector_hll(p_partkey)","max(length(p_name))", [...]
+ Select Operator [SEL_12] (rows=33 width=1120)
+ Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
+ Please refer to the previous Select Operator [SEL_10]
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
+PREHOOK: Output: default@ice_orc
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_orc
+PREHOOK: query: select * from ice_orc order by p_partkey
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_orc order by p_partkey
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+15103	almond aquamarine dodger light gainsboro	Manufacturer#5	Brand#53	ECONOMY BURNISHED STEEL	46	LG PACK	1018.1	packages hinder carefu
+17273	almond antique forest lavender goldenrod	Manufacturer#3	Brand#35	PROMO ANODIZED TIN	14	JUMBO CASE	1190.27	along the
+17927	almond aquamarine yellow dodger mint	Manufacturer#4	Brand#41	ECONOMY BRUSHED COPPER	7	SM PKG	1844.92	ites. eve
+33357	almond azure aquamarine papaya violet	Manufacturer#4	Brand#41	STANDARD ANODIZED TIN	12	WRAP CASE	1290.35	reful
+40982	almond antique misty red olive	Manufacturer#3	Brand#32	ECONOMY PLATED COPPER	1	LG PKG	1922.98	c foxes can s
+42669	almond antique medium spring khaki	Manufacturer#5	Brand#51	STANDARD BURNISHED TIN	6	MED CAN	1611.66	sits haggl
+45261	almond aquamarine floral ivory bisque	Manufacturer#4	Brand#42	SMALL PLATED STEEL	27	WRAP CASE	1206.26	careful
+48427	almond antique violet mint lemon	Manufacturer#4	Brand#42	PROMO POLISHED STEEL	39	SM CASE	1375.42	hely ironic i
+49671	almond antique gainsboro frosted violet	Manufacturer#4	Brand#41	SMALL BRUSHED BRASS	10	SM BOX	1620.67	ccounts run quick
+65667	almond aquamarine pink moccasin thistle	Manufacturer#1	Brand#12	LARGE BURNISHED STEEL	42	JUMBO CASE	1632.66	e across the expr
+78486	almond azure blanched chiffon midnight	Manufacturer#5	Brand#52	LARGE BRUSHED BRASS	23	MED BAG	1464.48	hely blith
+85768	almond antique chartreuse lavender yellow	Manufacturer#1	Brand#12	LARGE BRUSHED STEEL	34	SM BAG	1753.76	refull
+86428	almond aquamarine burnished black steel	Manufacturer#1	Brand#12	STANDARD ANODIZED STEEL	28	WRAP BAG	1414.42	arefully
+90681	almond antique chartreuse khaki white	Manufacturer#3	Brand#31	MEDIUM BURNISHED TIN	17	SM CASE	1671.68	are slyly after the sl
+105685	almond antique violet chocolate turquoise	Manufacturer#2	Brand#22	MEDIUM ANODIZED COPPER	14	MED CAN	1690.68	ly pending requ
+110592	almond antique salmon chartreuse burlywood	Manufacturer#1	Brand#15	PROMO BURNISHED NICKEL	6	JUMBO PKG	1602.59	to the furiously
+112398	almond antique metallic orange dim	Manufacturer#3	Brand#32	MEDIUM BURNISHED BRASS	19	JUMBO JAR	1410.39	ole car
+121152	almond antique burnished rose metallic	Manufacturer#1	Brand#14	PROMO PLATED TIN	2	JUMBO BOX	1173.15	e pinto beans h
+121152	almond antique burnished rose metallic	Manufacturer#1	Brand#14	PROMO PLATED TIN	2	JUMBO BOX	1173.15	e pinto beans h
+132666	almond aquamarine rose maroon antique	Manufacturer#2	Brand#24	SMALL POLISHED NICKEL	25	MED BOX	1698.66	even
+144293	almond antique olive coral navajo	Manufacturer#3	Brand#34	STANDARD POLISHED STEEL	45	JUMBO CAN	1337.29	ag furiously about
+146985	almond aquamarine midnight light salmon	Manufacturer#2	Brand#23	MEDIUM BURNISHED COPPER	2	SM CASE	2031.98	s cajole caref
+155733	almond antique sky peru orange	Manufacturer#5	Brand#53	SMALL PLATED BRASS	2	WRAP DRUM	1788.73	furiously. bra
+191709	almond antique violet turquoise frosted	Manufacturer#2	Brand#22	ECONOMY POLISHED STEEL	40	MED BOX	1800.7	haggle
+192697	almond antique blue firebrick mint	Manufacturer#5	Brand#52	MEDIUM BURNISHED TIN	31	LG DRUM	1789.69	ickly ir
+195606	almond aquamarine sandy cyan gainsboro	Manufacturer#2	Brand#25	STANDARD PLATED TIN	18	SM PKG	1701.6	ic de
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index 65e14af478a..ef976feb54c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -41,10 +41,10 @@ import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
import org.apache.hadoop.hive.ql.parse.TransformSpec;
import org.apache.hadoop.hive.ql.parse.SemanticException;
-import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -286,6 +286,15 @@ public interface HiveStorageHandler extends Configurable {
return false;
}
+  /**
+   *
+   * Gets the storage format descriptor to be used for temp table for LOAD data.
+   * @param table table object
+   * @return StorageFormatDescriptor if the storage handler can support load data
+   */
+  default StorageFormatDescriptor getStorageFormatDescriptor(Table table) throws SemanticException {
+    return null;
+  }
/**
* Check if CTAS and CMV operations should behave in a direct-insert manner (i.e. no move task).
* <p>
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 7ed6d1b2d12..8b8cc6b7f42 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
+import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -294,6 +295,15 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
if (ts.tableHandle.isNonNative()) {
+      // launch a tez job
+      StorageFormatDescriptor ss =
+          ts.tableHandle.getStorageHandler().getStorageFormatDescriptor(ts.tableHandle.getTTable());
+      if (ss != null) {
+        inputFormatClassName = ss.getInputFormat();
+        serDeClassName = ss.getSerde();
+        reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+        return;
+      }
throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
index 3e9b854d7ce..c1fe01ad54b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/StorageFormat.java
@@ -19,7 +19,6 @@ package org.apache.hadoop.hive.ql.parse;
import static org.apache.hadoop.hive.ql.parse.ParseUtils.ensureClassExists;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
@@ -165,14 +164,7 @@ public class StorageFormat {
}
protected void processStorageFormat(String name) throws SemanticException {
-    if (name.isEmpty()) {
-      throw new SemanticException("File format in STORED AS clause cannot be empty");
-    }
-    StorageFormatDescriptor descriptor = storageFormatFactory.get(name);
-    if (descriptor == null) {
-      throw new SemanticException("Unrecognized file format in STORED AS clause:" +
-          " '" + name + "'");
-    }
+    StorageFormatDescriptor descriptor = getDescriptor(name, "STORED AS clause");
inputFormat = ensureClassExists(descriptor.getInputFormat());
outputFormat = ensureClassExists(descriptor.getOutputFormat());
if (serde == null) {
@@ -245,4 +237,15 @@ public class StorageFormat {
public void setStorageHandler(String storageHandlerClass) throws SemanticException {
storageHandler = ensureClassExists(storageHandlerClass);
}
+
+  public static StorageFormatDescriptor getDescriptor(String format, String clause) throws SemanticException {
+    if (format.isEmpty()) {
+      throw new SemanticException("File format in " + clause + " cannot be empty");
+    }
+    StorageFormatDescriptor descriptor = storageFormatFactory.get(format);
+    if (descriptor == null) {
+      throw new SemanticException("Unrecognized file format in " + clause + ":" + " '" + format + "'");
+    }
+    return descriptor;
+  }
}