This is an automated email from the ASF dual-hosted git repository.
ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 7163566b01a HIVE-27244: Iceberg: Implement LOAD data for unpartitioned
table via Append API. (#4392). (Ayush Saxena, reviewed by Denys Kuzmenko, Butao
Zhang)
7163566b01a is described below
commit 7163566b01aeb1d87a422f72780d3f638e2600d9
Author: Ayush Saxena <[email protected]>
AuthorDate: Thu Jun 22 09:47:55 2023 +0530
HIVE-27244: Iceberg: Implement LOAD data for unpartitioned table via Append
API. (#4392). (Ayush Saxena, reviewed by Denys Kuzmenko, Butao Zhang)
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 4 +
.../iceberg/mr/hive/HiveIcebergStorageHandler.java | 12 ++
.../org/apache/iceberg/mr/hive/HiveTableUtil.java | 31 +++
.../src/test/queries/positive/iceberg_load_data.q | 19 +-
.../test/results/positive/iceberg_load_data.q.out | 223 ++++++++++++++++-----
.../org/apache/hadoop/hive/ql/exec/MoveTask.java | 13 +-
.../hive/ql/metadata/HiveStorageHandler.java | 17 ++
.../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java | 29 ++-
.../apache/hadoop/hive/ql/plan/LoadTableDesc.java | 16 ++
9 files changed, 305 insertions(+), 59 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f4d1c376d3f..196a1fc4eab 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -795,6 +795,10 @@ public class HiveConf extends Configuration {
"String used as a file extension for output files. \n" +
"If not set, defaults to the codec extension for text files (e.g.
\".gz\"), or no extension otherwise."),
+ HIVE_LOAD_DATA_USE_NATIVE_API("hive.load.data.use.native.api", true,
+ "Whether to use a native APIs for load queries to non-native
table(like iceberg), if false uses a Tez job for" +
+ " load queries"),
+
HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test
mode", true),
HIVE_IN_TEST_SSL("hive.in.ssl.test", false, "internal usage only, true in
SSL test mode", true),
// TODO: this needs to be removed; see TestReplicationScenarios* comments.
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index d6abacf4c67..ece13c519ca 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -349,6 +349,18 @@ public class HiveIcebergStorageHandler implements
HiveStoragePredicateHandler, H
return null;
}
+ public boolean supportsAppendData(org.apache.hadoop.hive.metastore.api.Table
table) throws SemanticException {
+ Table icebergTbl = IcebergTableUtil.getTable(conf, table);
+ return icebergTbl.spec().isUnpartitioned();
+ }
+
+ public void appendFiles(org.apache.hadoop.hive.metastore.api.Table table,
URI fromURI, boolean isOverwrite)
+ throws SemanticException {
+ Table icebergTbl = IcebergTableUtil.getTable(conf, table);
+ String format =
table.getParameters().get(TableProperties.DEFAULT_FILE_FORMAT);
+ HiveTableUtil.appendFiles(fromURI, format, icebergTbl, isOverwrite, conf);
+ }
+
@Override
public Map<String, String> getBasicStatistics(Partish partish) {
org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
diff --git
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
index a02392417bf..3bd950c5869 100644
---
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
+++
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.UncheckedIOException;
+import java.net.URI;
import java.util.Collections;
import java.util.List;
import java.util.Map;
@@ -44,16 +45,21 @@ import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.metastore.utils.FileUtils;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.iceberg.AppendFiles;
import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DeleteFiles;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.SerializableTable;
import org.apache.iceberg.StructLike;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.Transaction;
import org.apache.iceberg.data.TableMigrationUtil;
import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.hadoop.HadoopFileIO;
import org.apache.iceberg.hadoop.Util;
import org.apache.iceberg.io.FileIO;
@@ -168,6 +174,31 @@ public class HiveTableUtil {
return dataFiles;
}
+ public static void appendFiles(URI fromURI, String format, Table icebergTbl,
boolean isOverwrite, Configuration conf)
+ throws SemanticException {
+ try {
+ Transaction transaction = icebergTbl.newTransaction();
+ if (isOverwrite) {
+ DeleteFiles delete = transaction.newDelete();
+ delete.deleteFromRowFilter(Expressions.alwaysTrue());
+ delete.commit();
+ }
+ AppendFiles append = transaction.newAppend();
+ PartitionSpec spec = icebergTbl.spec();
+ MetricsConfig metricsConfig =
MetricsConfig.fromProperties(icebergTbl.properties());
+ String nameMappingString =
icebergTbl.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
+ NameMapping nameMapping = nameMappingString != null ?
NameMappingParser.fromJson(nameMappingString) : null;
+ RemoteIterator<LocatedFileStatus> filesIterator =
HiveTableUtil.getFilesIterator(new Path(fromURI), conf);
+ List<DataFile> dataFiles = HiveTableUtil.getDataFiles(filesIterator,
Collections.emptyMap(),
+ format == null ? IOConstants.PARQUET : format, spec, metricsConfig,
nameMapping, conf);
+ dataFiles.forEach(append::appendFile);
+ append.commit();
+ transaction.commitTransaction();
+ } catch (Exception e) {
+ throw new SemanticException("Can not append data files", e);
+ }
+ }
+
public static RemoteIterator<LocatedFileStatus> getFilesIterator(Path path,
Configuration conf) throws MetaException {
try {
FileSystem fileSystem = FileSystem.get(path.toUri(), conf);
diff --git
a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
index 442357a706e..63e0f55d684 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
@@ -5,6 +5,7 @@ create external table ice_parquet(
stored by iceberg;
explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE
INTO TABLE ice_parquet;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/parquet_partition'
OVERWRITE INTO TABLE ice_parquet;
LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO
TABLE ice_parquet;
@@ -17,6 +18,13 @@ stored by iceberg
STORED AS AVRO;
explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO
TABLE ice_avro;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro;
+
+set hive.load.data.use.native.api=false;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO
TABLE ice_avro;
+
+set hive.load.data.use.native.api=true;
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE
ice_avro;
@@ -37,7 +45,16 @@ stored by iceberg
STORED AS ORC;
explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO
TABLE ice_orc;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE
INTO TABLE ice_orc;
+
+LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE
ice_orc;
+
+select * from ice_orc order by p_partkey;
+
+select count(*) from ice_orc;
LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc;
-select * from ice_orc order by p_partkey;
\ No newline at end of file
+select * from ice_orc order by p_partkey;
+
+select count(*) from ice_orc;
\ No newline at end of file
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
index 098455ab8d0..00ba9725619 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
@@ -60,6 +60,60 @@ Stage-3
Output:["strcol","intcol","pcol"]
Please refer to the previous Select Operator
[SEL_12]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition'
OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition'
OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+ Stats Work{}
+ Stage-0
+ Move Operator
+ table:{"name:":"default.ice_parquet"}
+ Stage-2
+ Dependency Collection{}
+ Stage-1
+ Reducer 2 vectorized
+ File Output Operator [FS_18]
+ table:{"name:":"default.ice_parquet"}
+ Select Operator [SEL_17]
+ Output:["_col0","_col1","_col2","_col2"]
+ <-Map 1 [SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_13]
+ PartitionCols:_col2
+ Select Operator [SEL_12] (rows=77/6 width=187)
+ Output:["_col0","_col1","_col2"]
+ TableScan [TS_0] (rows=77/6 width=187)
+
default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"]
+ Reducer 3 vectorized
+ File Output Operator [FS_21]
+ Select Operator [SEL_20] (rows=1/1 width=752)
+
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"]
+ Group By Operator [GBY_19] (rows=1/1 width=752)
+
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"]
+ <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+ PARTITION_ONLY_SHUFFLE [RS_16]
+ Group By Operator [GBY_15] (rows=1/1 width=752)
+
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"]
+ Select Operator [SEL_14] (rows=77/6 width=187)
+ Output:["strcol","intcol","pcol"]
+ Please refer to the previous Select Operator
[SEL_12]
+
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition'
OVERWRITE INTO TABLE ice_parquet
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
@@ -98,6 +152,26 @@ STORED AS AVRO
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_avro
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH
'../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+Stage-0
+ Move Operator
+ table:{"name:":"default.ice_avro"}
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+Stage-0
+ Move Operator
+ table:{"name:":"default.ice_avro"}
+
PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
@@ -138,13 +212,9 @@ Stage-3
Please refer to the previous Select Operator [SEL_1]
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
-PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
-PREHOOK: Output: default@ice_avro
+PREHOOK: type: LOAD
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro'
OVERWRITE INTO TABLE ice_avro
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_avro
+POSTHOOK: type: LOAD
PREHOOK: query: select * from ice_avro order by number
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_avro
@@ -197,52 +267,76 @@ POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_orc
PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc'
OVERWRITE INTO TABLE ice_orc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
-PREHOOK: Output: default@ice_orc
+PREHOOK: type: LOAD
POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc'
OVERWRITE INTO TABLE ice_orc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_orc
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+POSTHOOK: type: LOAD
+Stage-0
+ Move Operator
+ table:{"name:":"default.ice_orc"}
-Stage-3
- Stats Work{}
- Stage-0
- Move Operator
- table:{"name:":"default.ice_orc"}
- Stage-2
- Dependency Collection{}
- Stage-1
- Reducer 2 vectorized
- File Output Operator [FS_17]
- Select Operator [SEL_16] (rows=1 width=3008)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52"
[...]
- Group By Operator [GBY_15] (rows=1 width=3008)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VAL
[...]
- <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
- File Output Operator [FS_11]
- table:{"name:":"default.ice_orc"}
- Select Operator [SEL_10] (rows=33 width=1120)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
- TableScan [TS_0] (rows=33 width=1120)
-
default@ice_orc__temp_table_for_load_data__,ice_orc__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
- PARTITION_ONLY_SHUFFLE [RS_14]
- Group By Operator [GBY_13] (rows=1 width=3008)
-
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(p_partkey)","max(p_partkey)","count(1)","count(p_partkey)","compute_bit_vector_hll(p_partkey)","max(length(p_name))",
[...]
- Select Operator [SEL_12] (rows=33 width=1120)
-
Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
- Please refer to the previous Select Operator
[SEL_10]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE
INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE
INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH
'../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+Stage-0
+ Move Operator
+ table:{"name:":"default.ice_orc"}
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE
ice_orc
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE
INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE
INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+PREHOOK: query: select * from ice_orc order by p_partkey
PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
-PREHOOK: Output: default@ice_orc
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE
ice_orc
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_orc order by p_partkey
POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_orc
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+15103 almond aquamarine dodger light gainsboro Manufacturer#5
Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder
carefu
+17273 almond antique forest lavender goldenrod Manufacturer#3
Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along
the
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41
ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41
STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY
PLATED COPPER 1 LG PKG 1922.98 c foxes can s
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51
STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42
SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42
PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41
SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12
LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52
LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1
Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12
STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31
MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
+105685 almond antique violet chocolate turquoise Manufacturer#2
Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1
Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the
furiously
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32
MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24
SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34
STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23
MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL
PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22
ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52
MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25
STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+PREHOOK: query: select count(*) from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+26
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE
ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE
ice_orc
+POSTHOOK: type: LOAD
PREHOOK: query: select * from ice_orc order by p_partkey
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_orc
@@ -252,28 +346,63 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_orc
POSTHOOK: Output: hdfs://### HDFS PATH ###
15103 almond aquamarine dodger light gainsboro Manufacturer#5
Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder
carefu
+15103 almond aquamarine dodger light gainsboro Manufacturer#5
Brand#53 ECONOMY BURNISHED STEEL 46 LG PACK 1018.1 packages hinder
carefu
+17273 almond antique forest lavender goldenrod Manufacturer#3
Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along
the
17273 almond antique forest lavender goldenrod Manufacturer#3
Brand#35 PROMO ANODIZED TIN 14 JUMBO CASE 1190.27 along
the
17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41
ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
+17927 almond aquamarine yellow dodger mint Manufacturer#4 Brand#41
ECONOMY BRUSHED COPPER 7 SM PKG 1844.92 ites. eve
33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41
STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+33357 almond azure aquamarine papaya violet Manufacturer#4 Brand#41
STANDARD ANODIZED TIN 12 WRAP CASE 1290.35 reful
+40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY
PLATED COPPER 1 LG PKG 1922.98 c foxes can s
40982 almond antique misty red olive Manufacturer#3 Brand#32 ECONOMY
PLATED COPPER 1 LG PKG 1922.98 c foxes can s
42669 almond antique medium spring khaki Manufacturer#5 Brand#51
STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+42669 almond antique medium spring khaki Manufacturer#5 Brand#51
STANDARD BURNISHED TIN 6 MED CAN 1611.66 sits haggl
+45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42
SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
45261 almond aquamarine floral ivory bisque Manufacturer#4 Brand#42
SMALL PLATED STEEL 27 WRAP CASE 1206.26 careful
48427 almond antique violet mint lemon Manufacturer#4 Brand#42
PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
+48427 almond antique violet mint lemon Manufacturer#4 Brand#42
PROMO POLISHED STEEL 39 SM CASE 1375.42 hely ironic i
49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41
SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+49671 almond antique gainsboro frosted violet Manufacturer#4 Brand#41
SMALL BRUSHED BRASS 10 SM BOX 1620.67 ccounts run quick
+65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12
LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
65667 almond aquamarine pink moccasin thistle Manufacturer#1 Brand#12
LARGE BURNISHED STEEL 42 JUMBO CASE 1632.66 e across the expr
78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52
LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+78486 almond azure blanched chiffon midnight Manufacturer#5 Brand#52
LARGE BRUSHED BRASS 23 MED BAG 1464.48 hely blith
+85768 almond antique chartreuse lavender yellow Manufacturer#1
Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
85768 almond antique chartreuse lavender yellow Manufacturer#1
Brand#12 LARGE BRUSHED STEEL 34 SM BAG 1753.76 refull
86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12
STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+86428 almond aquamarine burnished black steel Manufacturer#1 Brand#12
STANDARD ANODIZED STEEL 28 WRAP BAG 1414.42 arefully
+90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31
MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
90681 almond antique chartreuse khaki white Manufacturer#3 Brand#31
MEDIUM BURNISHED TIN 17 SM CASE 1671.68 are slyly after the sl
105685 almond antique violet chocolate turquoise Manufacturer#2
Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+105685 almond antique violet chocolate turquoise Manufacturer#2
Brand#22 MEDIUM ANODIZED COPPER 14 MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood Manufacturer#1
Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the
furiously
110592 almond antique salmon chartreuse burlywood Manufacturer#1
Brand#15 PROMO BURNISHED NICKEL 6 JUMBO PKG 1602.59 to the
furiously
112398 almond antique metallic orange dim Manufacturer#3 Brand#32
MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+112398 almond antique metallic orange dim Manufacturer#3 Brand#32
MEDIUM BURNISHED BRASS 19 JUMBO JAR 1410.39 ole car
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
+121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
121152 almond antique burnished rose metallic Manufacturer#1 Brand#14
PROMO PLATED TIN 2 JUMBO BOX 1173.15 e pinto beans h
132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24
SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+132666 almond aquamarine rose maroon antique Manufacturer#2 Brand#24
SMALL POLISHED NICKEL 25 MED BOX 1698.66 even
+144293 almond antique olive coral navajo Manufacturer#3 Brand#34
STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
144293 almond antique olive coral navajo Manufacturer#3 Brand#34
STANDARD POLISHED STEEL 45 JUMBO CAN 1337.29 ag furiously about
146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23
MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+146985 almond aquamarine midnight light salmon Manufacturer#2 Brand#23
MEDIUM BURNISHED COPPER 2 SM CASE 2031.98 s cajole caref
+155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL
PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
155733 almond antique sky peru orange Manufacturer#5 Brand#53 SMALL
PLATED BRASS 2 WRAP DRUM 1788.73 furiously. bra
191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22
ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+191709 almond antique violet turquoise frosted Manufacturer#2 Brand#22
ECONOMY POLISHED STEEL 40 MED BOX 1800.7 haggle
+192697 almond antique blue firebrick mint Manufacturer#5 Brand#52
MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
192697 almond antique blue firebrick mint Manufacturer#5 Brand#52
MEDIUM BURNISHED TIN 31 LG DRUM 1789.69 ickly ir
195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25
STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+195606 almond aquamarine sandy cyan gainsboro Manufacturer#2 Brand#25
STANDARD PLATED TIN 18 SM PKG 1701.6 ic de
+PREHOOK: query: select count(*) from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+52
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 555bd7fb5b6..6aadb955332 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
import
org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
import
org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
@@ -1059,12 +1060,18 @@ public class MoveTask extends Task<MoveWork> implements
Serializable {
* @return Returns <code>true</code> if the commit was successfully executed
* @throws HiveException If we tried to commit, but there was an error
during the process
*/
- private static boolean checkAndCommitNatively(MoveWork moveWork,
Configuration configuration) throws HiveException {
+ private boolean checkAndCommitNatively(MoveWork moveWork, Configuration
configuration) throws HiveException {
String storageHandlerClass = null;
Properties commitProperties = null;
boolean overwrite = false;
-
- if (moveWork.getLoadTableWork() != null) {
+ LoadTableDesc loadTableWork = moveWork.getLoadTableWork();
+ if (loadTableWork != null) {
+ if (loadTableWork.isUseAppendForLoad()) {
+ loadTableWork.getMdTable().getStorageHandler()
+ .appendFiles(loadTableWork.getMdTable().getTTable(),
loadTableWork.getSourcePath().toUri(),
+ loadTableWork.getLoadFileType() == LoadFileType.REPLACE_ALL);
+ return true;
+ }
// Get the info from the table data
TableDesc tableDesc = moveWork.getLoadTableWork().getTable();
storageHandlerClass = tableDesc.getProperties().getProperty(
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index d1efdc3c64b..a0bd32d43ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.LockType;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hadoop.hive.ql.Context.Operation;
+import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
@@ -304,6 +305,22 @@ public interface HiveStorageHandler extends Configurable {
default StorageFormatDescriptor getStorageFormatDescriptor(Table table)
throws SemanticException {
return null;
}
+
+ /**
+ * Checks whether the table supports appending data files directly.
+ * @param table the table
+ * @return true if data files can be appended directly to the table
+ * @throws SemanticException in case of any error.
+ */
+ default boolean supportsAppendData(Table table) throws SemanticException {
+ return false;
+ }
+
+ default void appendFiles(Table tbl, URI fromURI, boolean isOverwrite)
+ throws SemanticException {
+ throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
+ }
+
/**
* Check if CTAS and CMV operations should behave in a direct-insert manner
(i.e. no move task).
* <p>
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 8b8cc6b7f42..a172813a291 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.BasicStatsWork;
@@ -67,6 +68,8 @@ import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
+import static
org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_LOAD_DATA_USE_NATIVE_API;
+
/**
* LoadSemanticAnalyzer.
*
@@ -295,16 +298,26 @@ public class LoadSemanticAnalyzer extends
SemanticAnalyzer {
throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
}
if (ts.tableHandle.isNonNative()) {
- // launch a tez job
- StorageFormatDescriptor ss =
-
ts.tableHandle.getStorageHandler().getStorageFormatDescriptor(ts.tableHandle.getTTable());
- if (ss != null) {
- inputFormatClassName = ss.getInputFormat();
- serDeClassName = ss.getSerde();
- reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+ HiveStorageHandler storageHandler = ts.tableHandle.getStorageHandler();
+ boolean isUseNativeApi = conf.getBoolVar(HIVE_LOAD_DATA_USE_NATIVE_API);
+ if (isUseNativeApi &&
storageHandler.supportsAppendData(ts.tableHandle.getTTable())) {
+ LoadTableDesc loadTableWork =
+ new LoadTableDesc(new Path(fromURI), ts.tableHandle, isOverWrite,
true, isOverWrite);
+ Task<?> childTask =
+ TaskFactory.get(new MoveWork(getInputs(), getOutputs(),
loadTableWork, null, true, isLocal));
+ rootTasks.add(childTask);
return;
+ } else {
+ // launch a tez job
+ StorageFormatDescriptor ss =
storageHandler.getStorageFormatDescriptor(ts.tableHandle.getTTable());
+ if (ss != null) {
+ inputFormatClassName = ss.getInputFormat();
+ serDeClassName = ss.getSerde();
+ reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+ return;
+ }
+ throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
}
- throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
}
if(ts.tableHandle.isStoredAsSubDirectories()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
index 3836ffafb8f..ca45779c389 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
@@ -45,6 +45,8 @@ public class LoadTableDesc extends LoadDesc implements
Serializable {
private boolean isInsertOverwrite;
private boolean isDirectInsert;
+ private boolean useAppendForLoad;
+
// TODO: the below seem like they should just be combined into partitionDesc
private Table mdTable;
private org.apache.hadoop.hive.ql.plan.TableDesc table;
@@ -157,6 +159,20 @@ public class LoadTableDesc extends LoadDesc implements
Serializable {
}
}
+ public LoadTableDesc(Path path, Table tableHandle, boolean isOverWrite,
boolean useAppendForLoad,
+ boolean isInsertOverwrite) {
+ super(path, AcidUtils.Operation.NOT_ACID);
+ this.mdTable = tableHandle;
+ this.useAppendForLoad = useAppendForLoad;
+ this.loadFileType = isOverWrite ? LoadFileType.REPLACE_ALL :
LoadFileType.KEEP_EXISTING;
+ this.table = Utilities.getTableDesc(tableHandle);
+ this.isInsertOverwrite = isInsertOverwrite;
+ }
+
+ public boolean isUseAppendForLoad() {
+ return useAppendForLoad;
+ }
+
private void init(
final org.apache.hadoop.hive.ql.plan.TableDesc table,
final Map<String, String> partitionSpec,