This is an automated email from the ASF dual-hosted git repository.

ayushsaxena pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 7163566b01a HIVE-27244: Iceberg: Implement LOAD data for unpartitioned table via Append API. (#4392). (Ayush Saxena, reviewed by Denys Kuzmenko, Butao Zhang)
7163566b01a is described below

commit 7163566b01aeb1d87a422f72780d3f638e2600d9
Author: Ayush Saxena <[email protected]>
AuthorDate: Thu Jun 22 09:47:55 2023 +0530

    HIVE-27244: Iceberg: Implement LOAD data for unpartitioned table via Append API. (#4392). (Ayush Saxena, reviewed by Denys Kuzmenko, Butao Zhang)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   4 +
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java |  12 ++
 .../org/apache/iceberg/mr/hive/HiveTableUtil.java  |  31 +++
 .../src/test/queries/positive/iceberg_load_data.q  |  19 +-
 .../test/results/positive/iceberg_load_data.q.out  | 223 ++++++++++++++++-----
 .../org/apache/hadoop/hive/ql/exec/MoveTask.java   |  13 +-
 .../hive/ql/metadata/HiveStorageHandler.java       |  17 ++
 .../hadoop/hive/ql/parse/LoadSemanticAnalyzer.java |  29 ++-
 .../apache/hadoop/hive/ql/plan/LoadTableDesc.java  |  16 ++
 9 files changed, 305 insertions(+), 59 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index f4d1c376d3f..196a1fc4eab 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -795,6 +795,10 @@ public class HiveConf extends Configuration {
         "String used as a file extension for output files. \n" +
         "If not set, defaults to the codec extension for text files (e.g. 
\".gz\"), or no extension otherwise."),
 
+    HIVE_LOAD_DATA_USE_NATIVE_API("hive.load.data.use.native.api", true,
+        "Whether to use native APIs for load queries against non-native tables (like Iceberg); " +
+            "if false, a Tez job is used for load queries"),
+
     HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true),
     HIVE_IN_TEST_SSL("hive.in.ssl.test", false, "internal usage only, true in SSL test mode", true),
     // TODO: this needs to be removed; see TestReplicationScenarios* comments.
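
The new flag defaults to true. For completeness, it can also be toggled programmatically through the HiveConf API rather than a session-level set command; a minimal sketch (the demo class is illustrative, HIVE_LOAD_DATA_USE_NATIVE_API is the enum constant added above):

    import org.apache.hadoop.hive.conf.HiveConf;

    public class NativeLoadToggle {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Disable the native Append API path; LOAD DATA then falls back to a Tez job.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_LOAD_DATA_USE_NATIVE_API, false);
        // LoadSemanticAnalyzer reads the same flag at plan time.
        System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVE_LOAD_DATA_USE_NATIVE_API));
      }
    }
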
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index d6abacf4c67..ece13c519ca 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -349,6 +349,18 @@ public class HiveIcebergStorageHandler implements HiveStoragePredicateHandler, H
     return null;
   }
 
+  public boolean supportsAppendData(org.apache.hadoop.hive.metastore.api.Table table) throws SemanticException {
+    Table icebergTbl = IcebergTableUtil.getTable(conf, table);
+    return icebergTbl.spec().isUnpartitioned();
+  }
+
+  public void appendFiles(org.apache.hadoop.hive.metastore.api.Table table, URI fromURI, boolean isOverwrite)
+      throws SemanticException {
+    Table icebergTbl = IcebergTableUtil.getTable(conf, table);
+    String format = table.getParameters().get(TableProperties.DEFAULT_FILE_FORMAT);
+    HiveTableUtil.appendFiles(fromURI, format, icebergTbl, isOverwrite, conf);
+  }
+
   @Override
   public Map<String, String> getBasicStatistics(Partish partish) {
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
index a02392417bf..3bd950c5869 100644
--- a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
+++ b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveTableUtil.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.ObjectInputStream;
 import java.io.ObjectOutputStream;
 import java.io.UncheckedIOException;
+import java.net.URI;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -44,16 +45,21 @@ import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.metastore.utils.FileUtils;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.iceberg.AppendFiles;
 import org.apache.iceberg.DataFile;
+import org.apache.iceberg.DeleteFiles;
 import org.apache.iceberg.MetricsConfig;
 import org.apache.iceberg.PartitionSpec;
 import org.apache.iceberg.SerializableTable;
 import org.apache.iceberg.StructLike;
 import org.apache.iceberg.Table;
 import org.apache.iceberg.TableProperties;
+import org.apache.iceberg.Transaction;
 import org.apache.iceberg.data.TableMigrationUtil;
 import org.apache.iceberg.exceptions.NotFoundException;
+import org.apache.iceberg.expressions.Expressions;
 import org.apache.iceberg.hadoop.HadoopFileIO;
 import org.apache.iceberg.hadoop.Util;
 import org.apache.iceberg.io.FileIO;
@@ -168,6 +174,31 @@ public class HiveTableUtil {
     return dataFiles;
   }
 
+  public static void appendFiles(URI fromURI, String format, Table icebergTbl, boolean isOverwrite, Configuration conf)
+      throws SemanticException {
+    try {
+      Transaction transaction = icebergTbl.newTransaction();
+      if (isOverwrite) {
+        DeleteFiles delete = transaction.newDelete();
+        delete.deleteFromRowFilter(Expressions.alwaysTrue());
+        delete.commit();
+      }
+      AppendFiles append = transaction.newAppend();
+      PartitionSpec spec = icebergTbl.spec();
+      MetricsConfig metricsConfig = MetricsConfig.fromProperties(icebergTbl.properties());
+      String nameMappingString = icebergTbl.properties().get(TableProperties.DEFAULT_NAME_MAPPING);
+      NameMapping nameMapping = nameMappingString != null ? NameMappingParser.fromJson(nameMappingString) : null;
+      RemoteIterator<LocatedFileStatus> filesIterator = HiveTableUtil.getFilesIterator(new Path(fromURI), conf);
+      List<DataFile> dataFiles = HiveTableUtil.getDataFiles(filesIterator, Collections.emptyMap(),
+          format == null ? IOConstants.PARQUET : format, spec, metricsConfig, nameMapping, conf);
+      dataFiles.forEach(append::appendFile);
+      append.commit();
+      transaction.commitTransaction();
+    } catch (Exception e) {
+      throw new SemanticException("Cannot append data files", e);
+    }
+  }
+
   public static RemoteIterator<LocatedFileStatus> getFilesIterator(Path path, Configuration conf) throws MetaException {
     try {
       FileSystem fileSystem = FileSystem.get(path.toUri(), conf);
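
The appendFiles method above is the core of the change: an optional delete-all followed by an append, committed as one Iceberg transaction so an OVERWRITE load swaps the table contents atomically. A condensed sketch of the same commit pattern against the Iceberg core API (file metrics are hard-coded placeholders here; the patch derives real ones via TableMigrationUtil):

    import java.util.List;

    import org.apache.iceberg.AppendFiles;
    import org.apache.iceberg.DataFile;
    import org.apache.iceberg.DataFiles;
    import org.apache.iceberg.FileFormat;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.Transaction;
    import org.apache.iceberg.expressions.Expressions;

    public class AppendPatternSketch {
      static void load(Table table, List<String> files, boolean overwrite) {
        Transaction txn = table.newTransaction();
        if (overwrite) {
          // Remove all existing rows; not visible until commitTransaction().
          txn.newDelete().deleteFromRowFilter(Expressions.alwaysTrue()).commit();
        }
        AppendFiles append = txn.newAppend();
        for (String path : files) {
          DataFile dataFile = DataFiles.builder(table.spec())
              .withPath(path)
              .withFormat(FileFormat.PARQUET) // the patch also falls back to Parquet
              .withFileSizeInBytes(1024L)     // placeholder metric
              .withRecordCount(100L)          // placeholder metric
              .build();
          append.appendFile(dataFile);
        }
        append.commit();
        // The delete and the append become visible together.
        txn.commitTransaction();
      }
    }
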
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
index 442357a706e..63e0f55d684 100644
--- a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_load_data.q
@@ -5,6 +5,7 @@ create external table ice_parquet(
 stored by iceberg;
 
explain LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
 
LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet;
 
@@ -17,6 +18,13 @@ stored by iceberg
 STORED AS AVRO;
 
explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+set hive.load.data.use.native.api=false;
+
+explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
+
+set hive.load.data.use.native.api=true;
 
LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro;
 
@@ -37,7 +45,16 @@ stored by iceberg
 STORED AS ORC;
 
explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc;
+explain analyze LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc;
+
+LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc;
+
+select * from ice_orc order by p_partkey;
+
+select count(*) from ice_orc;
 
 LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc;
 
-select * from ice_orc order by p_partkey;
\ No newline at end of file
+select * from ice_orc order by p_partkey;
+
+select count(*) from ice_orc;
\ No newline at end of file
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
index 098455ab8d0..00ba9725619 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/iceberg_load_data.q.out
@@ -60,6 +60,60 @@ Stage-3
                           Output:["strcol","intcol","pcol"]
                           Please refer to the previous Select Operator [SEL_12]
 
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+PREHOOK: Output: default@ice_parquet
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_parquet__temp_table_for_load_data__
+POSTHOOK: Output: default@ice_parquet
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+
+Stage-3
+  Stats Work{}
+    Stage-0
+      Move Operator
+        table:{"name:":"default.ice_parquet"}
+        Stage-2
+          Dependency Collection{}
+            Stage-1
+              Reducer 2 vectorized
+              File Output Operator [FS_18]
+                table:{"name:":"default.ice_parquet"}
+                Select Operator [SEL_17]
+                  Output:["_col0","_col1","_col2","_col2"]
+                <-Map 1 [SIMPLE_EDGE] vectorized
+                  PARTITION_ONLY_SHUFFLE [RS_13]
+                    PartitionCols:_col2
+                    Select Operator [SEL_12] (rows=77/6 width=187)
+                      Output:["_col0","_col1","_col2"]
+                      TableScan [TS_0] (rows=77/6 width=187)
+                        default@ice_parquet__temp_table_for_load_data__,ice_parquet__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["strcol","intcol","pcol"]
+              Reducer 3 vectorized
+              File Output Operator [FS_21]
+                Select Operator [SEL_20] (rows=1/1 width=752)
+                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17"]
+                  Group By Operator [GBY_19] (rows=1/1 width=752)
+                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(VALUE._col0)","avg(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","min(VALUE._col5)","max(VALUE._col6)","count(VALUE._col7)","compute_bit_vector_hll(VALUE._col8)","min(VALUE._col9)","max(VALUE._col10)","count(VALUE._col11)","compute_bit_vector_hll(VALUE._col12)"]
+                  <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
+                    PARTITION_ONLY_SHUFFLE [RS_16]
+                      Group By Operator [GBY_15] (rows=1/1 width=752)
+                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["max(length(strcol))","avg(COALESCE(length(strcol),0))","count(1)","count(strcol)","compute_bit_vector_hll(strcol)","min(intcol)","max(intcol)","count(intcol)","compute_bit_vector_hll(intcol)","min(pcol)","max(pcol)","count(pcol)","compute_bit_vector_hll(pcol)"]
+                        Select Operator [SEL_14] (rows=77/6 width=187)
+                          Output:["strcol","intcol","pcol"]
+                           Please refer to the previous Select Operator [SEL_12]
+
 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_partition' OVERWRITE INTO TABLE ice_parquet
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ice_parquet__temp_table_for_load_data__
@@ -98,6 +152,26 @@ STORED AS AVRO
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@ice_avro
+PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+Stage-0
+  Move Operator
+    table:{"name:":"default.ice_avro"}
+
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+PREHOOK: type: LOAD
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
+POSTHOOK: type: LOAD
+Stage-0
+  Move Operator
+    table:{"name:":"default.ice_avro"}
+
 PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
@@ -138,13 +212,9 @@ Stage-3
                            Please refer to the previous Select Operator [SEL_1]
 
 PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
-PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_avro__temp_table_for_load_data__
-PREHOOK: Output: default@ice_avro
+PREHOOK: type: LOAD
 POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/doctors.avro' OVERWRITE INTO TABLE ice_avro
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_avro__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_avro
+POSTHOOK: type: LOAD
 PREHOOK: query: select * from ice_avro order by number
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ice_avro
@@ -197,52 +267,76 @@ POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@ice_orc
 PREHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
-PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
-PREHOOK: Output: default@ice_orc
+PREHOOK: type: LOAD
 POSTHOOK: query: explain LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
-POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_orc
-Vertex dependency in root stage
-Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+POSTHOOK: type: LOAD
+Stage-0
+  Move Operator
+    table:{"name:":"default.ice_orc"}
 
-Stage-3
-  Stats Work{}
-    Stage-0
-      Move Operator
-        table:{"name:":"default.ice_orc"}
-        Stage-2
-          Dependency Collection{}
-            Stage-1
-              Reducer 2 vectorized
-              File Output Operator [FS_17]
-                Select Operator [SEL_16] (rows=1 width=3008)
-                  Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36","_col37","_col38","_col39","_col40","_col41","_col42","_col43","_col44","_col45","_col46","_col47","_col48","_col49","_col50","_col51","_col52" [...]
-                  Group By Operator [GBY_15] (rows=1 width=3008)
-                    Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(VALUE._col0)","max(VALUE._col1)","count(VALUE._col2)","count(VALUE._col3)","compute_bit_vector_hll(VALUE._col4)","max(VAL [...]
-                  <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized
-                    File Output Operator [FS_11]
-                      table:{"name:":"default.ice_orc"}
-                      Select Operator [SEL_10] (rows=33 width=1120)
-                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"]
-                        TableScan [TS_0] (rows=33 width=1120)
-                          default@ice_orc__temp_table_for_load_data__,ice_orc__temp_table_for_load_data__,Tbl:COMPLETE,Col:NONE,Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
-                    PARTITION_ONLY_SHUFFLE [RS_14]
-                      Group By Operator [GBY_13] (rows=1 width=3008)
-                        Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23","_col24","_col25","_col26","_col27","_col28","_col29","_col30","_col31","_col32","_col33","_col34","_col35","_col36"],aggregations:["min(p_partkey)","max(p_partkey)","count(1)","count(p_partkey)","compute_bit_vector_hll(p_partkey)","max(length(p_name))", [...]
-                        Select Operator [SEL_12] (rows=33 width=1120)
-                          Output:["p_partkey","p_name","p_mfgr","p_brand","p_type","p_size","p_container","p_retailprice","p_comment"]
-                           Please refer to the previous Select Operator [SEL_10]
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+PREHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: explain analyze LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+Stage-0
+  Move Operator
+    table:{"name:":"default.ice_orc"}
 
-PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' OVERWRITE INTO TABLE ice_orc
+POSTHOOK: type: LOAD
+PREHOOK: query: select * from ice_orc order by p_partkey
 PREHOOK: type: QUERY
-PREHOOK: Input: default@ice_orc__temp_table_for_load_data__
-PREHOOK: Output: default@ice_orc
-POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from ice_orc order by p_partkey
 POSTHOOK: type: QUERY
-POSTHOOK: Input: default@ice_orc__temp_table_for_load_data__
-POSTHOOK: Output: default@ice_orc
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+15103  almond aquamarine dodger light gainsboro        Manufacturer#5  Brand#53        ECONOMY BURNISHED STEEL 46      LG PACK 1018.1  packages hinder carefu
+17273  almond antique forest lavender goldenrod        Manufacturer#3  Brand#35        PROMO ANODIZED TIN      14      JUMBO CASE      1190.27 along the
+17927  almond aquamarine yellow dodger mint    Manufacturer#4  Brand#41        ECONOMY BRUSHED COPPER  7       SM PKG  1844.92 ites. eve
+33357  almond azure aquamarine papaya violet   Manufacturer#4  Brand#41        STANDARD ANODIZED TIN   12      WRAP CASE       1290.35 reful
+40982  almond antique misty red olive  Manufacturer#3  Brand#32        ECONOMY PLATED COPPER   1       LG PKG  1922.98 c foxes can s
+42669  almond antique medium spring khaki      Manufacturer#5  Brand#51        STANDARD BURNISHED TIN  6       MED CAN 1611.66 sits haggl
+45261  almond aquamarine floral ivory bisque   Manufacturer#4  Brand#42        SMALL PLATED STEEL      27      WRAP CASE       1206.26 careful
+48427  almond antique violet mint lemon        Manufacturer#4  Brand#42        PROMO POLISHED STEEL    39      SM CASE 1375.42 hely ironic i
+49671  almond antique gainsboro frosted violet Manufacturer#4  Brand#41        SMALL BRUSHED BRASS     10      SM BOX  1620.67 ccounts run quick
+65667  almond aquamarine pink moccasin thistle Manufacturer#1  Brand#12        LARGE BURNISHED STEEL   42      JUMBO CASE      1632.66 e across the expr
+78486  almond azure blanched chiffon midnight  Manufacturer#5  Brand#52        LARGE BRUSHED BRASS     23      MED BAG 1464.48 hely blith
+85768  almond antique chartreuse lavender yellow       Manufacturer#1  Brand#12        LARGE BRUSHED STEEL     34      SM BAG  1753.76 refull
+86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12        STANDARD ANODIZED STEEL 28      WRAP BAG        1414.42 arefully 
+90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31        MEDIUM BURNISHED TIN    17      SM CASE 1671.68 are slyly after the sl
+105685 almond antique violet chocolate turquoise       Manufacturer#2  Brand#22        MEDIUM ANODIZED COPPER  14      MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood      Manufacturer#1  Brand#15        PROMO BURNISHED NICKEL  6       JUMBO PKG       1602.59  to the furiously
+112398 almond antique metallic orange dim      Manufacturer#3  Brand#32        MEDIUM BURNISHED BRASS  19      JUMBO JAR       1410.39 ole car
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
+132666 almond aquamarine rose maroon antique   Manufacturer#2  Brand#24        SMALL POLISHED NICKEL   25      MED BOX 1698.66 even 
+144293 almond antique olive coral navajo       Manufacturer#3  Brand#34        STANDARD POLISHED STEEL 45      JUMBO CAN       1337.29 ag furiously about 
+146985 almond aquamarine midnight light salmon Manufacturer#2  Brand#23        MEDIUM BURNISHED COPPER 2       SM CASE 2031.98 s cajole caref
+155733 almond antique sky peru orange  Manufacturer#5  Brand#53        SMALL PLATED BRASS      2       WRAP DRUM       1788.73 furiously. bra
+191709 almond antique violet turquoise frosted Manufacturer#2  Brand#22        ECONOMY POLISHED STEEL  40      MED BOX 1800.7   haggle
+192697 almond antique blue firebrick mint      Manufacturer#5  Brand#52        MEDIUM BURNISHED TIN    31      LG DRUM 1789.69 ickly ir
+195606 almond aquamarine sandy cyan gainsboro  Manufacturer#2  Brand#25        STANDARD PLATED TIN     18      SM PKG  1701.6  ic de
+PREHOOK: query: select count(*) from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+26
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+PREHOOK: type: LOAD
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/part.orc' INTO TABLE ice_orc
+POSTHOOK: type: LOAD
 PREHOOK: query: select * from ice_orc order by p_partkey
 PREHOOK: type: QUERY
 PREHOOK: Input: default@ice_orc
@@ -252,28 +346,63 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@ice_orc
 POSTHOOK: Output: hdfs://### HDFS PATH ###
 15103  almond aquamarine dodger light gainsboro        Manufacturer#5  Brand#53        ECONOMY BURNISHED STEEL 46      LG PACK 1018.1  packages hinder carefu
+15103  almond aquamarine dodger light gainsboro        Manufacturer#5  Brand#53        ECONOMY BURNISHED STEEL 46      LG PACK 1018.1  packages hinder carefu
+17273  almond antique forest lavender goldenrod        Manufacturer#3  Brand#35        PROMO ANODIZED TIN      14      JUMBO CASE      1190.27 along the
 17273  almond antique forest lavender goldenrod        Manufacturer#3  Brand#35        PROMO ANODIZED TIN      14      JUMBO CASE      1190.27 along the
 17927  almond aquamarine yellow dodger mint    Manufacturer#4  Brand#41        ECONOMY BRUSHED COPPER  7       SM PKG  1844.92 ites. eve
+17927  almond aquamarine yellow dodger mint    Manufacturer#4  Brand#41        ECONOMY BRUSHED COPPER  7       SM PKG  1844.92 ites. eve
 33357  almond azure aquamarine papaya violet   Manufacturer#4  Brand#41        STANDARD ANODIZED TIN   12      WRAP CASE       1290.35 reful
+33357  almond azure aquamarine papaya violet   Manufacturer#4  Brand#41        STANDARD ANODIZED TIN   12      WRAP CASE       1290.35 reful
+40982  almond antique misty red olive  Manufacturer#3  Brand#32        ECONOMY PLATED COPPER   1       LG PKG  1922.98 c foxes can s
 40982  almond antique misty red olive  Manufacturer#3  Brand#32        ECONOMY PLATED COPPER   1       LG PKG  1922.98 c foxes can s
 42669  almond antique medium spring khaki      Manufacturer#5  Brand#51        STANDARD BURNISHED TIN  6       MED CAN 1611.66 sits haggl
+42669  almond antique medium spring khaki      Manufacturer#5  Brand#51        STANDARD BURNISHED TIN  6       MED CAN 1611.66 sits haggl
+45261  almond aquamarine floral ivory bisque   Manufacturer#4  Brand#42        SMALL PLATED STEEL      27      WRAP CASE       1206.26 careful
 45261  almond aquamarine floral ivory bisque   Manufacturer#4  Brand#42        SMALL PLATED STEEL      27      WRAP CASE       1206.26 careful
 48427  almond antique violet mint lemon        Manufacturer#4  Brand#42        PROMO POLISHED STEEL    39      SM CASE 1375.42 hely ironic i
+48427  almond antique violet mint lemon        Manufacturer#4  Brand#42        PROMO POLISHED STEEL    39      SM CASE 1375.42 hely ironic i
 49671  almond antique gainsboro frosted violet Manufacturer#4  Brand#41        SMALL BRUSHED BRASS     10      SM BOX  1620.67 ccounts run quick
+49671  almond antique gainsboro frosted violet Manufacturer#4  Brand#41        SMALL BRUSHED BRASS     10      SM BOX  1620.67 ccounts run quick
+65667  almond aquamarine pink moccasin thistle Manufacturer#1  Brand#12        LARGE BURNISHED STEEL   42      JUMBO CASE      1632.66 e across the expr
 65667  almond aquamarine pink moccasin thistle Manufacturer#1  Brand#12        LARGE BURNISHED STEEL   42      JUMBO CASE      1632.66 e across the expr
 78486  almond azure blanched chiffon midnight  Manufacturer#5  Brand#52        LARGE BRUSHED BRASS     23      MED BAG 1464.48 hely blith
+78486  almond azure blanched chiffon midnight  Manufacturer#5  Brand#52        LARGE BRUSHED BRASS     23      MED BAG 1464.48 hely blith
+85768  almond antique chartreuse lavender yellow       Manufacturer#1  Brand#12        LARGE BRUSHED STEEL     34      SM BAG  1753.76 refull
 85768  almond antique chartreuse lavender yellow       Manufacturer#1  Brand#12        LARGE BRUSHED STEEL     34      SM BAG  1753.76 refull
 86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12        STANDARD ANODIZED STEEL 28      WRAP BAG        1414.42 arefully 
+86428  almond aquamarine burnished black steel Manufacturer#1  Brand#12        STANDARD ANODIZED STEEL 28      WRAP BAG        1414.42 arefully 
+90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31        MEDIUM BURNISHED TIN    17      SM CASE 1671.68 are slyly after the sl
 90681  almond antique chartreuse khaki white   Manufacturer#3  Brand#31        MEDIUM BURNISHED TIN    17      SM CASE 1671.68 are slyly after the sl
 105685 almond antique violet chocolate turquoise       Manufacturer#2  Brand#22        MEDIUM ANODIZED COPPER  14      MED CAN 1690.68 ly pending requ
+105685 almond antique violet chocolate turquoise       Manufacturer#2  Brand#22        MEDIUM ANODIZED COPPER  14      MED CAN 1690.68 ly pending requ
+110592 almond antique salmon chartreuse burlywood      Manufacturer#1  Brand#15        PROMO BURNISHED NICKEL  6       JUMBO PKG       1602.59  to the furiously
 110592 almond antique salmon chartreuse burlywood      Manufacturer#1  Brand#15        PROMO BURNISHED NICKEL  6       JUMBO PKG       1602.59  to the furiously
 112398 almond antique metallic orange dim      Manufacturer#3  Brand#32        MEDIUM BURNISHED BRASS  19      JUMBO JAR       1410.39 ole car
+112398 almond antique metallic orange dim      Manufacturer#3  Brand#32        MEDIUM BURNISHED BRASS  19      JUMBO JAR       1410.39 ole car
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
+121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
 121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
 121152 almond antique burnished rose metallic  Manufacturer#1  Brand#14        PROMO PLATED TIN        2       JUMBO BOX       1173.15 e pinto beans h
 132666 almond aquamarine rose maroon antique   Manufacturer#2  Brand#24        SMALL POLISHED NICKEL   25      MED BOX 1698.66 even 
+132666 almond aquamarine rose maroon antique   Manufacturer#2  Brand#24        SMALL POLISHED NICKEL   25      MED BOX 1698.66 even 
+144293 almond antique olive coral navajo       Manufacturer#3  Brand#34        STANDARD POLISHED STEEL 45      JUMBO CAN       1337.29 ag furiously about 
 144293 almond antique olive coral navajo       Manufacturer#3  Brand#34        STANDARD POLISHED STEEL 45      JUMBO CAN       1337.29 ag furiously about 
 146985 almond aquamarine midnight light salmon Manufacturer#2  Brand#23        MEDIUM BURNISHED COPPER 2       SM CASE 2031.98 s cajole caref
+146985 almond aquamarine midnight light salmon Manufacturer#2  Brand#23        MEDIUM BURNISHED COPPER 2       SM CASE 2031.98 s cajole caref
+155733 almond antique sky peru orange  Manufacturer#5  Brand#53        SMALL PLATED BRASS      2       WRAP DRUM       1788.73 furiously. bra
 155733 almond antique sky peru orange  Manufacturer#5  Brand#53        SMALL PLATED BRASS      2       WRAP DRUM       1788.73 furiously. bra
 191709 almond antique violet turquoise frosted Manufacturer#2  Brand#22        ECONOMY POLISHED STEEL  40      MED BOX 1800.7   haggle
+191709 almond antique violet turquoise frosted Manufacturer#2  Brand#22        ECONOMY POLISHED STEEL  40      MED BOX 1800.7   haggle
+192697 almond antique blue firebrick mint      Manufacturer#5  Brand#52        MEDIUM BURNISHED TIN    31      LG DRUM 1789.69 ickly ir
 192697 almond antique blue firebrick mint      Manufacturer#5  Brand#52        MEDIUM BURNISHED TIN    31      LG DRUM 1789.69 ickly ir
 195606 almond aquamarine sandy cyan gainsboro  Manufacturer#2  Brand#25        STANDARD PLATED TIN     18      SM PKG  1701.6  ic de
+195606 almond aquamarine sandy cyan gainsboro  Manufacturer#2  Brand#25        STANDARD PLATED TIN     18      SM PKG  1701.6  ic de
+PREHOOK: query: select count(*) from ice_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from ice_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+52
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 555bd7fb5b6..6aadb955332 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -62,6 +62,7 @@ import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.BucketCol;
 import org.apache.hadoop.hive.ql.optimizer.physical.BucketingSortingCtx.SortCol;
 import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.LoadFileDesc;
 import org.apache.hadoop.hive.ql.plan.LoadMultiFilesDesc;
@@ -1059,12 +1060,18 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
    * @return Returns <code>true</code> if the commit was successfully executed
    * @throws HiveException If we tried to commit, but there was an error during the process
    */
-  private static boolean checkAndCommitNatively(MoveWork moveWork, Configuration configuration) throws HiveException {
+  private boolean checkAndCommitNatively(MoveWork moveWork, Configuration configuration) throws HiveException {
     String storageHandlerClass = null;
     Properties commitProperties = null;
     boolean overwrite = false;
-
-    if (moveWork.getLoadTableWork() != null) {
+    LoadTableDesc loadTableWork = moveWork.getLoadTableWork();
+    if (loadTableWork != null) {
+      if (loadTableWork.isUseAppendForLoad()) {
+        loadTableWork.getMdTable().getStorageHandler()
+            .appendFiles(loadTableWork.getMdTable().getTTable(), loadTableWork.getSourcePath().toUri(),
+                loadTableWork.getLoadFileType() == LoadFileType.REPLACE_ALL);
+        return true;
+      }
       // Get the info from the table data
       TableDesc tableDesc = moveWork.getLoadTableWork().getTable();
       storageHandlerClass = tableDesc.getProperties().getProperty(
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index d1efdc3c64b..a0bd32d43ec 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.metastore.api.LockType;
 import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.ql.Context.Operation;
+import org.apache.hadoop.hive.ql.ErrorMsg;
 import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
 import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
 import org.apache.hadoop.hive.ql.ddl.table.create.like.CreateTableLikeDesc;
@@ -304,6 +305,22 @@ public interface HiveStorageHandler extends Configurable {
   default StorageFormatDescriptor getStorageFormatDescriptor(Table table) throws SemanticException {
     return null;
   }
+
+  /**
+   * Checks whether the storage handler can append data files directly to the given table.
+   * @param table the table
+   * @return true if files can be appended directly to the table
+   * @throws SemanticException in case of any error.
+   */
+  default boolean supportsAppendData(Table table) throws SemanticException {
+    return false;
+  }
+
+  default void appendFiles(Table tbl, URI fromURI, boolean isOverwrite)
+      throws SemanticException {
+    throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
+  }
+
   /**
   * Check if CTAS and CMV operations should behave in a direct-insert manner (i.e. no move task).
    * <p>
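
Any other storage handler can opt into the native LOAD path by overriding the two defaults above, as HiveIcebergStorageHandler does earlier in this patch. A hypothetical skeleton (the class and its commitFiles helper are illustrative, not part of the patch):

    import java.net.URI;

    import org.apache.hadoop.hive.metastore.api.Table;
    import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    // Abstract so the many unrelated HiveStorageHandler methods can be omitted.
    public abstract class ExampleAppendingHandler implements HiveStorageHandler {

      @Override
      public boolean supportsAppendData(Table table) throws SemanticException {
        // Advertise support only for layouts the handler can commit directly;
        // the Iceberg handler, for instance, limits this to unpartitioned tables.
        return true;
      }

      @Override
      public void appendFiles(Table table, URI fromURI, boolean isOverwrite)
          throws SemanticException {
        // Register every file under fromURI via the format's own commit protocol.
        commitFiles(table, fromURI, isOverwrite);
      }

      protected abstract void commitFiles(Table table, URI fromURI, boolean isOverwrite)
          throws SemanticException;
    }
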
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
index 8b8cc6b7f42..a172813a291 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/LoadSemanticAnalyzer.java
@@ -54,6 +54,7 @@ import org.apache.hadoop.hive.ql.io.StorageFormatDescriptor;
 import org.apache.hadoop.hive.ql.lockmgr.LockException;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.BasicStatsWork;
@@ -67,6 +68,8 @@ import org.slf4j.LoggerFactory;
 
 import com.google.common.collect.Lists;
 
+import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVE_LOAD_DATA_USE_NATIVE_API;
+
 /**
  * LoadSemanticAnalyzer.
  *
@@ -295,16 +298,26 @@ public class LoadSemanticAnalyzer extends SemanticAnalyzer {
       throw new SemanticException(ErrorMsg.DML_AGAINST_VIEW.getMsg());
     }
     if (ts.tableHandle.isNonNative()) {
-      // launch a tez job
-      StorageFormatDescriptor ss =
-          ts.tableHandle.getStorageHandler().getStorageFormatDescriptor(ts.tableHandle.getTTable());
-      if (ss != null) {
-        inputFormatClassName = ss.getInputFormat();
-        serDeClassName = ss.getSerde();
-        reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+      HiveStorageHandler storageHandler = ts.tableHandle.getStorageHandler();
+      boolean isUseNativeApi = conf.getBoolVar(HIVE_LOAD_DATA_USE_NATIVE_API);
+      if (isUseNativeApi && storageHandler.supportsAppendData(ts.tableHandle.getTTable())) {
+        LoadTableDesc loadTableWork =
+            new LoadTableDesc(new Path(fromURI), ts.tableHandle, isOverWrite, true, isOverWrite);
+        Task<?> childTask =
+            TaskFactory.get(new MoveWork(getInputs(), getOutputs(), loadTableWork, null, true, isLocal));
+        rootTasks.add(childTask);
         return;
+      } else {
+        // launch a tez job
+        StorageFormatDescriptor ss = storageHandler.getStorageFormatDescriptor(ts.tableHandle.getTTable());
+        if (ss != null) {
+          inputFormatClassName = ss.getInputFormat();
+          serDeClassName = ss.getSerde();
+          reparseAndSuperAnalyze(ts.tableHandle, fromURI);
+          return;
+        }
+        throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
       }
-      throw new SemanticException(ErrorMsg.LOAD_INTO_NON_NATIVE.getMsg());
     }
 
     if(ts.tableHandle.isStoredAsSubDirectories()) {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
index 3836ffafb8f..ca45779c389 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/LoadTableDesc.java
@@ -45,6 +45,8 @@ public class LoadTableDesc extends LoadDesc implements Serializable {
   private boolean isInsertOverwrite;
   private boolean isDirectInsert;
 
+  private boolean useAppendForLoad;
+
   // TODO: the below seem like they should just be combined into partitionDesc
   private Table mdTable;
   private org.apache.hadoop.hive.ql.plan.TableDesc table;
@@ -157,6 +159,20 @@ public class LoadTableDesc extends LoadDesc implements Serializable {
     }
   }
 
+  public LoadTableDesc(Path path, Table tableHandle, boolean isOverWrite, boolean useAppendForLoad,
+      boolean isInsertOverwrite) {
+    super(path, AcidUtils.Operation.NOT_ACID);
+    this.mdTable = tableHandle;
+    this.useAppendForLoad = useAppendForLoad;
+    this.loadFileType = isOverWrite ? LoadFileType.REPLACE_ALL : LoadFileType.KEEP_EXISTING;
+    this.table = Utilities.getTableDesc(tableHandle);
+    this.isInsertOverwrite = isInsertOverwrite;
+  }
+
+  public boolean isUseAppendForLoad() {
+    return useAppendForLoad;
+  }
+
   private void init(
       final org.apache.hadoop.hive.ql.plan.TableDesc table,
       final Map<String, String> partitionSpec,

