This is an automated email from the ASF dual-hosted git repository.

dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new a8a0ae782be HIVE-27158: Store Hive column stats in Puffin files for 
Iceberg tables (Simhadri Govindappa, reviewed by Ayush Saxena, Denys Kuzmenko, 
Rajesh Balamohan, Zsolt Miskolczi)
a8a0ae782be is described below

commit a8a0ae782be87d1198006ea3cb508f14070231b7
Author: SimhadriGovindappa <[email protected]>
AuthorDate: Wed Apr 19 15:31:29 2023 +0530

    HIVE-27158: Store Hive column stats in Puffin files for Iceberg tables 
(Simhadri Govindappa, reviewed by Ayush Saxena, Denys Kuzmenko, Rajesh 
Balamohan, Zsolt Miskolczi)
    
    Closes #4131
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   5 +-
 .../iceberg/mr/hive/HiveIcebergStorageHandler.java | 103 +++-
 .../src/test/queries/positive/col_stats.q          |  58 ++
 .../positive/use_basic_stats_from_iceberg.q        |   4 +-
 .../positive/vectorized_iceberg_read_mixed.q       |   8 +
 .../src/test/results/positive/col_stats.q.out      | 615 +++++++++++++++++++++
 .../positive/dynamic_partition_writes.q.out        |  22 +-
 .../llap/vectorized_iceberg_read_mixed.q.out       | 110 +++-
 .../positive/vectorized_iceberg_read_mixed.q.out   |  71 ++-
 .../hive/ql/metadata/HiveStorageHandler.java       |  41 ++
 .../hadoop/hive/ql/stats/ColStatsProcessor.java    |   3 +
 .../apache/hadoop/hive/ql/stats/StatsUtils.java    |   8 +-
 12 files changed, 1022 insertions(+), 26 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 8b666164212..7e6903a39d6 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2205,9 +2205,8 @@ public class HiveConf extends Configuration {
         "padding tolerance config (hive.exec.orc.block.padding.tolerance)."),
     HIVE_ORC_CODEC_POOL("hive.use.orc.codec.pool", false,
         "Whether to use codec pool in ORC. Disable if there are bugs with 
codec reuse."),
-    HIVE_USE_STATS_FROM("hive.use.stats.from","iceberg","Use stats from 
iceberg table snapshot for query " +
-        "planning. This has three values metastore, puffin and iceberg"),
-
+    HIVE_ICEBERG_STATS_SOURCE("hive.iceberg.stats.source", "iceberg",
+        "Use stats from iceberg table snapshot for query planning. This has 
two values metastore and iceberg"),
     HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
         "If this is set the header for RCFiles will simply be RCF.  If this is 
not\n" +
         "set the header will be that borrowed from sequence files, e.g. SEQ- 
followed\n" +
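
A minimal usage sketch for the renamed setting (not part of the patch; the class
name StatsSourceCheck is purely illustrative), assuming the HiveConf change above
is applied: when the property is unset, getVar falls back to the declared default
"iceberg", and the handler lower-cases the value before comparing it against
"metastore" or "iceberg".

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    public class StatsSourceCheck {
      public static void main(String[] args) {
        Configuration conf = new HiveConf();
        // Resolves hive.iceberg.stats.source; returns the declared default "iceberg" when unset.
        String source = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE).toLowerCase();
        System.out.println("stats source: " + source);
      }
    }
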
diff --git 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
index bcadebbf4c0..db69d6c34c8 100644
--- 
a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
+++ 
b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.Serializable;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
@@ -35,14 +36,20 @@ import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.stream.Collectors;
 import org.apache.commons.collections4.ListUtils;
+import org.apache.commons.lang3.SerializationUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.conf.Constants;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LockType;
@@ -112,6 +119,12 @@ import org.apache.iceberg.exceptions.NoSuchTableException;
 import org.apache.iceberg.hadoop.HadoopConfigurable;
 import org.apache.iceberg.mr.Catalogs;
 import org.apache.iceberg.mr.InputFormatConfig;
+import org.apache.iceberg.puffin.Blob;
+import org.apache.iceberg.puffin.BlobMetadata;
+import org.apache.iceberg.puffin.Puffin;
+import org.apache.iceberg.puffin.PuffinCompressionCodec;
+import org.apache.iceberg.puffin.PuffinReader;
+import org.apache.iceberg.puffin.PuffinWriter;
 import 
org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.base.Splitter;
@@ -121,7 +134,10 @@ import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
 import org.apache.iceberg.relocated.com.google.common.collect.Lists;
 import org.apache.iceberg.relocated.com.google.common.collect.Maps;
 import org.apache.iceberg.relocated.com.google.common.collect.Sets;
+import org.apache.iceberg.relocated.com.google.common.collect.Streams;
 import org.apache.iceberg.types.Types;
+import org.apache.iceberg.util.ByteBuffers;
+import org.apache.iceberg.util.Pair;
 import org.apache.iceberg.util.SerializationUtil;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -136,6 +152,7 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
   private static final String PUFFIN = "puffin";
   public static final String COPY_ON_WRITE = "copy-on-write";
   public static final String MERGE_ON_READ = "merge-on-read";
+  public static final String STATS = "/stats/";
   /**
    * Function template for producing a custom sort expression function:
    * Takes the source column index and the bucket count to create a function 
where Iceberg bucket UDF is used to build
@@ -318,7 +335,7 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
     org.apache.hadoop.hive.ql.metadata.Table hmsTable = partish.getTable();
     // For write queries where rows got modified, don't fetch from cache as 
values could have changed.
     Table table = getTable(hmsTable);
-    String statsSource = HiveConf.getVar(conf, 
HiveConf.ConfVars.HIVE_USE_STATS_FROM).toLowerCase();
+    String statsSource = HiveConf.getVar(conf, 
HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE).toLowerCase();
     Map<String, String> stats = Maps.newHashMap();
     switch (statsSource) {
       case ICEBERG:
@@ -361,6 +378,90 @@ public class HiveIcebergStorageHandler implements 
HiveStoragePredicateHandler, H
     return table;
   }
 
+  @Override
+  public boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table 
hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    return table.currentSnapshot() != null && getStatsSource().equals(ICEBERG);
+  }
+
+  @Override
+  public boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table 
hmsTable,
+      List<ColumnStatistics> colStats) {
+    Table tbl = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    String snapshotId = String.format("%s-STATS-%d", tbl.name(), 
tbl.currentSnapshot().snapshotId());
+    invalidateStats(getStatsPath(tbl));
+    byte[] serializeColStats = SerializationUtils.serialize((Serializable) 
colStats);
+    try (PuffinWriter writer = 
Puffin.write(tbl.io().newOutputFile(getStatsPath(tbl).toString()))
+        .createdBy(Constants.HIVE_ENGINE).build()) {
+      writer.add(
+          new Blob(
+              tbl.name() + "-" + snapshotId,
+              ImmutableList.of(1),
+              tbl.currentSnapshot().snapshotId(),
+              tbl.currentSnapshot().sequenceNumber(),
+              ByteBuffer.wrap(serializeColStats),
+              PuffinCompressionCodec.NONE,
+              ImmutableMap.of()));
+      writer.finish();
+    } catch (IOException e) {
+      LOG.error("Error while writing Hive column stats into the puffin file", e);
+    }
+    return false;
+  }
+
+  @Override
+  public boolean 
canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    if (canSetColStatistics(hmsTable)) {
+      Path statsPath = getStatsPath(table);
+      try (FileSystem fs = statsPath.getFileSystem(conf)) {
+        if (fs.exists(statsPath)) {
+          return true;
+        }
+      } catch (IOException e) {
+        LOG.warn("Exception when trying to find Iceberg column stats for 
table:{} , snapshot:{} , " +
+            "statsPath: {} , stack trace: {}", table.name(), 
table.currentSnapshot(), statsPath, e);
+      }
+    }
+    return false;
+  }
+
+  @Override
+  public List<ColumnStatisticsObj> 
getColStatistics(org.apache.hadoop.hive.ql.metadata.Table hmsTable) {
+    Table table = IcebergTableUtil.getTable(conf, hmsTable.getTTable());
+    String statsPath = getStatsPath(table).toString();
+    LOG.info("Using stats from puffin file at: {}", statsPath);
+    try (PuffinReader reader = 
Puffin.read(table.io().newInputFile(statsPath)).build()) {
+      List<BlobMetadata> blobMetadata = reader.fileMetadata().blobs();
+      Map<BlobMetadata, List<ColumnStatistics>> collect =
+          
Streams.stream(reader.readAll(blobMetadata)).collect(Collectors.toMap(Pair::first,
+              blobMetadataByteBufferPair -> SerializationUtils.deserialize(
+                  
ByteBuffers.toByteArray(blobMetadataByteBufferPair.second()))));
+      return collect.get(blobMetadata.get(0)).get(0).getStatsObj();
+    } catch (IOException e) {
+      LOG.error("Error when trying to read iceberg col stats from puffin 
files: {}", e);
+    }
+    return null;
+  }
+
+  private String getStatsSource() {
+    return HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_ICEBERG_STATS_SOURCE, 
ICEBERG).toLowerCase();
+  }
+
+  private Path getStatsPath(Table table) {
+    return new Path(table.location() + STATS + table.name() + 
table.currentSnapshot().snapshotId());
+  }
+
+  private void invalidateStats(Path statsPath) {
+    try (FileSystem fs = statsPath.getFileSystem(conf)) {
+      if (fs.exists(statsPath)) {
+        fs.delete(statsPath, true);
+      }
+    } catch (IOException e) {
+      LOG.error("Failed to invalidate stale column stats: {}", e);
+    }
+  }
+
   /**
    * No need for exclusive locks when writing, since Iceberg tables use 
optimistic concurrency when writing
    * and only lock the table during the commit operation.
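
To summarize the new flow in HiveIcebergStorageHandler: computed Hive column stats
are Java-serialized and written as a single blob into a Puffin file under
<table location>/stats/<table name><snapshot id>; at planning time the handler reads
that file back and deserializes the blob into Hive ColumnStatistics objects. Below is
a condensed sketch of that round trip (not part of the patch; the class name, the blob
type string and the createdBy value are illustrative, and error handling is omitted),
assuming an already-loaded Iceberg Table with a current snapshot:

    import java.io.IOException;
    import java.io.Serializable;
    import java.nio.ByteBuffer;
    import java.util.List;

    import org.apache.commons.lang3.SerializationUtils;
    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.iceberg.Table;
    import org.apache.iceberg.puffin.Blob;
    import org.apache.iceberg.puffin.BlobMetadata;
    import org.apache.iceberg.puffin.Puffin;
    import org.apache.iceberg.puffin.PuffinCompressionCodec;
    import org.apache.iceberg.puffin.PuffinReader;
    import org.apache.iceberg.puffin.PuffinWriter;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
    import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
    import org.apache.iceberg.util.ByteBuffers;
    import org.apache.iceberg.util.Pair;

    public final class PuffinColStatsRoundTrip {

      // Serialize the Hive column stats and store them as one uncompressed blob.
      static void write(Table table, String statsPath, List<ColumnStatistics> colStats) throws IOException {
        byte[] payload = SerializationUtils.serialize((Serializable) colStats);
        try (PuffinWriter writer = Puffin.write(table.io().newOutputFile(statsPath))
            .createdBy("hive-sketch")
            .build()) {
          writer.add(new Blob(
              table.name() + "-stats",                      // blob type, illustrative
              ImmutableList.of(1),                          // field ids the blob refers to
              table.currentSnapshot().snapshotId(),
              table.currentSnapshot().sequenceNumber(),
              ByteBuffer.wrap(payload),
              PuffinCompressionCodec.NONE,
              ImmutableMap.of()));
          writer.finish();
        }
      }

      // Read the first blob back and deserialize it into Hive column stats.
      static List<ColumnStatistics> read(Table table, String statsPath) throws IOException {
        try (PuffinReader reader = Puffin.read(table.io().newInputFile(statsPath)).build()) {
          List<BlobMetadata> blobs = reader.fileMetadata().blobs();
          for (Pair<BlobMetadata, ByteBuffer> entry : reader.readAll(blobs)) {
            return SerializationUtils.deserialize(ByteBuffers.toByteArray(entry.second()));
          }
          return null;                                      // no blobs: no stats available
        }
      }
    }
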
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q 
b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
new file mode 100644
index 00000000000..d7c4d811a8b
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/col_stats.q
@@ -0,0 +1,58 @@
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+set hive.stats.autogather=true;
+set hive.stats.column.autogather=true;
+
+set hive.iceberg.stats.source=iceberg;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 
51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 
'one', 55), (333, 'two', 56);
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 
51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 
'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+desc formatted tbl_ice_puffin b;
+update tbl_ice_puffin set b='two' where b='one' or b='three';
+analyze table tbl_ice_puffin  compute statistics for columns;
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+select count(*) from tbl_ice_puffin ;
+desc formatted tbl_ice_puffin b;
+
+
+-- Test if hive.iceberg.stats.source is empty
+set hive.iceberg.stats.source= ;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 
51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 
'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+
+
+set hive.iceberg.stats.source=iceberg;
+drop table if exists tbl_ice_puffin;
+create external table tbl_ice_puffin(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
+insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 51),(2, 'two', 
51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 
'one', 55), (333, 'two', 56);
+explain select * from tbl_ice_puffin order by a, b, c;
+select * from tbl_ice_puffin order by a, b, c;
+select count(*) from tbl_ice_puffin ;
+desc formatted tbl_ice_puffin a;
+
+
+set hive.iceberg.stats.source=metastore;
+
+drop table if exists tbl_ice;
+create external table tbl_ice(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
+insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 
'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 
55), (333, 'two', 56);
+explain select * from tbl_ice order by a, b, c;
+select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+set hive.iceberg.stats.source=iceberg;
+delete from tbl_ice_puffin  where  a = 2;
+explain select * from tbl_ice order by a, b, c;
+select count(*) from tbl_ice ;
+
+create table t1 (a int) stored by iceberg tblproperties ('format-version'='2');
+create table t2 (b int) stored by iceberg tblproperties ('format-version'='2');
+describe formatted t1;
+describe formatted t2;
+explain select * from t1 join t2 on t1.a = t2.b;
\ No newline at end of file
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
 
b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
index 90e2d95d1df..d80f420c42c 100644
--- 
a/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
+++ 
b/iceberg/iceberg-handler/src/test/queries/positive/use_basic_stats_from_iceberg.q
@@ -4,13 +4,13 @@ set hive.stats.autogather=true;
 set hive.stats.column.autogather=true;
 
 drop table if exists tbl_ice;
-set hive.use.stats.from = metastore;
+set hive.iceberg.stats.source=metastore;
 create external table tbl_ice(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
 insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 
'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 
55), (333, 'two', 56);
 explain select * from tbl_ice order by a, b, c;
 
 drop table if exists tbl_ice;
-set hive.use.stats.from = iceberg;
+set hive.iceberg.stats.source=iceberg;
 create external table tbl_ice(a int, b string, c int) stored by iceberg 
tblproperties ('format-version'='2');
 insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 'two', 51),(2, 
'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), (111, 'one', 
55), (333, 'two', 56);
 explain select * from tbl_ice order by a, b, c;
diff --git 
a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
 
b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
index b630b1f802d..99069bc266a 100644
--- 
a/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
+++ 
b/iceberg/iceberg-handler/src/test/queries/positive/vectorized_iceberg_read_mixed.q
@@ -46,6 +46,14 @@ explain select max(t_float), t_double, t_boolean, t_int, 
t_bigint, t_binary, t_s
 select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
         group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal;
 
+create external table t1 stored as orc as select * from 
tbl_ice_mixed_all_types ;
+
+explain select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, 
t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal;
+select max(t_float), t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal;
+
+
 create external table tbl_ice_mixed_parted (
     a int,
     b string
diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out 
b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
new file mode 100644
index 00000000000..b1f13fa76b5
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
@@ -0,0 +1,615 @@
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=18 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=18 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=18 width=95)
+                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      one     50
+1      one     50
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+3      three   52
+3      three   52
+4      four    53
+4      four    53
+5      five    54
+5      five    54
+111    one     55
+111    one     55
+333    two     56
+333    two     56
+PREHOOK: query: desc formatted tbl_ice_puffin b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name               b                   
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         5                   
+avg_col_len            3.4444444444444446  
+max_col_len            5                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+comment                                    
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: update tbl_ice_puffin set b='two' where b='one' or b='three'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: analyze table tbl_ice_puffin  compute statistics for columns
+PREHOOK: type: ANALYZE_TABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: analyze table tbl_ice_puffin  compute statistics for columns
+POSTHOOK: type: ANALYZE_TABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=24 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=24 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=24 width=95)
+                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      two     50
+1      two     50
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+2      two     51
+3      two     52
+3      two     52
+4      four    53
+4      four    53
+5      five    54
+5      five    54
+111    two     55
+111    two     55
+333    two     56
+333    two     56
+PREHOOK: query: select count(*) from tbl_ice_puffin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_puffin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+18
+PREHOOK: query: desc formatted tbl_ice_puffin b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name               b                   
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         3                   
+avg_col_len            3.2222222222222223  
+max_col_len            4                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+comment                                    
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: drop table if exists tbl_ice_puffin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: drop table if exists tbl_ice_puffin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: create external table tbl_ice_puffin(a int, b string, c int) 
stored by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: insert into tbl_ice_puffin values (1, 'one', 50), (2, 'two', 
51),(2, 'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 
'five', 54), (111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice_puffin order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice_puffin order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      one     50
+2      two     51
+2      two     51
+2      two     51
+3      three   52
+4      four    53
+5      five    54
+111    one     55
+333    two     56
+PREHOOK: query: select count(*) from tbl_ice_puffin
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_puffin
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: desc formatted tbl_ice_puffin a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: query: desc formatted tbl_ice_puffin a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@tbl_ice_puffin
+col_name               a                   
+data_type              int                 
+min                    1                   
+max                    333                 
+num_nulls              0                   
+distinct_count         7                   
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+comment                                    
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: drop table if exists tbl_ice
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists tbl_ice
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create external table tbl_ice(a int, b string, c int) stored 
by iceberg tblproperties ('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: create external table tbl_ice(a int, b string, c int) stored 
by iceberg tblproperties ('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 
'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), 
(111, 'one', 55), (333, 'two', 56)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice
+POSTHOOK: query: insert into tbl_ice values (1, 'one', 50), (2, 'two', 51),(2, 
'two', 51),(2, 'two', 51), (3, 'three', 52), (4, 'four', 53), (5, 'five', 54), 
(111, 'one', 55), (333, 'two', 56)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                
default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1      one     50
+2      two     51
+2      two     51
+2      two     51
+3      three   52
+4      four    53
+5      five    54
+111    one     55
+333    two     56
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: delete from tbl_ice_puffin  where  a = 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_puffin
+PREHOOK: Output: default@tbl_ice_puffin
+POSTHOOK: query: delete from tbl_ice_puffin  where  a = 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_puffin
+POSTHOOK: Output: default@tbl_ice_puffin
+PREHOOK: query: explain select * from tbl_ice order by a, b, c
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from tbl_ice order by a, b, c
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_8]
+        Select Operator [SEL_7] (rows=9 width=95)
+          Output:["_col0","_col1","_col2"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_6]
+            Select Operator [SEL_5] (rows=9 width=95)
+              Output:["_col0","_col1","_col2"]
+              TableScan [TS_0] (rows=9 width=95)
+                
default@tbl_ice,tbl_ice,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+
+PREHOOK: query: select count(*) from tbl_ice
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+9
+PREHOOK: query: create table t1 (a int) stored by iceberg tblproperties 
('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create table t1 (a int) stored by iceberg tblproperties 
('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+PREHOOK: query: create table t2 (b int) stored by iceberg tblproperties 
('format-version'='2')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t2
+POSTHOOK: query: create table t2 (b int) stored by iceberg tblproperties 
('format-version'='2')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t2
+PREHOOK: query: describe formatted t1
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t1
+POSTHOOK: query: describe formatted t1
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t1
+# col_name             data_type               comment             
+a                      int                                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
+       bucketing_version       2                   
+       current-schema          
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"a\",\"required\":false,\"type\":\"int\"}]}
+       engine.hive.enabled     true                
+       format-version          2                   
+       iceberg.orc.files.only  false               
+       metadata_location       hdfs://### HDFS PATH ###
+       numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
+       serialization.format    1                   
+       snapshot-count          0                   
+       storage_handler         
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+       table_type              ICEBERG             
+       totalSize               0                   
+#### A masked pattern was here ####
+       uuid                    #Masked#
+       write.delete.mode       merge-on-read       
+       write.merge.mode        merge-on-read       
+       write.update.mode       merge-on-read       
+                
+# Storage Information           
+SerDe Library:         org.apache.iceberg.mr.hive.HiveIcebergSerDe      
+InputFormat:           org.apache.iceberg.mr.hive.HiveIcebergInputFormat       
 
+OutputFormat:          org.apache.iceberg.mr.hive.HiveIcebergOutputFormat      
 
+Compressed:            No                       
+Sort Columns:          []                       
+PREHOOK: query: describe formatted t2
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@t2
+POSTHOOK: query: describe formatted t2
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@t2
+# col_name             data_type               comment             
+b                      int                                         
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"b\":\"true\"}}
+       bucketing_version       2                   
+       current-schema          
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"b\",\"required\":false,\"type\":\"int\"}]}
+       engine.hive.enabled     true                
+       format-version          2                   
+       iceberg.orc.files.only  false               
+       metadata_location       hdfs://### HDFS PATH ###
+       numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
+       serialization.format    1                   
+       snapshot-count          0                   
+       storage_handler         
org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+       table_type              ICEBERG             
+       totalSize               0                   
+#### A masked pattern was here ####
+       uuid                    #Masked#
+       write.delete.mode       merge-on-read       
+       write.merge.mode        merge-on-read       
+       write.update.mode       merge-on-read       
+                
+# Storage Information           
+SerDe Library:         org.apache.iceberg.mr.hive.HiveIcebergSerDe      
+InputFormat:           org.apache.iceberg.mr.hive.HiveIcebergInputFormat       
 
+OutputFormat:          org.apache.iceberg.mr.hive.HiveIcebergOutputFormat      
 
+Compressed:            No                       
+Sort Columns:          []                       
+PREHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t1
+PREHOOK: Input: default@t2
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select * from t1 join t2 on t1.a = t2.b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t1
+POSTHOOK: Input: default@t2
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2
+      File Output Operator [FS_10]
+        Merge Join Operator [MERGEJOIN_25] (rows=1 width=4)
+          Conds:RS_28._col0=RS_31._col0(Inner),Output:["_col0","_col1"]
+        <-Map 1 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_28]
+            PartitionCols:_col0
+            Select Operator [SEL_27] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_26] (rows=1 width=4)
+                predicate:a is not null
+                TableScan [TS_0] (rows=1 width=4)
+                  default@t1,t1,Tbl:COMPLETE,Col:NONE,Output:["a"]
+        <-Map 3 [SIMPLE_EDGE] vectorized
+          SHUFFLE [RS_31]
+            PartitionCols:_col0
+            Select Operator [SEL_30] (rows=1 width=4)
+              Output:["_col0"]
+              Filter Operator [FIL_29] (rows=1 width=4)
+                predicate:b is not null
+                TableScan [TS_3] (rows=1 width=4)
+                  default@t2,t2,Tbl:COMPLETE,Col:NONE,Output:["b"]
+
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
index 2cf955f898c..7e7a5eab1e3 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/dynamic_partition_writes.q.out
@@ -76,9 +76,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1
-                    Select Operator [SEL_12] (rows=22 width=87)
+                    Select Operator [SEL_12] (rows=22 width=91)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=22 width=87)
+                      TableScan [TS_0] (rows=22 width=91)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -90,7 +90,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=22 width=87)
+                        Select Operator [SEL_14] (rows=22 width=91)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -170,9 +170,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:iceberg_bucket(_col1, 2)
-                    Select Operator [SEL_12] (rows=22 width=87)
+                    Select Operator [SEL_12] (rows=22 width=91)
                       Output:["_col0","_col1"]
-                      TableScan [TS_0] (rows=22 width=87)
+                      TableScan [TS_0] (rows=22 width=91)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -184,7 +184,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=400)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)"]
-                        Select Operator [SEL_14] (rows=22 width=87)
+                        Select Operator [SEL_14] (rows=22 width=91)
                           Output:["a","ccy"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -264,9 +264,9 @@ Stage-3
                 <-Map 1 [SIMPLE_EDGE] vectorized
                   PARTITION_ONLY_SHUFFLE [RS_13]
                     PartitionCols:_col1, iceberg_bucket(_col2, 3)
-                    Select Operator [SEL_12] (rows=22 width=94)
+                    Select Operator [SEL_12] (rows=22 width=99)
                       Output:["_col0","_col1","_col2"]
-                      TableScan [TS_0] (rows=22 width=94)
+                      TableScan [TS_0] (rows=22 width=99)
                         
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
               Reducer 3 vectorized
               File Output Operator [FS_21]
@@ -278,7 +278,7 @@ Stage-3
                     PARTITION_ONLY_SHUFFLE [RS_16]
                       Group By Operator [GBY_15] (rows=1 width=568)
                         
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12"],aggregations:["min(a)","max(a)","count(1)","count(a)","compute_bit_vector_hll(a)","max(length(ccy))","avg(COALESCE(length(ccy),0))","count(ccy)","compute_bit_vector_hll(ccy)","min(c)","max(c)","count(c)","compute_bit_vector_hll(c)"]
-                        Select Operator [SEL_14] (rows=22 width=94)
+                        Select Operator [SEL_14] (rows=22 width=99)
                           Output:["a","ccy","c"]
                            Please refer to the previous Select Operator 
[SEL_12]
 
@@ -403,7 +403,7 @@ Stage-3
                       Output:["_col0","_col1","_col2"]
                       Filter Operator [FIL_14] (rows=4 width=99)
                         predicate:(b = 'EUR')
-                        TableScan [TS_0] (rows=22 width=94)
+                        TableScan [TS_0] (rows=22 width=99)
                           
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
               Reducer 3 vectorized
               File Output Operator [FS_24]
@@ -461,7 +461,7 @@ Stage-3
                         Output:["_col0","_col1","_col2"]
                         Filter Operator [FIL_12] (rows=1 width=99)
                           predicate:((c = 100L) and (b = 'USD'))
-                          TableScan [TS_0] (rows=22 width=94)
+                          TableScan [TS_0] (rows=22 width=99)
                             
default@tbl_src,tbl_src,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
                     PARTITION_ONLY_SHUFFLE [RS_17]
                       Group By Operator [GBY_16] (rows=1 width=568)
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
index bdef92e60f6..50ce82dc248 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/llap/vectorized_iceberg_read_mixed.q.out
@@ -589,13 +589,13 @@ STAGE PLANS:
                     minReductionHashAggr: 0.99
                     mode: hash
                     outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
-                    Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
                       null sort order: zzzzzzzzz
                       sort order: +++++++++
                       Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
-                      Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
                       value expressions: _col9 (type: float)
             Execution mode: vectorized, llap
             LLAP IO: all inputs (cache only)
@@ -607,14 +607,116 @@ STAGE PLANS:
                 keys: KEY._col0 (type: double), KEY._col1 (type: boolean), 
KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: binary), 
KEY._col5 (type: string), KEY._col6 (type: timestamp), KEY._col7 (type: date), 
KEY._col8 (type: decimal(4,2))
                 mode: mergepartial
                 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
-                Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE 
Column stats: COMPLETE
                 Select Operator
                   expressions: _col9 (type: float), _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                  Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  File Output Operator
+                    compressed: false
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    table:
+                        input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
+                        output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                        serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+1.1    1.2     false   4       567890123456789 6       col7    2012-10-03 
19:58:08     1234-09-02      10.01
+5.1    6.2     true    40      567890123456780 8       col07   2012-10-03 
19:58:09     1234-09-03      10.02
+PREHOOK: query: create external table t1 stored as orc as select * from 
tbl_ice_mixed_all_types
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create external table t1 stored as orc as select * from 
tbl_ice_mixed_all_types
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.t_bigint SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_bigint, 
type:bigint, comment:null), ]
+POSTHOOK: Lineage: t1.t_binary SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_binary, 
type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.t_boolean SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_boolean, 
type:boolean, comment:null), ]
+POSTHOOK: Lineage: t1.t_date SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_date, 
type:date, comment:null), ]
+POSTHOOK: Lineage: t1.t_decimal SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_decimal, 
type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: t1.t_double SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_double, 
type:double, comment:null), ]
+POSTHOOK: Lineage: t1.t_float SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_float, 
type:float, comment:null), ]
+POSTHOOK: Lineage: t1.t_int SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_int, 
type:int, comment:null), ]
+POSTHOOK: Lineage: t1.t_string SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_string, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.t_timestamp SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_timestamp, 
type:timestamp, comment:null), ]
+PREHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from 
tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+POSTHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from 
tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: tbl_ice_mixed_all_types
                   Statistics: Num rows: 2 Data size: 746 Basic stats: COMPLETE 
Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: max(t_float)
+                    keys: t_double (type: double), t_boolean (type: boolean), 
t_int (type: int), t_bigint (type: bigint), t_binary (type: binary), t_string 
(type: string), t_timestamp (type: timestamp), t_date (type: date), t_decimal 
(type: decimal(4,2))
+                    minReductionHashAggr: 0.99
+                    mode: hash
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, 
_col5, _col6, _col7, _col8, _col9
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: double), _col1 (type: 
boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: binary), _col5 
(type: string), _col6 (type: timestamp), _col7 (type: date), _col8 (type: 
decimal(4,2))
+                      null sort order: zzzzzzzzz
+                      sort order: +++++++++
+                      Map-reduce partition columns: _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
+                      Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
+                      value expressions: _col9 (type: float)
+            Execution mode: vectorized, llap
+            LLAP IO: all inputs (cache only)
+        Reducer 2 
+            Execution mode: vectorized, llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: max(VALUE._col0)
+                keys: KEY._col0 (type: double), KEY._col1 (type: boolean), 
KEY._col2 (type: int), KEY._col3 (type: bigint), KEY._col4 (type: binary), 
KEY._col5 (type: string), KEY._col6 (type: timestamp), KEY._col7 (type: date), 
KEY._col8 (type: decimal(4,2))
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE 
Column stats: COMPLETE
+                Select Operator
+                  expressions: _col9 (type: float), _col0 (type: double), 
_col1 (type: boolean), _col2 (type: int), _col3 (type: bigint), _col4 (type: 
binary), _col5 (type: string), _col6 (type: timestamp), _col7 (type: date), 
_col8 (type: decimal(4,2))
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, 
_col6, _col7, _col8, _col9
+                  Statistics: Num rows: 1 Data size: 373 Basic stats: COMPLETE 
Column stats: COMPLETE
                   File Output Operator
                     compressed: false
-                    Statistics: Num rows: 2 Data size: 746 Basic stats: 
COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 1 Data size: 373 Basic stats: 
COMPLETE Column stats: COMPLETE
                     table:
                         input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat
                         output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
index a43950aa6ac..34696284306 100644
--- 
a/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
+++ 
b/iceberg/iceberg-handler/src/test/results/positive/vectorized_iceberg_read_mixed.q.out
@@ -538,14 +538,79 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_11]
-        Select Operator [SEL_10] (rows=2 width=373)
+        Select Operator [SEL_10] (rows=1 width=373)
           
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
-          Group By Operator [GBY_9] (rows=2 width=373)
+          Group By Operator [GBY_9] (rows=1 width=373)
             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, 
KEY._col8
           <-Map 1 [SIMPLE_EDGE] vectorized
             SHUFFLE [RS_8]
               PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
-              Group By Operator [GBY_7] (rows=2 width=373)
+              Group By Operator [GBY_7] (rows=1 width=373)
+                
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
 t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
+                TableScan [TS_0] (rows=2 width=373)
+                  
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
+
+PREHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select max(t_float), t_double, t_boolean, t_int, t_bigint, 
t_binary, t_string, t_timestamp, t_date, t_decimal from tbl_ice_mixed_all_types
+        group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+1.1    1.2     false   4       567890123456789 6       col7    2012-10-03 
19:58:08     1234-09-02      10.01
+5.1    6.2     true    40      567890123456780 8       col07   2012-10-03 
19:58:09     1234-09-03      10.02
+PREHOOK: query: create external table t1 stored as orc as select * from 
tbl_ice_mixed_all_types
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t1
+POSTHOOK: query: create external table t1 stored as orc as select * from 
tbl_ice_mixed_all_types
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t1
+POSTHOOK: Lineage: t1.t_bigint SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_bigint, 
type:bigint, comment:null), ]
+POSTHOOK: Lineage: t1.t_binary SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_binary, 
type:binary, comment:null), ]
+POSTHOOK: Lineage: t1.t_boolean SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_boolean, 
type:boolean, comment:null), ]
+POSTHOOK: Lineage: t1.t_date SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_date, 
type:date, comment:null), ]
+POSTHOOK: Lineage: t1.t_decimal SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_decimal, 
type:decimal(4,2), comment:null), ]
+POSTHOOK: Lineage: t1.t_double SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_double, 
type:double, comment:null), ]
+POSTHOOK: Lineage: t1.t_float SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_float, 
type:float, comment:null), ]
+POSTHOOK: Lineage: t1.t_int SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_int, 
type:int, comment:null), ]
+POSTHOOK: Lineage: t1.t_string SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_string, 
type:string, comment:null), ]
+POSTHOOK: Lineage: t1.t_timestamp SIMPLE 
[(tbl_ice_mixed_all_types)tbl_ice_mixed_all_types.FieldSchema(name:t_timestamp, 
type:timestamp, comment:null), ]
+PREHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from 
tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_mixed_all_types
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select max(t_float), t_double, t_boolean, t_int, 
t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal from 
tbl_ice_mixed_all_types
+    group by t_double, t_boolean, t_int, t_bigint, t_binary, t_string, 
t_timestamp, t_date, t_decimal
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_mixed_all_types
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+Plan optimized by CBO.
+
+Vertex dependency in root stage
+Reducer 2 <- Map 1 (SIMPLE_EDGE)
+
+Stage-0
+  Fetch Operator
+    limit:-1
+    Stage-1
+      Reducer 2 vectorized
+      File Output Operator [FS_11]
+        Select Operator [SEL_10] (rows=1 width=373)
+          
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"]
+          Group By Operator [GBY_9] (rows=1 width=373)
+            
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(VALUE._col0)"],keys:KEY._col0,
 KEY._col1, KEY._col2, KEY._col3, KEY._col4, KEY._col5, KEY._col6, KEY._col7, 
KEY._col8
+          <-Map 1 [SIMPLE_EDGE] vectorized
+            SHUFFLE [RS_8]
+              PartitionCols:_col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8
+              Group By Operator [GBY_7] (rows=1 width=373)
                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9"],aggregations:["max(t_float)"],keys:t_double,
 t_boolean, t_int, t_bigint, t_binary, t_string, t_timestamp, t_date, t_decimal
                 TableScan [TS_0] (rows=2 width=373)
                   
default@tbl_ice_mixed_all_types,tbl_ice_mixed_all_types,Tbl:COMPLETE,Col:COMPLETE,Output:["t_float","t_double","t_boolean","t_int","t_bigint","t_binary","t_string","t_timestamp","t_date","t_decimal"]
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java 
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
index aff2f51cbc1..65e14af478a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveStorageHandler.java
@@ -29,6 +29,8 @@ import 
org.apache.hadoop.hive.common.classification.InterfaceStability;
 import org.apache.hadoop.hive.common.type.SnapshotContext;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaHook;
+import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.EnvironmentContext;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.LockType;
@@ -42,6 +44,7 @@ import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.parse.AlterTableExecuteSpec;
 import org.apache.hadoop.hive.ql.parse.TransformSpec;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ColStatistics;
 import org.apache.hadoop.hive.ql.plan.DynamicPartitionCtx;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
@@ -245,6 +248,44 @@ public interface HiveStorageHandler extends Configurable {
     return false;
   }
 
+  /**
+   * Returns column statistics (lower/upper bounds, null value counts, NaN counts,
+   * total counts) computed by the underlying storage handler implementation.
+   * @param table the table whose column statistics are requested
+   * @return a list of ColumnStatisticsObj, or null if the handler cannot provide them
+   */
+  default List<ColumnStatisticsObj> getColStatistics(org.apache.hadoop.hive.ql.metadata.Table table) {
+    return null;
+  }
+
+  /**
+   * Sets column statistics for non-native tables.
+   * @param table the table to attach the statistics to
+   * @param colStats the column statistics to store
+   * @return true if the storage handler stored the statistics, false otherwise
+   */
+  default boolean setColStatistics(org.apache.hadoop.hive.ql.metadata.Table table,
+      List<ColumnStatistics> colStats) {
+    return false;
+  }
+
+  /**
+   * Checks whether the storage handler can provide column statistics for the table.
+   * @param tbl the table to check
+   * @return true if the storage handler can supply the column statistics
+   */
+  default boolean canProvideColStatistics(org.apache.hadoop.hive.ql.metadata.Table tbl) {
+    return false;
+  }
+
+  /**
+   * Checks whether the storage handler can set column statistics for the table.
+   * @param tbl the table to check
+   * @return true if the storage handler can set the column statistics
+   */
+  default boolean canSetColStatistics(org.apache.hadoop.hive.ql.metadata.Table tbl) {
+    return false;
+  }
+
   /**
    * Check if CTAS and CMV operations should behave in a direct-insert manner 
(i.e. no move task).
    * <p>
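
The four defaults above are the whole contract a non-native storage handler has to satisfy to take part in the stats flow. As a rough, illustrative sketch (not part of this patch), a handler backed by its own stats store could override them along these lines; StatsAwareStorageHandler and its in-memory statsStore map are made-up placeholders, whereas the Iceberg handler in this change persists the statistics in the table's own metadata instead:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.ql.metadata.DefaultStorageHandler;
import org.apache.hadoop.hive.ql.metadata.Table;

/** Illustrative only: keeps column stats in an in-memory map keyed by db.table. */
public class StatsAwareStorageHandler extends DefaultStorageHandler {

  private final Map<String, List<ColumnStatisticsObj>> statsStore = new HashMap<>();

  @Override
  public boolean canSetColStatistics(Table tbl) {
    return true;  // this handler always accepts stats written by ColStatsProcessor
  }

  @Override
  public boolean setColStatistics(Table table, List<ColumnStatistics> colStats) {
    // Flatten the incoming ColumnStatistics into plain ColumnStatisticsObj entries.
    List<ColumnStatisticsObj> objs = new ArrayList<>();
    for (ColumnStatistics cs : colStats) {
      objs.addAll(cs.getStatsObj());
    }
    statsStore.put(table.getDbName() + "." + table.getTableName(), objs);
    return true;
  }

  @Override
  public boolean canProvideColStatistics(Table tbl) {
    return statsStore.containsKey(tbl.getDbName() + "." + tbl.getTableName());
  }

  @Override
  public List<ColumnStatisticsObj> getColStatistics(Table table) {
    // Only called by StatsUtils when canProvideColStatistics() returned true.
    return statsStore.get(table.getDbName() + "." + table.getTableName());
  }
}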
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
index 95d4b439d16..e2ee8ae07b4 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColStatsProcessor.java
@@ -218,6 +218,9 @@ public class ColStatsProcessor implements IStatsProcessor {
       }
 
       start = System. currentTimeMillis();
+      if (tbl != null && tbl.isNonNative() && 
tbl.getStorageHandler().canSetColStatistics(tbl)) {
+        tbl.getStorageHandler().setColStatistics(tbl, colStats);
+      }
       db.setPartitionColumnStatistics(request);
       end = System.currentTimeMillis();
       LOG.info("Time taken to update " + colStats.size() + " stats : " + ((end 
- start)/1000F) + " seconds.");
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java 
b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
index 9c1926a747e..a758bcdecd1 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
@@ -1069,8 +1069,12 @@ public class StatsUtils {
     }
     if (fetchColStats && !colStatsToRetrieve.isEmpty()) {
       try {
-        List<ColumnStatisticsObj> colStat = 
Hive.get().getTableColumnStatistics(
-            dbName, tabName, colStatsToRetrieve, false);
+        List<ColumnStatisticsObj> colStat;
+        if (table.isNonNative() && 
table.getStorageHandler().canProvideColStatistics(table)) {
+          colStat = table.getStorageHandler().getColStatistics(table);
+        } else {
+          colStat = Hive.get().getTableColumnStatistics(dbName, tabName, 
colStatsToRetrieve, false);
+        }
         stats = convertColStats(colStat, tabName);
       } catch (HiveException e) {
         LOG.error("Failed to retrieve table statistics: ", e);
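
For completeness, this is roughly how the two paths fit together from the HiveQL side; the table name and data below are made up and not taken from the new q-files, and the exact plan output depends on the environment. ANALYZE TABLE ... FOR COLUMNS hands the computed stats to the storage handler through setColStatistics() (ColStatsProcessor change above), and later planning pulls them back through getColStatistics() instead of the metastore (StatsUtils change above):

-- illustrative session only
create external table ice_demo (id int, name string) stored by iceberg;
insert into ice_demo values (1, 'a'), (2, 'b'), (3, 'b');

-- writes column stats via the storage handler for this non-native table
analyze table ice_demo compute statistics for columns;

-- the planner now reads the handler-provided column stats
explain select count(distinct name) from ice_demo;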
