This is an automated email from the ASF dual-hosted git repository.

dengzh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new dd8a867386b HIVE-27163: Column stats are not getting published after an insert into an external table with custom location (Zhihua Deng, reviewed by Krisztian Kasa)
dd8a867386b is described below

commit dd8a867386b605ecd7e8dbec476556bf24f54c6f
Author: dengzh <[email protected]>
AuthorDate: Fri May 12 11:34:25 2023 +0800

    HIVE-27163: Column stats are not getting published after an insert into an external table with custom location (Zhihua Deng, reviewed by Krisztian Kasa)
    
    Closes #4228
---
 .../src/test/results/positive/col_stats.q.out      | 19 ++++----
 .../results/positive/truncate_iceberg_table.q.out  |  2 +-
 .../hive/ql/ddl/table/create/CreateTableDesc.java  | 29 ++++++++----
 .../clientpositive/stats_external_location.q       |  9 ++++
 .../clientpositive/llap/default_file_format.q.out  | 14 ++++++
 .../test/results/clientpositive/llap/mm_exim.q.out |  4 +-
 .../llap/stats_external_location.q.out             | 47 ++++++++++++++++++++
 .../llap/translated_external_rename3.q.out         |  3 --
 .../apache/hadoop/hive/common/StatsSetupConst.java | 51 ++++++++++++++++++++++
 .../apache/hadoop/hive/metastore/Warehouse.java    | 16 +++++--
 .../apache/hadoop/hive/metastore/HMSHandler.java   |  6 +--
 .../hive/metastore/utils/MetaStoreServerUtils.java | 51 +++++++++++++++++++++-
 12 files changed, 218 insertions(+), 33 deletions(-)

diff --git a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
index b1f13fa76b5..851cb106ac3 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/col_stats.q.out
@@ -244,14 +244,14 @@ Stage-0
     Stage-1
       Reducer 2 vectorized
       File Output Operator [FS_8]
-        Select Operator [SEL_7] (rows=9 width=95)
+        Select Operator [SEL_7] (rows=9 width=192)
           Output:["_col0","_col1","_col2"]
         <-Map 1 [SIMPLE_EDGE] vectorized
           SHUFFLE [RS_6]
-            Select Operator [SEL_5] (rows=9 width=95)
+            Select Operator [SEL_5] (rows=9 width=192)
               Output:["_col0","_col1","_col2"]
-              TableScan [TS_0] (rows=9 width=95)
-                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:COMPLETE,Output:["a","b","c"]
+              TableScan [TS_0] (rows=9 width=192)
+                
default@tbl_ice_puffin,tbl_ice_puffin,Tbl:COMPLETE,Col:NONE,Output:["a","b","c"]
 
 PREHOOK: query: drop table if exists tbl_ice_puffin
 PREHOOK: type: DROPTABLE
@@ -339,17 +339,16 @@ POSTHOOK: type: DESCTABLE
 POSTHOOK: Input: default@tbl_ice_puffin
 col_name               a                   
 data_type              int                 
-min                    1                   
-max                    333                 
-num_nulls              0                   
-distinct_count         7                   
+min                                        
+max                                        
+num_nulls                                  
+distinct_count                             
 avg_col_len                                
 max_col_len                                
 num_trues                                  
 num_falses                                 
-bit_vector             HL                  
+bit_vector                                 
 comment                                    
-COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
 PREHOOK: query: drop table if exists tbl_ice
 PREHOOK: type: DROPTABLE
 POSTHOOK: query: drop table if exists tbl_ice
diff --git a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out
index dfab2edec75..07e2a34e423 100644
--- a/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out
+++ b/iceberg/iceberg-handler/src/test/results/positive/truncate_iceberg_table.q.out
@@ -225,7 +225,7 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
-       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"id\":\"true\",\"value\":\"true\"}}
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\"}
        EXTERNAL                TRUE                
        bucketing_version       2                   
        current-schema          
{\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"value\",\"required\":false,\"type\":\"string\"}]}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
index a228cca5045..8e2ca07b384 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/create/CreateTableDesc.java
@@ -20,10 +20,13 @@ package org.apache.hadoop.hive.ql.ddl.table.create;
 
 
 import java.io.Serializable;
+import java.nio.ByteBuffer;
+import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.stream.Collectors;
 
 import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.fs.Path;
@@ -34,6 +37,7 @@ import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.metastore.api.ObjectDictionary;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.metastore.api.SQLCheckConstraint;
@@ -921,14 +925,23 @@ public class CreateTableDesc implements DDLDesc, Serializable {
     // When replicating the statistics for a table will be obtained from the source. Do not
     // reset it on replica.
     if (replicationSpec == null || !replicationSpec.isInReplicationScope()) {
-      if (!this.isCTAS && (tbl.getPath() == null || (!isExternal() && tbl.isEmpty()))) {
-        if (!tbl.isPartitioned() && conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
-          StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(),
-                  MetaStoreUtils.getColumnNames(tbl.getCols()), StatsSetupConst.TRUE);
-        }
-      } else {
-        StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), null,
-                StatsSetupConst.FALSE);
+      // Remove COLUMN_STATS_ACCURATE=true from table's parameter, let the HMS determine if
+      // there is need to add column stats dependent on the table's location.
+      StatsSetupConst.setStatsStateForCreateTable(tbl.getTTable().getParameters(), null,
+          StatsSetupConst.FALSE);
+      if (!this.isCTAS && !tbl.isPartitioned() && !tbl.isTemporary() &&
+          conf.getBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER)) {
+        // Put the flag into the dictionary in order not to pollute the table,
+        // ObjectDictionary is meant to convey repeatitive messages.
+        ObjectDictionary dictionary = tbl.getTTable().isSetDictionary() ?
+            tbl.getTTable().getDictionary() : new ObjectDictionary();
+        List<ByteBuffer> buffers = new ArrayList<>();
+        String statsSetup = StatsSetupConst.ColumnStatsSetup.getStatsSetupAsString(true,
+            storageHandler != null && storageHandler.isMetadataTableSupported() ? "metadata" : null, // Skip metadata directory for Iceberg table
+            MetaStoreUtils.getColumnNames(tbl.getCols()));
+        buffers.add(ByteBuffer.wrap(statsSetup.getBytes(StandardCharsets.UTF_8)));
+        dictionary.putToValues(StatsSetupConst.STATS_FOR_CREATE_TABLE, buffers);
+        tbl.getTTable().setDictionary(dictionary);
       }
     }
 
diff --git a/ql/src/test/queries/clientpositive/stats_external_location.q b/ql/src/test/queries/clientpositive/stats_external_location.q
new file mode 100644
index 00000000000..87985a68d78
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/stats_external_location.q
@@ -0,0 +1,9 @@
+set hive.stats.column.autogather=true;
+set hive.stats.autogather=true;
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/test1;
+
+create external table test_custom(age int, name string) stored as orc location '/tmp/test1';
+insert into test_custom select 1, 'test';
+desc formatted test_custom age;
+
+drop table test_custom;
diff --git a/ql/src/test/results/clientpositive/llap/default_file_format.q.out b/ql/src/test/results/clientpositive/llap/default_file_format.q.out
index 0adf5ae7415..df2621d7912 100644
--- a/ql/src/test/results/clientpositive/llap/default_file_format.q.out
+++ b/ql/src/test/results/clientpositive/llap/default_file_format.q.out
@@ -170,8 +170,13 @@ Retention:                 0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c\":\"true\"}}
        EXTERNAL                TRUE                
        bucketing_version       2                   
+       numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
+       totalSize               0                   
 #### A masked pattern was here ####
                 
 # Storage Information           
@@ -234,9 +239,12 @@ Retention:                 0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c\":\"true\"}}
        EXTERNAL                TRUE                
        bucketing_version       2                   
        numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
        totalSize               0                   
 #### A masked pattern was here ####
                 
@@ -470,9 +478,12 @@ Retention:                 0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c\":\"true\"}}
        EXTERNAL                TRUE                
        bucketing_version       2                   
        numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
        totalSize               0                   
 #### A masked pattern was here ####
                 
@@ -536,9 +547,12 @@ Retention:                 0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
+       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"c\":\"true\"}}
        EXTERNAL                TRUE                
        bucketing_version       2                   
        numFiles                0                   
+       numRows                 0                   
+       rawDataSize             0                   
        totalSize               0                   
 #### A masked pattern was here ####
                 
diff --git a/ql/src/test/results/clientpositive/llap/mm_exim.q.out b/ql/src/test/results/clientpositive/llap/mm_exim.q.out
index c23d711534e..37ff35659c4 100644
--- a/ql/src/test/results/clientpositive/llap/mm_exim.q.out
+++ b/ql/src/test/results/clientpositive/llap/mm_exim.q.out
@@ -312,8 +312,8 @@ Table Type:                 MANAGED_TABLE
 Table Parameters:               
        bucketing_version       2                   
        numFiles                3                   
-       numRows                 0                   
-       rawDataSize             0                   
+       numRows                 6                   
+       rawDataSize             37                  
        totalSize               43                  
        transactional           true                
        transactional_properties        insert_only         
diff --git a/ql/src/test/results/clientpositive/llap/stats_external_location.q.out b/ql/src/test/results/clientpositive/llap/stats_external_location.q.out
new file mode 100644
index 00000000000..f7fc782fbf3
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/stats_external_location.q.out
@@ -0,0 +1,47 @@
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_custom
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_custom
+PREHOOK: query: insert into test_custom select 1, 'test'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_custom
+POSTHOOK: query: insert into test_custom select 1, 'test'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_custom
+POSTHOOK: Lineage: test_custom.age SIMPLE []
+POSTHOOK: Lineage: test_custom.name SIMPLE []
+PREHOOK: query: desc formatted test_custom age
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_custom
+POSTHOOK: query: desc formatted test_custom age
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_custom
+col_name               age                 
+data_type              int                 
+min                    1                   
+max                    1                   
+num_nulls              0                   
+distinct_count         1                   
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"age\":\"true\",\"name\":\"true\"}}
+PREHOOK: query: drop table test_custom
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@test_custom
+PREHOOK: Output: default@test_custom
+POSTHOOK: query: drop table test_custom
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@test_custom
+POSTHOOK: Output: default@test_custom
diff --git a/ql/src/test/results/clientpositive/llap/translated_external_rename3.q.out b/ql/src/test/results/clientpositive/llap/translated_external_rename3.q.out
index c7c920d156b..ff321fa86f0 100644
--- a/ql/src/test/results/clientpositive/llap/translated_external_rename3.q.out
+++ b/ql/src/test/results/clientpositive/llap/translated_external_rename3.q.out
@@ -98,14 +98,11 @@ Retention:                  0
 #### A masked pattern was here ####
 Table Type:            EXTERNAL_TABLE           
 Table Parameters:               
-       COLUMN_STATS_ACCURATE   
{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\"}}
        EXTERNAL                TRUE                
        TRANSLATED_TO_EXTERNAL  TRUE                
        bucketing_version       2                   
        external.table.purge    TRUE                
        numFiles                2                   
-       numRows                 1                   
-       rawDataSize             1                   
        totalSize               4                   
 #### A masked pattern was here ####
                 
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
index 0ee6bcfbfa2..7ca76bf3741 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/common/StatsSetupConst.java
@@ -167,6 +167,8 @@ public class StatsSetupConst {
 
   public static final String CASCADE = "CASCADE";
 
+  public static final String  STATS_FOR_CREATE_TABLE = "setStatsStateForCreateTable";
+
   public static final String TRUE = "true";
 
   public static final String FALSE = "false";
@@ -219,6 +221,55 @@ public class StatsSetupConst {
 
   }
 
+  /**
+   * Class for marking the column statistics when creating tables.
+   */
+  public static class ColumnStatsSetup {
+    private static ObjectReader objectReader;
+    private static ObjectWriter objectWriter;
+    static {
+      ObjectMapper objectMapper = new ObjectMapper();
+      objectReader = objectMapper.readerFor(ColumnStatsSetup.class);
+      objectWriter = objectMapper.writerFor(ColumnStatsSetup.class);
+    }
+
+    @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+    public boolean enabled;
+    @JsonInclude(JsonInclude.Include.NON_DEFAULT)
+    public String fileToEscape;
+    @JsonInclude(JsonInclude.Include.NON_EMPTY)
+    public List<String> columnNames = new ArrayList<>();
+
+    public static ColumnStatsSetup parseStatsSetup(String statsSetup) {
+      if (statsSetup == null) {
+        return new ColumnStatsSetup();
+      }
+      try {
+        return objectReader.readValue(statsSetup);
+      } catch (Exception e) {
+         return new ColumnStatsSetup();
+      }
+    }
+
+    /**
+     * Get json representation of the ColumnStatsSetup
+     */
+    public static String getStatsSetupAsString(boolean enabled,
+        String fileToEscape,
+        List<String> columns) {
+      try {
+        ColumnStatsSetup statsSetup = new ColumnStatsSetup();
+        statsSetup.enabled = enabled;
+        statsSetup.columnNames = new ArrayList<>(columns);
+        statsSetup.fileToEscape = fileToEscape;
+        return objectWriter.writeValueAsString(statsSetup);
+      } catch (Exception e) {
+        // this should not happen
+        throw new RuntimeException(e);
+      }
+    }
+  }
+
   public static boolean areBasicStatsUptoDate(Map<String, String> params) {
     if (params == null) {
       return false;
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
index 10c9fb26d22..2952276020c 100755
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/Warehouse.java
@@ -32,6 +32,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.commons.lang3.StringUtils;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.metastore.api.Catalog;
 import org.apache.hadoop.hive.metastore.api.DatabaseType;
@@ -500,16 +501,23 @@ public class Warehouse {
   }
 
   public boolean isEmptyDir(Path path) throws IOException, MetaException {
+    return isEmptyDir(path, null);
+  }
+
+  public boolean isEmptyDir(Path path, PathFilter pathFilter)
+      throws IOException, MetaException {
     try {
-      int listCount = getFs(path).listStatus(path).length;
-      if (listCount == 0) {
-        return true;
+      final int listCount;
+      if (pathFilter == null) {
+        listCount = getFs(path).listStatus(path).length;
+      } else {
+        listCount = getFs(path).listStatus(path, pathFilter).length;
       }
+      return listCount == 0;
     } catch (FileNotFoundException fnfe) {
       // File named by path doesn't exist; nothing to validate.
       return false;
     }
-    return false;
   }
 
   public boolean isWritable(Path path) throws IOException {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
index d544bf30ed8..9033358a7cd 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HMSHandler.java
@@ -2361,10 +2361,8 @@ public class HMSHandler extends FacebookBase implements IHMSHandler {
           madeDir = true;
         }
       }
-      if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
-          !MetaStoreUtils.isView(tbl)) {
-        MetaStoreServerUtils.updateTableStatsSlow(db, tbl, wh, madeDir, false, envContext);
-      }
+
+      MetaStoreServerUtils.updateTableStatsForCreateTable(wh, db, tbl, envContext, conf, tblPath, madeDir);
 
       // set create time
       long time = System.currentTimeMillis() / 1000;
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
index 6a15b089cd6..9ef97f0c578 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/utils/MetaStoreServerUtils.java
@@ -27,6 +27,7 @@ import java.net.InetSocketAddress;
 import java.net.ServerSocket;
 import java.net.Socket;
 import java.net.UnknownHostException;
+import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.nio.charset.StandardCharsets;
 import java.security.MessageDigest;
@@ -34,7 +35,7 @@ import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashMap;
-import java.util.Iterator;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Objects;
@@ -65,9 +66,11 @@ import org.apache.commons.lang3.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.hive.common.StatsSetupConst;
 import org.apache.hadoop.hive.common.TableName;
 import org.apache.hadoop.hive.metastore.ColumnType;
+import org.apache.hadoop.hive.metastore.ExceptionHandler;
 import org.apache.hadoop.hive.metastore.HiveMetaStore;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.Warehouse;
@@ -510,6 +513,52 @@ public class MetaStoreServerUtils {
     params.remove(StatsSetupConst.NUM_ERASURE_CODED_FILES);
   }
 
+  public static void updateTableStatsForCreateTable(Warehouse wh, Database db, Table tbl,
+      EnvironmentContext envContext, Configuration conf, Path tblPath, boolean newDir)
+      throws MetaException {
+    // If the created table is a view, skip generating the stats
+    if (MetaStoreUtils.isView(tbl)) {
+      return;
+    }
+    assert tblPath != null;
+    if (tbl.isSetDictionary() && tbl.getDictionary().getValues() != null) {
+      List<ByteBuffer> values = tbl.getDictionary().getValues().
+          remove(StatsSetupConst.STATS_FOR_CREATE_TABLE);
+      ByteBuffer buffer;
+      if (values != null && values.size() > 0 && (buffer = values.get(0)).hasArray()) {
+        String val = new String(buffer.array(), StandardCharsets.UTF_8);
+        StatsSetupConst.ColumnStatsSetup statsSetup = StatsSetupConst.ColumnStatsSetup.parseStatsSetup(val);
+        if (statsSetup.enabled) {
+          try {
+            PathFilter pathFilter = FileUtils.HIDDEN_FILES_PATH_FILTER;
+            if (StringUtils.isNotEmpty(statsSetup.fileToEscape)) {
+              final Set<String> filesToEscape = new HashSet<>();
+              for (String fileName : statsSetup.fileToEscape.split(",")) {
+                filesToEscape.add(fileName.trim());
+              }
+              pathFilter = p -> !filesToEscape.contains(p.getName());
+            }
+            // Set the column stats true in order to make it merge-able
+            if (newDir || wh.isEmptyDir(tblPath, pathFilter)) {
+              List<String> columns = statsSetup.columnNames;
+              if (columns == null || columns.isEmpty()) {
+                columns = getColumnNames(tbl.getSd().getCols());
+              }
+              StatsSetupConst.setStatsStateForCreateTable(tbl.getParameters(), columns, StatsSetupConst.TRUE);
+            }
+          } catch (IOException e) {
+            LOG.error("Error while checking the table directory: " + tblPath + " is empty or not", e);
+            throw ExceptionHandler.newMetaException(e);
+          }
+        }
+      }
+    }
+
+    if (MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.STATS_AUTO_GATHER)) {
+      updateTableStatsSlow(db, tbl, wh, newDir, false, envContext);
+    }
+  }
+
   /**
    * Compare the names, types and comments of two lists of {@link FieldSchema}.
    * <p>

Reply via email to