This is an automated email from the ASF dual-hosted git repository.

hashutosh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 947b7a4  HIVE-23536 : Provide an option to skip stats generation for 
major compaction (Peter Vary via Ashutosh Chauhan)
947b7a4 is described below

commit 947b7a44896fa57bc4e2ddaa6014cc4cb2c7002e
Author: Peter Vary <[email protected]>
AuthorDate: Mon May 25 16:13:32 2020 -0700

    HIVE-23536 : Provide an option to skip stats generation for major 
compaction (Peter Vary via Ashutosh Chauhan)
    
    Signed-off-by: Ashutosh Chauhan <[email protected]>
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |  6 +++
 .../hive/ql/txn/compactor/TestCompactor.java       | 52 ++++++++++++++++++++++
 .../hadoop/hive/ql/txn/compactor/CompactorMR.java  |  5 ++-
 .../hadoop/hive/ql/txn/compactor/Worker.java       |  8 ++--
 4 files changed, 67 insertions(+), 4 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java 
b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a00d907..8094d28 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -2908,6 +2908,12 @@ public class HiveConf extends Configuration {
 
     HIVE_COMPACTOR_WAIT_TIMEOUT("hive.compactor.wait.timeout", 300000L, "Time 
out in "
         + "milliseconds for blocking compaction. It's value has to be higher 
than 2000 milliseconds. "),
+
+    HIVE_MR_COMPACTOR_GATHER_STATS("hive.mr.compactor.gather.stats", true, "If 
set to true MAJOR compaction " +
+        "will gather stats if there are stats already associated with the 
table/partition.\n" +
+        "Turn this off to save some resources and the stats are not used 
anyway.\n" +
+        "Works only for MR based compaction, CRUD based compaction uses 
hive.stats.autogather."),
+
     /**
      * @deprecated Use MetastoreConf.COMPACTOR_INITIATOR_FAILED_THRESHOLD
      */
diff --git 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
index c687f14..32fe535 100644
--- 
a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
+++ 
b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/txn/compactor/TestCompactor.java
@@ -129,6 +129,7 @@ public class TestCompactor {
     hiveConf.setVar(HiveConf.ConfVars.POSTEXECHOOKS, "");
     hiveConf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, TEST_WAREHOUSE_DIR);
     hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, 
HiveInputFormat.class.getName());
+    hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSAUTOGATHER, false);
 
     TxnDbUtil.setConfValues(hiveConf);
     TxnDbUtil.cleanDb(hiveConf);
@@ -1468,6 +1469,57 @@ public class TestCompactor {
     }
   }
 
+  @Test
+  public void testCompactorGatherStats() throws Exception {
+    String dbName = "default";
+    String tableName = "stats_comp_test";
+    List<String> colNames = Arrays.asList("a");
+    executeStatementOnDriver("drop table if exists " + dbName + "." + 
tableName, driver);
+    executeStatementOnDriver("create table " + dbName + "." + tableName +
+        " (a INT) STORED AS ORC TBLPROPERTIES ('transactional'='true')", 
driver);
+    executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(1)", driver);
+    executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(1)", driver);
+
+    TxnStore txnHandler = TxnUtils.getTxnStore(conf);
+    txnHandler.compact(new CompactionRequest(dbName, tableName, 
CompactionType.MAJOR));
+    runWorker(conf);
+
+    // Make sure we do not have statistics for this table yet.
+    // Compaction regenerates stats only if some already exist.
+    List<ColumnStatisticsObj> colStats = 
msClient.getTableColumnStatistics(dbName,
+        tableName, colNames, Constants.HIVE_ENGINE);
+    assertEquals("No stats should be there for the table", 0, colStats.size());
+
+    executeStatementOnDriver("analyze table " + dbName + "." + tableName + " 
compute statistics for columns", driver);
+    executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(2)", driver);
+
+    // Make sure we have old statistics for the table
+    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, 
Constants.HIVE_ENGINE);
+    assertEquals("Stats should be there", 1, colStats.size());
+    assertEquals("Value should contain old data", 1, 
colStats.get(0).getStatsData().getLongStats().getHighValue());
+    assertEquals("Value should contain old data", 1, 
colStats.get(0).getStatsData().getLongStats().getLowValue());
+
+    txnHandler.compact(new CompactionRequest(dbName, tableName, 
CompactionType.MAJOR));
+    runWorker(conf);
+    // Make sure the statistics are updated for the table
+    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, 
Constants.HIVE_ENGINE);
+    assertEquals("Stats should be there", 1, colStats.size());
+    assertEquals("Value should contain new data", 2, 
colStats.get(0).getStatsData().getLongStats().getHighValue());
+    assertEquals("Value should contain new data", 1, 
colStats.get(0).getStatsData().getLongStats().getLowValue());
+
+    executeStatementOnDriver("insert into " + dbName + "." + tableName + " 
values(3)", driver);
+    HiveConf workerConf = new HiveConf(conf);
+    workerConf.setBoolVar(ConfVars.HIVE_MR_COMPACTOR_GATHER_STATS, false);
+
+    txnHandler.compact(new CompactionRequest(dbName, tableName, 
CompactionType.MAJOR));
+    runWorker(workerConf);
+    // Make sure the statistics are NOT updated for the table
+    colStats = msClient.getTableColumnStatistics(dbName, tableName, colNames, 
Constants.HIVE_ENGINE);
+    assertEquals("Stats should be there", 1, colStats.size());
+    assertEquals("Value should contain new data", 2, 
colStats.get(0).getStatsData().getLongStats().getHighValue());
+    assertEquals("Value should contain new data", 1, 
colStats.get(0).getStatsData().getLongStats().getLowValue());
+  }
+
   /**
    * Users have the choice of specifying compaction related tblproperties 
either in CREATE TABLE
    * statement or in ALTER TABLE .. COMPACT statement. This tests both cases.
diff --git 
a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index 018c733..0425142 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -211,6 +211,7 @@ public class CompactorMR {
    * @param sd metastore storage descriptor
    * @param writeIds list of valid write ids
    * @param ci CompactionInfo
+   * @param su StatsUpdater which is null if no stats gathering is needed
    * @throws java.io.IOException if the job fails
    */
   void run(HiveConf conf, String jobName, Table t, Partition p, 
StorageDescriptor sd, ValidWriteIdList writeIds,
@@ -294,7 +295,9 @@ public class CompactorMR {
     launchCompactionJob(job, baseDir, ci.type, dirsToSearch, 
dir.getCurrentDirectories(),
       dir.getCurrentDirectories().size(), dir.getObsolete().size(), conf, msc, 
ci.id, jobName);
 
-    su.gatherStats();
+    if (su != null) {
+      su.gatherStats();
+    }
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java 
b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
index a96cf1e..8180adc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Worker.java
@@ -91,6 +91,7 @@ public class Worker extends RemoteCompactorThread implements 
MetaStoreThread {
   @Override
   public void run() {
     LOG.info("Starting Worker thread");
+    boolean computeStats = 
conf.getBoolVar(HiveConf.ConfVars.HIVE_MR_COMPACTOR_GATHER_STATS);
     do {
       boolean launchedJob = false;
       // Make sure nothing escapes this run method and kills the metastore at 
large,
@@ -201,10 +202,11 @@ public class Worker extends RemoteCompactorThread 
implements MetaStoreThread {
           continue;
         }
 
-        LOG.info("Starting " + ci.type.toString() + " compaction for " + 
ci.getFullPartitionName() + " in " + JavaUtils.txnIdToString(compactorTxnId));
-        final StatsUpdater su = StatsUpdater.init(ci, msc.findColumnsWithStats(
+        LOG.info("Starting " + ci.type.toString() + " compaction for " + 
ci.getFullPartitionName() + " in " +
+            JavaUtils.txnIdToString(compactorTxnId) + " with compute stats set 
to " + computeStats);
+        final StatsUpdater su = computeStats ? StatsUpdater.init(ci, 
msc.findColumnsWithStats(
             CompactionInfo.compactionInfoToStruct(ci)), conf,
-          runJobAsSelf(ci.runAs) ? ci.runAs : t.getOwner());
+          runJobAsSelf(ci.runAs) ? ci.runAs : t.getOwner()) : null;
         final CompactorMR mr = new CompactorMR();
         launchedJob = true;
         try {

Reply via email to