[ 
https://issues.apache.org/jira/browse/HIVE-24943?focusedWorklogId=574695&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-574695
 ]

ASF GitHub Bot logged work on HIVE-24943:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 31/Mar/21 08:56
            Start Date: 31/Mar/21 08:56
    Worklog Time Spent: 10m 
      Work Description: rbalamohan commented on a change in pull request #2134:
URL: https://github.com/apache/hive/pull/2134#discussion_r604717287



##########
File path: ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java
##########
@@ -444,29 +447,47 @@ private static boolean isDynPartIngest(Table t, 
CompactionInfo ci){
     return false;
   }
 
-  private boolean isEligibleForCompaction(CompactionInfo ci, 
ShowCompactResponse currentCompactions) {
-    LOG.info("Checking to see if we should compact " + 
ci.getFullPartitionName());
-
-    // Check if we already have initiated or are working on a compaction for 
this partition
-    // or table. If so, skip it. If we are just waiting on cleaning we can 
still check,
-    // as it may be time to compact again even though we haven't cleaned.
-    // todo: this is not robust. You can easily run `alter table` to start a 
compaction between
-    // the time currentCompactions is generated and now
-    if (lookForCurrentCompactions(currentCompactions, ci)) {
-      LOG.info("Found currently initiated or working compaction for " +
-          ci.getFullPartitionName() + " so we will not initiate another 
compaction");
-      return false;
-    }
-
+  private boolean isEligibleForCompaction(CompactionInfo ci,
+      ShowCompactResponse currentCompactions, Set<String> skipDBs, Set<String> 
skipTables) {
     try {
+      if (skipDBs.contains(ci.dbname)) {
+        LOG.debug("Skipping {}::{}, skipDBs:{}", ci.dbname, ci.tableName, 
skipDBs);
+        return false;
+      } else {
+        if (replIsCompactionDisabledForDatabase(ci.dbname)) {
+          skipDBs.add(ci.dbname);
+          LOG.debug("Skipping {}::{}, skipDBs:{}", ci.dbname, ci.tableName, 
skipDBs);

Review comment:
       Taken care of in recent commit.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 574695)
    Time Spent: 40m  (was: 0.5h)

> Initiator: Optimise when tables/partitions are not eligible for compaction
> --------------------------------------------------------------------------
>
>                 Key: HIVE-24943
>                 URL: https://issues.apache.org/jira/browse/HIVE-24943
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: Rajesh Balamohan
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 40m
>  Remaining Estimate: 0h
>
> When an entire database is not eligible for compaction due to replication, 
> the Initiator could skip the checks for all of its tables/partitions to save 
> HMS resources. Currently it runs in a tight loop, checking every 
> table/partition of a database that isn't eligible for compaction.
> E.g., note that repl parameters are enabled for the "delete_orc_10" database. 
> So as long as "ReplUtils.isFirstIncPending(database.getParameters())" holds, 
> the entire database ends up being considered "not chosen for compaction".
> Ref: 
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java#L469]
> [https://github.com/apache/hive/blob/master/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/Initiator.java#L469]
>  
> {noformat}
> +----------------------------------------------------+
> |                   createtab_stmt                   |
> +----------------------------------------------------+
> | CREATE TABLE `delete_orc_10.test_con_s3_part_1`(   |
> |   `d_date_id` string,                              |
> |   `d_date` date,                                   |
> |   `d_dom` int,                                     |
> |   `d_year` int)                                    |
> | PARTITIONED BY (                                   |
> |   `d_date_sk` bigint)                              |
> | ROW FORMAT SERDE                                   |
> |   'org.apache.hadoop.hive.ql.io.orc.OrcSerde'      |
> | STORED AS INPUTFORMAT                              |
> |   'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'  |
> | OUTPUTFORMAT                                       |
> |   'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' |
> | LOCATION                                           |
> |   's3a://blah/delete_orc_10.db/test_con_s3_part_1' |
> | TBLPROPERTIES (                                    |
> |   'bucketing_version'='2',                         |
> |   'transactional'='true',                          |
> |   'transactional_properties'='default',            |
> |   'transient_lastDdlTime'='1610926861')            |
> +----------------------------------------------------+
> +----------------------------------------------------+
> |                   createdb_stmt                    |
> +----------------------------------------------------+
> | CREATE DATABASE `delete_orc_10`                    |
> | LOCATION                                           |
> |   's3a://blah/delete_orc_10.db' |
> | WITH DBPROPERTIES (                                |
> |   
> 'hive.repl.ckpt.key'='s3a://blha/loc/dHBjZHNfYmluX3BhcnRpdGlvbmVkX29yY18xMDAwMA==/7f1c5529-e17a-4750-bf16-a9861c9589dc/hive',
>   |
> |   'hive.repl.first.inc.pending'='true',            |
> |   'repl.last.id'='95653')                          |
> +----------------------------------------------------+
> {noformat}
> {noformat}
> <14>1 2021-03-25T23:42:50.095Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.095Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_1.d_date_sk=2447933
> <14>1 2021-03-25T23:42:50.118Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.118Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_debug.d_date_sk=2471951
> <14>1 2021-03-25T23:42:50.142Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.142Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_debug_1.d_date_sk=2421767
> <14>1 2021-03-25T23:42:50.164Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.164Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_1.d_date_sk=2479669
> <14>1 2021-03-25T23:42:50.185Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.185Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_2.d_date_sk=2482553
> <14>1 2021-03-25T23:42:50.205Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.205Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_debug_1.d_date_sk=2444638
> <14>1 2021-03-25T23:42:50.227Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.227Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_debug_1.d_date_sk=2480095
> <14>1 2021-03-25T23:42:50.253Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.253Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_1.d_date_sk=2468447
> <14>1 2021-03-25T23:42:50.281Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.281Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_1.d_date_sk=2440357
> <14>1 2021-03-25T23:42:50.308Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> <14>1 2021-03-25T23:42:50.308Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.Initiator" level="INFO" thread="Thread-12"] Checking to see 
> if we should compact delete_orc_10.test_con_s3_part_debug_1.d_date_sk=2465904
> <14>1 2021-03-25T23:42:50.332Z 
> metastore-0.metastore-service.warehouse-1616689513-66lb.svc.cluster.local 
> metastore 1 e6cad49f-5d36-4a73-8da1-083e205976ee [mdc@18060 
> class="compactor.CompactorThread" level="INFO" thread="Thread-12"] Compaction 
> is disabled for database delete_orc_10
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to