[ 
https://issues.apache.org/jira/browse/HIVE-25345?focusedWorklogId=625659&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-625659
 ]

ASF GitHub Bot logged work on HIVE-25345:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 20/Jul/21 16:03
            Start Date: 20/Jul/21 16:03
    Worklog Time Spent: 10m 
      Work Description: lcspinter commented on a change in pull request #2493:
URL: https://github.com/apache/hive/pull/2493#discussion_r673259608



##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/metrics/AcidMetricService.java
##########
@@ -85,36 +85,113 @@ public void run() {
 
   private void collectMetrics() throws MetaException {
     ShowCompactResponse currentCompactions = txnHandler.showCompact(new 
ShowCompactRequest());
-    updateMetricsFromShowCompact(currentCompactions);
+    updateMetricsFromShowCompact(currentCompactions, conf);
     updateDBMetrics();
   }
 
   private void updateDBMetrics() throws MetaException {
     MetricsInfo metrics = txnHandler.getMetricsInfo();
     
Metrics.getOrCreateGauge(NUM_TXN_TO_WRITEID).set(metrics.getTxnToWriteIdCount());
+    if (metrics.getTxnToWriteIdCount() >=
+        MetastoreConf.getIntVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_TXN_TO_WRITEID_RECORD_THRESHOLD_WARNING) &&
+        metrics.getTxnToWriteIdCount() <
+            MetastoreConf.getIntVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_TXN_TO_WRITEID_RECORD_THRESHOLD_ERROR)) {
+      LOG.warn("An excessive amount of (" + metrics.getTxnToWriteIdCount() + 
") Hive ACID metadata found in " +
+          "TXN_TO_WRITEID table, which can cause serious performance 
degradation.");
+    } else if (metrics.getTxnToWriteIdCount() >=
+        MetastoreConf.getIntVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_TXN_TO_WRITEID_RECORD_THRESHOLD_ERROR)) {
+      LOG.error("An excessive amount of (" + metrics.getTxnToWriteIdCount() + 
") Hive ACID metadata found in " +
+          "TXN_TO_WRITEID table, which can cause serious performance 
degradation.");
+    }
     
Metrics.getOrCreateGauge(NUM_COMPLETED_TXN_COMPONENTS).set(metrics.getCompletedTxnsCount());
-
+    if (metrics.getCompletedTxnsCount() >=
+        MetastoreConf.getIntVar(conf,
+            
MetastoreConf.ConfVars.COMPACTOR_COMPLETED_TXN_COMPONENTS_RECORD_THRESHOLD_WARNING)
 &&
+        metrics.getCompletedTxnsCount() <
+            MetastoreConf.getIntVar(conf,
+                
MetastoreConf.ConfVars.COMPACTOR_COMPLETED_TXN_COMPONENTS_RECORD_THRESHOLD_ERROR))
 {
+      LOG.warn("An excessive amount of (" + metrics.getCompletedTxnsCount() + 
") Hive ACID metadata found in " +
+          "COMPLETED_TXN_COMPONENTS table, which can cause serious performance 
degradation.");
+    } else if (metrics.getCompletedTxnsCount() >= MetastoreConf.getIntVar(conf,
+        
MetastoreConf.ConfVars.COMPACTOR_COMPLETED_TXN_COMPONENTS_RECORD_THRESHOLD_ERROR))
 {
+      LOG.error("An excessive amount of (" + metrics.getCompletedTxnsCount() + 
") Hive ACID metadata found in " +
+          "COMPLETED_TXN_COMPONENTS table, which can cause serious performance 
degradation.");
+    }
     
Metrics.getOrCreateGauge(NUM_OPEN_REPL_TXNS).set(metrics.getOpenReplTxnsCount());
     
Metrics.getOrCreateGauge(OLDEST_OPEN_REPL_TXN_ID).set(metrics.getOldestOpenReplTxnId());
     
Metrics.getOrCreateGauge(OLDEST_OPEN_REPL_TXN_AGE).set(metrics.getOldestOpenReplTxnAge());
+    if (metrics.getOldestOpenReplTxnAge() >=
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_REPLICATION_OPENTXN_THRESHOLD_WARNING,
+            TimeUnit.SECONDS) && metrics.getOldestOpenReplTxnAge() <
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_REPLICATION_OPENTXN_THRESHOLD_ERROR,
+            TimeUnit.SECONDS)) {
+      LOG.warn("A replication transaction has been open for " + 
metrics.getOldestOpenReplTxnAge() + " seconds. " +
+          "Before you abort a transaction that was created by replication, and 
which has been open a long time, " +
+          "make sure that the hive.repl.txn.timeout threshold has expired.");
+    } else if (metrics.getOldestOpenReplTxnAge() >=
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_REPLICATION_OPENTXN_THRESHOLD_ERROR,
+        TimeUnit.SECONDS)) {
+      LOG.error("A replication transaction has been open for " + 
metrics.getOldestOpenReplTxnAge() + " seconds. " +
+          "Before you abort a transaction that was created by replication, and 
which has been open a long time, " +
+          "make sure that the hive.repl.txn.timeout threshold has expired.");
+    }
     
Metrics.getOrCreateGauge(NUM_OPEN_NON_REPL_TXNS).set(metrics.getOpenNonReplTxnsCount());
     
Metrics.getOrCreateGauge(OLDEST_OPEN_NON_REPL_TXN_ID).set(metrics.getOldestOpenNonReplTxnId());
     
Metrics.getOrCreateGauge(OLDEST_OPEN_NON_REPL_TXN_AGE).set(metrics.getOldestOpenNonReplTxnAge());
+    if (metrics.getOldestOpenNonReplTxnAge() >=
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_OPENTXN_THRESHOLD_WARNING,
+            TimeUnit.SECONDS)
+        && metrics.getOldestOpenNonReplTxnAge() <
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_OPENTXN_THRESHOLD_ERROR,
+            TimeUnit.SECONDS)) {
+      LOG.warn("Found an open transaction with an age of " + 
metrics.getOldestOpenNonReplTxnAge() + " seconds.");
+    } else if (metrics.getOldestOpenNonReplTxnAge() >=
+        MetastoreConf.getTimeVar(conf, 
MetastoreConf.ConfVars.COMPACTOR_OLDEST_OPENTXN_THRESHOLD_ERROR,
+            TimeUnit.SECONDS)) {
+      LOG.error("Found an open transaction with an age of " + 
metrics.getOldestOpenNonReplTxnAge() + " seconds.");
+    }
 
     
Metrics.getOrCreateGauge(NUM_ABORTED_TXNS).set(metrics.getAbortedTxnsCount());
     
Metrics.getOrCreateGauge(OLDEST_ABORTED_TXN_ID).set(metrics.getOldestAbortedTxnId());
     
Metrics.getOrCreateGauge(OLDEST_ABORTED_TXN_AGE).set(metrics.getOldestAbortedTxnAge());
+    if (metrics.getOldestAbortedTxnAge() >=
+        MetastoreConf.getTimeVar(conf,
+            
MetastoreConf.ConfVars.COMPACTOR_OLDEST_UNCLEANED_ABORTEDTXN_TIME_THRESHOLD_WARNING,
+            TimeUnit.SECONDS) &&
+        metrics.getOldestAbortedTxnAge() <
+            MetastoreConf.getTimeVar(conf,
+                
MetastoreConf.ConfVars.COMPACTOR_OLDEST_UNCLEANED_ABORTEDTXN_TIME_THRESHOLD_ERROR,
+                TimeUnit.SECONDS)) {
+      LOG.warn("Found an aborted transaction with an age of " + 
metrics.getOldestAbortedTxnAge() + " seconds.");
+    } else if (metrics.getOldestAbortedTxnAge() >=
+        MetastoreConf.getTimeVar(conf,
+            
MetastoreConf.ConfVars.COMPACTOR_OLDEST_UNCLEANED_ABORTEDTXN_TIME_THRESHOLD_ERROR,
+            TimeUnit.SECONDS)) {
+      LOG.error("Found an aborted transaction with an age of " + 
metrics.getOldestAbortedTxnAge() + " seconds.");
+    }
 
     Metrics.getOrCreateGauge(NUM_LOCKS).set(metrics.getLocksCount());
     Metrics.getOrCreateGauge(OLDEST_LOCK_AGE).set(metrics.getOldestLockAge());
 
     
Metrics.getOrCreateGauge(TABLES_WITH_X_ABORTED_TXNS).set(metrics.getTablesWithXAbortedTxns());
+    if (metrics.getOldestAbortedTxnAge() >

Review comment:
       Yes, that would definitely add some value.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 625659)
    Time Spent: 1h 10m  (was: 1h)

> Add logging based on new compaction metrics
> -------------------------------------------
>
>                 Key: HIVE-25345
>                 URL: https://issues.apache.org/jira/browse/HIVE-25345
>             Project: Hive
>          Issue Type: Improvement
>            Reporter: László Pintér
>            Assignee: László Pintér
>            Priority: Major
>              Labels: pull-request-available
>          Time Spent: 1h 10m
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to