This is an automated email from the ASF dual-hosted git repository.

xhsun pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 7a62166  [TE] Disable alerts if it has no success run within 30 days (#5208)
7a62166 is described below

commit 7a621660eacf3ac9f52d94304e6c2531db976e5d
Author: Xiaohui Sun <[email protected]>
AuthorDate: Sun Apr 19 09:28:28 2020 -0700

    [TE] Disable alerts if it has no success run within 30 days (#5208)
    
    * [TE] Disable alerts if it has no success run within 30 days
    
    * [TE] Check if the update time is null
    
    * [TE] Send notification mail when disabling alerts
    
    * [TE] Updated typo and logs for disabling alerts
    
    Co-authored-by: Xiaohui Sun <[email protected]>
---
 .../anomaly/monitor/MonitorTaskRunner.java         | 65 ++++++++++++++++++++++
 .../thirdeye/datalayer/dao/GenericPojoDao.java     |  4 ++
 .../pinot/thirdeye/datalayer/dto/AbstractDTO.java  | 10 ++++
 3 files changed, 79 insertions(+)

diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/anomaly/monitor/MonitorTaskRunner.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/anomaly/monitor/MonitorTaskRunner.java
index 545af0a..763e75b 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/anomaly/monitor/MonitorTaskRunner.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/anomaly/monitor/MonitorTaskRunner.java
@@ -19,6 +19,11 @@
 
 package org.apache.pinot.thirdeye.anomaly.monitor;
 
+import java.sql.Timestamp;
+import org.apache.commons.mail.EmailException;
+import org.apache.commons.mail.HtmlEmail;
+import org.apache.pinot.thirdeye.anomaly.ThirdEyeAnomalyConfiguration;
+import org.apache.pinot.thirdeye.anomaly.alert.util.EmailHelper;
 import org.apache.pinot.thirdeye.anomaly.job.JobConstants.JobStatus;
 import org.apache.pinot.thirdeye.anomaly.monitor.MonitorConstants.MonitorType;
 import org.apache.pinot.thirdeye.anomaly.task.TaskConstants.TaskStatus;
@@ -26,6 +31,7 @@ import org.apache.pinot.thirdeye.anomaly.task.TaskContext;
 import org.apache.pinot.thirdeye.anomaly.task.TaskInfo;
 import org.apache.pinot.thirdeye.anomaly.task.TaskResult;
 import org.apache.pinot.thirdeye.anomaly.task.TaskRunner;
+import org.apache.pinot.thirdeye.anomaly.utils.EmailUtils;
 import org.apache.pinot.thirdeye.datalayer.bao.DetectionConfigManager;
 import org.apache.pinot.thirdeye.datalayer.dto.DetectionConfigDTO;
 import org.apache.pinot.thirdeye.datalayer.dto.JobDTO;
@@ -39,15 +45,22 @@ import java.util.Map;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
 import org.apache.commons.collections4.CollectionUtils;
+import 
org.apache.pinot.thirdeye.detection.alert.DetectionAlertFilterRecipients;
 import org.apache.pinot.thirdeye.detection.health.DetectionHealth;
+import org.apache.pinot.thirdeye.notification.commons.SmtpConfiguration;
 import org.joda.time.DateTime;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
+import static 
org.apache.pinot.thirdeye.notification.commons.SmtpConfiguration.*;
+
+
 public class MonitorTaskRunner implements TaskRunner {
 
   private static final Logger LOG = 
LoggerFactory.getLogger(MonitorJobRunner.class);
   private static final long MAX_TASK_TIME = TimeUnit.HOURS.toMillis(6);
+  private static final long MAX_FAILED_DISABLE_DAYS = 30;
+  private ThirdEyeAnomalyConfiguration thirdeyeConfig;
 
   private DAORegistry DAO_REGISTRY = DAORegistry.getInstance();
 
@@ -56,6 +69,7 @@ public class MonitorTaskRunner implements TaskRunner {
 
     MonitorTaskInfo monitorTaskInfo = (MonitorTaskInfo) taskInfo;
     MonitorType monitorType = monitorTaskInfo.getMonitorType();
+    thirdeyeConfig = taskContext.getThirdEyeAnomalyConfiguration();
     if (monitorType.equals(MonitorType.UPDATE)) {
       executeMonitorUpdate(monitorTaskInfo);
     } else if (monitorType.equals(MonitorType.EXPIRE)) {
@@ -120,11 +134,62 @@ public class MonitorTaskRunner implements TaskRunner {
 
       // update detection health
       updateDetectionHealth();
+
+      // disable alerts that failed consecutively for a long time
+      disableLongFailedAlerts();
+
     } catch (Exception e) {
       LOG.error("Exception in monitor update task", e);
     }
   }
 
+  /**
+   * Disable the alert if it was updated before {@MAX_TASK_FAIL_DAYS} but 
there is no success run since then.
+   */
+  private void disableLongFailedAlerts() {
+    DetectionConfigManager detectionDAO = 
DAO_REGISTRY.getDetectionConfigManager();
+    List<DetectionConfigDTO> detectionConfigs = detectionDAO.findAllActive();
+    long currentTimeMillis = System.currentTimeMillis();
+    long maxTaskFailMillis = TimeUnit.DAYS.toMillis(MAX_FAILED_DISABLE_DAYS);
+    for (DetectionConfigDTO config : detectionConfigs) {
+      try {
+        Timestamp updateTime = config.getUpdateTime();
+        if (updateTime != null && config.getHealth() != null && 
config.getHealth().getDetectionTaskStatus() != null) {
+          long lastTaskExecutionTime = 
config.getHealth().getDetectionTaskStatus().getLastTaskExecutionTime();
+          // lastTaskExecutionTime == -1L is used for backward compatibility. 
Currently we have many long failing alerts have -1L.
+          if (updateTime.getTime() <= currentTimeMillis - maxTaskFailMillis && 
(lastTaskExecutionTime == -1L
+              || lastTaskExecutionTime <= currentTimeMillis - 
maxTaskFailMillis)) {
+            config.setActive(false);
+            detectionDAO.update(config);
+            sendDisableAlertNotificationEmail(config);
+            LOG.info("Disabled alert {} since it failed more than {} days. " + 
"Task last update time: {}. Last success task execution time: {}",
+                config.getId(), MAX_FAILED_DISABLE_DAYS, 
config.getUpdateTime(), lastTaskExecutionTime);
+          }
+        }
+      } catch (Exception e) {
+        LOG.error("Exception in disabling alert ", e);
+      }
+    }
+  }
+
+  private void sendDisableAlertNotificationEmail(DetectionConfigDTO config) 
throws EmailException {
+    HtmlEmail email = new HtmlEmail();
+    String subject = String.format("ThirdEye alert disabled: %s", 
config.getName());
+    String textBody = String.format(
+        "Your alert has failed for %d days and was disabled. Please fix your 
alert and enable it again. \n" + "Here is the link for your alert: 
https://thirdeye.corp.linkedin.com/app/#/manage/explore/%d",
+        MAX_FAILED_DISABLE_DAYS, config.getId());
+    Set<String> recipients = 
EmailUtils.getValidEmailAddresses(thirdeyeConfig.getFailureToAddress());
+    if (config.getCreatedBy() != null && 
!config.getCreatedBy().equals("no-auth-user")) {
+      recipients.add(config.getCreatedBy());
+    }
+    if (config.getUpdatedBy() != null && 
!config.getUpdatedBy().equals("no-auth-user")) {
+      recipients.add(config.getUpdatedBy());
+    }
+    EmailHelper.sendEmailWithTextBody(email,
+        
SmtpConfiguration.createFromProperties(thirdeyeConfig.getAlerterConfiguration().get(SMTP_CONFIG_KEY)),
 subject,
+        textBody, thirdeyeConfig.getFailureFromAddress(), new 
DetectionAlertFilterRecipients(recipients));
+  }
+
   private void updateDetectionHealth() {
     DetectionConfigManager detectionDAO = 
DAO_REGISTRY.getDetectionConfigManager();
     List<DetectionConfigDTO> detectionConfigs = detectionDAO.findAllActive();
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dao/GenericPojoDao.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dao/GenericPojoDao.java
index 493ef91..8e983c5 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dao/GenericPojoDao.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dao/GenericPojoDao.java
@@ -429,6 +429,7 @@ public class GenericPojoDao {
 
               E e = OBJECT_MAPPER.readValue(entity.getJsonVal(), beanClass);
               e.setId(entity.getId());
+              e.setUpdateTime(entity.getUpdateTime());
               ret.add(e);
             }
           }
@@ -461,6 +462,7 @@ public class GenericPojoDao {
             e = OBJECT_MAPPER.readValue(genericJsonEntity.getJsonVal(), 
pojoClass);
             e.setId(genericJsonEntity.getId());
             e.setVersion(genericJsonEntity.getVersion());
+            e.setUpdateTime(genericJsonEntity.getUpdateTime());
           }
           return e;
         }
@@ -492,6 +494,7 @@ public class GenericPojoDao {
               E e = OBJECT_MAPPER.readValue(genericJsonEntity.getJsonVal(), 
pojoClass);
               e.setId(genericJsonEntity.getId());
               e.setVersion(genericJsonEntity.getVersion());
+              e.setUpdateTime(genericJsonEntity.getUpdateTime());
               result.add(e);
             }
           }
@@ -599,6 +602,7 @@ public class GenericPojoDao {
                   E bean = OBJECT_MAPPER.readValue(entity.getJsonVal(), 
pojoClass);
                   bean.setId(entity.getId());
                   bean.setVersion(entity.getVersion());
+                  bean.setUpdateTime(entity.getUpdateTime());
                   ret.add(bean);
                 }
               }
diff --git a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dto/AbstractDTO.java b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dto/AbstractDTO.java
index fcfe833..39a16cc 100644
--- a/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dto/AbstractDTO.java
+++ b/thirdeye/thirdeye-pinot/src/main/java/org/apache/pinot/thirdeye/datalayer/dto/AbstractDTO.java
@@ -20,15 +20,25 @@
 package org.apache.pinot.thirdeye.datalayer.dto;
 
 import java.io.Serializable;
+import java.sql.Timestamp;
 import org.apache.commons.lang3.builder.ToStringBuilder;
 import org.apache.commons.lang3.builder.ToStringStyle;
 
 public abstract class AbstractDTO implements Serializable {
   private Long id;
   private int version;
+  protected Timestamp updateTime;
   protected String createdBy;
   protected String updatedBy;
 
+  public Timestamp getUpdateTime() {
+    return updateTime;
+  }
+
+  public void setUpdateTime(Timestamp updateTime) {
+    this.updateTime = updateTime;
+  }
+
   public Long getId() {
     return id;
   }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to