Repository: ambari
Updated Branches:
  refs/heads/trunk f4af798a5 -> d348f5db5


AMBARI-8084 - Alerts: Convert Bigtop Stack Nagios Alerts (jonathanhurley)


Project: http://git-wip-us.apache.org/repos/asf/ambari/repo
Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/d348f5db
Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/d348f5db
Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/d348f5db

Branch: refs/heads/trunk
Commit: d348f5db54e3a59a7a228fa698a8eb6d05712d6c
Parents: f4af798
Author: Jonathan Hurley <jhur...@hortonworks.com>
Authored: Thu Nov 6 12:24:38 2014 -0500
Committer: Jonathan Hurley <jhur...@hortonworks.com>
Committed: Fri Nov 7 16:12:17 2014 -0500

----------------------------------------------------------------------
 .../server/api/services/AmbariMetaInfo.java     |   6 -
 .../AlertDefinitionResourceProvider.java        |   1 +
 .../internal/AlertGroupResourceProvider.java    |   7 +
 .../BIGTOP/0.8/services/FLUME/alerts.json       |  17 +
 .../package/files/alert_flume_agent_status.py   |  99 ++++
 .../BIGTOP/0.8/services/GANGLIA/alerts.json     | 107 +++++
 .../BIGTOP/0.8/services/HBASE/alerts.json       | 109 +++++
 .../stacks/BIGTOP/0.8/services/HDFS/alerts.json | 480 +++++++++++++++++++
 .../HDFS/package/files/alert_checkpoint_time.py | 136 ++++++
 .../package/files/alert_ha_namenode_health.py   | 166 +++++++
 .../stacks/BIGTOP/0.8/services/HIVE/alerts.json |  39 ++
 .../package/files/alert_hive_thrift_port.py     |  89 ++++
 .../BIGTOP/0.8/services/OOZIE/alerts.json       |  40 ++
 .../package/files/alert_check_oozie_server.py   |  74 +++
 .../BIGTOP/0.8/services/WEBHCAT/alerts.json     |  18 +
 .../package/files/alert_webhcat_server.py       | 111 +++++
 .../stacks/BIGTOP/0.8/services/YARN/alerts.json | 321 +++++++++++++
 .../package/files/alert_nodemanager_health.py   | 123 +++++
 .../BIGTOP/0.8/services/ZOOKEEPER/alerts.json   |  51 ++
 .../AlertDefinitionResourceProviderTest.java    |   5 +
 .../AlertGroupResourceProviderTest.java         |  31 ++
 21 files changed, 2024 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/java/org/apache/ambari/server/api/services/AmbariMetaInfo.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/api/services/AmbariMetaInfo.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/api/services/AmbariMetaInfo.java
index 3d67fe9..bb4c569 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/api/services/AmbariMetaInfo.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/api/services/AmbariMetaInfo.java
@@ -1275,12 +1275,6 @@ public class AmbariMetaInfo {
         stackDefinitions.addAll(serviceDefinitions);
       }
 
-      // if there are no alert definitions defined for the cluster services
-      // then don't do anything and go to the next cluster
-      if (null == stackDefinitions || stackDefinitions.size() == 0) {
-        continue;
-      }
-
       List<AlertDefinitionEntity> persist = new 
ArrayList<AlertDefinitionEntity>();
       List<AlertDefinitionEntity> entities = 
alertDefinitionDao.findAll(clusterId);
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
index a8a7f67..f66fc1d 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProvider.java
@@ -616,6 +616,7 @@ public class AlertDefinitionResourceProvider extends 
AbstractControllerResourceP
     setResourceProperty(resource, ALERT_DEF_SERVICE_NAME, 
entity.getServiceName(), requestedIds);
     setResourceProperty(resource, ALERT_DEF_COMPONENT_NAME, 
entity.getComponentName(), requestedIds);
     setResourceProperty(resource, ALERT_DEF_ENABLED, 
Boolean.valueOf(entity.getEnabled()), requestedIds);
+    setResourceProperty(resource, ALERT_DEF_IGNORE_HOST, 
Boolean.valueOf(entity.isHostIgnored()), requestedIds);
     setResourceProperty(resource, ALERT_DEF_SCOPE, entity.getScope(), 
requestedIds);
 
     boolean sourceTypeRequested = setResourceProperty(resource,

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProvider.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProvider.java
 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProvider.java
index 50820a7..c93ef29 100644
--- 
a/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProvider.java
+++ 
b/ambari-server/src/main/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProvider.java
@@ -223,6 +223,13 @@ public class AlertGroupResourceProvider extends
       LOG.info("Deleting alert target {}", groupId);
 
       final AlertGroupEntity entity = s_dao.findGroupById(groupId.longValue());
+      if (entity.isDefault()) {
+        // default groups cannot be removed
+        LOG.warn("The default alert group for {} cannot be removed",
+            entity.getServiceName());
+
+        continue;
+      }
 
       modifyResources(new Command<Void>() {
         @Override

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
new file mode 100644
index 0000000..86fb854
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/alerts.json
@@ -0,0 +1,17 @@
+{
+  "FLUME": {
+    "service": [],
+    "FLUME_HANDLER": [
+      {
+        "name": "flume_agent_status",
+        "label": "Flume Agent Status",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py"
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
new file mode 100644
index 0000000..b183bbc
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/FLUME/package/files/alert_flume_agent_status.py
@@ -0,0 +1,99 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import socket
+
+from resource_management.libraries.functions.flume_agent_helper import 
find_expected_agent_names
+from resource_management.libraries.functions.flume_agent_helper import 
get_flume_status
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+FLUME_CONF_DIR_KEY = '{{flume-env/flume_conf_dir}}'
+
+FLUME_RUN_DIR = '/var/run/flume'
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (FLUME_CONF_DIR_KEY,)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the 
script.'])
+
+  flume_conf_directory = None
+  if FLUME_CONF_DIR_KEY in parameters:
+    flume_conf_directory = parameters[FLUME_CONF_DIR_KEY]
+
+  if flume_conf_directory is None:
+    return (RESULT_CODE_UNKNOWN, ['The Flume configuration directory is a 
required parameter.'])
+
+  if host_name is None:
+    host_name = socket.getfqdn()
+
+  processes = get_flume_status(flume_conf_directory, FLUME_RUN_DIR)
+  expected_agents = find_expected_agent_names(flume_conf_directory)
+
+  alert_label = ''
+  alert_state = RESULT_CODE_OK
+
+  if len(processes) == 0 and len(expected_agents) == 0:
+    alert_label = 'No agents defined on {0}'.format(host_name)
+  else:
+    ok = []
+    critical = []
+    text_arr = []
+
+    for process in processes:
+      if not process.has_key('status') or process['status'] == 'NOT_RUNNING':
+        critical.append(process['name'])
+      else:
+        ok.append(process['name'])
+
+    if len(critical) > 0:
+      text_arr.append("{0} {1} NOT running".format(", ".join(critical),
+        "is" if len(critical) == 1 else "are"))
+
+    if len(ok) > 0:
+      text_arr.append("{0} {1} running".format(", ".join(ok),
+        "is" if len(ok) == 1 else "are"))
+
+    plural = len(critical) > 1 or len(ok) > 1
+    alert_label = "Agent{0} {1} {2}".format(
+      "s" if plural else "",
+      " and ".join(text_arr),
+      "on " + host_name)
+
+    alert_state = RESULT_CODE_CRITICAL if len(critical) > 0 else RESULT_CODE_OK
+
+  return (alert_state, [alert_label])
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/GANGLIA/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/GANGLIA/alerts.json
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/GANGLIA/alerts.json
new file mode 100644
index 0000000..05053a3
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/GANGLIA/alerts.json
@@ -0,0 +1,107 @@
+{
+  "GANGLIA": {
+    "service": [],
+    "GANGLIA_SERVER": [
+      {
+        "name": "ganglia_server_process",
+        "label": "Ganglia Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8651",
+          "default_port": 8651,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_hdfs_namenode",
+        "label": "Ganglia NameNode Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8661",
+          "default_port": 8661,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_hbase_master",
+        "label": "Ganglia HBase Master Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8663",
+          "default_port": 8663,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_yarn_resourcemanager",
+        "label": "Ganglia ResourceManager Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8664",
+          "default_port": 8664,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "ganglia_monitor_mapreduce_history_server",
+        "label": "Ganglia History Server Process Monitor",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "8666",
+          "default_port": 8666,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HBASE/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HBASE/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HBASE/alerts.json
new file mode 100644
index 0000000..b8b8cab
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HBASE/alerts.json
@@ -0,0 +1,109 @@
+{
+  "HBASE": {
+    "service": [
+      {
+        "name": "hbase_regionserver_process_percent",
+        "label": "Percent RegionServers Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "hbase_regionserver_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      }    
+    ],
+    "HBASE_MASTER": [
+      {
+        "name": "hbase_master_process",
+        "label": "HBase Master Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{hbase-site/hbase.master.port}}",
+          "default_port": 60000,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "hbase_master_cpu",
+        "label": "HBase Maser CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hbase-site/hbase.master.info.port}}",
+            "https": "{{hbase-site/hbase.master.info.port}}",
+            "https_property": "{{cluster-env/security_enabled}}",
+            "https_property_value": "true",
+            "default_port": 60010
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      }
+    ],
+    "HBASE_REGIONSERVER": [
+      {
+        "name": "hbase_regionserver_process",
+        "label": "HBase RegionServer Process",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "PORT",
+          "uri": "{{hbase-site/hbase.regionserver.info.port}}",
+          "default_port": 60030,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
new file mode 100644
index 0000000..96cb931
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/alerts.json
@@ -0,0 +1,480 @@
+{
+  "HDFS":{
+    "service": [
+      {
+        "name": "datanode_process_percent",
+        "label": "Percent DataNodes Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "datanode_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      },
+      {
+        "name": "datanode_storage_percent",
+        "label": "Percent DataNodes With Available Space",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "datanode_storage",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      },
+      {
+        "name": "journalnode_process_percent",
+        "label": "Percent JournalNodes Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "journalnode_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.33
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.50
+            }
+          }
+        }
+      }
+    ],
+    "NAMENODE": [
+      {
+        "name": "namenode_webui",
+        "label": "NameNode Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },
+      {
+        "name": "namenode_cpu",
+        "label": "NameNode Host CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      },
+      {
+        "name": "namenode_hdfs_blocks_health",
+        "label": "NameNode Blocks Health",
+        "interval": 2,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Total Blocks:[{1}], Missing Blocks:[{0}]"
+            },
+            "warning": {
+              "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
+              "value": 1
+            },          
+            "critical": {
+              "text": "Total Blocks:[{1}], Missing Blocks:[{0}]",
+              "value": 1
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "Hadoop:service=NameNode,name=FSNamesystem/MissingBlocks",
+              "Hadoop:service=NameNode,name=FSNamesystem/BlocksTotal"
+            ],
+            "value": "{0}"
+          }
+        }
+      },
+      {
+        "name": "namenode_hdfs_capacity_utilization",
+        "label": "HDFS Capacity Utilization",
+        "interval": 2,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]"
+            },
+            "warning": {
+              "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
+              "value": 80
+            },          
+            "critical": {
+              "text": "Capacity Used:[{2:d}%, {0}], Capacity Remaining:[{1}]",
+              "value": 90
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "Hadoop:service=NameNode,name=FSNamesystemState/CapacityUsed",
+              
"Hadoop:service=NameNode,name=FSNamesystemState/CapacityRemaining"
+            ],
+            "value": "{0}/({0} + {1}) * 100"
+          }
+        }
+      },
+      {
+        "name": "namenode_rpc_latency",
+        "label": "NameNode RPC Latency",
+        "interval": 2,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]"
+            },
+            "warning": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 3000
+            },          
+            "critical": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 5000
+            }
+          },
+          "jmx": {
+            "property_list": [
+              
"Hadoop:service=NameNode,name=RpcActivityForPort*/RpcQueueTimeAvgTime",
+              
"Hadoop:service=NameNode,name=RpcActivityForPort*/RpcProcessingTimeAvgTime"
+            ],
+            "value": "{0}"
+          }
+        }
+      },
+      {
+        "name": "namenode_directory_status",
+        "label": "NameNode Directory Status",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.namenode.http-address}}",
+            "https": "{{hdfs-site/dfs.namenode.https-address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Directories are healthy"
+            },
+            "warning": {
+              "text": "Failed directory count: {1}",
+              "value": 1
+            },          
+            "critical": {
+              "text": "Failed directory count: {1}",
+              "value": 1
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "Hadoop:service=NameNode,name=NameNodeInfo/NameDirStatuses"
+            ],
+            "value": "calculate(args)\ndef calculate(args):\n  import json\n  
json_statuses = json.loads({0})\n  return len(json_statuses['failed']) if 
'failed' in json_statuses else 0"
+          }
+        }
+      },
+      {
+        "name": "namenode_process",
+        "label": "NameNode Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{hdfs-site/dfs.namenode.http-address}}",
+          "default_port": 50070,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "namenode_last_checkpoint",
+        "label": "NameNode Last Checkpoint",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py"
+        }
+      },
+      {
+        "name": "namenode_ha_health",
+        "label": "NameNode High Availability Health",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "ignore_host": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py"
+        }
+      }
+    ],
+    "SECONDARY_NAMENODE": [
+      {
+        "name": "secondary_namenode_process",
+        "label": "Secondary NameNode Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{hdfs-site/dfs.namenode.secondary.http-address}}",
+          "default_port": 50071,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ],
+    "JOURNALNODE": [
+      {
+        "name": "journalnode_process",
+        "label": "JournalNode Process",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "PORT",        
+          "uri": "{{hdfs-site/dfs.journalnode.http-address}}",
+          "default_port": 8480,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ],      
+    "DATANODE": [
+      {
+        "name": "datanode_process",
+        "label": "DateNode Process",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "PORT",        
+          "uri": "{{hdfs-site/dfs.datanode.address}}",
+          "default_port": 50010,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      },
+      {
+        "name": "datanode_webui",
+        "label": "DataNode Web UI",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{hdfs-site/dfs.datanode.http.address}}",
+            "https": "{{hdfs-site/dfs.datanode.https.address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },    
+      {
+        "name": "datanode_storage",
+        "label": "DataNode Storage",
+        "interval": 2,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{hdfs-site/dfs.datanode.http.address}}",
+            "https": "{{hdfs-site/dfs.datanode.https.address}}",
+            "https_property": "{{hdfs-site/dfs.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, 
{1}]"
+            },
+            "warning": {
+              "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, 
{1}]",
+              "value": 80
+            },
+            "critical": {
+              "text": "Remaining Capacity:[{0}], Total Capacity:[{2:d}% Used, 
{1}]",
+              "value": 90
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "Hadoop:service=DataNode,name=FSDatasetState-*/Remaining",
+              "Hadoop:service=DataNode,name=FSDatasetState-*/Capacity"
+            ],
+            "value": "({1} - {0})/{1} * 100"
+          }
+        }
+      }    
+    ],
+    "ZKFC": [
+      {
+        "name": "hdfs_zookeeper_failover_controller_process",
+        "label": "ZooKeeper Failover Controller Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",        
+          "uri": "{{core-site/ha.zookeeper.quorum}}",
+          "default_port": 2181,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} on host {1}:{2}"
+            }
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
new file mode 100644
index 0000000..410608f
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_checkpoint_time.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+import urllib2
+import json
+
+LABEL = 'Last Checkpoint: [{h} hours, {m} minutes, {tx} transactions]'
+
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+NN_HTTP_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+NN_CHECKPOINT_TX_KEY = '{{hdfs-site/dfs.namenode.checkpoint.txns}}'
+NN_CHECKPOINT_PERIOD_KEY = '{{hdfs-site/dfs.namenode.checkpoint.period}}'
+
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+CHECKPOINT_TX_DEFAULT = 1000000
+CHECKPOINT_PERIOD_DEFAULT = 21600
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (NN_HTTP_ADDRESS_KEY, NN_HTTPS_ADDRESS_KEY, NN_HTTP_POLICY_KEY, 
+      NN_CHECKPOINT_TX_KEY, NN_CHECKPOINT_PERIOD_KEY)      
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+  
+  uri = None
+  scheme = 'http'  
+  http_uri = None
+  https_uri = None
+  http_policy = 'HTTP_ONLY'
+  percent_warning = PERCENT_WARNING
+  percent_critical = PERCENT_CRITICAL
+  checkpoint_tx = CHECKPOINT_TX_DEFAULT
+  checkpoint_period = CHECKPOINT_PERIOD_DEFAULT
+  
+  if NN_HTTP_ADDRESS_KEY in parameters:
+    http_uri = parameters[NN_HTTP_ADDRESS_KEY]
+
+  if NN_HTTPS_ADDRESS_KEY in parameters:
+    https_uri = parameters[NN_HTTPS_ADDRESS_KEY]
+
+  if NN_HTTP_POLICY_KEY in parameters:
+    http_policy = parameters[NN_HTTP_POLICY_KEY]
+
+  if NN_CHECKPOINT_TX_KEY in parameters:
+    checkpoint_tx = parameters[NN_CHECKPOINT_TX_KEY]
+
+  if NN_CHECKPOINT_PERIOD_KEY in parameters:
+    checkpoint_period = parameters[NN_CHECKPOINT_PERIOD_KEY]
+    
+  # determine the right URI and whether to use SSL
+  uri = http_uri
+  if http_policy == 'HTTPS_ONLY':
+    scheme = 'https'
+    
+    if https_uri is not None:
+      uri = https_uri 
+  
+  current_time = int(round(time.time() * 1000))
+
+  last_checkpoint_time_qry = 
"{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=FSNamesystem".format(scheme,uri)
+  journal_transaction_info_qry = 
"{0}://{1}/jmx?qry=Hadoop:service=NameNode,name=NameNodeInfo".format(scheme,uri)
+
+  # start out assuming an OK status
+  label = None
+  result_code = "OK"
+
+  try:
+    last_checkpoint_time = 
int(get_value_from_jmx(last_checkpoint_time_qry,"LastCheckpointTime"))
+    journal_transaction_info = 
get_value_from_jmx(journal_transaction_info_qry,"JournalTransactionInfo")
+    journal_transaction_info_dict = json.loads(journal_transaction_info)
+  
+    last_tx = int(journal_transaction_info_dict['LastAppliedOrWrittenTxId'])
+    most_recent_tx = 
int(journal_transaction_info_dict['MostRecentCheckpointTxId'])
+    transaction_difference = last_tx - most_recent_tx
+    
+    delta = (current_time - last_checkpoint_time)/1000
+
+    label = LABEL.format(h=get_time(delta)['h'], m=get_time(delta)['m'], 
tx=transaction_difference)
+    
+    if (transaction_difference > int(checkpoint_tx)) and (float(delta) / 
int(checkpoint_period)*100 >= int(percent_critical)):
+      result_code = 'CRITICAL'
+    elif (transaction_difference > int(checkpoint_tx)) and (float(delta) / 
int(checkpoint_period)*100 >= int(percent_warning)):
+      result_code = 'WARNING'
+
+  except Exception, e:
+    label = str(e)
+    result_code = 'UNKNOWN'
+        
+  return ((result_code, [label]))
+
+def get_time(delta):
+  h = int(delta/3600)
+  m = int((delta % 3600)/60)
+  return {'h':h, 'm':m}
+
+
+def get_value_from_jmx(qry, property):
+  response = urllib2.urlopen(qry)
+  data=response.read()
+  data_dict = json.loads(data)
+  return data_dict["beans"][0][property]

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
new file mode 100644
index 0000000..fc1541d
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HDFS/package/files/alert_ha_namenode_health.py
@@ -0,0 +1,166 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import urllib2
+import json
+
+RESULT_STATE_OK = 'OK'
+RESULT_STATE_CRITICAL = 'CRITICAL'
+RESULT_STATE_UNKNOWN = 'UNKNOWN'
+RESULT_STATE_SKIPPED = 'SKIPPED'
+
+HDFS_NN_STATE_ACTIVE = 'active'
+HDFS_NN_STATE_STANDBY = 'standby'
+
+HDFS_SITE_KEY = '{{hdfs-site}}'
+NAMESERVICE_KEY = '{{hdfs-site/dfs.nameservices}}'
+NN_HTTP_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.http-address}}'
+NN_HTTPS_ADDRESS_KEY = '{{hdfs-site/dfs.namenode.https-address}}'
+DFS_POLICY_KEY = '{{hdfs-site/dfs.http.policy}}'
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (HDFS_SITE_KEY, NAMESERVICE_KEY, NN_HTTP_ADDRESS_KEY,
+  NN_HTTPS_ADDRESS_KEY, DFS_POLICY_KEY)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  if parameters is None:
+    return (RESULT_STATE_UNKNOWN, ['There were no parameters supplied to the 
script.'])
+
+  # if not in HA mode, then SKIP
+  if not NAMESERVICE_KEY in parameters:
+    return (RESULT_STATE_SKIPPED, ['NameNode HA is not enabled'])
+
+  # hdfs-site is required
+  if not HDFS_SITE_KEY in parameters:
+    return (RESULT_STATE_UNKNOWN, ['{0} is a required parameter for the 
script'.format(HDFS_SITE_KEY)])
+
+  # determine whether or not SSL is enabled
+  is_ssl_enabled = False
+  if DFS_POLICY_KEY in parameters:
+    dfs_policy = parameters[DFS_POLICY_KEY]
+    if dfs_policy == "HTTPS_ONLY":
+      is_ssl_enabled = True
+
+  name_service = parameters[NAMESERVICE_KEY]
+  hdfs_site = parameters[HDFS_SITE_KEY]
+
+  # look for dfs.ha.namenodes.foo
+  nn_unique_ids_key = 'dfs.ha.namenodes.' + name_service
+  if not nn_unique_ids_key in hdfs_site:
+    return (RESULT_STATE_UNKNOWN, ['Unable to find unique namenode alias key 
{0}'.format(nn_unique_ids_key)])
+
+  namenode_http_fragment = 'dfs.namenode.http-address.{0}.{1}'
+  jmx_uri_fragment = 
"http://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus";
+
+  if is_ssl_enabled:
+    namenode_http_fragment = 'dfs.namenode.https-address.{0}.{1}'
+    jmx_uri_fragment = 
"https://{0}/jmx?qry=Hadoop:service=NameNode,name=NameNodeStatus";
+
+
+  active_namenodes = []
+  standby_namenodes = []
+  unknown_namenodes = []
+
+  # now we have something like 'nn1,nn2,nn3,nn4'
+  # turn it into dfs.namenode.[property].[dfs.nameservices].[nn_unique_id]
+  # ie dfs.namenode.http-address.hacluster.nn1
+  nn_unique_ids = hdfs_site[nn_unique_ids_key].split(',')
+  for nn_unique_id in nn_unique_ids:
+    key = namenode_http_fragment.format(name_service,nn_unique_id)
+
+    if key in hdfs_site:
+      # use str() to ensure that unicode strings do not have the u' in them
+      value = str(hdfs_site[key])
+
+      try:
+        jmx_uri = jmx_uri_fragment.format(value)
+        state = get_value_from_jmx(jmx_uri,'State')
+
+        if state == HDFS_NN_STATE_ACTIVE:
+          active_namenodes.append(value)
+        elif state == HDFS_NN_STATE_STANDBY:
+          standby_namenodes.append(value)
+        else:
+          unknown_namenodes.append(value)
+      except:
+        unknown_namenodes.append(value)
+
+  # now that the request is done, determine if this host is the host that
+  # should report the status of the HA topology
+  is_active_namenode = False
+  for active_namenode in active_namenodes:
+    if active_namenode.startswith(host_name):
+      is_active_namenode = True
+
+  # there's only one scenario here; there is exactly 1 active and 1 standby
+  is_topology_healthy = len(active_namenodes) == 1 and len(standby_namenodes) 
== 1
+
+  result_label = 'Active{0}, Standby{1}, 
Unknown{2}'.format(str(active_namenodes),
+    str(standby_namenodes), str(unknown_namenodes))
+
+  # Healthy Topology:
+  #   - Active NN reports the alert, standby does not
+  #
+  # Unhealthy Topology:
+  #   - Report the alert if this is the first named host
+  #   - Report the alert if not the first named host, but the other host
+  #   could not report its status
+  if is_topology_healthy:
+    if is_active_namenode is True:
+      return (RESULT_STATE_OK, [result_label])
+    else:
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+  else:
+    # dfs.namenode.rpc-address.service.alias is guaranteed in HA mode
+    first_listed_host_key = 'dfs.namenode.rpc-address.{0}.{1}'.format(
+      name_service, nn_unique_ids[0])
+
+    first_listed_host = ''
+    if first_listed_host_key in hdfs_site:
+      first_listed_host = hdfs_site[first_listed_host_key]
+
+    is_first_listed_host = False
+    if first_listed_host.startswith(host_name):
+      is_first_listed_host = True
+
+    if is_first_listed_host:
+      return (RESULT_STATE_CRITICAL, [result_label])
+    else:
+      # not the first listed host, but the first host might be in the unknown
+      return (RESULT_STATE_SKIPPED, ['Another host will report this alert'])
+
+
+def get_value_from_jmx(qry, property):
+  response = urllib2.urlopen(qry)
+  data=response.read()
+  data_dict = json.loads(data)
+  return data_dict["beans"][0][property]

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/alerts.json
new file mode 100644
index 0000000..3c279a8
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/alerts.json
@@ -0,0 +1,39 @@
+{
+  "HIVE": {
+    "service": [],
+    "HIVE_METASTORE": [
+      {
+        "name": "hive_metastore_process",
+        "label": "Hive Metastore Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{hive-site/hive.metastore.uris}}",
+          "default_port": 9083,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ],
+    "HIVE_SERVER": [
+      {
+        "name": "hive_server_process",
+        "label": "HiveServer2 Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py"
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
new file mode 100644
index 0000000..bd3f276
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/HIVE/package/files/alert_hive_thrift_port.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import traceback
+import urllib2
+from resource_management.libraries.functions import hive_check 
+
+OK_MESSAGE = "TCP OK - %.4f response on port %s"
+CRITICAL_MESSAGE = "Connection failed on host {0}:{1}"
+
+HIVE_SERVER_THRIFT_PORT_KEY = '{{hive-site/hive.server2.thrift.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+
+PERCENT_WARNING = 200
+PERCENT_CRITICAL = 200
+
+THRIFT_PORT_DEFAULT = 10000
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (HIVE_SERVER_THRIFT_PORT_KEY,SECURITY_ENABLED_KEY)      
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (('UNKNOWN', ['There were no parameters supplied to the script.']))
+
+  thrift_port = THRIFT_PORT_DEFAULT
+  if HIVE_SERVER_THRIFT_PORT_KEY in parameters:
+    thrift_port = int(parameters[HIVE_SERVER_THRIFT_PORT_KEY])  
+
+  security_enabled = False
+  if SECURITY_ENABLED_KEY in parameters:
+    security_enabled = bool(parameters[SECURITY_ENABLED_KEY])  
+
+  result_code = None
+
+  try:
+    if host_name is None:
+      host_name = socket.getfqdn()
+
+    start_time = time.time()
+    is_thrift_port_ok = hive_check.check_thrift_port_sasl(host_name,
+        thrift_port, security_enabled=security_enabled)
+     
+    if is_thrift_port_ok == True:
+      result_code = 'OK'
+      total_time = time.time() - start_time
+      label = OK_MESSAGE % (total_time, thrift_port)
+    else:
+      result_code = 'CRITICAL'
+      label = CRITICAL_MESSAGE.format(host_name,thrift_port)
+
+  except Exception, e:
+    label = str(e)
+    result_code = 'UNKNOWN'
+        
+  return ((result_code, [label]))
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/alerts.json
new file mode 100644
index 0000000..478f887
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/alerts.json
@@ -0,0 +1,40 @@
+{
+  "OOZIE": {
+    "service": [],
+    "OOZIE_SERVER": [
+      {
+        "name": "oozie_server_webui",
+        "label": "Oozie Server Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{oozie-site/oozie.base.url}}/oozie"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },
+      {
+        "name": "oozie_server_status",
+        "label": "Oozie Server Status",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py"
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
new file mode 100644
index 0000000..7bf1255
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/OOZIE/package/files/alert_check_oozie_server.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import subprocess
+from subprocess import CalledProcessError
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OOZIE_URL_KEY = '{{oozie-site/oozie.base.url}}'
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (OOZIE_URL_KEY)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  if parameters is None:
+    return (RESULT_CODE_UNKNOWN, ['There were no parameters supplied to the 
script.'])
+
+  oozie_url = None
+  if OOZIE_URL_KEY in parameters:
+    oozie_url = parameters[OOZIE_URL_KEY]
+
+  if oozie_url is None:
+    return (RESULT_CODE_UNKNOWN, ['The Oozie URL is a required parameter.'])
+
+  try:
+    # oozie admin -oozie http://server:11000/oozie -status
+    oozie_process = subprocess.Popen(['oozie', 'admin', '-oozie',
+      oozie_url, '-status'], stderr=subprocess.PIPE, stdout=subprocess.PIPE)
+
+    oozie_output, oozie_error = oozie_process.communicate()
+    oozie_return_code = oozie_process.returncode
+
+    if oozie_return_code == 0:
+      # strip trailing newlines
+      oozie_output = str(oozie_output).strip('\n')
+      return (RESULT_CODE_OK, [oozie_output])
+    else:
+      oozie_error = str(oozie_error).strip('\n')
+      return (RESULT_CODE_CRITICAL, [oozie_error])
+
+  except CalledProcessError, cpe:
+    return (RESULT_CODE_CRITICAL, [str(cpe)])

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/alerts.json
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/alerts.json
new file mode 100644
index 0000000..88b5e3b
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/alerts.json
@@ -0,0 +1,18 @@
+{
+  "WEBHCAT": {
+    "service": [],
+    "WEBHCAT_SERVER": [
+      {
+        "name": "hive_webhcat_server_status",
+        "label": "WebHCat Server Status",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py"
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
new file mode 100644
index 0000000..44840de
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/WEBHCAT/package/files/alert_webhcat_server.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import time
+import urllib2
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+OK_MESSAGE = 'TCP OK - {0:.4f} response on port {1}'
+CRITICAL_CONNECTION_MESSAGE = 'Connection failed on host {0}:{1}'
+CRITICAL_TEMPLETON_STATUS_MESSAGE = 'WebHCat returned an unexpected status of 
"{0}"'
+CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE = 'Unable to determine WebHCat health 
from unexpected JSON response'
+
+TEMPLETON_PORT_KEY = '{{webhcat-site/templeton.port}}'
+SECURITY_ENABLED_KEY = '{{cluster-env/security_enabled}}'
+
+TEMPLETON_OK_RESPONSE = 'ok'
+TEMPLETON_PORT_DEFAULT = 50111
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (TEMPLETON_PORT_KEY,SECURITY_ENABLED_KEY)      
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+
+  result_code = RESULT_CODE_UNKNOWN
+
+  if parameters is None:
+    return (result_code, ['There were no parameters supplied to the script.'])
+
+  templeton_port = TEMPLETON_PORT_DEFAULT
+  if TEMPLETON_PORT_KEY in parameters:
+    templeton_port = int(parameters[TEMPLETON_PORT_KEY])  
+
+  security_enabled = False
+  if SECURITY_ENABLED_KEY in parameters:
+    security_enabled = parameters[SECURITY_ENABLED_KEY].lower() == 'true'
+
+  scheme = 'http'
+  if security_enabled is True:
+    scheme = 'https'
+
+  label = ''
+  url_response = None
+  templeton_status = ''
+  total_time = 0
+
+  try:
+    # the alert will always run on the webhcat host
+    if host_name is None:
+      host_name = socket.getfqdn()
+    
+    query = "{0}://{1}:{2}/templeton/v1/status".format(scheme, host_name,
+        templeton_port)
+    
+    # execute the query for the JSON that includes templeton status
+    start_time = time.time()
+    url_response = urllib2.urlopen(query)
+    total_time = time.time() - start_time
+  except:
+    label = CRITICAL_CONNECTION_MESSAGE.format(host_name,templeton_port)
+    return (RESULT_CODE_CRITICAL, [label])
+
+  # URL response received, parse it
+  try:
+    json_response = json.loads(url_response.read())
+    templeton_status = json_response['status']
+  except:
+    return (RESULT_CODE_CRITICAL, [CRITICAL_TEMPLETON_UNKNOWN_JSON_MESSAGE])
+
+  # proper JSON received, compare against known value
+  if templeton_status.lower() == TEMPLETON_OK_RESPONSE:
+    result_code = RESULT_CODE_OK
+    label = OK_MESSAGE.format(total_time, templeton_port)
+  else:
+    result_code = RESULT_CODE_CRITICAL
+    label = CRITICAL_TEMPLETON_STATUS_MESSAGE.format(templeton_status)
+
+  return (result_code, [label])
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
new file mode 100644
index 0000000..e0100e5
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/alerts.json
@@ -0,0 +1,321 @@
+{
+  "MAPREDUCE2": {
+    "service": [],
+    "HISTORYSERVER": [
+      {
+        "name": "mapreduce_history_server_webui",
+        "label": "History Server Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}",
+            "https": 
"{{mapred-site/mapreduce.jobhistory.webapp.https.address}}",
+            "https_property": 
"{{mapred-site/mapreduce.jobhistory.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },
+      {
+        "name": "mapreduce_history_server_cpu",
+        "label": "History Server CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}",
+            "https": 
"{{mapred-site/mapreduce.jobhistory.webapp.https.address}}",
+            "https_property": 
"{{mapred-site/mapreduce.jobhistory.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      },
+      {
+        "name": "mapreduce_history_server_rpc_latency",
+        "label": "History Server RPC Latency",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{mapred-site/mapreduce.jobhistory.webapp.address}}",
+            "https": 
"{{mapred-site/mapreduce.jobhistory.webapp.https.address}}",
+            "https_property": 
"{{mapred-site/mapreduce.jobhistory.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]"
+            },
+            "warning": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 3000
+            },          
+            "critical": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 5000
+            }
+          },
+          "jmx": {
+            "property_list": [
+              
"Hadoop:service=JobHistoryServer,name=RpcActivityForPort*/RpcQueueTimeAvgTime",
+              
"Hadoop:service=JobHistoryServer,name=RpcActivityForPort*/RpcProcessingTimeAvgTime"
+            ],
+            "value": "{0}"
+          }
+        }
+      },
+      {
+        "name": "mapreduce_history_server_process",
+        "label": "History Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "PORT",
+          "uri": "{{mapred-site/mapreduce.jobhistory.webapp.address}}",
+          "default_port": 19888,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }
+        }
+      }
+    ]
+  },
+  "YARN": {
+    "service": [
+      {
+        "name": "yarn_nodemanager_webui_percent",
+        "label": "Percent NodeManagers Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "yarn_nodemanager_webui",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.1
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.3
+            }
+          }
+        }
+      }
+    ],
+    "NODEMANAGER": [
+      {
+        "name": "yarn_nodemanager_webui",
+        "label": "NodeManager Web UI",
+        "interval": 1,
+        "scope": "HOST",
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{yarn-site/yarn.nodemanager.webapp.address}}",
+            "https": "{{yarn-site/yarn.nodemanager.webapp.https.address}}",
+            "https_property": "{{yarn-site/yarn.http.policy}}",
+            "https_property_value": "HTTPS_ONLY",
+            "default_port": 8042
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },
+      {
+        "name": "yarn_nodemanager_health",
+        "label": "NodeManager Health",
+        "interval": 1,
+        "scope": "HOST",
+        "enabled": true,
+        "source": {
+          "type": "SCRIPT",
+          "path": 
"BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py"
+        }
+      }
+    ],
+    "RESOURCEMANAGER": [
+      {
+        "name": "yarn_resourcemanager_webui",
+        "label": "ResourceManager Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}",
+            "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}",
+            "https_property": "{{yarn-site/yarn.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      },
+      {
+        "name": "yarn_resourcemanager_cpu",
+        "label": "ResourceManager CPU Utilization",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}",
+            "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}",
+            "https_property": "{{yarn-site/yarn.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "{1} CPU, load {0:.1%}"
+            },
+            "warning": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 200
+            },
+            "critical": {
+              "text": "{1} CPU, load {0:.1%}",
+              "value": 250
+            }
+          },
+          "jmx": {
+            "property_list": [
+              "java.lang:type=OperatingSystem/SystemCpuLoad",
+              "java.lang:type=OperatingSystem/AvailableProcessors"
+            ],
+            "value": "{0} * 100"
+          }
+        }
+      },
+      {
+        "name": "yarn_resourcemanager_rpc_latency",
+        "label": "ResourceManager RPC Latency",
+        "interval": 5,
+        "scope": "ANY",
+        "enabled": true,
+        "source": {
+          "type": "METRIC",
+          "uri": {
+            "http": "{{yarn-site/yarn.resourcemanager.webapp.address}}",
+            "https": "{{yarn-site/yarn.resourcemanager.webapp.https.address}}",
+            "https_property": "{{yarn-site/yarn.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "Average Queue Time:[{0}], Average Processing Time:[{1}]"
+            },
+            "warning": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 3000
+            },          
+            "critical": {
+              "text": "Average Queue Time:[{0}], Average Processing 
Time:[{1}]",
+              "value": 5000
+            }
+          },
+          "jmx": {
+            "property_list": [
+              
"Hadoop:service=ResourceManager,name=RpcActivityForPort*/RpcQueueTimeAvgTime",
+              
"Hadoop:service=ResourceManager,name=RpcActivityForPort*/RpcProcessingTimeAvgTime"
+            ],
+            "value": "{0}"
+          }
+        }
+      }
+    ],
+    "APP_TIMELINE_SERVER": [
+      {
+        "name": "yarn_app_timeline_server_webui",
+        "label": "App Timeline Web UI",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "WEB",
+          "uri": {
+            "http": "{{yarn-site/yarn.timeline-service.webapp.address}}",
+            "https": 
"{{yarn-site/yarn.timeline-service.webapp.https.address}}",
+            "https_property": "{{yarn-site/yarn.http.policy}}",
+            "https_property_value": "HTTPS_ONLY"
+          },
+          "reporting": {
+            "ok": {
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "warning":{
+              "text": "HTTP {0} response in {2:.4f} seconds"
+            },
+            "critical": {
+              "text": "Connection failed to {1}"
+            }
+          }
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
new file mode 100644
index 0000000..b1de951
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/YARN/package/files/alert_nodemanager_health.py
@@ -0,0 +1,123 @@
+#!/usr/bin/env python
+
+"""
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import json
+import socket
+import urllib2
+
+RESULT_CODE_OK = 'OK'
+RESULT_CODE_CRITICAL = 'CRITICAL'
+RESULT_CODE_UNKNOWN = 'UNKNOWN'
+
+NODEMANAGER_HTTP_ADDRESS_KEY = '{{yarn-site/yarn.nodemanager.webapp.address}}'
+NODEMANAGER_HTTPS_ADDRESS_KEY = 
'{{yarn-site/yarn.nodemanager.webapp.https.address}}'
+YARN_HTTP_POLICY_KEY = '{{yarn-site/yarn.http.policy}}'
+
+OK_MESSAGE = 'NodeManager Healthy'
+CRITICAL_CONNECTION_MESSAGE = 'Connection failed to {0}'
+CRITICAL_NODEMANAGER_STATUS_MESSAGE = 'NodeManager returned an unexpected 
status of "{0}"'
+CRITICAL_NODEMANAGER_UNKNOWN_JSON_MESSAGE = 'Unable to determine NodeManager 
health from unexpected JSON response'
+
+NODEMANAGER_DEFAULT_PORT = 8042
+
+def get_tokens():
+  """
+  Returns a tuple of tokens in the format {{site/property}} that will be used
+  to build the dictionary passed into execute
+  """
+  return (NODEMANAGER_HTTP_ADDRESS_KEY,NODEMANAGER_HTTPS_ADDRESS_KEY,
+  YARN_HTTP_POLICY_KEY)
+  
+
+def execute(parameters=None, host_name=None):
+  """
+  Returns a tuple containing the result code and a pre-formatted result label
+
+  Keyword arguments:
+  parameters (dictionary): a mapping of parameter key to value
+  host_name (string): the name of this host where the alert is running
+  """
+  result_code = RESULT_CODE_UNKNOWN
+
+  if parameters is None:
+    return (result_code, ['There were no parameters supplied to the script.'])
+
+  scheme = 'http'
+  http_uri = None
+  https_uri = None
+  http_policy = 'HTTP_ONLY'
+
+  if NODEMANAGER_HTTP_ADDRESS_KEY in parameters:
+    http_uri = parameters[NODEMANAGER_HTTP_ADDRESS_KEY]
+
+  if NODEMANAGER_HTTPS_ADDRESS_KEY in parameters:
+    https_uri = parameters[NODEMANAGER_HTTPS_ADDRESS_KEY]
+
+  if YARN_HTTP_POLICY_KEY in parameters:
+    http_policy = parameters[YARN_HTTP_POLICY_KEY]
+
+  # determine the right URI and whether to use SSL
+  uri = http_uri
+  if http_policy == 'HTTPS_ONLY':
+    scheme = 'https'
+
+    if https_uri is not None:
+      uri = https_uri
+
+  label = ''
+  url_response = None
+  node_healthy = 'false'
+  total_time = 0
+
+  # some yarn-site structures don't have the web ui address
+  if uri is None:
+    if host_name is None:
+      host_name = socket.getfqdn()
+
+    uri = '{0}:{1}'.format(host_name, NODEMANAGER_DEFAULT_PORT)
+
+  try:
+    query = "{0}://{1}/ws/v1/node/info".format(scheme,uri)
+    
+    # execute the query for the JSON that includes templeton status
+    url_response = urllib2.urlopen(query)
+  except:
+    label = CRITICAL_CONNECTION_MESSAGE.format(uri)
+    return (RESULT_CODE_CRITICAL, [label])
+
+  # URL response received, parse it
+  try:
+    json_response = json.loads(url_response.read())
+    node_healthy = json_response['nodeInfo']['nodeHealthy']
+
+    # convert boolean to string
+    node_healthy = str(node_healthy)
+  except:
+    return (RESULT_CODE_CRITICAL, [query])
+
+  # proper JSON received, compare against known value
+  if node_healthy.lower() == 'true':
+    result_code = RESULT_CODE_OK
+    label = OK_MESSAGE
+  else:
+    result_code = RESULT_CODE_CRITICAL
+    label = CRITICAL_NODEMANAGER_STATUS_MESSAGE.format(node_healthy)
+
+  return (result_code, [label])

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/ZOOKEEPER/alerts.json
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/ZOOKEEPER/alerts.json
 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/ZOOKEEPER/alerts.json
new file mode 100644
index 0000000..be210ea
--- /dev/null
+++ 
b/ambari-server/src/main/resources/stacks/BIGTOP/0.8/services/ZOOKEEPER/alerts.json
@@ -0,0 +1,51 @@
+{
+  "ZOOKEEPER": {
+    "service": [
+      {
+        "name": "zookeeper_server_process_percent",
+        "label": "Percent ZooKeeper Servers Available",
+        "interval": 1,
+        "scope": "SERVICE",
+        "enabled": true,
+        "source": {
+          "type": "AGGREGATE",
+          "alert_name": "zookeeper_server_process",
+          "reporting": {
+            "ok": {
+              "text": "affected: [{1}], total: [{0}]"
+            },
+            "warning": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.35
+            },
+            "critical": {
+              "text": "affected: [{1}], total: [{0}]",
+              "value": 0.70
+            }
+          }
+        }
+      }  
+    ],
+    "ZOOKEEPER_SERVER": [
+      {
+        "name": "zookeeper_server_process",
+        "label": "ZooKeeper Server Process",
+        "interval": 1,
+        "scope": "ANY",
+        "source": {
+          "type": "PORT",
+          "uri": "{{zookeeper-env/clientPort}}",
+          "default_port": 2181,
+          "reporting": {
+            "ok": {
+              "text": "TCP OK - {0:.4f} response on port {1}"
+            },
+            "critical": {
+              "text": "Connection failed: {0} to {1}:{2}"
+            }
+          }        
+        }
+      }
+    ]
+  }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
 
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
index d4f5fb4..b7e8ced 100644
--- 
a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
+++ 
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertDefinitionResourceProviderTest.java
@@ -161,6 +161,7 @@ public class AlertDefinitionResourceProviderTest {
         AlertDefinitionResourceProvider.ALERT_DEF_ID,
         AlertDefinitionResourceProvider.ALERT_DEF_NAME,
         AlertDefinitionResourceProvider.ALERT_DEF_LABEL,
+        AlertDefinitionResourceProvider.ALERT_DEF_IGNORE_HOST,
         AlertDefinitionResourceProvider.ALERT_DEF_SOURCE,
         AlertDefinitionResourceProvider.ALERT_DEF_SOURCE_TYPE);
 
@@ -201,6 +202,10 @@ public class AlertDefinitionResourceProviderTest {
     Assert.assertEquals("Mock Label",
         r.getPropertyValue(AlertDefinitionResourceProvider.ALERT_DEF_LABEL));
 
+    Assert.assertEquals(
+        Boolean.FALSE,
+        
r.getPropertyValue(AlertDefinitionResourceProvider.ALERT_DEF_IGNORE_HOST));
+
     Assert.assertNotNull(r.getPropertyValue("AlertDefinition/source/type"));
   }
 

http://git-wip-us.apache.org/repos/asf/ambari/blob/d348f5db/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProviderTest.java
----------------------------------------------------------------------
diff --git 
a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProviderTest.java
 
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProviderTest.java
index 9aad9a1..3d9f331 100644
--- 
a/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProviderTest.java
+++ 
b/ambari-server/src/test/java/org/apache/ambari/server/controller/internal/AlertGroupResourceProviderTest.java
@@ -522,6 +522,37 @@ public class AlertGroupResourceProviderTest {
   }
 
   /**
+   * Tests that a default group cannot be deleted via the resource provider.
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testDeleteDefaultGroup() throws Exception {
+    AlertGroupEntity group = new AlertGroupEntity();
+    group.setGroupId(ALERT_GROUP_ID);
+    group.setDefault(true);
+    group.setGroupName(ALERT_GROUP_NAME);
+    group.setAlertDefinitions(getMockDefinitions());
+    group.setAlertTargets(getMockTargets());
+
+    resetToStrict(m_dao);
+    expect(m_dao.findGroupById(ALERT_GROUP_ID)).andReturn(group).anyTimes();
+
+    replay(m_dao);
+
+    AlertGroupResourceProvider provider = createProvider(m_amc);
+
+    Predicate predicate = new PredicateBuilder().property(
+        AlertGroupResourceProvider.ALERT_GROUP_CLUSTER_NAME).equals(
+        ALERT_GROUP_CLUSTER_NAME).and().property(
+        AlertGroupResourceProvider.ALERT_GROUP_ID).equals(
+        ALERT_GROUP_ID.toString()).toPredicate();
+
+    provider.deleteResources(predicate);
+    verify(m_dao);
+  }
+
+  /**
    * @param amc
    * @return
    */

Reply via email to