Ottomata has submitted this change and it was merged.
Change subject: Drop Oozie bundle for Icinga monitoring of webrequest datasets
......................................................................
Drop Oozie bundle for Icinga monitoring of webrequest datasets
Trusty's send_nsca does not work with precise's Icinga, which gives us
a partially broken Icinga setup and makes the Hive partition check
fail more often than not. Since Ops want to get rid of the warnings,
but also do not want to fix the Icinga setup, Oozie's Icinga
monitoring got turned off :-/
Hence, we reflect this change in the repository too.
This change essentially reverts
80bb8685ed61f39ffb4acc546106afb373a13ec7, and updates the Oozie
diagram.
Bug: T76414
Change-Id: Ifd05ff42520f7bc64bceb36aa7b2e74d24a71aa7
---
M diagrams/oozie-overview.dia
D oozie/util/monitor/done_flag/send_ok_to_icinga.sh
D oozie/util/monitor/done_flag/workflow.xml
M oozie/webrequest/partition/add/README.md
D oozie/webrequest/partition/monitor_done_flag/bundle.properties
D oozie/webrequest/partition/monitor_done_flag/bundle.xml
D oozie/webrequest/partition/monitor_done_flag/coordinator.xml
7 files changed, 1 insertion(+), 285 deletions(-)
Approvals:
Ottomata: Verified; Looks good to me, approved
diff --git a/diagrams/oozie-overview.dia b/diagrams/oozie-overview.dia
index 23483d9..2b8771b 100644
--- a/diagrams/oozie-overview.dia
+++ b/diagrams/oozie-overview.dia
Binary files differ
diff --git a/oozie/util/monitor/done_flag/send_ok_to_icinga.sh b/oozie/util/monitor/done_flag/send_ok_to_icinga.sh
deleted file mode 100755
index c5870b2..0000000
--- a/oozie/util/monitor/done_flag/send_ok_to_icinga.sh
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/bin/bash
-
-set -e
-
-usage="Usage: $(basename "$0") [--dry-run|-n] [--hostname|-h <icinga_reported_hostname>] <icinga_service_description> <dataset_location>"
-
-icinga_nsca_host='icinga.wikimedia.org'
-icinga_nsca_port='5667'
-
-# This is hardcoded, as any DataNode may run this script
-# and we want the service to always be associated with
-# the same node.
-icinga_reported_hostname='analytics1027'
-
-dry_run='false'
-
-# Parse CLI args
-while [ $# -gt 2 ]; do
- case "$1" in
- -n|--dry-run)
- dry_run='true'
- ;;
- -h|--hostname)
- shift
- icinga_reported_hostname="$1"
- ;;
- *)
- break
- ;;
- esac
- shift
-done
-
-icinga_service_description="$1"
-location="$2"
-
-if [ -z "${icinga_service_description}" -o -z "${location}" ]; then
- echo "Error: Must provide <icinga_service_description> and <location>."
- echo "${usage}"
- exit 3
-fi
-
-nsca_message="${icinga_reported_hostname} ${icinga_service_description} 0 OK: A dataset has recently become ready. Location: ${location}"
-
-if [ "${dry_run}" == 'true' ]; then
- echo 'Dry run. Not sending passive check to icinga.'
- echo "${nsca_message}"
-else
- echo "${nsca_message}" | /usr/sbin/send_nsca -H "${icinga_nsca_host}" -p "${icinga_nsca_port}"
-fi
diff --git a/oozie/util/monitor/done_flag/workflow.xml b/oozie/util/monitor/done_flag/workflow.xml
deleted file mode 100644
index 99c9329..0000000
--- a/oozie/util/monitor/done_flag/workflow.xml
+++ /dev/null
@@ -1,62 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<workflow-app xmlns="uri:oozie:workflow:0.4"
- name="monitor_done_flag-${icinga_service_description}-${location}-wf">
-
- <parameters>
- <property>
- <name>queue_name</name>
- <value>default</value>
- </property>
-
- <!-- Required properties -->
- <property><name>name_node</name></property>
- <property><name>job_tracker</name></property>
-
- <property>
- <name>icinga_service_description</name>
- <description>
- The description of the icinga service to send a passive
- check to. This should match a monitor_service's description in
- the operations/puppet repository somewhere.
- </description>
- </property>
-
- <property>
- <name>location</name>
- <description>
- The path to the dataset instance that this workflow
- is checking. This is used for the icinga service
- description text.
- </description>
- </property>
-
- </parameters>
-
- <start to="send_ok_to_icinga"/>
-
- <action name="send_ok_to_icinga">
- <shell xmlns="uri:oozie:shell-action:0.1">
- <job-tracker>${job_tracker}</job-tracker>
- <name-node>${name_node}</name-node>
- <configuration>
- <property>
- <name>mapred.job.queue.name</name>
- <value>${queue_name}</value>
- </property>
- </configuration>
- <exec>send_ok_to_icinga.sh</exec>
- <argument>${icinga_service_description}</argument>
- <argument>${location}</argument>
- <file>send_ok_to_icinga.sh#send_ok_to_icinga.sh</file>
- </shell>
-
- <ok to="end"/>
- <error to="kill"/>
- </action>
-
- <kill name="kill">
- <message>Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
- </kill>
-
- <end name="end"/>
-</workflow-app>
diff --git a/oozie/webrequest/partition/add/README.md b/oozie/webrequest/partition/add/README.md
index c944705..37e0a78 100644
--- a/oozie/webrequest/partition/add/README.md
+++ b/oozie/webrequest/partition/add/README.md
@@ -35,5 +35,4 @@
table is not meant for researchers.
Icinga monitoring for the ```_SUCCESS``` marker is not part of this
-setup and can be found as a separate bundle, coordinator and workflow
-at oozie/webrequest/partition/monitor_done_flag
+setup and can be found at {{Citation needed}}.
\ No newline at end of file
diff --git a/oozie/webrequest/partition/monitor_done_flag/bundle.properties b/oozie/webrequest/partition/monitor_done_flag/bundle.properties
deleted file mode 100644
index e9bb99e..0000000
--- a/oozie/webrequest/partition/monitor_done_flag/bundle.properties
+++ /dev/null
@@ -1,40 +0,0 @@
-# Configures a bundle to managy notifying icinga about done webrequest
-# datasets. Any of the following properties are overidable with -D.
-#
-# Usage:
-# oozie job -submit -config oozie/webrequest/monitor_done_flag/bundle.properties.
-#
-# NOTE: The $oozie_directory must be synced to HDFS so that all relevant
-# .xml files exist there when this job is submitted.
-
-
-name_node = hdfs://analytics-hadoop
-job_tracker = resourcemanager.analytics.eqiad.wmnet:8032
-queue_name = default
-
-# Base path in HDFS to oozie files.
-# Other files will be used relative to this path.
-oozie_directory = ${name_node}/wmf/refinery/current/oozie
-
-# HDFS path to coordinator to run for each webrequest_source.
-coordinator_file = ${oozie_directory}/webrequest/partition/monitor_done_flag/coordinator.xml
-
-# HDFS path to workflow to run.
-workflow_file = ${oozie_directory}/util/monitor/done_flag/workflow.xml
-
-# HDFS path to webrequest dataset definition
-datasets_file = ${oozie_directory}/webrequest/datasets.xml
-
-# Initial import time of the webrequest dataset.
-start_time = 2014-04-01T00:00Z
-
-# Time to stop running this coordinator. Year 3000 == never!
-stop_time = 3000-01-01T00:00Z
-
-# HDFS path to directory where webrequest data is time bucketed.
-webrequest_data_directory = ${name_node}/wmf/data/raw/webrequest
-
-# Coordintator to start.
-oozie.bundle.application.path = ${oozie_directory}/webrequest/partition/monitor_done_flag/bundle.xml
-oozie.use.system.libpath = true
-oozie.action.external.stats.write = true
diff --git a/oozie/webrequest/partition/monitor_done_flag/bundle.xml b/oozie/webrequest/partition/monitor_done_flag/bundle.xml
deleted file mode 100644
index 955a12a..0000000
--- a/oozie/webrequest/partition/monitor_done_flag/bundle.xml
+++ /dev/null
@@ -1,61 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<bundle-app xmlns="uri:oozie:bundle:0.2"
- name="webrequest-monitor_done_flag-bundle">
-
- <parameters>
- <property>
- <name>queue_name</name>
- <value>default</value>
- </property>
-
- <!-- Required properties. -->
- <property><name>coordinator_file</name></property>
- <property><name>name_node</name></property>
- <property><name>job_tracker</name></property>
- <property><name>workflow_file</name></property>
- <property><name>start_time</name></property>
- <property><name>stop_time</name></property>
- <property><name>webrequest_data_directory</name></property>
- </parameters>
-
- <coordinator name='monitor_done_flag-webrequest-bits'>
- <app-path>${coordinator_file}</app-path>
- <configuration>
- <property>
- <name>webrequest_source</name>
- <value>bits</value>
- </property>
- </configuration>
- </coordinator>
-
- <coordinator name='monitor_done_flag-webrequest-mobile'>
- <app-path>${coordinator_file}</app-path>
- <configuration>
- <property>
- <name>webrequest_source</name>
- <value>mobile</value>
- </property>
- </configuration>
- </coordinator>
-
- <coordinator name='monitor_done_flag-webrequest-text'>
- <app-path>${coordinator_file}</app-path>
- <configuration>
- <property>
- <name>webrequest_source</name>
- <value>text</value>
- </property>
- </configuration>
- </coordinator>
-
- <coordinator name='monitor_done_flag-webrequest-upload'>
- <app-path>${coordinator_file}</app-path>
- <configuration>
- <property>
- <name>webrequest_source</name>
- <value>upload</value>
- </property>
- </configuration>
- </coordinator>
-
-</bundle-app>
diff --git a/oozie/webrequest/partition/monitor_done_flag/coordinator.xml b/oozie/webrequest/partition/monitor_done_flag/coordinator.xml
deleted file mode 100644
index 8d64c55..0000000
--- a/oozie/webrequest/partition/monitor_done_flag/coordinator.xml
+++ /dev/null
@@ -1,70 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<coordinator-app xmlns="uri:oozie:coordinator:0.4"
- name="webrequest-monitor_done_flag-${webrequest_source}-coord"
- frequency="${coord:hours(1)}"
- start="${start_time}"
- end="${stop_time}"
- timezone="Universal">
-
- <parameters>
- <property>
- <name>queue_name</name>
- <value>default</value>
- </property>
-
- <!-- Required properties. -->
- <property><name>name_node</name></property>
- <property><name>job_tracker</name></property>
- <property><name>workflow_file</name></property>
- <property><name>start_time</name></property>
- <property><name>stop_time</name></property>
- <property><name>webrequest_data_directory</name></property>
-
- <property><name>webrequest_source</name></property>
- </parameters>
-
- <controls>
- <!--
- There is not much sense in having more than one of the
- workflows running. That could only hide issues. So we limit
- to one.
- -->
- <concurrency>1</concurrency>
-
-
- <!--
- Backfilling gets in the way anyways, so we can throttle as
- well to 1, to not artifically keep jobs waiting.
- -->
- <throttle>1</throttle>
- </controls>
-
- <datasets>
- <!--
- Include the given datasets_file file. This should
- define the "webrequest" dataset for this coordinator.
- -->
- <include>${datasets_file}</include>
- </datasets>
-
- <input-events>
- <data-in name="input" dataset="webrequest_${webrequest_source}">
- <instance>${coord:current(0)}</instance>
- </data-in>
- </input-events>
-
- <action>
- <workflow>
- <app-path>${workflow_file}</app-path>
- <configuration>
-
- <!-- Pass these properties through to the workflow -->
- <property><name>name_node</name><value>${name_node}</value></property>
- <property><name>job_tracker</name><value>${job_tracker}</value></property>
- <property><name>queue_name</name><value>${queue_name}</value></property>
- <property><name>icinga_service_description</name><value>hive_partition_webrequest-${webrequest_source}</value></property>
- <property><name>location</name><value>${coord:dataIn('input')}</value></property>
- </configuration>
- </workflow>
- </action>
-</coordinator-app>
--
To view, visit https://gerrit.wikimedia.org/r/177217
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: Ifd05ff42520f7bc64bceb36aa7b2e74d24a71aa7
Gerrit-PatchSet: 3
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits