Joal has uploaded a new change for review. (
https://gerrit.wikimedia.org/r/392703 )
Change subject: Change restbase job to also count MW-API requests
......................................................................
Change restbase job to also count MW-API requests
The spark job counting restbase-varnish hits now also counts
MW-API hits, sending both to graphite. This patch renames restbase
job to apis, and includes settings for the new parameters and updates
the jar version to the new one.
Bug: T176785
Change-Id: I6a7ff7db4dcd646cec2999ff67f4301cea459693
---
A oozie/apis/README.md
R oozie/apis/coordinator.properties
R oozie/apis/coordinator.xml
R oozie/apis/workflow.xml
D oozie/restbase/README.md
5 files changed, 43 insertions(+), 35 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery
refs/changes/03/392703/1
diff --git a/oozie/apis/README.md b/oozie/apis/README.md
new file mode 100644
index 0000000..e1870ce
--- /dev/null
+++ b/oozie/apis/README.md
@@ -0,0 +1,14 @@
+Oozie job to schedule generating varnish-requests-count for RESTBase and MW-API.
+The job runs every hour and reports metrics to Graphite.
+
+The oozie workflow launches a spark action that runs the
+APIsVarnishRequests scala job in analytics-refinery-source/refinery-job
+
+Example command for running the coordinator on command line:
+
+ oozie job -run \
+ -config coordinator.properties \
+ -D refinery_directory=hdfs://analytics-hadoop/wmf/refinery/current
+
+The results of this job can be viewed in Graphite (graphite.wikimedia.org) under
+the restbase.requests and MediaWiki.api namespaces.
diff --git a/oozie/restbase/coordinator.properties
b/oozie/apis/coordinator.properties
similarity index 77%
rename from oozie/restbase/coordinator.properties
rename to oozie/apis/coordinator.properties
index 94d6afb..64333b4 100644
--- a/oozie/restbase/coordinator.properties
+++ b/oozie/apis/coordinator.properties
@@ -1,7 +1,9 @@
-# Configures a coordinator to automatically manage generating and sending
RESTBase metrics to Graphite from
-# the refined webrequest text data. Any of the following properties are
override-able with -D.
+# Configures a coordinator to automatically manage generating and sending APIs
+# varnish requests counts to Graphite from the refined webrequest text data.
+#
+# Any of the following properties are override-able with -D.
# Usage:
-# oozie job -Duser=$USER -Dstart_time=2015-08-01T00:00Z -submit -config
oozie/restbase_metrics/coordinator.properties
+# oozie job -Duser=$USER -Dstart_time=2015-08-01T00:00Z -submit -config
oozie/apis/coordinator.properties
#
# NOTE: The $oozie_directory must be synced to HDFS so that all relevant
# .xml files exist there when this job is submitted.
@@ -26,18 +28,18 @@
# Other files will be used relative to this path.
oozie_directory = ${refinery_directory}/oozie
-# HDFS path to coordinator to run for each webrequest_source.
-coordinator_file = ${oozie_directory}/restbase/coordinator.xml
+# HDFS path to coordinator
+coordinator_file = ${oozie_directory}/apis/coordinator.xml
-# HDFS path to workflow to run.
-workflow_file = ${oozie_directory}/restbase/workflow.xml
+# HDFS path to workflow
+workflow_file = ${oozie_directory}/apis/workflow.xml
# HDFS path to refine webrequest dataset definitions
datasets_file = ${oozie_directory}/webrequest/datasets.xml
webrequest_data_directory = ${name_node}/wmf/data/wmf/webrequest
# Initial import time of the webrequest dataset.
-start_time = 2015-08-01T00:00Z
+start_time = 2017-11-21T00:00Z
# Time to stop running this coordinator. Year 3000 == never!
stop_time = 3000-01-01T00:00Z
@@ -46,13 +48,14 @@
spark_master = yarn
spark_deploy = cluster
spark_assembly_jar =
${name_node}/user/spark/share/lib/spark-assembly.jar
-spark_job_jar =
${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-job-0.0.45.jar
-spark_job_class =
org.wikimedia.analytics.refinery.job.RESTBaseMetrics
-spark_job_name = restbase_metrics
+spark_job_jar = ${artifacts_directory}/org/wikimedia/analytics/refinery/refinery-job-0.0.55.jar
+spark_job_class = org.wikimedia.analytics.refinery.job.APIsVarnishRequests
+spark_job_name = APIsVarnishRequests
spark_executor_memory = 2G
spark_driver_memory = 4G
spark_max_executors = 16
-graphite_namespace = restbase.requests
+restbase_namespace = restbase.requests
+mwapi_namespace = MediaWiki.api
graphite_host = graphite-in.eqiad.wmnet
graphite_port = 2003
diff --git a/oozie/restbase/coordinator.xml b/oozie/apis/coordinator.xml
similarity index 94%
rename from oozie/restbase/coordinator.xml
rename to oozie/apis/coordinator.xml
index 0a150e6..5686c5f 100644
--- a/oozie/restbase/coordinator.xml
+++ b/oozie/apis/coordinator.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<coordinator-app xmlns="uri:oozie:coordinator:0.4"
- name="restbase-coord"
+ name="apis-coord"
frequency="${coord:hours(1)}"
start="${start_time}"
end="${stop_time}"
@@ -27,7 +27,8 @@
<property><name>spark_max_executors</name></property>
<property><name>graphite_host</name></property>
<property><name>graphite_port</name></property>
- <property><name>graphite_namespace</name></property>
+ <property><name>restbase_namespace</name></property>
+ <property><name>mwapi_namespace</name></property>
<property><name>webrequest_data_directory</name></property>
<property><name>send_error_email_workflow_file</name></property>
diff --git a/oozie/restbase/workflow.xml b/oozie/apis/workflow.xml
similarity index 91%
rename from oozie/restbase/workflow.xml
rename to oozie/apis/workflow.xml
index 2ec0dd5..4ba599d 100644
--- a/oozie/restbase/workflow.xml
+++ b/oozie/apis/workflow.xml
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<workflow-app xmlns="uri:oozie:workflow:0.4"
- name="restbase-wf-${year}-${month}-${day}-${hour}">
+ name="apis-wf-${year}-${month}-${day}-${hour}">
<parameters>
@@ -68,8 +68,12 @@
<description>Graphite port</description>
</property>
<property>
- <name>graphite_namespace</name>
- <description>Namespace/prefix for metric in Graphite</description>
+ <name>restbase_namespace</name>
+ <description>Namespace/prefix for restbase in Graphite</description>
+ </property>
+ <property>
+ <name>mwapi_namespace</name>
+ <description>Namespace/prefix for mw-api in Graphite</description>
</property>
<property>
<name>webrequest_data_directory</name>
@@ -118,8 +122,10 @@
<arg>${graphite_host}</arg>
<arg>--graphite-port</arg>
<arg>${graphite_port}</arg>
- <arg>--namespace</arg>
- <arg>${graphite_namespace}</arg>
+ <arg>--restbaseNamespace</arg>
+ <arg>${restbase_namespace}</arg>
+ <arg>--mwapiNamespace</arg>
+ <arg>${mwapi_namespace}</arg>
<arg>--webrequest-base-path</arg>
<arg>${webrequest_data_directory}</arg>
</spark>
diff --git a/oozie/restbase/README.md b/oozie/restbase/README.md
deleted file mode 100644
index d8142f7..0000000
--- a/oozie/restbase/README.md
+++ /dev/null
@@ -1,16 +0,0 @@
-Oozie job to schedule generating metrics for RESTBase. The job runs every
-hour and reports metrics to Graphite.
-
-The oozie workflow launches a spark action that runs the
-RESTBaseMetrics scala job in analytics-refinery-source/refinery-job here -
-https://phabricator.wikimedia.org/diffusion/ANRS/
-
-Example command for running the coordinator on command line:
-
- oozie job -run \
- -config coordinator.properties \
- -D
refinery_directory=hdfs://analytics-hadoop/user/madhuvishy/refinery \
- -D
spark_job_jar=hdfs://analytics-hadoop/user/madhuvishy/source/refinery-job-0.0.45-SNAPSHOT.jar
-
-The results of this job can be viewed in Graphite (graphite.wikimedia.org)
under the restbase namespace,
-in restbase.requests.varnish_requests.
\ No newline at end of file
--
To view, visit https://gerrit.wikimedia.org/r/392703
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I6a7ff7db4dcd646cec2999ff67f4301cea459693
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits