Joal has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/342197 )

Change subject: [WIP] Add oozie job for standard metrics computation
......................................................................

[WIP] Add oozie job for standard metrics computation

Bug: T160151
Change-Id: Ic5fddf5cc99b0c428f6685d0ef495ee2fe0c449b
---
R hive/mediawiki/history/create_mediawiki_metric_result.hql
M oozie/mediawiki/history/datasets.xml
M oozie/mediawiki/history/denormalize/bundle.properties
M oozie/mediawiki/history/denormalize/coordinator.xml
M oozie/mediawiki/history/denormalize/workflow.xml
A oozie/mediawiki/history/metrics/bundle.properties
A oozie/mediawiki/history/metrics/bundle.xml
A oozie/mediawiki/history/metrics/coordinator.xml
R oozie/mediawiki/history/metrics/daily_edits.hql
R oozie/mediawiki/history/metrics/daily_edits_by_anonymous_users.hql
R oozie/mediawiki/history/metrics/daily_edits_by_bot_users.hql
R oozie/mediawiki/history/metrics/daily_edits_by_registered_users.hql
R oozie/mediawiki/history/metrics/daily_unique_anonymous_editors.hql
R oozie/mediawiki/history/metrics/daily_unique_bot_editors.hql
R oozie/mediawiki/history/metrics/daily_unique_editors.hql
R oozie/mediawiki/history/metrics/daily_unique_page_creators.hql
R oozie/mediawiki/history/metrics/daily_unique_registered_editors.hql
R oozie/mediawiki/history/metrics/monthly_new_editors.hql
R oozie/mediawiki/history/metrics/monthly_new_registered_users.hql
R oozie/mediawiki/history/metrics/monthly_surviving_new_editors.hql
R oozie/mediawiki/history/metrics/productive_new_editors.hql
A oozie/mediawiki/history/metrics/workflow.xml
22 files changed, 466 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/refinery 
refs/changes/97/342197/1

diff --git a/hive/mediawiki/metrics/create_mediawiki_metric_result.hql 
b/hive/mediawiki/history/create_mediawiki_metric_result.hql
similarity index 90%
rename from hive/mediawiki/metrics/create_mediawiki_metric_result.hql
rename to hive/mediawiki/history/create_mediawiki_metric_result.hql
index 6ee4865..8816541 100644
--- a/hive/mediawiki/metrics/create_mediawiki_metric_result.hql
+++ b/hive/mediawiki/history/create_mediawiki_metric_result.hql
@@ -9,7 +9,8 @@
 PARTITIONED BY (
   `infra` string COMMENT 'Infrastructure from which data is retrieved (usually 
labs or prod)',
   `version` string COMMENT 'Versioning information to keep multiple datasets 
(usually YYYY-MM)',
-  `metric`    string  COMMENT 'The metric being computed to measure')
+  `metric`    string  COMMENT 'The metric being computed to measure',
+  `wiki_db`   string  COMMENT 'The wiki this measurement pertains to')
 ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '\t'
 LINES TERMINATED BY '\n'
diff --git a/oozie/mediawiki/history/datasets.xml 
b/oozie/mediawiki/history/datasets.xml
index 1463381..7bba001 100644
--- a/oozie/mediawiki/history/datasets.xml
+++ b/oozie/mediawiki/history/datasets.xml
@@ -88,7 +88,7 @@
         <done-flag>_SUCCESS</done-flag>
     </dataset>
 
-    <dataset name="mw_history_denormalized"
+    <dataset name="mw_denormalized_history"
              frequency="${coord:months(1)}"
              initial-instance="${start_time}"
              timezone="Universal">
@@ -97,7 +97,6 @@
     </dataset>
 
     <!-- History metrics datasets -->
-
     <dataset name="mw_metrics"
              frequency="${coord:months(1)}"
              initial-instance="${start_time}"
diff --git a/oozie/mediawiki/history/denormalize/bundle.properties 
b/oozie/mediawiki/history/denormalize/bundle.properties
index f20ccf9..edaf983 100644
--- a/oozie/mediawiki/history/denormalize/bundle.properties
+++ b/oozie/mediawiki/history/denormalize/bundle.properties
@@ -76,7 +76,7 @@
 partitions_number                 = 1024
 tmp_path                          = 
${name_node}/tmp/mediawiki/history/checkpoints
 
-# Workflow to add a partition
+# Workflow to repair partitions
 repair_partitions_workflow_file   = 
${oozie_directory}/util/hive/partition/repair/workflow.xml
 
 # Workflow to mark a directory as done
diff --git a/oozie/mediawiki/history/denormalize/coordinator.xml 
b/oozie/mediawiki/history/denormalize/coordinator.xml
index 6a8f132..bbff8dd 100644
--- a/oozie/mediawiki/history/denormalize/coordinator.xml
+++ b/oozie/mediawiki/history/denormalize/coordinator.xml
@@ -99,7 +99,7 @@
         <data-out name="mw_page_history" dataset="mw_page_history">
             <instance>${coord:current(0)}</instance>
         </data-out>
-        <data-out name="mw_history_denormalized" 
dataset="mw_history_denormalized">
+        <data-out name="mw_denormalized_history" 
dataset="mw_denormalized_history">
             <instance>${coord:current(0)}</instance>
         </data-out>
     </output-events>
@@ -109,7 +109,7 @@
             <app-path>${workflow_file}</app-path>
             <configuration>
                 <property>
-                    <name>version</name>
+                    <name>mw_version_partition</name>
                     <value>${coord:formatTime(coord:nominalTime(), 
"yyyy")}-${coord:formatTime(coord:nominalTime(), "MM")}</value>
                 </property>
             </configuration>
diff --git a/oozie/mediawiki/history/denormalize/workflow.xml 
b/oozie/mediawiki/history/denormalize/workflow.xml
index 3348d2a..1e86335 100644
--- a/oozie/mediawiki/history/denormalize/workflow.xml
+++ b/oozie/mediawiki/history/denormalize/workflow.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <workflow-app xmlns="uri:oozie:workflow:0.4"
-    name="mediawiki-history-denormalize-wf-${mw_infra_partition}-${version}">
+    
name="mediawiki-history-denormalize-wf-${mw_infra_partition}-${mw_version_partition}">
 
     <parameters>
 
@@ -54,7 +54,7 @@
             <description>The infra data to process (prod or labs)</description>
         </property>
         <property>
-            <name>version</name>
+            <name>mw_version_partition</name>
             <description>The version for the job</description>
         </property>
 
@@ -217,7 +217,7 @@
             </configuration>
             <master>${spark_master}</master>
             <mode>${spark_deploy}</mode>
-            <name>${spark_job_name}-${mw_infra_partition}-${version}</name>
+            
<name>${spark_job_name}-${mw_infra_partition}-${mw_version_partition}</name>
             <class>${spark_job_class}</class>
             <jar>${spark_job_jar}</jar>
             <spark-opts>--conf spark.yarn.jar=${spark_assembly_jar} 
--executor-memory ${spark_executor_memory} --driver-memory 
${spark_driver_memory} --queue ${queue_name}</spark-opts>
@@ -228,7 +228,7 @@
             <arg>--infra</arg>
             <arg>${mw_infra_partition}</arg>
             <arg>--version</arg>
-            <arg>${version}</arg>
+            <arg>${mw_version_partition}</arg>
             <arg>--temporary-path</arg>
             <arg>${tmp_path}</arg>
             <arg>--num-partitions</arg>
diff --git a/oozie/mediawiki/history/metrics/bundle.properties 
b/oozie/mediawiki/history/metrics/bundle.properties
new file mode 100644
index 0000000..21d0a87
--- /dev/null
+++ b/oozie/mediawiki/history/metrics/bundle.properties
@@ -0,0 +1,68 @@
+# Configures a bundle to automatically compute prod and labs standard metrics
+# from mediawiki history.
+# Usage:
+#   oozie job -Duser=$USER -Dstart_time=2015-08-01T00:00Z -submit -config 
oozie/mediawiki/history/metrics/bundle.properties
+#
+# NOTE:  The $oozie_directory must be synced to HDFS so that all relevant
+#        .xml files exist there when this job is submitted.
+
+name_node                         = hdfs://analytics-hadoop
+job_tracker                       = resourcemanager.analytics.eqiad.wmnet:8032
+queue_name                        = default
+
+#Default user
+user                              = hdfs
+
+# Base path in HDFS to refinery.
+# When submitting this job for production, you should override this to point 
directly at a deployed
+# directory name, and not the 'symbolic' 'current' directory. E.g. 
/wmf/refinery/2015-01-05T17.59.18Z--7bb7f07
+refinery_directory                = ${name_node}/wmf/refinery/current
+
+# HDFS path to artifacts that will be used by this job.
+# E.g. refinery-job.jar should exist here.
+artifacts_directory               = ${refinery_directory}/artifacts
+
+# Base path in HDFS to oozie files.
+# Other files will be used relative to this path.
+oozie_directory                   = ${refinery_directory}/oozie
+
+# HDFS path to bundle to run.
+bundle_file                       = 
${oozie_directory}/mediawiki/history/metrics/bundle.xml
+
+# HDFS path to coordinator to run for each infra.
+coordinator_file                  = 
${oozie_directory}/mediawiki/history/metrics/coordinator.xml
+
+# HDFS path to workflow to run.
+workflow_file                     = 
${oozie_directory}/mediawiki/history/metrics/workflow.xml
+
+# HDFS path to hive-site.xml file.  This is needed to run hive actions.
+hive_site_xml                     = ${name_node}/user/hive/hive-site.xml
+
+# HDFS path to mediawiki history datasets definitions
+datasets_file                     = 
${oozie_directory}/mediawiki/history/datasets.xml
+mw_directory                      = ${name_node}/wmf/data/wmf/mediawiki
+
+# Hive tables for the denormalized history and the metric results
+mw_denormalized_history_table     = wmf.mediawiki_history
+mw_metrics_table                  = wmf.mediawiki_metric_result
+
+
+# Initial import time of the mediawiki history dataset.
+start_time                        = 2017-03-01T00:00Z
+
+# Time to stop running this coordinator.  Year 3000 == never!
+stop_time                         = 3000-01-01T00:00Z
+
+# Workflow to mark a directory as done
+mark_directory_done_workflow_file = 
${oozie_directory}/util/mark_directory_done/workflow.xml
+
+# Workflow to send an error email
+send_error_email_workflow_file    = 
${oozie_directory}/util/send_error_email/workflow.xml
+
+# email address to contact in case of SLA errors
+sla_alert_contact                 = [email protected]
+
+# Bundle to start.
+oozie.bundle.application.path     = ${bundle_file}
+oozie.use.system.libpath          = true
+oozie.action.external.stats.write = true
\ No newline at end of file
diff --git a/oozie/mediawiki/history/metrics/bundle.xml 
b/oozie/mediawiki/history/metrics/bundle.xml
new file mode 100644
index 0000000..71e51e1
--- /dev/null
+++ b/oozie/mediawiki/history/metrics/bundle.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<bundle-app xmlns="uri:oozie:bundle:0.2"
+    name="mediawiki-history-metrics-bundle">
+
+    <parameters>
+        <!-- Required properties -->
+        <property><name>queue_name</name></property>
+        <property><name>coordinator_file</name></property>
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+        <property><name>workflow_file</name></property>
+        <property><name>start_time</name></property>
+        <property><name>stop_time</name></property>
+
+        <property><name>datasets_file</name></property>
+        <property><name>mw_directory</name></property>
+
+        <property><name>mw_denormalized_history_table</name></property>
+        <property><name>mw_metrics_table</name></property>
+
+        <property><name>hive_site_xml</name></property>
+        <property><name>mark_directory_done_workflow_file</name></property>
+        <property><name>send_error_email_workflow_file</name></property>
+
+        <property><name>sla_alert_contact</name></property>
+    </parameters>
+
+    <coordinator name="mediawiki-history-metrics-coord-prod">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>mw_infra_partition</name>
+                <value>prod</value>
+            </property>
+        </configuration>
+    </coordinator>
+
+    <coordinator name="mediawiki-history-metrics-coord-labs">
+        <app-path>${coordinator_file}</app-path>
+        <configuration>
+            <property>
+                <name>mw_infra_partition</name>
+                <value>labs</value>
+            </property>
+        </configuration>
+    </coordinator>
+
+</bundle-app>
diff --git a/oozie/mediawiki/history/metrics/coordinator.xml 
b/oozie/mediawiki/history/metrics/coordinator.xml
new file mode 100644
index 0000000..b0900dd
--- /dev/null
+++ b/oozie/mediawiki/history/metrics/coordinator.xml
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<coordinator-app xmlns="uri:oozie:coordinator:0.4"
+    xmlns:sla="uri:oozie:sla:0.2"
+    name="mediawiki-history-metrics-coord-${mw_infra_partition}"
+    frequency="${coord:months(1)}"
+    start="${start_time}"
+    end="${stop_time}"
+    timezone="Universal">
+
+    <parameters>
+        <!-- Required properties -->
+        <property><name>queue_name</name></property>
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+        <property><name>workflow_file</name></property>
+        <property><name>start_time</name></property>
+        <property><name>stop_time</name></property>
+
+        <property><name>datasets_file</name></property>
+        <property><name>mw_directory</name></property>
+
+        <property><name>mw_denormalized_history_table</name></property>
+        <property><name>mw_metrics_table</name></property>
+
+        <property><name>mw_infra_partition</name></property>
+
+        <property><name>hive_site_xml</name></property>
+        <property><name>mark_directory_done_workflow_file</name></property>
+        <property><name>send_error_email_workflow_file</name></property>
+
+        <property><name>sla_alert_contact</name></property>
+    </parameters>
+
+    <controls>
+        <!--(timeout is measured in minutes)-->
+        <timeout>-1</timeout>
+
+        <!-- Setting low concurrency for resource sharing.
+             The job runs pretty fast (~1 minute) and increasing concurrency 
should not cause any problems-->
+        <concurrency>1</concurrency>
+
+        <throttle>2</throttle>
+
+    </controls>
+
+    <datasets>
+        <!--
+        Include refined datasets files.
+        $datasets_file will be used as the input events
+        -->
+        <include>${datasets_file}</include>
+    </datasets>
+
+    <!-- Wait for the denormalized history snapshot of the current month.
+         The dataset name must match the one defined in ${datasets_file}. -->
+    <input-events>
+        <data-in name="mw_denormalized_history" 
dataset="mw_denormalized_history">
+            <instance>${coord:current(0)}</instance>
+        </data-in>
+    </input-events>
+
+    <!-- Output events must be declared with data-out (not data-in) and
+         point at the metrics dataset rather than at the input dataset. -->
+    <output-events>
+        <data-out name="mw_metrics" dataset="mw_metrics">
+            <instance>${coord:current(0)}</instance>
+        </data-out>
+    </output-events>
+
+    <action>
+        <workflow>
+            <app-path>${workflow_file}</app-path>
+            <configuration>
+                <property>
+                    <name>mw_version_partition</name>
+                    <value>${coord:formatTime(coord:nominalTime(), 
"yyyy")}-${coord:formatTime(coord:nominalTime(), "MM")}</value>
+                </property>
+            </configuration>
+        </workflow>
+
+        <sla:info>
+            <!--
+                Use action actual time as SLA base, since it's the time used
+                to compute timeout
+            -->
+            <sla:nominal-time>${coord:actualTime()}</sla:nominal-time>
+            <sla:should-end>${4 * DAYS}</sla:should-end>
+            <sla:alert-events>end_miss</sla:alert-events>
+            <sla:alert-contact>${sla_alert_contact}</sla:alert-contact>
+        </sla:info>
+
+    </action>
+</coordinator-app>
\ No newline at end of file
diff --git a/oozie/mediawiki/history/metric/daily_edits.hql 
b/oozie/mediawiki/history/metrics/daily_edits.hql
similarity index 83%
rename from oozie/mediawiki/history/metric/daily_edits.hql
rename to oozie/mediawiki/history/metrics/daily_edits.hql
index df630f4..45aca3f 100644
--- a/oozie/mediawiki/history/metric/daily_edits.hql
+++ b/oozie/mediawiki/history/metrics/daily_edits.hql
@@ -3,7 +3,9 @@
 --  start_timestamp -- YYYYMMDDHHmmss formatted start (inclusive)
 --  end_timestamp   -- YYYYMMDDHHmmss formatted end (exclusive)
 --  wiki_db         -- Optionally filter by source wiki database.
---                      Pass 'all' to compute for all wikis
+--                       Pass 'all' to compute for all wikis
+--  infra           -- infra partition to use
+--  version         -- version partition to use
 --
 -- Usage:
 --     hive -f daily_edits.hql                                \
@@ -21,7 +23,8 @@
 set hive.exec.max.dynamic.partitions.pernode=2000;
 
 -- dynamic partitions must be specified here
- insert overwrite table ${destination_table} partition (metric, wiki_db)
+ insert overwrite table ${destination_table}
+   partition (infra='${infra}', version='${version}', metric, wiki_db)
 -- dynamic partitions must be selected in order and at the end
  select concat_ws('-',
             substring(event_timestamp, 0, 4),
@@ -38,6 +41,8 @@
     and ('${wiki_db}' = 'all' or wiki_db = '${wiki_db}')
     and event_timestamp >= '${start_timestamp}'
     and event_timestamp <  '${end_timestamp}'
+    and infra = '${infra}'
+    and version = '${version}'
 
   group by wiki_db,
         substring(event_timestamp, 0, 4),
diff --git a/oozie/mediawiki/history/metric/daily_edits_by_anonymous_users.hql 
b/oozie/mediawiki/history/metrics/daily_edits_by_anonymous_users.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_edits_by_anonymous_users.hql
rename to oozie/mediawiki/history/metrics/daily_edits_by_anonymous_users.hql
diff --git a/oozie/mediawiki/history/metric/daily_edits_by_bot_users.hql 
b/oozie/mediawiki/history/metrics/daily_edits_by_bot_users.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_edits_by_bot_users.hql
rename to oozie/mediawiki/history/metrics/daily_edits_by_bot_users.hql
diff --git a/oozie/mediawiki/history/metric/daily_edits_by_registered_users.hql 
b/oozie/mediawiki/history/metrics/daily_edits_by_registered_users.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_edits_by_registered_users.hql
rename to oozie/mediawiki/history/metrics/daily_edits_by_registered_users.hql
diff --git a/oozie/mediawiki/history/metric/daily_unique_anonymous_editors.hql 
b/oozie/mediawiki/history/metrics/daily_unique_anonymous_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_unique_anonymous_editors.hql
rename to oozie/mediawiki/history/metrics/daily_unique_anonymous_editors.hql
diff --git a/oozie/mediawiki/history/metric/daily_unique_bot_editors.hql 
b/oozie/mediawiki/history/metrics/daily_unique_bot_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_unique_bot_editors.hql
rename to oozie/mediawiki/history/metrics/daily_unique_bot_editors.hql
diff --git a/oozie/mediawiki/history/metric/daily_unique_editors.hql 
b/oozie/mediawiki/history/metrics/daily_unique_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_unique_editors.hql
rename to oozie/mediawiki/history/metrics/daily_unique_editors.hql
diff --git a/oozie/mediawiki/history/metric/unique_page_creators.hql 
b/oozie/mediawiki/history/metrics/daily_unique_page_creators.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/unique_page_creators.hql
rename to oozie/mediawiki/history/metrics/daily_unique_page_creators.hql
diff --git a/oozie/mediawiki/history/metric/daily_unique_registered_editors.hql 
b/oozie/mediawiki/history/metrics/daily_unique_registered_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/daily_unique_registered_editors.hql
rename to oozie/mediawiki/history/metrics/daily_unique_registered_editors.hql
diff --git a/oozie/mediawiki/history/metric/monthly_new_editors.hql 
b/oozie/mediawiki/history/metrics/monthly_new_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/monthly_new_editors.hql
rename to oozie/mediawiki/history/metrics/monthly_new_editors.hql
diff --git a/oozie/mediawiki/history/metric/monthly_new_registered_users.hql 
b/oozie/mediawiki/history/metrics/monthly_new_registered_users.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/monthly_new_registered_users.hql
rename to oozie/mediawiki/history/metrics/monthly_new_registered_users.hql
diff --git a/oozie/mediawiki/history/metric/monthly_surviving_new_editors.hql 
b/oozie/mediawiki/history/metrics/monthly_surviving_new_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/monthly_surviving_new_editors.hql
rename to oozie/mediawiki/history/metrics/monthly_surviving_new_editors.hql
diff --git a/oozie/mediawiki/history/metric/productive_new_editors.hql 
b/oozie/mediawiki/history/metrics/productive_new_editors.hql
similarity index 100%
rename from oozie/mediawiki/history/metric/productive_new_editors.hql
rename to oozie/mediawiki/history/metrics/productive_new_editors.hql
diff --git a/oozie/mediawiki/history/metrics/workflow.xml 
b/oozie/mediawiki/history/metrics/workflow.xml
new file mode 100644
index 0000000..acd0730
--- /dev/null
+++ b/oozie/mediawiki/history/metrics/workflow.xml
@@ -0,0 +1,244 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!-- Workflow of the mediawiki history metrics job. The name carries the
+     "metrics" prefix so that it is not confused with the denormalize workflow. -->
+<workflow-app xmlns="uri:oozie:workflow:0.4"
+    
name="mediawiki-history-metrics-wf-${mw_infra_partition}-${mw_version_partition}">
+
+    <parameters>
+
+        <!-- Default values for inner oozie settings -->
+        <property>
+            <name>oozie_launcher_queue_name</name>
+            <value>${queue_name}</value>
+        </property>
+        <property>
+            <name>oozie_launcher_memory</name>
+            <value>2048</value>
+        </property>
+
+        <!-- Required properties -->
+        <property><name>queue_name</name></property>
+        <property><name>name_node</name></property>
+        <property><name>job_tracker</name></property>
+
+        <property>
+            <name>mw_directory</name>
+            <description>Path to mediawiki processed data on 
Hadoop</description>
+        </property>
+
+        <property>
+            <name>mw_infra_partition</name>
+            <description>The infra data to process (prod or labs)</description>
+        </property>
+        <property>
+            <name>mw_version_partition</name>
+            <description>The version for the job</description>
+        </property>
+
+        <!-- Hive tables-->
+        <property>
+            <name>mw_denormalized_history_table</name>
+            <description>Recomputed denormalized history table</description>
+        </property>
+        <property>
+            <name>mw_metrics_table</name>
+            <description>Mediawiki metrics table</description>
+        </property>
+
+        <!-- Subworkflows -->
+        <property>
+            <name>mark_directory_done_workflow_file</name>
+            <description>Workflow for marking a directory done</description>
+        </property>
+        <property>
+            <name>send_error_email_workflow_file</name>
+            <description>Workflow for sending an email</description>
+        </property>
+    </parameters>
+
+    <!-- NOTE(review): no action named "compute_daily_edits" is defined in this
+         workflow as shown; the start transition must target an existing node. -->
+    <start to="compute_daily_edits"/>
+
+    <!-- Add MW tables partitions -->
+
+    <action name="repair_archive_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_archive_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_logging_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_logging_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_logging_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_page_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_page_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_page_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_revision_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_revision_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_revision_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_user_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_user_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_user_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_user_groups_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_user_groups_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_user_groups_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="denormalize_history"/>
+        <error to="send_error_email"/>
+    </action>
+
+
+    <!-- Compute user, page and denormalized history -->
+
+    <action name="denormalize_history">
+        <spark xmlns="uri:oozie:spark-action:0.1">
+
+            <job-tracker>${job_tracker}</job-tracker>
+            <name-node>${name_node}</name-node>
+            <configuration>
+                <!--make sure oozie:launcher runs in a low priority queue -->
+                <property>
+                    <name>oozie.launcher.mapred.job.queue.name</name>
+                    <value>${oozie_launcher_queue_name}</value>
+                </property>
+                <property>
+                    <name>oozie.launcher.mapreduce.map.memory.mb</name>
+                    <value>${oozie_launcher_memory}</value>
+                </property>
+            </configuration>
+            <master>${spark_master}</master>
+            <mode>${spark_deploy}</mode>
+            
<name>${spark_job_name}-${mw_infra_partition}-${mw_version_partition}</name>
+            <class>${spark_job_class}</class>
+            <jar>${spark_job_jar}</jar>
+            <spark-opts>--conf spark.yarn.jar=${spark_assembly_jar} 
--executor-memory ${spark_executor_memory} --driver-memory 
${spark_driver_memory} --queue ${queue_name}</spark-opts>
+            <arg>--mediawiki-base-path</arg>
+            <arg>${mw_raw_directory}</arg>
+            <arg>--output-base-path</arg>
+            <arg>${mw_directory}</arg>
+            <arg>--infra</arg>
+            <arg>${mw_infra_partition}</arg>
+            <arg>--version</arg>
+            <arg>${mw_version_partition}</arg>
+            <arg>--temporary-path</arg>
+            <arg>${tmp_path}</arg>
+            <arg>--num-partitions</arg>
+            <arg>${partitions_number}</arg>
+        </spark>
+        <ok to="repair_user_history_table_partitions" />
+        <error to="send_error_email" />
+    </action>
+
+    <!-- Add MW History tables partitions -->
+
+    <action name="repair_user_history_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_user_history_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_page_history_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_page_history_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_page_history_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="repair_denormalized_history_table_partitions"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="repair_denormalized_history_table_partitions">
+        <sub-workflow>
+            <app-path>${repair_partitions_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                
<property><name>table</name><value>${mw_denormalized_history_table}</value></property>
+            </configuration>
+        </sub-workflow>
+        <ok to="end"/>
+        <error to="send_error_email"/>
+    </action>
+
+    <action name="send_error_email">
+        <sub-workflow>
+            <app-path>${send_error_email_workflow_file}</app-path>
+            <propagate-configuration/>
+            <configuration>
+                <property>
+                    <name>parent_name</name>
+                    <value>${wf:name()}</value>
+                </property>
+                <property>
+                    <name>parent_failed_action</name>
+                    <value>${wf:lastErrorNode()}</value>
+                </property>
+                <property>
+                    <name>parent_error_code</name>
+                    <value>${wf:errorCode(wf:lastErrorNode())}</value>
+                </property>
+                <property>
+                    <name>parent_error_message</name>
+                    <value>${wf:errorMessage(wf:lastErrorNode())}</value>
+                </property>
+            </configuration>
+        </sub-workflow>
+        <ok to="kill"/>
+        <error to="kill"/>
+    </action>
+
+    <kill name="kill">
+        <message>Action failed, error 
message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <end name="end"/>
+</workflow-app>

-- 
To view, visit https://gerrit.wikimedia.org/r/342197
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic5fddf5cc99b0c428f6685d0ef495ee2fe0c449b
Gerrit-PatchSet: 1
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: Joal <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to