Ottomata has submitted this change and it was merged.

Change subject: Use bits when producing legacy tsvs
......................................................................


Use bits when producing legacy tsvs

Change-Id: I5a64ec87c11466e480058ca1d9b29022fbbeeb9f
---
M bin/refinery-dump-status-webrequest-partitions
M diagrams/oozie-overview.dia
M oozie/webrequest/legacy_tsvs/bundle.properties
M oozie/webrequest/legacy_tsvs/bundle.xml
C oozie/webrequest/legacy_tsvs/coordinator_bits.xml
R oozie/webrequest/legacy_tsvs/coordinator_bits_misc_mobile_text.xml
A oozie/webrequest/legacy_tsvs/generate_5xx-bits_tsv.hql
7 files changed, 66 insertions(+), 48 deletions(-)

Approvals:
  Ottomata: Verified; Looks good to me, approved



diff --git a/bin/refinery-dump-status-webrequest-partitions 
b/bin/refinery-dump-status-webrequest-partitions
index f8fc75e..93f4bd7 100755
--- a/bin/refinery-dump-status-webrequest-partitions
+++ b/bin/refinery-dump-status-webrequest-partitions
@@ -69,7 +69,7 @@
     DATASET_VISIBILITIES["$DATASET"]=no
 }
 
-add_dataset "legacy_tsvs" "daily" "    5xx    | 5xx-misc  |5xx-mobile | 5xx-text  |5xx-upload |    api    |   edits   | glam_nara |  mobile   |  sampled  |   zero    |"
+add_dataset "legacy_tsvs" "daily" "    5xx    | 5xx-bits  | 5xx-misc  |5xx-mobile | 5xx-text  |5xx-upload |    api    |   edits   | glam_nara |  mobile   |  sampled  |   zero    |"
 add_dataset "pagecounts_all_sites" "hourly" " file name date  |  page   | 
project |"
 add_dataset "pagecounts_raw" "hourly" " file name date  |  page   | project |"
 add_dataset "raw_webrequest" "hourly" "  bits  |  misc  | mobile |  text  | 
upload |"
@@ -336,6 +336,7 @@
     local BASE
     for BASE in \
         5xx/5xx \
+        5xx-bits/5xx-bits \
         5xx-misc/5xx-misc \
         5xx-mobile/5xx-mobile \
         5xx-text/5xx-text \
diff --git a/diagrams/oozie-overview.dia b/diagrams/oozie-overview.dia
index e70d6f4..95926f6 100644
--- a/diagrams/oozie-overview.dia
+++ b/diagrams/oozie-overview.dia
Binary files differ
diff --git a/oozie/webrequest/legacy_tsvs/bundle.properties 
b/oozie/webrequest/legacy_tsvs/bundle.properties
index ed9128d..f58e252 100644
--- a/oozie/webrequest/legacy_tsvs/bundle.properties
+++ b/oozie/webrequest/legacy_tsvs/bundle.properties
@@ -8,75 +8,76 @@
 #        .xml files exist there when this job is submitted.
 
 
-name_node                           = hdfs://analytics-hadoop
-job_tracker                         = 
resourcemanager.analytics.eqiad.wmnet:8032
-queue_name                          = default
+name_node                              = hdfs://analytics-hadoop
+job_tracker                            = 
resourcemanager.analytics.eqiad.wmnet:8032
+queue_name                             = default
 
 # Base path in HDFS to refinery.
 # When submitting this job for production, you should
 # override this to point directly at a deployed
 # directory name, and not the 'symbolic' 'current' directory.
 # E.g.  /wmf/refinery/2015-01-05T17.59.18Z--7bb7f07
-refinery_directory                  = ${name_node}/wmf/refinery/current
+refinery_directory                     = ${name_node}/wmf/refinery/current
 
 # HDFS path to artifacts that will be used by this job.
 # E.g. refinery-hive.jar should exist here.
-artifacts_directory                 = ${refinery_directory}/artifacts
+artifacts_directory                    = ${refinery_directory}/artifacts
 
 # Base path in HDFS to oozie files.
 # Other files will be used relative to this path.
-oozie_directory                     = ${refinery_directory}/oozie
+oozie_directory                        = ${refinery_directory}/oozie
 
 # HDFS paths to the coordinators to run.
 # All of them are essentially the same coordinator and differ only in the
 # webrequest_sources they depend on. This allows to for example turn off upload
 # and have the coordinators that depend on upload block, while the coordinators
 # that do not depend on upload continue to run.
-coordinator_misc_file               = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_misc.xml
-coordinator_misc_mobile_text_file   = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml
-coordinator_mobile_file             = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile.xml
-coordinator_mobile_text_file        = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile_text.xml
-coordinator_mobile_text_upload_file = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile_text_upload.xml
-coordinator_text_file               = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_text.xml
-coordinator_upload_file             = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_upload.xml
+coordinator_bits_file                  = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_bits.xml
+coordinator_bits_misc_mobile_text_file = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_bits_misc_mobile_text.xml
+coordinator_misc_file                  = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_misc.xml
+coordinator_mobile_file                = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile.xml
+coordinator_mobile_text_file           = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile_text.xml
+coordinator_mobile_text_upload_file    = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_mobile_text_upload.xml
+coordinator_text_file                  = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_text.xml
+coordinator_upload_file                = 
${oozie_directory}/webrequest/legacy_tsvs/coordinator_upload.xml
 
 # HDFS path to workflow to run.
-workflow_file                       = 
${oozie_directory}/webrequest/legacy_tsvs/workflow.xml
+workflow_file                          = 
${oozie_directory}/webrequest/legacy_tsvs/workflow.xml
 
 # HDFS path to webrequest dataset definition
-webrequest_datasets_file            = 
${oozie_directory}/webrequest/datasets.xml
+webrequest_datasets_file               = 
${oozie_directory}/webrequest/datasets.xml
 
 # Time to start running this coordinator.
 # Make sure to have hours and minutes at 0!
-start_time                          = 2014-04-01T00:00Z
+start_time                             = 2014-04-01T00:00Z
 
 # Time to stop running this coordinator.  Year 3000 == never!
-stop_time                           = 3000-01-01T00:00Z
+stop_time                              = 3000-01-01T00:00Z
 
 # HDFS path to workflow to mark a directory as done
-mark_directory_done_workflow_file   = 
${oozie_directory}/util/mark_directory_done/workflow.xml
+mark_directory_done_workflow_file      = 
${oozie_directory}/util/mark_directory_done/workflow.xml
 
-archive_job_output_workflow_file    = 
${oozie_directory}/util/archive_job_output/workflow.xml
+archive_job_output_workflow_file       = 
${oozie_directory}/util/archive_job_output/workflow.xml
 
 # HDFS path to hive-site.xml file.  This is needed to run hive actions.
-hive_site_xml                       = 
${oozie_directory}/util/hive/hive-site.xml
+hive_site_xml                          = 
${oozie_directory}/util/hive/hive-site.xml
 
 # Table to write hourly pagecounts to (fully qualified)
-webrequest_table                    = wmf.webrequest
+webrequest_table                       = wmf.webrequest
 
 # HDFS path to directory where webrequest data is time bucketed.
-webrequest_data_directory           = ${name_node}/wmf/data/wmf/webrequest
+webrequest_data_directory              = ${name_node}/wmf/data/wmf/webrequest
 
 # Temporary directory
-temporary_directory                 = ${name_node}/tmp
+temporary_directory                    = ${name_node}/tmp
 
 # Archive base directory
-archive_directory                   = ${name_node}/wmf/data/archive
+archive_directory                      = ${name_node}/wmf/data/archive
 
 # Archive directory for webrequest data
-webrequest_archive_directory        = ${archive_directory}/webrequest
+webrequest_archive_directory           = ${archive_directory}/webrequest
 
 # Coordinator to start.
-oozie.bundle.application.path       = 
${oozie_directory}/webrequest/legacy_tsvs/bundle.xml
-oozie.use.system.libpath            = true
-oozie.action.external.stats.write   = true
+oozie.bundle.application.path          = 
${oozie_directory}/webrequest/legacy_tsvs/bundle.xml
+oozie.use.system.libpath               = true
+oozie.action.external.stats.write      = true
diff --git a/oozie/webrequest/legacy_tsvs/bundle.xml 
b/oozie/webrequest/legacy_tsvs/bundle.xml
index 7d65232..f4b442e 100644
--- a/oozie/webrequest/legacy_tsvs/bundle.xml
+++ b/oozie/webrequest/legacy_tsvs/bundle.xml
@@ -9,8 +9,9 @@
         </property>
 
         <!-- Required properties. -->
+        <property><name>coordinator_bits_file</name></property>
+        <property><name>coordinator_bits_misc_mobile_text_file</name></property>
         <property><name>coordinator_misc_file</name></property>
-        <property><name>coordinator_misc_mobile_text_file</name></property>
         <property><name>coordinator_mobile_file</name></property>
         <property><name>coordinator_mobile_text_file</name></property>
         <property><name>coordinator_mobile_text_upload_file</name></property>
@@ -145,8 +146,11 @@
     </coordinator>
 
     <coordinator name="webrequest_legacy_tsvs-5xx">
-        <!-- TODO: Add 'bits', once it's turned on again -->
-        <app-path>${coordinator_misc_mobile_text_file}</app-path>
+        <!--
+        No 'upload', as that explicitly got excluded in the udp2log
+        filters.
+        -->
+        <app-path>${coordinator_bits_misc_mobile_text_file}</app-path>
         <configuration>
             <property>
                 <name>aspect_name</name>
@@ -163,7 +167,23 @@
         </configuration>
     </coordinator>
 
-    <!-- TODO: Add 'bits' per-source 5xx variant, once it's turned on again -->
+    <coordinator name="webrequest_legacy_tsvs-5xx-bits">
+        <app-path>${coordinator_bits_file}</app-path>
+        <configuration>
+            <property>
+                <name>aspect_name</name>
+                <value>5xx-bits</value>
+            </property>
+            <property>
+                <name>aspect_tsv_archive_directory</name>
+                <value>${webrequest_archive_directory}/5xx-bits</value>
+            </property>
+            <property>
+                <name>hour_offset</name>
+                <value>9</value>
+            </property>
+        </configuration>
+    </coordinator>
 
     <coordinator name="webrequest_legacy_tsvs-5xx-misc">
         <app-path>${coordinator_misc_file}</app-path>
@@ -178,7 +198,7 @@
             </property>
             <property>
                 <name>hour_offset</name>
-                <value>9</value>
+                <value>10</value>
             </property>
         </configuration>
     </coordinator>
@@ -196,7 +216,7 @@
             </property>
             <property>
                 <name>hour_offset</name>
-                <value>10</value>
+                <value>11</value>
             </property>
         </configuration>
     </coordinator>
@@ -214,7 +234,7 @@
             </property>
             <property>
                 <name>hour_offset</name>
-                <value>11</value>
+                <value>12</value>
             </property>
         </configuration>
     </coordinator>
@@ -232,7 +252,7 @@
             </property>
             <property>
                 <name>hour_offset</name>
-                <value>12</value>
+                <value>13</value>
             </property>
         </configuration>
     </coordinator>
diff --git a/oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml 
b/oozie/webrequest/legacy_tsvs/coordinator_bits.xml
similarity index 91%
copy from oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml
copy to oozie/webrequest/legacy_tsvs/coordinator_bits.xml
index 9f17521..abaaf2f 100644
--- a/oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml
+++ b/oozie/webrequest/legacy_tsvs/coordinator_bits.xml
@@ -60,17 +60,7 @@
     </datasets>
 
     <input-events>
-        <data-in name="webrequest_misc" dataset="webrequest_misc">
-            <start-instance>${coord:current(0)}</start-instance>
-            <end-instance>${coord:current(23)}</end-instance>
-        </data-in>
-
-        <data-in name="webrequest_mobile" dataset="webrequest_mobile">
-            <start-instance>${coord:current(0)}</start-instance>
-            <end-instance>${coord:current(23)}</end-instance>
-        </data-in>
-
-        <data-in name="webrequest_text" dataset="webrequest_text">
+        <data-in name="webrequest_bits" dataset="webrequest_bits">
             <start-instance>${coord:current(0)}</start-instance>
             <end-instance>${coord:current(23)}</end-instance>
         </data-in>
diff --git a/oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml 
b/oozie/webrequest/legacy_tsvs/coordinator_bits_misc_mobile_text.xml
similarity index 96%
rename from oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml
rename to oozie/webrequest/legacy_tsvs/coordinator_bits_misc_mobile_text.xml
index 9f17521..a013528 100644
--- a/oozie/webrequest/legacy_tsvs/coordinator_misc_mobile_text.xml
+++ b/oozie/webrequest/legacy_tsvs/coordinator_bits_misc_mobile_text.xml
@@ -60,6 +60,11 @@
     </datasets>
 
     <input-events>
+        <data-in name="webrequest_bits" dataset="webrequest_bits">
+            <start-instance>${coord:current(0)}</start-instance>
+            <end-instance>${coord:current(23)}</end-instance>
+        </data-in>
+
         <data-in name="webrequest_misc" dataset="webrequest_misc">
             <start-instance>${coord:current(0)}</start-instance>
             <end-instance>${coord:current(23)}</end-instance>
diff --git a/oozie/webrequest/legacy_tsvs/generate_5xx-bits_tsv.hql 
b/oozie/webrequest/legacy_tsvs/generate_5xx-bits_tsv.hql
new file mode 120000
index 0000000..a8f4179
--- /dev/null
+++ b/oozie/webrequest/legacy_tsvs/generate_5xx-bits_tsv.hql
@@ -0,0 +1 @@
+generate_5xx_tsv.hql
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/186970
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5a64ec87c11466e480058ca1d9b29022fbbeeb9f
Gerrit-PatchSet: 3
Gerrit-Project: analytics/refinery
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>
Gerrit-Reviewer: Ottomata <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to