QChris has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/154800

Change subject: Schedule all runs for recurrent reports through scheduler
......................................................................

Schedule all runs for recurrent reports through scheduler

To allow coalescing reports, the first run of a recurrent report was
scheduled immediately outside of the scheduler.

Besides buying us two different ways recurrent reports got injected,
it made us jump through hoops to test that the first run was executed,
and it injected stale, unrequested data into the report.

By making the parent report coalescable without recurrent reports, we
get rid of all the above three issues.

Now all recurrent runs (also the first one) get scheduled in the same
way. This unified scheduling considerably simplifies follow-up commits
that will postpone recurrent runs if the replication lag is too high.

Change-Id: I5d671e85ddd96fa9239a16f86e84a92fde00ffed
---
M tests/test_models/test_run_report.py
A wikimetrics/models/report_nodes/null_report.py
M wikimetrics/models/report_nodes/run_report.py
3 files changed, 18 insertions(+), 52 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/analytics/wikimetrics 
refs/changes/00/154800/1

diff --git a/tests/test_models/test_run_report.py 
b/tests/test_models/test_run_report.py
index 8de9c7b..8263295 100644
--- a/tests/test_models/test_run_report.py
+++ b/tests/test_models/test_run_report.py
@@ -387,39 +387,6 @@
             results[result_key]['FAILURE'].find('Edits was incorrectly 
configured') >= 0,
         )
 
-    def test_first_run_of_recurrent_report_happens(self):
-        '''
-        Scheduling a recurrent report should run the 1st run of the report 
right away
-        and not wait for the scheduled run
-        '''
-
-        parameters = {
-            'name': 'Edits - test',
-            'cohort': {
-                'id': self.cohort.id,
-                'name': self.cohort.name,
-            },
-            'metric': {
-                'name': 'NamespaceEdits',
-                'namespaces': [0, 1, 2],
-                'start_date': '2013-01-01 00:00:00',
-                'end_date': '2013-01-03 00:00:00',
-                'individualResults': True,
-                'aggregateResults': False,
-                'aggregateSum': False,
-                'aggregateAverage': False,
-                'aggregateStandardDeviation': False,
-            },
-            'recurrent': True,
-            'public': True
-        }
-
-        # with patch.object(RunReport, '_run_child_report', mock_run):
-        rr = RunReport(parameters, user_id=self.owner_user_id)
-        rr._run_child_report = MagicMock()
-        rr.task.delay(rr)
-        rr._run_child_report.assert_called_once_with()
-
 
 class RunReportBasicTest(DatabaseTest):
     def setUp(self):
diff --git a/wikimetrics/models/report_nodes/null_report.py 
b/wikimetrics/models/report_nodes/null_report.py
new file mode 100644
index 0000000..648c9a2
--- /dev/null
+++ b/wikimetrics/models/report_nodes/null_report.py
@@ -0,0 +1,9 @@
+from report import ReportLeaf
+
+
+class NullReport(ReportLeaf):
+    """
+    Report that returns an empty result.
+    """
+    def run(self):
+        return {}
diff --git a/wikimetrics/models/report_nodes/run_report.py 
b/wikimetrics/models/report_nodes/run_report.py
index 8ec2251..36fa126 100644
--- a/wikimetrics/models/report_nodes/run_report.py
+++ b/wikimetrics/models/report_nodes/run_report.py
@@ -14,6 +14,7 @@
 )
 from report import ReportNode
 from aggregate_report import AggregateReport
+from null_report import NullReport
 from validate_report import ValidateReport
 from metric_report import MetricReport
 from wikimetrics.api import write_report_task, CohortService
@@ -92,11 +93,11 @@
         if validate_report.valid():
             if recurrent and recurrent_parent_id is None:
                 # Valid parent recurrent report
-                # We do not add child nodes that compute data, as they'll get
-                # generated through the scheduler.
-                # (An initial run is kicked off in post_process through
-                # run_child_report)
-                self.children = []
+                # We do not add child nodes that compute real data, as they'll
+                # get generated through the scheduler. However, we employ a
+                # NullReport to allow coalescing even when there was no
+                # recurrent run yet.
+                self.children = [NullReport()]
             else:
                 # Valid, but not a parent recurring report, so we add the child
                 # node that does the real computation
@@ -137,21 +138,10 @@
         data = db_report.get_json_result(results)
 
         # code below schedules an async task on celery to write the file
+        report_id_to_write = self.persistent_id
         if self.recurrent_parent_id is not None:
-            write_report_task.delay(self.recurrent_parent_id, self.created, 
data)
-        else:
-            # report is public and does not have a recurrent_parent_id, it's
-            # the parent report, call the first run of the report
-            self._run_child_report()
-
-    def _run_child_report(self):
-        '''
-        Returns the scheduler we use for recurrent reports, right
-        now the secduler for recurrent reports is a task on another python 
module
-        what makes testing difficult.
-        Ading this level of indirection makes testing easier
-        '''
-        recurring_reports(self.persistent_id)
+            report_id_to_write = self.recurrent_parent_id
+        write_report_task.delay(report_id_to_write, self.created, data)
 
     def __repr__(self):
         return '<RunReport("{0}")>'.format(self.persistent_id)

-- 
To view, visit https://gerrit.wikimedia.org/r/154800
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I5d671e85ddd96fa9239a16f86e84a92fde00ffed
Gerrit-PatchSet: 1
Gerrit-Project: analytics/wikimetrics
Gerrit-Branch: master
Gerrit-Owner: QChris <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to