Mforns has submitted this change and it was merged.
Change subject: Fix calculation of time-to-read
......................................................................
Fix calculation of time-to-read
Fix calculation of distribution_of_response_time and rename the
fixed file to make a clean break from past incorrect calculations.
In addition, stop calculating distribution_of_unread_notifications
because it was also incorrect and is trivial to calculate from
distribution_of_response_time.
Bug: T131206
Change-Id: I679870bb85705dff9aea8a32f728e02164fd1a5a
---
M ee/config.yaml
A ee/days_to_read.sql
D ee/distribution_of_response_time.sql
D ee/distribution_of_unread_notifications.sql
4 files changed, 42 insertions(+), 60 deletions(-)
Approvals:
Mforns: Verified; Looks good to me, approved
Catrope: Looks good to me, approved
diff --git a/ee/config.yaml b/ee/config.yaml
index 4c0cac1..d06208b 100644
--- a/ee/config.yaml
+++ b/ee/config.yaml
@@ -24,21 +24,12 @@
explode_by:
wiki_db: enwiki, dewiki, eswiki, frwiki, hewiki, huwiki, itwiki,
nlwiki, plwiki, ptwiki, svwiki
- distribution_of_unread_notifications:
+ days_to_read:
frequency: months
granularity: months
lag: 2678400 # 31 days in seconds
timeboxed: true
- starts: 2013-09-01
- explode_by:
- wiki_db: enwiki, dewiki, eswiki, frwiki, hewiki, huwiki, itwiki,
nlwiki, plwiki, ptwiki, svwiki
-
- distribution_of_response_time:
- frequency: months
- granularity: months
- lag: 2678400 # 31 days in seconds
- timeboxed: true
- starts: 2013-09-01
+ starts: 2015-09-01
explode_by:
wiki_db: enwiki, dewiki, eswiki, frwiki, hewiki, huwiki, itwiki,
nlwiki, plwiki, ptwiki, svwiki
diff --git a/ee/days_to_read.sql b/ee/days_to_read.sql
new file mode 100644
index 0000000..46b69d2
--- /dev/null
+++ b/ee/days_to_read.sql
@@ -0,0 +1,40 @@
+SELECT
+ DATE('{from_timestamp}') AS date,
+ SUM(notification_read_timestamp IS NOT NULL AND days_to_read <= 2 ) AS
0_to_2,
+ SUM(notification_read_timestamp IS NOT NULL AND days_to_read BETWEEN 3
and 5) AS 3_to_5,
+ SUM(notification_read_timestamp IS NOT NULL AND days_to_read BETWEEN 6
AND 10) AS 6_to_10,
+ SUM(notification_read_timestamp IS NOT NULL AND days_to_read BETWEEN 11
AND 20) AS 11_to_20,
+ SUM(notification_read_timestamp IS NOT NULL AND days_to_read BETWEEN 21
AND 30) AS 21_to_30,
+ SUM(days_to_read >= 31) AS 31_plus
+FROM
+(
+ SELECT
+ notification.notification_event,
+ notification.notification_timestamp,
+ IFNULL(bundle.notification_read_timestamp,
notification.notification_read_timestamp) AS notification_read_timestamp,
+ # days (rounded down) between notification time & read time (or
now, if it hasn't been read yet)
+ TIMESTAMPDIFF(
+ DAY,
+ STR_TO_DATE(
+ notification.notification_timestamp,
+ '%Y%m%d%H%i%S'
+ ),
+ STR_TO_DATE(COALESCE(
+ bundle.notification_read_timestamp,
+ notification.notification_read_timestamp,
+ DATE_FORMAT(NOW(), '%Y%m%d%H%i%s')
+ ), '%Y%m%d%H%i%S'
+ )
+ ) AS days_to_read
+ FROM {wiki_db}.echo_notification AS notification
+ LEFT JOIN
+ (
+ SELECT
+ notification_read_timestamp,
+ notification_bundle_display_hash
+ FROM {wiki_db}.echo_notification
+ WHERE notification_bundle_base = 1
+ GROUP BY notification_bundle_display_hash
+ ) bundle ON notification.notification_bundle_display_hash =
bundle.notification_bundle_display_hash AND
notification.notification_bundle_display_hash != ''
+ WHERE notification.notification_timestamp BETWEEN '{from_timestamp}'
AND '{to_timestamp}'
+) AS temp;
diff --git a/ee/distribution_of_response_time.sql
b/ee/distribution_of_response_time.sql
deleted file mode 100644
index 44666a5..0000000
--- a/ee/distribution_of_response_time.sql
+++ /dev/null
@@ -1,27 +0,0 @@
-SELECT
- DATE('{from_timestamp}') AS date,
- SUM(notification_read_timestamp IS NOT NULL AND unread_diff < 172801)
AS 0_to_2, # <= 2 days
- SUM(notification_read_timestamp IS NOT NULL AND unread_diff BETWEEN
172800 AND 432001) AS 2_to_5, # 2 < x <= 5 days
- SUM(notification_read_timestamp IS NOT NULL AND unread_diff BETWEEN
432000 AND 864001) AS 5_to_10, # 5 < x <= 10 days
- SUM(notification_read_timestamp IS NOT NULL AND unread_diff BETWEEN
864000 AND 1728001) AS 10_to_20, # 10 < x <= 20 days
- SUM(notification_read_timestamp IS NOT NULL AND unread_diff BETWEEN
1728000 AND 2592001) AS 20_to_30, # 20 < x <= 30 days
- SUM(unread_diff > 2592000) AS 30_plus # > 30 days
-FROM
-(
- SELECT
- notification.notification_event,
- notification.notification_timestamp,
- IFNULL(bundle.notification_read_timestamp,
notification.notification_read_timestamp) AS notification_read_timestamp,
- COALESCE(bundle.notification_read_timestamp,
notification.notification_read_timestamp, DATE_FORMAT(NOW(), '%Y%m%d%H%i%s')) -
notification.notification_timestamp AS unread_diff
- FROM {wiki_db}.echo_notification AS notification
- LEFT JOIN
- (
- SELECT
- notification_read_timestamp,
- notification_bundle_display_hash
- FROM {wiki_db}.echo_notification
- WHERE notification_bundle_base = 1
- GROUP BY notification_bundle_display_hash
- ) bundle ON notification.notification_bundle_display_hash =
bundle.notification_bundle_display_hash AND
notification.notification_bundle_display_hash != ''
- WHERE notification.notification_timestamp BETWEEN '{from_timestamp}'
AND '{to_timestamp}'
-) AS temp;
diff --git a/ee/distribution_of_unread_notifications.sql
b/ee/distribution_of_unread_notifications.sql
deleted file mode 100644
index 9ba5c1e..0000000
--- a/ee/distribution_of_unread_notifications.sql
+++ /dev/null
@@ -1,22 +0,0 @@
-SELECT
- DATE('{from_timestamp}') AS date,
- SUM(unread_diff > 2592000) / COUNT(*) * 100 AS percentage
-FROM
-(
- SELECT
- notification.notification_event,
- notification.notification_timestamp,
- # diff in seconds between notification time & read time (or
now, if it hasn't been read yet)
- COALESCE(bundle.notification_read_timestamp,
notification.notification_read_timestamp, DATE_FORMAT(NOW(), '%Y%m%d%H%i%s')) -
notification.notification_timestamp AS unread_diff
- FROM {wiki_db}.echo_notification AS notification
- LEFT JOIN
- (
- SELECT
- notification_read_timestamp,
- notification_bundle_display_hash
- FROM {wiki_db}.echo_notification
- WHERE notification_bundle_base = 1
- GROUP BY notification_bundle_display_hash
- ) bundle ON notification.notification_bundle_display_hash =
bundle.notification_bundle_display_hash AND
notification.notification_bundle_display_hash != ''
- WHERE notification.notification_timestamp BETWEEN '{from_timestamp}'
AND '{to_timestamp}'
-) AS temp;
--
To view, visit https://gerrit.wikimedia.org/r/280371
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I679870bb85705dff9aea8a32f728e02164fd1a5a
Gerrit-PatchSet: 2
Gerrit-Project: analytics/limn-ee-data
Gerrit-Branch: master
Gerrit-Owner: Neil P. Quinn-WMF <[email protected]>
Gerrit-Reviewer: Catrope <[email protected]>
Gerrit-Reviewer: Matthias Mullie <[email protected]>
Gerrit-Reviewer: Mforns <[email protected]>
Gerrit-Reviewer: Neil P. Quinn-WMF <[email protected]>
Gerrit-Reviewer: Springle <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits