Bearloga has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/375091 )

Change subject: SRP visit times label fixes
......................................................................

SRP visit times label fixes

Also added data checks and fixed a bug introduced with a new version
of tidyr (at least, I think that is how the issue started).

Change-Id: Ia3f4e6b030858b382c0a7c336d6759d022ebf14e
---
M modules/page_visit_times.R
M server.R
M tab_documentation/survival.md
M ui.R
M utils.R
5 files changed, 56 insertions(+), 38 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow 
refs/changes/91/375091/1

diff --git a/modules/page_visit_times.R b/modules/page_visit_times.R
index 1321dd6..df1fbe9 100644
--- a/modules/page_visit_times.R
+++ b/modules/page_visit_times.R
@@ -22,7 +22,7 @@
     tidyr::spread(label, time) %>%
     polloi::reorder_columns() %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_srp_ld_plot), 
rename = FALSE) %>%
-    polloi::make_dygraph(xlab = "", ylab = "Time (s)", title = "Time at N% 
users leave the search results page") %>%
+    polloi::make_dygraph(xlab = "", ylab = "Time (s)", title = "Time at which 
N% users leave the search results page") %>%
     dyAxis("x", ticker = "Dygraph.dateTicker", axisLabelFormatter = 
polloi::custom_axis_formatter,
            axisLabelWidth = 100, pixelsPerLabel = 80) %>%
     dyRoller(rollPeriod = input$rolling_srp_ld_plot) %>%
diff --git a/server.R b/server.R
index 752f5ba..b91bcf9 100644
--- a/server.R
+++ b/server.R
@@ -80,18 +80,28 @@
       polloi::check_past_week(mobile_load_data, "Mobile Web load times"),
       polloi::check_yesterday(android_dygraph_set, "Android events"),
       polloi::check_past_week(android_load_data, "Android load times"),
+      polloi::check_yesterday(position_prop, "clicked result positions"),
+      polloi::check_past_week(position_prop, "clicked result positions"),
+      polloi::check_yesterday(source_prop, "source of search on Android"),
+      polloi::check_past_week(source_prop, "source of search on Android"),
       polloi::check_yesterday(ios_dygraph_set, "iOS events"),
       polloi::check_past_week(ios_load_data, "iOS load times"),
-      polloi::check_yesterday(dplyr::bind_rows(split_dataset), "API usage 
data"),
-      polloi::check_past_week(dplyr::bind_rows(split_dataset), "API usage 
data"),
+      polloi::check_yesterday(dplyr::bind_rows(split_dataset, .id = "api"), 
"API usage data"),
+      polloi::check_past_week(dplyr::bind_rows(split_dataset, .id = "api"), 
"API usage data"),
       polloi::check_yesterday(failure_data_with_automata, "zero results data"),
       polloi::check_past_week(failure_data_with_automata, "zero results data"),
       polloi::check_yesterday(suggestion_with_automata, "suggestions data"),
       polloi::check_past_week(suggestion_with_automata, "suggestions data"),
       polloi::check_yesterday(augmented_clickthroughs, "engagement % data"),
       polloi::check_past_week(augmented_clickthroughs, "engagement % data"),
-      polloi::check_yesterday(user_page_visit_dataset, "survival times"),
-      polloi::check_past_week(user_page_visit_dataset, "survival times"))
+      polloi::check_yesterday(paulscore_fulltext, "full-text PaulScores"),
+      polloi::check_past_week(paulscore_fulltext, "full-text PaulScores"),
+      polloi::check_yesterday(sister_search_traffic, "sister search traffic"),
+      polloi::check_past_week(sister_search_traffic, "sister search traffic"),
+      polloi::check_yesterday(user_page_visit_dataset, "page survival times"),
+      polloi::check_past_week(user_page_visit_dataset, "page survival times"),
+      polloi::check_yesterday(serp_page_visit_dataset, "serp survival times"),
+      polloi::check_past_week(serp_page_visit_dataset, "serp survival times"))
     notifications <- notifications[!vapply(notifications, is.null, FALSE)]
     return(dropdownMenu(type = "notifications", .list = notifications))
   })
diff --git a/tab_documentation/survival.md b/tab_documentation/survival.md
index e066ad5..ae7ab59 100644
--- a/tab_documentation/survival.md
+++ b/tab_documentation/survival.md
@@ -1,15 +1,15 @@
-Automated survival analysis: page visit times
+How long searchers stay on the visited search results
 =======
 
 When someone is randomly selected for search satisfaction tracking (using our 
[TSS2 schema](https://meta.wikimedia.org/wiki/Schema:TestSearchSatisfaction2)), 
we use a check-in system and survival analysis to estimate how long users stay 
on visited pages. To summarize the results on a daily basis, we record a set of 
statistics based on a measure formally known as "[median lethal 
dose](https://en.wikipedia.org/wiki/Median_lethal_dose)".
 
-This graph shows the length of time that must pass before N% of the users 
leave the page they visited. When the number goes up, we can infer that users 
are staying on the pages longer. In general, it appears it takes 15s to lose 
10%, 25-35s to lose 25%, and 55-75s to lose 50%.
+This graph shows the length of time that must pass before N% of the users 
leave the page (e.g. article) they visited. When the number goes up, we can 
infer that users are staying on the pages longer. In general, it appears it 
takes 15s to lose 10%, 25-35s to lose 25%, and 55-75s to lose 50%.
 
 On most days, we retain at least 20% of the test population past the 7 minute 
mark (the point at which the user's browser stops checking in), so on those 
days we cannot calculate the time it takes to lose 90/95/99% of the users.
 
 There are some days when we CAN calculate those times, and it can take 
anywhere between 270s (4m30s) and 390s (6m30s) for 90% of the users to have 
closed the page they clicked through from the search results page.
 
-Outages and inaccuracies
+Annotations
 ------
 * '__R__': on 2017-01-01 we started calculating all of Discovery's metrics 
using a new version of [our data retrieval and processing 
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated 
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)' 
[Reportupdater 
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater). 
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
 * '__S__': on 2017-04-25 we changed the rates at which users are put into 
event logging (see [T163273](https://phabricator.wikimedia.org/T163273)). 
Specifically, we decreased the rate on English Wikipedia ("EnWiki") and 
increased it everywhere else.
diff --git a/ui.R b/ui.R
index bbed6f2..73c8980 100644
--- a/ui.R
+++ b/ui.R
@@ -69,8 +69,8 @@
                   menuItem(text = "Sister Search",
                            menuSubItem(text = "Traffic", tabName = 
"sister_search_traffic")),
                   menuItem(text = "Page Visit Times",
-                           menuSubItem(text = "Visited search results", 
tabName = "survival"),
-                           menuSubItem(text = "Search result pages", tabName = 
"spr_surv")),
+                           menuSubItem(text = "Search result pages", tabName = 
"spr_surv"),
+                           menuSubItem(text = "Visited search results", 
tabName = "survival")),
                   menuItem(text = "Language/Project Breakdown", tabName = 
"langproj_breakdown"),
                   menuItem(text = "Global Settings",
                            selectInput(inputId = "smoothing_global", label = 
"Smoothing", selectize = TRUE, selected = "day",
@@ -319,8 +319,8 @@
                     width = 3
                   ),
                   column(
-                    numericInput("rolling_lethal_dose_plot", "Roll Period", 
14, min = 1, max = 30),
-                    helpText("Each point is an average of this many days."),
+                    numericInput("rolling_lethal_dose_plot", "Rolling 
Average*", 14, min = 1, max = 30),
+                    helpText("* Each point will become an average of this many 
days."),
                     width = 3
                   ),
                   column(
@@ -348,10 +348,10 @@
                 fluidRow(
                   column(
                     fluidRow(
-                      column(polloi::smooth_select("smoothing_srp_ld_plot"), 
width = 8),
-                      column(numericInput("rolling_srp_ld_plot", "Roll 
Period", 1, min = 1, max = 30), width = 4)
+                      column(polloi::smooth_select("smoothing_srp_ld_plot"), 
width = 7),
+                      column(numericInput("rolling_srp_ld_plot", "Rolling 
Average*", 1, min = 1, max = 30), width = 5)
                     ),
-                    helpText("Each point is an average of this many days."),
+                    helpText("* Each point will become an average of this many 
days."),
                     width = 3
                   ),
                   column(
diff --git a/utils.R b/utils.R
index 27c4b0e..91f2376 100644
--- a/utils.R
+++ b/utils.R
@@ -346,28 +346,36 @@
   projects <- sub(" \\([0-9]{1,2}\\.[0-9]{1,3}%\\)", "", projects)
   if (length(languages) == 1 && languages[1] == "(None)") {
     temp <- data %>%
-      dplyr::filter_(~project %in% projects) %>%
+      dplyr::filter_(~ project %in% projects) %>%
       dplyr::rename(wiki = project) %>%
       dplyr::group_by(date, wiki)
-    if (input_metric %in% c("User engagement", "Threshold-passing %", 
"Clickthrough rate")){
-      temp %<>% dplyr::summarize(
-        `Threshold-passing %` = round(sum(`Threshold-passing %` * 
search_sessions_threshold, na.rm = TRUE) / sum(search_sessions_threshold, na.rm 
= TRUE), 2),
-        `Clickthrough rate` = round(sum(`Clickthrough rate` * `Result pages 
opened`, na.rm = TRUE) / sum(`Result pages opened`, na.rm = TRUE), 2),
-        `User engagement` = round((`Threshold-passing %` + `Clickthrough 
rate`) / 2, 2))
-    } else if (input_metric %in% c("clickthroughs", "Result pages opened", 
"search sessions")){
-      temp %<>% dplyr::summarize(
-        clickthroughs = round(sum(as.numeric(clickthroughs), na.rm = TRUE), 2),
-        `Result pages opened` = round(sum(as.numeric(`Result pages opened`), 
na.rm = TRUE), 2),
-        `search sessions` = round(sum(as.numeric(`search sessions`), na.rm = 
TRUE), 2))
-    } else if (input_metric %in% c("F = 0.1", "F = 0.5", "F = 0.9")){
-      temp %<>% dplyr::summarize(
-        `F = 0.1` = round(sum(`F = 0.1` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2),
-        `F = 0.5` = round(sum(`F = 0.5` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2),
-        `F = 0.9` = round(sum(`F = 0.9` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2))
-    } else{
-      temp %<>% dplyr::summarize(
-        zero_results = sum(as.numeric(zero_results), na.rm = TRUE),
-        total = sum(as.numeric(total), na.rm = TRUE))
+    if (input_metric %in% c("User engagement", "Threshold-passing %", 
"Clickthrough rate")) {
+      temp %<>%
+        dplyr::summarize(
+          `Threshold-passing %` = round(sum(`Threshold-passing %` * 
search_sessions_threshold, na.rm = TRUE) / sum(search_sessions_threshold, na.rm 
= TRUE), 2),
+          `Clickthrough rate` = round(sum(`Clickthrough rate` * `Result pages 
opened`, na.rm = TRUE) / sum(`Result pages opened`, na.rm = TRUE), 2),
+          `User engagement` = round((`Threshold-passing %` + `Clickthrough 
rate`) / 2, 2)
+        )
+    } else if (input_metric %in% c("clickthroughs", "Result pages opened", 
"search sessions")) {
+      temp %<>%
+        dplyr::summarize(
+          clickthroughs = round(sum(as.numeric(clickthroughs), na.rm = TRUE), 
2),
+          `Result pages opened` = round(sum(as.numeric(`Result pages opened`), 
na.rm = TRUE), 2),
+          `search sessions` = round(sum(as.numeric(`search sessions`), na.rm = 
TRUE), 2)
+        )
+    } else if (input_metric %in% c("F = 0.1", "F = 0.5", "F = 0.9")) {
+      temp %<>%
+        dplyr::summarize(
+          `F = 0.1` = round(sum(`F = 0.1` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2),
+          `F = 0.5` = round(sum(`F = 0.5` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2),
+          `F = 0.9` = round(sum(`F = 0.9` * `search sessions`, na.rm = TRUE) / 
sum(`search sessions`, na.rm = TRUE), 2)
+        )
+    } else {
+      temp %<>%
+        dplyr::summarize(
+          zero_results = sum(as.numeric(zero_results), na.rm = TRUE),
+          total = sum(as.numeric(total), na.rm = TRUE)
+        )
     }
     temp %<>% dplyr::ungroup()
   } else {
@@ -376,15 +384,15 @@
       tidyr::unite(wiki, language, project, sep = " ") %>%
       dplyr::mutate(wiki = sub("(None) ", "", wiki, fixed = TRUE))
   }
-  if (input_metric == "Zero result rate"){
+  if (input_metric == "Zero result rate") {
     temp %<>%
       dplyr::mutate(zrr = round(100 * as.numeric(zero_results) / 
as.numeric(total), 2)) %>%
       dplyr::select(-c(total, zero_results)) %>%
       tidyr::spread(wiki, zrr)
   } else {
     temp %<>%
-      dplyr::select_(.dots = c("date", "wiki", paste0("`", input_metric, 
"`"))) %>%
-      tidyr::spread_(., key_col = "wiki", value_col = input_metric, fill = 0)
+      dplyr::select_(.dots = c("date", "wiki", "val" = paste0("`", 
input_metric, "`"))) %>%
+      tidyr::spread(., wiki, val, fill = 0)
   }
   return(temp)
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/375091
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia3f4e6b030858b382c0a7c336d6759d022ebf14e
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: develop
Gerrit-Owner: Bearloga <mpo...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to