Bearloga has submitted this change and it was merged.

Change subject: Switch dashboards over to use automata detection in ZRR computation
......................................................................


Switch dashboards over to use automata detection in ZRR computation

Don't deploy it yet, I still need to stitch the data sources together -
but it works!

Bug: T121434
Change-Id: I5715a542765d964a228badbcb997115970e3ca3f
---
M server.R
M tab_documentation/failure_rate.md
M ui.R
M utils.R
4 files changed, 65 insertions(+), 39 deletions(-)

Approvals:
  Bearloga: Verified; Looks good to me, approved



diff --git a/server.R b/server.R
index 4320bb1..0661385 100644
--- a/server.R
+++ b/server.R
@@ -201,28 +201,29 @@
 
   # Failure plots
   output$failure_rate_plot <- renderDygraph({
-    failure_dygraph_set %>%
+    polloi::data_select(input$failure_rate_automata, 
failure_data_with_automata, failure_data_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
       
polloi::subset_by_date_range(time_frame_range(input$failure_rate_timeframe, 
input$failure_rate_timeframe_daterange)) %>%
-      polloi::make_dygraph(xlab = "Date", ylab = "Queries", title = "Search 
Queries with Zero Results, by day")
+      polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", 
title = "Zero Results Rate, by day",
+                           legend_name = "ZRR")
   })
 
   output$failure_rate_change_plot <- renderDygraph({
-    failure_roc_dygraph_set[, c(1, 3)] %>%
+    polloi::data_select(input$failure_rate_automata, 
failure_roc_with_automata, failure_roc_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
       
polloi::subset_by_date_range(time_frame_range(input$failure_rate_timeframe, 
input$failure_rate_timeframe_daterange)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "Change (%)", title = "Zero 
Results rate change, by day", legend_name = "Change")
   })
 
   output$failure_breakdown_plot <- renderDygraph({
-    failure_breakdown_dygraph_set %>%
+    polloi::data_select(input$failure_breakdown_automata, 
failure_breakdown_with_automata, failure_breakdown_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_failure_breakdown)) %>%
       
polloi::subset_by_date_range(time_frame_range(input$failure_breakdown_timeframe,
 input$failure_breakdown_timeframe_daterange)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", 
title = "Zero result rate by search type")
   })
 
   output$suggestion_dygraph_plot <- renderDygraph({
-    suggestion_dygraph_set %>%
+    polloi::data_select(input$failure_suggestions_automata, 
suggestion_with_automata, suggestion_no_automata) %>%
       polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_failure_suggestions)) %>%
       
polloi::subset_by_date_range(time_frame_range(input$failure_suggestions_timeframe,
 input$failure_suggestions_timeframe_daterange)) %>%
       polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)", 
title = "Zero Result Rates with Search Suggestions")
@@ -310,22 +311,22 @@
   })
   output$kpi_summary_box_zero_results <- renderValueBox({
     date_range <- input$kpi_summary_date_range_selector
-    x <- polloi::subset_by_date_range(failure_dygraph_set, from = 
start_date(date_range), to = Sys.Date() - 1)
-    x <- transform(x, Rate = `Zero Result Queries` / `Search Queries`)$Rate
+    x <- polloi::subset_by_date_range(failure_data_with_automata, from = 
start_date(date_range), to = Sys.Date() - 1)
+    x <- transform(x, Rate = rate)$Rate
     if (date_range == "quarterly") {
       return(valueBox(subtitle = "Zero results rate", color = "orange",
-                      value = sprintf("%.1f%%", median(100 * x))))
+                      value = sprintf("%.1f%%", median(x))))
     }
-    y1 <- median(polloi::half(x)); y2 <- median(polloi::half(x, FALSE)); z <- 
100 * (y2 - y1)/y1
+    y1 <- median(polloi::half(x)); y2 <- median(polloi::half(x, FALSE)); z <- 
(y2 - y1)/y1
     if (abs(z) > 0) {
       return(valueBox(
         subtitle = sprintf("Zero results rate (%.1f%%)", z),
-        value = sprintf("%.1f%%", 100 * y2),
+        value = sprintf("%.1f%%", y2),
         icon = cond_icon(z > 0), color = polloi::cond_color(z > 0, "red")
       ))
     }
     return(valueBox(subtitle = "Zero results rate (no change)",
-                    value = sprintf("%.1f%%", 100 * y2), color = "orange"))
+                    value = sprintf("%.1f%%", y2), color = "orange"))
   })
   output$kpi_summary_box_api_usage <- renderValueBox({
     date_range <- input$kpi_summary_date_range_selector
@@ -426,9 +427,9 @@
   output$kpi_zero_results_series <- renderDygraph({
     smooth_level <- input$smoothing_kpi_zero_results
     start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, 
daily = 1, weekly = 8, monthly = 31, quarterly = 91)
-    zrr <- failure_dygraph_set %>%
+    zrr <- failure_data_with_automata %>%
       polloi::subset_by_date_range(from = start_date, to = Sys.Date()) %>%
-      transform(`Rate` = 100 * `Zero Result Queries` / `Search Queries`)
+      transform(`Rate` = rate)
     zrr_change <- 100 * (zrr$Rate[2:nrow(zrr)] - 
zrr$Rate[1:(nrow(zrr)-1)])/zrr$Rate[1:(nrow(zrr)-1)]
     zrr <- cbind(zrr[, c('date', 'Rate')], Change = c(NA, zrr_change)) %>%
       polloi::smoother(ifelse(smooth_level == "global", 
input$smoothing_global, smooth_level), rename = FALSE)
@@ -527,10 +528,10 @@
       polloi::check_past_week(ios_load_data, "iOS load times"),
       polloi::check_yesterday(dplyr::bind_rows(split_dataset), "API usage 
data"),
       polloi::check_past_week(dplyr::bind_rows(split_dataset), "API usage 
data"),
-      polloi::check_yesterday(failure_dygraph_set, "zero results data"),
-      polloi::check_past_week(failure_dygraph_set, "zero results data"),
-      polloi::check_yesterday(suggestion_dygraph_set, "suggestions data"),
-      polloi::check_past_week(suggestion_dygraph_set, "suggestions data"),
+      polloi::check_yesterday(failure_data_with_automata, "zero results data"),
+      polloi::check_past_week(failure_data_with_automata, "zero results data"),
+      polloi::check_yesterday(suggestion_with_automata, "suggestions data"),
+      polloi::check_past_week(suggestion_with_automata, "suggestions data"),
       polloi::check_yesterday(augmented_clickthroughs, "engagement % data"),
       polloi::check_past_week(augmented_clickthroughs, "engagement % data"),
       polloi::check_yesterday(user_page_visit_dataset, "survival times"),
diff --git a/tab_documentation/failure_rate.md b/tab_documentation/failure_rate.md
index a6c072b..df37405 100644
--- a/tab_documentation/failure_rate.md
+++ b/tab_documentation/failure_rate.md
@@ -1,7 +1,7 @@
 Search Queries with Zero Results
 =======
 
-Sometimes, searches return zero results. What we're visualising here is the number of times that happens, contrasted with the number of queries overall.
+Sometimes, searches return zero results. What we're visualising here is the proportion of the time that happens.
 
 Zero results doesn't actually mean a failure for the user, of course: some of these events are from "prefix search" in the search box, where the system attempts to match the user's already-typed characters to an existing page name. Others are from typos, resulting in a search page with no results, but *also* resulting in a spelling correction the user could use to get genuine results.
 
diff --git a/ui.R b/ui.R
index ac32202..e4dc5f1 100644
--- a/ui.R
+++ b/ui.R
@@ -208,6 +208,7 @@
               column(polloi::smooth_select("smoothing_failure_rate"), width = 
4),
               column(polloi::timeframe_select("failure_rate_timeframe"), width 
= 4),
               column(polloi::timeframe_daterange("failure_rate_timeframe"), 
width = 4)),
+            polloi::automata_select(input_id = "failure_rate_automata"),
             dygraphOutput("failure_rate_plot"),
             dygraphOutput("failure_rate_change_plot"),
             includeMarkdown("./tab_documentation/failure_rate.md")
@@ -217,6 +218,7 @@
               column(polloi::smooth_select("smoothing_failure_breakdown"), 
width = 4),
               column(polloi::timeframe_select("failure_breakdown_timeframe"), 
width = 4),
               
column(polloi::timeframe_daterange("failure_breakdown_timeframe"), width = 4)),
+            polloi::automata_select(input_id = "failure_breakdown_automata"),
             dygraphOutput("failure_breakdown_plot"),
             includeMarkdown("./tab_documentation/failure_breakdown.md")
     ),
@@ -225,6 +227,7 @@
               column(polloi::smooth_select("smoothing_failure_suggestions"), 
width = 4),
               
column(polloi::timeframe_select("failure_suggestions_timeframe"), width = 4),
               
column(polloi::timeframe_daterange("failure_suggestions_timeframe"), width = 
4)),
+            polloi::automata_select(input_id = "failure_suggestions_automata"),
             dygraphOutput("suggestion_dygraph_plot"),
             includeMarkdown("./tab_documentation/failure_suggests.md")
     ),
diff --git a/utils.R b/utils.R
index 5b63b90..bae640c 100644
--- a/utils.R
+++ b/utils.R
@@ -60,31 +60,53 @@
 
 read_failures <- function(date) {
 
-  data <- polloi::read_dataset("search/cirrus_query_aggregates.tsv")
-  interim_data <- reshape2::dcast(data, formula = date ~ variable, 
fun.aggregate = sum)
-  failure_dygraph_set <<- interim_data
+  interim <- 
polloi::read_dataset("search/cirrus_query_aggregates_with_automata.tsv")
+  interim$rate <- interim$rate*100
+  failure_data_with_automata <<- interim
 
-  interim_vector <- interim_data$`Zero Result Queries`/interim_data$`Search 
Queries`
-  output_vector <- (interim_vector[2:nrow(interim_data)] - 
interim_vector[1:(nrow(interim_data)-1)]) / 
interim_vector[1:(nrow(interim_data)-1)]
+  interim <- 
polloi::read_dataset("search/cirrus_query_aggregates_no_automata.tsv")
+  interim$rate <- interim$rate*100
+  failure_data_no_automata <<- interim
 
-  failure_roc_dygraph_set <<- data.frame(date = 
interim_data$date[2:nrow(interim_data)],
-                                         variable = "failure ROC",
-                                         daily_change = output_vector*100,
-                                         stringsAsFactors = FALSE)
+  interim <- 
{failure_data_with_automata$rate[1:nrow(failure_data_with_automata)-1] /
+              
failure_data_with_automata$rate[2:nrow(failure_data_with_automata)]}
 
-  interim_breakdown_data <- 
polloi::read_dataset("search/cirrus_query_breakdowns.tsv")
-  interim_breakdown_data$value <- interim_breakdown_data$value*100
-  failure_breakdown_dygraph_set <<- reshape2::dcast(interim_breakdown_data,
-                                                    formula = date ~ variable, 
fun.aggregate = sum)
+  failure_roc_with_automata <<- data.frame(date = 
failure_data_with_automata$date[2:nrow(failure_data_with_automata)],
+                                           daily_change = interim,
+                                           stringsAsFactors = FALSE)
 
-  suggestion_data <- 
polloi::read_dataset("search/cirrus_suggestion_breakdown.tsv")
-  suggestion_data$variable <- "Full-Text with Suggestions"
-  suggestion_data$value <- suggestion_data$value*100
-  suggestion_data <- rbind(suggestion_data,
-                           interim_breakdown_data[interim_breakdown_data$date 
%in% suggestion_data$date
-                                                  & 
interim_breakdown_data$variable == "Full-Text Search",])
-  suggestion_dygraph_set <<- reshape2::dcast(suggestion_data,
-                                             formula = date ~ variable, 
fun.aggregate = sum)
+  interim <- 
{failure_data_no_automata$rate[1:nrow(failure_data_no_automata)-1] /
+      failure_data_no_automata$rate[2:nrow(failure_data_no_automata)]}
+
+  failure_roc_no_automata <<- data.frame(date = 
failure_data_no_automata$date[2:nrow(failure_data_no_automata)],
+                                           daily_change = interim,
+                                           stringsAsFactors = FALSE)
+
+  interim_breakdown_with_automata <- 
polloi::read_dataset("search/cirrus_query_breakdowns_with_automata.tsv")
+  interim_breakdown_with_automata$rate <- 
interim_breakdown_with_automata$rate*100
+  failure_breakdown_with_automata <<- 
reshape2::dcast(interim_breakdown_with_automata,
+                                                      formula = date ~ 
query_type, fun.aggregate = sum)
+
+  interim_breakdown_no_automata <- 
polloi::read_dataset("search/cirrus_query_breakdowns_no_automata.tsv")
+  interim_breakdown_no_automata$rate <- interim_breakdown_no_automata$rate*100
+  failure_breakdown_no_automata <<- 
reshape2::dcast(interim_breakdown_no_automata,
+                                                    formula = date ~ 
query_type, fun.aggregate = sum)
+
+  interim <- 
polloi::read_dataset("search/cirrus_suggestion_breakdown_with_automata.tsv")
+  interim$rate <- interim$rate*100
+  interim$query_type <- "Full-Text with Suggestions"
+  interim <- rbind(interim[,c("date", "query_type", "rate")],
+                   
interim_breakdown_with_automata[interim_breakdown_with_automata$date %in% 
interim$date
+                                                   & 
interim_breakdown_with_automata$query_type == "Full-Text Search",])
+  suggestion_with_automata <<- reshape2::dcast(interim, formula = date ~ 
query_type, fun.aggregate = sum)
+
+  interim <- 
polloi::read_dataset("search/cirrus_suggestion_breakdown_no_automata.tsv")
+  interim$rate <- interim$rate*100
+  interim$query_type <- "Full-Text with Suggestions"
+  interim <- rbind(interim[,c("date", "query_type", "rate")],
+                   
interim_breakdown_no_automata[interim_breakdown_no_automata$date %in% 
interim$date
+                                                 & 
interim_breakdown_no_automata$query_type == "Full-Text Search",])
+  suggestion_no_automata <<- reshape2::dcast(interim, formula = date ~ 
query_type, fun.aggregate = sum)
 
 }
 

-- 
To view, visit https://gerrit.wikimedia.org/r/260864
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I5715a542765d964a228badbcb997115970e3ca3f
Gerrit-PatchSet: 2
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: master
Gerrit-Owner: OliverKeyes <[email protected]>
Gerrit-Reviewer: Bearloga <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to