OliverKeyes has uploaded a new change for review.
https://gerrit.wikimedia.org/r/260864
Change subject: Switch dashboards over to use automata detection in ZRR
computation
......................................................................
Switch dashboards over to use automata detection in ZRR computation
Don't deploy it yet; I still need to stitch the data sources together -
but it works!
Bug: T121434
Change-Id: I5715a542765d964a228badbcb997115970e3ca3f
---
M server.R
M tab_documentation/failure_rate.md
M ui.R
M utils.R
4 files changed, 65 insertions(+), 39 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow
refs/changes/64/260864/1
diff --git a/server.R b/server.R
index 4320bb1..0661385 100644
--- a/server.R
+++ b/server.R
@@ -201,28 +201,29 @@
# Failure plots
output$failure_rate_plot <- renderDygraph({
- failure_dygraph_set %>%
+ polloi::data_select(input$failure_rate_automata,
failure_data_with_automata, failure_data_no_automata) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
polloi::subset_by_date_range(time_frame_range(input$failure_rate_timeframe,
input$failure_rate_timeframe_daterange)) %>%
- polloi::make_dygraph(xlab = "Date", ylab = "Queries", title = "Search
Queries with Zero Results, by day")
+ polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)",
title = "Zero Results Rate, by day",
+ legend_name = "ZRR")
})
output$failure_rate_change_plot <- renderDygraph({
- failure_roc_dygraph_set[, c(1, 3)] %>%
+ polloi::data_select(input$failure_rate_automata,
failure_roc_with_automata, failure_roc_no_automata) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_failure_rate)) %>%
polloi::subset_by_date_range(time_frame_range(input$failure_rate_timeframe,
input$failure_rate_timeframe_daterange)) %>%
polloi::make_dygraph(xlab = "Date", ylab = "Change (%)", title = "Zero
Results rate change, by day", legend_name = "Change")
})
output$failure_breakdown_plot <- renderDygraph({
- failure_breakdown_dygraph_set %>%
+ polloi::data_select(input$failure_breakdown_automata,
failure_breakdown_with_automata, failure_breakdown_no_automata) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global,
input$smoothing_failure_breakdown)) %>%
polloi::subset_by_date_range(time_frame_range(input$failure_breakdown_timeframe,
input$failure_breakdown_timeframe_daterange)) %>%
polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)",
title = "Zero result rate by search type")
})
output$suggestion_dygraph_plot <- renderDygraph({
- suggestion_dygraph_set %>%
+ polloi::data_select(input$failure_suggestions_automata,
suggestion_with_automata, suggestion_no_automata) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global,
input$smoothing_failure_suggestions)) %>%
polloi::subset_by_date_range(time_frame_range(input$failure_suggestions_timeframe,
input$failure_suggestions_timeframe_daterange)) %>%
polloi::make_dygraph(xlab = "Date", ylab = "Zero Results Rate (%)",
title = "Zero Result Rates with Search Suggestions")
@@ -310,22 +311,22 @@
})
output$kpi_summary_box_zero_results <- renderValueBox({
date_range <- input$kpi_summary_date_range_selector
- x <- polloi::subset_by_date_range(failure_dygraph_set, from =
start_date(date_range), to = Sys.Date() - 1)
- x <- transform(x, Rate = `Zero Result Queries` / `Search Queries`)$Rate
+ x <- polloi::subset_by_date_range(failure_data_with_automata, from =
start_date(date_range), to = Sys.Date() - 1)
+ x <- transform(x, Rate = rate)$Rate
if (date_range == "quarterly") {
return(valueBox(subtitle = "Zero results rate", color = "orange",
- value = sprintf("%.1f%%", median(100 * x))))
+ value = sprintf("%.1f%%", median(x))))
}
- y1 <- median(polloi::half(x)); y2 <- median(polloi::half(x, FALSE)); z <-
100 * (y2 - y1)/y1
+ y1 <- median(polloi::half(x)); y2 <- median(polloi::half(x, FALSE)); z <-
(y2 - y1)/y1
if (abs(z) > 0) {
return(valueBox(
subtitle = sprintf("Zero results rate (%.1f%%)", z),
- value = sprintf("%.1f%%", 100 * y2),
+ value = sprintf("%.1f%%", y2),
icon = cond_icon(z > 0), color = polloi::cond_color(z > 0, "red")
))
}
return(valueBox(subtitle = "Zero results rate (no change)",
- value = sprintf("%.1f%%", 100 * y2), color = "orange"))
+ value = sprintf("%.1f%%", y2), color = "orange"))
})
output$kpi_summary_box_api_usage <- renderValueBox({
date_range <- input$kpi_summary_date_range_selector
@@ -426,9 +427,9 @@
output$kpi_zero_results_series <- renderDygraph({
smooth_level <- input$smoothing_kpi_zero_results
start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector,
daily = 1, weekly = 8, monthly = 31, quarterly = 91)
- zrr <- failure_dygraph_set %>%
+ zrr <- failure_data_with_automata %>%
polloi::subset_by_date_range(from = start_date, to = Sys.Date()) %>%
- transform(`Rate` = 100 * `Zero Result Queries` / `Search Queries`)
+ transform(`Rate` = rate)
zrr_change <- 100 * (zrr$Rate[2:nrow(zrr)] -
zrr$Rate[1:(nrow(zrr)-1)])/zrr$Rate[1:(nrow(zrr)-1)]
zrr <- cbind(zrr[, c('date', 'Rate')], Change = c(NA, zrr_change)) %>%
polloi::smoother(ifelse(smooth_level == "global",
input$smoothing_global, smooth_level), rename = FALSE)
@@ -527,10 +528,10 @@
polloi::check_past_week(ios_load_data, "iOS load times"),
polloi::check_yesterday(dplyr::bind_rows(split_dataset), "API usage
data"),
polloi::check_past_week(dplyr::bind_rows(split_dataset), "API usage
data"),
- polloi::check_yesterday(failure_dygraph_set, "zero results data"),
- polloi::check_past_week(failure_dygraph_set, "zero results data"),
- polloi::check_yesterday(suggestion_dygraph_set, "suggestions data"),
- polloi::check_past_week(suggestion_dygraph_set, "suggestions data"),
+ polloi::check_yesterday(failure_data_with_automata, "zero results data"),
+ polloi::check_past_week(failure_data_with_automata, "zero results data"),
+ polloi::check_yesterday(suggestion_with_automata, "suggestions data"),
+ polloi::check_past_week(suggestion_with_automata, "suggestions data"),
polloi::check_yesterday(augmented_clickthroughs, "engagement % data"),
polloi::check_past_week(augmented_clickthroughs, "engagement % data"),
polloi::check_yesterday(user_page_visit_dataset, "survival times"),
diff --git a/tab_documentation/failure_rate.md
b/tab_documentation/failure_rate.md
index a6c072b..df37405 100644
--- a/tab_documentation/failure_rate.md
+++ b/tab_documentation/failure_rate.md
@@ -1,7 +1,7 @@
Search Queries with Zero Results
=======
-Sometimes, searches return zero results. What we're visualising here is the
number of times that happens, contrasted with the number of queries overall.
+Sometimes, searches return zero results. What we're visualising here is the
proportion of the time that happens.
Zero results doesn't actually mean a failure for the user, of course: some of
these events are from "prefix search" in the search box, where the system
attempts to match the user's already-typed characters to an existing page name.
Others are from typos, resulting in a search page with no results, but *also*
resulting in a spelling correction the user could use to get genuine results.
diff --git a/ui.R b/ui.R
index ac32202..3c768a3 100644
--- a/ui.R
+++ b/ui.R
@@ -208,6 +208,7 @@
column(polloi::smooth_select("smoothing_failure_rate"), width =
4),
column(polloi::timeframe_select("failure_rate_timeframe"), width
= 4),
column(polloi::timeframe_daterange("failure_rate_timeframe"),
width = 4)),
+ polloi:::automata_select(input_id = "failure_rate_automata"),
dygraphOutput("failure_rate_plot"),
dygraphOutput("failure_rate_change_plot"),
includeMarkdown("./tab_documentation/failure_rate.md")
@@ -217,6 +218,7 @@
column(polloi::smooth_select("smoothing_failure_breakdown"),
width = 4),
column(polloi::timeframe_select("failure_breakdown_timeframe"),
width = 4),
column(polloi::timeframe_daterange("failure_breakdown_timeframe"), width = 4)),
+ polloi:::automata_select(input_id = "failure_breakdown_automata"),
dygraphOutput("failure_breakdown_plot"),
includeMarkdown("./tab_documentation/failure_breakdown.md")
),
@@ -225,6 +227,7 @@
column(polloi::smooth_select("smoothing_failure_suggestions"),
width = 4),
column(polloi::timeframe_select("failure_suggestions_timeframe"), width = 4),
column(polloi::timeframe_daterange("failure_suggestions_timeframe"), width =
4)),
+ polloi:::automata_select(input_id =
"failure_suggestions_automata"),
dygraphOutput("suggestion_dygraph_plot"),
includeMarkdown("./tab_documentation/failure_suggests.md")
),
diff --git a/utils.R b/utils.R
index 5b63b90..bae640c 100644
--- a/utils.R
+++ b/utils.R
@@ -60,31 +60,53 @@
read_failures <- function(date) {
- data <- polloi::read_dataset("search/cirrus_query_aggregates.tsv")
- interim_data <- reshape2::dcast(data, formula = date ~ variable,
fun.aggregate = sum)
- failure_dygraph_set <<- interim_data
+ interim <-
polloi::read_dataset("search/cirrus_query_aggregates_with_automata.tsv")
+ interim$rate <- interim$rate*100
+ failure_data_with_automata <<- interim
- interim_vector <- interim_data$`Zero Result Queries`/interim_data$`Search
Queries`
- output_vector <- (interim_vector[2:nrow(interim_data)] -
interim_vector[1:(nrow(interim_data)-1)]) /
interim_vector[1:(nrow(interim_data)-1)]
+ interim <-
polloi::read_dataset("search/cirrus_query_aggregates_no_automata.tsv")
+ interim$rate <- interim$rate*100
+ failure_data_no_automata <<- interim
- failure_roc_dygraph_set <<- data.frame(date =
interim_data$date[2:nrow(interim_data)],
- variable = "failure ROC",
- daily_change = output_vector*100,
- stringsAsFactors = FALSE)
+ interim <-
{failure_data_with_automata$rate[1:nrow(failure_data_with_automata)-1] /
+
failure_data_with_automata$rate[2:nrow(failure_data_with_automata)]}
- interim_breakdown_data <-
polloi::read_dataset("search/cirrus_query_breakdowns.tsv")
- interim_breakdown_data$value <- interim_breakdown_data$value*100
- failure_breakdown_dygraph_set <<- reshape2::dcast(interim_breakdown_data,
- formula = date ~ variable,
fun.aggregate = sum)
+ failure_roc_with_automata <<- data.frame(date =
failure_data_with_automata$date[2:nrow(failure_data_with_automata)],
+ daily_change = interim,
+ stringsAsFactors = FALSE)
- suggestion_data <-
polloi::read_dataset("search/cirrus_suggestion_breakdown.tsv")
- suggestion_data$variable <- "Full-Text with Suggestions"
- suggestion_data$value <- suggestion_data$value*100
- suggestion_data <- rbind(suggestion_data,
- interim_breakdown_data[interim_breakdown_data$date
%in% suggestion_data$date
- &
interim_breakdown_data$variable == "Full-Text Search",])
- suggestion_dygraph_set <<- reshape2::dcast(suggestion_data,
- formula = date ~ variable,
fun.aggregate = sum)
+ interim <-
{failure_data_no_automata$rate[1:nrow(failure_data_no_automata)-1] /
+ failure_data_no_automata$rate[2:nrow(failure_data_no_automata)]}
+
+ failure_roc_no_automata <<- data.frame(date =
failure_data_no_automata$date[2:nrow(failure_data_no_automata)],
+ daily_change = interim,
+ stringsAsFactors = FALSE)
+
+ interim_breakdown_with_automata <-
polloi::read_dataset("search/cirrus_query_breakdowns_with_automata.tsv")
+ interim_breakdown_with_automata$rate <-
interim_breakdown_with_automata$rate*100
+ failure_breakdown_with_automata <<-
reshape2::dcast(interim_breakdown_with_automata,
+ formula = date ~
query_type, fun.aggregate = sum)
+
+ interim_breakdown_no_automata <-
polloi::read_dataset("search/cirrus_query_breakdowns_no_automata.tsv")
+ interim_breakdown_no_automata$rate <- interim_breakdown_no_automata$rate*100
+ failure_breakdown_no_automata <<-
reshape2::dcast(interim_breakdown_no_automata,
+ formula = date ~
query_type, fun.aggregate = sum)
+
+ interim <-
polloi::read_dataset("search/cirrus_suggestion_breakdown_with_automata.tsv")
+ interim$rate <- interim$rate*100
+ interim$query_type <- "Full-Text with Suggestions"
+ interim <- rbind(interim[,c("date", "query_type", "rate")],
+
interim_breakdown_with_automata[interim_breakdown_with_automata$date %in%
interim$date
+ &
interim_breakdown_with_automata$query_type == "Full-Text Search",])
+ suggestion_with_automata <<- reshape2::dcast(interim, formula = date ~
query_type, fun.aggregate = sum)
+
+ interim <-
polloi::read_dataset("search/cirrus_suggestion_breakdown_no_automata.tsv")
+ interim$rate <- interim$rate*100
+ interim$query_type <- "Full-Text with Suggestions"
+ interim <- rbind(interim[,c("date", "query_type", "rate")],
+
interim_breakdown_no_automata[interim_breakdown_no_automata$date %in%
interim$date
+ &
interim_breakdown_no_automata$query_type == "Full-Text Search",])
+ suggestion_no_automata <<- reshape2::dcast(interim, formula = date ~
query_type, fun.aggregate = sum)
}
--
To view, visit https://gerrit.wikimedia.org/r/260864
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5715a542765d964a228badbcb997115970e3ca3f
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: master
Gerrit-Owner: OliverKeyes <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits