Chelsyx has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/374387 )

Change subject: Use new UDF and break api calls down by referer class
......................................................................

Use new UDF and break api calls down by referer class

Bug: T172452
Change-Id: I0c3fad23abb3931223d0b6212c1f8a969a251f72
---
M modules/api.R
M modules/key_performance_metrics/api_usage.R
M tab_documentation/fulltext_basic.md
M tab_documentation/kpi_api_usage.md
M utils.R
5 files changed, 33 insertions(+), 12 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow 
refs/changes/87/374387/1

diff --git a/modules/api.R b/modules/api.R
index 7e8e7ff..affe6fa 100644
--- a/modules/api.R
+++ b/modules/api.R
@@ -1,13 +1,18 @@
 output$cirrus_aggregate <- renderDygraph({
   split_dataset$cirrus %>%
+    tidyr::spread(key = referer_class, value = calls) %>%
+    dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm = 
TRUE), All)) %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_fulltext_search)) 
%>%
     polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Full-text 
via API usage by day", legend_name = "Searches") %>%
     dyRangeSelector %>%
-    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
 })
 
 output$open_aggregate <- renderDygraph({
   split_dataset$open %>%
+    tidyr::spread(key = referer_class, value = calls) %>%
+    dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm = 
TRUE), All)) %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_open_search)) %>%
     polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "OpenSearch 
API usage by day", legend_name = "Searches") %>%
     dyRangeSelector %>%
@@ -16,6 +21,8 @@
 
 output$geo_aggregate <- renderDygraph({
   split_dataset$geo %>%
+    tidyr::spread(key = referer_class, value = calls) %>%
+    dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm = 
TRUE), All)) %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_geo_search)) %>%
     polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Geo Search 
API usage by day", legend_name = "Searches") %>%
     dyRangeSelector %>%
@@ -24,6 +31,8 @@
 
 output$language_aggregate <- renderDygraph({
   split_dataset$language %>%
+    tidyr::spread(key = referer_class, value = calls) %>%
+    dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm = 
TRUE), All)) %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_language_search)) 
%>%
     polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Language 
Search API usage by day", legend_name = "Searches") %>%
     dyRangeSelector %>%
@@ -32,6 +41,8 @@
 
 output$prefix_aggregate <- renderDygraph({
   split_dataset$prefix %>%
+    tidyr::spread(key = referer_class, value = calls) %>%
+    dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm = 
TRUE), All)) %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_prefix_search)) 
%>%
     polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Prefix 
Search API usage by day", legend_name = "Searches") %>%
     dyRangeSelector %>%
diff --git a/modules/key_performance_metrics/api_usage.R 
b/modules/key_performance_metrics/api_usage.R
index 271b030..13a4c3a 100644
--- a/modules/key_performance_metrics/api_usage.R
+++ b/modules/key_performance_metrics/api_usage.R
@@ -2,6 +2,11 @@
   smooth_level <- input$smoothing_kpi_api_usage
   start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all 
= NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91)
   api_usage <- split_dataset %>%
+  purrr::map(function(x) {
+    dplyr::group_by(x, date) %>%
+    dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>%
+    dplyr::ungroup()
+  }) %>%
   {
     if (!is.na(start_date)) {
       lapply(., polloi::subset_by_date_range, from = start_date, to = 
Sys.Date() - 1)
@@ -12,33 +17,35 @@
     dplyr::bind_rows(.id = "api") %>%
     tidyr::spread("api", "calls")
   if ( input$kpi_api_usage_series_include_open ) {
-    api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language + open 
+ prefix)
+    api_usage <- dplyr::mutate(api_usage, all = cirrus + ifelse(is.na(`cirrus 
(more like)`), 0, `cirrus (more like)`) + geo + language + open + prefix)
   } else {
-    api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language + 
prefix)
+    api_usage <- dplyr::mutate(api_usage, all = cirrus + ifelse(is.na(`cirrus 
(more like)`), 0, `cirrus (more like)`) + geo + language + prefix)
   }
   if ( input$kpi_api_usage_series_data == "raw" ) {
     api_usage %<>%
       polloi::smoother(ifelse(smooth_level == "global", 
input$smoothing_global, smooth_level), rename = FALSE) %>%
       { xts::xts(.[, -1], order.by = .$date) }
     if (!input$kpi_api_usage_series_include_open) {
-      colnames(api_usage)[6] <- "all except open"
+      colnames(api_usage)[7] <- "all except open"
     }
     return(dygraph(api_usage, main = "Calls over time", xlab = "Date",
                    ylab = ifelse(input$kpi_api_usage_series_log_scale, "Calls 
(log10 scale)", "Calls")) %>%
              dySeries("cirrus", label = "full-text via API") %>%
              dyLegend(width = 400, show = "always") %>%
-             dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(6, 
"Set2")[6:1],
+             dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, 
"Set2")[7:1],
                        drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE,
                        includeZero = input$kpi_api_usage_series_log_scale,
                        logscale = input$kpi_api_usage_series_log_scale
              ) %>%
              dyCSS(css = system.file("custom.css", package = "polloi")) %>%
              dyRangeSelector %>%
-             dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom"))
+             dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom") %>%
+             dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = 
"bottom"))
   }
   api_usage_change <- api_usage %>%
     dplyr::mutate(
       cirrus = polloi::percent_change(cirrus),
+      `cirrus (more like)` = polloi::percent_change(`cirrus (more like)`),
       geo = polloi::percent_change(geo),
       language = polloi::percent_change(language),
       open = polloi::percent_change(open),
@@ -48,12 +55,13 @@
     { .[-1, ] } %>%
     polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, 
smooth_level), rename = FALSE) %>%
     { xts::xts(.[, -1], .$date) }
-  if (!input$kpi_api_usage_series_include_open) colnames(api_usage_change)[6] 
<- "all except open"
+  if (!input$kpi_api_usage_series_include_open) colnames(api_usage_change)[7] 
<- "all except open"
   return(dygraph(api_usage_change, main = "Day-to-day % change over time", 
xlab = "Date", ylab = "% change") %>%
            dyLegend(width = 400, show = "always") %>%
-           dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(6, 
"Set2"),
+           dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, 
"Set2"),
                      drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE, 
includeZero = TRUE) %>%
            dyCSS(css = system.file("custom.css", package = "polloi")) %>%
            dyRangeSelector %>%
-           dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom"))
+           dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom") %>%
+           dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom"))
 })
diff --git a/tab_documentation/fulltext_basic.md 
b/tab_documentation/fulltext_basic.md
index bb3b641..5635740 100644
--- a/tab_documentation/fulltext_basic.md
+++ b/tab_documentation/fulltext_basic.md
@@ -13,6 +13,7 @@
 ------
 
 * '__R__': on 2017-01-01 we started calculating all of Discovery's metrics 
using a new version of [our data retrieval and processing 
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated 
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)' 
[Reportupdater 
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater). 
See [T150915](https://phabricator.wikimedia.org/T150915) for more details. 
Furthermore, we switched to an updated UDF for counting API calls -- the 
previous version was undercounting full-text and geo search API calls (see 
[Gerrit change 315503](https://gerrit.wikimedia.org/r/#/c/315503/) for more 
details).
+* '__U__': on 2017-08-14 we started using a new UDF to determine the type of 
search API call (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for 
more details).
 
 Questions, bug reports, and feature suggestions
 ------
diff --git a/tab_documentation/kpi_api_usage.md 
b/tab_documentation/kpi_api_usage.md
index e88883a..c5712c7 100644
--- a/tab_documentation/kpi_api_usage.md
+++ b/tab_documentation/kpi_api_usage.md
@@ -7,6 +7,7 @@
 ------
 
 * '__R__': on 2017-01-01 we started calculating all of Discovery's metrics 
using a new version of [our data retrieval and processing 
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated 
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)' 
[Reportupdater 
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater). 
See [T150915](https://phabricator.wikimedia.org/T150915) for more details. 
Furthermore, we switched to an updated UDF for counting API calls -- the 
previous version was undercounting full-text and geo search API calls (see 
[Gerrit change 315503](https://gerrit.wikimedia.org/r/#/c/315503/) for more 
details).
+* '__U__': on 2017-08-14 we started using a new UDF to determine the type of 
search API call (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for 
more details).
 
 Questions, bug reports, and feature suggestions
 ------
diff --git a/utils.R b/utils.R
index ab34131..eb13192 100644
--- a/utils.R
+++ b/utils.R
@@ -87,9 +87,9 @@
 }
 
 read_api <- function(){
-  split_dataset <<- 
polloi::read_dataset("discovery/metrics/search/search_api_usage.tsv", col_types 
= "Dci") %>%
-    dplyr::filter(!is.na(api), !is.na(calls)) %>%
-    dplyr::distinct(date, api, .keep_all = TRUE) %>%
+  split_dataset <<- 
polloi::read_dataset("discovery/metrics/search/search_api_usage.tsv", col_types 
= "Dcci") %>%
+    dplyr::filter(!is.na(api), !is.na(referer_class), !is.na(calls)) %>%
+    dplyr::distinct(date, api, referer_class, .keep_all = TRUE) %>%
     dplyr::arrange(api, date) %>%
     { split(., f = .$api) } %>%
     lapply(dplyr::select_, .dots = list(quote(-api)))

-- 
To view, visit https://gerrit.wikimedia.org/r/374387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I0c3fad23abb3931223d0b6212c1f8a969a251f72
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: develop
Gerrit-Owner: Chelsyx <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to