Bearloga has submitted this change and it was merged. (
https://gerrit.wikimedia.org/r/374387 )
Change subject: Use new UDF and break api calls down by referer class
......................................................................
Use new UDF and break api calls down by referer class
Bug: T172452
Change-Id: I0c3fad23abb3931223d0b6212c1f8a969a251f72
---
M modules/api.R
M modules/key_performance_metrics/api_usage.R
M tab_documentation/fulltext_basic.md
M tab_documentation/geo_basic.md
M tab_documentation/kpi_api_usage.md
M tab_documentation/language_basic.md
M tab_documentation/open_basic.md
M tab_documentation/prefix_basic.md
M utils.R
9 files changed, 45 insertions(+), 16 deletions(-)
Approvals:
Bearloga: Verified; Looks good to me, approved
diff --git a/modules/api.R b/modules/api.R
index 7e8e7ff..73368cd 100644
--- a/modules/api.R
+++ b/modules/api.R
@@ -1,39 +1,54 @@
output$cirrus_aggregate <- renderDygraph({
split_dataset$cirrus %>%
+ tidyr::spread(key = referer_class, value = calls) %>%
+ dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm =
TRUE), All)) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_fulltext_search))
%>%
polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Full-text
via API usage by day", legend_name = "Searches") %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
%>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
})
output$open_aggregate <- renderDygraph({
split_dataset$open %>%
+ tidyr::spread(key = referer_class, value = calls) %>%
+ dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm =
TRUE), All)) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_open_search)) %>%
polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "OpenSearch
API usage by day", legend_name = "Searches") %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
%>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
})
output$geo_aggregate <- renderDygraph({
split_dataset$geo %>%
+ tidyr::spread(key = referer_class, value = calls) %>%
+ dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm =
TRUE), All)) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_geo_search)) %>%
polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Geo Search
API usage by day", legend_name = "Searches") %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
%>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
})
output$language_aggregate <- renderDygraph({
split_dataset$language %>%
+ tidyr::spread(key = referer_class, value = calls) %>%
+ dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm =
TRUE), All)) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_language_search))
%>%
polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Language
Search API usage by day", legend_name = "Searches") %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
%>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
})
output$prefix_aggregate <- renderDygraph({
split_dataset$prefix %>%
+ tidyr::spread(key = referer_class, value = calls) %>%
+ dplyr::mutate(All = ifelse(is.na(All), rowSums(.[, -c(1, 2)], na.rm =
TRUE), All)) %>%
polloi::smoother(smooth_level =
polloi::smooth_switch(input$smoothing_global, input$smoothing_prefix_search))
%>%
polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Prefix
Search API usage by day", legend_name = "Searches") %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
%>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom")
})
diff --git a/modules/key_performance_metrics/api_usage.R
b/modules/key_performance_metrics/api_usage.R
index 271b030..13a4c3a 100644
--- a/modules/key_performance_metrics/api_usage.R
+++ b/modules/key_performance_metrics/api_usage.R
@@ -2,6 +2,11 @@
smooth_level <- input$smoothing_kpi_api_usage
start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all
= NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91)
api_usage <- split_dataset %>%
+ purrr::map(function(x) {
+ dplyr::group_by(x, date) %>%
+ dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>%
+ dplyr::ungroup()
+ }) %>%
{
if (!is.na(start_date)) {
lapply(., polloi::subset_by_date_range, from = start_date, to =
Sys.Date() - 1)
@@ -12,33 +17,35 @@
dplyr::bind_rows(.id = "api") %>%
tidyr::spread("api", "calls")
if ( input$kpi_api_usage_series_include_open ) {
- api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language + open
+ prefix)
+ api_usage <- dplyr::mutate(api_usage, all = cirrus + ifelse(is.na(`cirrus
(more like)`), 0, `cirrus (more like)`) + geo + language + open + prefix)
} else {
- api_usage <- dplyr::mutate(api_usage, all = cirrus + geo + language +
prefix)
+ api_usage <- dplyr::mutate(api_usage, all = cirrus + ifelse(is.na(`cirrus
(more like)`), 0, `cirrus (more like)`) + geo + language + prefix)
}
if ( input$kpi_api_usage_series_data == "raw" ) {
api_usage %<>%
polloi::smoother(ifelse(smooth_level == "global",
input$smoothing_global, smooth_level), rename = FALSE) %>%
{ xts::xts(.[, -1], order.by = .$date) }
if (!input$kpi_api_usage_series_include_open) {
- colnames(api_usage)[6] <- "all except open"
+ colnames(api_usage)[7] <- "all except open"
}
return(dygraph(api_usage, main = "Calls over time", xlab = "Date",
ylab = ifelse(input$kpi_api_usage_series_log_scale, "Calls
(log10 scale)", "Calls")) %>%
dySeries("cirrus", label = "full-text via API") %>%
dyLegend(width = 400, show = "always") %>%
- dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(6,
"Set2")[6:1],
+ dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(7,
"Set2")[7:1],
drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE,
includeZero = input$kpi_api_usage_series_log_scale,
logscale = input$kpi_api_usage_series_log_scale
) %>%
dyCSS(css = system.file("custom.css", package = "polloi")) %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc =
"bottom"))
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc =
"bottom") %>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc =
"bottom"))
}
api_usage_change <- api_usage %>%
dplyr::mutate(
cirrus = polloi::percent_change(cirrus),
+ `cirrus (more like)` = polloi::percent_change(`cirrus (more like)`),
geo = polloi::percent_change(geo),
language = polloi::percent_change(language),
open = polloi::percent_change(open),
@@ -48,12 +55,13 @@
{ .[-1, ] } %>%
polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global,
smooth_level), rename = FALSE) %>%
{ xts::xts(.[, -1], .$date) }
- if (!input$kpi_api_usage_series_include_open) colnames(api_usage_change)[6]
<- "all except open"
+ if (!input$kpi_api_usage_series_include_open) colnames(api_usage_change)[7]
<- "all except open"
return(dygraph(api_usage_change, main = "Day-to-day % change over time",
xlab = "Date", ylab = "% change") %>%
dyLegend(width = 400, show = "always") %>%
- dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(6,
"Set2"),
+ dyOptions(strokeWidth = 3, colors = RColorBrewer::brewer.pal(7,
"Set2"),
drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE,
includeZero = TRUE) %>%
dyCSS(css = system.file("custom.css", package = "polloi")) %>%
dyRangeSelector %>%
- dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc =
"bottom"))
+ dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc =
"bottom") %>%
+ dyEvent(as.Date("2017-08-14"), "U (new UDF)", labelLoc = "bottom"))
})
diff --git a/tab_documentation/fulltext_basic.md
b/tab_documentation/fulltext_basic.md
index bb3b641..c2a121a 100644
--- a/tab_documentation/fulltext_basic.md
+++ b/tab_documentation/fulltext_basic.md
@@ -13,6 +13,7 @@
------
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
Furthermore, we switched to an updated UDF for counting API calls -- the
previous version was undercounting full-text and geo search API calls (see
[Gerrit change 315503](https://gerrit.wikimedia.org/r/#/c/315503/) for more
details).
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details) and break down the API calls by referer class.
Questions, bug reports, and feature suggestions
------
diff --git a/tab_documentation/geo_basic.md b/tab_documentation/geo_basic.md
index f3ab210..5933b4a 100644
--- a/tab_documentation/geo_basic.md
+++ b/tab_documentation/geo_basic.md
@@ -12,6 +12,7 @@
------
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
Furthermore, we switched to an updated UDF for counting API calls -- the
previous version was undercounting full-text and geo search API calls (see
[Gerrit change 315503](https://gerrit.wikimedia.org/r/#/c/315503/) for more
details).
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details) and break down the API calls by referer class.
Questions, bug reports, and feature suggestions
------
diff --git a/tab_documentation/kpi_api_usage.md
b/tab_documentation/kpi_api_usage.md
index e88883a..c5712c7 100644
--- a/tab_documentation/kpi_api_usage.md
+++ b/tab_documentation/kpi_api_usage.md
@@ -7,6 +7,7 @@
------
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
Furthermore, we switched to an updated UDF for counting API calls -- the
previous version was undercounting full-text and geo search API calls (see
[Gerrit change 315503](https://gerrit.wikimedia.org/r/#/c/315503/) for more
details).
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details).
Questions, bug reports, and feature suggestions
------
diff --git a/tab_documentation/language_basic.md
b/tab_documentation/language_basic.md
index 6643c11..2742bc4 100644
--- a/tab_documentation/language_basic.md
+++ b/tab_documentation/language_basic.md
@@ -12,6 +12,7 @@
------
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details) and break down the API calls by referer class.
Questions, bug reports, and feature suggestions
------
diff --git a/tab_documentation/open_basic.md b/tab_documentation/open_basic.md
index 0270820..43673b1 100644
--- a/tab_documentation/open_basic.md
+++ b/tab_documentation/open_basic.md
@@ -15,6 +15,7 @@
------
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details) and break down the API calls by referer class.
Questions, bug reports, and feature suggestions
------
diff --git a/tab_documentation/prefix_basic.md
b/tab_documentation/prefix_basic.md
index 8ed5aa3..1f8a342 100644
--- a/tab_documentation/prefix_basic.md
+++ b/tab_documentation/prefix_basic.md
@@ -12,6 +12,7 @@
------
* After learning of a change to the search API call, we patched the Analytics
Hive UDF refinery (see [287264](https://gerrit.wikimedia.org/r/#/c/287264/)) to
check for generator=prefixsearch as well, not just list=prefixsearch. The data
was backfilled from 20 March 2016 using the updated Prefix API detection.
* '__R__': on 2017-01-01 we started calculating all of Discovery's metrics
using a new version of [our data retrieval and processing
codebase](https://phabricator.wikimedia.org/diffusion/WDGO/) that we migrated
to [Wikimedia Analytics](https://www.mediawiki.org/wiki/Analytics)'
[Reportupdater
infrastructure](https://wikitech.wikimedia.org/wiki/Analytics/Reportupdater).
See [T150915](https://phabricator.wikimedia.org/T150915) for more details.
+* '__U__': on 2017-08-14 we started to use a new UDF to get the type of search
API (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for
more details) and break down the API calls by referer class.
Questions, bug reports, and feature suggestions
------
diff --git a/utils.R b/utils.R
index ab34131..eb13192 100644
--- a/utils.R
+++ b/utils.R
@@ -87,9 +87,9 @@
}
read_api <- function(){
- split_dataset <<-
polloi::read_dataset("discovery/metrics/search/search_api_usage.tsv", col_types
= "Dci") %>%
- dplyr::filter(!is.na(api), !is.na(calls)) %>%
- dplyr::distinct(date, api, .keep_all = TRUE) %>%
+ split_dataset <<-
polloi::read_dataset("discovery/metrics/search/search_api_usage.tsv", col_types
= "Dcci") %>%
+ dplyr::filter(!is.na(api), !is.na(referer_class), !is.na(calls)) %>%
+ dplyr::distinct(date, api, referer_class, .keep_all = TRUE) %>%
dplyr::arrange(api, date) %>%
{ split(., f = .$api) } %>%
lapply(dplyr::select_, .dots = list(quote(-api)))
--
To view, visit https://gerrit.wikimedia.org/r/374387
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I0c3fad23abb3931223d0b6212c1f8a969a251f72
Gerrit-PatchSet: 2
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: develop
Gerrit-Owner: Chelsyx <[email protected]>
Gerrit-Reviewer: Bearloga <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits