Chelsyx has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/378067 )

Change subject: Interpretation and general findings for API dashboards
......................................................................

Interpretation and general findings for API dashboards

Bug: T172452
Change-Id: If97bb9cd23ae93117d106012d69b8f6250a19ce9
---
M modules/api.R
M modules/key_performance_metrics/api_usage.R
M tab_documentation/fulltext_basic.md
M tab_documentation/geo_basic.md
M tab_documentation/kpi_api_usage.md
M tab_documentation/language_basic.md
M tab_documentation/morelike_basic.md
M tab_documentation/open_basic.md
M tab_documentation/prefix_basic.md
M tab_documentation/referer_breakdown.md
M ui.R
11 files changed, 322 insertions(+), 105 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow 
refs/changes/67/378067/1

diff --git a/modules/api.R b/modules/api.R
index 790b29e..6cae3ad 100644
--- a/modules/api.R
+++ b/modules/api.R
@@ -1,9 +1,22 @@
 output$cirrus_aggregate <- renderDygraph({
-  split_dataset$`full-text via API` %>%
+  temp <- split_dataset$`full-text via API` %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$fulltext_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_fulltext_search)) 
%>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily 
Full-text search API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$fulltext_search_prop ~ "API Calls Share (%)",
+                           input$fulltext_search_log_scale ~ "Calls (log10 
scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily Full-text search via API usage by 
referrer",
+                         legend_name = "API Calls",
+                         logscale = input$fulltext_search_log_scale) %>%
     dyLegend(labelsDiv = "cirrus_aggregate_legend", width = 600) %>%
     dyRangeSelector %>%
     dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
@@ -11,21 +24,47 @@
 })
 
 output$morelike_aggregate <- renderDygraph({
-  split_dataset$`morelike via API` %>%
+  temp <- split_dataset$`morelike via API` %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$morelike_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_morelike_search)) 
%>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily 
Morelike search API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$morelike_search_prop ~ "API Calls Share (%)",
+                           input$morelike_search_log_scale ~ "Calls (log10 
scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily Morelike search API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$morelike_search_log_scale) %>%
     dyLegend(labelsDiv = "morelike_aggregate_legend", width = 600) %>%
     dyRangeSelector
 })
 
 output$open_aggregate <- renderDygraph({
-  split_dataset$open %>%
+  temp <- split_dataset$open %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$open_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_open_search)) %>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily 
OpenSearch API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$open_search_prop ~ "API Calls Share (%)",
+                           input$open_search_log_scale ~ "Calls (log10 scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily OpenSearch API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$open_search_log_scale) %>%
     dyLegend(labelsDiv = "open_aggregate_legend", width = 600) %>%
     dyRangeSelector %>%
     dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
@@ -33,11 +72,24 @@
 })
 
 output$geo_aggregate <- renderDygraph({
-  split_dataset$geo %>%
+  temp <- split_dataset$geo %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$geo_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_geo_search)) %>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Geo 
Search API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$geo_search_prop ~ "API Calls Share (%)",
+                           input$geo_search_log_scale ~ "Calls (log10 scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily Geo Search API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$geo_search_log_scale) %>%
     dyLegend(labelsDiv = "geo_aggregate_legend", width = 600) %>%
     dyRangeSelector %>%
     dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
@@ -45,11 +97,24 @@
 })
 
 output$language_aggregate <- renderDygraph({
-  split_dataset$language %>%
+  temp <- split_dataset$language %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$language_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_language_search)) 
%>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily 
Language search API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$language_search_prop ~ "API Calls Share (%)",
+                           input$language_search_log_scale ~ "Calls (log10 
scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily Language search API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$language_search_log_scale) %>%
     dyLegend(labelsDiv = "language_aggregate_legend", width = 600) %>%
     dyRangeSelector %>%
     dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
@@ -57,11 +122,24 @@
 })
 
 output$prefix_aggregate <- renderDygraph({
-  split_dataset$prefix %>%
+  temp <- split_dataset$prefix %>%
     tidyr::spread(referrer, calls) %>%
-    polloi::reorder_columns() %>%
+    polloi::reorder_columns()
+  if (input$prefix_search_prop) {
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2))) %>%
+      dplyr::filter(date >= "2017-06-29")
+  }
+  temp %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_prefix_search)) 
%>%
-    polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily 
Prefix search API usage by referrer", legend_name = "Searches") %>%
+    polloi::make_dygraph(xlab = "Date",
+                         ylab = dplyr::case_when(
+                           input$prefix_search_prop ~ "API Calls Share (%)",
+                           input$prefix_search_log_scale ~ "Calls (log10 
scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily Prefix search API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$prefix_search_log_scale) %>%
     dyLegend(labelsDiv = "prefix_aggregate_legend", width = 600) %>%
     dyRangeSelector %>%
     dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
@@ -74,16 +152,23 @@
     dplyr::filter(date >= "2017-06-29") %>%
     dplyr::group_by(date, referrer) %>%
     dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>%
-    tidyr::spread(referrer, calls)
+    dplyr::ungroup() %>%
+    tidyr::spread(referrer, calls) %>%
+    polloi::reorder_columns()
   if (input$referer_breakdown_prop) {
-    temp <- cbind(temp$date, purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2)))
+    temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) 
round(100 * x / temp$All, 2)))
   }
   temp %>%
-    polloi::reorder_columns() %>%
     polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_referer_breakdown)) %>%
     polloi::make_dygraph(xlab = "Date",
-                         ylab = ifelse(input$referer_breakdown_prop, "API 
Calls Share (%)", "API Calls"),
-                         title = "Daily API usage by referrer", legend_name = 
"API Calls") %>%
+                         ylab = dplyr::case_when(
+                           input$referer_breakdown_prop ~ "API Calls Share 
(%)",
+                           input$referer_breakdown_log_scale ~ "Calls (log10 
scale)",
+                           TRUE ~ "API Calls"
+                         ),
+                         title = "Daily API usage by referrer",
+                         legend_name = "API Calls",
+                         logscale = input$referer_breakdown_log_scale) %>%
     dyLegend(labelsDiv = "referer_breakdown_plot_legend", width = 600) %>%
     dyRangeSelector
 })
diff --git a/modules/key_performance_metrics/api_usage.R 
b/modules/key_performance_metrics/api_usage.R
index 112ba56..c18e84f 100644
--- a/modules/key_performance_metrics/api_usage.R
+++ b/modules/key_performance_metrics/api_usage.R
@@ -1,64 +1,49 @@
 output$kpi_api_usage_series <- renderDygraph({
-  smooth_level <- input$smoothing_kpi_api_usage
   start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all 
= NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91)
   api_usage <- split_dataset %>%
-    purrr::map(function(x) {
-      dplyr::filter(x, referrer == "All") %>%
-        dplyr::group_by(date) %>%
-        dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>%
-        dplyr::ungroup()
-    }) %>%
+    dplyr::bind_rows(.id = "api") %>%
+    dplyr::filter(referrer == "All") %>%
+    dplyr::select(-referrer) %>%
     {
       if (!is.na(start_date)) {
-        lapply(., polloi::subset_by_date_range, from = start_date, to = 
Sys.Date() - 1)
+        polloi::subset_by_date_range(., from = start_date, to = Sys.Date() - 1)
       } else {
         .
       }
     } %>%
-    dplyr::bind_rows(.id = "api") %>%
-    tidyr::spread("api", "calls")
-  api_usage <- dplyr::mutate(api_usage, all = `full-text via API` + 
dplyr::if_else(is.na(`morelike via API`), 0, `morelike via API`) + geo + 
language + prefix) %>%
+    tidyr::spread("api", "calls") %>%
+    dplyr::mutate(all = open + `full-text via API` + 
dplyr::if_else(is.na(`morelike via API`), 0, `morelike via API`) + geo + 
language + prefix) %>%
     polloi::reorder_columns()
-  if ( input$kpi_api_usage_series_data == "raw" ) {
-    api_usage %<>%
-      polloi::smoother(ifelse(smooth_level == "global", 
input$smoothing_global, smooth_level), rename = FALSE) %>%
-      { xts::xts(.[, -1], order.by = .$date) }
-    return(dygraph(api_usage, main = "Calls over time", xlab = "Date",
-                   ylab = ifelse(input$kpi_api_usage_series_log_scale, "Calls 
(log10 scale)", "Calls")) %>%
-             dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) 
%>%
-             dyOptions(
-               strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, 
"Set2")[7:1],
-               drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE,
-               includeZero = input$kpi_api_usage_series_log_scale,
-               logscale = input$kpi_api_usage_series_log_scale
-             ) %>%
-             dyCSS(css = system.file("custom.css", package = "polloi")) %>%
-             dyRangeSelector %>%
-             dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom") %>%
-             dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = 
"bottom"))
-  } else {
-    api_usage_change <- api_usage %>%
-      dplyr::mutate(
-        `full-text via API` = polloi::percent_change(`full-text via API`),
-        `morelike via API` = polloi::percent_change(`morelike via API`),
-        geo = polloi::percent_change(geo),
-        language = polloi::percent_change(language),
-        open = polloi::percent_change(open),
-        prefix = polloi::percent_change(prefix),
-        all = polloi::percent_change(all)
-      ) %>%
-      { .[-1, ] } %>%
-      polloi::smoother(ifelse(smooth_level == "global", 
input$smoothing_global, smooth_level), rename = FALSE) %>%
-      { xts::xts(.[, -1], .$date) }
-    return(dygraph(api_usage_change, main = "Day-to-day % change over time", 
xlab = "Date", ylab = "% change") %>%
-             dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) 
%>%
-             dyOptions(
-               strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, "Set2"),
-               drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE, 
includeZero = TRUE
-             ) %>%
-             dyCSS(css = system.file("custom.css", package = "polloi")) %>%
-             dyRangeSelector %>%
-             dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = 
"bottom") %>%
-             dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = 
"bottom"))
+  if (input$kpi_api_usage_series_prop) {
+    api_usage <- cbind(api_usage[, "date"], purrr::map_df(api_usage[, -c(1, 
2)], function(x) round(100 * x / api_usage$all, 2)))
   }
+  if ( input$kpi_api_usage_series_data == "raw" ) {
+    api_usage %>%
+      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_kpi_api_usage)) 
%>%
+      polloi::make_dygraph(xlab = "Date",
+                           ylab = dplyr::case_when(
+                             input$kpi_api_usage_series_prop ~ "API Calls 
Share (%)",
+                             input$kpi_api_usage_series_log_scale ~ "Calls 
(log10 scale)",
+                             TRUE ~ "API Calls"
+                           ),
+                           title = "Calls over time",
+                           legend_name = "API Calls",
+                           logscale = input$kpi_api_usage_series_log_scale) %>%
+      dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>%
+      dyRangeSelector %>%
+      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+      dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom")
+  } else {
+    cbind(api_usage[, "date"], purrr::map_df(api_usage[, -1], 
polloi::percent_change)) %>%
+      { .[-1, ] } %>%
+      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_kpi_api_usage)) 
%>%
+      polloi::make_dygraph(xlab = "Date",
+                           ylab = "% change",
+                           title = "Day-to-day % change over time",
+                           legend_name = "API Calls") %>%
+      dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>%
+      dyRangeSelector %>%
+      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+      dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom")
+   }
 })
diff --git a/tab_documentation/fulltext_basic.md 
b/tab_documentation/fulltext_basic.md
index 49acc24..f7db6ba 100644
--- a/tab_documentation/fulltext_basic.md
+++ b/tab_documentation/fulltext_basic.md
@@ -4,10 +4,22 @@
 What we're talking about is "full text" search; searching for a particular
 term via the API and getting back packages that contain that term in either 
the title *or* the page's content.
 
-General trends
+On this dashboard, we break down API calls by the following class of referrer:
+
+* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
 ------
 
-There's not enough data to be able to say much about the patterns inherrent to 
Full-text Search (via API) usage right now, but it's interesting to compare the 
values found to how much other search API forms are used. Full-text sits near 
the top of the pack; Open Search is used dramatically more, but everything else 
somewhat (or very much) less.
+* About 80% of full-text search via API are direct API calls.
+* About 80% of full-text search via API are from bots.
+* About 90% of full-text search via API are done on desktop.
 
 Outages and inaccuracies
 ------
diff --git a/tab_documentation/geo_basic.md b/tab_documentation/geo_basic.md
index 83fadcd..c014d8a 100644
--- a/tab_documentation/geo_basic.md
+++ b/tab_documentation/geo_basic.md
@@ -3,10 +3,21 @@
 
 Geo Search, or Geographic Search, refers to the ability to search for pages 
that are "near" a particular set of geographic coordinates, in the sense of 
being about subjects that have physical locations we track. (places. Not 
people.)
 
-General trends
+On this dashboard, we break down API calls by the following class of referrer:
+
+* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
 ------
 
-It's early days, but the tiny adoption of Geographic Search, particularly 
compared to Cirrus or OpenSearch, suggests this isn't a tremendously useful 
feature to API users. We need to dig in and refine the heuristics for 
identifying requests as different types, however.
+* About 96% of geo search are direct API calls.
+* About 78% of geo search are from bots.
 
 Outages and inaccuracies
 ------
diff --git a/tab_documentation/kpi_api_usage.md 
b/tab_documentation/kpi_api_usage.md
index 0702341..d44aa56 100644
--- a/tab_documentation/kpi_api_usage.md
+++ b/tab_documentation/kpi_api_usage.md
@@ -1,7 +1,24 @@
 Key Performance Indicator: API usage
 =======
 
-We want people, both within our movement and outside it, to be able to easily 
access our information.
+We want people, both within our movement and outside it, to be able to easily 
access our information. This dashboard breaks down API usage by search request 
type. For more information about each type of request, see the following 
dashboards:
+
+* [Full-text search](http://discovery.wmflabs.org/metrics/#fulltext_search): 
searching for a particular term via the API and getting back packages that 
contain that term in either the title or the page's content (see 
[Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Full_text_search)
 for more details).
+* [Morelike search](http://discovery.wmflabs.org/metrics/#morelike_search): 
morelike is a feature of CirrusSearch that is used in extensions like 
[RelatedArticles](https://www.mediawiki.org/wiki/Extension:RelatedArticles) 
(see 
[Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Morelike) 
for more details).
+* [Open search](http://discovery.wmflabs.org/metrics/#open_search): 
[OpenSearch](https://en.wikipedia.org/wiki/OpenSearch) is a search format 
optimised for syndication and aggregation.
+* [Geo search](http://discovery.wmflabs.org/metrics/#geo_search): Geo Search, 
or Geographic Search, refers to the ability to search for pages that are “near” 
a particular set of geographic coordinates, in the sense of being about 
subjects that have physical locations we track (see 
[Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Geo_Search)
 for more details).
+* [Prefix search](http://discovery.wmflabs.org/metrics/#prefix_search): Prefix 
Search is simply run over page titles, and lets a user find pages that start 
with (“prefix”) a particular term.
+* [Language search](http://discovery.wmflabs.org/metrics/#language_search): 
Language search allows you to search for a particular language name in 
different scripts.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown).
+
+General findings
+------
+
+* About 56% of all API calls are morelike search which is used by 
RelatedArticles on mobile. We have more traffic during the weekends since users 
generally spend more time on their mobile devices on weekends.
+* About 22% of all API calls are open search on desktop. The usage is higher 
on weekdays, as we see in other desktop usage patterns.
+* About 14% of all API calls are prefix search. And about 75% of prefix search 
via API are done on mobile.
+* About 8% of all API calls are full-text search. And about 80% of full-text 
search via API are from bots.
 
 Outages and inaccuracies
 ------
diff --git a/tab_documentation/language_basic.md 
b/tab_documentation/language_basic.md
index 1bd3792..c3d6ae0 100644
--- a/tab_documentation/language_basic.md
+++ b/tab_documentation/language_basic.md
@@ -3,10 +3,21 @@
 
 Language search doesn't actually allow you to search in different languages - 
it allows you to search for a particular language name in different scripts. 
This can be done through the API with `action=languagesearch`, which is what 
we're tracking here.
 
-General trends
+On this dashboard, we break down API calls by the following class of referrer:
+
+* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
 ------
 
-Given the limited utility of this, we'd expect to see (and do see) very few 
hits.
+* About 93% of language search are internal API calls and about 7% of language 
search are direct traffic. 
+* Given the limited utility of this, we'd expect to see (and do see) very few 
hits.
 
 Outages and inaccuracies
 ------
diff --git a/tab_documentation/morelike_basic.md 
b/tab_documentation/morelike_basic.md
index 0d1c5ec..88c92d5 100644
--- a/tab_documentation/morelike_basic.md
+++ b/tab_documentation/morelike_basic.md
@@ -1,8 +1,24 @@
-Morelike Search via API usage
+Morelike Search API usage
 =======
 
 [Morelike](https://www.mediawiki.org/wiki/Help:CirrusSearch#Morelike) is a 
feature of CirrusSearch that is used in extensions like 
[RelatedArticles](https://www.mediawiki.org/wiki/Extension:RelatedArticles). As 
of 2017-06-29, we use a new UDF which can distinguish morelike vs fulltext 
search API requests (see [Gerrit change 
345863](https://gerrit.wikimedia.org/r/#/c/345863/) for more details).
 
+On this dashboard, we break down API calls by the following class of referrer:
+
+* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
+------
+
+* Almost all of the morelike searches via API are used by RelatedArticles on 
mobile -- about 94% through mobile web and about 6% through apps. We have more 
traffic during the weekends since users generally spend more time on their 
mobile devices on weekends.
+* About 50% of the morelike search API usage is direct traffic. However, 88% 
(around 60 million API calls per day) of these direct morelike searches are from 
mobile web and they might be misclassified internal traffic. Further 
investigation is needed.
+
 Questions, bug reports, and feature suggestions
 ------
 For technical, non-bug questions, [email 
Mikhail](mailto:mpo...@wikimedia.org?subject=Dashboard%20Question) or 
[Chelsy](mailto:c...@wikimedia.org?subject=Dashboard%20Question). If you 
experience a bug or notice something wrong or have a suggestion, [open a ticket 
in 
Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery)
 in the Discovery board or [email 
Deb](mailto:d...@wikimedia.org?subject=Dashboard%20Question).
diff --git a/tab_documentation/open_basic.md b/tab_documentation/open_basic.md
index 99be0f8..82d659e 100644
--- a/tab_documentation/open_basic.md
+++ b/tab_documentation/open_basic.md
@@ -3,6 +3,21 @@
 
 [OpenSearch](https://en.wikipedia.org/wiki/OpenSearch) is a search format 
optimised for syndication and aggregation. MediaWiki has OpenSearch support, 
and so does its API; that's what's tracked here. OpenSearch allows you to 
retrieve full content as well as snippets, and allows you to *search* full 
content for your search term.
 
+On this dashboard, we break down API calls by the following classes of referrer:
+
+* **None** is direct traffic that has an empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [SearchEngineClassifier 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have an HTTP referrer or has an unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than a search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
+------
+
+* Almost all OpenSearch requests are made on desktop. We have more traffic 
during the weekdays because users generally spend more time on their desktops on 
weekdays.
+
 Outages and inaccuracies
 ------
 
diff --git a/tab_documentation/prefix_basic.md 
b/tab_documentation/prefix_basic.md
index c6e94d0..ed3eef3 100644
--- a/tab_documentation/prefix_basic.md
+++ b/tab_documentation/prefix_basic.md
@@ -3,10 +3,22 @@
 
 Prefix Search is simply run over page titles, and lets a user find pages that 
start with ("prefix") a particular term. This is tremendously useful and is 
where the dropdown box in Wikipedia's user-facing search comes from. It's also 
exposed via the API, and hits to that service are what we're tracking here.
 
-General trends
+On this dashboard, we break down API calls by the following classes of referrer:
+
+* **None** is direct traffic that has an empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [SearchEngineClassifier 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have an HTTP referrer or has an unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than a search engine.
+
+For overall breakdown by referrer class, see 
[http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown);
 for overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
 ------
 
-Prefix Search isn't tremendously used, which makes sense; for external 
services, the cost of making the call to us is too high to provide seamless 
dropdowns in the fashion we can on Wikipedia proper.
+* About 47% of prefix searches are direct API calls. 57% of this direct traffic 
is from Wikipedia apps, and 41% is from mobile web, which 
might be misclassified internal traffic. Further investigation is needed.
+* About 75% of prefix searches via API are made on mobile.
+* Prefix Search isn't tremendously used by external services, because the cost 
of making the call to us is too high to provide seamless dropdowns in the 
fashion we can on Wikipedia proper.
 
 Notes
 ------
diff --git a/tab_documentation/referer_breakdown.md 
b/tab_documentation/referer_breakdown.md
index 61ddb1f..bc79c6c 100644
--- a/tab_documentation/referer_breakdown.md
+++ b/tab_documentation/referer_breakdown.md
@@ -1,9 +1,21 @@
 API Calls by Referrer Class
 =======
 
-All types of API calls are aggregated by date and referrer class.
+All types of API calls are aggregated by date and the following referrer classes:
 
-**Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://git.wikimedia.org/blob/analytics%2Frefinery%2Fsource.git/master/refinery-core%2Fsrc%2Fmain%2Fjava%2Forg%2Fwikimedia%2Fanalytics%2Frefinery%2Fcore%2FWebrequest.java#L203)
 for more information.)
+* **None** is direct traffic that has an empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: 
mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, 
wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, 
wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223)
 for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, 
DuckDuckGo or Baidu (See [SearchEngineClassifier 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41)
 for more information.).
+* **Unknown** is traffic that does not have an HTTP referrer or has an unusual 
referrer header (See [Webrequest 
source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211)
 for more information.).
+* **External** is traffic referred by something other than a search engine.
+
+For overall breakdown by search request type, see 
[http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage).
+
+General findings
+------
+
+* About 50% of API calls are internal traffic. 63% of internal traffic is 
from mobile web and the remaining 37% is from desktop.
+* About 49% of API calls are direct traffic. 73% of direct traffic is from 
mobile (apps and mobile web) and the remaining 27% is from desktop.
 
 Outages and inaccuracies
 ------
diff --git a/ui.R b/ui.R
index 73c8980..441261b 100644
--- a/ui.R
+++ b/ui.R
@@ -144,10 +144,16 @@
                                                             "Day-to-day % 
change" = "change"),
                                              inline = TRUE),
                                 width = 4),
-                         column(checkboxInput("kpi_api_usage_series_log_scale",
-                                              label = "Log10 Scale",
-                                              value = FALSE),
-                                width = 4),
+                         
column(conditionalPanel("input.kpi_api_usage_series_data == 'raw'",
+                                  
checkboxInput("kpi_api_usage_series_log_scale",
+                                    label = "Log10 Scale",
+                                    value = FALSE)),
+                                width = 2),
+                         
column(conditionalPanel("input.kpi_api_usage_series_data == 'raw'",
+                                  checkboxInput("kpi_api_usage_series_prop",
+                                    label = "Use Proportion",
+                                    value = FALSE)),
+                                width = 2),
                          
column(polloi::smooth_select("smoothing_kpi_api_usage"), width = 4)),
                 div(id = "kpi_api_usage_series_legend", style = "text-align: 
right;"),
                 dygraphOutput("kpi_api_usage_series"),
@@ -223,50 +229,85 @@
                 includeMarkdown("./tab_documentation/invoke_source.md")
         ),
         tabItem(tabName = "fulltext_search",
-                polloi::smooth_select("smoothing_fulltext_search"),
+                fluidRow(column(checkboxInput("fulltext_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                         width = 2),
+                         column(checkboxInput("fulltext_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                         width = 2),
+                         
column(polloi::smooth_select("smoothing_fulltext_search"), width = 8)),
                 div(id = "cirrus_aggregate_legend", style = "text-align: 
right;"),
                 dygraphOutput("cirrus_aggregate"),
                 includeMarkdown("./tab_documentation/fulltext_basic.md")
         ),
         tabItem(tabName = "morelike_search",
-                polloi::smooth_select("smoothing_morelike_search"),
+                fluidRow(column(checkboxInput("morelike_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("morelike_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         
column(polloi::smooth_select("smoothing_morelike_search"), width = 8)),
                 div(id = "morelike_aggregate_legend", style = "text-align: 
right;"),
                 dygraphOutput("morelike_aggregate"),
                 includeMarkdown("./tab_documentation/morelike_basic.md")
         ),
         tabItem(tabName = "open_search",
-                polloi::smooth_select("smoothing_open_search"),
+                fluidRow(column(checkboxInput("open_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("open_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         
column(polloi::smooth_select("smoothing_open_search"), width = 8)),
                 div(id = "open_aggregate_legend", style = "text-align: 
right;"),
                 dygraphOutput("open_aggregate"),
                 includeMarkdown("./tab_documentation/open_basic.md")
         ),
         tabItem(tabName = "geo_search",
-                polloi::smooth_select("smoothing_geo_search"),
+                fluidRow(column(checkboxInput("geo_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("geo_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         column(polloi::smooth_select("smoothing_geo_search"), 
width = 8)),
                 div(id = "geo_aggregate_legend", style = "text-align: right;"),
                 dygraphOutput("geo_aggregate"),
                 includeMarkdown("./tab_documentation/geo_basic.md")
         ),
         tabItem(tabName = "prefix_search",
-                polloi::smooth_select("smoothing_prefix_search"),
+                fluidRow(column(checkboxInput("prefix_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("prefix_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         
column(polloi::smooth_select("smoothing_prefix_search"), width = 8)),
                 div(id = "prefix_aggregate_legend", style = "text-align: 
right;"),
                 dygraphOutput("prefix_aggregate"),
                 includeMarkdown("./tab_documentation/prefix_basic.md")
         ),
         tabItem(tabName = "language_search",
-                polloi::smooth_select("smoothing_language_search"),
+                fluidRow(column(checkboxInput("language_search_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("language_search_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         
column(polloi::smooth_select("smoothing_language_search"), width = 8)),
                 div(id = "language_aggregate_legend", style = "text-align: 
right;"),
                 dygraphOutput("language_aggregate"),
                 includeMarkdown("./tab_documentation/language_basic.md")
         ),
         tabItem(tabName = "referer_breakdown",
-                fluidRow(
-                  column(
-                    HTML("<label class = \"control-label\" style = 
\"margin-bottom:-30px;\">Type</label>"),
-                    shiny::checkboxInput("referer_breakdown_prop", label = 
"Use Proportion", value = FALSE),
-                    width = 2
-                  ),
-                  column(polloi::smooth_select("smoothing_referer_breakdown"), 
width = 10)
-                ),
+                fluidRow(column(checkboxInput("referer_breakdown_log_scale",
+                                              label = "Log10 Scale", value = 
FALSE),
+                                width = 2),
+                         column(checkboxInput("referer_breakdown_prop",
+                                              label = "Use Proportion", value 
= FALSE),
+                                width = 2),
+                         
column(polloi::smooth_select("smoothing_referer_breakdown"), width = 8)),
                 div(id = "referer_breakdown_plot_legend", style = "text-align: 
right;"),
                 dygraphOutput("referer_breakdown_plot"),
                 includeMarkdown("./tab_documentation/referer_breakdown.md")

-- 
To view, visit https://gerrit.wikimedia.org/r/378067
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: If97bb9cd23ae93117d106012d69b8f6250a19ce9
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/rainbow
Gerrit-Branch: develop
Gerrit-Owner: Chelsyx <c...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to