Chelsyx has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/378067 )
Change subject: Interpretation and general findings for API dashboards ...................................................................... Interpretation and general findings for API dashboards Bug: T172452 Change-Id: If97bb9cd23ae93117d106012d69b8f6250a19ce9 --- M modules/api.R M modules/key_performance_metrics/api_usage.R M tab_documentation/fulltext_basic.md M tab_documentation/geo_basic.md M tab_documentation/kpi_api_usage.md M tab_documentation/language_basic.md M tab_documentation/morelike_basic.md M tab_documentation/open_basic.md M tab_documentation/prefix_basic.md M tab_documentation/referer_breakdown.md M ui.R 11 files changed, 322 insertions(+), 105 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/rainbow refs/changes/67/378067/1 diff --git a/modules/api.R b/modules/api.R index 790b29e..6cae3ad 100644 --- a/modules/api.R +++ b/modules/api.R @@ -1,9 +1,22 @@ output$cirrus_aggregate <- renderDygraph({ - split_dataset$`full-text via API` %>% + temp <- split_dataset$`full-text via API` %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$fulltext_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_fulltext_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Full-text search API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$fulltext_search_prop ~ "API Calls Share (%)", + input$fulltext_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily Full-text search via API usage by referrer", + legend_name = "API Calls", + logscale = input$fulltext_search_log_scale) %>% dyLegend(labelsDiv = "cirrus_aggregate_legend", width = 
600) %>% dyRangeSelector %>% dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% @@ -11,21 +24,47 @@ }) output$morelike_aggregate <- renderDygraph({ - split_dataset$`morelike via API` %>% + temp <- split_dataset$`morelike via API` %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$morelike_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_morelike_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Morelike search API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$morelike_search_prop ~ "API Calls Share (%)", + input$morelike_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily Morelike search API usage by referrer", + legend_name = "API Calls", + logscale = input$morelike_search_log_scale) %>% dyLegend(labelsDiv = "morelike_aggregate_legend", width = 600) %>% dyRangeSelector }) output$open_aggregate <- renderDygraph({ - split_dataset$open %>% + temp <- split_dataset$open %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$open_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_open_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily OpenSearch API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$open_search_prop ~ "API Calls Share (%)", + 
input$open_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily OpenSearch API usage by referrer", + legend_name = "API Calls", + logscale = input$open_search_log_scale) %>% dyLegend(labelsDiv = "open_aggregate_legend", width = 600) %>% dyRangeSelector %>% dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% @@ -33,11 +72,24 @@ }) output$geo_aggregate <- renderDygraph({ - split_dataset$geo %>% + temp <- split_dataset$geo %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$geo_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_geo_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Geo Search API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$geo_search_prop ~ "API Calls Share (%)", + input$geo_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily Geo Search API usage by referrer", + legend_name = "API Calls", + logscale = input$geo_search_log_scale) %>% dyLegend(labelsDiv = "geo_aggregate_legend", width = 600) %>% dyRangeSelector %>% dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% @@ -45,11 +97,24 @@ }) output$language_aggregate <- renderDygraph({ - split_dataset$language %>% + temp <- split_dataset$language %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$language_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, input$smoothing_language_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Language search API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$language_search_prop ~ "API Calls Share (%)", + input$language_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily Language search API usage by referrer", + legend_name = "API Calls", + logscale = input$language_search_log_scale) %>% dyLegend(labelsDiv = "language_aggregate_legend", width = 600) %>% dyRangeSelector %>% dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% @@ -57,11 +122,24 @@ }) output$prefix_aggregate <- renderDygraph({ - split_dataset$prefix %>% + temp <- split_dataset$prefix %>% tidyr::spread(referrer, calls) %>% - polloi::reorder_columns() %>% + polloi::reorder_columns() + if (input$prefix_search_prop) { + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) %>% + dplyr::filter(date >= "2017-06-29") + } + temp %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_prefix_search)) %>% - polloi::make_dygraph(xlab = "Date", ylab = "Searches", title = "Daily Prefix search API usage by referrer", legend_name = "Searches") %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$prefix_search_prop ~ "API Calls Share (%)", + input$prefix_search_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily Prefix search API usage by referrer", + legend_name = "API Calls", + logscale = input$prefix_search_log_scale) %>% dyLegend(labelsDiv = "prefix_aggregate_legend", width = 600) %>% dyRangeSelector %>% dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% @@ -74,16 +152,23 @@ dplyr::filter(date >= "2017-06-29") %>% dplyr::group_by(date, referrer) %>% 
dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>% - tidyr::spread(referrer, calls) + dplyr::ungroup() %>% + tidyr::spread(referrer, calls) %>% + polloi::reorder_columns() if (input$referer_breakdown_prop) { - temp <- cbind(temp$date, purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) + temp <- cbind(temp[, "date"], purrr::map_df(temp[, -c(1, 2)], function(x) round(100 * x / temp$All, 2))) } temp %>% - polloi::reorder_columns() %>% polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_referer_breakdown)) %>% polloi::make_dygraph(xlab = "Date", - ylab = ifelse(input$referer_breakdown_prop, "API Calls Share (%)", "API Calls"), - title = "Daily API usage by referrer", legend_name = "API Calls") %>% + ylab = dplyr::case_when( + input$referer_breakdown_prop ~ "API Calls Share (%)", + input$referer_breakdown_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Daily API usage by referrer", + legend_name = "API Calls", + logscale = input$referer_breakdown_log_scale) %>% dyLegend(labelsDiv = "referer_breakdown_plot_legend", width = 600) %>% dyRangeSelector }) diff --git a/modules/key_performance_metrics/api_usage.R b/modules/key_performance_metrics/api_usage.R index 112ba56..c18e84f 100644 --- a/modules/key_performance_metrics/api_usage.R +++ b/modules/key_performance_metrics/api_usage.R @@ -1,64 +1,49 @@ output$kpi_api_usage_series <- renderDygraph({ - smooth_level <- input$smoothing_kpi_api_usage start_date <- Sys.Date() - switch(input$kpi_summary_date_range_selector, all = NA, daily = 1, weekly = 8, monthly = 31, quarterly = 91) api_usage <- split_dataset %>% - purrr::map(function(x) { - dplyr::filter(x, referrer == "All") %>% - dplyr::group_by(date) %>% - dplyr::summarize(calls = sum(calls, na.rm = TRUE)) %>% - dplyr::ungroup() - }) %>% + dplyr::bind_rows(.id = "api") %>% + dplyr::filter(referrer == "All") %>% + dplyr::select(-referrer) %>% { if (!is.na(start_date)) { - lapply(., 
polloi::subset_by_date_range, from = start_date, to = Sys.Date() - 1) + polloi::subset_by_date_range(., from = start_date, to = Sys.Date() - 1) } else { . } } %>% - dplyr::bind_rows(.id = "api") %>% - tidyr::spread("api", "calls") - api_usage <- dplyr::mutate(api_usage, all = `full-text via API` + dplyr::if_else(is.na(`morelike via API`), 0, `morelike via API`) + geo + language + prefix) %>% + tidyr::spread("api", "calls") %>% + dplyr::mutate(all = open + `full-text via API` + dplyr::if_else(is.na(`morelike via API`), 0, `morelike via API`) + geo + language + prefix) %>% polloi::reorder_columns() - if ( input$kpi_api_usage_series_data == "raw" ) { - api_usage %<>% - polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) %>% - { xts::xts(.[, -1], order.by = .$date) } - return(dygraph(api_usage, main = "Calls over time", xlab = "Date", - ylab = ifelse(input$kpi_api_usage_series_log_scale, "Calls (log10 scale)", "Calls")) %>% - dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>% - dyOptions( - strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, "Set2")[7:1], - drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE, - includeZero = input$kpi_api_usage_series_log_scale, - logscale = input$kpi_api_usage_series_log_scale - ) %>% - dyCSS(css = system.file("custom.css", package = "polloi")) %>% - dyRangeSelector %>% - dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% - dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom")) - } else { - api_usage_change <- api_usage %>% - dplyr::mutate( - `full-text via API` = polloi::percent_change(`full-text via API`), - `morelike via API` = polloi::percent_change(`morelike via API`), - geo = polloi::percent_change(geo), - language = polloi::percent_change(language), - open = polloi::percent_change(open), - prefix = polloi::percent_change(prefix), - all = polloi::percent_change(all) - ) %>% - { .[-1, ] } %>% - 
polloi::smoother(ifelse(smooth_level == "global", input$smoothing_global, smooth_level), rename = FALSE) %>% - { xts::xts(.[, -1], .$date) } - return(dygraph(api_usage_change, main = "Day-to-day % change over time", xlab = "Date", ylab = "% change") %>% - dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>% - dyOptions( - strokeWidth = 3, colors = RColorBrewer::brewer.pal(7, "Set2"), - drawPoints = FALSE, pointSize = 3, labelsKMB = TRUE, includeZero = TRUE - ) %>% - dyCSS(css = system.file("custom.css", package = "polloi")) %>% - dyRangeSelector %>% - dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% - dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom")) + if (input$kpi_api_usage_series_prop) { + api_usage <- cbind(api_usage[, "date"], purrr::map_df(api_usage[, -c(1, 2)], function(x) round(100 * x / api_usage$all, 2))) } + if ( input$kpi_api_usage_series_data == "raw" ) { + api_usage %>% + polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_kpi_api_usage)) %>% + polloi::make_dygraph(xlab = "Date", + ylab = dplyr::case_when( + input$kpi_api_usage_series_prop ~ "API Calls Share (%)", + input$kpi_api_usage_series_log_scale ~ "Calls (log10 scale)", + TRUE ~ "API Calls" + ), + title = "Calls over time", + legend_name = "API Calls", + logscale = input$kpi_api_usage_series_log_scale) %>% + dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>% + dyRangeSelector %>% + dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% + dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom") + } else { + cbind(api_usage[, "date"], purrr::map_df(api_usage[, -1], polloi::percent_change)) %>% + { .[-1, ] } %>% + polloi::smoother(smooth_level = polloi::smooth_switch(input$smoothing_global, input$smoothing_kpi_api_usage)) %>% + polloi::make_dygraph(xlab = "Date", + ylab = "% change", + title = "Day-to-day % change over time", + legend_name = "API 
Calls") %>% + dyLegend(labelsDiv = "kpi_api_usage_series_legend", width = 600) %>% + dyRangeSelector %>% + dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") %>% + dyEvent(as.Date("2017-06-29"), "U (new UDF)", labelLoc = "bottom") + } }) diff --git a/tab_documentation/fulltext_basic.md b/tab_documentation/fulltext_basic.md index 49acc24..f7db6ba 100644 --- a/tab_documentation/fulltext_basic.md +++ b/tab_documentation/fulltext_basic.md @@ -4,10 +4,22 @@ What we're talking about is "full text" search; searching for a particular term via the API and getting back packages that contain that term in either the title *or* the page's content. -General trends +On this dashboard, we breakdown API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. +* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). +* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. 
+ +For overall breakdown by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings ------ -There's not enough data to be able to say much about the patterns inherrent to Full-text Search (via API) usage right now, but it's interesting to compare the values found to how much other search API forms are used. Full-text sits near the top of the pack; Open Search is used dramatically more, but everything else somewhat (or very much) less. +* About 80% of full-text search via API are direct API calls. +* About 80% of full-text search via API are from bots. +* About 90% of full-text search via API are done on desktop. Outages and inaccuracies ------ diff --git a/tab_documentation/geo_basic.md b/tab_documentation/geo_basic.md index 83fadcd..c014d8a 100644 --- a/tab_documentation/geo_basic.md +++ b/tab_documentation/geo_basic.md @@ -3,10 +3,21 @@ Geo Search, or Geographic Search, refers to the ability to search for pages that are "near" a particular set of geographic coordinates, in the sense of being about subjects that have physical locations we track. (places. Not people.) -General trends +On this dashboard, we break down API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. +* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.).
+* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). +* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. + +For overall breakdown by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings ------ -It's early days, but the tiny adoption of Geographic Search, particularly compared to Cirrus or OpenSearch, suggests this isn't a tremendously useful feature to API users. We need to dig in and refine the heuristics for identifying requests as different types, however. +* About 96% of geo search are direct API calls. +* About 78% of geo search are from bots. Outages and inaccuracies ------ diff --git a/tab_documentation/kpi_api_usage.md b/tab_documentation/kpi_api_usage.md index 0702341..d44aa56 100644 --- a/tab_documentation/kpi_api_usage.md +++ b/tab_documentation/kpi_api_usage.md @@ -1,7 +1,24 @@ Key Performance Indicator: API usage ======= -We want people, both within our movement and outside it, to be able to easily access our information. +We want people, both within our movement and outside it, to be able to easily access our information. This dashboard breaks down API usage by search request type.
For more information about each type of request, see the following dashboards: + +* [Full-text search](http://discovery.wmflabs.org/metrics/#fulltext_search): searching for a particular term via the API and getting back packages that contain that term in either the title or the page's content (see [Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Full_text_search) for more details). +* [Morelike search](http://discovery.wmflabs.org/metrics/#morelike_search): morelike is a feature of CirrusSearch that is used in extensions like [RelatedArticles](https://www.mediawiki.org/wiki/Extension:RelatedArticles) (see [Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Morelike) for more details). +* [Open search](http://discovery.wmflabs.org/metrics/#open_search): [OpenSearch](https://en.wikipedia.org/wiki/OpenSearch) is a search format optimised for syndication and aggregation. +* [Geo search](http://discovery.wmflabs.org/metrics/#geo_search): Geo Search, or Geographic Search, refers to the ability to search for pages that are “near” a particular set of geographic coordinates, in the sense of being about subjects that have physical locations we track (see [Help:CirrusSearch](https://www.mediawiki.org/wiki/Help:CirrusSearch#Geo_Search) for more details). +* [Prefix search](http://discovery.wmflabs.org/metrics/#prefix_search): Prefix Search is simply run over page titles, and lets a user find pages that start with (“prefix”) a particular term. +* [Language search](http://discovery.wmflabs.org/metrics/#language_search): Language search allows you to search for a particular language name in different scripts. + +For overall break down by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown). + +General findings +------ + +* About 56% of all API calls are morelike search which is used by RelatedArticles on mobile. 
We have more traffic during the weekends since users generally spend more time on their mobile devices on weekends. +* About 22% of all API calls are open search on desktop. The usage is higher on weekdays like we see in other desktop usage pattern. +* About 14% of all API calls are prefix search. And about 75% of prefix search via API are done on mobile. +* About 8% of all API calls are full-text search. And about 80% of full-text search via API are from bots. Outages and inaccuracies ------ diff --git a/tab_documentation/language_basic.md b/tab_documentation/language_basic.md index 1bd3792..c3d6ae0 100644 --- a/tab_documentation/language_basic.md +++ b/tab_documentation/language_basic.md @@ -3,10 +3,21 @@ Language search doesn't actually allow you to search in different languages - it allows you to search for a particular language name in different scripts. This can be done through the API with `action=languagesearch`, which is what we're tracking here. -General trends +On this dashboard, we breakdown API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. +* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). 
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. + +For overall break down by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings ------ -Given the limited utility of this, we'd expect to see (and do see) very few hits. +* About 93% of language search are internal API calls and about 7% of language search are direct traffic. +* Given the limited utility of this, we'd expect to see (and do see) very few hits. Outages and inaccuracies ------ diff --git a/tab_documentation/morelike_basic.md b/tab_documentation/morelike_basic.md index 0d1c5ec..88c92d5 100644 --- a/tab_documentation/morelike_basic.md +++ b/tab_documentation/morelike_basic.md @@ -1,8 +1,24 @@ -Morelike Search via API usage +Morelike Search API usage ======= [Morelike](https://www.mediawiki.org/wiki/Help:CirrusSearch#Morelike) is a feature of CirrusSearch that is used in extensions like [RelatedArticles](https://www.mediawiki.org/wiki/Extension:RelatedArticles). As of 2017-06-29, we use a new UDF which can distinguish morelike vs fulltext search API requests (see [Gerrit change 345863](https://gerrit.wikimedia.org/r/#/c/345863/) for more details). +On this dashboard, we breakdown API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). +* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. + +For overall break down by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings +------ + +* Almost all of the morelike searches via API are used by RelatedArticles on mobile -- about 94% through mobile web and about 6% through apps. We have more traffic during the weekends since users generally spend more time on their mobile devices on weekends. +* About 50% of the morelike search API usage are direct traffic. However, 88% (around 60 million API calls per day) of these direct morelike search are from mobile web and they might be misclassified internal traffic. 
Further investigation is needed. + Questions, bug reports, and feature suggestions ------ For technical, non-bug questions, [email Mikhail](mailto:mpo...@wikimedia.org?subject=Dashboard%20Question) or [Chelsy](mailto:c...@wikimedia.org?subject=Dashboard%20Question). If you experience a bug or notice something wrong or have a suggestion, [open a ticket in Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery) in the Discovery board or [email Deb](mailto:d...@wikimedia.org?subject=Dashboard%20Question). diff --git a/tab_documentation/open_basic.md b/tab_documentation/open_basic.md index 99be0f8..82d659e 100644 --- a/tab_documentation/open_basic.md +++ b/tab_documentation/open_basic.md @@ -3,6 +3,21 @@ [OpenSearch](https://en.wikipedia.org/wiki/OpenSearch) is a search format optimised for syndication and aggregation. MediaWiki has OpenSearch support, and so does its API; that's what's tracked here. OpenSearch allows you to retrieve full content as well as snippets, and allows you to *search* full content for your search term. +On this dashboard, we breakdown API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. +* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). 
+* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. + +For overall break down by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings +------ + +* Almost all of the open search are done on desktop. We have more traffic during the weekdays because users generally spend more time on their desktop on weekdays. + Outages and inaccuracies ------ diff --git a/tab_documentation/prefix_basic.md b/tab_documentation/prefix_basic.md index c6e94d0..ed3eef3 100644 --- a/tab_documentation/prefix_basic.md +++ b/tab_documentation/prefix_basic.md @@ -3,10 +3,22 @@ Prefix Search is simply run over page titles, and lets a user find pages that start with ("prefix") a particular term. This is tremendously useful and is where the dropdown box in Wikipedia's user-facing search comes from. It's also exposed via the API, and hits to that service are what we're tracking here. -General trends +On this dashboard, we breakdown API calls by the following class of referrer: + +* **None** is direct traffic that has empty referrer header. 
+* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information.). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information.). +* **Unknown** is traffic that does not have a HTTP referrer or has unusual referrer header (See [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information.). +* **External** is traffic referred by something other than search engine. + +For overall break down by referrer class, see [http://discovery.wmflabs.org/metrics/#referer_breakdown](http://discovery.wmflabs.org/metrics/#referer_breakdown); for overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings ------ -Prefix Search isn't tremendously used, which makes sense; for external services, the cost of making the call to us is too high to provide seamless dropdowns in the fashion we can on Wikipedia proper. +* About 47% of prefix search are direct API calls. 57% of these direct traffic are from Wikipedia apps, 41% of these direct traffic are from mobile web which might be misclassified internal traffic. Further investigation is needed. +* About 75% of prefix search via API are done on mobile. 
+* Prefix Search isn't tremendously used by external services, because the cost of making the call to us is too high to provide seamless dropdowns in the fashion we can on Wikipedia proper. Notes ------ diff --git a/tab_documentation/referer_breakdown.md b/tab_documentation/referer_breakdown.md index 61ddb1f..bc79c6c 100644 --- a/tab_documentation/referer_breakdown.md +++ b/tab_documentation/referer_breakdown.md @@ -1,9 +1,21 @@ API Calls by Referrer Class ======= -All types of API calls are aggregated by date and referrer class. +All types of API calls are aggregated by date and the following referrer classes: -**Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (See [Webrequest source](https://git.wikimedia.org/blob/analytics%2Frefinery%2Fsource.git/master/refinery-core%2Fsrc%2Fmain%2Fjava%2Forg%2Fwikimedia%2Fanalytics%2Frefinery%2Fcore%2FWebrequest.java#L203) for more information.) +* **None** is direct traffic that has an empty referrer header. +* **Internal** is traffic referred by Wikimedia sites, specifically: mediawiki.org, wikibooks.org, wikidata.org, wikinews.org, wikimedia.org, wikimediafoundation.org, wikipedia.org, wikiquote.org, wikisource.org, wikiversity.org, wikivoyage.org, and wiktionary.org (see [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$212-223) for more information). +* **Search engine** is traffic referred by Google, Bing, Yandex, Yahoo, DuckDuckGo or Baidu (see [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/SearchEngineClassifier.java$41) for more information). 
+* **Unknown** is traffic that does not have an HTTP referrer or has an unusual referrer header (see [Webrequest source](https://phabricator.wikimedia.org/diffusion/ANRS/browse/master/refinery-core/src/main/java/org/wikimedia/analytics/refinery/core/Webrequest.java$189-211) for more information). +* **External** is traffic referred by something other than a search engine. + +For the overall breakdown by search request type, see [http://discovery.wmflabs.org/metrics/#kpi_api_usage](http://discovery.wmflabs.org/metrics/#kpi_api_usage). + +General findings +------ + +* About 50% of API calls are internal traffic. 63% of internal traffic is from mobile web and the remaining 37% is from desktop. +* About 49% of API calls are direct traffic. 73% of direct traffic is from mobile (apps and mobile web) and the remaining 27% is from desktop. Outages and inaccuracies ------ diff --git a/ui.R b/ui.R index 73c8980..441261b 100644 --- a/ui.R +++ b/ui.R @@ -144,10 +144,16 @@ "Day-to-day % change" = "change"), inline = TRUE), width = 4), - column(checkboxInput("kpi_api_usage_series_log_scale", - label = "Log10 Scale", - value = FALSE), - width = 4), + column(conditionalPanel("input.kpi_api_usage_series_data == 'raw'", + checkboxInput("kpi_api_usage_series_log_scale", + label = "Log10 Scale", + value = FALSE)), + width = 2), + column(conditionalPanel("input.kpi_api_usage_series_data == 'raw'", + checkboxInput("kpi_api_usage_series_prop", + label = "Use Proportion", + value = FALSE)), + width = 2), column(polloi::smooth_select("smoothing_kpi_api_usage"), width = 4)), div(id = "kpi_api_usage_series_legend", style = "text-align: right;"), dygraphOutput("kpi_api_usage_series"), @@ -223,50 +229,85 @@ includeMarkdown("./tab_documentation/invoke_source.md") ), tabItem(tabName = "fulltext_search", - polloi::smooth_select("smoothing_fulltext_search"), + fluidRow(column(checkboxInput("fulltext_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + 
column(checkboxInput("fulltext_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_fulltext_search"), width = 8)), div(id = "cirrus_aggregate_legend", style = "text-align: right;"), dygraphOutput("cirrus_aggregate"), includeMarkdown("./tab_documentation/fulltext_basic.md") ), tabItem(tabName = "morelike_search", - polloi::smooth_select("smoothing_morelike_search"), + fluidRow(column(checkboxInput("morelike_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("morelike_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_morelike_search"), width = 8)), div(id = "morelike_aggregate_legend", style = "text-align: right;"), dygraphOutput("morelike_aggregate"), includeMarkdown("./tab_documentation/morelike_basic.md") ), tabItem(tabName = "open_search", - polloi::smooth_select("smoothing_open_search"), + fluidRow(column(checkboxInput("open_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("open_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_open_search"), width = 8)), div(id = "open_aggregate_legend", style = "text-align: right;"), dygraphOutput("open_aggregate"), includeMarkdown("./tab_documentation/open_basic.md") ), tabItem(tabName = "geo_search", - polloi::smooth_select("smoothing_geo_search"), + fluidRow(column(checkboxInput("geo_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("geo_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_geo_search"), width = 8)), div(id = "geo_aggregate_legend", style = "text-align: right;"), dygraphOutput("geo_aggregate"), includeMarkdown("./tab_documentation/geo_basic.md") ), tabItem(tabName = "prefix_search", - 
polloi::smooth_select("smoothing_prefix_search"), + fluidRow(column(checkboxInput("prefix_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("prefix_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_prefix_search"), width = 8)), div(id = "prefix_aggregate_legend", style = "text-align: right;"), dygraphOutput("prefix_aggregate"), includeMarkdown("./tab_documentation/prefix_basic.md") ), tabItem(tabName = "language_search", - polloi::smooth_select("smoothing_language_search"), + fluidRow(column(checkboxInput("language_search_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("language_search_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_language_search"), width = 8)), div(id = "language_aggregate_legend", style = "text-align: right;"), dygraphOutput("language_aggregate"), includeMarkdown("./tab_documentation/language_basic.md") ), tabItem(tabName = "referer_breakdown", - fluidRow( - column( - HTML("<label class = \"control-label\" style = \"margin-bottom:-30px;\">Type</label>"), - shiny::checkboxInput("referer_breakdown_prop", label = "Use Proportion", value = FALSE), - width = 2 - ), - column(polloi::smooth_select("smoothing_referer_breakdown"), width = 10) - ), + fluidRow(column(checkboxInput("referer_breakdown_log_scale", + label = "Log10 Scale", value = FALSE), + width = 2), + column(checkboxInput("referer_breakdown_prop", + label = "Use Proportion", value = FALSE), + width = 2), + column(polloi::smooth_select("smoothing_referer_breakdown"), width = 8)), div(id = "referer_breakdown_plot_legend", style = "text-align: right;"), dygraphOutput("referer_breakdown_plot"), includeMarkdown("./tab_documentation/referer_breakdown.md") -- To view, visit https://gerrit.wikimedia.org/r/378067 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings 
Gerrit-MessageType: newchange Gerrit-Change-Id: If97bb9cd23ae93117d106012d69b8f6250a19ce9 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/rainbow Gerrit-Branch: develop Gerrit-Owner: Chelsyx <c...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits