Chelsyx has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/379150 )

Change subject: Add maplink & mapframe prevalence graphs and modularize
......................................................................


Add maplink & mapframe prevalence graphs and modularize

- Splits up server.R into modules (like Search & Portal dashboards)
- Adds maplink & mapframe prevalence graphs
  - Overall prevalence
  - Language-project breakdown of prevalence

Bug: T170022
Change-Id: If1f1efa619037ce8adea873c148f9a1f78376506
---
M CHANGELOG.md
A modules/feature_usage.R
A modules/geographic_breakdown.R
A modules/kartographer/language-project_breakdown.R
A modules/kartographer/overall_prevalence.R
A modules/kartotherian.R
M server.R
A tab_documentation/overall_prevalence.md
A tab_documentation/prevalence_langproj.md
M tab_documentation/tiles_summary.md
M ui.R
M utils.R
12 files changed, 586 insertions(+), 160 deletions(-)

Approvals:
  Chelsyx: Verified; Looks good to me, approved



diff --git a/CHANGELOG.md b/CHANGELOG.md
index 208e2ab..f3e77ee 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,10 @@
 # Change Log (Patch Notes)
 All notable changes to this project will be documented in this file.
 
+## 2017/09/18
+- Modularized the dashboard source code
+- Added maplink & mapframe prevalence graphs ([T170022](https://phabricator.wikimedia.org/T170022))
+
 ## 2017/06/20
 - Added licensing info ([T167930](https://phabricator.wikimedia.org/T167930))
 
diff --git a/modules/feature_usage.R b/modules/feature_usage.R
new file mode 100644
index 0000000..ec2460e
--- /dev/null
+++ b/modules/feature_usage.R
@@ -0,0 +1,55 @@
+output$users_per_platform <- renderDygraph({
+  user_data %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_users_per_platform)) %>%
+    polloi::make_dygraph("Date", "Events", "Unique users by platform, by day") 
%>%
+    dyAxis("y", logscale = input$users_per_platform_logscale) %>%
+    dyLegend(labelsDiv = "users_per_platform_legend", show = "always") %>%
+    dyRangeSelector %>%
+    dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
+
+output$geohack_feature_usage <- renderDygraph({
+  usage_data$GeoHack %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_geohack_feature_usage)) %>%
+    polloi::make_dygraph("Date", "Events", "Feature usage for GeoHack") %>%
+    dyRangeSelector %>%
+    dyAxis("y", logscale = input$geohack_feature_usage_logscale) %>%
+    dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
+
+output$wikiminiatlas_feature_usage <- renderDygraph({
+  usage_data$WikiMiniAtlas %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wikiminiatlas_feature_usage)) %>%
+    polloi::make_dygraph("Date", "Events", "Feature usage for WikiMiniAtlas") 
%>%
+    dyRangeSelector %>%
+    dyAxis("y", logscale = input$wikiminiatlas_feature_usage_logscale) %>%
+    dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
+
+output$wikivoyage_feature_usage <- renderDygraph({
+  usage_data$Wikivoyage %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wikivoyage_feature_usage)) %>%
+    polloi::make_dygraph("Date", "Events", "Feature usage for Wikivoyage") %>%
+    dyRangeSelector %>%
+    dyAxis("y", logscale = input$wikivoyage_feature_usage_logscale) %>%
+    dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
+
+output$wiwosm_feature_usage <- renderDygraph({
+  usage_data$WIWOSM %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wiwosm_feature_usage)) %>%
+    polloi::make_dygraph("Date", "Events", "Feature usage for WIWOSM") %>%
+    dyRangeSelector %>%
+    dyAxis("y", logscale = input$wiwosm_feature_usage_logscale) %>%
+    dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
diff --git a/modules/geographic_breakdown.R b/modules/geographic_breakdown.R
new file mode 100644
index 0000000..99deca5
--- /dev/null
+++ b/modules/geographic_breakdown.R
@@ -0,0 +1,7 @@
+output$users_by_country <- renderDygraph({
+  country_data %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_users_by_country)) %>%
+    polloi::make_dygraph("Date", "Users (%)", "Geographic breakdown of maps 
users") %>%
+    dyRangeSelector(fillColor = "", strokeColor = "") %>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
diff --git a/modules/kartographer/language-project_breakdown.R 
b/modules/kartographer/language-project_breakdown.R
new file mode 100644
index 0000000..f10e982
--- /dev/null
+++ b/modules/kartographer/language-project_breakdown.R
@@ -0,0 +1,192 @@
+output$language_selector_container <- renderUI({
+  req(input$project_selector)
+  temp_language <- available_languages
+  if ("Maplink" %in% input$prevalence_langproj_feature) {
+    temp_language <- temp_language[temp_language$maplink, ]
+  }
+  if ("Mapframe" %in% input$prevalence_langproj_feature) {
+    temp_language <- temp_language[temp_language$mapframe, ]
+  }
+
+  if (input$language_order == "alphabet") {
+    languages_to_display <- sort(temp_language$language)
+  } else {
+    languages_to_display <- 
temp_language$language[order(temp_language$articles, decreasing = TRUE)]
+  }
+
+  # e.g. if user sorts projects alphabetically and the selected project is "10th Anniversary of Wikipedia"
+  #      then automatically select the language "(None)" to avoid giving user an error. This also works if
+  #      the user selects a project that is not multilingual, so this automatically chooses the "(None)"
+  #      option for the user.
+  if (any(input$project_selector %in% 
projects_db$project[!projects_db$multilingual])) {
+    if (any(input$project_selector %in% 
projects_db$project[projects_db$multilingual])) {
+      if (!is.null(input$language_selector)) {
+        selected_language <- union("(None)", input$language_selector)
+      } else {
+        selected_language <- c("(None)", languages_to_display[[1]])
+      }
+    } else {
+      selected_language <- "(None)"
+    }
+  } else {
+    if (!is.null(input$language_selector)) {
+      selected_language <- input$language_selector
+    } else {
+      selected_language <- languages_to_display[[1]]
+    }
+  }
+
+  return(selectInput(
+    "language_selector", "Language",
+    multiple = TRUE, selectize = FALSE, size = 19,
+    choices = languages_to_display, selected = selected_language
+  ))
+})
+
+output$project_selector_container <- renderUI({
+  temp_project <- available_projects
+  if ("Maplink" %in% input$prevalence_langproj_feature) {
+    temp_project <- temp_project[temp_project$maplink, ]
+  }
+  if ("Mapframe" %in% input$prevalence_langproj_feature) {
+    temp_project <- temp_project[temp_project$mapframe, ]
+  }
+
+  if (input$project_order == "alphabet") {
+    projects_to_display <- sort(temp_project$project)
+  } else {
+    projects_to_display <- temp_project$project[order(temp_project$articles, 
decreasing = TRUE)]
+  }
+
+  if (!is.null(input$project_selector)) {
+    selected_project <- input$project_selector
+  } else {
+    selected_project <- projects_to_display[1]
+  }
+
+  return(selectInput(
+    "project_selector", "Project",
+    multiple = TRUE, selectize = FALSE, size = 19,
+    choices = projects_to_display, selected = selected_project
+  ))
+})
+
+output$prevalence_langproj_plot <- renderDygraph({
+  req(input$prevalence_langproj_feature, input$language_selector, 
input$project_selector)
+
+  if (length(input$prevalence_langproj_feature) == 1) {
+    if ("Maplink" %in% input$prevalence_langproj_feature) {
+      feature_prevalence <- maplink_prevalence %>%
+        dplyr::mutate(feature_articles = maplink_articles)
+    } else {
+      feature_prevalence <- mapframe_prevalence %>%
+        dplyr::mutate(feature_articles = mapframe_articles)
+    }
+    if (length(input$language_selector) == 1 && input$language_selector[1] == 
"(None)") {
+      feature_prevalence <- feature_prevalence %>%
+      {
+        if (input$prevalence_langproj_aggregation %in% c("Average", "Median")) 
{
+          if (input$prevalence_langproj_aggregation == "Average") {
+            aggregation_function <- mean
+          } else {
+            aggregation_function <- median
+          }
+          dplyr::group_by(., date, key = project, language) %>%
+            dplyr::summarize(value = round(100 * sum(feature_articles, na.rm = 
TRUE) / sum(total_articles, na.rm = TRUE), 2)) %>%
+            dplyr::summarize(value = aggregation_function(value, na.rm = TRUE))
+        } else {
+          dplyr::group_by(., date, key = project) %>%
+            dplyr::summarize(value = round(100 * sum(feature_articles, na.rm = 
TRUE) / sum(total_articles, na.rm = TRUE), 2))
+        }
+      } %>%
+        dplyr::ungroup() %>%
+        tidyr::spread(key, value)
+    } else {
+      feature_prevalence <- feature_prevalence %>%
+        dplyr::mutate(
+          label = paste0(dplyr::if_else(is.na(language), "", paste0(language, 
" ")), project),
+          language = dplyr::if_else(is.na(language), "(None)", language)
+        ) %>%
+        dplyr::filter(language %in% input$language_selector, project %in% 
input$project_selector) %>%
+        dplyr::group_by(date, key = label) %>%
+        dplyr::summarize(value = round(100 * sum(feature_articles, na.rm = 
TRUE) / sum(total_articles, na.rm = TRUE), 2)) %>%
+        dplyr::ungroup() %>%
+        tidyr::spread(key, value)
+    }
+  } else {
+    feature_prevalence <- dplyr::full_join(
+      maplink_prevalence,
+      mapframe_prevalence,
+      by = c("date", "wiki", "project", "language")
+    ) %>%
+      dplyr::filter(project %in% input$project_selector)
+    if (length(input$language_selector) == 1 && input$language_selector[1] == 
"(None)") {
+      feature_prevalence <- feature_prevalence %>%
+      {
+        if (input$prevalence_langproj_aggregation %in% c("Average", "Median")) 
{
+          if (input$prevalence_langproj_aggregation == "Average") {
+            aggregation_function <- mean
+          } else {
+            aggregation_function <- median
+          }
+          dplyr::group_by(., date, label = project, language) %>%
+            dplyr::summarize(
+              Maplink = round(100 * sum(maplink_articles, na.rm = TRUE) / 
sum(total_articles.x, na.rm = TRUE), 2),
+              Mapframe = round(100 * sum(mapframe_articles, na.rm = TRUE) / 
sum(total_articles.y, na.rm = TRUE), 2)
+            ) %>%
+            dplyr::summarize(
+              Maplink = aggregation_function(Maplink, na.rm = TRUE),
+              Mapframe = aggregation_function(Mapframe, na.rm = TRUE)
+            )
+        } else {
+          # input$prevalence_langproj_aggregation == "Overall"
+          dplyr::group_by(., date, label = project) %>%
+            dplyr::summarize(
+              Maplink = round(100 * sum(maplink_articles, na.rm = TRUE) / 
sum(total_articles.x, na.rm = TRUE), 2),
+              Mapframe = round(100 * sum(mapframe_articles, na.rm = TRUE) / 
sum(total_articles.y, na.rm = TRUE), 2)
+            )
+        }
+      } %>%
+        dplyr::ungroup() %>%
+        tidyr::gather(feature, prevalence, -c(date, label)) %>%
+        dplyr::transmute(
+          date = date, value = prevalence,
+          key = paste0(feature, " (", label, ")")
+        ) %>%
+        tidyr::spread(key, value)
+    } else {
+      feature_prevalence <- feature_prevalence %>%
+        dplyr::mutate(
+          label = paste0(dplyr::if_else(is.na(language), "", paste0(language, 
" ")), project),
+          language = dplyr::if_else(is.na(language), "(None)", language)
+        ) %>%
+        dplyr::filter(language %in% input$language_selector, project %in% 
input$project_selector)
+      if (nrow(feature_prevalence) > 0) {
+        feature_prevalence <- feature_prevalence %>%
+          dplyr::group_by(date, label) %>%
+          dplyr::summarize(
+            Maplink = round(100 * sum(maplink_articles, na.rm = TRUE) / 
sum(total_articles.x, na.rm = TRUE), 2),
+            Mapframe = round(100 * sum(mapframe_articles, na.rm = TRUE) / 
sum(total_articles.y, na.rm = TRUE), 2)
+          ) %>%
+          dplyr::ungroup() %>%
+          tidyr::gather(feature, prevalence, -c(date, label)) %>%
+          dplyr::transmute(
+            date = date, value = prevalence,
+            key = paste0(feature, " (", label, ")")
+          ) %>%
+          tidyr::spread(key, value)
+      } else {
+        return(invisible(NULL))
+      }
+    }
+  }
+  feature_prevalence %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_prevalence_langproj), rename = FALSE) %>%
+    polloi::reorder_columns() %>%
+    polloi::make_dygraph(
+      "Date", "Prevalence (%)",
+      paste(paste(input$prevalence_langproj_feature, collapse = " and "), 
"prevalence")
+    ) %>%
+    dyLegend(show = "always", width = 400, labelsDiv = 
"prevalence_langproj_legend") %>%
+    dyRangeSelector(retainDateWindow = TRUE, fillColor = "", strokeColor = "")
+})
diff --git a/modules/kartographer/overall_prevalence.R 
b/modules/kartographer/overall_prevalence.R
new file mode 100644
index 0000000..cd28296
--- /dev/null
+++ b/modules/kartographer/overall_prevalence.R
@@ -0,0 +1,7 @@
+output$overall_prevalence_series <- renderDygraph({
+  prevalence %>%
+    polloi::reorder_columns() %>%
+    polloi::make_dygraph("Date", "Prevalence (%)", "Maplink and Mapframe 
prevalence on Wikimedia projects") %>%
+    dyLegend(labelsDiv = "overall_prevalence_series_legend", show = "always") 
%>%
+    dyRangeSelector(retainDateWindow = TRUE, fillColor = "", strokeColor = "")
+})
diff --git a/modules/kartotherian.R b/modules/kartotherian.R
new file mode 100644
index 0000000..ead8141
--- /dev/null
+++ b/modules/kartotherian.R
@@ -0,0 +1,88 @@
+output$tiles_summary_series <- renderDygraph({
+  temp <- polloi::data_select(input$tile_summary_automata_check, 
new_tiles_automata, new_tiles_no_automata)[, list(
+    `total tiles` = sum(total),
+    `total users` = sum(users),
+    `average tiles per user` = sum(total)/sum(users)
+  ), by = "date"]
+  switch(input$tiles_summary_variable,
+         Users = { temp %<>% dplyr::select(-`total tiles`) },
+         Tiles = { temp %<>% dplyr::select(-`total users`) })
+  temp %<>% polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_summary_series))
+  polloi::make_dygraph(temp, "Date", input$tiles_summary_variable, "Tile 
usage") %>%
+    dySeries(name = grep('average tiles per user', names(temp), value = TRUE), 
axis = 'y2') %>%
+    dyAxis(name = 'y', drawGrid = FALSE, logscale = 
input$tiles_summary_logscale) %>%
+    dyAxis(name = 'y2', independentTicks = TRUE, drawGrid = FALSE) %>%
+    dyLegend(labelsDiv = "tiles_summary_series_legend", show = "always") %>%
+    dyRangeSelector(retainDateWindow = TRUE) %>%
+    dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-11-09"), "D (pkget)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
+})
+
+output$tiles_style_series <- renderDygraph({
+  polloi::data_select(
+    input$tile_style_automata_check,
+    new_tiles_automata,
+    new_tiles_no_automata
+  )[, j = list(`total tiles` = sum(total)),
+    by = c("date", "style")] %>%
+    tidyr::spread(style, `total tiles`, fill = 0) %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_style_series)) %>%
+    polloi::make_dygraph("Date", "Tiles", "Total tiles by style", legend_name 
= "Style") %>%
+    dyAxis("y", logscale = input$tiles_style_logscale) %>%
+    dyLegend(labelsDiv = "tiles_style_series_legend", show = "always") %>%
+    dyRangeSelector %>%
+    dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-11-09"), "D (pkget)", labelLoc = "top") %>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
+})
+
+output$tiles_users_series <- renderDygraph({
+  polloi::data_select(
+    input$tile_users_automata_check,
+    new_tiles_automata,
+    new_tiles_no_automata
+  )[, j = list(`total users` = sum(users)),
+    by = c("date", "style")] %>%
+    tidyr::spread(style, `total users`, fill = 0) %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_users_series)) %>%
+    polloi::make_dygraph("Date", "Users", "Total users by style") %>%
+    dyAxis("y", logscale = input$tiles_users_logscale) %>%
+    dyLegend(labelsDiv = "tiles_users_series_legend", show = "always") %>%
+    dyRangeSelector %>%
+    dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") %>%
+    dyEvent(as.Date("2016-11-08"), "D (pkget)", labelLoc = "top") %>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
+    dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
+})
+
+output$zoom_level_selector_container <- renderUI({
+  selectInput("zoom_level_selector", "Zoom level",
+              multiple = TRUE, selected = "0", selectize = FALSE, size = 19,
+              choices = as.character(sort(unique(new_tiles_no_automata$zoom))))
+})
+
+output$tiles_zoom_series <- renderDygraph({
+  req(input$zoom_level_selector)
+  polloi::data_select(
+    input$tile_zoom_automata_check,
+    new_tiles_automata,
+    new_tiles_no_automata
+  )[zoom %in% as.numeric(input$zoom_level_selector),
+    j = list(`total tiles` = sum(total)),
+    by = c("date", "zoom")] %>%
+    tidyr::spread(zoom, `total tiles`, fill = 0) %>%
+    polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_zoom_series)) %>%
+    polloi::make_dygraph("Date", "Tiles", "Total tiles by zoom level") %>%
+    dyAxis("y", logscale = input$tiles_zoom_logscale) %>%
+    dyLegend(labelsDiv = "tiles_zoom_series_legend", show = "always") %>%
+    dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
+})
diff --git a/server.R b/server.R
index eb642d5..8a134aa 100644
--- a/server.R
+++ b/server.R
@@ -12,6 +12,8 @@
     read_actions()
     progress$set(message = "Downloading EL user counts", value = 0.2)
     read_users()
+    progress$set(message = "Downloading prevalence data", value = 0.3)
+    read_prevalence()
     progress$set(message = "Downloading tile usage data", value = 0.4)
     suppressWarnings(read_tiles())
     progress$set(message = "Downloading geography data", value = 0.8)
@@ -21,159 +23,14 @@
     progress$close()
   }
 
-  output$tiles_summary_series <- renderDygraph({
-    temp <- polloi::data_select(input$tile_summary_automata_check, 
new_tiles_automata, new_tiles_no_automata)[, list(
-      `total tiles` = sum(total),
-      `total users` = sum(users),
-      `average tiles per user` = sum(total)/sum(users)
-    ), by = "date"]
-    switch(input$tiles_summary_variable,
-           Users = { temp %<>% dplyr::select(-`total tiles`) },
-           Tiles = { temp %<>% dplyr::select(-`total users`) })
-    temp %<>% polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_summary_series))
-    polloi::make_dygraph(temp, "Date", input$tiles_summary_variable, "Tile 
usage") %>%
-      dySeries(name = grep('average tiles per user', names(temp), value = 
TRUE), axis = 'y2') %>%
-      dyAxis(name = 'y', drawGrid = FALSE, logscale = 
input$tiles_summary_logscale) %>%
-      dyAxis(name = 'y2', independentTicks = TRUE, drawGrid = FALSE) %>%
-      dyLegend(labelsDiv = "tiles_summary_series_legend", show = "always") %>%
-      dyRangeSelector(retainDateWindow = TRUE) %>%
-      dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-11-09"), "D (pkget)", labelLoc = "bottom") %>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
-  })
-
-  output$tiles_style_series <- renderDygraph({
-    polloi::data_select(
-      input$tile_style_automata_check,
-      new_tiles_automata,
-      new_tiles_no_automata
-    )[, j = list(`total tiles` = sum(total)),
-        by = c("date", "style")] %>%
-      tidyr::spread(style, `total tiles`, fill = 0) %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_style_series)) %>%
-      polloi::make_dygraph("Date", "Tiles", "Total tiles by style", 
legend_name = "Style") %>%
-      dyAxis("y", logscale = input$tiles_style_logscale) %>%
-      dyLegend(labelsDiv = "tiles_style_series_legend", show = "always") %>%
-      dyRangeSelector %>%
-      dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-11-09"), "D (pkget)", labelLoc = "top") %>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
-  })
-
-  output$tiles_users_series <- renderDygraph({
-    polloi::data_select(
-      input$tile_users_automata_check,
-      new_tiles_automata,
-      new_tiles_no_automata
-    )[, j = list(`total users` = sum(users)),
-        by = c("date", "style")] %>%
-      tidyr::spread(style, `total users`, fill = 0) %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_users_series)) %>%
-      polloi::make_dygraph("Date", "Users", "Total users by style") %>%
-      dyAxis("y", logscale = input$tiles_users_logscale) %>%
-      dyLegend(labelsDiv = "tiles_users_series_legend", show = "always") %>%
-      dyRangeSelector %>%
-      dyEvent(as.Date("2015-09-17"), "A (announcement)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-08"), "B (enwiki launch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-01-12"), "C (cache clear)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-11-08"), "D (pkget)", labelLoc = "top") %>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-08-14"), "E (pkget)", labelLoc = "bottom")
-  })
-
-  output$zoom_level_selector_container <- renderUI({
-    selectInput("zoom_level_selector", "Zoom level",
-                multiple = TRUE, selected = "0", selectize = FALSE, size = 19,
-                choices = 
as.character(sort(unique(new_tiles_no_automata$zoom))))
-  })
-
-  output$tiles_zoom_series <- renderDygraph({
-    req(input$zoom_level_selector)
-    polloi::data_select(
-      input$tile_zoom_automata_check,
-      new_tiles_automata,
-      new_tiles_no_automata
-    )[zoom %in% as.numeric(input$zoom_level_selector),
-      j = list(`total tiles` = sum(total)),
-      by = c("date", "zoom")] %>%
-      tidyr::spread(zoom, `total tiles`, fill = 0) %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_tiles_zoom_series)) %>%
-      polloi::make_dygraph("Date", "Tiles", "Total tiles by zoom level") %>%
-      dyAxis("y", logscale = input$tiles_zoom_logscale) %>%
-      dyLegend(labelsDiv = "tiles_zoom_series_legend", show = "always") %>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-  output$users_per_platform <- renderDygraph({
-    user_data %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_users_per_platform)) %>%
-      polloi::make_dygraph("Date", "Events", "Unique users by platform, by 
day") %>%
-      dyAxis("y", logscale = input$users_per_platform_logscale) %>%
-      dyLegend(labelsDiv = "users_per_platform_legend", show = "always") %>%
-      dyRangeSelector %>%
-      dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-  output$geohack_feature_usage <- renderDygraph({
-    usage_data$GeoHack %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_geohack_feature_usage)) %>%
-      polloi::make_dygraph("Date", "Events", "Feature usage for GeoHack") %>%
-      dyRangeSelector %>%
-      dyAxis("y", logscale = input$geohack_feature_usage_logscale) %>%
-      dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-  output$wikiminiatlas_feature_usage <- renderDygraph({
-    usage_data$WikiMiniAtlas %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wikiminiatlas_feature_usage)) %>%
-      polloi::make_dygraph("Date", "Events", "Feature usage for 
WikiMiniAtlas") %>%
-      dyRangeSelector %>%
-      dyAxis("y", logscale = input$wikiminiatlas_feature_usage_logscale) %>%
-      dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-  output$wikivoyage_feature_usage <- renderDygraph({
-    usage_data$Wikivoyage %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wikivoyage_feature_usage)) %>%
-      polloi::make_dygraph("Date", "Events", "Feature usage for Wikivoyage") 
%>%
-      dyRangeSelector %>%
-      dyAxis("y", logscale = input$wikivoyage_feature_usage_logscale) %>%
-      dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-  output$wiwosm_feature_usage <- renderDygraph({
-    usage_data$WIWOSM %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_wiwosm_feature_usage)) %>%
-      polloi::make_dygraph("Date", "Events", "Feature usage for WIWOSM") %>%
-      dyRangeSelector %>%
-      dyAxis("y", logscale = input$wiwosm_feature_usage_logscale) %>%
-      dyEvent(as.Date("2016-04-15"), "A (Maps EL bug)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2016-06-17"), "A (Maps EL patch)", labelLoc = "bottom") 
%>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
-
-
-  output$users_by_country <- renderDygraph({
-    country_data %>%
-      polloi::smoother(smooth_level = 
polloi::smooth_switch(input$smoothing_global, 
input$smoothing_users_by_country)) %>%
-      polloi::make_dygraph("Date", "Users (%)", "Geographic breakdown of maps 
users") %>%
-      dyRangeSelector(fillColor = "", strokeColor = "") %>%
-      dyEvent(as.Date("2017-01-01"), "R (reportupdater)", labelLoc = "bottom")
-  })
+  # Kartotherian usage (tile requests):
+  source("modules/kartotherian.R", local = TRUE)
+  # Kartographer usage (maplink & mapframe):
+  source("modules/kartographer/overall_prevalence.R", local = TRUE)
+  source("modules/kartographer/language-project_breakdown.R", local = TRUE)
+  # Feature usage and geo-breakdown:
+  source("modules/feature_usage.R", local = TRUE)
+  source("modules/geographic_breakdown.R", local = TRUE)
 
   # Check datasets for missing data and notify user which datasets are missing data (if any)
   output$message_menu <- renderMenu({
@@ -183,7 +40,11 @@
       polloi::check_yesterday(user_data, "user counts"),
       polloi::check_past_week(user_data, "user counts"),
       polloi::check_yesterday(new_tiles_automata, "tile usage data"),
-      polloi::check_past_week(new_tiles_automata, "tile usage data"))
+      polloi::check_past_week(new_tiles_automata, "tile usage data"),
+      polloi::check_yesterday(maplink_prevalence, "maplink prevalence data"),
+      polloi::check_past_week(maplink_prevalence, "maplink prevalence data"),
+      polloi::check_yesterday(mapframe_prevalence, "mapframe prevalence data"),
+      polloi::check_past_week(mapframe_prevalence, "mapframe prevalence data"))
     notifications <- notifications[!sapply(notifications, is.null)]
     return(dropdownMenu(type = "notifications", .list = notifications, 
badgeStatus = "warning"))
   })
diff --git a/tab_documentation/overall_prevalence.md 
b/tab_documentation/overall_prevalence.md
new file mode 100644
index 0000000..a51db9e
--- /dev/null
+++ b/tab_documentation/overall_prevalence.md
@@ -0,0 +1,39 @@
+Overall maplink & mapframe usage
+=======
+
+[Kartographer](https://www.mediawiki.org/wiki/Extension:Kartographer) is a 
MediaWiki extension that allows editors to easily add [Wikimedia 
Maps](https://www.mediawiki.org/wiki/Maps) to articles. Editors (and bots) can 
add 
[*maplinks*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmaplink.3E)
 and 
[*mapframes*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmapframe.3E_usage)
 (where possible; refer to the list below) to pages such as places on 
Wikivoyage, landmarks on Wikipedia, and files on Wikimedia Commons.
+
+A **_maplink_** is a textual link (often coordinates) that a user can 
click on to view an interactive, potentially annotated map and is enabled on 
all Wikivoyage and Wikipedia languages. On Commons, camera coordinates -- which 
are automatically extracted from EXIF data for photo uploads -- show up as 
maplinks.
+
+A **_mapframe_** is a static thumbnail of a map that a user can click on to 
view an interactive, possibly annotated map and is enabled on all Wikivoyage 
languages but only some Wikipedias. As of September 18th, 2017, the mapframe 
feature is enabled on the following wikis:
+
+- [Metawiki](https://meta.wikimedia.org/)
+- [MediaWiki](https://www.mediawiki.org/)
+- [Wikimedia Ukraine](https://ua.wikimedia.org/)
+- [Wikivoyage](https://www.wikivoyage.org/) (all languages)
+- Wikipedia:
+    - [Catalan](https://ca.wikipedia.org/)
+    - [Hebrew](https://he.wikipedia.org/)
+    - [Russian](https://ru.wikipedia.org/)
+    - [Macedonian](https://mk.wikipedia.org/)
+    - [French](https://fr.wikipedia.org/)
+    - [Finnish](https://fi.wikipedia.org)
+    - [Norwegian](https://no.wikipedia.org/)
+    - [Swedish](https://sv.wikipedia.org/)
+    - [Portuguese](https://pt.wikipedia.org/)
+    - [Czech](https://cs.wikipedia.org/)
+    - [Basque](https://eu.wikipedia.org/)
+
+The overall prevalence is computed per-project by dividing the total number of 
articles with a maplink/mapframe across all languages of the project by the 
total number of articles across all languages of the project. In the case of 
Wikimedia Commons, MediaWiki, and Meta wiki (which are not multi-language), the 
overall prevalence is simply the prevalence within each of those projects.
+
+Questions, bug reports, and feature suggestions
+------
+For technical, non-bug questions, [email 
Mikhail](mailto:[email protected]?subject=Dashboard%20Question) or 
[Chelsy](mailto:[email protected]?subject=Dashboard%20Question). If you 
experience a bug or notice something wrong or have a suggestion, [open a ticket 
in 
Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery)
 in the Discovery board or [email 
Deb](mailto:[email protected]?subject=Dashboard%20Question).
+
+<hr style="border-color: gray;">
+<p style="font-size: small;">
+  <strong>Link to this dashboard:</strong> <a 
href="https://discovery.wmflabs.org/maps/#kartographer_prevalence">https://discovery.wmflabs.org/maps/#kartographer_prevalence</a>
+  | Page is available under <a 
href="https://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons 
Attribution-ShareAlike License">CC-BY-SA 3.0</a>
+  | <a href="https://phabricator.wikimedia.org/diffusion/WDWZ/" 
title="Wikimedia Maps Dashboard source code repository">Code</a> is licensed 
under <a 
href="https://phabricator.wikimedia.org/diffusion/WDWZ/browse/master/LICENSE.md"
 title="MIT License">MIT</a>
+  | Part of <a href="https://discovery.wmflabs.org/">Discovery Dashboards</a>
+</p>
diff --git a/tab_documentation/prevalence_langproj.md 
b/tab_documentation/prevalence_langproj.md
new file mode 100644
index 0000000..8294a15
--- /dev/null
+++ b/tab_documentation/prevalence_langproj.md
@@ -0,0 +1,47 @@
+Maplink & mapframe prevalence by language and project
+=======
+
+[Kartographer](https://www.mediawiki.org/wiki/Extension:Kartographer) is a 
MediaWiki extension that allows editors to easily add [Wikimedia 
Maps](https://www.mediawiki.org/wiki/Maps) to articles. Editors (and bots) can 
add 
[*maplinks*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmaplink.3E)
 and 
[*mapframes*](https://www.mediawiki.org/wiki/Help:Extension:Kartographer#.3Cmapframe.3E_usage)
 (where possible; refer to the list below) to pages such as places on 
Wikivoyage, landmarks on Wikipedia, and files on Wikimedia Commons.
+
+A **_maplink_** is a textual link (often coordinates) that a user can 
click on to view an interactive, potentially annotated map and is enabled on 
all Wikivoyage and Wikipedia languages. On Commons, camera coordinates -- which 
are automatically extracted from EXIF data for photo uploads -- show up as 
maplinks.
+
+A **_mapframe_** is a static thumbnail of a map that a user can click on to 
view an interactive, possibly annotated map and is enabled on all Wikivoyage 
languages but only some Wikipedias. As of September 18th, 2017, the mapframe 
feature is enabled on the following wikis:
+
+- [Metawiki](https://meta.wikimedia.org/)
+- [MediaWiki](https://www.mediawiki.org/)
+- [Wikimedia Ukraine](https://ua.wikimedia.org/)
+- [Wikivoyage](https://www.wikivoyage.org/) (all languages)
+- Wikipedia:
+    - [Catalan](https://ca.wikipedia.org/)
+    - [Hebrew](https://he.wikipedia.org/)
+    - [Russian](https://ru.wikipedia.org/)
+    - [Macedonian](https://mk.wikipedia.org/)
+    - [French](https://fr.wikipedia.org/)
+    - [Finnish](https://fi.wikipedia.org)
+    - [Norwegian](https://no.wikipedia.org/)
+    - [Swedish](https://sv.wikipedia.org/)
+    - [Portuguese](https://pt.wikipedia.org/)
+    - [Czech](https://cs.wikipedia.org/)
+    - [Basque](https://eu.wikipedia.org/)
+
+Notes
+-----
+
+* You can select multiple projects and multiple languages to compare 
simultaneously. (Hold down Ctrl on Windows or Command on Mac.)
+* The language picker will automatically choose "(None)" if you select a 
non-multilingual project such as Wikimedia Commons.
+* If you're interested in the overall metric for a multilingual project such 
as Wikipedia, make sure only "(None)" is selected in the languages picker. 
There are multiple aggregation options:
+    * __Overall__: divides the total number of articles with a 
maplink/mapframe by the total number of articles across all languages
+    * __Average__: computes the prevalence on a per-language basis and then 
computes the average of those prevalences
+    * __Median__: computes the prevalence on a per-language basis and then 
computes the median of those prevalences
+
+Questions, bug reports, and feature suggestions
+------
+For technical, non-bug questions, [email 
Mikhail](mailto:[email protected]?subject=Dashboard%20Question) or 
[Chelsy](mailto:[email protected]?subject=Dashboard%20Question). If you 
experience a bug or notice something wrong or have a suggestion, [open a ticket 
in 
Phabricator](https://phabricator.wikimedia.org/maniphest/task/create/?projects=Discovery)
 in the Discovery board or [email 
Deb](mailto:[email protected]?subject=Dashboard%20Question).
+
+<hr style="border-color: gray;">
+<p style="font-size: small;">
+  <strong>Link to this dashboard:</strong> <a 
href="https://discovery.wmflabs.org/maps/#kartographer_langproj">https://discovery.wmflabs.org/maps/#kartographer_langproj</a>
+  | Page is available under <a 
href="https://creativecommons.org/licenses/by-sa/3.0/" title="Creative Commons 
Attribution-ShareAlike License">CC-BY-SA 3.0</a>
+  | <a href="https://phabricator.wikimedia.org/diffusion/WDWZ/" 
title="Wikimedia Maps Dashboard source code repository">Code</a> is licensed 
under <a 
href="https://phabricator.wikimedia.org/diffusion/WDWZ/browse/master/LICENSE.md"
 title="MIT License">MIT</a>
+  | Part of <a href="https://discovery.wmflabs.org/">Discovery Dashboards</a>
+</p>
diff --git a/tab_documentation/tiles_summary.md 
b/tab_documentation/tiles_summary.md
index 831ac2f..ddd7ba0 100644
--- a/tab_documentation/tiles_summary.md
+++ b/tab_documentation/tiles_summary.md
@@ -1,7 +1,7 @@
-Tiles (Summary)
+Kartotherian tile requests
 =======
 
-The initial usage spike was due to the announcement of the mapping service to 
the development community, and not the regular usage by Wikipedia users. Once 
the initial interest subsided, the usage fell back to the expected 
pre-announcement levels.
+The initial usage spike was due to the announcement of the mapping service to 
the development community, and not the regular usage by Wikipedia users. Once 
the initial interest subsided, the usage fell back to the expected 
pre-announcement levels until a Pokemon Go fan site "Pkget" switched to using 
Wikimedia Maps tiles after being blocked by OpenStreetMap.
 
 Notes
 -----
diff --git a/ui.R b/ui.R
index 5d1c207..ad7c902 100644
--- a/ui.R
+++ b/ui.R
@@ -13,11 +13,14 @@
         tags$script(src = "custom.js")
       ),
       sidebarMenu(id = "tabs",
-                  menuItem(text = "Tiles",
+                  menuItem(text = "Kartotherian usage",
                            menuSubItem(text = "Summary", tabName = 
"tiles_summary"),
                            menuSubItem(text = "Tiles by style", tabName = 
"tiles_total_by_style"),
                            menuSubItem(text = "Users by style", tabName = 
"tiles_users_by_style"),
                            menuSubItem(text = "Tiles by zoom level", tabName = 
"tiles_total_by_zoom")),
+                  menuItem(text = "Kartographer usage",
+                           menuSubItem(text = "Overall prevalence", tabName = 
"kartographer_prevalence"),
+                           menuSubItem(text = "Language/project breakdown", 
tabName = "kartographer_langproj")),
                   menuItem(text = "Platform usage", tabName = 
"platform_usage"),
                   menuItem(text = "Feature usage",
                            menuSubItem(text = "GeoHack", tabName = 
"geohack_usage"),
@@ -85,11 +88,53 @@
                     width = 4
                   )
                 ),
-                fluidRow(column(uiOutput("zoom_level_selector_container"), 
width = 3),
-                         column(dygraphOutput("tiles_zoom_series"), width = 
8)),
+                fluidRow(
+                  column(uiOutput("zoom_level_selector_container"), width = 3),
+                  column(dygraphOutput("tiles_zoom_series"), width = 8)
+                ),
                 polloi::automata_select("tile_zoom_automata_check"),
                 div(id = "tiles_zoom_series_legend", style = "text-align: 
right;"),
                 includeMarkdown("./tab_documentation/tiles_total_by_zoom.md")),
+        tabItem(tabName = "kartographer_prevalence",
+                dygraphOutput("overall_prevalence_series"),
+                div(id = "overall_prevalence_series_legend", style = 
"text-align: right;"),
+                includeMarkdown("./tab_documentation/overall_prevalence.md")),
+        tabItem(tabName = "kartographer_langproj",
+                fluidRow(
+                  
column(polloi::smooth_select("smoothing_prevalence_langproj"), width = 4),
+                  column(checkboxGroupInput(
+                    "prevalence_langproj_feature", "Feature",
+                    c("Mapframe", "Maplink"), inline = TRUE,
+                    selected = c("Mapframe", "Maplink")
+                  ), width = 4),
+                  column(radioButtons(
+                    "prevalence_langproj_aggregation", "Aggregation",
+                    c("Overall", "Average", "Median"), inline = TRUE,
+                    selected = "Average"
+                  ), width = 4)
+                ),
+                fluidRow(
+                  column(
+                    selectInput("project_order", "Sort projects by",
+                                list("Alphabetical order" = "alphabet", 
"Volume of articles" = "volume"),
+                                selected = "volume"),
+                    uiOutput("project_selector_container"),
+                    width = 2
+                  ),
+                  column(
+                    selectInput("language_order", "Sort languages by",
+                                list("Alphabetical order" = "alphabet", 
"Volume of articles" = "volume"),
+                                selected = "volume"),
+                    uiOutput("language_selector_container"),
+                    width = 2
+                  ),
+                  column(
+                    dygraphOutput("prevalence_langproj_plot"),
+                    div(id = "prevalence_langproj_legend", style = 
"text-align: right;"),
+                    width = 8
+                  )
+                ),
+                includeMarkdown("./tab_documentation/prevalence_langproj.md")),
         tabItem(tabName = "platform_usage",
                 fluidRow(
                   
column(polloi::smooth_select("smoothing_users_per_platform"), width = 3),
diff --git a/utils.R b/utils.R
index e060be7..55f3d8a 100644
--- a/utils.R
+++ b/utils.R
@@ -30,10 +30,91 @@
   return(invisible())
 }
 
-read_countries <- function(){
+read_countries <- function() {
   country_data <<- 
polloi::read_dataset("discovery/metrics/maps/users_by_country.tsv", col_types = 
"Dcd") %>%
     dplyr::filter(!is.na(country), !is.na(users)) %>%
     dplyr::mutate(users = users * 100) %>%
     tidyr::spread(country, users, fill = 0)
   return(invisible())
 }
+
+read_prevalence <- function() {
+  projects_db <<- readr::read_csv(system.file("extdata/projects.csv", package 
= "polloi"), col_types = "cclc")[, c("project", "multilingual")]
+  lang_proj <- polloi::get_langproj()
+  maplinks <- 
polloi::read_dataset("discovery/metrics/maps/maplink_prevalence.tsv", col_types 
= "Dcii-")
+  mapframes <- 
polloi::read_dataset("discovery/metrics/maps/mapframe_prevalence.tsv", 
col_types = "Dcii-")
+  maplink_prevalence <<- maplinks %>%
+    dplyr::left_join(lang_proj, by = c("wiki" = "wikiid")) %>%
+    dplyr::filter(!is.na(project))
+  mapframe_prevalence <<- mapframes %>%
+    dplyr::left_join(lang_proj, by = c("wiki" = "wikiid")) %>%
+    dplyr::filter(!is.na(project))
+  available_languages_maplink <- maplink_prevalence %>%
+    dplyr::mutate(language = dplyr::if_else(is.na(language), "(None)", 
language)) %>%
+    dplyr::group_by(language) %>%
+    dplyr::top_n(1, date) %>%
+    dplyr::summarize(articles = sum(total_articles), maplink = TRUE) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(c(language, articles, maplink))
+  available_languages_mapframe <- mapframe_prevalence %>%
+    dplyr::mutate(language = dplyr::if_else(is.na(language), "(None)", 
language)) %>%
+    dplyr::group_by(language) %>%
+    dplyr::top_n(1, date) %>%
+    dplyr::summarize(articles = sum(total_articles), mapframe = TRUE) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(c(language, articles, mapframe))
+  available_languages <<- dplyr::full_join(
+    available_languages_maplink,
+    available_languages_mapframe,
+    by = "language"
+  ) %>%
+    dplyr::mutate(
+      articles = pmax(articles.x, articles.y, na.rm = TRUE),
+      maplink = dplyr::if_else(is.na(maplink), FALSE, maplink),
+      mapframe = dplyr::if_else(is.na(mapframe), FALSE, mapframe)
+    ) %>%
+    dplyr::select(-c(articles.x, articles.y))
+  available_projects_maplink <- maplink_prevalence %>%
+    dplyr::group_by(project) %>%
+    dplyr::top_n(1, date) %>%
+    dplyr::summarize(articles = sum(total_articles), maplink = TRUE) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(c(project, articles, maplink))
+  available_projects_mapframe <- mapframe_prevalence %>%
+    dplyr::group_by(project) %>%
+    dplyr::top_n(1, date) %>%
+    dplyr::summarize(articles = sum(total_articles), mapframe = TRUE) %>%
+    dplyr::ungroup() %>%
+    dplyr::select(c(project, articles, mapframe))
+  available_projects <<- dplyr::full_join(
+    available_projects_maplink,
+    available_projects_mapframe,
+    by = "project"
+  ) %>%
+    dplyr::mutate(
+      articles = pmax(articles.x, articles.y, na.rm = TRUE),
+      maplink = dplyr::if_else(is.na(maplink), FALSE, maplink),
+      mapframe = dplyr::if_else(is.na(mapframe), FALSE, mapframe)
+    ) %>%
+    dplyr::select(-c(articles.x, articles.y))
+  prevalence <<- dplyr::inner_join(
+    maplinks, mapframes,
+    by = c("date", "wiki")
+  ) %>%
+    dplyr::left_join(lang_proj, by = c("wiki" = "wikiid")) %>%
+    dplyr::filter(!is.na(project)) %>%
+    dplyr::select(-wiki) %>%
+    dplyr::group_by(date, project) %>%
+    dplyr::summarize(
+      Maplink = round(100 * sum(maplink_articles) / sum(total_articles.x), 2),
+      Mapframe = round(100 * sum(mapframe_articles) / sum(total_articles.y), 2)
+    ) %>%
+    dplyr::ungroup() %>%
+    tidyr::gather(feature, prevalence, -c(date, project)) %>%
+    dplyr::transmute(
+      date = date, prevalence = prevalence,
+      group = paste0(feature, " (", project, ")")
+    ) %>%
+    tidyr::spread(group, prevalence)
+  return(invisible())
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/379150
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: If1f1efa619037ce8adea873c148f9a1f78376506
Gerrit-PatchSet: 3
Gerrit-Project: wikimedia/discovery/wetzel
Gerrit-Branch: develop
Gerrit-Owner: Bearloga <[email protected]>
Gerrit-Reviewer: Bearloga <[email protected]>
Gerrit-Reviewer: Chelsyx <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to