Bearloga has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/381126 )
Change subject: Session counts by volume for mobile web search ...................................................................... Session counts by volume for mobile web search Bug: T176811 Change-Id: I545a80a5f4214e3f170d6a104a48e6d30dddecc9 --- M docs/README.md M modules/metrics/search/config.yaml A modules/metrics/search/mobile_session_counts A modules/metrics/search/mobile_session_counts.R 4 files changed, 82 insertions(+), 1 deletion(-) Approvals: Bearloga: Verified; Looks good to me, approved diff --git a/docs/README.md b/docs/README.md index e5cc336..2053bcf 100644 --- a/docs/README.md +++ b/docs/README.md @@ -8,7 +8,7 @@ infrastructure. These datasets provide the metrics that are used by [Discovery's Dashboards](https://discovery.wmflabs.org/) -Last updated on 22 September 2017 +Last updated on 27 September 2017 Daily Metrics ------------- @@ -204,6 +204,8 @@ after clickthrough; Number of sessions with at least a click and the number of sessions that return to search for different things after clickthrough. +- **mobile\_session\_counts.tsv**: Number of user sessions on mobile + web, broken down by high, medium and low volume. wdqs/ ----- diff --git a/modules/metrics/search/config.yaml b/modules/metrics/search/config.yaml index 2181168..4b3f099 100644 --- a/modules/metrics/search/config.yaml +++ b/modules/metrics/search/config.yaml @@ -233,3 +233,8 @@ granularity: days starts: 2017-04-01 type: script + mobile_session_counts: + description: Number of user sessions on mobile web, broken down by high, medium and low volume. + granularity: days + starts: 2017-04-01 + type: script diff --git a/modules/metrics/search/mobile_session_counts b/modules/metrics/search/mobile_session_counts new file mode 100755 index 0000000..e88dc7e --- /dev/null +++ b/modules/metrics/search/mobile_session_counts @@ -0,0 +1,3 @@ +#!/bin/bash + +Rscript modules/metrics/search/mobile_session_counts.R -d $1 diff --git a/modules/metrics/search/mobile_session_counts.R b/modules/metrics/search/mobile_session_counts.R new file mode 100644 index 0000000..dcb44a0 --- /dev/null +++ b/modules/metrics/search/mobile_session_counts.R @@ -0,0 +1,71 @@ +#!/usr/bin/env Rscript + +source("config.R") +.libPaths(r_library) +suppressPackageStartupMessages(library("optparse")) + +option_list <- list( + make_option(c("-d", "--date"), default = NA, action = "store", type = "character") +) + +# Get command line options, if help option encountered print help and exit, +# otherwise if options not found on command line then set defaults: +opt <- parse_args(OptionParser(option_list = option_list)) + +if (is.na(opt$date)) { + quit(save = "no", status = 1) +} + +# Build query: +date_clause <- as.character(as.Date(opt$date), format = "LEFT(timestamp, 8) = '%Y%m%d'") + +query <- paste0("SELECT + DATE('", opt$date, "') AS date, + event_userSessionToken AS user_session_token, + COUNT(DISTINCT event_searchSessionToken) AS n_search_session + FROM MobileWebSearch_12054448 + WHERE ", date_clause, " + GROUP BY date, event_userSessionToken;") + +# Fetch data from MySQL database: +results <- tryCatch( + suppressMessages(data.table::as.data.table(wmf::mysql_read(query, "log"))), + error = function(e) { + return(data.frame()) + } +) + +if (nrow(results) == 0) { + # Here we make the script output tab-separated + # column names, as required by Reportupdater: + output <- data.frame( + date = character(), + user_sessions = numeric(), + search_sessions = numeric(), + high_volume = numeric(), + medium_volume = numeric(), + low_volume = numeric(), + threshold_high = numeric(), + threshold_low = numeric() + ) +} else { + # Split session counts: + `90th percentile` <- floor(quantile(results$n_search_session, 0.9)) + `10th percentile` <- ceiling(quantile(results$n_search_session, 0.1)) + results$session_type <- dplyr::case_when( + results$n_search_session > `90th percentile` ~ "high_volume", + results$n_search_session < `10th percentile` ~ "low_volume", + TRUE ~ "medium_volume" + ) + output <- cbind( + date = opt$date, + user_sessions = nrow(results), + search_sessions = sum(results$n_search_session, na.rm = TRUE), + tidyr::spread(results[, list(user_session = length(user_session_token)), by = "session_type"], + session_type, user_session), + threshold_high = `90th percentile`, + threshold_low = `10th percentile` + ) +} + +write.table(output, file = "", append = FALSE, sep = "\t", row.names = FALSE, quote = FALSE) -- To view, visit https://gerrit.wikimedia.org/r/381126 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I545a80a5f4214e3f170d6a104a48e6d30dddecc9 Gerrit-PatchSet: 2 Gerrit-Project: wikimedia/discovery/golden Gerrit-Branch: master Gerrit-Owner: Chelsyx <c...@wikimedia.org> Gerrit-Reviewer: Bearloga <mpo...@wikimedia.org> Gerrit-Reviewer: Chelsyx <c...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits