Chelsyx has uploaded a new change for review. ( https://gerrit.wikimedia.org/r/392102 )
Change subject: Add interleaved test analysis ...................................................................... Add interleaved test analysis Bug: T176493 Change-Id: I795023856963030e67e85e1cde7352842aa3a7a8 --- M functions.R M modules/data/data_aggregation.R M modules/data/data_cleansing.R M modules/data/fetch_data.R A modules/interleaved_test/data_processing.R A modules/interleaved_test/interleaved_preference.R A modules/interleaved_test/page_dwelltime.R M modules/setup.R M modules/sister_search/sidebar_results.R M modules/stat_test/engagement.R A modules/stat_test/remove_interleaved_data.R M modules/stat_test/return_rate.R M modules/stat_test/serp_from_autocomplete.R M modules/stat_test/serp_offset.R M modules/stat_test/visited_page.R M modules/test_summary/browser_os.R M modules/test_summary/events.R M modules/test_summary/searches.R M report.Rmd M run.R 20 files changed, 388 insertions(+), 65 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/autoreporter refs/changes/02/392102/1 diff --git a/functions.R b/functions.R index a4c6367..a7dff7e 100644 --- a/functions.R +++ b/functions.R @@ -79,3 +79,59 @@ ggplot2::scale_color_brewer(palette = "Set1") + ggplot2::labs(x = NULL, color = "Group", y = y_lab, title = title, subtitle = subtitle) } + +cppFunction('CharacterVector fill_in(CharacterVector ids) { + CharacterVector new_ids(ids.size()); + String current_id = ids[0]; + new_ids[0] = current_id; + for (int i = 1; i < ids.size(); i++) { + if (ids[i] != NA_STRING) { + current_id = ids[i]; + } + new_ids[i] = current_id; + } + return new_ids; +}') + +cppFunction('NumericVector cumunique(CharacterVector ids) { + NumericVector count(ids.size()); + String current_id = ids[0]; + count[0] = 1; + for (int i = 1; i < ids.size(); i++) { + if (ids[i] == current_id) { + count[i] = count[i-1]; + } else { + count[i] = count[i-1] + 1; + current_id = ids[i]; + } + } + return count; +}') + +# Process interleaved team draft +process_session <- function(df) { + processed_session <- unsplit(lapply(split(df, df$serp_id), function(df) { + if (is.na(df$event_extraParams[1]) || df$event_extraParams[1] == "") { + visited_pages <- rep(as.character(NA), times = nrow(df)) + } else { + from_json <- jsonlite::fromJSON(df$event_extraParams[1], simplifyVector = FALSE) + if (!("teamDraft" %in% names(from_json)) || all(is.na(df$article_id))) { + visited_pages <- rep(as.character(NA), times = nrow(df)) + } else { + team_a <- unlist(from_json$teamDraft$a) + team_b <- unlist(from_json$teamDraft$b) + visited_pages <- vapply(df$article_id, function(article_id) { + if (article_id %in% team_a) { + return("A") + } else if (article_id %in% team_b) { + return("B") + } else { + return(as.character(NA)) + } + }, "") + } + } + return(visited_pages) + }), df$serp_id) + return(processed_session) +} diff --git a/modules/data/data_aggregation.R b/modules/data/data_aggregation.R index db5a178..6115703 100644 --- a/modules/data/data_aggregation.R +++ b/modules/data/data_aggregation.R @@ -8,9 +8,9 @@ message("Aggregating by search...") searches <- events %>% - keep_where(!(is.na(serp_id))) %>% # remove visitPage and checkin events - arrange(date, session_id, serp_id, timestamp) %>% - group_by(group, wiki, session_id, serp_id) %>% + keep_where(!(is.na(search_id))) %>% # remove visitPage and checkin events + arrange(date, session_id, search_id, timestamp) %>% + group_by(group, wiki, session_id, search_id) %>% summarize( date = date[1], timestamp = timestamp[1], @@ -56,19 +56,19 @@ keep_where(event == "searchResultPage", `some same-wiki results` == "TRUE") %>% # SERPs with 0 results will not have an offset in extraParams ^ mutate(offset = purrr::map_int(event_extraParams, ~ parse_extraParams(.x, action = "searchResultPage")$offset)) %>% - select(session_id, event_id, serp_id, offset) + select(session_id, event_id, search_id, offset) message("Processing SERP interwiki data...") - extract_iw <- function(session_id, event_id, serp_id, event_extraParams) { + extract_iw <- function(session_id, event_id, search_id, event_extraParams) { return(data.frame( - session_id, event_id, serp_id, + session_id, event_id, search_id, parse_extraParams(event_extraParams, action = "searchResultPage")$iw, stringsAsFactors = FALSE )) } serp_iw <- events %>% keep_where(event == "searchResultPage") %>% - select(session_id, event_id, serp_id, event_extraParams) %>% + select(session_id, event_id, search_id, event_extraParams) %>% purrr::pmap_df(extract_iw) %>% mutate(source = case_when( source == "wikt" ~ "Wiktionary", @@ -104,12 +104,12 @@ esclick_result <- events %>% keep_where(event == "esclick") %>% cbind(purrr::map_df(.$event_extraParams, parse_extraParams, action = "esclick")) %>% - select(group, wiki, session_id, serp_id, page_id, event_id, hoverId, section, result, position) + select(group, wiki, session_id, search_id, page_id, event_id, hoverId, section, result, position) searches <- esclick_result %>% - group_by(group, wiki, session_id, serp_id) %>% + group_by(group, wiki, session_id, search_id) %>% summarize(with_esclick = TRUE) %>% - dplyr::right_join(searches, by = c("group", "wiki", "session_id", "serp_id")) %>% + dplyr::right_join(searches, by = c("group", "wiki", "session_id", "search_id")) %>% dplyr::mutate(with_esclick = ifelse(is.na(with_esclick), FALSE, with_esclick)) } @@ -118,12 +118,12 @@ hover_over <- events %>% keep_where(event %in% c("hover-on", "hover-off")) %>% cbind(purrr::map_df(.$event_extraParams, parse_extraParams, action = c("hover-on", "hover-off"))) %>% - select(timestamp, group, wiki, session_id, serp_id, page_id, event_id, event, hoverId, section, results) + select(timestamp, group, wiki, session_id, search_id, page_id, event_id, event, hoverId, section, results) searches <- hover_over %>% keep_where(event == "hover-on") %>% - group_by(group, wiki, session_id, serp_id) %>% + group_by(group, wiki, session_id, search_id) %>% summarize(n_hover = length(unique(hoverId))) %>% - dplyr::right_join(searches, by = c("group", "wiki", "session_id", "serp_id")) %>% + dplyr::right_join(searches, by = c("group", "wiki", "session_id", "search_id")) %>% dplyr::mutate(n_hover = ifelse(is.na(n_hover), 0, n_hover)) } diff --git a/modules/data/data_cleansing.R b/modules/data/data_cleansing.R index 5fd5d38..c59b798 100644 --- a/modules/data/data_cleansing.R +++ b/modules/data/data_cleansing.R @@ -17,6 +17,13 @@ data_cleansing_info <- paste0("Fulltext search events: Deleted ", nrow(events_raw) - nrow(events), " duplicated events.") rm(events_raw) # to free up memory +message("Removing unnecessary check-ins...") +events <- events[order(events$group, events$session_id, events$page_id, events$article_id, events$event, events$event_checkin, na.last = FALSE), ] +extra_checkins <- duplicated(events[, c("group", "session_id", "page_id", "article_id", "event")], fromLast = TRUE) & events$event == "checkin" +data_cleansing_info <- paste0(data_cleansing_info, " Deleted ", sum(extra_checkins), " unnecessary check-in events and only keep the last one.") +events <- events[!extra_checkins, ] +rm(extra_checkins) + message("Delete events with negative load time...") data_cleansing_info <- paste0(data_cleansing_info, " Deleted ", sum(events$load_time < 0, na.rm = TRUE), " events with negative load time.") events <- events %>% @@ -25,19 +32,20 @@ message("De-duplicating SERPs...") SERPs <- events %>% keep_where(event == "searchResultPage") %>% - select(c(session_id, page_id, query_hash, search_token)) %>% - group_by(session_id, query_hash) %>% - mutate(serp_id = page_id[1], cirrus_id = search_token[1]) %>% + dplyr::arrange(wiki, group, session_id, timestamp) %>% + select(c(group, session_id, page_id, query_hash)) %>% + group_by(group, session_id, query_hash) %>% + mutate(search_id = page_id[1]) %>% ungroup %>% - select(c(page_id, serp_id, cirrus_id)) + select(c(group, session_id, page_id, search_id)) events <- events %>% - dplyr::left_join(SERPs, by = "page_id") + dplyr::left_join(SERPs, by = c("group", "session_id", "page_id")) rm(SERPs) # to free up memory message("Removing events without an associated SERP (orphan clicks and check-ins)...") n_evnt <- nrow(events) events <- events %>% - keep_where(!(is.na(serp_id) & !(event %in% c("visitPage", "checkin")))) %>% # remove orphan click + keep_where(!(is.na(search_id) & !(event %in% c("visitPage", "checkin")))) %>% # remove orphan click group_by(session_id) %>% keep_where("searchResultPage" %in% event) %>% # remove orphan "visitPage" and "checkin" ungroup @@ -56,7 +64,7 @@ message("Remove sessions with more than 100 searches...") spider_session <- events %>% group_by(date, group, session_id) %>% - summarize(n_search = length(unique(serp_id))) %>% + summarize(n_search = length(unique(search_id))) %>% keep_where(n_search > 100) %>% {.$session_id} events <- events %>% @@ -64,6 +72,13 @@ data_cleansing_info <- paste0(data_cleansing_info, " Removed ", length(spider_session), " sessions with more than 100 searches.") rm(spider_session) +message("Fill in serp_id for visitedPage and checkin events...") +events %<>% + mutate(serp_id = ifelse(event %in% c("visitPage", "checkin"), NA, page_id), + event = factor(event, levels =c("searchResultPage", "click", "ssclick", "iwclick", "hover-on", "esclick", "hover-off", "visitPage", "checkin"))) %>% + dplyr::arrange(wiki, group, session_id, timestamp, event) %>% + mutate(serp_id = fill_in(serp_id)) + # Number of wikis in the test n_wiki <- length(unique(events$wiki)) message("Number of wikis in the test: ", n_wiki) diff --git a/modules/data/fetch_data.R b/modules/data/fetch_data.R index 490aeff..d95f687 100644 --- a/modules/data/fetch_data.R +++ b/modules/data/fetch_data.R @@ -3,6 +3,7 @@ event_uniqueId AS event_id, event_mwSessionId, event_pageViewId AS page_id, + event_articleId AS article_id, event_searchSessionId AS session_id, event_subTest AS `group`, wiki, @@ -26,7 +27,6 @@ event_checkin, event_extraParams, event_msToDisplayResults AS load_time, - event_searchToken AS search_token, userAgent AS user_agent FROM TestSearchSatisfaction2_", report_params$tss2_revision, "\n", "WHERE LEFT(timestamp, 8) >= '", report_params$start_date, "' AND LEFT(timestamp, 8) < '", report_params$end_date, "' \n", @@ -35,12 +35,13 @@ ifelse(is.null(report_params$event_action), "", paste0(" AND event_action IN('", paste(report_params$event_action, collapse = "', '"), "') \n")), ifelse(is.null(report_params$event_source), "", paste0(" AND event_source IN('", paste(report_params$event_source, collapse = "', '"), "') \n")), ifelse(is.null(report_params$other_filter), "", paste0(" AND ", report_params$other_filter, " \n")), - " AND CASE WHEN event_action = 'searchResultPage' THEN event_msToDisplayResults IS NOT NULL - WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS NOT NULL AND event_position > -1 - WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL - WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND event_pageViewId IS NOT NULL - ELSE TRUE - END;" + " AND INSTR(userAgent, '\"is_bot\": false') > 0 + AND CASE WHEN event_action = 'searchResultPage' THEN event_msToDisplayResults IS NOT NULL + WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS NOT NULL AND event_position > -1 + WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL + WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND event_pageViewId IS NOT NULL + ELSE TRUE + END;" ) # Fetch full-text search from autocomplete search to compute dwell time on SERP @@ -63,14 +64,14 @@ ifelse(is.null(report_params$test_group_names), "", paste0(" AND event_subTest IN('", paste(report_params$test_group_names, collapse = "', '"), "') \n")), ifelse(is.null(report_params$other_filter), "", paste0(" AND ", report_params$other_filter, " \n")), " AND event_source = 'autocomplete' - AND event_articleId IS NULL - AND event_action IN('visitPage', 'checkin') - AND CASE WHEN event_action = 'searchResultPage' THEN event_msToDisplayResults IS NOT NULL - WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS NOT NULL AND event_position > -1 - WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL - WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND event_pageViewId IS NOT NULL - ELSE TRUE - END;" + AND event_articleId IS NULL + AND event_action IN('visitPage', 'checkin') + AND CASE WHEN event_action = 'searchResultPage' THEN event_msToDisplayResults IS NOT NULL + WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS NOT NULL AND event_position > -1 + WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL + WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND event_pageViewId IS NOT NULL + ELSE TRUE + END;" ) if (!is.null(report_params$query)) { diff --git a/modules/interleaved_test/data_processing.R b/modules/interleaved_test/data_processing.R new file mode 100644 index 0000000..c1bef42 --- /dev/null +++ b/modules/interleaved_test/data_processing.R @@ -0,0 +1,26 @@ +if (!is.null(report_params$interleaved_group_names)) { + + message("Separate interleaved dataset") + events_interleaved <- events[events$group %in% report_params$interleaved_group_names, ] + n_interleaved_groups <- length(report_params$interleaved_group_names) + + message("Extracting team draft data so we know which visited result is which") + events_interleaved %<>% + keep_where(events_interleaved$event %in% c('searchResultPage', 'click', 'visitPage', 'checkin')) %>% + dplyr::arrange(wiki, group, session_id, timestamp, event) %>% + mutate(search_id = fill_in(search_id)) + events_interleaved <- data.table::data.table(events_interleaved) + events_interleaved[, team := process_session(.SD), by = c("wiki", "group", "session_id"), .SDcols = c("serp_id", "event_extraParams", "article_id")] + events_interleaved <- events_interleaved[order(events_interleaved$wiki, events_interleaved$group, events_interleaved$session_id, events_interleaved$serp_id, events_interleaved$timestamp, events_interleaved$event), ] + events_interleaved[, event_extraParams := NULL, ] + + message("Remove sessions with large numbers of searches") + events_interleaved[, valid := length(unique(search_id)) <= 20, by = c("date", "wiki", "group", "session_id")] + interleaved_visitpage <- events_interleaved[ + !is.na(team) & team != "" & event == "visitPage" & valid == TRUE, + c("date", "wiki", "group", "session_id", "search_id", "team"), + with = TRUE + ] + interleaved_visitpage <- interleaved_visitpage[order(interleaved_visitpage$date, interleaved_visitpage$wiki, interleaved_visitpage$group, interleaved_visitpage$session_id, interleaved_visitpage$search_id, interleaved_visitpage$team), ] + +} diff --git a/modules/interleaved_test/interleaved_preference.R b/modules/interleaved_test/interleaved_preference.R new file mode 100644 index 0000000..5318fa8 --- /dev/null +++ b/modules/interleaved_test/interleaved_preference.R @@ -0,0 +1,105 @@ +if (!is.null(report_params$interleaved_group_names)) { + + # Overall preference + interleaved_pref_overall_function <- function(by_wiki = FALSE, ...) { + interleaved_pref_overall <- rbind( + interleaved_visitpage[, j = list( + "observed" = wmf::interleaved_preference(paste(.SD$session_id), .SD$team), + "upper" = wmf::interleaved_confint(paste(.SD$session_id), .SD$team)$upper, + "lower" = wmf::interleaved_confint(paste(.SD$session_id), .SD$team)$lower, + "method" = "Sampling Sessions" + ), by = c(switch(by_wiki, "wiki", NULL), "group")], + interleaved_visitpage[, j = list( + "observed" = wmf::interleaved_preference(paste(.SD$session_id, .SD$search_id), .SD$team), + "upper" = wmf::interleaved_confint(paste(.SD$session_id, .SD$search_id), .SD$team)$upper, + "lower" = wmf::interleaved_confint(paste(.SD$session_id, .SD$search_id), .SD$team)$lower, + "method" = "Sampling Searches" + ), by = c(switch(by_wiki, "wiki", NULL), "group")] + ) + ggplot( + mutate(interleaved_pref_overall, method = sub("\\s", "\n", method)), + aes(x = method, y = observed) + ) + + geom_hline(yintercept = 0, linetype = "dashed") + + geom_linerange(aes(ymin = lower, ymax = upper)) + + geom_label(aes(label = sprintf("%.4f", observed))) + + labs( + x = "Bootstrap approach", y = "B ← Preference → A", + title = paste("Preference for results from two rankers, by group", switch(by_wiki, "and wiki", NULL)), + caption = "95% confidence intervals were bootstrapped using two different sampling approaches" + ) + } + p <- interleaved_pref_overall_function() + + facet_wrap(~ group) + + wmf::theme_facet() + ggsave("interleaved_pref_overall.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = fig_height, width = fig_width) + rm(p) + + # Daily preference + interleaved_pref_daily_function <- function(by_wiki = FALSE, ...) { + interleaved_pref_daily <- rbind( + interleaved_visitpage[, j = list( + "observed" = wmf::interleaved_preference(paste(.SD$session_id), .SD$team), + "upper" = wmf::interleaved_confint(paste(.SD$session_id), .SD$team)$upper, + "lower" = wmf::interleaved_confint(paste(.SD$session_id), .SD$team)$lower, + "method" = "Sampling Sessions" + ), by = c("date", switch(by_wiki, "wiki", NULL), "group")], + interleaved_visitpage[, j = list( + "observed" = wmf::interleaved_preference(paste(.SD$session_id, .SD$search_id), .SD$team), + "upper" = wmf::interleaved_confint(paste(.SD$session_id, .SD$search_id), .SD$team)$upper, + "lower" = wmf::interleaved_confint(paste(.SD$session_id, .SD$search_id), .SD$team)$lower, + "method" = "Sampling Searches" + ), by = c("date", switch(by_wiki, "wiki", NULL), "group")] + ) %>% + mutate(preferred = dplyr::if_else(observed > 0, "A", "B")) %>% + group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "method", "preferred"))) %>% + arrange(date) %>% + mutate(counter = cumsum(!is.na(date))) %>% + ungroup + + ggplot(keep_where(interleaved_pref_daily, !is.na(observed)), aes(x = date, y = observed)) + + geom_hline(yintercept = 0, linetype = "dashed") + + geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.25) + + geom_line() + + geom_segment( + aes(xend = date, yend = ifelse(preferred == "A", 0.275, -0.275), color = preferred), + linetype = "dotted" + ) + + geom_point(aes(color = preferred)) + + geom_text( + aes(y = ifelse(preferred == "A", 0.3, -0.3), label = counter, color = preferred), + show.legend = FALSE, fontface = "bold" + ) + + scale_color_brewer(palette = "Set1") + + labs( + x = "Date", y = "B ← Preference → A", + title = paste("Preference for results from two rankers, daily by group", switch(by_wiki, "and wiki", NULL)), + subtitle = "Showing counts of how many times users preferred one ranking over the other", + caption = "95% confidence intervals were bootstrapped using two different sampling approaches" + ) + } + p <- interleaved_pref_daily_function() + + facet_grid(group ~ method, scales = "free_y") + + wmf::theme_facet() + ggsave("interleaved_pref_daily.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = ifelse(n_interleaved_groups < 2, fig_height, 3*n_interleaved_groups), width = fig_width) + rm(p) + + # By wiki + if (n_wiki > 1) { + + p <- interleaved_pref_overall_function(by_wiki = TRUE) + + facet_grid(wiki ~ group, scales = "free_y") + + wmf::theme_facet() + ggsave("interleaved_pref_overall_wiki.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = 3 * n_wiki, width = fig_width) + rm(p) + + p <- interleaved_pref_daily_function(by_wiki = TRUE) + + facet_grid(wiki + group ~ method, scales = "free_y") + + wmf::theme_facet() + ggsave("interleaved_pref_daily_wiki.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = 3*n_interleaved_groups*n_wiki, width = fig_width) + rm(p) + + } + + rm(interleaved_visitpage) +} diff --git a/modules/interleaved_test/page_dwelltime.R b/modules/interleaved_test/page_dwelltime.R new file mode 100644 index 0000000..d48ef08 --- /dev/null +++ b/modules/interleaved_test/page_dwelltime.R @@ -0,0 +1,62 @@ +if (!is.null(report_params$interleaved_group_names)) { + + temp <- dplyr::inner_join( + visitedPages[visitedPages$group %in% report_params$interleaved_group_names, ], + events_interleaved[ + !is.na(team) & team != "" & event %in% c("visitPage", "checkin") & valid == TRUE, + c("wiki", "group", "session_id", "page_id", "team"), + with = TRUE + ], + by = c("wiki", "group", "session_id", "page_id") + ) + temp$SurvObj <- with(temp, survival::Surv(dwell_time, status == 2)) + + fit <- survival::survfit(SurvObj ~ group + team, data = temp) + ggsurv <- survminer::ggsurvplot( + fit, + conf.int = TRUE, + xlab = "T (Dwell Time in seconds)", + ylab = "Proportion of visits longer than T (P%)", + surv.scale = "percent", + palette = "Dark2", + legend = "bottom", + legend.title = "Group", + ggtheme = wmf::theme_facet() + ) + p <- ggsurv$plot + + facet_wrap(~ group, scales = "free_y") + + labs( + title = "How long users stay on each team's results", + subtitle = "With 95% confidence intervals." + ) + ggsave("interleaved_survival_all.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = fig_height, width = fig_width) + rm(p) + + if (n_wiki > 1) { + + fit <- survival::survfit(SurvObj ~ group + wiki + team, data = temp) + ggsurv <- survminer::ggsurvplot( + fit, + conf.int = TRUE, + xlab = "T (Dwell Time in seconds)", + ylab = "Proportion of visits longer than T (P%)", + surv.scale = "percent", + palette = colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(n_wiki*length(report_params$test_group_names)), + legend = "bottom", + legend.title = "Group", + ggtheme = wmf::theme_facet() + ) + p <- ggsurv$plot + + facet_grid(wiki ~ group, scales = "free_y") + + labs( + title = "How long users stay on each team's results, by wiki", + subtitle = "With 95% confidence intervals." + ) + ggsave("interleaved_survival_wiki.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = 4 * n_wiki, width = fig_width) + rm(p) + + } + + rm(temp) + +} diff --git a/modules/setup.R b/modules/setup.R index fc697bf..d22a1f1 100644 --- a/modules/setup.R +++ b/modules/setup.R @@ -3,6 +3,7 @@ suppressPackageStartupMessages({ library(magrittr) library(ggplot2) + library(Rcpp) import::from( # We don't import certain verbs (e.g. distinct, left_join, bind_rows) # to avoid potential name-conflicts and because they're one-time use. diff --git a/modules/sister_search/sidebar_results.R b/modules/sister_search/sidebar_results.R index c6628fa..b78c3b9 100644 --- a/modules/sister_search/sidebar_results.R +++ b/modules/sister_search/sidebar_results.R @@ -11,7 +11,7 @@ serp_iw_breakdown_function <- function(by_wiki = FALSE, ...) { serp_iw %>% keep_where(!is.na(source), !is.na(position)) %>% - dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>% + dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) %>% mutate(position = safe_ordinals(position)) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "source", "position"))) %>% summarize(counts = length(unique(event_id))) %>% @@ -34,7 +34,7 @@ p <- serp_iw %>% keep_where(!is.na(source)|!is.na(position)) %>% - dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>% + dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) %>% group_by(group, wiki) %>% summarize(counts = length(unique(event_id))) %>% bar_chart(x = "wiki", y = "counts", x_lab = "Wiki", y_lab = "Number of SERPs", diff --git a/modules/stat_test/engagement.R b/modules/stat_test/engagement.R index 2f0f515..d66ace6 100644 --- a/modules/stat_test/engagement.R +++ b/modules/stat_test/engagement.R @@ -28,8 +28,8 @@ } # Engagement odds ratio for all -control_group <- grep("control", report_params$test_group_names, value = TRUE) -test_group <- setdiff(report_params$test_group_names, control_group) +control_group <- grep("control", traditional_test_groups, value = TRUE) +test_group <- setdiff(traditional_test_groups, control_group) for (this_group in test_group) { this_plot <- samewiki_ctr_all %>% diff --git a/modules/stat_test/remove_interleaved_data.R b/modules/stat_test/remove_interleaved_data.R new file mode 100644 index 0000000..681ea77 --- /dev/null +++ b/modules/stat_test/remove_interleaved_data.R @@ -0,0 +1,18 @@ +if (!is.null(report_params$interleaved_group_names)) { + message("Remove interleaved data...") + + events <- events[!(events$group %in% report_params$interleaved_group_names), ] + searches <- searches[!(searches$group %in% report_params$interleaved_group_names), ] + + if (all(c("visitPage", "checkin") %in% events$event)) { + visitedPages <- visitedPages[!(visitedPages$group %in% report_params$interleaved_group_names), ] + } + + if (exists("fulltext_from_auto")) { + fulltext_from_auto <- fulltext_from_auto[!(fulltext_from_auto$group %in% report_params$interleaved_group_names), ] + serp_from_auto <- serp_from_auto[!(serp_from_auto$group %in% report_params$interleaved_group_names), ] + } + +} + +traditional_test_groups <- setdiff(report_params$test_group_names, report_params$interleaved_group_names) diff --git a/modules/stat_test/return_rate.R b/modules/stat_test/return_rate.R index 01f9eff..3705c9a 100644 --- a/modules/stat_test/return_rate.R +++ b/modules/stat_test/return_rate.R @@ -1,14 +1,14 @@ return_rate_function <- function(by_wiki = FALSE, ...) { returnRate_to_same_search <- events %>% keep_where(!(event %in% c("visitPage", "checkin"))) %>% - group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "serp_id"))) %>% + group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "search_id"))) %>% keep_where(sum(grepl("click", event)) > 0) %>% # Among search with at least 1 click - arrange(group, serp_id, timestamp) %>% + arrange(group, search_id, timestamp) %>% mutate(n_click_cumsum = cumsum(grepl("click", event))) %>% keep_where(n_click_cumsum > 0) %>% # delete serp and hover before first click summarize(comeback = sum(event %in% c("searchResultPage", "hover-on", "hover-off")) > 0 | sum(n_click_cumsum > 1) > 0) %>% # comeback to the same serp or make another click or hover group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>% - summarize(return_to_same_search = sum(comeback), n_search = length(unique(serp_id))) %>% + summarize(return_to_same_search = sum(comeback), n_search = length(unique(search_id))) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>% dplyr::do(binom::binom.bayes(.$return_to_same_search, .$n_search, conf.level = 0.95, tol = 1e-9)) @@ -19,7 +19,7 @@ arrange(group, session_id, timestamp) %>% mutate(n_click_cumsum = cumsum(grepl("click", event))) %>% keep_where(n_click_cumsum > 0) %>% # delete serp before first click - summarize(another_search = length(unique(serp_id)) > 1) %>% # comeback to make another search + summarize(another_search = length(unique(search_id)) > 1) %>% # comeback to make another search group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>% summarize(return_to_make_another_search = sum(another_search), n_session = length(unique(session_id))) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>% diff --git a/modules/stat_test/serp_from_autocomplete.R b/modules/stat_test/serp_from_autocomplete.R index dfd9a71..58671ed 100644 --- a/modules/stat_test/serp_from_autocomplete.R +++ b/modules/stat_test/serp_from_autocomplete.R @@ -37,7 +37,7 @@ palette = "Set1", legend = "bottom", legend.title = "Group", - legend.labs = params$test_group_names, + legend.labs = traditional_test_groups, ggtheme = wmf::theme_min() ) p <- ggsurv$plot + @@ -59,7 +59,7 @@ xlab = "T (Dwell Time in seconds)", ylab = "Proportion of SERPs longer than T (P%)", surv.scale = "percent", - palette = colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(n_wiki*length(report_params$test_group_names)), + palette = colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(n_wiki*length(traditional_test_groups)), legend = "bottom", legend.title = "Group", ggtheme = wmf::theme_facet() diff --git a/modules/stat_test/serp_offset.R b/modules/stat_test/serp_offset.R index 80e8228..80820af 100644 --- a/modules/stat_test/serp_offset.R +++ b/modules/stat_test/serp_offset.R @@ -1,9 +1,9 @@ if (exists("serp_offset")) { search_offset_function <- function(by_wiki = FALSE, ...) { serp_offset %>% - group_by(session_id, serp_id) %>% + group_by(session_id, search_id) %>% summarize(`Any page-turning` = any(offset > 0)) %>% - dplyr::right_join(searches, by = c("session_id", "serp_id")) %>% + dplyr::right_join(searches, by = c("session_id", "search_id")) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>% summarize(page_turn = sum(`Any page-turning`, na.rm = TRUE), n_search = n()) %>% ungroup %>% diff --git a/modules/stat_test/visited_page.R b/modules/stat_test/visited_page.R index 22058c6..291d215 100644 --- a/modules/stat_test/visited_page.R +++ b/modules/stat_test/visited_page.R @@ -12,7 +12,7 @@ palette = "Set1", legend = "bottom", legend.title = "Group", - legend.labs = report_params$test_group_names, + legend.labs = traditional_test_groups, ggtheme = wmf::theme_min() ) p <- ggsurv$plot + @@ -21,7 +21,7 @@ subtitle = "With 95% confidence intervals." ) ggsave("survival_visitedPages_all.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = fig_height, width = fig_width) - rm(temp, p) + rm(p) scroll_function <- function(by_wiki = FALSE, ...) { visitedPages %>% @@ -41,8 +41,6 @@ if (n_wiki > 1) { # TODO: duplicated code survival_all - temp <- visitedPages - temp$SurvObj <- with(temp, survival::Surv(dwell_time, status == 2)) fit <- survival::survfit(SurvObj ~ group + wiki, data = temp) ggsurv <- survminer::ggsurvplot( fit, @@ -50,7 +48,7 @@ xlab = "T (Dwell Time in seconds)", ylab = "Proportion of visits longer than T (P%)", surv.scale = "percent", - palette = colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(n_wiki*length(report_params$test_group_names)), + palette = colorRampPalette(RColorBrewer::brewer.pal(9, "Set1"))(n_wiki*length(traditional_test_groups)), legend = "bottom", legend.title = "Group", ggtheme = wmf::theme_facet() @@ -62,7 +60,7 @@ subtitle = "With 95% confidence intervals." ) ggsave("survival_visitedPages_wiki.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = 4 * n_wiki, width = fig_width) - rm(temp, p) + rm(p) p <- scroll_function(by_wiki = TRUE) + facet_wrap(~ wiki, ncol = 3, scales = "free_y") + @@ -71,4 +69,6 @@ rm(p) } + rm(temp) + } diff --git a/modules/test_summary/browser_os.R b/modules/test_summary/browser_os.R index a784d2c..0c767ac 100644 --- a/modules/test_summary/browser_os.R +++ b/modules/test_summary/browser_os.R @@ -17,14 +17,14 @@ tally %>% mutate(prop = paste0(scales::percent_format()(n/sum(n)), " (", n, ")")) %>% select(-n) %>% - tidyr::spread(group, prop) %>% + tidyr::spread(group, prop, fill = "0% (0)") %>% ungroup } get_bayes_factor <- function(data) { BF <- data %>% tally %>% - tidyr::spread(group, n) %>% + tidyr::spread(group, n, fill = 0) %>% ungroup %>% select(dplyr::one_of(report_params$test_group_names)) %>% as.matrix() %>% diff --git a/modules/test_summary/events.R b/modules/test_summary/events.R index c4e8484..254ffc9 100644 --- a/modules/test_summary/events.R +++ b/modules/test_summary/events.R @@ -16,7 +16,9 @@ mutate(event = factor(event, levels =c("visitPage", "checkin"))) %>% # Order the bars group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "event"))) %>% tally %>% - bar_chart(x = "event", y = "n", x_lab = "Event type", y_lab = "Number of events", title = paste("Number of events on articles after clickthrough, by test group", switch(by_wiki, "and wiki", NULL))) + bar_chart(x = "event", y = "n", x_lab = "Event type", y_lab = "Number of events", + title = paste("Number of events on articles after clickthrough, by test group", switch(by_wiki, "and wiki", NULL)), + subtitle = "Only the last check-in events are kept.") } p <- event_after_click_function() + wmf::theme_min() ggsave("event_after_click_all.png", p, path = fig_path, units = "in", dpi = plot_resolution, height = fig_height, width = fig_width) diff --git a/modules/test_summary/searches.R b/modules/test_summary/searches.R index a1b0183..15b5fe2 100644 --- a/modules/test_summary/searches.R +++ b/modules/test_summary/searches.R @@ -25,7 +25,7 @@ keep_where(event == "searchResultPage") %>% mutate(results = if_else(n_results >= 5, "5+ results", Pluralize(n_results, "result"))) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "results"))) %>% - summarize(searches = length(unique(serp_id[!is.na(serp_id)]))) %>% + summarize(searches = length(unique(search_id[!is.na(search_id)]))) %>% bar_chart(x = "results", y = "searches", x_lab = "Number of same-wiki results returned", y_lab = "Number of searches", title = paste("Number of searches with n same-wiki result returned, by test group", switch(by_wiki, "and wiki", NULL))) } @@ -51,7 +51,7 @@ offset >= 100 ~ "100+ results", TRUE ~ Pluralize(offset, "result") )) %>% - dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>% + dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) %>% group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", "offset"))) %>% tally %>% bar_chart(x = "offset", y = "n", x_lab = "Offset", y_lab = "Number of SERPs", diff --git a/report.Rmd b/report.Rmd index 50e4f09..1efdef5 100644 --- a/report.Rmd +++ b/report.Rmd @@ -16,6 +16,7 @@ tss2_revision: 16909631 wiki: enwiki test_group_names: [explore_similar_control, explore_similar_test] + interleaved_group_names: [ltr-i-1024] event_action: [searchResultPage, click, ssclick, visitPage, checkin, hover-on, hover-off, esclick] event_source: "fulltext" other_filter: "event_subTest IS NOT NULL" @@ -110,7 +111,7 @@ SERPs = sum(events$event == "searchResultPage"), `Unique search queries` = length(unique(paste(events$session_id, events$query_hash))), # ^ includes other events besides SERP - Searches = length(unique(events$serp_id[!is.na(events$serp_id)])), + Searches = length(unique(events$search_id[!is.na(events$search_id)])), `Same-wiki clicks` = sum(events$event == "click"), `Other clicks` = sum(grepl("click", events$event) & events$event != "click") ) %>% @@ -250,9 +251,6 @@ target = 'row', backgroundColor = DT::styleInterval(2, c('transparent', 'yellow')) ) -``` -```{r ua_cleanup, eval=("user_agent" %in% names(events))} -rm(os_summary, browser_summary, os_summary_counts_cols_index, browser_summary_counts_cols_index) ``` `r if (any(exists("serp_iw"), exists("ssclick_des"), "ssclick" %in% events$event, "iwclick" %in% events$event)) { "### Sister Search {.tabset}" }` @@ -492,3 +490,35 @@ ```{r serp_loadtime_wiki, eval=(n_wiki > 1), results='asis'} cat("![](reports/figures/serp_loadtime_wiki.png)") ``` + +`r if (!is.null(report_params$interleaved_group_names)) { "## Interleaved test" }` + +`r if (!is.null(report_params$interleaved_group_names)) { "We use a technique called interleaving to evaluate the user-perceived relevance of search results from the experimental configuration. In it, each user is their own baseline -- we perform two searches behind the scenes and then interleave them together into a single set of results using the team draft algorithm described by Chapelle et al. (2012). For all the graphs in this section, A refers to control group or the first group in the interleaved group names, B referes to the name in the interleaved group names or the second group in the interleaved group names. For example, if the interleaved group name is 'ltr-i-1024', A is the control group and B is group '1024'; if the interleaved group name is 'ltr-i-20-1024', A is group '20' and B is group '1024'." }` + +`r if (!is.null(report_params$interleaved_group_names)) { "### Preference" }` + +```{r interleaved_pref_overall, eval=(!is.null(params$interleaved_group_names)), results='asis'} +cat("![](reports/figures/interleaved_pref_overall.png)") +``` + +```{r interleaved_pref_overall_wiki, eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'} +cat("![](reports/figures/interleaved_pref_overall_wiki.png)") +``` + +```{r interleaved_pref_daily, eval=(!is.null(params$interleaved_group_names)), results='asis'} +cat("![](reports/figures/interleaved_pref_daily.png)") +``` + +```{r interleaved_pref_daily_wiki, eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'} +cat("![](reports/figures/interleaved_pref_daily_wiki.png)") +``` + +`r if (!is.null(report_params$interleaved_group_names)) { "### Page visit times" }` + +```{r interleaved_survival_all, eval=(!is.null(params$interleaved_group_names)), results='asis'} +cat("![](reports/figures/interleaved_survival_all.png)") +``` + +```{r interleaved_survival_wiki, eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'} +cat("![](reports/figures/interleaved_survival_wiki.png)") +``` diff --git a/run.R b/run.R index 526098b..6bf76bc 100644 --- a/run.R +++ b/run.R @@ -31,6 +31,7 @@ opt <- parse_args(OptionParser(option_list = option_list)) # Set up +# report_params <- yaml::yaml.load_file("reports/test.yaml") report_params <- yaml::yaml.load_file(opt$yaml_file) if (!dir.exists("reports")) { dir.create("reports") @@ -59,7 +60,13 @@ source("modules/explore_similar/esclicks.R") source("modules/explore_similar/hover_over.R") -# Statistical test +# Interleaved test +source("modules/interleaved_test/data_processing.R") +source("modules/interleaved_test/interleaved_preference.R") +source("modules/interleaved_test/page_dwelltime.R") + +# Statistical test (traditional) +source("modules/stat_test/remove_interleaved_data.R") source("modules/stat_test/zrr.R") source("modules/stat_test/engagement.R") source("modules/stat_test/first_clicked.R") @@ -73,8 +80,8 @@ source("modules/stat_test/serp_load_time.R") # Render report -# rmarkdown::render("report.Rmd", output_file = output_report_name, params = report_params) -rmarkdown::render("report.Rmd", output_file = "reports/test.html", params = report_params) +rmarkdown::render("report.Rmd", output_file = output_report_name, params = report_params) +# rmarkdown::render("report.Rmd", output_file = "reports/test.html", params = report_params) if (!report_params$debug) { unlink(file.path("reports", "figures"), recursive = TRUE) # remove the temp figures directory } -- To view, visit https://gerrit.wikimedia.org/r/392102 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I795023856963030e67e85e1cde7352842aa3a7a8 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/discovery/autoreporter Gerrit-Branch: master Gerrit-Owner: Chelsyx <c...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits