Chelsyx has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/392102 )

Change subject: Add interleaved test analysis
......................................................................

Add interleaved test analysis

Bug: T176493
Change-Id: I795023856963030e67e85e1cde7352842aa3a7a8
---
M functions.R
M modules/data/data_aggregation.R
M modules/data/data_cleansing.R
M modules/data/fetch_data.R
A modules/interleaved_test/data_processing.R
A modules/interleaved_test/interleaved_preference.R
A modules/interleaved_test/page_dwelltime.R
M modules/setup.R
M modules/sister_search/sidebar_results.R
M modules/stat_test/engagement.R
A modules/stat_test/remove_interleaved_data.R
M modules/stat_test/return_rate.R
M modules/stat_test/serp_from_autocomplete.R
M modules/stat_test/serp_offset.R
M modules/stat_test/visited_page.R
M modules/test_summary/browser_os.R
M modules/test_summary/events.R
M modules/test_summary/searches.R
M report.Rmd
M run.R
20 files changed, 388 insertions(+), 65 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/autoreporter 
refs/changes/02/392102/1

diff --git a/functions.R b/functions.R
index a4c6367..a7dff7e 100644
--- a/functions.R
+++ b/functions.R
@@ -79,3 +79,59 @@
     ggplot2::scale_color_brewer(palette = "Set1") +
     ggplot2::labs(x = NULL, color = "Group", y = y_lab, title = title, 
subtitle = subtitle)
 }
+
+cppFunction('CharacterVector fill_in(CharacterVector ids) {
+  CharacterVector new_ids(ids.size());
+  String current_id = ids[0];
+  new_ids[0] = current_id;
+  for (int i = 1; i < ids.size(); i++) {
+    if (ids[i] != NA_STRING) {
+      current_id = ids[i];
+    }
+    new_ids[i] = current_id;
+  }
+  return new_ids;
+}')
+
+cppFunction('NumericVector cumunique(CharacterVector ids) {
+  NumericVector count(ids.size());
+  String current_id = ids[0];
+  count[0] = 1;
+  for (int i = 1; i < ids.size(); i++) {
+    if (ids[i] == current_id) {
+      count[i] = count[i-1];
+    } else {
+      count[i] = count[i-1] + 1;
+      current_id = ids[i];
+    }
+  }
+  return count;
+}')
+
+# Process interleaved team draft
+process_session <- function(df) {
+  processed_session <- unsplit(lapply(split(df, df$serp_id), function(df) {
+    if (is.na(df$event_extraParams[1]) || df$event_extraParams[1] == "") {
+      visited_pages <- rep(as.character(NA), times = nrow(df))
+    } else {
+      from_json <- jsonlite::fromJSON(df$event_extraParams[1], simplifyVector 
= FALSE)
+      if (!("teamDraft" %in% names(from_json)) || all(is.na(df$article_id))) {
+        visited_pages <- rep(as.character(NA), times = nrow(df))
+      } else {
+        team_a <- unlist(from_json$teamDraft$a)
+        team_b <- unlist(from_json$teamDraft$b)
+        visited_pages <- vapply(df$article_id, function(article_id) {
+          if (article_id %in% team_a) {
+            return("A")
+          } else if (article_id %in% team_b) {
+            return("B")
+          } else {
+            return(as.character(NA))
+          }
+        }, "")
+      }
+    }
+    return(visited_pages)
+  }), df$serp_id)
+  return(processed_session)
+}
diff --git a/modules/data/data_aggregation.R b/modules/data/data_aggregation.R
index db5a178..6115703 100644
--- a/modules/data/data_aggregation.R
+++ b/modules/data/data_aggregation.R
@@ -8,9 +8,9 @@
 
 message("Aggregating by search...")
 searches <- events %>%
-  keep_where(!(is.na(serp_id))) %>% # remove visitPage and checkin events
-  arrange(date, session_id, serp_id, timestamp) %>%
-  group_by(group, wiki, session_id, serp_id) %>%
+  keep_where(!(is.na(search_id))) %>% # remove visitPage and checkin events
+  arrange(date, session_id, search_id, timestamp) %>%
+  group_by(group, wiki, session_id, search_id) %>%
   summarize(
     date = date[1],
     timestamp = timestamp[1],
@@ -56,19 +56,19 @@
     keep_where(event == "searchResultPage", `some same-wiki results` == 
"TRUE") %>%
     # SERPs with 0 results will not have an offset in extraParams ^
     mutate(offset = purrr::map_int(event_extraParams, ~ parse_extraParams(.x, 
action = "searchResultPage")$offset)) %>%
-    select(session_id, event_id, serp_id, offset)
+    select(session_id, event_id, search_id, offset)
 
   message("Processing SERP interwiki data...")
-  extract_iw <- function(session_id, event_id, serp_id, event_extraParams) {
+  extract_iw <- function(session_id, event_id, search_id, event_extraParams) {
     return(data.frame(
-      session_id, event_id, serp_id,
+      session_id, event_id, search_id,
       parse_extraParams(event_extraParams, action = "searchResultPage")$iw,
       stringsAsFactors = FALSE
     ))
   }
   serp_iw <- events %>%
     keep_where(event == "searchResultPage") %>%
-    select(session_id, event_id, serp_id, event_extraParams) %>%
+    select(session_id, event_id, search_id, event_extraParams) %>%
     purrr::pmap_df(extract_iw) %>%
     mutate(source = case_when(
       source == "wikt" ~ "Wiktionary",
@@ -104,12 +104,12 @@
   esclick_result <- events %>%
     keep_where(event == "esclick") %>%
     cbind(purrr::map_df(.$event_extraParams, parse_extraParams, action = 
"esclick")) %>%
-    select(group, wiki, session_id, serp_id, page_id, event_id, hoverId, 
section, result, position)
+    select(group, wiki, session_id, search_id, page_id, event_id, hoverId, 
section, result, position)
 
   searches <- esclick_result %>%
-    group_by(group, wiki, session_id, serp_id) %>%
+    group_by(group, wiki, session_id, search_id) %>%
     summarize(with_esclick = TRUE) %>%
-    dplyr::right_join(searches, by = c("group", "wiki", "session_id", 
"serp_id")) %>%
+    dplyr::right_join(searches, by = c("group", "wiki", "session_id", 
"search_id")) %>%
     dplyr::mutate(with_esclick = ifelse(is.na(with_esclick), FALSE, 
with_esclick))
 }
 
@@ -118,12 +118,12 @@
   hover_over <- events %>%
     keep_where(event %in% c("hover-on", "hover-off")) %>%
     cbind(purrr::map_df(.$event_extraParams, parse_extraParams, action = 
c("hover-on", "hover-off"))) %>%
-    select(timestamp, group, wiki, session_id, serp_id, page_id, event_id, 
event, hoverId, section, results)
+    select(timestamp, group, wiki, session_id, search_id, page_id, event_id, 
event, hoverId, section, results)
 
   searches <- hover_over %>%
     keep_where(event == "hover-on") %>%
-    group_by(group, wiki, session_id, serp_id) %>%
+    group_by(group, wiki, session_id, search_id) %>%
     summarize(n_hover = length(unique(hoverId))) %>%
-    dplyr::right_join(searches, by = c("group", "wiki", "session_id", 
"serp_id")) %>%
+    dplyr::right_join(searches, by = c("group", "wiki", "session_id", 
"search_id")) %>%
     dplyr::mutate(n_hover = ifelse(is.na(n_hover), 0, n_hover))
 }
diff --git a/modules/data/data_cleansing.R b/modules/data/data_cleansing.R
index 5fd5d38..c59b798 100644
--- a/modules/data/data_cleansing.R
+++ b/modules/data/data_cleansing.R
@@ -17,6 +17,13 @@
 data_cleansing_info <- paste0("Fulltext search events: Deleted ", 
nrow(events_raw) - nrow(events), " duplicated events.")
 rm(events_raw) # to free up memory
 
+message("Removing unnecessary check-ins...")
+events <- events[order(events$group, events$session_id, events$page_id, 
events$article_id, events$event, events$event_checkin, na.last = FALSE), ]
+extra_checkins <- duplicated(events[, c("group", "session_id", "page_id", 
"article_id", "event")], fromLast = TRUE) & events$event == "checkin"
+data_cleansing_info <- paste0(data_cleansing_info, " Deleted ", 
sum(extra_checkins), " unnecessary check-in events and only keep the last one.")
+events <- events[!extra_checkins, ]
+rm(extra_checkins)
+
 message("Delete events with negative load time...")
 data_cleansing_info <- paste0(data_cleansing_info, " Deleted ", 
sum(events$load_time < 0, na.rm = TRUE), " events with negative load time.")
 events <- events %>%
@@ -25,19 +32,20 @@
 message("De-duplicating SERPs...")
 SERPs <- events %>%
   keep_where(event == "searchResultPage") %>%
-  select(c(session_id, page_id, query_hash, search_token)) %>%
-  group_by(session_id, query_hash) %>%
-  mutate(serp_id = page_id[1], cirrus_id = search_token[1]) %>%
+  dplyr::arrange(wiki, group, session_id, timestamp) %>%
+  select(c(group, session_id, page_id, query_hash)) %>%
+  group_by(group, session_id, query_hash) %>%
+  mutate(search_id = page_id[1]) %>%
   ungroup %>%
-  select(c(page_id, serp_id, cirrus_id))
+  select(c(group, session_id, page_id, search_id))
 events <- events %>%
-  dplyr::left_join(SERPs, by = "page_id")
+  dplyr::left_join(SERPs, by = c("group", "session_id", "page_id"))
 rm(SERPs) # to free up memory
 
 message("Removing events without an associated SERP (orphan clicks and 
check-ins)...")
 n_evnt <- nrow(events)
 events <- events %>%
-  keep_where(!(is.na(serp_id) & !(event %in% c("visitPage", "checkin")))) %>% 
# remove orphan click
+  keep_where(!(is.na(search_id) & !(event %in% c("visitPage", "checkin")))) 
%>% # remove orphan click
   group_by(session_id) %>%
   keep_where("searchResultPage" %in% event) %>% # remove orphan "visitPage" 
and "checkin"
   ungroup
@@ -56,7 +64,7 @@
 message("Remove sessions with more than 100 searches...")
 spider_session <- events %>%
   group_by(date, group, session_id) %>%
-  summarize(n_search = length(unique(serp_id))) %>%
+  summarize(n_search = length(unique(search_id))) %>%
   keep_where(n_search > 100) %>%
   {.$session_id}
 events <- events %>%
@@ -64,6 +72,13 @@
 data_cleansing_info <- paste0(data_cleansing_info, " Removed ", 
length(spider_session), " sessions with more than 100 searches.")
 rm(spider_session)
 
+message("Fill in serp_id for visitedPage and checkin events...")
+events %<>%
+  mutate(serp_id = ifelse(event %in% c("visitPage", "checkin"), NA, page_id),
+         event = factor(event, levels =c("searchResultPage", "click", 
"ssclick", "iwclick", "hover-on", "esclick", "hover-off", "visitPage", 
"checkin"))) %>%
+  dplyr::arrange(wiki, group, session_id, timestamp, event) %>%
+  mutate(serp_id = fill_in(serp_id))
+
 # Number of wikis in the test
 n_wiki <- length(unique(events$wiki))
 message("Number of wikis in the test: ", n_wiki)
diff --git a/modules/data/fetch_data.R b/modules/data/fetch_data.R
index 490aeff..d95f687 100644
--- a/modules/data/fetch_data.R
+++ b/modules/data/fetch_data.R
@@ -3,6 +3,7 @@
     event_uniqueId AS event_id,
     event_mwSessionId,
     event_pageViewId AS page_id,
+    event_articleId AS article_id,
     event_searchSessionId AS session_id,
     event_subTest AS `group`,
     wiki,
@@ -26,7 +27,6 @@
     event_checkin,
     event_extraParams,
     event_msToDisplayResults AS load_time,
-    event_searchToken AS search_token,
     userAgent AS user_agent
   FROM TestSearchSatisfaction2_", report_params$tss2_revision, "\n",
   "WHERE LEFT(timestamp, 8) >= '", report_params$start_date, "' AND 
LEFT(timestamp, 8) < '", report_params$end_date, "' \n",
@@ -35,12 +35,13 @@
   ifelse(is.null(report_params$event_action), "", paste0("  AND event_action 
IN('", paste(report_params$event_action, collapse = "', '"), "') \n")),
   ifelse(is.null(report_params$event_source), "", paste0("  AND event_source 
IN('", paste(report_params$event_source, collapse = "', '"), "') \n")),
   ifelse(is.null(report_params$other_filter), "", paste0("  AND ", 
report_params$other_filter, " \n")),
-  "  AND CASE WHEN event_action = 'searchResultPage' THEN 
event_msToDisplayResults IS NOT NULL
-  WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS 
NOT NULL AND event_position > -1
-  WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL
-  WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND 
event_pageViewId IS NOT NULL
-  ELSE TRUE
-  END;"
+  " AND INSTR(userAgent, '\"is_bot\": false') > 0
+    AND CASE WHEN event_action = 'searchResultPage' THEN 
event_msToDisplayResults IS NOT NULL
+      WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position 
IS NOT NULL AND event_position > -1
+      WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL
+      WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND 
event_pageViewId IS NOT NULL
+      ELSE TRUE
+      END;"
 )
 
 # Fetch full-text search from autocomplete search to compute dwell time on SERP
@@ -63,14 +64,14 @@
   ifelse(is.null(report_params$test_group_names), "", paste0("  AND 
event_subTest IN('", paste(report_params$test_group_names, collapse = "', '"), 
"') \n")),
   ifelse(is.null(report_params$other_filter), "", paste0("  AND ", 
report_params$other_filter, " \n")),
   " AND event_source = 'autocomplete'
-  AND event_articleId IS NULL
-  AND event_action IN('visitPage', 'checkin')
-  AND CASE WHEN event_action = 'searchResultPage' THEN 
event_msToDisplayResults IS NOT NULL
-  WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position IS 
NOT NULL AND event_position > -1
-  WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL
-  WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND 
event_pageViewId IS NOT NULL
-  ELSE TRUE
-  END;"
+    AND event_articleId IS NULL
+    AND event_action IN('visitPage', 'checkin')
+    AND CASE WHEN event_action = 'searchResultPage' THEN 
event_msToDisplayResults IS NOT NULL
+      WHEN event_action IN ('click', 'iwclick', 'ssclick') THEN event_position 
IS NOT NULL AND event_position > -1
+      WHEN event_action = 'visitPage' THEN event_pageViewId IS NOT NULL
+      WHEN event_action = 'checkin' THEN event_checkin IS NOT NULL AND 
event_pageViewId IS NOT NULL
+      ELSE TRUE
+      END;"
 )
 
 if (!is.null(report_params$query)) {
diff --git a/modules/interleaved_test/data_processing.R 
b/modules/interleaved_test/data_processing.R
new file mode 100644
index 0000000..c1bef42
--- /dev/null
+++ b/modules/interleaved_test/data_processing.R
@@ -0,0 +1,26 @@
+if (!is.null(report_params$interleaved_group_names)) {
+
+  message("Separate interleaved dataset")
+  events_interleaved <- events[events$group %in% 
report_params$interleaved_group_names, ]
+  n_interleaved_groups <- length(report_params$interleaved_group_names)
+
+  message("Extracting team draft data so we know which visited result is 
which")
+  events_interleaved %<>%
+    keep_where(events_interleaved$event %in% c('searchResultPage', 'click', 
'visitPage', 'checkin')) %>%
+    dplyr::arrange(wiki, group, session_id, timestamp, event) %>%
+    mutate(search_id = fill_in(search_id))
+  events_interleaved <- data.table::data.table(events_interleaved)
+  events_interleaved[, team := process_session(.SD), by = c("wiki", "group", 
"session_id"), .SDcols = c("serp_id", "event_extraParams", "article_id")]
+  events_interleaved <- events_interleaved[order(events_interleaved$wiki, 
events_interleaved$group, events_interleaved$session_id, 
events_interleaved$serp_id, events_interleaved$timestamp, 
events_interleaved$event), ]
+  events_interleaved[, event_extraParams := NULL, ]
+
+  message("Remove sessions with large numbers of searches")
+  events_interleaved[, valid := length(unique(search_id)) <= 20, by = 
c("date", "wiki", "group", "session_id")]
+  interleaved_visitpage <- events_interleaved[
+    !is.na(team) & team != "" & event == "visitPage" & valid == TRUE,
+    c("date", "wiki", "group", "session_id", "search_id", "team"),
+    with = TRUE
+    ]
+  interleaved_visitpage <- 
interleaved_visitpage[order(interleaved_visitpage$date, 
interleaved_visitpage$wiki, interleaved_visitpage$group, 
interleaved_visitpage$session_id, interleaved_visitpage$search_id, 
interleaved_visitpage$team), ]
+
+}
diff --git a/modules/interleaved_test/interleaved_preference.R 
b/modules/interleaved_test/interleaved_preference.R
new file mode 100644
index 0000000..5318fa8
--- /dev/null
+++ b/modules/interleaved_test/interleaved_preference.R
@@ -0,0 +1,105 @@
+if (!is.null(report_params$interleaved_group_names)) {
+
+  # Overall preference
+  interleaved_pref_overall_function <- function(by_wiki = FALSE, ...) {
+    interleaved_pref_overall <- rbind(
+      interleaved_visitpage[, j = list(
+        "observed" = wmf::interleaved_preference(paste(.SD$session_id), 
.SD$team),
+        "upper" = wmf::interleaved_confint(paste(.SD$session_id), 
.SD$team)$upper,
+        "lower" = wmf::interleaved_confint(paste(.SD$session_id), 
.SD$team)$lower,
+        "method" = "Sampling Sessions"
+      ), by = c(switch(by_wiki, "wiki", NULL), "group")],
+      interleaved_visitpage[, j = list(
+        "observed" = wmf::interleaved_preference(paste(.SD$session_id, 
.SD$search_id), .SD$team),
+        "upper" = wmf::interleaved_confint(paste(.SD$session_id, 
.SD$search_id), .SD$team)$upper,
+        "lower" = wmf::interleaved_confint(paste(.SD$session_id, 
.SD$search_id), .SD$team)$lower,
+        "method" = "Sampling Searches"
+      ), by = c(switch(by_wiki, "wiki", NULL), "group")]
+    )
+    ggplot(
+      mutate(interleaved_pref_overall, method = sub("\\s", "\n", method)),
+      aes(x = method, y = observed)
+    ) +
+      geom_hline(yintercept = 0, linetype = "dashed") +
+      geom_linerange(aes(ymin = lower, ymax = upper)) +
+      geom_label(aes(label = sprintf("%.4f", observed))) +
+      labs(
+        x = "Bootstrap approach", y = "B ← Preference → A",
+        title = paste("Preference for results from two rankers, by group", 
switch(by_wiki, "and wiki", NULL)),
+        caption = "95% confidence intervals were bootstrapped using two 
different sampling approaches"
+      )
+  }
+  p <- interleaved_pref_overall_function() +
+    facet_wrap(~ group) +
+    wmf::theme_facet()
+  ggsave("interleaved_pref_overall.png", p, path = fig_path, units = "in", dpi 
= plot_resolution, height = fig_height, width = fig_width)
+  rm(p)
+
+  # Daily preference
+  interleaved_pref_daily_function <- function(by_wiki = FALSE, ...) {
+    interleaved_pref_daily <- rbind(
+      interleaved_visitpage[, j = list(
+        "observed" = wmf::interleaved_preference(paste(.SD$session_id), 
.SD$team),
+        "upper" = wmf::interleaved_confint(paste(.SD$session_id), 
.SD$team)$upper,
+        "lower" = wmf::interleaved_confint(paste(.SD$session_id), 
.SD$team)$lower,
+        "method" = "Sampling Sessions"
+      ), by = c("date", switch(by_wiki, "wiki", NULL), "group")],
+      interleaved_visitpage[, j = list(
+        "observed" = wmf::interleaved_preference(paste(.SD$session_id, 
.SD$search_id), .SD$team),
+        "upper" = wmf::interleaved_confint(paste(.SD$session_id, 
.SD$search_id), .SD$team)$upper,
+        "lower" = wmf::interleaved_confint(paste(.SD$session_id, 
.SD$search_id), .SD$team)$lower,
+        "method" = "Sampling Searches"
+      ), by = c("date", switch(by_wiki, "wiki", NULL), "group")]
+      ) %>%
+      mutate(preferred = dplyr::if_else(observed > 0, "A", "B")) %>%
+      group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"method", "preferred"))) %>%
+      arrange(date) %>%
+      mutate(counter = cumsum(!is.na(date))) %>%
+      ungroup
+
+    ggplot(keep_where(interleaved_pref_daily, !is.na(observed)), aes(x = date, 
y = observed)) +
+      geom_hline(yintercept = 0, linetype = "dashed") +
+      geom_ribbon(aes(ymin = lower, ymax = upper), alpha = 0.25) +
+      geom_line() +
+      geom_segment(
+        aes(xend = date, yend = ifelse(preferred == "A", 0.275, -0.275), color 
= preferred),
+        linetype = "dotted"
+      ) +
+      geom_point(aes(color = preferred)) +
+      geom_text(
+        aes(y = ifelse(preferred == "A", 0.3, -0.3), label = counter, color = 
preferred),
+        show.legend = FALSE, fontface = "bold"
+      ) +
+      scale_color_brewer(palette = "Set1") +
+      labs(
+        x = "Date", y = "B ← Preference → A",
+        title = paste("Preference for results from two rankers, daily by 
group", switch(by_wiki, "and wiki", NULL)),
+        subtitle = "Showing counts of how many times users preferred one 
ranking over the other",
+        caption = "95% confidence intervals were bootstrapped using two 
different sampling approaches"
+      )
+  }
+  p <- interleaved_pref_daily_function() +
+    facet_grid(group ~ method, scales = "free_y") +
+    wmf::theme_facet()
+  ggsave("interleaved_pref_daily.png", p, path = fig_path, units = "in", dpi = 
plot_resolution, height = ifelse(n_interleaved_groups < 2, fig_height, 
3*n_interleaved_groups), width = fig_width)
+  rm(p)
+
+  # By wiki
+  if (n_wiki > 1) {
+
+    p <- interleaved_pref_overall_function(by_wiki = TRUE) +
+      facet_grid(wiki ~ group, scales = "free_y") +
+      wmf::theme_facet()
+    ggsave("interleaved_pref_overall_wiki.png", p, path = fig_path, units = 
"in", dpi = plot_resolution, height = 3 * n_wiki, width = fig_width)
+    rm(p)
+
+    p <- interleaved_pref_daily_function(by_wiki = TRUE) +
+      facet_grid(wiki + group ~ method, scales = "free_y") +
+      wmf::theme_facet()
+    ggsave("interleaved_pref_daily_wiki.png", p, path = fig_path, units = 
"in", dpi = plot_resolution, height = 3*n_interleaved_groups*n_wiki, width = 
fig_width)
+    rm(p)
+
+  }
+
+  rm(interleaved_visitpage)
+}
diff --git a/modules/interleaved_test/page_dwelltime.R 
b/modules/interleaved_test/page_dwelltime.R
new file mode 100644
index 0000000..d48ef08
--- /dev/null
+++ b/modules/interleaved_test/page_dwelltime.R
@@ -0,0 +1,62 @@
+if (!is.null(report_params$interleaved_group_names)) {
+
+  temp <- dplyr::inner_join(
+    visitedPages[visitedPages$group %in% 
report_params$interleaved_group_names, ],
+    events_interleaved[
+      !is.na(team) & team != "" & event %in% c("visitPage", "checkin") & valid 
== TRUE,
+      c("wiki", "group", "session_id", "page_id", "team"),
+      with = TRUE
+      ],
+    by = c("wiki", "group", "session_id", "page_id")
+    )
+  temp$SurvObj <- with(temp, survival::Surv(dwell_time, status == 2))
+
+  fit <- survival::survfit(SurvObj ~ group + team, data = temp)
+  ggsurv <- survminer::ggsurvplot(
+    fit,
+    conf.int = TRUE,
+    xlab = "T (Dwell Time in seconds)",
+    ylab = "Proportion of visits longer than T (P%)",
+    surv.scale = "percent",
+    palette = "Dark2",
+    legend = "bottom",
+    legend.title = "Group",
+    ggtheme = wmf::theme_facet()
+  )
+  p <- ggsurv$plot +
+    facet_wrap(~ group, scales = "free_y") +
+    labs(
+      title = "How long users stay on each team's results",
+      subtitle = "With 95% confidence intervals."
+    )
+  ggsave("interleaved_survival_all.png", p, path = fig_path, units = "in", dpi 
= plot_resolution, height = fig_height, width = fig_width)
+  rm(p)
+
+  if (n_wiki > 1) {
+
+    fit <- survival::survfit(SurvObj ~ group + wiki + team, data = temp)
+    ggsurv <- survminer::ggsurvplot(
+      fit,
+      conf.int = TRUE,
+      xlab = "T (Dwell Time in seconds)",
+      ylab = "Proportion of visits longer than T (P%)",
+      surv.scale = "percent",
+      palette = colorRampPalette(RColorBrewer::brewer.pal(9, 
"Set1"))(n_wiki*length(report_params$test_group_names)),
+      legend = "bottom",
+      legend.title = "Group",
+      ggtheme = wmf::theme_facet()
+    )
+    p <- ggsurv$plot +
+      facet_grid(wiki ~ group, scales = "free_y") +
+      labs(
+        title = "How long users stay on each team's results, by wiki",
+        subtitle = "With 95% confidence intervals."
+      )
+    ggsave("interleaved_survival_wiki.png", p, path = fig_path, units = "in", 
dpi = plot_resolution, height = 4 * n_wiki, width = fig_width)
+    rm(p)
+
+  }
+
+  rm(temp)
+
+}
diff --git a/modules/setup.R b/modules/setup.R
index fc697bf..d22a1f1 100644
--- a/modules/setup.R
+++ b/modules/setup.R
@@ -3,6 +3,7 @@
 suppressPackageStartupMessages({
   library(magrittr)
   library(ggplot2)
+  library(Rcpp)
   import::from(
     # We don't import certain verbs (e.g. distinct, left_join, bind_rows)
     # to avoid potential name-conflicts and because they're one-time use.
diff --git a/modules/sister_search/sidebar_results.R 
b/modules/sister_search/sidebar_results.R
index c6628fa..b78c3b9 100644
--- a/modules/sister_search/sidebar_results.R
+++ b/modules/sister_search/sidebar_results.R
@@ -11,7 +11,7 @@
   serp_iw_breakdown_function <- function(by_wiki = FALSE, ...) {
     serp_iw %>%
       keep_where(!is.na(source), !is.na(position)) %>%
-      dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>%
+      dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) 
%>%
       mutate(position = safe_ordinals(position)) %>%
       group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"source", "position"))) %>%
       summarize(counts = length(unique(event_id))) %>%
@@ -34,7 +34,7 @@
 
   p <- serp_iw %>%
     keep_where(!is.na(source)|!is.na(position)) %>%
-    dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>%
+    dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) %>%
     group_by(group, wiki) %>%
     summarize(counts = length(unique(event_id))) %>%
     bar_chart(x = "wiki", y = "counts", x_lab = "Wiki", y_lab = "Number of 
SERPs",
diff --git a/modules/stat_test/engagement.R b/modules/stat_test/engagement.R
index 2f0f515..d66ace6 100644
--- a/modules/stat_test/engagement.R
+++ b/modules/stat_test/engagement.R
@@ -28,8 +28,8 @@
 }
 
 # Engagement odds ratio for all
-control_group <- grep("control", report_params$test_group_names, value = TRUE)
-test_group <- setdiff(report_params$test_group_names, control_group)
+control_group <- grep("control", traditional_test_groups, value = TRUE)
+test_group <- setdiff(traditional_test_groups, control_group)
 
 for (this_group in test_group) {
   this_plot <- samewiki_ctr_all %>%
diff --git a/modules/stat_test/remove_interleaved_data.R 
b/modules/stat_test/remove_interleaved_data.R
new file mode 100644
index 0000000..681ea77
--- /dev/null
+++ b/modules/stat_test/remove_interleaved_data.R
@@ -0,0 +1,18 @@
+if (!is.null(report_params$interleaved_group_names)) {
+  message("Remove interleaved data...")
+
+  events <- events[!(events$group %in% report_params$interleaved_group_names), 
]
+  searches <- searches[!(searches$group %in% 
report_params$interleaved_group_names), ]
+
+   if (all(c("visitPage", "checkin") %in% events$event)) {
+    visitedPages <- visitedPages[!(visitedPages$group %in% 
report_params$interleaved_group_names), ]
+   }
+
+  if (exists("fulltext_from_auto")) {
+    fulltext_from_auto <- fulltext_from_auto[!(fulltext_from_auto$group %in% 
report_params$interleaved_group_names), ]
+    serp_from_auto <- serp_from_auto[!(serp_from_auto$group %in% 
report_params$interleaved_group_names), ]
+  }
+
+}
+
+traditional_test_groups <- setdiff(report_params$test_group_names, 
report_params$interleaved_group_names)
diff --git a/modules/stat_test/return_rate.R b/modules/stat_test/return_rate.R
index 01f9eff..3705c9a 100644
--- a/modules/stat_test/return_rate.R
+++ b/modules/stat_test/return_rate.R
@@ -1,14 +1,14 @@
 return_rate_function <- function(by_wiki = FALSE, ...) {
   returnRate_to_same_search <- events %>%
     keep_where(!(event %in% c("visitPage", "checkin"))) %>%
-    group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"serp_id"))) %>%
+    group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"search_id"))) %>%
     keep_where(sum(grepl("click", event)) > 0) %>% # Among search with at 
least 1 click
-    arrange(group, serp_id, timestamp) %>%
+    arrange(group, search_id, timestamp) %>%
     mutate(n_click_cumsum = cumsum(grepl("click", event))) %>%
     keep_where(n_click_cumsum > 0) %>% # delete serp and hover before first 
click
     summarize(comeback = sum(event %in% c("searchResultPage", "hover-on", 
"hover-off")) > 0 | sum(n_click_cumsum > 1) > 0) %>% # comeback to the same 
serp or make another click or hover
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>%
-    summarize(return_to_same_search = sum(comeback), n_search = 
length(unique(serp_id))) %>%
+    summarize(return_to_same_search = sum(comeback), n_search = 
length(unique(search_id))) %>%
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>%
     dplyr::do(binom::binom.bayes(.$return_to_same_search, .$n_search, 
conf.level = 0.95, tol = 1e-9))
 
@@ -19,7 +19,7 @@
     arrange(group, session_id, timestamp) %>%
     mutate(n_click_cumsum = cumsum(grepl("click", event))) %>%
     keep_where(n_click_cumsum > 0) %>% # delete serp before first click
-    summarize(another_search = length(unique(serp_id)) > 1) %>% # comeback to 
make another search
+    summarize(another_search = length(unique(search_id)) > 1) %>% # comeback 
to make another search
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>%
     summarize(return_to_make_another_search = sum(another_search), n_session = 
length(unique(session_id))) %>%
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>%
diff --git a/modules/stat_test/serp_from_autocomplete.R 
b/modules/stat_test/serp_from_autocomplete.R
index dfd9a71..58671ed 100644
--- a/modules/stat_test/serp_from_autocomplete.R
+++ b/modules/stat_test/serp_from_autocomplete.R
@@ -37,7 +37,7 @@
     palette = "Set1",
     legend = "bottom",
     legend.title = "Group",
-    legend.labs = params$test_group_names,
+    legend.labs = traditional_test_groups,
     ggtheme = wmf::theme_min()
   )
   p <- ggsurv$plot +
@@ -59,7 +59,7 @@
       xlab = "T (Dwell Time in seconds)",
       ylab = "Proportion of SERPs longer than T (P%)",
       surv.scale = "percent",
-      palette = colorRampPalette(RColorBrewer::brewer.pal(9, 
"Set1"))(n_wiki*length(report_params$test_group_names)),
+      palette = colorRampPalette(RColorBrewer::brewer.pal(9, 
"Set1"))(n_wiki*length(traditional_test_groups)),
       legend = "bottom",
       legend.title = "Group",
       ggtheme = wmf::theme_facet()
diff --git a/modules/stat_test/serp_offset.R b/modules/stat_test/serp_offset.R
index 80e8228..80820af 100644
--- a/modules/stat_test/serp_offset.R
+++ b/modules/stat_test/serp_offset.R
@@ -1,9 +1,9 @@
 if (exists("serp_offset")) {
   search_offset_function <- function(by_wiki = FALSE, ...) {
     serp_offset %>%
-      group_by(session_id, serp_id) %>%
+      group_by(session_id, search_id) %>%
       summarize(`Any page-turning` = any(offset > 0)) %>%
-      dplyr::right_join(searches, by = c("session_id", "serp_id")) %>%
+      dplyr::right_join(searches, by = c("session_id", "search_id")) %>%
       group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group"))) %>%
       summarize(page_turn = sum(`Any page-turning`, na.rm = TRUE), n_search = 
n()) %>%
       ungroup %>%
diff --git a/modules/stat_test/visited_page.R b/modules/stat_test/visited_page.R
index 22058c6..291d215 100644
--- a/modules/stat_test/visited_page.R
+++ b/modules/stat_test/visited_page.R
@@ -12,7 +12,7 @@
     palette = "Set1",
     legend = "bottom",
     legend.title = "Group",
-    legend.labs = report_params$test_group_names,
+    legend.labs = traditional_test_groups,
     ggtheme = wmf::theme_min()
   )
   p <- ggsurv$plot +
@@ -21,7 +21,7 @@
       subtitle = "With 95% confidence intervals."
     )
   ggsave("survival_visitedPages_all.png", p, path = fig_path, units = "in", 
dpi = plot_resolution, height = fig_height, width = fig_width)
-  rm(temp, p)
+  rm(p)
 
   scroll_function <- function(by_wiki = FALSE, ...) {
     visitedPages %>%
@@ -41,8 +41,6 @@
 
   if (n_wiki > 1) {
     # TODO: duplicated code survival_all
-    temp <- visitedPages
-    temp$SurvObj <- with(temp, survival::Surv(dwell_time, status == 2))
     fit <- survival::survfit(SurvObj ~ group + wiki, data = temp)
     ggsurv <- survminer::ggsurvplot(
       fit,
@@ -50,7 +48,7 @@
       xlab = "T (Dwell Time in seconds)",
       ylab = "Proportion of visits longer than T (P%)",
       surv.scale = "percent",
-      palette = colorRampPalette(RColorBrewer::brewer.pal(9, 
"Set1"))(n_wiki*length(report_params$test_group_names)),
+      palette = colorRampPalette(RColorBrewer::brewer.pal(9, 
"Set1"))(n_wiki*length(traditional_test_groups)),
       legend = "bottom",
       legend.title = "Group",
       ggtheme = wmf::theme_facet()
@@ -62,7 +60,7 @@
         subtitle = "With 95% confidence intervals."
       )
     ggsave("survival_visitedPages_wiki.png", p, path = fig_path, units = "in", 
dpi = plot_resolution, height = 4 * n_wiki, width = fig_width)
-    rm(temp, p)
+    rm(p)
 
     p <- scroll_function(by_wiki = TRUE) +
       facet_wrap(~ wiki, ncol = 3, scales = "free_y") +
@@ -71,4 +69,6 @@
     rm(p)
   }
 
+  rm(temp)
+
 }
diff --git a/modules/test_summary/browser_os.R 
b/modules/test_summary/browser_os.R
index a784d2c..0c767ac 100644
--- a/modules/test_summary/browser_os.R
+++ b/modules/test_summary/browser_os.R
@@ -17,14 +17,14 @@
       tally %>%
       mutate(prop = paste0(scales::percent_format()(n/sum(n)), " (", n, ")")) 
%>%
       select(-n) %>%
-      tidyr::spread(group, prop) %>%
+      tidyr::spread(group, prop, fill = "0% (0)") %>%
       ungroup
   }
 
   get_bayes_factor <- function(data) {
     BF <- data %>%
       tally %>%
-      tidyr::spread(group, n) %>%
+      tidyr::spread(group, n, fill = 0) %>%
       ungroup %>%
       select(dplyr::one_of(report_params$test_group_names)) %>%
       as.matrix() %>%
diff --git a/modules/test_summary/events.R b/modules/test_summary/events.R
index c4e8484..254ffc9 100644
--- a/modules/test_summary/events.R
+++ b/modules/test_summary/events.R
@@ -16,7 +16,9 @@
     mutate(event = factor(event, levels =c("visitPage", "checkin"))) %>% # 
Order the bars
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"event"))) %>%
     tally %>%
-    bar_chart(x = "event", y = "n", x_lab = "Event type", y_lab = "Number of 
events", title = paste("Number of events on articles after clickthrough, by 
test group", switch(by_wiki, "and wiki", NULL)))
+    bar_chart(x = "event", y = "n", x_lab = "Event type", y_lab = "Number of 
events",
+              title = paste("Number of events on articles after clickthrough, 
by test group", switch(by_wiki, "and wiki", NULL)),
+              subtitle = "Only the last check-in events are kept.")
 }
 p <- event_after_click_function() + wmf::theme_min()
 ggsave("event_after_click_all.png", p, path = fig_path, units = "in", dpi = 
plot_resolution, height = fig_height, width = fig_width)
diff --git a/modules/test_summary/searches.R b/modules/test_summary/searches.R
index a1b0183..15b5fe2 100644
--- a/modules/test_summary/searches.R
+++ b/modules/test_summary/searches.R
@@ -25,7 +25,7 @@
     keep_where(event == "searchResultPage") %>%
     mutate(results = if_else(n_results >= 5, "5+ results", 
Pluralize(n_results, "result"))) %>%
     group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"results"))) %>%
-    summarize(searches = length(unique(serp_id[!is.na(serp_id)]))) %>%
+    summarize(searches = length(unique(search_id[!is.na(search_id)]))) %>%
     bar_chart(x = "results", y = "searches", x_lab = "Number of same-wiki 
results returned",
               y_lab = "Number of searches", title = paste("Number of searches 
with n same-wiki result returned, by test group", switch(by_wiki, "and wiki", 
NULL)))
 }
@@ -51,7 +51,7 @@
         offset >= 100 ~ "100+ results",
         TRUE ~ Pluralize(offset, "result")
       )) %>%
-      dplyr::left_join(events, by = c("session_id", "event_id", "serp_id")) %>%
+      dplyr::left_join(events, by = c("session_id", "event_id", "search_id")) 
%>%
       group_by(!!! rlang::syms(c(switch(by_wiki, "wiki", NULL), "group", 
"offset"))) %>%
       tally %>%
       bar_chart(x = "offset", y = "n", x_lab = "Offset", y_lab = "Number of 
SERPs",
diff --git a/report.Rmd b/report.Rmd
index 50e4f09..1efdef5 100644
--- a/report.Rmd
+++ b/report.Rmd
@@ -16,6 +16,7 @@
   tss2_revision: 16909631
   wiki: enwiki
   test_group_names: [explore_similar_control, explore_similar_test]
+  interleaved_group_names: [ltr-i-1024]
   event_action: [searchResultPage, click, ssclick, visitPage, checkin, 
hover-on, hover-off, esclick]
   event_source: "fulltext"
   other_filter: "event_subTest IS NOT NULL"
@@ -110,7 +111,7 @@
   SERPs = sum(events$event == "searchResultPage"),
   `Unique search queries` = length(unique(paste(events$session_id, 
events$query_hash))),
   # ^ includes other events besides SERP
-  Searches = length(unique(events$serp_id[!is.na(events$serp_id)])),
+  Searches = length(unique(events$search_id[!is.na(events$search_id)])),
   `Same-wiki clicks` = sum(events$event == "click"),
   `Other clicks` = sum(grepl("click", events$event) & events$event != "click")
 ) %>%
@@ -250,9 +251,6 @@
     target = 'row',
     backgroundColor = DT::styleInterval(2, c('transparent', 'yellow'))
   )
-```
-```{r ua_cleanup, eval=("user_agent" %in% names(events))}
-rm(os_summary, browser_summary, os_summary_counts_cols_index, 
browser_summary_counts_cols_index)
 ```
 
 `r if (any(exists("serp_iw"), exists("ssclick_des"), "ssclick" %in% 
events$event, "iwclick" %in% events$event)) { "### Sister Search {.tabset}" }`
@@ -492,3 +490,35 @@
 ```{r serp_loadtime_wiki, eval=(n_wiki > 1), results='asis'}
 cat("![](reports/figures/serp_loadtime_wiki.png)")
 ```
+
+`r if (!is.null(report_params$interleaved_group_names)) { "## Interleaved 
test" }`
+
+`r if (!is.null(report_params$interleaved_group_names)) { "We use a technique 
called interleaving to evaluate the user-perceived relevance of search results 
from the experimental configuration. In it, each user is their own baseline -- 
we perform two searches behind the scenes and then interleave them together 
into a single set of results using the team draft algorithm described by 
Chapelle et al. (2012). For all the graphs in this section, A refers to control 
group or the first group in the interleaved group names, B referes to the name 
in the interleaved group names or the second group in the interleaved group 
names. For example, if the interleaved group name is 'ltr-i-1024', A is the 
control group and B is group '1024'; if the interleaved group name is 
'ltr-i-20-1024', A is group '20' and B is group '1024'." }`
+
+`r if (!is.null(report_params$interleaved_group_names)) { "### Preference" }`
+
+```{r interleaved_pref_overall, 
eval=(!is.null(params$interleaved_group_names)), results='asis'}
+cat("![](reports/figures/interleaved_pref_overall.png)")
+```
+
+```{r interleaved_pref_overall_wiki, 
eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'}
+cat("![](reports/figures/interleaved_pref_overall_wiki.png)")
+```
+
+```{r interleaved_pref_daily, eval=(!is.null(params$interleaved_group_names)), 
results='asis'}
+cat("![](reports/figures/interleaved_pref_daily.png)")
+```
+
+```{r interleaved_pref_daily_wiki, 
eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'}
+cat("![](reports/figures/interleaved_pref_daily_wiki.png)")
+```
+
+`r if (!is.null(report_params$interleaved_group_names)) { "### Page visit 
times" }`
+
+```{r interleaved_survival_all, 
eval=(!is.null(params$interleaved_group_names)), results='asis'}
+cat("![](reports/figures/interleaved_survival_all.png)")
+```
+
+```{r interleaved_survival_wiki, 
eval=(!is.null(params$interleaved_group_names) & n_wiki > 1), results='asis'}
+cat("![](reports/figures/interleaved_survival_wiki.png)")
+```
diff --git a/run.R b/run.R
index 526098b..6bf76bc 100644
--- a/run.R
+++ b/run.R
@@ -31,6 +31,7 @@
 opt <- parse_args(OptionParser(option_list = option_list))
 
 # Set up
+# report_params <- yaml::yaml.load_file("reports/test.yaml")
 report_params <- yaml::yaml.load_file(opt$yaml_file)
 if (!dir.exists("reports")) {
   dir.create("reports")
@@ -59,7 +60,13 @@
 source("modules/explore_similar/esclicks.R")
 source("modules/explore_similar/hover_over.R")
 
-# Statistical test
+# Interleaved test
+source("modules/interleaved_test/data_processing.R")
+source("modules/interleaved_test/interleaved_preference.R")
+source("modules/interleaved_test/page_dwelltime.R")
+
+# Statistical test (traditional)
+source("modules/stat_test/remove_interleaved_data.R")
 source("modules/stat_test/zrr.R")
 source("modules/stat_test/engagement.R")
 source("modules/stat_test/first_clicked.R")
@@ -73,8 +80,8 @@
 source("modules/stat_test/serp_load_time.R")
 
 # Render report
-# rmarkdown::render("report.Rmd", output_file = output_report_name, params = 
report_params)
-rmarkdown::render("report.Rmd", output_file = "reports/test.html", params = 
report_params)
+rmarkdown::render("report.Rmd", output_file = output_report_name, params = 
report_params)
+# rmarkdown::render("report.Rmd", output_file = "reports/test.html", params = 
report_params)
 if (!report_params$debug) {
   unlink(file.path("reports", "figures"), recursive = TRUE) # remove the temp 
figures directory
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/392102
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I795023856963030e67e85e1cde7352842aa3a7a8
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/autoreporter
Gerrit-Branch: master
Gerrit-Owner: Chelsyx <c...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to