Bearloga has submitted this change and it was merged.
(https://gerrit.wikimedia.org/r/370977)
Change subject: Remove duplicated clicks on the same position for each query
when computing paulscore
..
Remove duplicated clicks on the same position for each query when computing
paulscore
Bug: T172960
Change-Id: I972500c6150408a119f2c80dad9fe8a49f00845e
---
M modules/metrics/search/paulscore_approximations.R
1 file changed, 21 insertions(+), 9 deletions(-)
Approvals:
Bearloga: Verified; Looks good to me, approved
diff --git a/modules/metrics/search/paulscore_approximations.R
b/modules/metrics/search/paulscore_approximations.R
index 1f7fe9f..4c640ff 100644
--- a/modules/metrics/search/paulscore_approximations.R
+++ b/modules/metrics/search/paulscore_approximations.R
@@ -2,7 +2,10 @@
source("config.R")
.libPaths(r_library)
-suppressPackageStartupMessages(library("optparse"))
+suppressPackageStartupMessages({
+ library("optparse")
+ library("glue")
+})
option_list <- list(
make_option(c("-d", "--date"), default = NA, action = "store", type =
"character"),
@@ -19,10 +22,15 @@
}
# Build query:
-date_clause <- as.character(as.Date(opt$date), format = "LEFT(timestamp, 8) =
'%Y%m%d'")
+mmdd <- format(as.Date(opt$date), "%Y%m%d")
+revision <- dplyr::case_when(
+ as.Date(opt$date) < "2017-02-10" ~ "15922352",
+ as.Date(opt$date) < "2017-06-29" ~ "16270835",
+ TRUE ~ "16909631"
+)
-query <-paste0("SELECT
- DATE('", opt$date, "') AS date,
+query <- glue("SELECT
+ DATE('{opt$date}') AS date,
event_searchSessionId,
event_source,
wiki,
@@ -35,11 +43,15 @@
SUM(IF(event_action = 'click', POW(0.7, event_position), 0)) /
SUM(IF(event_action = 'searchResultPage', 1, 0)) AS pow_7,
SUM(IF(event_action = 'click', POW(0.8, event_position), 0)) /
SUM(IF(event_action = 'searchResultPage', 1, 0)) AS pow_8,
SUM(IF(event_action = 'click', POW(0.9, event_position), 0)) /
SUM(IF(event_action = 'searchResultPage', 1, 0)) AS pow_9
-FROM TestSearchSatisfaction2_", dplyr::if_else(as.Date(opt$date) <
"2017-02-10", "15922352", dplyr::if_else(as.Date(opt$date) < "2017-06-29",
"16270835", "16909631")), "
-WHERE ", date_clause, "
- AND event_action IN ('searchResultPage', 'click')
- AND IF(event_source = 'autocomplete', event_inputLocation = 'header', TRUE)
- AND IF(event_source = 'autocomplete' AND event_action = 'click',
event_position >= 0, TRUE)
+FROM (
+ SELECT DISTINCT
+event_searchSessionId, event_source, wiki, event_action, event_position,
event_pageViewId, event_query
+ FROM TestSearchSatisfaction2_{revision}
+ WHERE LEFT(timestamp, 8) = {mmdd}
+AND event_action IN ('searchResultPage', 'click')
+AND IF(event_source = 'autocomplete', event_inputLocation = 'header', TRUE)
+AND IF(event_source = 'autocomplete' AND event_action = 'click',
event_position >= 0, TRUE)
+) AS deduplicate
GROUP BY date, event_searchSessionId, event_source, wiki;")
# Fetch data from MySQL database:
--
To view, visit https://gerrit.wikimedia.org/r/370977
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I972500c6150408a119f2c80dad9fe8a49f00845e
Gerrit-PatchSet: 2
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: Chelsyx
Gerrit-Reviewer: Bearloga
___
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits