Bearloga has uploaded a new change for review.
https://gerrit.wikimedia.org/r/276656
Change subject: Update reference to TestSearchSatisfaction2 table
......................................................................
Update reference to TestSearchSatisfaction2 table
+ Make sure timestamps are sorted before being processed by dwell_time
Change-Id: I5a25e0f95e3638a1aecaebfb967e275b99285f13
---
M search/LDN.R
M search/dwelltime.R
2 files changed, 9 insertions(+), 10 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden refs/changes/56/276656/1
diff --git a/search/LDN.R b/search/LDN.R
index 2172eba..51de332 100644
--- a/search/LDN.R
+++ b/search/LDN.R
@@ -4,7 +4,7 @@
# Per-file config:
base_path <- paste0(write_root, "search/")
-main <- function(date = NULL, table = "TestSearchSatisfaction2_14098806") {
+main <- function(date = NULL, table = "TestSearchSatisfaction2_15357244") {
checkins <- c(0, 10, 20, 30, 40, 50, 60, 90, 120, 150, 180, 210, 240, 300,
360, 420)
# ^ this will be used for figuring out the interval bounds for each check-in
@@ -13,18 +13,14 @@
data <- wmf::build_query(fields = "SELECT * ",
date = date,
table = table,
- conditionals = "event_subTest IS NULL")
+ conditionals = "event_subTest IS NULL
+ AND event_source = 'fulltext'")
data <- data.table::as.data.table(data)
data$timestamp <- lubridate::ymd_hms(data$timestamp)
- # Backwards-compatibility:
- if ( table == "TestSearchSatisfaction2_14098806" ) {
- data.table::setnames(data, "event_pageViewId", "event_pageId")
- }
-
# Treat each individual search session as its own thing, rather than belonging
# to a set of other search sessions by the same user.
- page_visits <- plyr::ddply(data, .(event_searchSessionId, event_pageId),
+ page_visits <- plyr::ddply(data, .(event_searchSessionId, event_pageViewId),
function(session) {
if (!all(c('visitPage', 'checkin') %in%
session$event_action)) {
return(NULL)
diff --git a/search/dwelltime.R b/search/dwelltime.R
index 93c8e79..4c9eb07 100644
--- a/search/dwelltime.R
+++ b/search/dwelltime.R
@@ -1,7 +1,7 @@
# Per-file config:
base_path <- paste0(write_root, "search/")
-main <- function(date = NULL, table = "TestSearchSatisfaction2_14098806"){
+main <- function(date = NULL, table = "TestSearchSatisfaction2_15357244"){
# Retrieve data
data <- wmf::build_query(fields = "
@@ -9,8 +9,11 @@
timestamp",
date = date,
table = table,
- conditionals = "event_action
IN('searchResultPage','visitPage') AND event_subTest IS NULL")
+ conditionals = "event_action
IN('searchResultPage','visitPage')
+ AND event_subTest IS NULL
+ AND event_source = 'fulltext'")
data$timestamp <- lubridate::ymd_hms(data$timestamp)
+ data <- data[order(data$session_id, data$timestamp), ]
# Generate the data
if(is.null(date)){
--
To view, visit https://gerrit.wikimedia.org/r/276656
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I5a25e0f95e3638a1aecaebfb967e275b99285f13
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: Bearloga <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits