OliverKeyes has uploaded a new change for review.
https://gerrit.wikimedia.org/r/273998
Change subject: Add code to generate a rolling window of data
......................................................................
Add code to generate a rolling window of data
Use a 30-day window (and also fix, going forward, the bulking bug
that led to this problem)
Bug: T119448
Change-Id: Ic6bb5b97e5a88b955158c65ee5ef9f18f57218ad
---
M maps/tiles.R
1 file changed, 13 insertions(+), 2 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden
refs/changes/98/273998/1
diff --git a/maps/tiles.R b/maps/tiles.R
index dfcf11a..5fef0ad 100644
--- a/maps/tiles.R
+++ b/maps/tiles.R
@@ -43,12 +43,12 @@
output <- as.data.table(results[, union('date', names(results))])
with_automata_output <- output[,list(users = length(user_id), total=sum(n),
average = round(mean(n)), median = ceiling(median(n)),
percentile95 = ceiling(quantile(n,
0.95)), percentile99 = ceiling(quantile(n, 0.99))),
- by= setdiff(names(output),c("n","user_id",
"is_automata"))]
+ by= setdiff(names(output),c("n","user_id",
"is_automata", "country"))]
without_automata_output <- output[output$is_automata == FALSE,
list(users = length(user_id),
total=sum(n), average = round(mean(n)), median = ceiling(median(n)),
percentile95 = ceiling(quantile(n, 0.95)),
percentile99 = ceiling(quantile(n, 0.99))),
- by= setdiff(names(output),c("n","user_id",
"is_automata"))]
+ by= setdiff(names(output),c("n","user_id",
"is_automata", "country"))]
# Work out unique users on a per-country basis
top_countries <- c("RU", "IT", "US", "UA", "FR", "IN", "DE", "ES", "GB")
@@ -63,4 +63,15 @@
conditional_write(without_automata_output, file.path(base_path,
"tile_aggregates_no_automata.tsv"))
conditional_write(user_output, file.path(base_path, "users_by_country.tsv"))
+ # Rolling window: keep rows dated on or after (date - 29). NOTE(review): confirm conditional_write above has not already added today's rows to the base files, else rbind duplicates them
+ rolling_earliest <- (date - 29)
+ with_automata_rolling <- rbind(readr::read_tsv(file.path(base_path,
"tile_aggregates_with_automata.tsv")), with_automata_output)
+ with_automata_rolling <- with_automata_rolling[with_automata_rolling$date >=
rolling_earliest,]
+ write.table(with_automata_rolling, file.path(base_path,
"tile_aggregates_with_automata_rolling.tsv"),
+ append = FALSE, sep = "\t", row.names = FALSE, quote = FALSE)
+
+ without_automata_rolling <- rbind(readr::read_tsv(file.path(base_path,
"tile_aggregates_no_automata.tsv")), without_automata_output)
+ without_automata_rolling <-
without_automata_rolling[without_automata_rolling$date >= rolling_earliest,]
+ write.table(without_automata_rolling, file.path(base_path,
"tile_aggregates_no_automata_rolling.tsv"),
+ append = FALSE, sep = "\t", row.names = FALSE, quote = FALSE)
}
--
To view, visit https://gerrit.wikimedia.org/r/273998
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic6bb5b97e5a88b955158c65ee5ef9f18f57218ad
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: OliverKeyes <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits