OliverKeyes has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/273998

Change subject: Add code to generate a rolling window of data
......................................................................

Add code to generate a rolling window of data

Use a 30-day window (and also fix, going forward, the bulking bug
that led to this problem)

Bug: T119448
Change-Id: Ic6bb5b97e5a88b955158c65ee5ef9f18f57218ad
---
M maps/tiles.R
1 file changed, 13 insertions(+), 2 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/wikimedia/discovery/golden refs/changes/98/273998/1

diff --git a/maps/tiles.R b/maps/tiles.R
index dfcf11a..5fef0ad 100644
--- a/maps/tiles.R
+++ b/maps/tiles.R
@@ -43,12 +43,12 @@
   output <- as.data.table(results[, union('date', names(results))])
   with_automata_output <- output[,list(users = length(user_id), total=sum(n), 
average = round(mean(n)), median = ceiling(median(n)),
                                        percentile95 = ceiling(quantile(n, 
0.95)), percentile99 = ceiling(quantile(n, 0.99))),
-                                 by= setdiff(names(output),c("n","user_id", 
"is_automata"))]
+                                 by= setdiff(names(output),c("n","user_id", 
"is_automata", "country"))]
   
   without_automata_output <- output[output$is_automata == FALSE,
                                     list(users = length(user_id), 
total=sum(n), average = round(mean(n)), median = ceiling(median(n)),
                                     percentile95 = ceiling(quantile(n, 0.95)), 
percentile99 = ceiling(quantile(n, 0.99))),
-                                    by= setdiff(names(output),c("n","user_id", 
"is_automata"))]
+                                    by= setdiff(names(output),c("n","user_id", 
"is_automata", "country"))]
   
   # Work out unique users on a per-country basis
   top_countries <- c("RU", "IT", "US", "UA", "FR", "IN", "DE", "ES", "GB")
@@ -63,4 +63,15 @@
   conditional_write(without_automata_output, file.path(base_path, 
"tile_aggregates_no_automata.tsv"))
   conditional_write(user_output, file.path(base_path, "users_by_country.tsv"))
   
+  # Handle rolling window
+  rolling_earliest <- (date - 29)
+  with_automata_rolling <- rbind(readr::read_tsv(file.path(base_path, 
"tile_aggregates_with_automata.tsv")), with_automata_output)
+  with_automata_rolling <- with_automata_rolling[with_automata_rolling$date >= 
rolling_earliest,]
+  write.table(with_automata_rolling, file.path(base_path, 
"tile_aggregates_with_automata_rolling.tsv"),
+              append = FALSE, sep = "\t", row.names = FALSE, quote = FALSE)
+  
+  without_automata_rolling <- rbind(readr::read_tsv(file.path(base_path, 
"tile_aggregates_no_automata.tsv")), without_automata_output)
+  without_automata_rolling <- 
without_automata_rolling[without_automata_rolling$date >= rolling_earliest,]
+  write.table(with_automata_rolling, file.path(base_path, 
"tile_aggregates_no_automata_rolling.tsv"),
+              append = FALSE, sep = "\t", row.names = FALSE, quote = FALSE)
 }

-- 
To view, visit https://gerrit.wikimedia.org/r/273998
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic6bb5b97e5a88b955158c65ee5ef9f18f57218ad
Gerrit-PatchSet: 1
Gerrit-Project: wikimedia/discovery/golden
Gerrit-Branch: master
Gerrit-Owner: OliverKeyes <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to