https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115057
Revision: 115057
Author: rfaulk
Date: 2012-04-26 00:18:32 +0000 (Thu, 26 Apr 2012)
Log Message:
-----------
modified metric names
used aliases for selected columns from data frames
Modified Paths:
--------------
trunk/tools/wsor/message_templates/R/template_analysis.R
trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R 2012-04-25
22:47:23 UTC (rev 115056)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R 2012-04-26
00:18:32 UTC (rev 115057)
@@ -7,13 +7,36 @@
# Import helper methods - GLOBAL
-home_dir <<- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
+home_dir <<- "/Users/rfaulkner/projects/wsor/message_templates/"
# home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/"
helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="")
source(helper_import)
+# Column names
+
+# revisions_before <<- "revisions_before"
+# revisions_after_0_3 <<- "revisions_after_0_3"
+# revisions_after_3_30 <<- "revisions_after_3_30"
+# revisions_after_gt_30 <<- "revisions_after_gt_30"
+
+revisions_before <<- "ns_6_revisions_before"
+revisions_after_0_3 <<- "ns_6_revisions_after_0_3"
+revisions_after_3_30 <<- "ns_6_revisions_after_3_30"
+# revisions_after_gt_30 <<- "ns_0_revisions_after_gt_30"
+
+# revisions_deleted_before <<- "revisions_deleted_before"
+#revisions_deleted_after_0_3 <<- "revisions_deleted_after_0_3"
+#revisions_deleted_after_3_30 <<- "revisions_deleted_after_3_30"
+#revisions_deleted_after_gt_30 <<- "revisions_deleted_after_gt_30"
+
+revisions_deleted_before <<- "ns_0_revisions_deleted_before"
+revisions_deleted_after_0_3 <<- "ns_0_revisions_deleted_after_0_3"
+revisions_deleted_after_3_30 <<- "ns_0_revisions_deleted_after_3_30"
+revisions_deleted_after_gt_30 <<- "ns_0_revisions_deleted_after_gt_30"
+
+
# FUNCTION :: import.experimental.metrics.data
#
# Import the template data and build data frames from it
@@ -24,6 +47,7 @@
# Read aggregated results for the template
fname_last_part_edits <- "_editcounts.tsv"
+ # fname_last_part_edits <- "_editcountsnonamespace.tsv"
fname_last_part_blocks <- "_blocks.tsv"
fname_last_part_warn <- "_warnings.tsv"
@@ -48,7 +72,7 @@
#
process.data.frames <- function(min_edits_before=0,
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf,
min_edits_after = 0, registered=TRUE) {
-
+
# MERGE THE METRICS AND ADD TEMPLATE COLS
# print("Merge Data..")
@@ -58,63 +82,73 @@
merged_test <<- merge(merged_test, warn_test,
by=intersect(names(merged_test),names(warn_test)), all=TRUE)
merged_control <<- merge(merged_control, warn_control,
by=intersect(names(merged_control),names(warn_control)), all=TRUE)
-
+
merged_test$template <<- 1
merged_control$template <<- 0
-
-
+
# FILTER DATA
# print("Filter Data..")
- maximum_warns_before <- 0
+ maximum_warns_before <- Inf
if (!registered)
IP_regex <-
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
else
IP_regex <- '.*[a-zA-z].*'
-
+
condition_1 <- TRUE # merged_test$blocks_before > 0
condition_2 <- merged_test$blocks_after == 0
- condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before &
merged_test$ns_0_revisions_before <= max_edits_before
- condition_4 <- merged_test$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <=
max_deleted_edits_before
+ condition_3 <- merged_test[[revisions_before]] >= min_edits_before &
merged_test[[revisions_before]] <= max_edits_before
+ condition_4 <- TRUE # merged_test[[revisions_deleted_before]] >=
min_deleted_edits_before & merged_test[[revisions_deleted_before]] <=
max_deleted_edits_before
condition_5 <- merged_test$warns_before <= maximum_warns_before
condition_6 <- filter.list.by.regex(IP_regex,
merged_test$recipient_name)
- condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_edits_after
-
+ condition_7 <- merged_test[[revisions_after_0_3]] >= min_edits_after
+
indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
+
merged_test <<- merged_test[indices,]
-
+ merged_test <<- merged_test[!is.na(merged_test$recipient_name),]
+
condition_1 <- TRUE # merged_control$blocks_before > 0
condition_2 <- merged_control$blocks_after == 0
- condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before
& merged_control$ns_0_revisions_before <= max_edits_before
- condition_4 <- merged_control$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <=
max_deleted_edits_before
+ condition_3 <- merged_control[[revisions_before]] >= min_edits_before &
merged_control[[revisions_before]] <= max_edits_before
+ condition_4 <- TRUE # merged_control[[revisions_deleted_before]] >=
min_deleted_edits_before & merged_control[[revisions_deleted_before]] <=
max_deleted_edits_before
condition_5 <- merged_control$warns_before <= maximum_warns_before
condition_6 <- filter.list.by.regex(IP_regex,
merged_control$recipient_name)
- condition_7 <- merged_control$ns_0_revisions_after_0_3 >=
min_edits_after
-
- indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
+ condition_7 <- merged_control[[revisions_after_0_3]] >= min_edits_after
+
+ indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
merged_control <<- merged_control[indices,]
-
-
+ merged_control <<-
merged_control[!is.na(merged_control$recipient_name),]
+
# ADD DERIVED COLS
# print("Add derived columns..")
- merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before -
(merged_test$ns_0_revisions_after_0_3)) / (merged_test$ns_0_revisions_before)
- merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before
- merged_control$ns_0_revisions_after_0_3) /
(merged_control$ns_0_revisions_before)
+ merged_test$edit_event <<-
convert.list.to.binomial.event(merged_test[[revisions_after_0_3]] +
merged_test[[revisions_after_3_30]]) # + merged_test[[revisions_after_gt_30]])
+ merged_control$edit_event <<-
convert.list.to.binomial.event(merged_control[[revisions_after_0_3]] +
merged_control[[revisions_after_3_30]]) # +
merged_control[[revisions_after_gt_30]])
- merged_test$edit_counts_0_3 <<- merged_test$ns_0_revisions_after_0_3
- merged_control$edit_counts_0_3 <<-
merged_control$ns_0_revisions_after_0_3
+ merged_test$edit_counts_all <<- merged_test[[revisions_after_0_3]] +
merged_test[[revisions_after_3_30]] # + merged_test[[revisions_after_gt_30]]
+ merged_control$edit_counts_all <<-
merged_control[[revisions_after_0_3]] + merged_control[[revisions_after_3_30]]
# + merged_control[[revisions_after_gt_30]]
- merged_test$edits_del_decrease <<-
(merged_test$ns_0_revisions_deleted_before -
(merged_test$ns_0_revisions_deleted_after_0_3)) /
(merged_test$ns_0_revisions_deleted_before)
- merged_control$edits_del_decrease <<-
(merged_control$ns_0_revisions_deleted_before -
(merged_control$ns_0_revisions_deleted_after_0_3)) /
(merged_control$ns_0_revisions_deleted_before)
+ merged_test$edits_norm <<- merged_test[[revisions_after_0_3]] /
merged_test[[revisions_before]]
+ merged_control$edits_norm <<- merged_control[[revisions_after_0_3]] /
merged_control[[revisions_before]]
- merged_test$edit_del_counts_0_3 <<-
merged_test$ns_0_revisions_deleted_after_0_3
- merged_control$edit_del_counts_0_3 <<-
merged_control$ns_0_revisions_deleted_after_0_3
+ merged_test$edits_decrease <<- (merged_test[[revisions_before]] -
(merged_test[[revisions_after_0_3]])) / (merged_test[[revisions_before]])
+ merged_control$edits_decrease <<- (merged_control[[revisions_before]] -
merged_control[[revisions_after_0_3]]) / (merged_control[[revisions_before]])
+
+ merged_test$edit_counts_0_3 <<- merged_test[[revisions_after_0_3]]
+ merged_control$edit_counts_0_3 <<- merged_control[[revisions_after_0_3]]
- merged_test$edit_del_counts <<-
ceiling(merged_test$ns_0_revisions_deleted_after_0_3 /
max(merged_test$ns_0_revisions_deleted_after_0_3))
- merged_control$edit_del_counts <<-
ceiling(merged_control$ns_0_revisions_deleted_after_0_3 /
max(merged_control$ns_0_revisions_deleted_after_0_3))
+ # merged_test$edits_del_decrease <<-
(merged_test[[revisions_deleted_before]] -
(merged_test[[revisions_deleted_after_0_3]])) /
(merged_test[[revisions_deleted_before]])
+ # merged_control$edits_del_decrease <<-
(merged_control[[revisions_deleted_before]] -
(merged_control[[revisions_deleted_after_0_3]])) /
(merged_control[[revisions_deleted_before]])
+
+ # merged_test$edit_del_counts_0_3 <<-
merged_test[[revisions_deleted_after_0_3]]
+ # merged_control$edit_del_counts_0_3 <<-
merged_control[[revisions_deleted_after_0_3]]
+
+ # merged_test$edit_del_counts <<-
ceiling(merged_test[[revisions_deleted_after_0_3]] /
max(merged_test[[revisions_deleted_after_0_3]]))
+ # merged_control$edit_del_counts <<-
ceiling(merged_control[[revisions_deleted_after_0_3]] /
max(merged_control[[revisions_deleted_after_0_3]]))
}
# FUNCTION :: execute.chi.square.test
@@ -153,7 +187,7 @@
chisq_res_control <<- chisq.test(counts_control$counts,
p=probs_test$counts)
}
-
+
# FUNCTION :: execute.main
#
# A pseudo main method to allow the script to be executed as a batch
@@ -166,12 +200,11 @@
# c(60,62,66,76) # c(107,109,111,113,115) # TWINKLE c(78,81) # c(84, 0)
# c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) # CORENSEARCH
c(118, 120, 122, 124, 126, 128) # IMAGETAG c(132, 133, 135, 136, 138, 139, 141,
142)
# c(61,63,67,77) # c(108,110,114,116) # TWINKLE c(79,82) # c(86, 0) #
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) # CORENSEARCH
c(117, 119, 121, 123, 125, 127) # IMAGETAG c(131, 134, 137, 140)
- # paste(home_dir,"output/metrics_1018_1119_z",sep="") #
paste(home_dir,"output/metrics_1122_1222_z",sep="") #
paste(home_dir,"output/metrics_1109_1209_z",sep="")
- # paste(home_dir,"output/metrics_1108_1202_z",sep="") #
paste(home_dir,"output/metrics_pt_z",sep="") #
paste(home_dir,"output/metrics_1018_1119_z",sep="") #
paste(home_dir,"output/metrics_z",sep="")
+ # "output/metrics_1018_1119_z" # "output/metrics_1122_1222_z" #
"output/metrics_1109_1209_z" # "output/metrics_1108_1202_z" #
"output/metrics_pt_z" # "output/metrics_1018_1119_z" # "output/metrics_z"
- template_indices_control <- c(81,0)
- template_indices_test <- c(82,0)
- fname_first_part <- "output/metrics_1109_1209_z"
+ template_indices_control <- c(131,0)
+ template_indices_test <- c(133,0)
+ fname_first_part <- "output/metrics_z"
if (import_metrics)
import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
@@ -194,7 +227,9 @@
# LOGISTIC REGRESSION MODELLING:
all_data <<- append.data.frames(merged_test, merged_control)
-
+
+ # summary(glm(template ~ edit_event, data=all_data,
family=binomial(link="logit")))
+ # summary(glm(template ~ edit_counts_all, data=all_data,
family=binomial(link="logit")))
# summary(glm(template ~ edits_decrease, data=all_data,
family=binomial(link="logit")))
# summary(glm(template ~ edit_counts_0_3, data=all_data,
family=binomial(link="logit")))
# summary(glm(template ~ edits_del_decrease, data=all_data,
family=binomial(link="logit")))
Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-04-25 22:47:23 UTC (rev 115056)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-04-26 00:18:32 UTC (rev 115057)
@@ -109,16 +109,16 @@
for (i in edit_count_before_filter)
{
- process.data.frames(min_deleted_edits_before = i,
max_deleted_edits_before = Inf, registered=registered,
min_edits_after=rev_count_after_min)
+ process.data.frames(min_edits_before = i, max_edits_before =
Inf, registered=registered, min_edits_after=rev_count_after_min)
- means_test <<- c(means_test,
mean(merged_test$edit_del_counts_0_3))
- means_control <<- c(means_control,
mean(merged_control$edit_del_counts_0_3))
+ means_test <<- c(means_test, mean(merged_test$edits_decrease))
+ means_control <<- c(means_control,
mean(merged_control$edits_decrease))
- sd_test <<- c(sd_test, sd(merged_test$edit_del_counts_0_3))
- sd_control <<- c(sd_control,
sd(merged_control$edit_del_counts_0_3))
+ sd_test <<- c(sd_test, sd(merged_test$edits_decrease))
+ sd_control <<- c(sd_control, sd(merged_control$edits_decrease))
- data_counts_test <<- c(data_counts_test,
length(merged_test$edit_del_counts_0_3))
- data_counts_control <<- c(data_counts_control,
length(merged_control$edit_del_counts_0_3))
+ data_counts_test <<- c(data_counts_test,
length(merged_test$edits_decrease))
+ data_counts_control <<- c(data_counts_control,
length(merged_control$edits_decrease))
}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs