https://www.mediawiki.org/wiki/Special:Code/MediaWiki/113066
Revision: 113066
Author: rfaulk
Date: 2012-03-05 19:25:26 +0000 (Mon, 05 Mar 2012)
Log Message:
-----------
modularized the main visualization implementation in order to more easily
modify parameters
Modified Paths:
--------------
trunk/tools/wsor/message_templates/R/template_analysis.R
trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R 2012-03-05
19:22:27 UTC (rev 113065)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R 2012-03-05
19:25:26 UTC (rev 113066)
@@ -47,7 +47,7 @@
# GLOBALS assumed to exist: warn_test, warn_control, blocks_test,
blocks_control, edits_test, edits_control
#
-process.data.frames <- function(min_edits_before=0,
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf)
{
+process.data.frames <- function(min_edits_before=0,
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf,
min_revisions_after = 0, registered=TRUE) {
# MERGE THE METRICS AND ADD TEMPLATE COLS
@@ -69,16 +69,18 @@
maximum_warns_before <- 0
- IP_regex <-
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
- IP_regex_not <- '.*[a-zA-z].*'
+ if (!registered)
+ IP_regex <-
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
+ else
+ IP_regex <- '.*[a-zA-z].*'
condition_1 <- TRUE # merged_test$blocks_before > 0
condition_2 <- merged_test$blocks_after == 0
condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before &
merged_test$ns_0_revisions_before <= max_edits_before
condition_4 <- merged_test$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <=
max_deleted_edits_before
condition_5 <- merged_test$warns_before <= maximum_warns_before
- condition_6 <- filter.list.by.regex(IP_regex_not,
merged_test$recipient_name)
- condition_7 <- merged_test$ns_0_revisions_after_0_3 > 0
+ condition_6 <- filter.list.by.regex(IP_regex,
merged_test$recipient_name)
+ condition_7 <- merged_test$ns_0_revisions_after_0_3 >=
min_revisions_after
indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
merged_test <<- merged_test[indices,]
@@ -88,8 +90,8 @@
condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before
& merged_control$ns_0_revisions_before <= max_edits_before
condition_4 <- merged_control$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <=
max_deleted_edits_before
condition_5 <- merged_control$warns_before <= maximum_warns_before
- condition_6 <- filter.list.by.regex(IP_regex_not,
merged_control$recipient_name)
- condition_7 <- merged_control$ns_0_revisions_after_0_3 > 0
+ condition_6 <- filter.list.by.regex(IP_regex,
merged_control$recipient_name)
+ condition_7 <- merged_control$ns_0_revisions_after_0_3 >=
min_revisions_after
indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
merged_control <<- merged_control[indices,]
@@ -149,13 +151,13 @@
# A pseudo main method to allow the script to be executed as a batch
#
-execute.main <- function(test_samples, control_samples) {
+execute.main <- function() {
# IMPORT DATA
- template_indices_control <- c(78,81) # c(84, 0) #
c(107,109,111,113,115) # c(1,4) # c(84,99,101,103,105) #
c(60,62,64,66,68,70,72,74,76)
- template_indices_test <- c(79,82) # c(86, 0) # c(108,110,114,116) #
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77)
- fname_first_part <- paste(home_dir,"output/metrics_1109_1209_z",sep="")
# paste(home_dir,"output/metrics_1108_1202_z",sep="") #
paste(home_dir,"output/metrics_1122_1222_z",sep="") #
paste(home_dir,"output/metrics_pt_z",sep="") #
paste(home_dir,"output/metrics_1018_1119_z",sep="") #
"/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
+ template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) #
c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) #
c(60,62,64,66,68,70,72,74,76)
+ template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82)
# c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77)
+ fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="")
# paste(home_dir,"output/metrics_1122_1222_z",sep="") #
paste(home_dir,"output/metrics_1109_1209_z",sep="") #
paste(home_dir,"output/metrics_1108_1202_z",sep="") #
paste(home_dir,"output/metrics_pt_z",sep="") #
paste(home_dir,"output/metrics_1018_1119_z",sep="")
# import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
@@ -165,7 +167,8 @@
# print("")
# print("Processing data frames.")
- process.data.frames(1,0,Inf,Inf)
+ registered = TRUE
+ process.data.frames(3,0,Inf,Inf,registered)
@@ -177,7 +180,7 @@
# LOGISTIC REGRESSION MODELLING:
- all_data <- append.data.frames(merged_test, merged_control)
+ all_data <<- append.data.frames(merged_test, merged_control)
# summary(glm(template ~ edits_decrease, data=all_data,
family=binomial(link="logit")))
# summary(glm(template ~ edits_del_decrease, data=all_data,
family=binomial(link="logit")))
Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-03-05 19:22:27 UTC (rev 113065)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-03-05 19:25:26 UTC (rev 113066)
@@ -13,6 +13,8 @@
#
# Basic plotting for test vs. control
#
+# e.g. call :: plot.control.vs.test("Huggle Short 2 Experiment (reduced) -
Decrease in Editor Activity", "Minimum Edits before Template Posting", "Mean %
Decrease in Edit Activity", edit_decrease_means_test,
edit_decrease_means_control)
+#
plot.control.vs.test <- function(title, x_label, y_label, test_samples,
control_samples) {
@@ -62,44 +64,68 @@
# IMPORT DATA
-# c(84, 0) c(107,109,111,113,115) c(78,81) c(1,4) c(84,99,101,103,105)
-template_indices_control <- c(60,62,64,66,68,70,72,74,76)
-
-# c(85, 0) c(108,110,114,116) c(79,82) c(2,3) c(85,86,100,102,104,106)
-template_indices_test <- c(61,63,65,67,69,71,73,75,77)
-
-# paste(home_dir,"output/metrics_1108_1202_z",sep="")
paste(home_dir,"output/metrics_1122_1222_z",sep="")
paste(home_dir,"output/metrics_1109_1209_z",sep="")
paste(home_dir,"output/metrics_pt_z",sep="")
"/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
-fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="")
-
-import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
-
-
-
-# PROCESS DATA
-
-edit_count_before_filter <- 1:10
-
-edit_decrease_means_test <- c()
-edit_decrease_means_control <- c()
-
-for (i in edit_count_before_filter)
+line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper =
10, import_metrics = FALSE, save_plot = TRUE, registered = TRUE, error_bars =
FALSE)
{
- process.data.frames(i,0,Inf,Inf)
+ # c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76)
c(107,109,111,113,115) c(84,99,101,103,105)
+ template_indices_control <- c(84, 0)
- edit_decrease_means_test <- c(edit_decrease_means_test,
mean(merged_test$edits_decrease))
- edit_decrease_means_control <- c(edit_decrease_means_control,
mean(merged_control$edits_decrease))
+ # c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77)
c(108,110,114,116) c(85,86,100,102,104,106)
+ template_indices_test <- c(85, 0)
+
+ # paste(home_dir,"output/metrics_1109_1209_z",sep="")
paste(home_dir,"output/metrics_pt_z",sep="")
paste(home_dir,"output/metrics_1018_1119_z",sep="")
paste(home_dir,"output/metrics_1122_1222_z",sep="")
+ fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="")
+
+ if (import_metrics)
+ import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
+
+
+
+ # PROCESS DATA
+
+ edit_count_before_filter <- edit_count_min_lower : edit_count_min_upper
+
+ data_counts_test <<- c()
+ data_counts_control <<- c()
+
+ edit_decrease_means_test <<- c()
+ edit_decrease_means_control <<- c()
+
+ edit_decrease_sd_test <<- c()
+ edit_decrease_sd_control <<- c()
+
+
+ if (registered)
+ reg_str = 'registered'
+ else
+ reg_str = 'non_registered'
+
+ for (i in edit_count_before_filter)
+ {
+
process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0)
+
+ edit_decrease_means_test <<- c(edit_decrease_means_test,
mean(merged_test$edits_decrease) * 100)
+ edit_decrease_means_control <<- c(edit_decrease_means_control,
mean(merged_control$edits_decrease) * 100)
+
+ edit_decrease_sd_test <<- c(edit_decrease_sd_test,
sd(merged_test$edits_decrease * 100))
+ edit_decrease_sd_control <<- c(edit_decrease_sd_control,
sd(merged_control$edits_decrease * 100))
+
+ data_counts_test <<- c(data_counts_test,
length(merged_test$edits_decrease))
+ data_counts_control <<- c(data_counts_control,
length(merged_control$edits_decrease))
+ }
+
+ # PLOT DATA
+
+ plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") -
Decrease in Editor Activity", sep="")
+
+ df <- data.frame(x=1:length(edit_decrease_means_test),
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control,
y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control)
+ p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) +
geom_line(aes(y=y_ctrl,colour="Control"))
+
+ if (error_bars)
+ p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax =
y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin =
y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2)
+
+ p <- p + scale_x_continuous('Minimum Edits before Template Posting') +
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title =
plot_title, legend.title = theme_blank())
+
+ if (save_plot)
+
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_short_1_2_',reg_str,'.png',sep=""),width=8)
}
-# PLOT DATA
-
-# plot.control.vs.test("Huggle Short 2 Experiment - Decrease in Editor
Activity", "Minimum Edits before Template Posting", "Mean % Decrease in Edit
Activity", edit_decrease_means_test, edit_decrease_means_control)
-
-# ggplot
-
-plot_title = "Huggle 3 - % decrease of Edit Actitivity after Posting"
-
-df <- data.frame(x=1:length(edit_decrease_means_test),
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control)
-p = ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) +
geom_line(aes(y=y_ctrl,colour="Control"))
-p + scale_x_continuous('Minimum Edits before Template Posting') +
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title =
plot_title, legend.title = theme_blank())
-ggsave('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/huggle_3.png',width=8)
-
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs