https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114040
Revision: 114040
Author: rfaulk
Date: 2012-03-16 22:19:06 +0000 (Fri, 16 Mar 2012)
Log Message:
-----------
updated scripts to allow for more flexibility in handling parameters
Modified Paths:
--------------
trunk/tools/wsor/message_templates/R/template_analysis.R
trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R 2012-03-16
22:16:36 UTC (rev 114039)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R 2012-03-16
22:19:06 UTC (rev 114040)
@@ -7,7 +7,7 @@
# Import helper methods - GLOBAL
-home_dir <- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
+home_dir <<- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
# home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/"
helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="")
@@ -26,15 +26,15 @@
fname_last_part_edits <- "_editcounts.tsv"
fname_last_part_blocks <- "_blocks.tsv"
fname_last_part_warn <- "_warnings.tsv"
-
- warn_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_warn, string_frames=c(1))
- warn_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_warn, string_frames=c(1))
+
+ warn_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1))
+ warn_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1))
- blocks_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_blocks, string_frames=c(1))
- blocks_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_blocks, string_frames=c(1))
+ blocks_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1))
+ blocks_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1))
- edits_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_edits, string_frames=c(1))
- edits_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_edits, string_frames=c(1))
+ edits_test <<- build.data.frames(template_indices_test,
fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1))
+ edits_control <<- build.data.frames(template_indices_control,
fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1))
}
@@ -47,7 +47,7 @@
# GLOBALS assumed to exist: warn_test, warn_control, blocks_test,
blocks_control, edits_test, edits_control
#
-process.data.frames <- function(min_edits_before=0,
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf,
min_revisions_after = 0, registered=TRUE) {
+process.data.frames <- function(min_edits_before=0,
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf,
min_edits_after = 0, registered=TRUE) {
# MERGE THE METRICS AND ADD TEMPLATE COLS
@@ -80,7 +80,7 @@
condition_4 <- merged_test$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <=
max_deleted_edits_before
condition_5 <- merged_test$warns_before <= maximum_warns_before
condition_6 <- filter.list.by.regex(IP_regex,
merged_test$recipient_name)
- condition_7 <- merged_test$ns_0_revisions_after_0_3 >=
min_revisions_after
+ condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_edits_after
indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
merged_test <<- merged_test[indices,]
@@ -91,7 +91,7 @@
condition_4 <- merged_control$ns_0_revisions_deleted_before >=
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <=
max_deleted_edits_before
condition_5 <- merged_control$warns_before <= maximum_warns_before
condition_6 <- filter.list.by.regex(IP_regex,
merged_control$recipient_name)
- condition_7 <- merged_control$ns_0_revisions_after_0_3 >=
min_revisions_after
+ condition_7 <- merged_control$ns_0_revisions_after_0_3 >=
min_edits_after
indices <- condition_1 & condition_2 & condition_3 & condition_4 &
condition_5 & condition_6 & condition_7
merged_control <<- merged_control[indices,]
@@ -101,12 +101,20 @@
# print("Add derived columns..")
- merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before -
merged_test$ns_0_revisions_after_0_3) / (merged_test$ns_0_revisions_before)
+ merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before -
(merged_test$ns_0_revisions_after_0_3)) / (merged_test$ns_0_revisions_before)
merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before
- merged_control$ns_0_revisions_after_0_3) /
(merged_control$ns_0_revisions_before)
- # merged_test$edits_del_decrease <<-
(merged_test$ns_0_revisions_deleted_before -
(merged_test$ns_0_revisions_deleted_after_0_3)) /
(merged_test$ns_0_revisions_deleted_before)
- # merged_control$edits_del_decrease <<-
(merged_control$ns_0_revisions_deleted_before -
(merged_control$ns_0_revisions_deleted_after_0_3)) /
(merged_control$ns_0_revisions_deleted_before)
+ merged_test$edit_counts_0_3 <<- merged_test$ns_0_revisions_after_0_3
+ merged_control$edit_counts_0_3 <<-
merged_control$ns_0_revisions_after_0_3
+ merged_test$edits_del_decrease <<-
(merged_test$ns_0_revisions_deleted_before -
(merged_test$ns_0_revisions_deleted_after_0_3)) /
(merged_test$ns_0_revisions_deleted_before)
+ merged_control$edits_del_decrease <<-
(merged_control$ns_0_revisions_deleted_before -
(merged_control$ns_0_revisions_deleted_after_0_3)) /
(merged_control$ns_0_revisions_deleted_before)
+
+ merged_test$edit_del_counts_0_3 <<-
merged_test$ns_0_revisions_deleted_after_0_3
+ merged_control$edit_del_counts_0_3 <<-
merged_control$ns_0_revisions_deleted_after_0_3
+
+ merged_test$edit_del_counts <<-
ceiling(merged_test$ns_0_revisions_deleted_after_0_3 /
max(merged_test$ns_0_revisions_deleted_after_0_3))
+ merged_control$edit_del_counts <<-
ceiling(merged_control$ns_0_revisions_deleted_after_0_3 /
max(merged_control$ns_0_revisions_deleted_after_0_3))
}
# FUNCTION :: execute.chi.square.test
@@ -151,27 +159,32 @@
# A pseudo main method to allow the script to be executed as a batch
#
-execute.main <- function() {
+execute.main <- function(min_edits_before = 0, max_edits_before = Inf,
min_edits_after = 0, min_deleted_edits_before = 0, max_deleted_edits_before =
Inf,
+load_metrics = FALSE, load_file = "", import_metrics = FALSE, registered =
FALSE) {
# IMPORT DATA
- template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) #
c(78,81) # c(84, 0) # c(1,4) # c(84,99,101,103,105) #
c(60,62,64,66,68,70,72,74,76)
- template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82)
# c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77)
- fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="")
# paste(home_dir,"output/metrics_1122_1222_z",sep="") #
paste(home_dir,"output/metrics_1109_1209_z",sep="") #
paste(home_dir,"output/metrics_1108_1202_z",sep="") #
paste(home_dir,"output/metrics_pt_z",sep="") #
paste(home_dir,"output/metrics_1018_1119_z",sep="")
+ # c(60,62,66,76) # c(107,109,111,113,115) # TWINKLE c(78,81) # c(84, 0)
# c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) # CORENSEARCH
c(118, 120, 122, 124, 126, 128) # IMAGETAG c(132, 133, 135, 136, 138, 139, 141,
142)
+ # c(61,63,67,77) # c(108,110,114,116) # TWINKLE c(79,82) # c(86, 0) #
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) # CORENSEARCH
c(117, 119, 121, 123, 125, 127) # IMAGETAG c(131, 134, 137, 140)
+ # paste(home_dir,"output/metrics_1018_1119_z",sep="") #
paste(home_dir,"output/metrics_1122_1222_z",sep="") #
paste(home_dir,"output/metrics_1109_1209_z",sep="")
+ # paste(home_dir,"output/metrics_1108_1202_z",sep="") #
paste(home_dir,"output/metrics_pt_z",sep="") #
paste(home_dir,"output/metrics_1018_1119_z",sep="") #
paste(home_dir,"output/metrics_z",sep="")
- # import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
+ template_indices_control <- c(81,0)
+ template_indices_test <- c(82,0)
+ fname_first_part <- "output/metrics_1109_1209_z"
+ if (import_metrics)
+ import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
+ if (load_metrics)
+ load(load_file)
- # PROCESS DATA
- # print("")
- # print("Processing data frames.")
- registered = TRUE
- process.data.frames(3,0,Inf,Inf,registered)
+ # PROCESS DATA
+ process.data.frames(min_edits_before = min_edits_before,
max_edits_before = max_edits_before, min_edits_after = min_edits_after,
+ min_deleted_edits_before = min_deleted_edits_before,
max_deleted_edits_before = max_deleted_edits_before, registered = registered)
-
# HYPOTHESIS TESTING
# t_result <- t.test(x=merged_test$edits_decrease,
y=merged_control$edits_decrease, alternative = "two.sided", paired = FALSE,
var.equal = FALSE, conf.level = 0.95)
@@ -181,7 +194,10 @@
# LOGISTIC REGRESSION MODELLING:
all_data <<- append.data.frames(merged_test, merged_control)
+
# summary(glm(template ~ edits_decrease, data=all_data,
family=binomial(link="logit")))
+ # summary(glm(template ~ edit_counts_0_3, data=all_data,
family=binomial(link="logit")))
# summary(glm(template ~ edits_del_decrease, data=all_data,
family=binomial(link="logit")))
-
+ # summary(glm(template ~ edit_del_counts_0_3, data=all_data,
family=binomial(link="logit")))
+ # summary(glm(template ~ edit_del_counts, data=all_data,
family=binomial(link="logit")))
}
Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-03-16 22:16:36 UTC (rev 114039)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
2012-03-16 22:19:06 UTC (rev 114040)
@@ -64,23 +64,28 @@
# save_plot - saves plot if TRUE
# registered - look at registered editors if TRUE (non-registered otherwise)
# error_bars - display error bars if TRUE
+# plot_samples - plots the sample sizes used for each data point
#
-line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper =
10, import_metrics = FALSE, save_plot = TRUE, filename = 'ggplot_out_',
registered = TRUE, error_bars = FALSE)
+line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper =
10, rev_count_after_min = 0, import_metrics = FALSE, plot_width = 10,
+save_plot = FALSE, filename = 'ggplot_out_', registered = FALSE, error_bars =
FALSE, plot_title = "Huggle Experiments", load_metrics = FALSE, load_file = "",
plot_samples = FALSE,
+x_scale = "Minimum Edits before Template Posting", y_scale = "Sample Size",
plot_title_metric = "Metric Description")
{
# IMPORT DATA
- # c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) c(60,62,66,76)
c(107,109,111,113,115) c(84,99,101,103,105)
- # c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) c(61,63,67,77)
c(108,110,114,116) c(85,86,100,102,104,106)
- # paste(home_dir,"output/metrics_1109_1209_z",sep="")
paste(home_dir,"output/metrics_pt_z",sep="")
paste(home_dir,"output/metrics_1018_1119_z",sep="")
paste(home_dir,"output/metrics_1122_1222_z",sep="")
+ # c(84, 0) c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76)
c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105)
+ # c(85, 0) c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77)
c(61,63,67,77) c(108,110,114,116) c(85,86,100,102,104,106)
+ # paste(home_dir,"output/metrics_1108_1202_z",sep="")
paste(home_dir,"output/metrics_1109_1209_z",sep="")
paste(home_dir,"output/metrics_pt_z",sep="")
paste(home_dir,"output/metrics_1018_1119_z",sep="")
paste(home_dir,"output/metrics_1122_1222_z",sep="")
- template_indices_control <- c(84, 0)
- template_indices_test <- c(85, 0)
+ template_indices_control <- c(84, 0)
+ template_indices_test <- c(85, 0)
fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="")
if (import_metrics)
import.experimental.metrics.data(template_indices_test,
template_indices_control, fname_first_part)
+ if (load_metrics)
+ load(load_file)
# PROCESS DATA
@@ -90,11 +95,11 @@
data_counts_test <<- c()
data_counts_control <<- c()
- edit_decrease_means_test <<- c()
- edit_decrease_means_control <<- c()
+ means_test <<- c()
+ means_control <<- c()
- edit_decrease_sd_test <<- c()
- edit_decrease_sd_control <<- c()
+ sd_test <<- c()
+ sd_control <<- c()
if (registered)
@@ -104,31 +109,58 @@
for (i in edit_count_before_filter)
{
-
process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0)
+ process.data.frames(min_deleted_edits_before = i,
max_deleted_edits_before = Inf, registered=registered,
min_edits_after=rev_count_after_min)
+
+ means_test <<- c(means_test,
mean(merged_test$edit_del_counts_0_3))
+ means_control <<- c(means_control,
mean(merged_control$edit_del_counts_0_3))
- edit_decrease_means_test <<- c(edit_decrease_means_test,
mean(merged_test$edits_decrease) * 100)
- edit_decrease_means_control <<- c(edit_decrease_means_control,
mean(merged_control$edits_decrease) * 100)
+ sd_test <<- c(sd_test, sd(merged_test$edit_del_counts_0_3))
+ sd_control <<- c(sd_control,
sd(merged_control$edit_del_counts_0_3))
- edit_decrease_sd_test <<- c(edit_decrease_sd_test,
sd(merged_test$edits_decrease * 100))
- edit_decrease_sd_control <<- c(edit_decrease_sd_control,
sd(merged_control$edits_decrease * 100))
-
- data_counts_test <<- c(data_counts_test,
length(merged_test$edits_decrease))
- data_counts_control <<- c(data_counts_control,
length(merged_control$edits_decrease))
+ data_counts_test <<- c(data_counts_test,
length(merged_test$edit_del_counts_0_3))
+ data_counts_control <<- c(data_counts_control,
length(merged_control$edit_del_counts_0_3))
}
- # PLOT DATA
- plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") -
Decrease in Editor Activity", sep="")
+ # PLOT - Decrease in Editor Activity
- df <- data.frame(x=1:length(edit_decrease_means_test),
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control,
y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control)
+ # plot_title_full = paste(plot_title, "(", reg_str, ") - Decrease in
Editor Activity", sep="")
+ plot_title_full = paste(plot_title, "(", reg_str, ") - ",
plot_title_metric, sep="")
+
+ df <- data.frame(x=1:length(means_test), y_test=means_test,
y_ctrl=means_control, y_test_sd=sd_test, y_ctrl_sd=sd_control)
p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) +
geom_line(aes(y=y_ctrl,colour="Control"))
if (error_bars)
p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax =
y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin =
y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2)
- p <- p + scale_x_continuous('Minimum Edits before Template Posting') +
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title =
plot_title, legend.title = theme_blank())
+ # Add axes labels and titles
+ p <- p + scale_x_continuous(x_scale) + scale_y_continuous(y_scale) +
opts(title = plot_title_full, legend.title = theme_blank())
if (save_plot)
-
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""),width=8)
+
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""),
width=plot_width)
+
+
+
+ # PLOT - Sample Sizes
+
+ if (plot_samples)
+ {
+ plot_title_full = paste(plot_title, "(", reg_str, ") - Sample
Sizes", sep="")
+ bins <- 1:length(data_counts_test)
+
+ test_samples <- counts.to.samples(bins, data_counts_test)
+ control_samples <- counts.to.samples(bins, data_counts_control)
+
+ labels <- c(test_samples * 0, control_samples / control_samples)
+ labels[labels == 0] = "Test"
+ labels[labels == 1] = "Control"
+
+ df <- data.frame(x=c(test_samples, control_samples),
labels=labels)
+ p <- ggplot(df, aes(x, fill=labels)) + geom_bar(binwidth=0.4,
position="dodge")
+ p <- p + scale_x_continuous(x_scale) +
scale_y_continuous('Sample Size') + opts(title = plot_title_full, legend.title
= theme_blank())
+
+ if (save_plot)
+
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,"samples_",reg_str,'.png',sep=""),
width=plot_width)
+ }
}
_______________________________________________
MediaWiki-CVS mailing list
mediawiki-cvs@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs