https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114040

Revision: 114040
Author:   rfaulk
Date:     2012-03-16 22:19:06 +0000 (Fri, 16 Mar 2012)
Log Message:
-----------
updated scripts to allow for more flexibility in handling parameters 

Modified Paths:
--------------
    trunk/tools/wsor/message_templates/R/template_analysis.R
    trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R

Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R    2012-03-16 22:16:36 UTC (rev 114039)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R    2012-03-16 22:19:06 UTC (rev 114040)
@@ -7,7 +7,7 @@
 
 # Import helper methods - GLOBAL
 
-home_dir <- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
+home_dir <<- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
 # home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/"
 
 helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="")
@@ -26,15 +26,15 @@
        fname_last_part_edits <- "_editcounts.tsv"
        fname_last_part_blocks <- "_blocks.tsv"
        fname_last_part_warn <- "_warnings.tsv"
-               
-       warn_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_warn, string_frames=c(1))
-       warn_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_warn, string_frames=c(1))
+
+       warn_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1))
+       warn_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_warn, home_dir, string_frames=c(1))
        
-       blocks_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_blocks, string_frames=c(1))
-       blocks_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_blocks, string_frames=c(1))
+       blocks_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1))
+       blocks_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_blocks, home_dir, string_frames=c(1))
        
-       edits_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_edits, string_frames=c(1))
-       edits_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_edits, string_frames=c(1))
+       edits_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1))
+       edits_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_edits, home_dir, string_frames=c(1))
        
 }
 
@@ -47,7 +47,7 @@
 # GLOBALS assumed to exist:  warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control
 #
 
-process.data.frames <- function(min_edits_before=0, 
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, 
min_revisions_after = 0, registered=TRUE) {
+process.data.frames <- function(min_edits_before=0, 
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, 
min_edits_after = 0, registered=TRUE) {
        
        # MERGE THE METRICS AND ADD TEMPLATE COLS
 
@@ -80,7 +80,7 @@
        condition_4 <- merged_test$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
        condition_5 <- merged_test$warns_before <= maximum_warns_before
        condition_6 <- filter.list.by.regex(IP_regex, 
merged_test$recipient_name)
-       condition_7 <- merged_test$ns_0_revisions_after_0_3 >= 
min_revisions_after
+       condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_edits_after
        
        indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7
        merged_test <<- merged_test[indices,]
@@ -91,7 +91,7 @@
        condition_4 <- merged_control$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
        condition_5 <- merged_control$warns_before <= maximum_warns_before
        condition_6 <- filter.list.by.regex(IP_regex, 
merged_control$recipient_name)
-       condition_7 <- merged_control$ns_0_revisions_after_0_3 >= 
min_revisions_after
+       condition_7 <- merged_control$ns_0_revisions_after_0_3 >= 
min_edits_after
        
        indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7 
        merged_control <<- merged_control[indices,]
@@ -101,12 +101,20 @@
        
        # print("Add derived columns..")
        
-       merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - 
merged_test$ns_0_revisions_after_0_3) / (merged_test$ns_0_revisions_before)
+       merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - 
(merged_test$ns_0_revisions_after_0_3)) / (merged_test$ns_0_revisions_before)
        merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before 
- merged_control$ns_0_revisions_after_0_3) / 
(merged_control$ns_0_revisions_before)
        
-       # merged_test$edits_del_decrease <<- 
(merged_test$ns_0_revisions_deleted_before - 
(merged_test$ns_0_revisions_deleted_after_0_3)) / 
(merged_test$ns_0_revisions_deleted_before)
-       # merged_control$edits_del_decrease <<- 
(merged_control$ns_0_revisions_deleted_before - 
(merged_control$ns_0_revisions_deleted_after_0_3)) / 
(merged_control$ns_0_revisions_deleted_before)
+       merged_test$edit_counts_0_3 <<- merged_test$ns_0_revisions_after_0_3
+       merged_control$edit_counts_0_3 <<- 
merged_control$ns_0_revisions_after_0_3
        
+       merged_test$edits_del_decrease <<- 
(merged_test$ns_0_revisions_deleted_before - 
(merged_test$ns_0_revisions_deleted_after_0_3)) / 
(merged_test$ns_0_revisions_deleted_before)
+       merged_control$edits_del_decrease <<- 
(merged_control$ns_0_revisions_deleted_before - 
(merged_control$ns_0_revisions_deleted_after_0_3)) / 
(merged_control$ns_0_revisions_deleted_before)
+       
+       merged_test$edit_del_counts_0_3 <<- 
merged_test$ns_0_revisions_deleted_after_0_3
+       merged_control$edit_del_counts_0_3 <<- 
merged_control$ns_0_revisions_deleted_after_0_3
+       
+       merged_test$edit_del_counts <<- 
ceiling(merged_test$ns_0_revisions_deleted_after_0_3 / 
max(merged_test$ns_0_revisions_deleted_after_0_3))
+       merged_control$edit_del_counts <<- 
ceiling(merged_control$ns_0_revisions_deleted_after_0_3 / 
max(merged_control$ns_0_revisions_deleted_after_0_3))
 }
 
 # FUNCTION :: execute.chi.square.test
@@ -151,27 +159,32 @@
 # A pseudo main method to allow the script to be executed as a batch 
 #
 
-execute.main <- function() {
+execute.main <- function(min_edits_before = 0, max_edits_before = Inf, 
min_edits_after = 0, min_deleted_edits_before = 0, max_deleted_edits_before = 
Inf, 
+load_metrics = FALSE, load_file = "", import_metrics = FALSE, registered = 
FALSE) {
        
        # IMPORT DATA
        
-       template_indices_control <- c(60,62,66,76) # c(107,109,111,113,115) # 
c(78,81) # c(84, 0) #  c(1,4) # c(84,99,101,103,105) # 
c(60,62,64,66,68,70,72,74,76) 
-       template_indices_test <- c(61,63,67,77) # c(108,110,114,116) # c(79,82) 
# c(86, 0) # c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) 
-       fname_first_part <- paste(home_dir,"output/metrics_1018_1119_z",sep="") 
# paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_1109_1209_z",sep="") # 
paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") 
+       # c(60,62,66,76) # c(107,109,111,113,115) # TWINKLE c(78,81) # c(84, 0) 
#  c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) # CORENSEARCH 
c(118, 120, 122, 124, 126, 128) # IMAGETAG c(132, 133, 135, 136, 138, 139, 141, 
142)
+       # c(61,63,67,77) # c(108,110,114,116) # TWINKLE c(79,82) # c(86, 0) # 
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) # CORENSEARCH 
c(117, 119, 121, 123, 125, 127) # IMAGETAG c(131, 134, 137, 140)
+       # paste(home_dir,"output/metrics_1018_1119_z",sep="") # 
paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_1109_1209_z",sep="") 
+       # paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") #  
paste(home_dir,"output/metrics_z",sep="") 
        
-       # import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
+       template_indices_control <- c(81,0)
+       template_indices_test <- c(82,0)
+       fname_first_part <- "output/metrics_1109_1209_z"
        
+       if (import_metrics)
+               import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
        
+       if (load_metrics)
+               load(load_file)
        
-       # PROCESS DATA
        
-       # print("")
-       # print("Processing data frames.")
-       registered = TRUE
-       process.data.frames(3,0,Inf,Inf,registered)
+       # PROCESS DATA  
+       process.data.frames(min_edits_before = min_edits_before, 
max_edits_before = max_edits_before, min_edits_after = min_edits_after, 
+       min_deleted_edits_before = min_deleted_edits_before, 
max_deleted_edits_before = max_deleted_edits_before, registered = registered)   
   
        
        
-       
        # HYPOTHESIS TESTING
        
        # t_result <- t.test(x=merged_test$edits_decrease, 
y=merged_control$edits_decrease, alternative = "two.sided", paired = FALSE, 
var.equal = FALSE, conf.level = 0.95)
@@ -181,7 +194,10 @@
        # LOGISTIC REGRESSION MODELLING:
        
        all_data <<- append.data.frames(merged_test, merged_control)
+       
        # summary(glm(template ~ edits_decrease, data=all_data, 
family=binomial(link="logit")))
+       # summary(glm(template ~ edit_counts_0_3, data=all_data, 
family=binomial(link="logit")))
        # summary(glm(template ~ edits_del_decrease, data=all_data, 
family=binomial(link="logit")))
-
+       # summary(glm(template ~ edit_del_counts_0_3, data=all_data, 
family=binomial(link="logit")))
+       # summary(glm(template ~ edit_del_counts, data=all_data, 
family=binomial(link="logit")))
 }

Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-03-16 22:16:36 UTC (rev 114039)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-03-16 22:19:06 UTC (rev 114040)
@@ -64,23 +64,28 @@
 # save_plot - saves plot if TRUE
 # registered - look at registered editors if TRUE (non-registered otherwise)
 # error_bars - display error bars if TRUE
+# plot_samples - plots the sample sizes used for each data point
 #
 
-line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 
10, import_metrics = FALSE, save_plot = TRUE, filename = 'ggplot_out_', 
registered = TRUE, error_bars = FALSE)
+line.plot.results <- function(edit_count_min_lower = 1, edit_count_min_upper = 
10, rev_count_after_min = 0, import_metrics = FALSE, plot_width = 10,
+save_plot = FALSE, filename = 'ggplot_out_', registered = FALSE, error_bars = 
FALSE, plot_title = "Huggle Experiments", load_metrics = FALSE, load_file = "", 
plot_samples = FALSE,
+x_scale = "Minimum Edits before Template Posting", y_scale = "Sample Size", 
plot_title_metric = "Metric Description")
 {
        # IMPORT DATA 
        
-       #  c(78,81) c(1,4)  c(60,62,64,66,68,70,72,74,76) c(60,62,66,76) 
c(107,109,111,113,115) c(84,99,101,103,105)
-       #  c(79,82) c(2,3)  c(61,63,65,67,69,71,73,75,77) c(61,63,67,77)  
c(108,110,114,116) c(85,86,100,102,104,106)
-       #   paste(home_dir,"output/metrics_1109_1209_z",sep="") 
paste(home_dir,"output/metrics_pt_z",sep="")  
paste(home_dir,"output/metrics_1018_1119_z",sep="") 
paste(home_dir,"output/metrics_1122_1222_z",sep="")
+       #  c(84, 0) c(78,81) c(1,4) c(60,62,64,66,68,70,72,74,76) 
c(60,62,66,76) c(107,109,111,113,115) c(84,99,101,103,105)
+       #  c(85, 0) c(79,82) c(2,3) c(61,63,65,67,69,71,73,75,77) 
c(61,63,67,77)  c(108,110,114,116) c(85,86,100,102,104,106)
+       #  paste(home_dir,"output/metrics_1108_1202_z",sep="") 
paste(home_dir,"output/metrics_1109_1209_z",sep="") 
paste(home_dir,"output/metrics_pt_z",sep="")  
paste(home_dir,"output/metrics_1018_1119_z",sep="") 
paste(home_dir,"output/metrics_1122_1222_z",sep="")
 
-       template_indices_control <- c(84, 0)    
-       template_indices_test <- c(85, 0)       
+       template_indices_control <- c(84, 0)
+       template_indices_test <- c(85, 0)
        fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="")
        
        if (import_metrics)
                import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
        
+       if (load_metrics)
+               load(load_file)
        
        
        # PROCESS DATA
@@ -90,11 +95,11 @@
        data_counts_test <<- c()
        data_counts_control <<- c()
        
-       edit_decrease_means_test <<- c()
-       edit_decrease_means_control <<- c()
+       means_test <<- c()
+       means_control <<- c()
        
-       edit_decrease_sd_test <<- c()
-       edit_decrease_sd_control <<- c()        
+       sd_test <<- c()
+       sd_control <<- c()      
        
        
        if (registered)
@@ -104,31 +109,58 @@
        
        for (i in edit_count_before_filter)
        {
-               
process.data.frames(i,0,Inf,Inf,registered=registered,min_revisions_after=0)
+               process.data.frames(min_deleted_edits_before = i, 
max_deleted_edits_before = Inf, registered=registered, 
min_edits_after=rev_count_after_min)
+
+               means_test <<- c(means_test, 
mean(merged_test$edit_del_counts_0_3))
+               means_control <<- c(means_control, 
mean(merged_control$edit_del_counts_0_3))
                
-               edit_decrease_means_test <<- c(edit_decrease_means_test, 
mean(merged_test$edits_decrease) * 100)
-               edit_decrease_means_control <<- c(edit_decrease_means_control, 
mean(merged_control$edits_decrease) * 100)
+               sd_test <<- c(sd_test, sd(merged_test$edit_del_counts_0_3))
+               sd_control <<- c(sd_control, 
sd(merged_control$edit_del_counts_0_3))
                
-               edit_decrease_sd_test <<- c(edit_decrease_sd_test, 
sd(merged_test$edits_decrease * 100))
-               edit_decrease_sd_control <<- c(edit_decrease_sd_control, 
sd(merged_control$edits_decrease * 100))
-               
-               data_counts_test <<- c(data_counts_test, 
length(merged_test$edits_decrease))    
-               data_counts_control <<- c(data_counts_control, 
length(merged_control$edits_decrease))
+               data_counts_test <<- c(data_counts_test, 
length(merged_test$edit_del_counts_0_3))       
+               data_counts_control <<- c(data_counts_control, 
length(merged_control$edit_del_counts_0_3))
        }
        
-       # PLOT DATA             
        
-       plot_title = paste("Huggle Short 1 & 2 Experiment (", reg_str, ") - 
Decrease in Editor Activity", sep="")
+       # PLOT - Decrease in Editor Activity
        
-       df <- data.frame(x=1:length(edit_decrease_means_test), 
y_test=edit_decrease_means_test, y_ctrl=edit_decrease_means_control, 
y_test_sd=edit_decrease_sd_test, y_ctrl_sd=edit_decrease_sd_control)        
+       # plot_title_full = paste(plot_title, "(", reg_str, ") - Decrease in 
Editor Activity", sep="")
+       plot_title_full = paste(plot_title, "(", reg_str, ") - ", 
plot_title_metric, sep="")
+       
+       df <- data.frame(x=1:length(means_test), y_test=means_test, 
y_ctrl=means_control, y_test_sd=sd_test, y_ctrl_sd=sd_control)      
        p <- ggplot(df,aes(x)) + geom_line(aes(y=y_test,colour="Test")) + 
geom_line(aes(y=y_ctrl,colour="Control")) 
        
        if (error_bars)
                p <- p + geom_errorbar(aes(ymin = y_test - y_test_sd, ymax = 
y_test + y_test_sd, colour="Test"), width=0.2) + geom_errorbar(aes(ymin = 
y_ctrl - y_ctrl_sd, ymax = y_ctrl + y_ctrl_sd, colour="Control"), width=0.2)
        
-       p <- p + scale_x_continuous('Minimum Edits before Template Posting') + 
scale_y_continuous('Mean % Decrease in Edit Activity') + opts(title = 
plot_title, legend.title = theme_blank())
+       # Add axes labels and titles
+       p <- p + scale_x_continuous(x_scale) + scale_y_continuous(y_scale) + 
opts(title = plot_title_full, legend.title = theme_blank())
        
        if (save_plot)
-               
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""),width=8)
+               
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,reg_str,'.png',sep=""),
 width=plot_width)
+               
+               
+       
+       # PLOT - Sample Sizes
+       
+       if (plot_samples)
+       {
+               plot_title_full = paste(plot_title, "(", reg_str, ") - Sample 
Sizes", sep="")
+               bins <- 1:length(data_counts_test)
+               
+               test_samples <- counts.to.samples(bins, data_counts_test)
+               control_samples <- counts.to.samples(bins, data_counts_control) 
        
+               
+               labels <- c(test_samples * 0, control_samples / control_samples)
+               labels[labels == 0] = "Test"
+               labels[labels == 1] = "Control"
+               
+               df <- data.frame(x=c(test_samples, control_samples), 
labels=labels) 
+               p <- ggplot(df, aes(x, fill=labels)) + geom_bar(binwidth=0.4, 
position="dodge")
+               p <- p + scale_x_continuous(x_scale) + 
scale_y_continuous('Sample Size') + opts(title = plot_title_full, legend.title 
= theme_blank())
+               
+               if (save_plot)
+                       
ggsave(paste('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/plots/',filename,"samples_",reg_str,'.png',sep=""),
 width=plot_width)
+       }
 }
 


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to