https://www.mediawiki.org/wiki/Special:Code/MediaWiki/115057

Revision: 115057
Author:   rfaulk
Date:     2012-04-26 00:18:32 +0000 (Thu, 26 Apr 2012)
Log Message:
-----------
modified metric names
used aliases for selected columns from data frames

Modified Paths:
--------------
    trunk/tools/wsor/message_templates/R/template_analysis.R
    trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R

Modified: trunk/tools/wsor/message_templates/R/template_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/template_analysis.R    2012-04-25 22:47:23 UTC (rev 115056)
+++ trunk/tools/wsor/message_templates/R/template_analysis.R    2012-04-26 00:18:32 UTC (rev 115057)
@@ -7,13 +7,36 @@
 
 # Import helper methods - GLOBAL
 
-home_dir <<- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
+home_dir <<- "/Users/rfaulkner/projects/wsor/message_templates/"
 # home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/"
 
 helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="")
 source(helper_import)
 
 
+# Column names
+
+# revisions_before <<- "revisions_before" 
+# revisions_after_0_3 <<- "revisions_after_0_3" 
+# revisions_after_3_30 <<- "revisions_after_3_30" 
+# revisions_after_gt_30 <<- "revisions_after_gt_30"
+
+revisions_before <<- "ns_6_revisions_before"
+revisions_after_0_3 <<- "ns_6_revisions_after_0_3" 
+revisions_after_3_30 <<- "ns_6_revisions_after_3_30" 
+# revisions_after_gt_30 <<- "ns_0_revisions_after_gt_30"
+
+# revisions_deleted_before <<- "revisions_deleted_before" 
+#revisions_deleted_after_0_3 <<- "revisions_deleted_after_0_3" 
+#revisions_deleted_after_3_30 <<- "revisions_deleted_after_3_30"
+#revisions_deleted_after_gt_30 <<- "revisions_deleted_after_gt_30" 
+
+revisions_deleted_before <<- "ns_0_revisions_deleted_before"
+revisions_deleted_after_0_3 <<- "ns_0_revisions_deleted_after_0_3" 
+revisions_deleted_after_3_30 <<- "ns_0_revisions_deleted_after_3_30"
+revisions_deleted_after_gt_30 <<- "ns_0_revisions_deleted_after_gt_30" 
+
+
 # FUNCTION :: import.experimental.metrics.data
 #
 # Import the template data and build data frames from it
@@ -24,6 +47,7 @@
        # Read aggregated results for the template
                
        fname_last_part_edits <- "_editcounts.tsv"
+       # fname_last_part_edits <- "_editcountsnonamespace.tsv"
        fname_last_part_blocks <- "_blocks.tsv"
        fname_last_part_warn <- "_warnings.tsv"
 
@@ -48,7 +72,7 @@
 #
 
 process.data.frames <- function(min_edits_before=0, 
min_deleted_edits_before=0, max_edits_before=Inf, max_deleted_edits_before=Inf, 
min_edits_after = 0, registered=TRUE) {
-       
+
        # MERGE THE METRICS AND ADD TEMPLATE COLS
 
        # print("Merge Data..")
@@ -58,63 +82,73 @@
        
        merged_test <<- merge(merged_test, warn_test, 
by=intersect(names(merged_test),names(warn_test)), all=TRUE)
        merged_control <<- merge(merged_control, warn_control, 
by=intersect(names(merged_control),names(warn_control)), all=TRUE)
-       
+
        merged_test$template <<- 1
        merged_control$template <<- 0
-       
-       
+
        # FILTER DATA
 
        # print("Filter Data..")
        
-       maximum_warns_before <- 0
+       maximum_warns_before <- Inf
        
        if (!registered)
                IP_regex <- 
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
        else
                IP_regex <- '.*[a-zA-z].*'
-       
+
        condition_1 <- TRUE # merged_test$blocks_before > 0
        condition_2 <- merged_test$blocks_after == 0
-       condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before & 
merged_test$ns_0_revisions_before <= max_edits_before 
-       condition_4 <- merged_test$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
+       condition_3 <- merged_test[[revisions_before]] >= min_edits_before & 
merged_test[[revisions_before]] <= max_edits_before 
+       condition_4 <- TRUE # merged_test[[revisions_deleted_before]] >= 
min_deleted_edits_before & merged_test[[revisions_deleted_before]] <= 
max_deleted_edits_before
        condition_5 <- merged_test$warns_before <= maximum_warns_before
        condition_6 <- filter.list.by.regex(IP_regex, 
merged_test$recipient_name)
-       condition_7 <- merged_test$ns_0_revisions_after_0_3 >= min_edits_after
-       
+       condition_7 <- merged_test[[revisions_after_0_3]] >= min_edits_after
+
        indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7
+
        merged_test <<- merged_test[indices,]
-       
+       merged_test <<- merged_test[!is.na(merged_test$recipient_name),]        
+
        condition_1 <- TRUE # merged_control$blocks_before > 0
        condition_2 <- merged_control$blocks_after == 0
-       condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before 
& merged_control$ns_0_revisions_before <= max_edits_before
-       condition_4 <- merged_control$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
+       condition_3 <- merged_control[[revisions_before]] >= min_edits_before & 
merged_control[[revisions_before]] <= max_edits_before
+       condition_4 <- TRUE # merged_control[[revisions_deleted_before]] >= 
min_deleted_edits_before & merged_control[[revisions_deleted_before]] <= 
max_deleted_edits_before
        condition_5 <- merged_control$warns_before <= maximum_warns_before
        condition_6 <- filter.list.by.regex(IP_regex, 
merged_control$recipient_name)
-       condition_7 <- merged_control$ns_0_revisions_after_0_3 >= 
min_edits_after
-       
-       indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7 
+       condition_7 <- merged_control[[revisions_after_0_3]] >= min_edits_after
+
+       indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7
        merged_control <<- merged_control[indices,]
-               
-       
+       merged_control <<- 
merged_control[!is.na(merged_control$recipient_name),]
+
        # ADD DERIVED COLS 
        
        # print("Add derived columns..")
        
-       merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - 
(merged_test$ns_0_revisions_after_0_3)) / (merged_test$ns_0_revisions_before)
-       merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before 
- merged_control$ns_0_revisions_after_0_3) / 
(merged_control$ns_0_revisions_before)
+       merged_test$edit_event <<- 
convert.list.to.binomial.event(merged_test[[revisions_after_0_3]] + 
merged_test[[revisions_after_3_30]]) # + merged_test[[revisions_after_gt_30]])
+       merged_control$edit_event <<- 
convert.list.to.binomial.event(merged_control[[revisions_after_0_3]] + 
merged_control[[revisions_after_3_30]]) # + 
merged_control[[revisions_after_gt_30]])
        
-       merged_test$edit_counts_0_3 <<- merged_test$ns_0_revisions_after_0_3
-       merged_control$edit_counts_0_3 <<- 
merged_control$ns_0_revisions_after_0_3
+       merged_test$edit_counts_all <<- merged_test[[revisions_after_0_3]] + 
merged_test[[revisions_after_3_30]] # + merged_test[[revisions_after_gt_30]]
+       merged_control$edit_counts_all <<- 
merged_control[[revisions_after_0_3]] + merged_control[[revisions_after_3_30]] 
# + merged_control[[revisions_after_gt_30]]
        
-       merged_test$edits_del_decrease <<- 
(merged_test$ns_0_revisions_deleted_before - 
(merged_test$ns_0_revisions_deleted_after_0_3)) / 
(merged_test$ns_0_revisions_deleted_before)
-       merged_control$edits_del_decrease <<- 
(merged_control$ns_0_revisions_deleted_before - 
(merged_control$ns_0_revisions_deleted_after_0_3)) / 
(merged_control$ns_0_revisions_deleted_before)
+       merged_test$edits_norm <<- merged_test[[revisions_after_0_3]] / 
merged_test[[revisions_before]]
+       merged_control$edits_norm <<- merged_control[[revisions_after_0_3]] / 
merged_control[[revisions_before]]
        
-       merged_test$edit_del_counts_0_3 <<- 
merged_test$ns_0_revisions_deleted_after_0_3
-       merged_control$edit_del_counts_0_3 <<- 
merged_control$ns_0_revisions_deleted_after_0_3
+       merged_test$edits_decrease <<- (merged_test[[revisions_before]] - 
(merged_test[[revisions_after_0_3]])) / (merged_test[[revisions_before]])
+       merged_control$edits_decrease <<- (merged_control[[revisions_before]] - 
merged_control[[revisions_after_0_3]]) / (merged_control[[revisions_before]])
+
+       merged_test$edit_counts_0_3 <<- merged_test[[revisions_after_0_3]]
+       merged_control$edit_counts_0_3 <<- merged_control[[revisions_after_0_3]]
        
-       merged_test$edit_del_counts <<- 
ceiling(merged_test$ns_0_revisions_deleted_after_0_3 / 
max(merged_test$ns_0_revisions_deleted_after_0_3))
-       merged_control$edit_del_counts <<- 
ceiling(merged_control$ns_0_revisions_deleted_after_0_3 / 
max(merged_control$ns_0_revisions_deleted_after_0_3))
+       # merged_test$edits_del_decrease <<- 
(merged_test[[revisions_deleted_before]] - 
(merged_test[[revisions_deleted_after_0_3]])) / 
(merged_test[[revisions_deleted_before]])
+       # merged_control$edits_del_decrease <<- 
(merged_control[[revisions_deleted_before]] - 
(merged_control[[revisions_deleted_after_0_3]])) / 
(merged_control[[revisions_deleted_before]])
+       
+       # merged_test$edit_del_counts_0_3 <<- 
merged_test[[revisions_deleted_after_0_3]]
+       # merged_control$edit_del_counts_0_3 <<- 
merged_control[[revisions_deleted_after_0_3]]
+       
+       # merged_test$edit_del_counts <<- 
ceiling(merged_test[[revisions_deleted_after_0_3]] / 
max(merged_test[[revisions_deleted_after_0_3]]))
+       # merged_control$edit_del_counts <<- 
ceiling(merged_control[[revisions_deleted_after_0_3]] / 
max(merged_control[[revisions_deleted_after_0_3]]))
 }
 
 # FUNCTION :: execute.chi.square.test
@@ -153,7 +187,7 @@
        chisq_res_control <<- chisq.test(counts_control$counts, 
p=probs_test$counts)
 }
 
-
+       
 # FUNCTION :: execute.main
 #
 # A pseudo main method to allow the script to be executed as a batch 
@@ -166,12 +200,11 @@
        
        # c(60,62,66,76) # c(107,109,111,113,115) # TWINKLE c(78,81) # c(84, 0) 
#  c(1,4) # c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) # CORENSEARCH 
c(118, 120, 122, 124, 126, 128) # IMAGETAG c(132, 133, 135, 136, 138, 139, 141, 
142)
        # c(61,63,67,77) # c(108,110,114,116) # TWINKLE c(79,82) # c(86, 0) # 
c(2,3) # c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) # CORENSEARCH 
c(117, 119, 121, 123, 125, 127) # IMAGETAG c(131, 134, 137, 140)
-       # paste(home_dir,"output/metrics_1018_1119_z",sep="") # 
paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_1109_1209_z",sep="") 
-       # paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") #  
paste(home_dir,"output/metrics_z",sep="") 
+       # "output/metrics_1018_1119_z" # "output/metrics_1122_1222_z" # 
"output/metrics_1109_1209_z" # "output/metrics_1108_1202_z" # 
"output/metrics_pt_z" # "output/metrics_1018_1119_z" # "output/metrics_z"
        
-       template_indices_control <- c(81,0)
-       template_indices_test <- c(82,0)
-       fname_first_part <- "output/metrics_1109_1209_z"
+       template_indices_control <- c(131,0)
+       template_indices_test <- c(133,0)
+       fname_first_part <- "output/metrics_z"
        
        if (import_metrics)
                import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
@@ -194,7 +227,9 @@
        # LOGISTIC REGRESSION MODELLING:
        
        all_data <<- append.data.frames(merged_test, merged_control)
-       
+
+       # summary(glm(template ~ edit_event, data=all_data, 
family=binomial(link="logit")))     
+       # summary(glm(template ~ edit_counts_all, data=all_data, family=binomial(link="logit")))
        # summary(glm(template ~ edits_decrease, data=all_data, 
family=binomial(link="logit")))
        # summary(glm(template ~ edit_counts_0_3, data=all_data, 
family=binomial(link="logit")))
        # summary(glm(template ~ edits_del_decrease, data=all_data, 
family=binomial(link="logit")))

Modified: trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R
===================================================================
--- trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-04-25 22:47:23 UTC (rev 115056)
+++ trunk/tools/wsor/message_templates/R/visualize_edits_decrease.R     2012-04-26 00:18:32 UTC (rev 115057)
@@ -109,16 +109,16 @@
        
        for (i in edit_count_before_filter)
        {
-               process.data.frames(min_deleted_edits_before = i, 
max_deleted_edits_before = Inf, registered=registered, 
min_edits_after=rev_count_after_min)
+               process.data.frames(min_edits_before = i, max_edits_before = 
Inf, registered=registered, min_edits_after=rev_count_after_min)
 
-               means_test <<- c(means_test, 
mean(merged_test$edit_del_counts_0_3))
-               means_control <<- c(means_control, 
mean(merged_control$edit_del_counts_0_3))
+               means_test <<- c(means_test, mean(merged_test$edits_decrease))
+               means_control <<- c(means_control, 
mean(merged_control$edits_decrease))
                
-               sd_test <<- c(sd_test, sd(merged_test$edit_del_counts_0_3))
-               sd_control <<- c(sd_control, 
sd(merged_control$edit_del_counts_0_3))
+               sd_test <<- c(sd_test, sd(merged_test$edits_decrease))
+               sd_control <<- c(sd_control, sd(merged_control$edits_decrease))
                
-               data_counts_test <<- c(data_counts_test, 
length(merged_test$edit_del_counts_0_3))       
-               data_counts_control <<- c(data_counts_control, 
length(merged_control$edit_del_counts_0_3))
+               data_counts_test <<- c(data_counts_test, 
length(merged_test$edits_decrease))    
+               data_counts_control <<- c(data_counts_control, 
length(merged_control$edits_decrease))
        }
        
        


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to