https://www.mediawiki.org/wiki/Special:Code/MediaWiki/112749

Revision: 112749
Author:   rfaulk
Date:     2012-03-01 00:44:45 +0000 (Thu, 01 Mar 2012)
Log Message:
-----------
remove files.  functionality has been consolidated into template_analysis.R

Removed Paths:
-------------
    trunk/tools/wsor/message_templates/R/huggle3_analysis.R
    trunk/tools/wsor/message_templates/R/huggle3_analysis_chi_sq.R

Deleted: trunk/tools/wsor/message_templates/R/huggle3_analysis.R
===================================================================
--- trunk/tools/wsor/message_templates/R/huggle3_analysis.R     2012-03-01 
00:43:33 UTC (rev 112748)
+++ trunk/tools/wsor/message_templates/R/huggle3_analysis.R     2012-03-01 
00:44:45 UTC (rev 112749)
@@ -1,154 +0,0 @@
-# source('/home/rfaulkner/trunk/projects/WSOR/message_templates/R/huggle3_analysis.R')
-#
-# Ryan Faulkner, January 23rd 2012
-#
-# Comparison of edit counts for Huggle 3 test among templates z64 (http://en.wikipedia.org/wiki/Template:Uw-error1-default) / z65 (http://en.wikipedia.org/wiki/Template:Uw-error1-short)
-#  
-
-# Import helper methods - GLOBAL
-
-home_dir <- "/home/rfaulkner/trunk/projects/WSOR/message_templates/"
-# home_dir <- "/home/rfaulk/trunk/projects/WSOR/message_templates/"
-
-helper_import <- paste(home_dir,"R/R_helper_functions.R",sep="")
-source(helper_import)
-
-
-# FUNCTION :: import.experimental.metrics.data
-#
-# Import the template data and build data frames from it
-#
-
-import.experimental.metrics.data <- function(template_indices_test, 
template_indices_control, fname_first_part) {
-               
-       # Read aggregated results for the template
-               
-       fname_last_part_edits <- "_editcounts.tsv"
-       fname_last_part_blocks <- "_blocks.tsv"
-       fname_last_part_warn <- "_warnings.tsv"
-               
-       warn_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_warn, string_frames=c(1))
-       warn_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_warn, string_frames=c(1))
-       
-       blocks_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_blocks, string_frames=c(1))
-       blocks_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_blocks, string_frames=c(1))
-       
-       edits_test <<- build.data.frames(template_indices_test, 
fname_first_part, fname_last_part_edits, string_frames=c(1))
-       edits_control <<- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part_edits, string_frames=c(1))
-       
-}
-
-
-
-# FUNCTION :: process.data.frames
-#
-# Given a set of data frames containing template test metrics per user posting, combine and generate summary metric frames
-#
-# GLOBALS assumed to exist:  warn_test, warn_control, blocks_test, blocks_control, edits_test, edits_control
-#
-
-process.data.frames <- function() {
-       
-       # MERGE THE METRICS AND ADD TEMPLATE COLS
-
-       print("Merge Data..")
-       
-       merged_test <<- merge(edits_test, blocks_test, 
by=intersect(names(edits_test),names(blocks_test)), all=TRUE)
-       merged_control <<- merge(edits_control, blocks_control, 
by=intersect(names(edits_control),names(blocks_control)), all=TRUE)
-       
-       merged_test <<- merge(merged_test, warn_test, 
by=intersect(names(merged_test),names(warn_test)), all=TRUE)
-       merged_control <<- merge(merged_control, warn_control, 
by=intersect(names(merged_control),names(warn_control)), all=TRUE)
-       
-       merged_test$template <<- 1
-       merged_control$template <<- 0
-       
-       
-       # FILTER DATA
-
-       print("Filter Data..")
-       min_edits_before <- 5
-       min_deleted_edits_before <- 0
-       
-       max_edits_before <- Inf
-       max_deleted_edits_before <- Inf
-       
-       maximum_warns_before <- 0
-       
-       IP_regex <- 
"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$"
-       IP_regex_not <- '.*[a-zA-z].*'
-       
-       condition_1 <- TRUE # merged_test$blocks_before > 0
-       condition_2 <- merged_test$blocks_after == 0
-       condition_3 <- merged_test$ns_0_revisions_before >= min_edits_before & 
merged_test$ns_0_revisions_before <= max_edits_before 
-       condition_4 <- merged_test$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_test$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
-       condition_5 <- merged_test$warns_before <= maximum_warns_before
-       condition_6 <- filter.list.by.regex(IP_regex_not, 
merged_test$recipient_name)
-       condition_7 <- merged_test$ns_0_revisions_after_0_3 > 0
-       
-       indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7
-       merged_test <<- merged_test[indices,]
-       
-       condition_1 <- TRUE # merged_control$blocks_before > 0
-       condition_2 <- merged_control$blocks_after == 0
-       condition_3 <- merged_control$ns_0_revisions_before >= min_edits_before 
& merged_control$ns_0_revisions_before <= max_edits_before
-       condition_4 <- merged_control$ns_0_revisions_deleted_before >= 
min_deleted_edits_before & merged_control$ns_0_revisions_deleted_before <= 
max_deleted_edits_before
-       condition_5 <- merged_control$warns_before <= maximum_warns_before
-       condition_6 <- filter.list.by.regex(IP_regex_not, 
merged_control$recipient_name)
-       condition_7 <- merged_control$ns_0_revisions_after_0_3 > 0
-       
-       indices <- condition_1 & condition_2 & condition_3 & condition_4 & 
condition_5 & condition_6 & condition_7 
-       merged_control <<- merged_control[indices,]
-               
-       
-       # ADD DERIVED COLS 
-       
-       print("Add derived columns..")
-       
-       merged_test$edits_decrease <<- (merged_test$ns_0_revisions_before - 
merged_test$ns_0_revisions_after_0_3) / (merged_test$ns_0_revisions_before)
-       merged_control$edits_decrease <<- (merged_control$ns_0_revisions_before 
- merged_control$ns_0_revisions_after_0_3) / 
(merged_control$ns_0_revisions_before)
-       
-       merged_test$edits_del_decrease <<- 
(merged_test$ns_0_revisions_deleted_before - 
(merged_test$ns_0_revisions_deleted_after_0_3)) / 
(merged_test$ns_0_revisions_deleted_before + 1)
-       merged_control$edits_del_decrease <<- 
(merged_control$ns_0_revisions_deleted_before - 
(merged_control$ns_0_revisions_deleted_after_0_3)) / 
(merged_control$ns_0_revisions_deleted_before + 1)
-       
-}
-
-
-
-# IMPORT DATA
-
-template_indices_control <- c(84, 0) # c(107,109,111,113,115) # c(1,4) # 
c(84,99,101,103,105) # c(60,62,64,66,68,70,72,74,76) 
-template_indices_test <- c(86, 0) # c(108,110,114,116) # c(2,3) # 
c(85,86,100,102,104,106) # c(61,63,65,67,69,71,73,75,77) 
-fname_first_part <- paste(home_dir,"output/metrics_1108_1202_z",sep="") # 
paste(home_dir,"output/metrics_1122_1222_z",sep="") # 
paste(home_dir,"output/metrics_pt_z",sep="") #  
paste(home_dir,"output/metrics_1018_1119_z",sep="") # 
"/home/rfaulk/WSOR/message_templates/output/metrics_pt_z"
-
-# import.experimental.metrics.data(template_indices_test, 
template_indices_control, fname_first_part)
-
-
-
-# PROCESS DATA
-
-# print("")
-# print("Processing data frames.")
-process.data.frames()
-
-
-
-# HYPOTHESIS TESTING
-
-#test_edits <- 
get.decrease.in.edits.after.template(edits_test$ns_0_revisions_before, 
edits_test$ns_0_revisions_after_3_30,lower_bound_rev_before=200,lower_bound_rev_after=0)
-#control_edits <- 
get.decrease.in.edits.after.template(edits_control$ns_0_revisions_before, 
edits_control$ns_0_revisions_after_3_30,lower_bound_rev_before=200, 
lower_bound_rev_after=0)
-
-#test_blocks <- get.change.in.blocks(blocks_test$blocks_before, 
blocks_test$blocks_after)
-#control_blocks <- get.change.in.blocks(blocks_control$blocks_before, 
blocks_control$blocks_after)
-
-#t_result_edits = t.test(x=test_edits, y=control_edits, alternative = 
"two.sided", paired = FALSE, var.equal = FALSE, conf.level = 0.95)
-#t_result_blocks = t.test(x=test_blocks, y=control_blocks, alternative = 
"two.sided", paired = FALSE, var.equal = FALSE, conf.level = 0.95)
-
-
-
-# LOGISTIC REGRESSION MODELLING:
-
-all_data <- append.data.frames(merged_test, merged_control)
-summary(glm(template ~ edits_decrease, data=all_data, 
family=binomial(link="logit")))
-# summary(glm(template ~ edits_del_decrease, data=all_data, 
family=binomial(link="logit")))
-
-

Deleted: trunk/tools/wsor/message_templates/R/huggle3_analysis_chi_sq.R
===================================================================
--- trunk/tools/wsor/message_templates/R/huggle3_analysis_chi_sq.R      
2012-03-01 00:43:33 UTC (rev 112748)
+++ trunk/tools/wsor/message_templates/R/huggle3_analysis_chi_sq.R      
2012-03-01 00:44:45 UTC (rev 112749)
@@ -1,84 +0,0 @@
-
-# Ryan Faulkner, January 25th 2012
-#
-# Comparison of metrics for Huggle 3 using a chi-square goodness of fit test
-#  
-
-
-source('/home/rfaulk/WSOR/message_templates/R/R_helper_functions.R')
-
-# Read aggregated results for the template
-
-template_indices_control <- c(60,62,64,66,68,70,72,74,76)
-template_indices_test <- c(61,63,65,67,69,71,73,75,77)
-
-fname_first_part <- 
"/home/rfaulk/WSOR/message_templates/output/metrics_1018_1119_z"
-fname_last_part <- "_editcounts.tsv"
-
-
-# MAIN EXECUTION
-# ==============
-
-# BUILD THE DATA FRAMES
-
-metrics_test <- build.data.frames(template_indices_test, fname_first_part, 
fname_last_part)
-metrics_control <- build.data.frames(template_indices_control, 
fname_first_part, fname_last_part)
-
-
-# Compute the change in edits after the template
-# ===============================================
-
-
-test_samples <- c()
-control_samples <- c()
-
-for (i in 1:length(metrics_test$ns_0_revisions_before)) 
-       if (metrics_test$ns_0_revisions_before[i] != 0)
-               test_samples <- c(test_samples, 
-               (metrics_test$ns_0_revisions_before[i] - 
metrics_test$ns_0_revisions_after[i]) / metrics_test$ns_0_revisions_before[i])
-
-for (i in 1:length(metrics_control$ns_0_revisions_before)) 
-       if (metrics_control$ns_0_revisions_before[i] != 0)
-               control_samples <- c(control_samples, 
-               (metrics_control$ns_0_revisions_before[i] - 
metrics_control$ns_0_revisions_after[i]) / 
metrics_control$ns_0_revisions_before[i])
-
-
-
-# Construct a distribution (Normal) using parameters computed from metric count data
-# This will be used as the model distribution - do symmetrically (i.e. for both ways)
-# ====================================================================================
-
-
-# Number of samples for each template
-
-n_test <- length(test_samples)
-n_control <- length(control_samples)
-
-
-# Produce probabilities for normal to be fit
-# build data frames
-
-lower_bound_range <- trunc(min(min(c(control_samples, test_samples))) - 1)
-upper_bound_range <- trunc(max(max(c(control_samples, test_samples))) + 1)
-bins <- sort(lower_bound_range : upper_bound_range)
-
-probs_control <- get_normal_bins(bins, control_samples)
-probs_test <- get_normal_bins(bins, test_samples)
-
-probs_control <- data.frame(values=bins, counts=probs_control)
-probs_test <- data.frame(values=bins, counts=probs_test)
-
-counts_test <- get_bin_counts(bins, test_samples)
-counts_control <- get_bin_counts(bins, control_samples)
-
-counts_test <- pad_counts(bins, counts_test)
-counts_control <- pad_counts(bins, counts_control)
-
-       
-# Get chi-squared test results
-chisq_res_1 = chisq.test(counts_test$counts, p=probs_control$counts)
-chisq_res_2 = chisq.test(counts_control$counts, p=probs_test$counts)
-
-
-
-


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

Reply via email to