[MediaWiki-CVS] SVN: [114794] trunk/tools/wsor/newbie_warnings/R

halfak Sat, 07 Apr 2012 11:09:05 -0700

https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114794


Revision: 114794
Author:   halfak
Date:     2012-04-07 18:08:59 +0000 (Sat, 07 Apr 2012)
Log Message:
-----------
old updates to unimportant scripts

Modified Paths:
--------------
    trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R
    trunk/tools/wsor/newbie_warnings/R/outcomes.R
    trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R

Modified: trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R       2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R       2012-04-07 18:08:59 UTC (rev 114794)
@@ -22,8 +22,6 @@
                HUGGLING_CODINGS_MK2$experimental = HUGGLING_CODINGS_MK2$experimental == 1
                
                
-               HUGGLING_CODINGS_MK2$edits_own_talkpage      = HUGGLING_CODINGS_MK2$edits_own_talkpage > 0
-               HUGGLING_CODINGS_MK2$edits_hugglers_talkpage = HUGGLING_CODINGS_MK2$edits_hugglers_talkpage > 0
                HUGGLING_CODINGS_MK2$responds_own_talk       = HUGGLING_CODINGS_MK2$responds_own_talk > 0
                HUGGLING_CODINGS_MK2$responds_elsewhere      = HUGGLING_CODINGS_MK2$responds_elsewhere > 0
                HUGGLING_CODINGS_MK2$is_anon                 = HUGGLING_CODINGS_MK2$is_anon  > 0

Modified: trunk/tools/wsor/newbie_warnings/R/outcomes.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/outcomes.R       2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/outcomes.R       2012-04-07 18:08:59 UTC (rev 114794)
@@ -100,6 +100,49 @@
 ))
 
 #
+# 
+#
+regressions = function(messaged_codings){
+       outcomes = list()
+       for(outcome in c("stay", "improves", "contact", "good_contact", "good_outcome")){
+               cat("Processing:", outcome)
+               groups = list()
+               for(group in c("unlikely", "possible", "golden")){
+                       cat(".")
+                       group_codings = messaged_codings[ 
+                               messaged_codings$group == group,
+                       ]
+                       model = glm(
+                               group_codings[[outcome]] ~ 
+                                       anon + 
+                                       ntalk_edits_before_msg + 
+                                       talk_edits_before_msg + 
+                                       teaching * personal,
+                               data = group_codings
+                       )
+                       
+                       ncoefs = length(model$coefficients)
+                       features = list()
+                       i = 0
+                       for(feature in names(model$coefficients)){
+                               i = i+1
+                               features[[feature]] = list(
+                                       coef=coefficients(summary(model))[i],
+                                       error=coefficients(summary(model))[i+ncoefs],
+                                       pval=coefficients(summary(model))[i+ncoefs*3]
+                               )
+                       }
+                       groups[[group]] = features
+               }
+               cat("\n")
+               outcomes[[outcome]] = groups
+       }
+       outcomes
+}
+#outcomes = regressions(messaged_codings)
+
+
+#
 # Try removing teaching*personal.
 #
 
@@ -113,27 +156,27 @@
        cat("============================================================\n")
        
        print(summary(glm(
-               good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+               good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
                data = group_codings
        )))
        
        print(summary(glm(
-               improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+               improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
                data = group_codings
        )))
        
        print(summary(glm(
-               contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+               contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
                data = group_codings
        )))
        
        print(summary(glm(
-               good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+               good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
                data = group_codings
        )))
        
        print(summary(glm(
-               stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+               stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
                data = group_codings
        )))
        

Modified: trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R   2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R   2012-04-07 18:08:59 UTC (rev 114794)
@@ -1,19 +1,7 @@
 source("loader/load_huggling_codings_mk2.R")
 library(doBy)
-hugglings = load_huggling_codings_mk2()
+hugglings = load_huggling_codings_mk2(reload=T)
 
-#hugglingCounts = summaryBy(
-#      recipient ~ recipient,
-#      data = hugglings,
-#      FUN=length
-#)
-#hugglingCounts$count = hugglingCounts$recipient.length
-#hugglingCounts$recipient.length = NULL
-#
-#hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
-
-#huggling_codings = load_huggling_codings(reload=T)
-#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
 ifNA = function(val, naThen){
        if(is.na(val)){
                naThen
@@ -33,13 +21,47 @@
                F
        )
 )
-hugglings$good_contact = hugglings$contact & !hugglings$retaliates
-hugglings$stay         = !is.na(hugglings$after_rating)
-hugglings$improves     = hugglings$after_rating > hugglings$before_rating
+hugglings$good_contact   = mapply(
+       function(contact, retaliates){
+               if(contact){
+                       !retaliates
+               }else{
+                       NA
+               }
+       },
+       hugglings$contact,
+       hugglings$retaliates
+)
+hugglings$stay           = !is.na(hugglings$after_rating)
+hugglings$active         = hugglings$edits_after_msg_3days > 0
+hugglings$warned_again   = hugglings$warnings_after_72hrs > 0
+hugglings$blocked        = !is.na(hugglings$blocked_after_msg_seconds) & hugglings$blocked_after_msg_seconds < 604800
+hugglings$improves       = hugglings$after_rating > hugglings$before_rating
+hugglings$other_user_talk_edits = hugglings$user_talk_edits_after_msg - hugglings$edits_own_talkpage
 hugglings$talk_edits_before_msg = with(
        hugglings,
-       user_talk_edits_after_msg + article_talk_edits_before_msg
+       user_talk_edits_before_msg + article_talk_edits_before_msg
 )
+hugglings$talk_edits_after_msg = with(
+       hugglings,
+       user_talk_edits_after_msg + article_talk_edits_after_msg
+)
+hugglings$article_talk_before = hugglings$article_talk_edits_before_msg
+hugglings$user_talk_before = hugglings$user_talk_edits_before_msg
+
+
+hugglings$db_good_communication = mapply(
+       function(other_user_talk_edits, warned_again){
+               if(other_user_talk_edits){
+                       !warned_again
+               }else{
+                       NA
+               }
+       },
+       hugglings$other_user_talk_edits,
+       hugglings$warned_again
+)
+
 # Can't do it
 #messaged_codings$ntalk_edits_before_msg = with(
 #      messaged_codings,
@@ -109,7 +131,179 @@
        }
 ))
 
+pval = function(val){
+       if(is.na(val)){
+               "<span style=\"color: #ccc;\">---</span>"
+       }
+       else if(val < 0.05){
+               if(val < 0.001){
+                       val = "< .001"
+               }else{
+                       val = round(val, 3)
+               }
+               paste("'''", val, "'''", sep="")
+       }else if(val < 0.10){
+               paste("<span style=\"text-decoration: underline;\">", round(val, 3), "</span>", sep="")
+       }else{
+               paste("<span style=\"color: #ccc;\">", round(val, 3), "</span>", sep="")
+       }
+}
 
+model_outcomes = function(model){
+       ncoefs = length(model$coefficients)
+       coefs = list()
+       i = 0
+       for(feature in names(model$coefficients)){
+               i = i+1
+               coefs[[feature]] = list(
+                       coef=coefficients(summary(model))[i],
+                       error=coefficients(summary(model))[i+ncoefs],
+                       pval=coefficients(summary(model))[i+ncoefs*3]
+               )
+       }
+       outcomes = list(coefs=coefs)
+       smry = summary(model)
+       if(!is.null(smry$r.squared)){
+               outcomes$fitness = paste("R-squared:", round(smry$r.squared,2))
+       }else{
+               outcomes$fitness = paste("AIC:", round(smry$aic, 2))
+       }
+       outcomes
+}
+
+linear_outcomes = function(f){
+       outcomes = list()
+       outcome_names = c(
+               "ns0_edits_after_msg", 
+               "talk_edits_after_msg", 
+               "article_talk_edits_after_msg", 
+               "user_talk_edits_after_msg",
+               "edits_hugglers_talkpage",
+               "other_user_talk_edits"
+       )
+       for(outcome in outcome_names){
+               model = lm(
+                       f[[outcome]] ~
+                       (personal + nodirectives) *
+                       (
+                               warning_first_msg + 
+                               is_anon + 
+                               is_shared_ip + 
+                               ns0_edits_before_msg + 
+                               talk_edits_before_msg
+                       ),
+                       data=f
+               )
+               outcomes[[outcome]] = model_outcomes(model)
+       }
+       outcomes
+}
+logistic_outcomes = function(f){
+       outcomes = list()
+       outcome_names = c(
+               "stay", 
+               "active", 
+               "improves", 
+               "contact", 
+               "good_contact", 
+               "warned_again", 
+               "blocked",
+               "db_good_communication"
+       )
+       for(outcome in outcome_names){
+               model = glm(
+                       f[[outcome]] ~
+                       (personal + nodirectives) *
+                       (
+                               warning_first_msg + 
+                               is_anon + 
+                               is_shared_ip + 
+                               ns0_edits_before_msg + 
+                               talk_edits_before_msg
+                       ),
+                       data=f,
+                       family=binomial("logit")
+               )
+               outcomes[[outcome]] = model_outcomes(model)
+       }
+       outcomes
+}
+outcome_coefs = list(
+       overall = c(
+               linear_outcomes(hugglings), 
+               logistic_outcomes(hugglings)
+       ),
+       vandal = c(
+               linear_outcomes(hugglings[hugglings$group == "vandal",]), 
+               logistic_outcomes(hugglings[hugglings$group == "vandal",])
+       ),
+       "bad faith" = c(
+               linear_outcomes(hugglings[hugglings$group == "bad faith",]), 
+               logistic_outcomes(hugglings[hugglings$group == "bad faith",])
+       ),
+       test = c(
+               linear_outcomes(hugglings[hugglings$group == "test",]), 
+               logistic_outcomes(hugglings[hugglings$group == "test",])
+       ),
+       "good faith" = c(
+               linear_outcomes(hugglings[hugglings$group == "good faith",]), 
+               logistic_outcomes(hugglings[hugglings$group == "good faith",])
+       )
+)
+for(outcome in names(outcome_coefs$overall)){
+       cat("==Outcome:", outcome, "==\n")
+       cat('{| class="wikitable"\n')
+       cat('| align="center" style="background:#f0f0f0;font-weight:bold;"|Coefficients:\n')
+       cat(
+               '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|overall',
+               paste("(", outcome_coefs[["overall"]][[outcome]]$fitness, ")", sep=""),
+               '||\n'
+       )
+       cat(
+               '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|vandal',
+               paste("(", outcome_coefs[["vandal"]][[outcome]]$fitness, ")", sep=""),
+               '||\n'
+       )
+       cat(
+               '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|bad faith',
+               paste("(", outcome_coefs[["bad faith"]][[outcome]]$fitness, ")", sep=""),
+               '||\n'
+       )
+       cat(
+               '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|test',
+               paste("(", outcome_coefs[["test"]][[outcome]]$fitness, ")", sep=""),
+               '||\n'
+       )
+       cat(
+               '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|good faith',
+               paste("(", outcome_coefs[["good faith"]][[outcome]]$fitness, ")", sep=""),
+               '||\n'
+       )
+       cat("|-\n! ")
+       cat("||coef||error||p-val||")
+       cat("||coef||error||p-val||")
+       cat("||coef||error||p-val||")
+       cat("||coef||error||p-val||")
+       cat("||coef||error||p-val||\n")
+       for(feature in names(outcome_coefs$overall$stay$coefs)){
+               cat('|-\n| ', feature)
+               for(group in names(outcome_coefs)){
+                       coefs = outcome_coefs[[group]][[outcome]]$coefs[[feature]]
+                       cat(
+                               "",
+                               round(coefs$coef, 3), 
+                               round(coefs$error, 3), 
+                               pval(coefs$pval),
+                               "",
+                               sep="||"
+                       )
+               }
+               cat("\n")
+       }
+       cat("|}\n\n")
+}
+
+
 for(group in c("vandal", "bad faith", "test", "good faith")){
        group_codings = hugglings[hugglings$group == group,]
        
@@ -118,33 +312,130 @@
        cat("============================================================\n")
        
        print(summary(glm(
-               good_outcome ~ is_anon + personal + nodirectives,
-               data = group_codings
+               good_outcome ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
        )))
        
        print(summary(glm(
-               improves ~ is_anon + personal + nodirectives,
-               data = group_codings[group_codings$before_rating <= 4,]
+               improves ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings[group_codings$before_rating <= 4,],
+               family=binomial(link="logit")
        )))
        
        print(summary(glm(
-               contact ~ is_anon + personal + nodirectives,
-               data = group_codings
+               contact ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
        )))
        
        print(summary(glm(
-               good_contact ~ is_anon + personal + nodirectives,
-               data = group_codings[group_codings$contact,]
+               good_contact ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings[group_codings$contact,],
+               family=binomial(link="logit")
        )))
        
+       cat("Stay = after_rating != NA")
        print(summary(glm(
-               stay ~ is_anon + personal + nodirectives,
+               stay ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
+       )))
+       
+       cat("Active = edits_after_msg_3days > 0")
+       print(summary(glm(
+               active ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
+       )))
+       
+       cat("Warned_again = warnings_after_24hrs > 0")
+       print(summary(glm(
+               warned_again ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
+       )))
+       
+       cat("Blocked = blocked_after_msg_seconds != NA & blocked_after_msg_seconds < 604800")
+       print(summary(glm(
+               blocked ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings,
+               family=binomial(link="logit")
+       )))
+       
+       print(summary(glm(
+               ns0_edits_after_msg ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
                data = group_codings
        )))
        
+       print(summary(glm(
+               user_talk_edits_after_msg ~ 
+               (personal + nodirectives) * 
+               (warning_first_msg + 
+               is_shared_ip + 
+               is_anon +
+               ns0_edits_before_msg +
+               user_talk_edits_before_msg),
+               data = group_codings
+       )))
        cat("\n\n\n")
 }
 
+
 meanNoNA = function(x){
        mean(x, na.rm=T)
 }


_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs

[MediaWiki-CVS] SVN: [114794] trunk/tools/wsor/newbie_warnings/R

Reply via email to