https://www.mediawiki.org/wiki/Special:Code/MediaWiki/114794
Revision: 114794
Author: halfak
Date: 2012-04-07 18:08:59 +0000 (Sat, 07 Apr 2012)
Log Message:
-----------
old updates to unimportant scripts
Modified Paths:
--------------
trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R
trunk/tools/wsor/newbie_warnings/R/outcomes.R
trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R
Modified: trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R 2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/loader/load_huggling_codings_mk2.R 2012-04-07 18:08:59 UTC (rev 114794)
@@ -22,8 +22,6 @@
HUGGLING_CODINGS_MK2$experimental = HUGGLING_CODINGS_MK2$experimental == 1
- HUGGLING_CODINGS_MK2$edits_own_talkpage = HUGGLING_CODINGS_MK2$edits_own_talkpage > 0
- HUGGLING_CODINGS_MK2$edits_hugglers_talkpage = HUGGLING_CODINGS_MK2$edits_hugglers_talkpage > 0
HUGGLING_CODINGS_MK2$responds_own_talk = HUGGLING_CODINGS_MK2$responds_own_talk > 0
HUGGLING_CODINGS_MK2$responds_elsewhere = HUGGLING_CODINGS_MK2$responds_elsewhere > 0
HUGGLING_CODINGS_MK2$is_anon = HUGGLING_CODINGS_MK2$is_anon > 0
Modified: trunk/tools/wsor/newbie_warnings/R/outcomes.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/outcomes.R 2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/outcomes.R 2012-04-07 18:08:59 UTC (rev 114794)
@@ -100,6 +100,49 @@
))
#
+#
+#
+regressions = function(messaged_codings){
+ outcomes = list()
+ for(outcome in c("stay", "improves", "contact", "good_contact", "good_outcome")){
+ cat("Processing:", outcome)
+ groups = list()
+ for(group in c("unlikely", "possible", "golden")){
+ cat(".")
+ group_codings = messaged_codings[
+ messaged_codings$group == group,
+ ]
+ model = glm(
+ group_codings[[outcome]] ~
+ anon +
+ ntalk_edits_before_msg +
+ talk_edits_before_msg +
+ teaching * personal,
+ data = group_codings
+ )
+
+ ncoefs = length(model$coefficients)
+ features = list()
+ i = 0
+ for(feature in names(model$coefficients)){
+ i = i+1
+ features[[feature]] = list(
+ coef=coefficients(summary(model))[i],
+ error=coefficients(summary(model))[i+ncoefs],
+ pval=coefficients(summary(model))[i+ncoefs*3]
+ )
+ }
+ groups[[group]] = features
+ }
+ cat("\n")
+ outcomes[[outcome]] = groups
+ }
+ outcomes
+}
+#outcomes = regressions(messaged_codings)
+
+
+#
# Try removing teaching*personal.
#
@@ -113,27 +156,27 @@
cat("============================================================\n")
print(summary(glm(
- good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+ good_outcome ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
data = group_codings
)))
print(summary(glm(
- improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+ improves ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
data = group_codings
)))
print(summary(glm(
- contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+ contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
data = group_codings
)))
print(summary(glm(
- good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+ good_contact ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
data = group_codings
)))
print(summary(glm(
- stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal * image,
+ stay ~ anon + ntalk_edits_before_msg + talk_edits_before_msg + teaching * personal,
data = group_codings
)))
Modified: trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R
===================================================================
--- trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R 2012-04-07 01:08:35 UTC (rev 114793)
+++ trunk/tools/wsor/newbie_warnings/R/outcomes_mk2.R 2012-04-07 18:08:59 UTC (rev 114794)
@@ -1,19 +1,7 @@
source("loader/load_huggling_codings_mk2.R")
library(doBy)
-hugglings = load_huggling_codings_mk2()
+hugglings = load_huggling_codings_mk2(reload=T)
-#hugglingCounts = summaryBy(
-# recipient ~ recipient,
-# data = hugglings,
-# FUN=length
-#)
-#hugglingCounts$count = hugglingCounts$recipient.length
-#hugglingCounts$recipient.length = NULL
-#
-#hugglings = merge(hugglings, hugglingCounts, by=c("recipient"))
-
-#huggling_codings = load_huggling_codings(reload=T)
-#messaged_codings = huggling_codings[!is.na(huggling_codings$before_rating),]
ifNA = function(val, naThen){
if(is.na(val)){
naThen
@@ -33,13 +21,47 @@
F
)
)
-hugglings$good_contact = hugglings$contact & !hugglings$retaliates
-hugglings$stay = !is.na(hugglings$after_rating)
-hugglings$improves = hugglings$after_rating > hugglings$before_rating
+hugglings$good_contact = mapply(
+ function(contact, retaliates){
+ if(contact){
+ !retaliates
+ }else{
+ NA
+ }
+ },
+ hugglings$contact,
+ hugglings$retaliates
+)
+hugglings$stay = !is.na(hugglings$after_rating)
+hugglings$active = hugglings$edits_after_msg_3days > 0
+hugglings$warned_again = hugglings$warnings_after_72hrs > 0
+hugglings$blocked = !is.na(hugglings$blocked_after_msg_seconds) & hugglings$blocked_after_msg_seconds < 604800
+hugglings$improves = hugglings$after_rating > hugglings$before_rating
+hugglings$other_user_talk_edits = hugglings$user_talk_edits_after_msg - hugglings$edits_own_talkpage
hugglings$talk_edits_before_msg = with(
hugglings,
- user_talk_edits_after_msg + article_talk_edits_before_msg
+ user_talk_edits_before_msg + article_talk_edits_before_msg
)
+hugglings$talk_edits_after_msg = with(
+ hugglings,
+ user_talk_edits_after_msg + article_talk_edits_after_msg
+)
+hugglings$article_talk_before = hugglings$article_talk_edits_before_msg
+hugglings$user_talk_before = hugglings$user_talk_edits_before_msg
+
+
+hugglings$db_good_communication = mapply(
+ function(other_user_talk_edits, warned_again){
+ if(other_user_talk_edits){
+ !warned_again
+ }else{
+ NA
+ }
+ },
+ hugglings$other_user_talk_edits,
+ hugglings$warned_again
+)
+
# Can't do it
#messaged_codings$ntalk_edits_before_msg = with(
# messaged_codings,
@@ -109,7 +131,179 @@
}
))
+pval = function(val){
+ if(is.na(val)){
+ "<span style=\"color: #ccc;\">---</span>"
+ }
+ else if(val < 0.05){
+ if(val < 0.001){
+ val = "< .001"
+ }else{
+ val = round(val, 3)
+ }
+ paste("'''", val, "'''", sep="")
+ }else if(val < 0.10){
+ paste("<span style=\"text-decoration: underline;\">", round(val, 3), "</span>", sep="")
+ }else{
+ paste("<span style=\"color: #ccc;\">", round(val, 3), "</span>", sep="")
+ }
+}
+model_outcomes = function(model){
+ ncoefs = length(model$coefficients)
+ coefs = list()
+ i = 0
+ for(feature in names(model$coefficients)){
+ i = i+1
+ coefs[[feature]] = list(
+ coef=coefficients(summary(model))[i],
+ error=coefficients(summary(model))[i+ncoefs],
+ pval=coefficients(summary(model))[i+ncoefs*3]
+ )
+ }
+ outcomes = list(coefs=coefs)
+ smry = summary(model)
+ if(!is.null(smry$r.squared)){
+ outcomes$fitness = paste("R-squared:", round(smry$r.squared,2))
+ }else{
+ outcomes$fitness = paste("AIC:", round(smry$aic, 2))
+ }
+ outcomes
+}
+
+linear_outcomes = function(f){
+ outcomes = list()
+ outcome_names = c(
+ "ns0_edits_after_msg",
+ "talk_edits_after_msg",
+ "article_talk_edits_after_msg",
+ "user_talk_edits_after_msg",
+ "edits_hugglers_talkpage",
+ "other_user_talk_edits"
+ )
+ for(outcome in outcome_names){
+ model = lm(
+ f[[outcome]] ~
+ (personal + nodirectives) *
+ (
+ warning_first_msg +
+ is_anon +
+ is_shared_ip +
+ ns0_edits_before_msg +
+ talk_edits_before_msg
+ ),
+ data=f
+ )
+ outcomes[[outcome]] = model_outcomes(model)
+ }
+ outcomes
+}
+logistic_outcomes = function(f){
+ outcomes = list()
+ outcome_names = c(
+ "stay",
+ "active",
+ "improves",
+ "contact",
+ "good_contact",
+ "warned_again",
+ "blocked",
+ "db_good_communication"
+ )
+ for(outcome in outcome_names){
+ model = glm(
+ f[[outcome]] ~
+ (personal + nodirectives) *
+ (
+ warning_first_msg +
+ is_anon +
+ is_shared_ip +
+ ns0_edits_before_msg +
+ talk_edits_before_msg
+ ),
+ data=f,
+ family=binomial("logit")
+ )
+ outcomes[[outcome]] = model_outcomes(model)
+ }
+ outcomes
+}
+outcome_coefs = list(
+ overall = c(
+ linear_outcomes(hugglings),
+ logistic_outcomes(hugglings)
+ ),
+ vandal = c(
+ linear_outcomes(hugglings[hugglings$group == "vandal",]),
+ logistic_outcomes(hugglings[hugglings$group == "vandal",])
+ ),
+ "bad faith" = c(
+ linear_outcomes(hugglings[hugglings$group == "bad faith",]),
+ logistic_outcomes(hugglings[hugglings$group == "bad faith",])
+ ),
+ test = c(
+ linear_outcomes(hugglings[hugglings$group == "test",]),
+ logistic_outcomes(hugglings[hugglings$group == "test",])
+ ),
+ "good faith" = c(
+ linear_outcomes(hugglings[hugglings$group == "good faith",]),
+ logistic_outcomes(hugglings[hugglings$group == "good faith",])
+ )
+)
+for(outcome in names(outcome_coefs$overall)){
+ cat("==Outcome:", outcome, "==\n")
+ cat('{| class="wikitable"\n')
+ cat('| align="center" style="background:#f0f0f0;font-weight:bold;"|Coefficients:\n')
+ cat(
+ '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|overall',
+ paste("(", outcome_coefs[["overall"]][[outcome]]$fitness, ")", sep=""),
+ '||\n'
+ )
+ cat(
+ '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|vandal',
+ paste("(", outcome_coefs[["vandal"]][[outcome]]$fitness, ")", sep=""),
+ '||\n'
+ )
+ cat(
+ '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|bad faith',
+ paste("(", outcome_coefs[["bad faith"]][[outcome]]$fitness, ")", sep=""),
+ '||\n'
+ )
+ cat(
+ '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|test',
+ paste("(", outcome_coefs[["test"]][[outcome]]$fitness, ")", sep=""),
+ '||\n'
+ )
+ cat(
+ '| colspan = "3" align="center" style="background:#f0f0f0;font-weight:bold;"|good faith',
+ paste("(", outcome_coefs[["good faith"]][[outcome]]$fitness, ")", sep=""),
+ '||\n'
+ )
+ cat("|-\n! ")
+ cat("||coef||error||p-val||")
+ cat("||coef||error||p-val||")
+ cat("||coef||error||p-val||")
+ cat("||coef||error||p-val||")
+ cat("||coef||error||p-val||\n")
+ for(feature in names(outcome_coefs$overall$stay$coefs)){
+ cat('|-\n| ', feature)
+ for(group in names(outcome_coefs)){
+ coefs = outcome_coefs[[group]][[outcome]]$coefs[[feature]]
+ cat(
+ "",
+ round(coefs$coef, 3),
+ round(coefs$error, 3),
+ pval(coefs$pval),
+ "",
+ sep="||"
+ )
+ }
+ cat("\n")
+ }
+ cat("|}\n\n")
+}
+
+
for(group in c("vandal", "bad faith", "test", "good faith")){
group_codings = hugglings[hugglings$group == group,]
@@ -118,33 +312,130 @@
cat("============================================================\n")
print(summary(glm(
- good_outcome ~ is_anon + personal + nodirectives,
- data = group_codings
+ good_outcome ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
)))
print(summary(glm(
- improves ~ is_anon + personal + nodirectives,
- data = group_codings[group_codings$before_rating <= 4,]
+ improves ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings[group_codings$before_rating <= 4,],
+ family=binomial(link="logit")
)))
print(summary(glm(
- contact ~ is_anon + personal + nodirectives,
- data = group_codings
+ contact ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
)))
print(summary(glm(
- good_contact ~ is_anon + personal + nodirectives,
- data = group_codings[group_codings$contact,]
+ good_contact ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings[group_codings$contact,],
+ family=binomial(link="logit")
)))
+ cat("Stay = after_rating != NA")
print(summary(glm(
- stay ~ is_anon + personal + nodirectives,
+ stay ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
+ )))
+
+ cat("Active = edits_after_msg_3days > 0")
+ print(summary(glm(
+ active ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
+ )))
+
+ cat("Warned_again = warnings_after_24hrs > 0")
+ print(summary(glm(
+ warned_again ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
+ )))
+
+ cat("Blocked = blocked_after_msg_seconds != NA & blocked_after_msg_seconds < 604800")
+ print(summary(glm(
+ blocked ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings,
+ family=binomial(link="logit")
+ )))
+
+ print(summary(glm(
+ ns0_edits_after_msg ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
data = group_codings
)))
+ print(summary(glm(
+ user_talk_edits_after_msg ~
+ (personal + nodirectives) *
+ (warning_first_msg +
+ is_shared_ip +
+ is_anon +
+ ns0_edits_before_msg +
+ user_talk_edits_before_msg),
+ data = group_codings
+ )))
cat("\n\n\n")
}
+
meanNoNA = function(x){
mean(x, na.rm=T)
}
_______________________________________________
MediaWiki-CVS mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-cvs