Torsten Hothorn <[EMAIL PROTECTED]> writes:
> > Dear All,
> >
> > is there a stratified version of the Wilcoxon test (also known as van
> > Elteren test) available in R?
>
> you can plug it together using the `coin' infrastructure (see the
> examples in the manual and vignette).
I managed to dig out our old code, and patched it up loosely to fit
versions of R later than 0.62 (the trend test code still seems
broken). Use at own risk. The usage is fairly straightforward:
> SKruskal.test(y~g1|g2)
Kruskal-Wallis stratified rank sum test
data: y , group: g1 , strata: g2
K = 3.1486, df = 3, p-value = 0.3693
"SKruskal.test" <-
function(formula,trend=F,data=sys.frame(sys.parent()),subset)
{
trendtest <- (mode(trend)=="logical" && trend) | (mode(trend)=="numeric")
deparen <- function(expr)
{
# Splus code had mode(expr) == "(", which should also work in R 0.62
if(mode(expr) == "call" && expr[[1]] == as.name("("))
expr <- expr[[2]]
if(is.recursive(expr))
for(i in seq(along = expr))
expr[[i]] <- deparen(expr[[i]])
expr
}
# as of now, accept x~g or x~g|s
# later: x~g1*g2|s1*s2 (or g1:g2 ?) for crossed factors
formula <- deparen(formula)
if (mode(formula) != "call")
stop("formula is specified incorrectly")
if (mode(formula[[3]])=="call") {
strat<-T
if(formula[[3]][[1]] != as.name("|"))
stop("malformed conditioning formula")
# The below trick appears to be necessary since terms.formula doesn't
# know about '|'
formula[[3]][[1]] <- as.name("+")
varlist <- attr(terms(formula), "variables")
# is all.names safe for all variants of model formulas?
var.names <- all.vars(varlist)
strata <- eval(varlist[[4]],data)
}
else if (mode(formula[[3]])=="name") {
strat<-F
varlist <- attr(terms(formula), "variables")
var.names <- all.vars(expr)
}
# Probably better to eval(varlist,data) and extract afterwards...
# Or maybe use model.matrix instead to catch embedded function calls, etc.
x <- eval(varlist[[2]],data)
group <- eval(varlist[[3]],data)
if (length(x) != length(group))
stop("response and group vector must have the same length")
if (strat && length(x) != length(strata))
stop("response and stratum vector must have the same length")
if (trendtest && mode(trend)=="numeric") {
if (length(trend)!= length(table(group)))
stop("the number of groups must equal the length of the trend vector")
else if (length(unique(trend)) < 2 )
stop("trend must have at least two different levels")
}
# Does the test below work in R ??
if (trendtest && mode(group)=="character" && !is.factor(group))
stop("group is of mode character and not a factor")
if (!strat) strata <- rep(1,length(x))
ok <- complete.cases(x,group,strata)
if ( !missing(subset) )
ok <- ok & eval(substitute(subset),data)
x <- x[ok]
strata <- as.factor(strata[ok])
group <- group[ok]
j <- length(unique(group))
# These tests are obsolete - I think...
# i <- length(table(strata))
# if (i<2) stop("all observations are in the same stratum")
# else if (all(sapply(table(strata),function(x)(x<2))==F)==F)
# stop("there must be at least 2 observations in each stratum")
# if (j<2) stop("all observations are in the same group")
if (!trendtest)
group <- as.factor(group)
else {
# is this logic OK??
if (mode(group)=="character" &&
all(class(group)!=c("ordered","factor")))
warning("group is not an ordered factor")
if (mode(trend)=="logical")
group<-as.numeric(group)
else
group<-trend[as.factor(group)]
}
r<-numeric(length(x))
r[order(strata)]<-unlist(tapply(x,strata,function(x)rank(x)/length(x)))
weight<-tapply(x,strata,
function(x)(12/((1+1/length(x))*
(1-sum(table(x)^3-table(x))/
(length(x)^3-length(x))
))))[strata]
weight <- as.vector(weight)
if(strat)
KW.lm <- lm(r ~ strata + group, weights=weight)
else
KW.lm <- lm(r ~ group, weights=weight)
if (!trendtest) {
aovtable <- anova(KW.lm)
Statistic <- aovtable["group","Sum Sq"]
df <- j-1
names(Statistic) <- "K"
pvalue <- 1- pchisq(Statistic,df)
if (strat){
method <- "Kruskal-Wallis stratified rank sum test"
name <- paste(var.names[1],", group: ",var.names[2],
", strata: ",var.names[3])
} else {
method <- "Kruskal-Wallis rank sum test"
name <- paste(var.names[1],", group: ",var.names[2])
}
}
else {
s <- summary(KW.lm)
Statistic <- s$coefficients["group","t Value"] *
s$sigma
df <- 1
names(Statistic) <- "Z"
pvalue <- 2*(1 - pnorm(abs(Statistic)))
if (mode(trend)=="numeric")
wname <- paste(trend,collapse=" ")
else
wname <- "as.numeric(group)"
if (strat) {
method <- "Kruskal-Wallis stratified rank sum trend test"
name <- paste(var.names[1], ", group: ",var.names[2],
", strata: ",var.names[3],", trend: ",wname)
} else {
method <- "Kruskal-Wallis rank sum trend test"
name <- paste(var.names[1], ", group: ",var.names[2],
", trend: ",wname)
}
}
names(df) <- "df"
RVAL <- list(statistic =Statistic,
parameter = df,
p.value = pvalue,
method = method,
data.name = name)
class(RVAL) <- "htest"
return(RVAL)
}
--
O__ ---- Peter Dalgaard Ă˜ster Farimagsgade 5, Entr.B
c/ /'_ --- Dept. of Biostatistics PO Box 2099, 1014 Cph. K
(*) \(*) -- University of Copenhagen Denmark Ph: (+45) 35327918
~~~~~~~~~~ - ([EMAIL PROTECTED]) FAX: (+45) 35327907
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide! http://www.R-project.org/posting-guide.html