P.S. Here is some code that is more directly relevant.
Frank
cphadjchisq <- function(adjust, candidates, S)
{
require(rms)
f <- coxphFit(adjust, S, type='right', method='efron')
if(f$fail) stop('could not fit a model with just adjust')
ll2 <- -2*f$loglik[2]
chisq.adjust <- 2*diff(f$loglik)
p <- ncol(candidates)
chisq <- numeric(p)
names(chisq) <- colnames(candidates)
for(i in 1:p)
{
f <- coxphFit(cbind(adjust, candidates[,i]), S,
type='right', method='efron')
chisq[i] <- if(f$fail) NA else ll2 + 2*f$loglik[2]
}
list(chisq=chisq, P=1 - pchisq(chisq, 1), chisq.adjust=chisq.adjust,
padjust=ncol(adjust), pcandidates=ncol(candidates))
}
bootrankcph <- function(adjust, candidates, S, B=500, conf.int=0.95,
pr=TRUE)
{
adjust <- as.matrix(adjust)
candidates <- as.matrix(candidates)
i <- is.na(rowSums(adjust) + rowSums(candidates) + rowSums(S))
if(any(i))
{
i <- !i
adjust <- adjust[i,,drop=FALSE]
candidates <- candidates[i,,drop=FALSE]
S <- S[i,,drop=FALSE]
}
n <- nrow(S)
orig <- cphadjchisq(adjust, candidates, S)
orig.rank <- rank(-orig$chisq)
p <- ncol(candidates)
brank <- matrix(NA, ncol=p, nrow=B)
for(i in 1:B)
{
if(pr) cat(i, '\r')
s <- sample(1:n, n, replace=TRUE)
w <- cphadjchisq(adjust[s,,drop=FALSE], candidates[s,,drop=FALSE],
S[s,,drop=FALSE])
brank[i,] <- rank(-w$chisq)
}
if(pr) cat('\n')
lower <- apply(brank, 2, function(w) quantile(w, (1-conf.int)/2,
na.rm=TRUE))
upper <- apply(brank, 2, function(w) quantile(w, (1+conf.int)/2,
na.rm=TRUE))
structure(list(stats=orig,
ranks=cbind(rank=orig.rank, lower=lower, upper=upper)),
class='bootrankcph')
}
print.bootrankcph <- function(x, ...)
{
s <- x$stats
cat('\nOriginal Model L.R. Statistic for Adjustment Variables\n\n',
'Chi-square=', round(s$chisq.adjust,2),'with', s$padjust,
'd.f.\n\nOriginal Partial L.R. Statistics for Candidates (1
d.f.)\n\n')
i <- order(s$chisq)
print(cbind('chi-square'=round(s$chisq[i],2),
P=format.pval(s$P[i], digits=3, eps=.001)),
quote=FALSE)
invisible()
}
plot.bootrankcph <- function(x, ...)
{
x <- x$ranks
cand <- if(length(rownames(x))) as.factor(rownames(x)) else
as.factor(1:nrow(x))
cand <- reorder.factor(cand, x[,1])
Dotplot(cand ~ Cbind(x), pch=16, xlab='Rank', ylab='Candidate')
}
require(rms)
n <- 100
age <- rnorm(n, 50, 10)
sex <- sample(c('female','male'), n, TRUE)
adjust <- model.matrix(~ rcs(age,3)*sex)
candidates <- matrix(rnorm(n*50), ncol=50,
dimnames=list(NULL, c(letters,LETTERS)[1:50]))
S <- Surv(runif(n))
w <- bootrankcph(adjust, candidates, S, B=100)
w$stats
plot(w)
Frank E Harrell Jr wrote:
Ravi Varadhan wrote:
Frank,
Is there an article that discusses this idea of bootstrapping the
ranks of
the likelihood ratio chi-square Statistics to assess relative
importance of predictors in time-to-event data
(specifically Cox PH model)?
Thanks,
Ravi.
Do require(rms); ?anova.rms and see related articles:
@Article{hal09usi,
author = {Hall, Peter and Miller, Hugh},
title = {Using the bootstrap to quantify the authority of an
empirical ranking},
journal = Annals of Stat,
year = 2009,
volume = 37,
number = {6B},
pages = {3929-3959},
annote = {confidence interval for ranks;genomics;high
dimension;independent component bootstrap;$m$-out-of-$n$
bootstrap;ordering;overlap interval;prediction interval;synchronous
bootstrap;ordinary bootstrap may not provide accurate confidence
intervals for ranks;may need a different bootstrap if the number of
parameters being ranked increases with $n$ or is large;estimating $m$ is
difficult;in their first example, where $m=0.355n$, the ordinary
bootstrap provided a lower bound to the lengths of more accurate
confidence intervals of ranks}
}
@Article{xie09con,
author = {Xie, Minge and Singh, Kesar and Zhang, {Cun-Hui}},
title = {Confidence intervals for population ranks in the
presence of ties and near ties},
journal = JASA,
year = 2009,
volume = 104,
number = 486,
pages = {775-787},
annote = {bootstrap ranks;ranking;nonstandard bootstrap
inference;rank inference;slow convergence rate;smooth ranks in the
presence of near ties;rank inference for fixed effects risk adjustment
models}
}
-----Original Message-----
From: [email protected]
[mailto:[email protected]] On
Behalf Of Frank E Harrell Jr
Sent: Tuesday, March 30, 2010 3:57 PM
To: Michal Figurski
Cc: [email protected]
Subject: Re: [R] Problem comparing hazard ratios
Michal Figurski wrote:
Dear R-Helpers,
I am a novice in survival analysis. I have the following code:
for (i in 3:12) print(coxph(Surv(time, status)~a[,i], data=a))
I used it to fit the Cox Proportional Hazard models separately for
every available parameter (columns 3:12) in my data set - with
intention to compare the Hazard Ratios.
However, some of my variables are in range 0.1 to 1.6, others in
range 5000 to 9000. How do I compare HRs between such variables?
I have rescaled all the variables to be in 0 to 1 range - is this the
proper way to go? Is there a way to somehow calculate the same HRs
(as for rescaled parameters) from the HRs for original parameters?
Many thanks in advance.
There are a lot of issues related to this that will require a good bit
of study, both in survival analysis and in regression. I would start
with bootstrapping the ranks of the likelihood ratio chi-square
statistics of the competing biomarkers.
Frank
--
Frank E Harrell Jr Professor and Chairman School of Medicine
Department of Biostatistics Vanderbilt University
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.