Dear R users,
I'm trying to fit a gamma-frailty model on a simulated dataset, with 6
covariates, and I'm running into some results I do not understand. I
constructed an example from my simulation code, where I fit a coxph model
without frailty (M1) and with frailty (M2) on a number of data samples with a
varying degree of heterogeneity (I'm running R 2.3.1, running takes ~1 min).
library(survival); set.seed(1)
lambda <- 0.01 # Exp. hazard rate
# Beta coefficients for Age,TC,HDLC,SBP,Diab,Smok
beta <- c(0.0483,0.0064,-0.0270,0.0037,0.4284,0.5234)
n <- 1000; Ngrp <- 2; # Nr patients, Nr frailty groups
# Thetas for gamma-frailty
thetaset <- c(1,2,10,100); Ntheta <- length(thetaset);
# Define the simulated population
age <-rnorm(n,48.6,11.7);tc<-rnorm(n,200,30)
hdlc<-rnorm(n,47,6);sbp<-rnorm(n,135,6)
rtmp <- runif(0,1,n=n); diab <- rep(0, n); diab[rtmp < 0.05223] <- 1;
rtmp <- runif(0,1,n=n); smok <- rep(0, n); smok[rtmp < 0.40458] <- 1;
# Storage for the estimated coefficients in both models
cf <- 0; length(cf) <- 2*Ntheta*6; dim(cf) <- c(Ntheta,2,6);
for (i in 1:Ntheta)
{
l = thetaset[i]
v=rep(rgamma(n/2,shape=l,scale=1/l),2) # Shared frailty values
print(paste("Theta = ",l, " variance in introduced frailty v =
",var(v)))
id=rep(seq(1:(n/2)),2); # Frailty id's,
c <- rep(1,n); # censoring flags
r <- runif(n,0,1); # For random variate
generation
t <- (-1*log(r)) /
(lambda*v*exp(age*beta[1]+tc*beta[2]+hdlc*beta[3]+sbp*beta[4]+diab*beta[5]+smok*beta[6]))
fitM1=coxph(Surv(t,c)~age+tc+hdlc+sbp+diab+smok); # Model 1, no
frailty
cf[i,1,] <- round(fitM1$coef,6) # Model 1
coefficients
fitM2=coxph(Surv(t,c)~age+tc+hdlc+sbp+diab+smok+
frailty(id,dist="gamma",sparse=T,method="em")) # Model 2,
frailty
cf[i,2,] <- round(fitM2$coef,6) # Model 2,
coefficients
# store estimated Variance of random effect
varre=fitM2$history$frailty$history[length(fitM2$history$frailty$history[,1])]
print(paste("Theta = ",l, " variance in estimated random effect=
",varre))
print(paste("Theta = ",l, " variance in estimated ind. frailty = ",
var(exp(fitM2$frail
print(paste("Theta=",l," org beta ",beta," M1: ",cf[i,1,], " M2: ",
cf[i,2,]))
# Calculate the 'actual' 10-year risk and the risk estimated using M1
and M2
estv <- exp(rep(fitM2$frail,2)) # Individual frailty values
from M2
# Original risk score
rs<-
age*beta[1]+tc*beta[2]+hdlc*beta[3]+sbp*beta[4]+diab*beta[5]+smok*beta[6]
pevent <- exp(-10*lambda)^(exp(rs)) # Original
10-year risk
rsM1<-
(cf[i,1,1]*age+cf[i,1,2]*tc+cf[i,1,3]*hdlc+cf[i,1,4]*sbp+cf[i,1,5]*diab+cf[i,1,6]*smok)
peventM1 <- exp(-10*lambda)^(exp(rsM1))
rsM2<-
(cf[i,1,1]*age+cf[i,1,2]*tc+cf[i,1,3]*hdlc+cf[i,1,4]*sbp+cf[i,1,5]*diab+cf[i,1,6]*smok)
peventM2 <- exp(-10*lambda)^(estv*exp(rsM1))
# Proportion of more accurate predictions
pred <- sum(abs(pevent-peventM2) < abs(pevent-peventM1))/n
print(paste("Theta = ",l," proportion pred. M2 more accurate than M1 =
",pred));
}
corresponding output:
[1] "Theta = 1 variance in introduced frailty v = 0.948105787326522"
[1] "Theta = 1 variance in estimated random effect= 1.04435029128552"
[1] "Theta = 1 variance in estimated ind. frailty = 0.520078219281493"
[1] "Theta= 1 org beta 0.0483 M1: 0.025015 M2: 0.045987"
[2] "Theta= 1 org beta 0.0064 M1: 0.002274 M2: 0.005178"
[3] "Theta= 1 org beta -0.027 M1: -0.014239 M2: -0.028824"
[4] "Theta= 1 org beta 0.0037 M1: 0.00071 M2: 0.010826"
[5] "Theta= 1 org beta 0.4284 M1: 0.421602 M2: 0.648227"
[6] "Theta= 1 org beta 0.5234 M1: 0.360069 M2: 0.593551"
[1] "Theta = 1 proportion pred. M2 more accurate than M1 = 0.448"
[1] "Theta = 2 variance in introduced frailty v = 0.515806773271924"
[1] "Theta = 2 variance in estimated random effect= 0.644954876113229"
[1] "Theta = 2 variance in estimated ind. frailty = 0.277209957022078"
[1] "Theta= 2 org beta 0.0483 M1: 0.033213 M2: 0.046216"
[2] "Theta= 2 org beta 0.0064 M1: 0.005633 M2: 0.009491"
[3] "Theta= 2 org beta -0.027 M1: -0.010048 M2: -0.016658"
[4] "Theta= 2 org beta 0.0037 M1: -0.006032 M2: -0.003639"
[5] "Theta= 2 org beta 0.4284 M1: 0.402992 M2: 0.403226"
[6] "Theta= 2 org beta 0.5234 M1: 0.457665 M2: 0.691871"
[1] "Theta = 2 proportion pred. M2 more accurate than M1 = 0.482"
[1] "Theta = 10 variance in introduced frailty v = 0.104197974543906"
[1] "Theta = 10 variance in estimated