I care a lot about R's speed, so I decided to give REvolution's R
(http://revolution-computing.com/) a try; it bills itself as an
optimized build of R. Note that I used the free version.
My machine is an Intel Core 2 Duo running Windows XP Professional. The
code I ran is at the end of this post.
First, regular R 2.9: it takes 2 minutes and 6 seconds, with CPU usage
at 50% (i.e., one of the two cores saturated).
Next, REvolution's R: it takes 2 minutes and 10 seconds, with CPU usage
at 100% (both cores saturated).
In other words, REvolution's R consumes double the CPU for slightly
less speed.
I have replicated the above a few times (as a statistician would, of
course). Does anyone have any insight into this? In any case, my high
hopes were dashed.
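For anyone who wants to reproduce the comparison: after sourcing the
script below, the run can be instrumented with base R's system.time().
The user-versus-elapsed reading is my own gloss, not anything from
REvolution's documentation:

tm <- system.time( main() );
print(tm);
## "elapsed" is wall-clock time; "user" is CPU time summed across cores.
## With a multithreaded BLAS, user can approach twice elapsed on a
## dual-core box, consistent with the 100% CPU reading above.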
rm(list=ls(all=TRUE))
library(MASS);   # for mvrnorm()
###small and common functions##################################
glmm.sample.y <- function(b, D.u, x, z)
## simulate binary responses from a logistic GLMM; b = fixed effects,
## D.u = random-effect covariance, x/z = fixed/random design arrays
{
    pp <- matrix(0, nrow = n, ncol = m);
    zero <- numeric(n.random);
    ran <- t( mvrnorm(m, zero, D.u) );   # one random-effect draw per cluster
    for(j in 1:m) pp[, j] <- as.vector( x[, , j] %*% b + z[, , j] %*% ran[, j] );
    pp <- exp(pp)/( 1+exp(pp) );         # inverse logit
    y <- runif(m*n);
    ifelse(y < pp, 1, 0);                # Bernoulli(pp) draws, an n x m matrix
}
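## An aside from me, not in the original post: for large linear
## predictors exp() can overflow; base R's plogis() is a numerically
## safer drop-in for the inverse-logit line above:
##   pp <- plogis(pp)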
#################
quadratic.form <- function(A, x) { return( as.vector(x %*% A %*% x) ) }  # x'Ax for one vector
quadratic.form2 <- function(A, x)
## columnwise x'Ax: if R = chol(A) so that A = t(R) %*% R,
## then x'Ax = colSums((R %*% x)^2)
{
    x <- chol(A) %*% x;
    colSums(x*x)
}
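## A quick sanity check of my own (not in the original):
##   A2 <- matrix(c(1.5, .5, .5, 1.5), 2); v <- c(1, 2)
##   all.equal(quadratic.form(A2, v), quadratic.form2(A2, matrix(v)))  # TRUE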
#######################################################################
REML.b.D.u <- function(b, D.u, x, y, z, n.iter)
## REML-type estimation: alternate Newton updates of b with a
## simulation-based bias correction TT for the variance components
{
    u.mean.initial <- array( 0, c(n.random, m) );
    TT <- matrix(0, n.random, n.random);
    for(i in 1:n.iter)
    {
        TT <- TT*(1-1/i) + bias(b, D.u, x, z)/i;   # running mean of bias draws
        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);
        b <- b - solve(obj$Hessian, obj$score);    # Newton step for fixed effects
        D.u <- obj$uu + TT;                        # bias-corrected variance update
        u.mean.initial <- obj$u.mean.initial;
        print(i); print(date());
        print("inside REML"); print(TT);
        if(i==n.iter) write(TT, file="c:/liao/reml/simu50_8.dat", append=TRUE);
        print(D.u);
    }
    list(b=b, D.u=D.u);
}
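## Note from me, not the original post: TT <- TT*(1-1/i) + new/i is an
## incremental running mean, so after the loop TT equals the plain
## average of the simulated bias draws. A one-line check:
##   xs <- rnorm(100); TT0 <- 0
##   for(i in seq_along(xs)) TT0 <- TT0*(1-1/i) + xs[i]/i
##   all.equal(TT0, mean(xs))  # TRUE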
bias <- function(b, D.u, x, z)
## estimate the bias in the variance-component estimate by simulating
## data at the current parameters, then comparing the fit with b held
## fixed (obj1) against the fit with b re-estimated (obj2)
{
    yy <- glmm.sample.y(b, D.u, x, z);   # this is the sampling stage
    obj1 <- mle(b, D.u, x, yy, z, FALSE, FALSE, n.iter=50);
    obj2 <- mle(b, D.u, x, yy, z, TRUE,  FALSE, n.iter=50);
    obj1$uu - obj2$uu
}
##################################################
mle <- function(b, D.u, x, y, z, indx1, indx2, n.iter)
## Monte Carlo iteration: indx1 = update the fixed effects b,
## indx2 = update the random-effect covariance D.u
{
    u.mean.initial <- array( 0, c(n.random, m) );
    for(i in 1:n.iter)
    {
        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);
        if(indx1) b <- b - solve(obj$Hessian, obj$score);
        if(indx2) D.u <- obj$uu;
        u.mean.initial <- obj$u.mean.initial;
    }
    list(b=b, D.u=D.u, uu=obj$uu, u.mean.initial=u.mean.initial);
}
##############################################
sample.u <- function(b, D.u, x, y, z, u.mean.initial)
## accumulate the score, Hessian, and E[uu'] over all m clusters
{
    D.u.inv <- solve(D.u);
    uu <- matrix(0, n.random, n.random);
    score <- numeric(n.fixed);
    Hessian <- matrix(0, n.fixed, n.fixed);
    for(i in 1:m)
    {
        ada.part <- as.vector(x[, , i] %*% b);   # fixed-effect part of the linear predictor
        obj <- sample.u.one( D.u.inv, ada.part, x[, , i], y[, i], z[, , i], u.mean.initial[, i] );
        uu <- uu + obj$uu;
        score <- score + obj$score;
        Hessian <- Hessian + obj$Hessian;
        u.mean.initial[, i] <- obj$u.mean.initial;
    }
    uu <- uu/m;
    list(uu=uu, score=score, Hessian=Hessian, u.mean.initial=u.mean.initial);
}
##########################################
sample.u.one <- function(D.u.inv, ada.part, x, y, z, u.mean.initial)
## ada.part, x, y, z are for one subject only.
## Importance sampling: locate the posterior mode of u with optim(),
## draw from a Gaussian proposal centered there (Laplace approximation),
## and reweight by target/proposal.
{
    fn <- log.like(y, z, D.u.inv, ada.part);
    obj <- optim(u.mean.initial, fn, control=list(fnscale=-1), hessian=TRUE);
    u.mean.initial <- obj$par;
    var.root <- solve(-obj$hessian);        # proposal covariance
    var.root <- t( chol(var.root) );        # its lower-triangular root
    u <- matrix( rnorm(n.random*n.simu), nrow=n.random, ncol=n.simu );
    log.prob1 <- -colSums(u*u)/2;           # proposal log density (up to a constant)
    u <- u.mean.initial + var.root %*% u;   # proposal draws
    ada <- exp( ada.part + z %*% u );
    ada <- ada/(1+ada);                     # it is a probability now
    log.prob2 <- colSums( y*log(ada) + (1-y)*log(1-ada) );
    log.prob2 <- -quadratic.form2(D.u.inv, u)/2 + log.prob2;   # target log density
    weight <- exp(log.prob2 - log.prob1);
    weight <- weight/sum(weight);           # self-normalized importance weights
    ada <- t(ada);
    pi <- colSums(ada*weight);              # E[p | y] per observation
    product <- colSums( ada*(1-ada)*weight );
    score <- as.vector( (y - pi) %*% x );
    Hessian <- -t(x) %*% ( rep(product, n.fixed)*x );
    obj <- cov.wt( t(u), weight, center=TRUE );
    uu <- obj$cov + obj$center %*% t(obj$center);   # E[uu'] = Var(u) + E(u)E(u)'
    list(uu=uu, score=score, Hessian=Hessian, u.mean.initial=obj$center);
}
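## A toy illustration of the same importance-sampling recipe, from me
## and not part of the original benchmark (wrapped in if(FALSE) so it
## never runs here): estimate E[X] for X ~ N(2, 1) with a N(0, 2^2)
## proposal.
if(FALSE)
{
    e  <- rnorm(10000);
    xs <- 0 + 2*e;                                 # proposal draws
    lw <- dnorm(xs, 2, 1, log=TRUE) - dnorm(xs, 0, 2, log=TRUE);
    w  <- exp(lw); w <- w/sum(w);                  # self-normalized weights
    sum(w*xs);                                     # should be close to 2
}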
#########################################
log.like <- function(y, z, D.u.inv, ada.part)
## returns the log joint density of (y, u) for one subject, as a
## closure in u suitable for passing to optim()
{
    function(u)
    {
        ada <- exp( ada.part + z %*% u );
        ada <- ada/(1+ada);
        sum( y*log(ada) + (1-y)*log(1-ada) ) - quadratic.form(D.u.inv, u)/2;
    }
}
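## Aside from me: the Bernoulli term above can equivalently be written
## as sum(dbinom(y, 1, ada, log=TRUE)); a quick check:
##   y0 <- c(1,0,1); p0 <- c(.2,.5,.9)
##   all.equal(sum(dbinom(y0, 1, p0, log=TRUE)),
##             sum(y0*log(p0) + (1-y0)*log(1-p0)))  # TRUE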
main <- function()
{
    ## b <- c(.5, .5, -1, -.5);  # dead assignment in the original, overwritten below
    b <- c(.5, .5, -1, -.5, 0, 0, 0, 0);   # true fixed effects
    D.u <- diag( c(.5, .5) );              # true random-effect covariance
    x <- array(0, c(n, n.fixed, m) );
    x[, 1, ] <- 1;                         # intercept
    for(i in 1:n) x[i, 2, ] <- (i-5)/4;    # centered time trend
    x[, 3, 1:trunc(m/2)] <- 0;             # group indicator
    x[, 3, trunc(m/2+1):m] <- 1;
    x[, 4, ] <- x[, 2, ]*x[, 3, ];         # time-by-group interaction
    x[1, 5:n.fixed, ] <- rnorm(4*m);       # four noise covariates, constant within cluster
    for(i in 2:n) x[i, 5:n.fixed, ] <- x[1, 5:n.fixed, ];
    z <- x[, 1:2, ];                       # random intercept and slope
    for(i in 1:200)
    {
        print(i); print(date());
        y <- glmm.sample.y(b, D.u, x, z);
        obj <- mle(b, D.u, x, y, z, FALSE, TRUE, n.iter=50);
        print("estimate with b known");
        print(obj$D.u);
        obj <- mle(b, D.u, x, y, z, TRUE, TRUE, n.iter=50);
        print("profile MLE");
        print(obj$D.u);
        obj <- REML.b.D.u(b, D.u, x, y, z, n.iter=50);
        print("REML type estimate");
        print(obj$D.u);
    }
}
###################################################
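## One more suggestion of my own, not in the original script: fixing the
## RNG seed before main() would make runs under the two builds directly
## comparable, e.g.:
##   set.seed(1)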
n <- 10;        # number of observations per cluster
m <- 50;        # number of clusters
n.fixed <- 8;   # number of fixed effects
n.random <- 2;  # number of random effects
n.simu <- 2000; # importance-sampling draws per cluster
main();