I care a lot about R's speed, so I decided to give REvolution's R
(http://revolution-computing.com/) a try; it bills itself as an
optimized build of R. Note that I used the free version.
My machine is an Intel Core 2 Duo running Windows XP Professional. The
code I ran is at the end of this post.
First, regular R 2.9: it takes 2 minutes and 6 seconds, with CPU usage
at 50% (i.e., one of the two cores saturated).
Next, REvolution's R: it takes 2 minutes and 10 seconds, with CPU usage
at 100% (both cores saturated).
In other words, REvolution's R consumes double the CPU for slightly
less speed.
I have replicated the above a few times (as a statistician would, of
course). Does anyone have any insight into this? In any case, my high
hopes were dashed.
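For anyone who wants to reproduce the comparison: after sourcing the
script below, the run can be instrumented with base R's system.time().
The user-versus-elapsed reading is my own gloss, not anything from
REvolution's documentation:

tm <- system.time( main() );
print(tm);
## "elapsed" is wall-clock time; "user" is CPU time summed across cores.
## With a multithreaded BLAS, user can approach twice elapsed on a
## dual-core box, consistent with the 100% CPU reading above.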
rm(list=ls(all=TRUE))
library(MASS);   # for mvrnorm()
###small and common functions##################################
glmm.sample.y <- function(b, D.u, x, z)
## simulate binary responses from a logistic GLMM; b = fixed effects,
## D.u = random-effect covariance, x/z = fixed/random design arrays
{
    pp <- matrix(0, nrow = n, ncol = m);
    zero <- numeric(n.random);
    ran <- t( mvrnorm(m, zero, D.u) );   # one random-effect draw per cluster
    for(j in 1:m) pp[, j] <- as.vector( x[, , j] %*% b + z[, , j] %*% ran[, j] );
    pp <- exp(pp)/( 1+exp(pp) );         # inverse logit
    y <- runif(m*n);
    ifelse(y < pp, 1, 0);                # Bernoulli(pp) draws, an n x m matrix
}
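## An aside from me, not in the original post: for large linear
## predictors exp() can overflow; base R's plogis() is a numerically
## safer drop-in for the inverse-logit line above:
##   pp <- plogis(pp)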
#################
quadratic.form <- function(A, x) { return( as.vector(x %*% A %*% x) ) }  # x'Ax for one vector
quadratic.form2 <- function(A, x)
## columnwise x'Ax: if R = chol(A) so that A = t(R) %*% R,
## then x'Ax = colSums((R %*% x)^2)
{
    x <- chol(A) %*% x;
    colSums(x*x)
}
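## A quick sanity check of my own (not in the original):
##   A2 <- matrix(c(1.5, .5, .5, 1.5), 2); v <- c(1, 2)
##   all.equal(quadratic.form(A2, v), quadratic.form2(A2, matrix(v)))  # TRUE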
#######################################################################
REML.b.D.u <- function(b, D.u, x, y, z, n.iter)
## REML-type estimation: alternate Newton updates of b with a
## simulation-based bias correction TT for the variance components
{
    u.mean.initial <- array( 0, c(n.random, m) );
    TT <- matrix(0, n.random, n.random);
    for(i in 1:n.iter)
    {
        TT <- TT*(1-1/i) + bias(b, D.u, x, z)/i;   # running mean of bias draws
        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);
        b <- b - solve(obj$Hessian, obj$score);    # Newton step for fixed effects
        D.u <- obj$uu + TT;                        # bias-corrected variance update
        u.mean.initial <- obj$u.mean.initial;
        print(i); print(date());
        print("inside REML"); print(TT);
        if(i==n.iter) write(TT, file="c:/liao/reml/simu50_8.dat", append=TRUE);
        print(D.u);
    }
    list(b=b, D.u=D.u);
}
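## Note from me, not the original post: TT <- TT*(1-1/i) + new/i is an
## incremental running mean, so after the loop TT equals the plain
## average of the simulated bias draws. A one-line check:
##   xs <- rnorm(100); TT0 <- 0
##   for(i in seq_along(xs)) TT0 <- TT0*(1-1/i) + xs[i]/i
##   all.equal(TT0, mean(xs))  # TRUE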
bias <- function(b, D.u, x, z)
## estimate the bias in the variance-component estimate by simulating
## data at the current parameters, then comparing the fit with b held
## fixed (obj1) against the fit with b re-estimated (obj2)
{
    yy <- glmm.sample.y(b, D.u, x, z);   # this is the sampling stage
    obj1 <- mle(b, D.u, x, yy, z, FALSE, FALSE, n.iter=50);
    obj2 <- mle(b, D.u, x, yy, z, TRUE,  FALSE, n.iter=50);
    obj1$uu - obj2$uu
}
##################################################
mle <- function(b, D.u, x, y, z, indx1, indx2, n.iter)
## Monte Carlo iteration: indx1 = update the fixed effects b,
## indx2 = update the random-effect covariance D.u
{
    u.mean.initial <- array( 0, c(n.random, m) );
    for(i in 1:n.iter)
    {
        obj <- sample.u(b, D.u, x, y, z, u.mean.initial);
        if(indx1) b <- b - solve(obj$Hessian, obj$score);
        if(indx2) D.u <- obj$uu;
        u.mean.initial <- obj$u.mean.initial;
    }
    list(b=b, D.u=D.u, uu=obj$uu, u.mean.initial=u.mean.initial);
}
##############################################
sample.u <- function(b, D.u, x, y, z, u.mean.initial)
## accumulate the score, Hessian, and E[uu'] over all m clusters
{
    D.u.inv <- solve(D.u);
    uu <- matrix(0, n.random, n.random);
    score <- numeric(n.fixed);
    Hessian <- matrix(0, n.fixed, n.fixed);
    for(i in 1:m)
    {
        ada.part <- as.vector(x[, , i] %*% b);   # fixed-effect part of the linear predictor
        obj <- sample.u.one( D.u.inv, ada.part, x[, , i], y[, i], z[, , i], u.mean.initial[, i] );
        uu <- uu + obj$uu;
        score <- score + obj$score;
        Hessian <- Hessian + obj$Hessian;
        u.mean.initial[, i] <- obj$u.mean.initial;
    }
    uu <- uu/m;
    list(uu=uu, score=score, Hessian=Hessian, u.mean.initial=u.mean.initial);
}
##########################################
sample.u.one <- function(D.u.inv, ada.part, x, y, z, u.mean.initial)
## ada.part, x, y, z are for one subject only.
## Importance sampling: locate the posterior mode of u with optim(),
## draw from a Gaussian proposal centered there (Laplace approximation),
## and reweight by target/proposal.
{
    fn <- log.like(y, z, D.u.inv, ada.part);
    obj <- optim(u.mean.initial, fn, control=list(fnscale=-1), hessian=TRUE);
    u.mean.initial <- obj$par;
    var.root <- solve(-obj$hessian);        # proposal covariance
    var.root <- t( chol(var.root) );        # its lower-triangular root
    u <- matrix( rnorm(n.random*n.simu), nrow=n.random, ncol=n.simu );
    log.prob1 <- -colSums(u*u)/2;           # proposal log density (up to a constant)
    u <- u.mean.initial + var.root %*% u;   # proposal draws
    ada <- exp( ada.part + z %*% u );
    ada <- ada/(1+ada);                     # it is a probability now
    log.prob2 <- colSums( y*log(ada) + (1-y)*log(1-ada) );
    log.prob2 <- -quadratic.form2(D.u.inv, u)/2 + log.prob2;   # target log density
    weight <- exp(log.prob2 - log.prob1);
    weight <- weight/sum(weight);           # self-normalized importance weights
    ada <- t(ada);
    pi <- colSums(ada*weight);              # E[p | y] per observation
    product <- colSums( ada*(1-ada)*weight );
    score <- as.vector( (y - pi) %*% x );
    Hessian <- -t(x) %*% ( rep(product, n.fixed)*x );
    obj <- cov.wt( t(u), weight, center=TRUE );
    uu <- obj$cov + obj$center %*% t(obj$center);   # E[uu'] = Var(u) + E(u)E(u)'
    list(uu=uu, score=score, Hessian=Hessian, u.mean.initial=obj$center);
}
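## A toy illustration of the same importance-sampling recipe, from me
## and not part of the original benchmark (wrapped in if(FALSE) so it
## never runs here): estimate E[X] for X ~ N(2, 1) with a N(0, 2^2)
## proposal.
if(FALSE)
{
    e  <- rnorm(10000);
    xs <- 0 + 2*e;                                 # proposal draws
    lw <- dnorm(xs, 2, 1, log=TRUE) - dnorm(xs, 0, 2, log=TRUE);
    w  <- exp(lw); w <- w/sum(w);                  # self-normalized weights
    sum(w*xs);                                     # should be close to 2
}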
#########################################
log.like <- function(y, z, D.u.inv, ada.part)
## returns the log joint density of (y, u) for one subject, as a
## closure in u suitable for passing to optim()
{
    function(u)
    {
        ada <- exp( ada.part + z %*% u );
        ada <- ada/(1+ada);
        sum( y*log(ada) + (1-y)*log(1-ada) ) - quadratic.form(D.u.inv, u)/2;
    }
}
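## Aside from me: the Bernoulli term above can equivalently be written
## as sum(dbinom(y, 1, ada, log=TRUE)); a quick check:
##   y0 <- c(1,0,1); p0 <- c(.2,.5,.9)
##   all.equal(sum(dbinom(y0, 1, p0, log=TRUE)),
##             sum(y0*log(p0) + (1-y0)*log(1-p0)))  # TRUE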
main <- function()
{
    ## b <- c(.5, .5, -1, -.5);  # dead assignment in the original, overwritten below
    b <- c(.5, .5, -1, -.5, 0, 0, 0, 0);   # true fixed effects
    D.u <- diag( c(.5, .5) );              # true random-effect covariance
    x <- array(0, c(n, n.fixed, m) );
    x[, 1, ] <- 1;                         # intercept
    for(i in 1:n) x[i, 2, ] <- (i-5)/4;    # centered time trend
    x[, 3, 1:trunc(m/2)] <- 0;             # group indicator
    x[, 3, trunc(m/2+1):m] <- 1;
    x[, 4, ] <- x[, 2, ]*x[, 3, ];         # time-by-group interaction
    x[1, 5:n.fixed, ] <- rnorm(4*m);       # four noise covariates, constant within cluster
    for(i in 2:n) x[i, 5:n.fixed, ] <- x[1, 5:n.fixed, ];
    z <- x[, 1:2, ];                       # random intercept and slope
    for(i in 1:200)
    {
        print(i); print(date());
        y <- glmm.sample.y(b, D.u, x, z);
        obj <- mle(b, D.u, x, y, z, FALSE, TRUE, n.iter=50);
        print("estimate with b known");
        print(obj$D.u);
        obj <- mle(b, D.u, x, y, z, TRUE, TRUE, n.iter=50);
        print("profile MLE");
        print(obj$D.u);
        obj <- REML.b.D.u(b, D.u, x, y, z, n.iter=50);
        print("REML type estimate");
        print(obj$D.u);
    }
}
###################################################
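## One more suggestion of my own, not in the original script: fixing the
## RNG seed before main() would make runs under the two builds directly
## comparable, e.g.:
##   set.seed(1)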
n <- 10;        # number of observations per cluster
m <- 50;        # number of clusters
n.fixed <- 8;   # number of fixed effects
n.random <- 2;  # number of random effects
n.simu <- 2000; # importance-sampling draws per cluster
main();