> dt = data.table(d,key="grp1,grp2")
> system.time(ans1 <- dt[ , list(mean(x),mean(y)) , by=list(grp1,grp2)])
   user  system elapsed
   3.89    0.00    3.91   # your 7.064 is 12.23 for me though, so this 3.9 should be faster for you

However, Rprof() shows that the 3.9 seconds is mostly spent dispatching mean() to mean.default(), which then calls .Internal(). Because there are so many groups here, the dispatch overhead bites. So ...

> system.time(ans2 <- dt[ , list(.Internal(mean(x)),.Internal(mean(y))),
> by=list(grp1,grp2)])
   user  system elapsed
   0.20    0.00    0.21
> identical(ans1,ans2)
TRUE

"Hadley Wickham" <[email protected]> wrote in message news:[email protected]...
> library(plyr)
>
> n<-100000
> grp1<-sample(1:750, n, replace=T)
> grp2<-sample(1:750, n, replace=T)
> d<-data.frame(x=rnorm(n), y=rnorm(n), grp1=grp1, grp2=grp2)
>
> system.time({
> d$avx1 <- ave(d$x, list(d$grp1, d$grp2))
> d$avy1 <- ave(d$y, list(d$grp1, d$grp2))
> })
> # user system elapsed
> # 39.300 0.279 40.809
> system.time({
> d$avx2 <- ave(d$x, interaction(d$grp1, d$grp2, drop = T))
> d$avy2 <- ave(d$y, interaction(d$grp1, d$grp2, drop = T))
> })
> # user system elapsed
> # 6.735 0.209 7.064
>
> all.equal(d$avy1, d$avy2)
> # TRUE
> all.equal(d$avx1, d$avx2)
> # TRUE
>
> i.e. ave should use g <- interaction(..., drop = TRUE)
>
> Hadley
>
> --
> Assistant Professor / Dobelman Family Junior Chair
> Department of Statistics / Rice University
> http://had.co.nz/
>
______________________________________________
[email protected] mailing list
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

