Hi Eliot, 2016-11-14 16:57 GMT+01:00 Eliot Miranda <[email protected]>:
> Hi Thierry, > > > > On Nov 14, 2016, at 7:04 AM, Thierry Goubier <[email protected]> > wrote: > > > > Has anybody seen performance differences between the 32bits and the > 64bits versions of Pharo 6 ? > > > > I'm seeing a speedup greater than 2 on some intensive numerical code. > > > > Note that, on that code, Pharo 64bits is slower than R, by around 30%. > The code overall is memory bound. > > Can you post (a pointer to?) the R & Pharo code you're comparing? > I attach both. It's a simple lid-driven cavity two-dimensional lattice Boltzmann code (D2Q9). The Pharo code is a very simple transcription; I didn't try to optimize it at all (I'm not entirely sure it gives the right results; I'll check that tomorrow). Those benchmarks are the result of the MACH project, an ITEA project, where, with a partner, we wrote a compiler for the R code that is 50 times faster than R (on a dual-core i3) (and can use GPUs, reaching speedups greater than 1000). Regards, Thierry > > TIA > > > > > Thierry > > > > > >
LatticeBoltzmann-ThierryGoubier.1.mcz
Description: Binary data
# LBM implementation D2Q9
# Thierry Goubier, 2016
#' Run a D2Q9 lattice Boltzmann simulation of a cavity with a moving lid.
#'
#' The grid has `n + 1` by `m + 1` nodes. Every node starts at density 1 and
#' zero velocity, except the last row in y (`m + 1`), which starts with
#' x-velocity `uo = 60 / n`. Each time step relaxes the populations toward
#' their equilibrium, shifts them one node along their lattice direction,
#' applies the wall and lid boundary rules, and recomputes density and
#' velocity from the populations.
#'
#' @param n Number of lattice intervals along x (the grid has `n + 1` nodes).
#'   The lid update indexes nodes `2:n`, so `n >= 2` is presumably expected.
#' @param m Number of lattice intervals along y (the grid has `m + 1` nodes).
#' @param nbstep Number of time steps to run. `0` returns the initial
#'   velocity field without stepping.
#' @return A numeric array of dimension `c(2, n + 1, m + 1)`; `[1, , ]` holds
#'   the x component of the velocity and `[2, , ]` the y component.
main <- function(n = 200, m = 200, nbstep = 200) {
  # Populations are indexed as [direction, x node, y node].
  f <- array(0.0, c(9, n + 1, m + 1))
  # feq first holds the equilibrium, then a copy of the relaxed populations
  # that the shift step reads from.
  feq <- array(0.0, c(9, n + 1, m + 1))
  rho <- array(0.0, c(n + 1, m + 1))
  uv <- array(0.0, c(2, n + 1, m + 1))
  t1 <- array(0.0, c(n + 1, m + 1))
  t2 <- array(0.0, c(9, n + 1, m + 1))

  # Lattice weights, one per direction.
  w <- c(
    4.0 / 9.0,
    1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0, 1.0 / 9.0,
    1.0 / 36.0, 1.0 / 36.0, 1.0 / 36.0, 1.0 / 36.0
  )
  uo <- 60.0 / n
  rhoo <- 1.0
  alpha <- 0.06 # presumably the lattice viscosity -- confirm with the author
  omega <- 1.0 / (3.0 * alpha + 0.5)
  # Lattice direction components: 1 = rest, 2..5 = axis, 6..9 = diagonal.
  cx <- c(0.0, 1.0, 0.0, -1.0, 0.0, 1.0, -1.0, -1.0, 1.0)
  cy <- c(0.0, 0.0, 1.0, 0.0, -1.0, 1.0, 1.0, -1.0, -1.0)

  # Initial state: populations equal to the weights, uniform density, fluid at
  # rest (uv is already all zeros) except for the moving last row in y.
  for (k in seq_along(w)) {
    f[k, , ] <- w[k]
  }
  rho[, ] <- rhoo
  uv[1, , m + 1] <- uo

  # seq_len() runs no iteration for nbstep = 0, whereas 1:nbstep would
  # iterate over c(1, 0) and execute two steps.
  for (step in seq_len(nbstep)) {
    # Relax every population toward its equilibrium.
    t1[, ] <- uv[1, , ] * uv[1, , ] + uv[2, , ] * uv[2, , ]
    for (j in seq_along(w)) {
      t2[j, , ] <- cx[j] * uv[1, , ] + cy[j] * uv[2, , ]
      feq[j, , ] <- 1.0 + 3.0 * t2[j, , ] + 4.5 * t2[j, , ] * t2[j, , ] -
        1.5 * t1
      feq[j, , ] <- rho * w[j] * feq[j, , ]
      f[j, , ] <- omega * feq[j, , ] + (1.0 - omega) * f[j, , ]
      # Snapshot so the shifts below read values from before any shifting.
      feq[j, , ] <- f[j, , ]
    }

    # Shift each population one node along its (cx, cy) direction.
    f[2, 2:(n + 1), ] <- feq[2, 1:n, ]
    f[3, , 2:(m + 1)] <- feq[3, , 1:m]
    f[4, 1:n, ] <- feq[4, 2:(n + 1), ]
    f[5, , 1:m] <- feq[5, , 2:(m + 1)]
    f[6, 2:(n + 1), 2:(m + 1)] <- feq[6, 1:n, 1:m]
    f[7, 1:n, 2:(m + 1)] <- feq[7, 2:(n + 1), 1:m]
    f[8, 1:n, 1:m] <- feq[8, 2:(n + 1), 2:(m + 1)]
    f[9, 2:(n + 1), 1:m] <- feq[9, 1:n, 2:(m + 1)]

    # Fixed walls: populations pointing into the fluid take the value of the
    # opposite direction. The order matters at the corners, so keep it.
    # x = 1 wall
    f[2, 1, ] <- f[4, 1, ]
    f[6, 1, ] <- f[8, 1, ]
    f[9, 1, ] <- f[7, 1, ]
    # x = n + 1 wall
    f[4, n + 1, ] <- f[2, n + 1, ]
    f[8, n + 1, ] <- f[6, n + 1, ]
    f[7, n + 1, ] <- f[9, n + 1, ]
    # y = 1 wall
    f[3, , 1] <- f[5, , 1]
    f[6, , 1] <- f[8, , 1]
    f[7, , 1] <- f[9, , 1]

    # Moving lid (y = m + 1, x nodes 2..n): estimate the density from the
    # known populations, then set the three populations pointing into the
    # fluid so that the lid carries x-velocity uo.
    rhon <- f[1, 2:n, m + 1] + f[2, 2:n, m + 1] + f[4, 2:n, m + 1] +
      2.0 * (f[3, 2:n, m + 1] + f[6, 2:n, m + 1] + f[7, 2:n, m + 1])
    f[5, 2:n, m + 1] <- f[3, 2:n, m + 1]
    f[9, 2:n, m + 1] <- f[7, 2:n, m + 1] +
      0.5 * (f[4, 2:n, m + 1] - f[2, 2:n, m + 1]) + 0.5 * rhon * uo
    f[8, 2:n, m + 1] <- f[6, 2:n, m + 1] +
      0.5 * (f[2, 2:n, m + 1] - f[4, 2:n, m + 1]) - 0.5 * rhon * uo

    # Density is the sum of the populations; velocity is the sum of the
    # populations weighted by direction, divided by the density.
    rho[, ] <- f[1, , ] + f[2, , ] + f[3, , ] + f[4, , ] + f[5, , ] +
      f[6, , ] + f[7, , ] + f[8, , ] + f[9, , ]
    uv[1, , ] <- (f[1, , ] * cx[1] + f[2, , ] * cx[2] + f[3, , ] * cx[3] +
      f[4, , ] * cx[4] + f[5, , ] * cx[5] + f[6, , ] * cx[6] +
      f[7, , ] * cx[7] + f[8, , ] * cx[8] + f[9, , ] * cx[9]) / rho[, ]
    uv[2, , ] <- (f[1, , ] * cy[1] + f[2, , ] * cy[2] + f[3, , ] * cy[3] +
      f[4, , ] * cy[4] + f[5, , ] * cy[5] + f[6, , ] * cy[6] +
      f[7, , ] * cy[7] + f[8, , ] * cy[8] + f[9, , ] * cy[9]) / rho[, ]
  }

  uv
}
