Hi,

I've just been running some benchmarks for r.mapcalc
to try and find the best method for parallelizing
a script / best way to minimize overheads.

I'd like to understand where it is useful to combine
expressions into a single r.mapcalc process, and
where it isn't, and what sort of mapcalc expressions
can best take advantage of pthreads support, and
which are not good matches for it.
Also, as with some OpenMP experiments, I'd like to know
whether it makes sense to parallelize by row (given a
target column length of 1000-3000 cells), or in some
other way (e.g. for a 1000-row-tall raster, spawn
4 threads of 250 rows each).

Summary of results: r.mapcalc built without pthread
support was the fastest for my test case. When built
with pthread support, using WORKERS=1 was the fastest
option (default is 8) [*]. Executing r.mapcalc as three
separate processes was the fastest of all. grass7 run
as three 1-worker processes was faster than
grass6.5svn.

[*] note that even with r.mapcalc built without
pthreads (make clean r.mapcalc dir + edited r.mapcalc
Makefile) it still uses more than one CPU core.
maybe because of lib/gis/counter.c(?)


test case: r.blend's 3-in-1 r.mapcalc expression,
with a region of rows: 2355, cols: 3045

test machine: 6-core CPU (full cores, not hyperthreaded ones)

Timing results follow.


thanks for any ideas,
Hamish

----
(hoping linewrap doesn't mess it up)

# grass7, spearfish
# Inputs for the r.blend-style test expression: the two source rasters,
# the percentage weight given to the first one, and the output basename.
FIRST='elevation.10m'
SECOND='aspect'
PERCENT='35'
OUTPUT='tmp.mpclc'
# Region for all timing runs; with res=2 these bounds give
# 2355 rows x 3045 cols.
g.region n=4323650 s=4318940 w=289020 e=295110 res=2

# Time the combined three-band (r, g, b) blend expression once for each
# WORKERS value from 1 to 14.
# The quoted expression is kept on unbroken lines: the only line breaks
# inside it are the explicit backslash continuations after each ';'.
# (The mail client had wrapped the expression mid-term, which put raw
# newlines into the string handed to r.mapcalc.)
for w in $(seq 14) ; do
   # No trailing newline here, so the `time` report follows the label.
   printf 'Workers: %s' "$w"
   time WORKERS="$w"  r.mapcalc "$OUTPUT.r = r#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * r#$SECOND ; \
        $OUTPUT.g = g#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * g#$SECOND ; \
        $OUTPUT.b = b#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * b#$SECOND" \
          --overwrite --quiet
   echo
done

# three replicates shown columnwise
Workers: 1
real    0m5.711s        real    0m5.716s        real    0m5.672s
user    0m7.544s        user    0m7.552s        user    0m7.596s
sys     0m0.332s        sys     0m0.348s        sys     0m0.260s

Workers: 2
real    0m5.987s        real    0m6.142s        real    0m5.976s
user    0m7.028s        user    0m7.224s        user    0m7.028s
sys     0m0.584s        sys     0m0.596s        sys     0m0.560s

Workers: 3
real    0m6.300s        real    0m6.180s        real    0m6.184s
user    0m7.512s        user    0m7.408s        user    0m7.488s
sys     0m0.620s        sys     0m0.612s        sys     0m0.604s

Workers: 4
real    0m6.693s        real    0m6.605s        real    0m6.642s
user    0m7.780s        user    0m7.632s        user    0m7.612s
sys     0m0.856s        sys     0m0.840s        sys     0m0.888s

Workers: 5
real    0m6.279s        real    0m6.203s        real    0m6.246s
user    0m7.496s        user    0m7.528s        user    0m7.484s
sys     0m0.996s        sys     0m0.920s        sys     0m0.972s

Workers: 6
real    0m6.231s        real    0m6.378s        real    0m6.180s
user    0m7.588s        user    0m7.712s        user    0m7.656s
sys     0m1.028s        sys     0m0.980s        sys     0m0.924s

Workers: 7
real    0m6.080s        real    0m6.114s        real    0m6.148s
user    0m7.616s        user    0m7.588s        user    0m7.704s
sys     0m1.040s        sys     0m1.096s        sys     0m1.016s

Workers: 8
real    0m6.181s        real    0m6.294s        real    0m6.251s
user    0m7.700s        user    0m7.820s        user    0m7.756s
sys     0m1.100s        sys     0m1.096s        sys     0m1.140s

Workers: 9
real    0m6.307s        real    0m6.263s        real    0m6.269s
user    0m7.772s        user    0m7.784s        user    0m7.732s
sys     0m1.196s        sys     0m1.160s        sys     0m1.172s

Workers: 10
real    0m6.333s        real    0m6.333s        real    0m6.278s
user    0m7.720s        user    0m7.696s        user    0m7.752s
sys     0m1.272s        sys     0m1.304s        sys     0m1.204s

Workers: 11
real    0m6.315s        real    0m6.322s        real    0m6.292s
user    0m7.820s        user    0m7.760s        user    0m7.820s
sys     0m1.192s        sys     0m1.232s        sys     0m1.160s

Workers: 12
real    0m6.275s        real    0m6.307s        real    0m6.322s
user    0m7.724s        user    0m7.732s        user    0m7.824s
sys     0m1.240s        sys     0m1.256s        sys     0m1.192s

Workers: 13
real    0m6.324s        real    0m6.316s        real    0m6.301s
user    0m7.816s        user    0m7.852s        user    0m7.784s
sys     0m1.204s        sys     0m1.176s        sys     0m1.192s

Workers: 14
real    0m6.333s        real    0m6.316s        real    0m6.291s
user    0m7.668s        user    0m7.796s        user    0m7.840s
sys     0m1.336s        sys     0m1.224s        sys     0m1.156s


# split combined mapcalc expression into three different processes
# The three r.mapcalc calls run one after another inside a single
# subshell, so `time` reports the total for all three bands.
# GRASS_OVERWRITE / GRASS_VERBOSE are exported for the runs below;
# presumably they stand in for --overwrite / --quiet -- confirm.
# Each expression is kept on one line (the mail client had wrapped them
# mid-term, leaving a raw newline inside the quoted string).
export WORKERS=1
export GRASS_OVERWRITE=1
export GRASS_VERBOSE=0
time (
 r.mapcalc "$OUTPUT.r = r#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * r#$SECOND"
 r.mapcalc "$OUTPUT.g = g#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * g#$SECOND"
 r.mapcalc "$OUTPUT.b = b#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * b#$SECOND"
)
real    0m5.650s        real    0m5.700s        real    0m5.727s
user    0m7.428s        user    0m7.596s        user    0m7.468s
sys     0m0.392s        sys     0m0.312s        sys     0m0.432s



# shell backgrounding as parallelization method:
# Each band's r.mapcalc is started in the background with '&'; `wait`
# then blocks until all three have exited, so `time` covers the whole
# batch rather than just the job launches.
# Each expression is kept on one line (the mail client had wrapped them
# mid-term, leaving a raw newline inside the quoted string).
export WORKERS=1
time (
 r.mapcalc "$OUTPUT.r = r#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * r#$SECOND" &
 r.mapcalc "$OUTPUT.g = g#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * g#$SECOND" &
 r.mapcalc "$OUTPUT.b = b#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * b#$SECOND" &
 wait
)
real    0m2.110s        real    0m2.115s        real    0m2.088s
user    0m8.121s        user    0m8.241s        user    0m8.229s
sys     0m0.544s        sys     0m0.476s        sys     0m0.408s



# same backgrounded commands, but with r.mapcalc recompiled without
# pthreads libs
WORKERS=1
export WORKERS
real    0m2.100s        real    0m2.086s        real    0m2.095s
user    0m8.049s        user    0m8.037s        user    0m8.093s
sys     0m0.528s        sys     0m0.472s        sys     0m0.468s



#same, but with r.mapcalc recompiled without pthreads libs
# NOTE(review): this heading is identical to the previous one, yet the
# run uses WORKERS=8 and its timings differ from the WORKERS=1 run;
# possibly this was the pthreads-enabled build -- confirm with author.
# Each band's r.mapcalc is backgrounded and `wait` holds the timed
# subshell open until all three finish.
# Each expression is kept on one line (the mail client had wrapped them
# mid-term, leaving a raw newline inside the quoted string).
export WORKERS=8
time (
 r.mapcalc "$OUTPUT.r = r#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * r#$SECOND" &
 r.mapcalc "$OUTPUT.g = g#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * g#$SECOND" &
 r.mapcalc "$OUTPUT.b = b#$FIRST * $PERCENT/100.0 + (1.0 - $PERCENT/100.0) * b#$SECOND" &
 wait
)
real    0m2.455s        real    0m2.466s        real    0m2.457s
user    0m8.229s        user    0m8.125s        user    0m8.225s
sys     0m0.940s        sys     0m1.072s        sys     0m0.956s


#same, for grass 6.5svn:
real    0m3.805s        real    0m3.651s        real    0m3.689s
user    0m10.481s       user    0m10.533s       user    0m10.557s
sys     0m0.484s        sys     0m0.372s        sys     0m0.400s

_______________________________________________
grass-dev mailing list
[email protected]
http://lists.osgeo.org/mailman/listinfo/grass-dev

Reply via email to