Very cool, Mario. You should post this on the Quantified Onion's discussion of Parallel computing modules: https://groups.google.com/forum/#!topic/the-quantified-onion/2cSWXogt5Xs
I have been meaning to try implementing a bunch of parallel algorithms on the different forking/threading frameworks to see how they perform. I will be sure to add MCE to my list of platforms to try. The code inside MCE looks very well written and it should be easy enough for me to grok it just by reading it. However, I would appreciate if you have any high-level explanation of how this code differs from other parallel process/thread managers. A good list to start with is this metacpan search for modules in the Parallel:: package space<https://metacpan.org/search?q=Parallel%3A%3A> . Anyway, thanks for the note! David On Wed, Feb 6, 2013 at 4:45 AM, Mario Roy <[email protected]> wrote: > Hi all, > > The upcoming MCE release 1.4 for Perl will help parallelize PDL using > child processes or threads. > > I'm very happy with the results. I'm able to perform a 2048x2048 matrix > multiplication in under 3 seconds inside a CentOS 6.3 virtual machine > configured to use all cores on my MacBook Pro at 2.0 GHz. > > [code] > > #!/usr/bin/perl > > use strict; > use warnings; > > use FindBin; > use lib "$FindBin::Bin/lib"; > > use Time::HiRes qw(time); > use Storable qw(freeze thaw); > > use PDL; > use PDL::IO::Storable; > > use MCE; > > my $tam = 2048; > my $cols = $tam; > my $rows = $tam; > > my $a = sequence $tam,$tam; > my $b = sequence $tam,$tam; > my $r = zeroes $tam,$tam; > > my $max_parallel_level = 1; > my @p = ( ); > > my $start = time(); > > strassen($a, $b, $r, $tam); > > my $end = time(); > > ## print $r; > > printf STDERR "\n## Compute time: %0.03f (secs)\n\n", $end - $start; > > ## > > sub strassen { > > my $a = $_[0]; my $b = $_[1]; my $c = $_[2]; my $tam = $_[3]; > my $level = $_[4] || 0; > > ## Perform the classic multiplication when matrix is <= 64 X 64 > > if ($tam <= 128) { > > # for my $i (0 .. $tam - 1) { > # for my $j (0 .. $tam - 1) { > # $c->[$i][$j] = 0; > # for my $k (0 .. $tam - 1) { > # $c->[$i][$j] += $a->[$i][$k] * $b->[$k][$j]; > # } > # } > # } > > $c += $a x $b; > > return; > } > > ## Otherwise, perform multiplication using Strassen's algorithm > > my $nTam = $tam / 2; > > my $a11 = zeroes $nTam,$nTam; my $a12 = zeroes $nTam,$nTam; > my $a21 = zeroes $nTam,$nTam; my $a22 = zeroes $nTam,$nTam; > > my $b11 = zeroes $nTam,$nTam; my $b12 = zeroes $nTam,$nTam; > my $b21 = zeroes $nTam,$nTam; my $b22 = zeroes $nTam,$nTam; > > my $p1 = zeroes $nTam,$nTam; my $p2 = zeroes $nTam,$nTam; > my $p3 = zeroes $nTam,$nTam; my $p4 = zeroes $nTam,$nTam; > my $p5 = zeroes $nTam,$nTam; my $p6 = zeroes $nTam,$nTam; > my $p7 = zeroes $nTam,$nTam; > > my $t1 = zeroes $nTam,$nTam; my $t2 = zeroes $nTam,$nTam; > > if (++$level <= $max_parallel_level) { > > ## Parallelize via MCE > > sub store_result { > my ($n, $result) = @_; > $p[$n] = $result; > } > > my $mce = MCE->new( > max_workers => 7, > user_tasks => [{ > user_func => sub { > my $self = $_[0]; > my $data = $self->{user_data}; > my $result = zeroes $nTam,$nTam; > strassen($data->[0], $data->[1], $result, $data->[3], > $level); > $self->do('store_result', $data->[2], $result); > }, > task_end => sub { > $p1 = $p[1]; $p2 = $p[2]; $p3 = $p[3]; $p4 = $p[4]; > $p5 = $p[5]; $p6 = $p[6]; $p7 = $p[7]; > @p = ( ); > } > }] > ); > > $mce->spawn(); > > ## Divide the matrices into 4 sub-matrices > > divide($a11, $a12, $a21, $a22, $a, $nTam); > divide($b11, $b12, $b21, $b22, $b, $nTam); > > ## Calculate p1 to p7 > > sum_m($a11, $a22, $t1, $nTam); > sum_m($b11, $b22, $t2, $nTam); > $mce->send([ $t1, $t2, 1, $nTam ]); > > sum_m($a21, $a22, $t1, $nTam); > $mce->send([ $t1, $b11, 2, $nTam ]); > > subtract_m($b12, $b22, $t2, $nTam); > $mce->send([ $a11, $t2, 3, $nTam ]); > > subtract_m($b21, $b11, $t2, $nTam); > $mce->send([ $a22, $t2, 4, $nTam ]); > > sum_m($a11, $a12, $t1, $nTam); > $mce->send([ $t1, $b22, 5, $nTam ]); > > subtract_m($a21, $a11, $t1, $nTam); > sum_m($b11, $b12, $t2, $nTam); > $mce->send([ $t1, $t2, 6, $nTam ]); > > subtract_m($a12, $a22, $t1, $nTam); > sum_m($b21, $b22, $t2, $nTam); > $mce->send([ $t1, $t2, 7, $nTam ]); > > $mce->run(); > } > else { > ## Divide the matrices into 4 sub-matrices > > divide($a11, $a12, $a21, $a22, $a, $nTam); > divide($b11, $b12, $b21, $b22, $b, $nTam); > > ## Calculate p1 to p7 > > sum_m($a11, $a22, $t1, $nTam); > sum_m($b11, $b22, $t2, $nTam); > strassen($t1, $t2, $p1, $nTam, $level); > > sum_m($a21, $a22, $t1, $nTam); > strassen($t1, $b11, $p2, $nTam, $level); > > subtract_m($b12, $b22, $t2, $nTam); > strassen($a11, $t2, $p3, $nTam, $level); > > subtract_m($b21, $b11, $t2, $nTam); > strassen($a22, $t2, $p4, $nTam, $level); > > sum_m($a11, $a12, $t1, $nTam); > strassen($t1, $b22, $p5, $nTam, $level); > > subtract_m($a21, $a11, $t1, $nTam); > sum_m($b11, $b12, $t2, $nTam); > strassen($t1, $t2, $p6, $nTam, $level); > > subtract_m($a12, $a22, $t1, $nTam); > sum_m($b21, $b22, $t2, $nTam); > strassen($t1, $t2, $p7, $nTam, $level); > } > > ## Calculate and group into a single matrix $c > > calc($p1, $p2, $p3, $p4, $p5, $p6, $p7, $c, $nTam); > > return; > } > > sub divide { > > my $m11 = $_[0]; my $m12 = $_[1]; my $m21 = $_[2]; my $m22 = $_[3]; > my $m = $_[4]; my $tam = $_[5]; > > # for my $i (0 .. $tam - 1) { > # for my $j (0 .. $tam - 1) { > # $m11->[$i][$j] = $m->[$i][$j]; > # $m12->[$i][$j] = $m->[$i][$j + $tam]; > # $m21->[$i][$j] = $m->[$i + $tam][$j]; > # $m22->[$i][$j] = $m->[$i + $tam][$j + $tam]; > # } > # } > > my $n1 = $tam - 1; > my $n2 = $tam + $n1; > > ins(inplace($m11), $m->slice("0:$n1,0:$n1")); > ins(inplace($m12), $m->slice("$tam:$n2,0:$n1")); > ins(inplace($m21), $m->slice("0:$n1,$tam:$n2")); > ins(inplace($m22), $m->slice("$tam:$n2,$tam:$n2")); > > return; > } > > sub calc { > > my $p1 = $_[0]; my $p2 = $_[1]; my $p3 = $_[2]; my $p4 = $_[3]; > my $p5 = $_[4]; my $p6 = $_[5]; my $p7 = $_[6]; my $c = $_[7]; > my $tam = $_[8]; > > my $c11 = zeroes $tam,$tam; my $c12 = zeroes $tam,$tam; > my $c21 = zeroes $tam,$tam; my $c22 = zeroes $tam,$tam; > my $t1 = zeroes $tam,$tam; my $t2 = zeroes $tam,$tam; > > sum_m($p1, $p4, $t1, $tam); > sum_m($t1, $p7, $t2, $tam); > subtract_m($t2, $p5, $c11, $tam); > > sum_m($p3, $p5, $c12, $tam); > sum_m($p2, $p4, $c21, $tam); > > sum_m($p1, $p3, $t1, $tam); > sum_m($t1, $p6, $t2, $tam); > subtract_m($t2, $p2, $c22, $tam); > > # for my $i (0 .. $tam - 1) { > # for my $j (0 .. $tam - 1) { > # $c->[$i][$j] = $c11->[$i][$j]; > # $c->[$i][$j + $tam] = $c12->[$i][$j]; > # $c->[$i + $tam][$j] = $c21->[$i][$j]; > # $c->[$i + $tam][$j + $tam] = $c22->[$i][$j]; > # } > # } > > ins(inplace($c), $c11, 0, 0); > ins(inplace($c), $c12, $tam, 0); > ins(inplace($c), $c21, 0, $tam); > ins(inplace($c), $c22, $tam, $tam); > > return; > } > > sub sum_m { > > my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3]; > > # for my $i (0 .. $tam - 1) { > # for my $j (0 .. $tam - 1) { > # $r->[$i][$j] = $a->[$i][$j] + $b->[$i][$j]; > # } > # } > > ins(inplace($r), $a + $b); > > return; > } > > sub subtract_m { > > my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3]; > > # for my $i (0 .. $tam - 1) { > # for my $j (0 .. $tam - 1) { > # $r->[$i][$j] = $a->[$i][$j] - $b->[$i][$j]; > # } > # } > > ins(inplace($r), $a - $b); > > return; > } > > [/code] > > > _______________________________________________ > Perldl mailing list > [email protected] > http://mailman.jach.hawaii.edu/mailman/listinfo/perldl > -- "Debugging is twice as hard as writing the code in the first place. Therefore, if you write the code as cleverly as possible, you are, by definition, not smart enough to debug it." -- Brian Kernighan
_______________________________________________ Perldl mailing list [email protected] http://mailman.jach.hawaii.edu/mailman/listinfo/perldl
