Very cool, Mario. You should post this on the Quantified Onion's discussion
of Parallel computing modules:
https://groups.google.com/forum/#!topic/the-quantified-onion/2cSWXogt5Xs

I have been meaning to try implementing a bunch of parallel algorithms on
the different forking/threading frameworks to see how they perform. I will
be sure to add MCE to my list of platforms to try.

The code inside MCE looks very well written and it should be easy enough
for me to grok it just by reading it. However, I would appreciate if you
have any high-level explanation of how this code differs from other
parallel process/thread managers. A good list to start with is this
metacpan search for modules in the Parallel:: package
space<https://metacpan.org/search?q=Parallel%3A%3A>
.

Anyway, thanks for the note!
David


On Wed, Feb 6, 2013 at 4:45 AM, Mario Roy <[email protected]> wrote:

> Hi all,
>
> The upcoming MCE release 1.4 for Perl will help parallelize PDL using
> child processes or threads.
>
> I'm very happy with the results. I'm able to perform a 2048x2048 matrix
> multiplication in under 3 seconds inside a CentOS 6.3 virtual machine
> configured to use all cores on my MacBook Pro at 2.0 GHz.
>
> [code]
>
> #!/usr/bin/perl
>
> use strict;
> use warnings;
>
> use FindBin;
> use lib "$FindBin::Bin/lib";
>
> use Time::HiRes qw(time);
> use Storable qw(freeze thaw);
>
> use PDL;
> use PDL::IO::Storable;
>
> use MCE;
>
> my $tam  = 2048;
> my $cols = $tam;
> my $rows = $tam;
>
> my $a = sequence $tam,$tam;
> my $b = sequence $tam,$tam;
> my $r = zeroes   $tam,$tam;
>
> my $max_parallel_level = 1;
> my @p = ( );
>
> my $start = time();
>
> strassen($a, $b, $r, $tam);
>
> my $end = time();
>
> ## print $r;
>
> printf STDERR "\n## Compute time: %0.03f (secs)\n\n",  $end - $start;
>
> ##
>
> sub strassen {
>
>    my $a = $_[0]; my $b = $_[1]; my $c = $_[2]; my $tam = $_[3];
>    my $level = $_[4] || 0;
>
>    ## Perform the classic multiplication when matrix is <= 64 X 64
>
>    if ($tam <= 128) {
>
>     # for my $i (0 .. $tam - 1) {
>     #    for my $j (0 .. $tam - 1) {
>     #       $c->[$i][$j] = 0;
>     #       for my $k (0 .. $tam - 1) {
>     #          $c->[$i][$j] += $a->[$i][$k] * $b->[$k][$j];
>     #       }
>     #    }
>     # }
>
>       $c += $a x $b;
>
>       return;
>    }
>
>    ## Otherwise, perform multiplication using Strassen's algorithm
>
>    my $nTam = $tam / 2;
>
>    my $a11 = zeroes $nTam,$nTam;  my $a12 = zeroes $nTam,$nTam;
>    my $a21 = zeroes $nTam,$nTam;  my $a22 = zeroes $nTam,$nTam;
>
>    my $b11 = zeroes $nTam,$nTam;  my $b12 = zeroes $nTam,$nTam;
>    my $b21 = zeroes $nTam,$nTam;  my $b22 = zeroes $nTam,$nTam;
>
>    my $p1  = zeroes $nTam,$nTam;  my $p2  = zeroes $nTam,$nTam;
>    my $p3  = zeroes $nTam,$nTam;  my $p4  = zeroes $nTam,$nTam;
>    my $p5  = zeroes $nTam,$nTam;  my $p6  = zeroes $nTam,$nTam;
>    my $p7  = zeroes $nTam,$nTam;
>
>    my $t1  = zeroes $nTam,$nTam;  my $t2  = zeroes $nTam,$nTam;
>
>    if (++$level <= $max_parallel_level) {
>
>       ## Parallelize via MCE
>
>       sub store_result {
>          my ($n, $result) = @_;
>          $p[$n] = $result;
>       }
>
>       my $mce = MCE->new(
>          max_workers => 7,
>          user_tasks => [{
>             user_func => sub {
>                my $self = $_[0];
>                my $data = $self->{user_data};
>                my $result = zeroes $nTam,$nTam;
>                strassen($data->[0], $data->[1], $result, $data->[3],
> $level);
>                $self->do('store_result', $data->[2], $result);
>             },
>             task_end => sub {
>                $p1 = $p[1]; $p2 = $p[2]; $p3 = $p[3]; $p4 = $p[4];
>                $p5 = $p[5]; $p6 = $p[6]; $p7 = $p[7];
>                @p  = ( );
>             }
>          }]
>       );
>
>       $mce->spawn();
>
>       ## Divide the matrices into 4 sub-matrices
>
>       divide($a11, $a12, $a21, $a22, $a, $nTam);
>       divide($b11, $b12, $b21, $b22, $b, $nTam);
>
>       ## Calculate p1 to p7
>
>       sum_m($a11, $a22, $t1, $nTam);
>       sum_m($b11, $b22, $t2, $nTam);
>       $mce->send([ $t1, $t2, 1, $nTam ]);
>
>       sum_m($a21, $a22, $t1, $nTam);
>       $mce->send([ $t1, $b11, 2, $nTam ]);
>
>       subtract_m($b12, $b22, $t2, $nTam);
>       $mce->send([ $a11, $t2, 3, $nTam ]);
>
>       subtract_m($b21, $b11, $t2, $nTam);
>       $mce->send([ $a22, $t2, 4, $nTam ]);
>
>       sum_m($a11, $a12, $t1, $nTam);
>       $mce->send([ $t1, $b22, 5, $nTam ]);
>
>       subtract_m($a21, $a11, $t1, $nTam);
>       sum_m($b11, $b12, $t2, $nTam);
>       $mce->send([ $t1, $t2, 6, $nTam ]);
>
>       subtract_m($a12, $a22, $t1, $nTam);
>       sum_m($b21, $b22, $t2, $nTam);
>       $mce->send([ $t1, $t2, 7, $nTam ]);
>
>       $mce->run();
>    }
>    else {
>       ## Divide the matrices into 4 sub-matrices
>
>       divide($a11, $a12, $a21, $a22, $a, $nTam);
>       divide($b11, $b12, $b21, $b22, $b, $nTam);
>
>       ## Calculate p1 to p7
>
>       sum_m($a11, $a22, $t1, $nTam);
>       sum_m($b11, $b22, $t2, $nTam);
>       strassen($t1, $t2, $p1, $nTam, $level);
>
>       sum_m($a21, $a22, $t1, $nTam);
>       strassen($t1, $b11, $p2, $nTam, $level);
>
>       subtract_m($b12, $b22, $t2, $nTam);
>       strassen($a11, $t2, $p3, $nTam, $level);
>
>       subtract_m($b21, $b11, $t2, $nTam);
>       strassen($a22, $t2, $p4, $nTam, $level);
>
>       sum_m($a11, $a12, $t1, $nTam);
>       strassen($t1, $b22, $p5, $nTam, $level);
>
>       subtract_m($a21, $a11, $t1, $nTam);
>       sum_m($b11, $b12, $t2, $nTam);
>       strassen($t1, $t2, $p6, $nTam, $level);
>
>       subtract_m($a12, $a22, $t1, $nTam);
>       sum_m($b21, $b22, $t2, $nTam);
>       strassen($t1, $t2, $p7, $nTam, $level);
>    }
>
>    ## Calculate and group into a single matrix $c
>
>    calc($p1, $p2, $p3, $p4, $p5, $p6, $p7, $c, $nTam);
>
>    return;
> }
>
> sub divide {
>
>    my $m11 = $_[0]; my $m12 = $_[1]; my $m21 = $_[2]; my $m22 = $_[3];
>    my $m   = $_[4]; my $tam = $_[5];
>
>  # for my $i (0 .. $tam - 1) {
>  #    for my $j (0 .. $tam - 1) {
>  #       $m11->[$i][$j] = $m->[$i][$j];
>  #       $m12->[$i][$j] = $m->[$i][$j + $tam];
>  #       $m21->[$i][$j] = $m->[$i + $tam][$j];
>  #       $m22->[$i][$j] = $m->[$i + $tam][$j + $tam];
>  #    }
>  # }
>
>    my $n1 = $tam - 1;
>    my $n2 = $tam + $n1;
>
>    ins(inplace($m11), $m->slice("0:$n1,0:$n1"));
>    ins(inplace($m12), $m->slice("$tam:$n2,0:$n1"));
>    ins(inplace($m21), $m->slice("0:$n1,$tam:$n2"));
>    ins(inplace($m22), $m->slice("$tam:$n2,$tam:$n2"));
>
>    return;
> }
>
> sub calc {
>
>    my $p1  = $_[0]; my $p2  = $_[1]; my $p3  = $_[2]; my $p4  = $_[3];
>    my $p5  = $_[4]; my $p6  = $_[5]; my $p7  = $_[6]; my $c   = $_[7];
>    my $tam = $_[8];
>
>    my $c11 = zeroes $tam,$tam;  my $c12 = zeroes $tam,$tam;
>    my $c21 = zeroes $tam,$tam;  my $c22 = zeroes $tam,$tam;
>    my $t1  = zeroes $tam,$tam;  my $t2  = zeroes $tam,$tam;
>
>    sum_m($p1, $p4, $t1, $tam);
>    sum_m($t1, $p7, $t2, $tam);
>    subtract_m($t2, $p5, $c11, $tam);
>
>    sum_m($p3, $p5, $c12, $tam);
>    sum_m($p2, $p4, $c21, $tam);
>
>    sum_m($p1, $p3, $t1, $tam);
>    sum_m($t1, $p6, $t2, $tam);
>    subtract_m($t2, $p2, $c22, $tam);
>
>  # for my $i (0 .. $tam - 1) {
>  #    for my $j (0 .. $tam - 1) {
>  #       $c->[$i][$j] = $c11->[$i][$j];
>  #       $c->[$i][$j + $tam] = $c12->[$i][$j];
>  #       $c->[$i + $tam][$j] = $c21->[$i][$j];
>  #       $c->[$i + $tam][$j + $tam] = $c22->[$i][$j];
>  #    }
>  # }
>
>    ins(inplace($c), $c11, 0, 0);
>    ins(inplace($c), $c12, $tam, 0);
>    ins(inplace($c), $c21, 0, $tam);
>    ins(inplace($c), $c22, $tam, $tam);
>
>    return;
> }
>
> sub sum_m {
>
>    my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3];
>
>  # for my $i (0 .. $tam - 1) {
>  #    for my $j (0 .. $tam - 1) {
>  #       $r->[$i][$j] = $a->[$i][$j] + $b->[$i][$j];
>  #    }
>  # }
>
>    ins(inplace($r), $a + $b);
>
>    return;
> }
>
> sub subtract_m {
>
>    my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3];
>
>  # for my $i (0 .. $tam - 1) {
>  #    for my $j (0 .. $tam - 1) {
>  #       $r->[$i][$j] = $a->[$i][$j] - $b->[$i][$j];
>  #    }
>  # }
>
>    ins(inplace($r), $a - $b);
>
>    return;
> }
>
> [/code]
>
>
> _______________________________________________
> Perldl mailing list
> [email protected]
> http://mailman.jach.hawaii.edu/mailman/listinfo/perldl
>



-- 
 "Debugging is twice as hard as writing the code in the first place.
  Therefore, if you write the code as cleverly as possible, you are,
  by definition, not smart enough to debug it." -- Brian Kernighan
_______________________________________________
Perldl mailing list
[email protected]
http://mailman.jach.hawaii.edu/mailman/listinfo/perldl

Reply via email to