Hi all,
The upcoming MCE release 1.4 for Perl will help parallelize PDL using
child processes or threads.
I'm very happy with the results. I'm able to perform a 2048x2048 matrix
multiplication in under 3 seconds inside a CentOS 6.3 virtual machine
configured to use all cores on my MacBook Pro at 2.0 GHz.
[code]
#!/usr/bin/perl
use strict;
use warnings;
use FindBin;
use lib "$FindBin::Bin/lib";
use Time::HiRes qw(time);
use Storable qw(freeze thaw);
use PDL;
use PDL::IO::Storable;
use MCE;
my $tam = 2048;
my $cols = $tam;
my $rows = $tam;
my $a = sequence $tam,$tam;
my $b = sequence $tam,$tam;
my $r = zeroes $tam,$tam;
my $max_parallel_level = 1;
my @p = ( );
my $start = time();
strassen($a, $b, $r, $tam);
my $end = time();
## print $r;
printf STDERR "\n## Compute time: %0.03f (secs)\n\n", $end - $start;
##
sub strassen {
my $a = $_[0]; my $b = $_[1]; my $c = $_[2]; my $tam = $_[3];
my $level = $_[4] || 0;
## Perform the classic multiplication when matrix is <= 64 X 64
if ($tam <= 128) {
# for my $i (0 .. $tam - 1) {
# for my $j (0 .. $tam - 1) {
# $c->[$i][$j] = 0;
# for my $k (0 .. $tam - 1) {
# $c->[$i][$j] += $a->[$i][$k] * $b->[$k][$j];
# }
# }
# }
$c += $a x $b;
return;
}
## Otherwise, perform multiplication using Strassen's algorithm
my $nTam = $tam / 2;
my $a11 = zeroes $nTam,$nTam; my $a12 = zeroes $nTam,$nTam;
my $a21 = zeroes $nTam,$nTam; my $a22 = zeroes $nTam,$nTam;
my $b11 = zeroes $nTam,$nTam; my $b12 = zeroes $nTam,$nTam;
my $b21 = zeroes $nTam,$nTam; my $b22 = zeroes $nTam,$nTam;
my $p1 = zeroes $nTam,$nTam; my $p2 = zeroes $nTam,$nTam;
my $p3 = zeroes $nTam,$nTam; my $p4 = zeroes $nTam,$nTam;
my $p5 = zeroes $nTam,$nTam; my $p6 = zeroes $nTam,$nTam;
my $p7 = zeroes $nTam,$nTam;
my $t1 = zeroes $nTam,$nTam; my $t2 = zeroes $nTam,$nTam;
if (++$level <= $max_parallel_level) {
## Parallelize via MCE
sub store_result {
my ($n, $result) = @_;
$p[$n] = $result;
}
my $mce = MCE->new(
max_workers => 7,
user_tasks => [{
user_func => sub {
my $self = $_[0];
my $data = $self->{user_data};
my $result = zeroes $nTam,$nTam;
strassen($data->[0], $data->[1], $result, $data->[3], $level);
$self->do('store_result', $data->[2], $result);
},
task_end => sub {
$p1 = $p[1]; $p2 = $p[2]; $p3 = $p[3]; $p4 = $p[4];
$p5 = $p[5]; $p6 = $p[6]; $p7 = $p[7];
@p = ( );
}
}]
);
$mce->spawn();
## Divide the matrices into 4 sub-matrices
divide($a11, $a12, $a21, $a22, $a, $nTam);
divide($b11, $b12, $b21, $b22, $b, $nTam);
## Calculate p1 to p7
sum_m($a11, $a22, $t1, $nTam);
sum_m($b11, $b22, $t2, $nTam);
$mce->send([ $t1, $t2, 1, $nTam ]);
sum_m($a21, $a22, $t1, $nTam);
$mce->send([ $t1, $b11, 2, $nTam ]);
subtract_m($b12, $b22, $t2, $nTam);
$mce->send([ $a11, $t2, 3, $nTam ]);
subtract_m($b21, $b11, $t2, $nTam);
$mce->send([ $a22, $t2, 4, $nTam ]);
sum_m($a11, $a12, $t1, $nTam);
$mce->send([ $t1, $b22, 5, $nTam ]);
subtract_m($a21, $a11, $t1, $nTam);
sum_m($b11, $b12, $t2, $nTam);
$mce->send([ $t1, $t2, 6, $nTam ]);
subtract_m($a12, $a22, $t1, $nTam);
sum_m($b21, $b22, $t2, $nTam);
$mce->send([ $t1, $t2, 7, $nTam ]);
$mce->run();
}
else {
## Divide the matrices into 4 sub-matrices
divide($a11, $a12, $a21, $a22, $a, $nTam);
divide($b11, $b12, $b21, $b22, $b, $nTam);
## Calculate p1 to p7
sum_m($a11, $a22, $t1, $nTam);
sum_m($b11, $b22, $t2, $nTam);
strassen($t1, $t2, $p1, $nTam, $level);
sum_m($a21, $a22, $t1, $nTam);
strassen($t1, $b11, $p2, $nTam, $level);
subtract_m($b12, $b22, $t2, $nTam);
strassen($a11, $t2, $p3, $nTam, $level);
subtract_m($b21, $b11, $t2, $nTam);
strassen($a22, $t2, $p4, $nTam, $level);
sum_m($a11, $a12, $t1, $nTam);
strassen($t1, $b22, $p5, $nTam, $level);
subtract_m($a21, $a11, $t1, $nTam);
sum_m($b11, $b12, $t2, $nTam);
strassen($t1, $t2, $p6, $nTam, $level);
subtract_m($a12, $a22, $t1, $nTam);
sum_m($b21, $b22, $t2, $nTam);
strassen($t1, $t2, $p7, $nTam, $level);
}
## Calculate and group into a single matrix $c
calc($p1, $p2, $p3, $p4, $p5, $p6, $p7, $c, $nTam);
return;
}
sub divide {
my $m11 = $_[0]; my $m12 = $_[1]; my $m21 = $_[2]; my $m22 = $_[3];
my $m = $_[4]; my $tam = $_[5];
# for my $i (0 .. $tam - 1) {
# for my $j (0 .. $tam - 1) {
# $m11->[$i][$j] = $m->[$i][$j];
# $m12->[$i][$j] = $m->[$i][$j + $tam];
# $m21->[$i][$j] = $m->[$i + $tam][$j];
# $m22->[$i][$j] = $m->[$i + $tam][$j + $tam];
# }
# }
my $n1 = $tam - 1;
my $n2 = $tam + $n1;
ins(inplace($m11), $m->slice("0:$n1,0:$n1"));
ins(inplace($m12), $m->slice("$tam:$n2,0:$n1"));
ins(inplace($m21), $m->slice("0:$n1,$tam:$n2"));
ins(inplace($m22), $m->slice("$tam:$n2,$tam:$n2"));
return;
}
sub calc {
my $p1 = $_[0]; my $p2 = $_[1]; my $p3 = $_[2]; my $p4 = $_[3];
my $p5 = $_[4]; my $p6 = $_[5]; my $p7 = $_[6]; my $c = $_[7];
my $tam = $_[8];
my $c11 = zeroes $tam,$tam; my $c12 = zeroes $tam,$tam;
my $c21 = zeroes $tam,$tam; my $c22 = zeroes $tam,$tam;
my $t1 = zeroes $tam,$tam; my $t2 = zeroes $tam,$tam;
sum_m($p1, $p4, $t1, $tam);
sum_m($t1, $p7, $t2, $tam);
subtract_m($t2, $p5, $c11, $tam);
sum_m($p3, $p5, $c12, $tam);
sum_m($p2, $p4, $c21, $tam);
sum_m($p1, $p3, $t1, $tam);
sum_m($t1, $p6, $t2, $tam);
subtract_m($t2, $p2, $c22, $tam);
# for my $i (0 .. $tam - 1) {
# for my $j (0 .. $tam - 1) {
# $c->[$i][$j] = $c11->[$i][$j];
# $c->[$i][$j + $tam] = $c12->[$i][$j];
# $c->[$i + $tam][$j] = $c21->[$i][$j];
# $c->[$i + $tam][$j + $tam] = $c22->[$i][$j];
# }
# }
ins(inplace($c), $c11, 0, 0);
ins(inplace($c), $c12, $tam, 0);
ins(inplace($c), $c21, 0, $tam);
ins(inplace($c), $c22, $tam, $tam);
return;
}
sub sum_m {
my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3];
# for my $i (0 .. $tam - 1) {
# for my $j (0 .. $tam - 1) {
# $r->[$i][$j] = $a->[$i][$j] + $b->[$i][$j];
# }
# }
ins(inplace($r), $a + $b);
return;
}
sub subtract_m {
my $a = $_[0]; my $b = $_[1]; my $r = $_[2]; my $tam = $_[3];
# for my $i (0 .. $tam - 1) {
# for my $j (0 .. $tam - 1) {
# $r->[$i][$j] = $a->[$i][$j] - $b->[$i][$j];
# }
# }
ins(inplace($r), $a - $b);
return;
}
[/code]
_______________________________________________
Perldl mailing list
[email protected]
http://mailman.jach.hawaii.edu/mailman/listinfo/perldl