Performance spark wsloss/wcemm ultra-sparse (prefilter empty blocks) Ultra-sparse matrices are a common case for factorization algorithms. Accordingly, this change introduces a prefilter for empty blocks on wsloss and wcemm; this is safe because both operations are full aggregates, so result correctness is preserved. In a scenario of wsloss over KDD2010 (15M x 30M, sparsity 9.4e-7), this reduced the total runtime from 70s to 39s, even with inputs read from HDFS.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/a19a14c0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/a19a14c0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/a19a14c0 Branch: refs/heads/master Commit: a19a14c05e8034d5abf7f5c9ffbaea96f05b8017 Parents: 10d1afc Author: Matthias Boehm <[email protected]> Authored: Fri Jan 22 22:40:29 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sat Jan 23 16:08:14 2016 -0800 ---------------------------------------------------------------------- .../runtime/instructions/spark/QuaternarySPInstruction.java | 7 +++++++ 1 file changed, 7 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/a19a14c0/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java index af65a9e..500cc01 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/QuaternarySPInstruction.java @@ -53,6 +53,7 @@ import org.apache.sysml.runtime.instructions.cp.CPOperand; import org.apache.sysml.runtime.instructions.cp.DoubleObject; import org.apache.sysml.runtime.instructions.spark.data.LazyIterableIterator; import org.apache.sysml.runtime.instructions.spark.data.PartitionedBroadcastMatrix; +import org.apache.sysml.runtime.instructions.spark.functions.FilterNonEmptyBlocksFunction; import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import 
org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ -196,6 +197,12 @@ public class QuaternarySPInstruction extends ComputationSPInstruction int brlen = inMc.getRowsPerBlock(); int bclen = inMc.getColsPerBlock(); + //pre-filter empty blocks (ultra-sparse matrices) for full aggregates + //(map/redwsloss, map/redwcemm); safe because theses ops produce a scalar + if( qop.wtype1 != null || qop.wtype4 != null ) { + in = in.filter(new FilterNonEmptyBlocksFunction()); + } + //map-side only operation (one rdd input, two broadcasts) if( WeightedSquaredLoss.OPCODE.equalsIgnoreCase(getOpcode()) || WeightedSigmoid.OPCODE.equalsIgnoreCase(getOpcode())
