[SYSTEMML-2046] Large dense blocks in unary/grouped aggregates

This patch modifies all unary and grouped aggregates to support large dense matrix blocks (>16GB). Furthermore, it includes minor cleanups and extensions of the new dense block abstraction, as well as cleanups of various unary aggregate operations.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/131b647b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/131b647b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/131b647b Branch: refs/heads/master Commit: 131b647b1838af29f5e9bcc1e8d054ce40e32f61 Parents: d91d24a Author: Matthias Boehm <[email protected]> Authored: Thu Dec 28 16:00:09 2017 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Dec 28 16:00:09 2017 -0800 ---------------------------------------------------------------------- .../sysml/runtime/codegen/SpoofCellwise.java | 14 +- .../sysml/runtime/codegen/SpoofRowwise.java | 2 +- .../sysml/runtime/matrix/data/DenseBlock.java | 34 +- .../runtime/matrix/data/DenseBlockDRB.java | 9 +- .../runtime/matrix/data/DenseBlockLDRB.java | 13 +- .../sysml/runtime/matrix/data/LibMatrixAgg.java | 1314 +++++++----------- .../runtime/matrix/data/LibMatrixBincell.java | 32 +- .../sysml/runtime/matrix/data/MatrixBlock.java | 102 +- .../matrix/operators/BinaryOperator.java | 5 + 9 files changed, 601 insertions(+), 924 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/131b647b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java index 8c7e14d..5eb3b76 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofCellwise.java @@ -422,7 +422,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? 
a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; long lnnz = 0; for( int i=rl, ix=rl*n; i<ru; i++ ) @@ -439,7 +439,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private long executeDenseRowAggSum(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -460,7 +460,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; double initialVal = (_aggOp==AggOp.MIN) ? Double.MAX_VALUE : -Double.MAX_VALUE; ValueFunction vfun = getAggFunction(); @@ -490,7 +490,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl private long executeDenseColAggSum(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int m, int n, boolean sparseSafe, int rl, int ru) { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -513,7 +513,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl double[] c, int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; double initialVal = (_aggOp==AggOp.MIN) ? 
Double.MAX_VALUE : -Double.MAX_VALUE; ValueFunction vfun = getAggFunction(); @@ -544,7 +544,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; KahanFunction kplus = (KahanFunction) getAggFunction(); KahanObject kbuff = new KahanObject(0, 0); @@ -562,7 +562,7 @@ public abstract class SpoofCellwise extends SpoofOperator implements Serializabl int m, int n, boolean sparseSafe, int rl, int ru) throws DMLRuntimeException { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? a.valuesAt(0) : null; //safe aggregation for min/max w/ handling of zero entries //note: sparse safe with zero value as min/max handled outside http://git-wip-us.apache.org/repos/asf/systemml/blob/131b647b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java index c40be9f..39dd2ed 100644 --- a/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java +++ b/src/main/java/org/apache/sysml/runtime/codegen/SpoofRowwise.java @@ -289,7 +289,7 @@ public abstract class SpoofRowwise extends SpoofOperator private void executeDense(DenseBlock a, SideInput[] b, double[] scalars, double[] c, int n, int rl, int ru) { - double[] data = (a != null) ? a.values(0) : null; + double[] data = (a != null) ? 
a.valuesAt(0) : null; if( data == null ) return; http://git-wip-us.apache.org/repos/asf/systemml/blob/131b647b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java index 574f508..cf3f6db 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlock.java @@ -22,6 +22,8 @@ package org.apache.sysml.runtime.matrix.data; import java.io.Serializable; +import org.apache.sysml.runtime.instructions.cp.KahanObject; + /** * This DenseBlock is an abstraction for different dense, row-major * matrix formats. For efficient dense operations, this API does not @@ -146,6 +148,15 @@ public abstract class DenseBlock implements Serializable */ public abstract double[][] values(); + /** + * Get the allocated block for the given row. This call + * is equivalent to valuesAt(index(r)). + * + * @param r row index + * @return block + */ + public abstract double[] values(int r); + /** * Get an allocated block. @@ -153,7 +164,7 @@ public abstract class DenseBlock implements Serializable * @param bix block index * @return block */ - public abstract double[] values(int bix); + public abstract double[] valuesAt(int bix); /** * Get the block index for a given row. @@ -226,6 +237,27 @@ public abstract class DenseBlock implements Serializable */ public abstract void set(DenseBlock db); + /** + * Copy the given kahan object sum and correction. + * + * @param kbuff kahan object + */ + public void set(KahanObject kbuff) { + set(0, 0, kbuff._sum); + set(0, 1, kbuff._correction); + } + + /** + * Copy the given kahan object sum and correction + * into the given row. 
+ * + * @param r row index + * @param kbuff kahan object + */ + public void set(int r, KahanObject kbuff) { + set(r, 0, kbuff._sum); + set(r, 1, kbuff._correction); + } /** * Get the value for a given row and column. http://git-wip-us.apache.org/repos/asf/systemml/blob/131b647b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java index d2eb5ca..ae526eb 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockDRB.java @@ -131,7 +131,12 @@ public class DenseBlockDRB extends DenseBlock } @Override - public double[] values(int bix) { + public double[] values(int r) { + return data; + } + + @Override + public double[] valuesAt(int bix) { return data; } @@ -168,7 +173,7 @@ public class DenseBlockDRB extends DenseBlock @Override public void set(DenseBlock db) { - System.arraycopy(db.values(0), 0, data, 0, rlen*clen); + System.arraycopy(db.valuesAt(0), 0, data, 0, rlen*clen); } @Override http://git-wip-us.apache.org/repos/asf/systemml/blob/131b647b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java index 926f7ff..cd25516 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/DenseBlockLDRB.java @@ -119,7 +119,7 @@ public class DenseBlockLDRB extends DenseBlock public long countNonZeros() { long nnz = 0; for(int i=0; i<numBlocks(); i++ ) { - double[] a = values(i); + double[] a = valuesAt(i); for(int j=0; j<a.length; 
j++) nnz += (a[j]!=0) ? 1 : 0; } @@ -143,9 +143,14 @@ public class DenseBlockLDRB extends DenseBlock public double[][] values() { return data; } + + @Override + public double[] values(int r) { + return data[r / blen]; + } @Override - public double[] values(int bix) { + public double[] valuesAt(int bix) { return data[bix]; } @@ -193,7 +198,7 @@ public class DenseBlockLDRB extends DenseBlock @Override public void set(DenseBlock db) { for(int bi=0; bi<numBlocks(); bi++) - System.arraycopy(db.values(bi), 0, data[bi], 0, size(bi)); + System.arraycopy(db.valuesAt(bi), 0, data[bi], 0, size(bi)); } @Override @@ -205,7 +210,7 @@ public class DenseBlockLDRB extends DenseBlock public String toString() { StringBuilder sb = new StringBuilder(); for(int i=0; i<rlen; i++) { - double[] data = values(index(i)); + double[] data = values(i); int ix = pos(i); for(int j=0; j<clen; j++) { sb.append(data[ix+j]);
