[SYSTEMML-378] Extended sparse block and impls (for runtime integration) The new sparse block abstraction was missing essential primitives such as various allocate, reset, row get/set, and print which prevented a seamless integration. This change modifies the abstraction and its implementations accordingly.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2af90e04 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2af90e04 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2af90e04 Branch: refs/heads/master Commit: 2af90e04624d0e82281a5e2d92fa6c8a09ec6795 Parents: 47742f2 Author: Matthias Boehm <[email protected]> Authored: Tue Jan 19 23:06:18 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Wed Jan 20 14:58:26 2016 -0800 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/SparseBlock.java | 77 +++++++++++++++++- .../runtime/matrix/data/SparseBlockCOO.java | 63 ++++++++++++++- .../runtime/matrix/data/SparseBlockCSR.java | 85 +++++++++++++++++++- .../runtime/matrix/data/SparseBlockFactory.java | 48 +++++++++++ .../runtime/matrix/data/SparseBlockMCSR.java | 64 ++++++++++++++- .../sysml/runtime/matrix/data/SparseRow.java | 32 ++------ 6 files changed, 333 insertions(+), 36 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java index 9715fd3..e340f5d 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java @@ -61,6 +61,25 @@ public abstract class SparseBlock implements Serializable */ public abstract void allocate(int r); + /** + * Allocate the underlying data structure holding non-zero values + * of row r if necessary, w/ given size. + * + * @param r + */ + public abstract void allocate(int r, int nnz); + + /** + * Allocate the underlying data structure holding non-zero values + * of row r w/ the specified estimated nnz and max nnz. + * + * @param r + * @param ennz + * @param maxnnz + */ + public abstract void allocate(int r, int ennz, int maxnnz); + + //////////////////////// //obtain basic meta data @@ -81,11 +100,24 @@ public abstract class SparseBlock implements Serializable /** * Clears the sparse block by deleting non-zero values. After this call - * size() is guaranteed to return 0. + * all size() calls are guaranteed to return 0. */ public abstract void reset(); /** + * Clears the sparse block by deleting non-zero values. After this call + * all size() calls are guaranteed to return 0. + */ + public abstract void reset(int ennz, int maxnnz); + + /** + * Clears row r of the sparse block by deleting non-zero values. + * After this call size(r) is guaranteed to return 0. + */ + public abstract void reset(int r, int ennz, int maxnnz); + + + /** * Get the number of non-zero values in the sparse block. * * @return @@ -183,6 +215,19 @@ public abstract class SparseBlock implements Serializable public abstract boolean set(int r, int c, double v); /** + * Set the values of row r to the given sparse row. This might update + * existing non-zero values, insert a new row, or delete a row. + * + * NOTE: This method exists for incremental runtime integration and might + * be deleted in the future. + * + * @param r row index starting at 0 + * @param row + * @return + */ + public abstract void set(int r, SparseRow row); + + /** * Append a value to the end of the physical representation. This should * only be used for operations with sequential write pattern or if followed * by a sort() operation. Note that this operation does not perform any @@ -245,6 +290,17 @@ public abstract class SparseBlock implements Serializable public abstract double get(int r, int c); /** + * Get values of row r in the format of a sparse row. + * + * NOTE: This method exists for incremental runtime integration and might + * be deleted in the future. + * + * @param r row index starting at 0 + * @return + */ + public abstract SparseRow get(int r); + + /** * Get position of first column index lower than or equal column c * in row r. The position is relative to the indexes/values arrays * returned by indexes(r) and values(r). If no such value exists, @@ -297,6 +353,19 @@ public abstract class SparseBlock implements Serializable } /** + * Get a non-zero iterator over the partial sparse block [0,ru). Note + * that the returned IJV object is reused across next calls and should + * be directly consumed or deep copied. + * + * @param ru exclusive upper row index starting at 0 + * @return + */ + public Iterator<IJV> getIterator(int ru) { + //default generic iterator, override if necessary + return new SparseBlockIterator(ru); + } + + /** * Get a non-zero iterator over the subblock [rl, ru). Note that * the returned IJV object is reused across next calls and should * be directly consumed or deep copied. @@ -309,8 +378,10 @@ public abstract class SparseBlock implements Serializable //default generic iterator, override if necessary return new SparseBlockIterator(rl, Math.min(ru,numRows())); } - - + + @Override + public abstract String toString(); + /** * Default sparse block iterator implemented against the sparse block * api in an implementation-agnostic manner. http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java index dae25a4..792d0d8 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCOO.java @@ -113,8 +113,8 @@ public class SparseBlockCOO extends SparseBlock for( int i=0, pos=0; i<_rlen; i++ ) { int alen = rows[i].size(); - int[] aix = rows[i].getIndexContainer(); - double[] avals = rows[i].getValueContainer(); + int[] aix = rows[i].indexes(); + double[] avals = rows[i].values(); for( int j=0; j<alen; j++ ) { _rindexes[pos] = i; _cindexes[pos] = aix[j]; @@ -128,6 +128,16 @@ public class SparseBlockCOO extends SparseBlock public void allocate(int r) { //do nothing everything preallocated } + + @Override + public void allocate(int r, int nnz) { + //do nothing everything preallocated + } + + @Override + public void allocate(int r, int ennz, int maxnnz) { + //do nothing everything preallocated + } @Override public int numRows() { @@ -144,6 +154,23 @@ public class SparseBlockCOO extends SparseBlock _size = 0; } + @Override + public void reset(int ennz, int maxnnz) { + _size = 0; + } + + @Override + public void reset(int r, int ennz, int maxnnz) { + int pos = pos(r); + int len = size(r); + + //overlapping array copy (shift rhs values left) + System.arraycopy(_rindexes, pos+len, _rindexes, pos, _size-(pos+len)); + System.arraycopy(_cindexes, pos+len, _cindexes, pos, _size-(pos+len)); + System.arraycopy(_values, pos+len, _values, pos, _size-(pos+len)); + _size -= len; + } + @Override public long size() { return _size; @@ -240,6 +267,20 @@ public class SparseBlockCOO extends SparseBlock shiftRightAndInsert(index, r, c, v); return true; // nnz++ } + + @Override + public void set(int r, SparseRow row) { + int pos = pos(r); + int alen = row.size(); + int[] aix = row.indexes(); + double[] avals = row.values(); + deleteIndexRange(r, aix[0], aix[alen-1]+1); + shiftRightByN(pos, alen); + Arrays.fill(_rindexes, pos, pos+alen, r); + System.arraycopy(aix, 0, _cindexes, pos, alen); + System.arraycopy(avals, 0, _values, pos, alen); + _size+=alen; + } @Override public void append(int r, int c, double v) { @@ -331,6 +372,19 @@ public class SparseBlockCOO extends SparseBlock int index = Arrays.binarySearch(_cindexes, pos, pos+len, c); return (index >= 0) ? _values[index] : 0; } + + @Override + public SparseRow get(int r) { + int pos = pos(r); + int len = size(r); + + SparseRow row = new SparseRow(len); + System.arraycopy(_cindexes, pos, row.indexes(), 0, len); + System.arraycopy(_values, pos, row.values(), 0, len); + row.setSize(len); + + return row; + } @Override public int posFIndexLTE(int r, int c) { @@ -381,6 +435,11 @@ public class SparseBlockCOO extends SparseBlock public Iterator<IJV> getIterator() { return new SparseBlockCOOIterator(0, _size); } + + @Override + public Iterator<IJV> getIterator(int ru) { + return new SparseBlockCOOIterator(0, pos(ru)); + } @Override public Iterator<IJV> getIterator(int rl, int ru) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java index e0e1a23..bb17c83 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockCSR.java @@ -115,8 +115,8 @@ public class SparseBlockCSR extends SparseBlock for( int i=0, pos=0; i<rlen; i++ ) { int alen = rows[i].size(); - int[] aix = rows[i].getIndexContainer(); - double[] avals = rows[i].getValueContainer(); + int[] aix = rows[i].indexes(); + double[] avals = rows[i].values(); for( int j=0; j<alen; j++ ) { _indexes[pos] = aix[j]; _values[pos] = avals[j]; @@ -130,6 +130,16 @@ public class SparseBlockCSR extends SparseBlock public void allocate(int r) { //do nothing everything preallocated } + + @Override + public void allocate(int r, int nnz) { + //do nothing everything preallocated + } + + @Override + public void allocate(int r, int ennz, int maxnnz) { + //do nothing everything preallocated + } @Override public int numRows() { @@ -145,6 +155,22 @@ public class SparseBlockCSR extends SparseBlock public void reset() { _size = 0; } + + @Override + public void reset(int ennz, int maxnnz) { + _size = 0; + } + + @Override + public void reset(int r, int ennz, int maxnnz) { + int pos = pos(r); + int len = size(r); + + //overlapping array copy (shift rhs values left) + System.arraycopy(_indexes, pos+len, _indexes, pos, _size-(pos+len)); + System.arraycopy(_values, pos+len, _values, pos, _size-(pos+len)); + _size -= len; + } @Override public long size() { @@ -228,6 +254,19 @@ public class SparseBlockCSR extends SparseBlock } @Override + public void set(int r, SparseRow row) { + int pos = pos(r); + int alen = row.size(); + int[] aix = row.indexes(); + double[] avals = row.values(); + deleteIndexRange(r, aix[0], aix[alen-1]+1); + shiftRightByN(pos, alen); + System.arraycopy(aix, 0, _indexes, pos, alen); + System.arraycopy(avals, 0, _values, pos, alen); + _size+=alen; + } + + @Override public void append(int r, int c, double v) { //early abort on zero if( v==0 ) return; @@ -321,7 +360,20 @@ public class SparseBlockCSR extends SparseBlock int index = Arrays.binarySearch(_indexes, pos, pos+len, c); return (index >= 0) ? _values[index] : 0; } - + + @Override + public SparseRow get(int r) { + int pos = pos(r); + int len = size(r); + + SparseRow row = new SparseRow(len); + System.arraycopy(_indexes, pos, row.indexes(), 0, len); + System.arraycopy(_values, pos, row.values(), 0, len); + row.setSize(len); + + return row; + } + @Override public int posFIndexLTE(int r, int c) { int pos = pos(r); @@ -367,6 +419,33 @@ public class SparseBlockCSR extends SparseBlock return (index < pos+len) ? index : -1; } + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("SparseBlockCSR: rlen="); + sb.append(numRows()); + sb.append(", nnz="); + sb.append(size()); + sb.append("\n"); + for( int i=0; i<numRows(); i++ ) { + sb.append("row +"); + sb.append(i); + sb.append(": "); + //append row + int pos = pos(i); + int len = size(i); + for(int j=pos; j<pos+len; j++) { + sb.append(_indexes[j]); + sb.append(": "); + sb.append(_values[j]); + sb.append("\t"); + } + sb.append("\n"); + } + + return sb.toString(); + } + /////////////////////////// // private helper methods http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java new file mode 100644 index 0000000..7d5a5b1 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockFactory.java @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +package org.apache.sysml.runtime.matrix.data; + +public abstract class SparseBlockFactory +{ + /** + * + * @param rlen + */ + public static SparseBlock createSparseBlock(int rlen) { + return createSparseBlock(MatrixBlock.DEFAULT_SPARSEBLOCK, rlen); + } + + /** + * + * @param type + * @param rlen + * @return + */ + public static SparseBlock createSparseBlock( SparseBlock.Type type, int rlen ) { + switch( type ) { + case MCSR: return new SparseBlockMCSR(rlen, -1); + case CSR: return new SparseBlockCSR(rlen); + case COO: return new SparseBlockCOO(rlen); + default: + throw new RuntimeException("Unexpected sparse block type: "+type.toString()); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java index ce0b42c..dc4ffe7 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlockMCSR.java @@ -89,6 +89,18 @@ public class SparseBlockMCSR extends SparseBlock } @Override + public void allocate(int r, int nnz) { + if( _rows[r] == null ) + _rows[r] = new SparseRow(nnz); + } + + @Override + public void allocate(int r, int ennz, int maxnnz) { + if( _rows[r] == null ) + _rows[r] = new SparseRow(ennz, maxnnz); + } + + @Override public int numRows() { return _rows.length; } @@ -102,7 +114,20 @@ public class SparseBlockMCSR extends SparseBlock public void reset() { for( SparseRow row : _rows ) if( row != null ) - row.reset(row.size(), -1); + row.reset(row.size(), Integer.MAX_VALUE); + } + + @Override + public void reset(int ennz, int maxnnz) { + for( SparseRow row : _rows ) + if( row != null ) + row.reset(ennz, maxnnz); + } + + @Override + public void reset(int r, int ennz, int maxnnz) { + if( _rows[r] != null ) + _rows[r].reset(ennz, maxnnz); } @Override @@ -118,14 +143,15 @@ public class SparseBlockMCSR extends SparseBlock @Override public int size(int r) { //prior check with isEmpty(r) expected - return _rows[r].size(); + //TODO perf sparse block + return (_rows[r]!=null) ? _rows[r].size() : 0; } @Override public long size(int rl, int ru) { int ret = 0; for( int i=rl; i<ru; i++ ) - ret += (_rows[i]!=null) ? _rows[i].size() : 0; + ret += (_rows[i]!=null) ? _rows[i].size() : 0; return ret; } @@ -172,6 +198,11 @@ public class SparseBlockMCSR extends SparseBlock } @Override + public void set(int r, SparseRow row) { + _rows[r] = row; + } + + @Override public void append(int r, int c, double v) { if( _rows[r] == null ) _rows[r] = new SparseRow(); @@ -208,9 +239,15 @@ public class SparseBlockMCSR extends SparseBlock @Override public double get(int r, int c) { - //prior check with isEmpty(r) expected + if( _rows[r] == null ) + return 0; return _rows[r].get(c); } + + @Override + public SparseRow get(int r) { + return _rows[r]; + } @Override public int posFIndexLTE(int r, int c) { @@ -229,4 +266,23 @@ public class SparseBlockMCSR extends SparseBlock //prior check with isEmpty(r) expected return _rows[r].searchIndexesFirstGT(c); } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("SparseBlockMCSR: rlen="); + sb.append(numRows()); + sb.append(", nnz="); + sb.append(size()); + sb.append("\n"); + for( int i=0; i<numRows(); i++ ) { + sb.append("row +"); + sb.append(i); + sb.append(": "); + sb.append(_rows[i]); + sb.append("\n"); + } + + return sb.toString(); + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2af90e04/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRow.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRow.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRow.java index 31d26ad..e6bbbcf 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRow.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseRow.java @@ -78,51 +78,35 @@ public class SparseRow implements Serializable size = newsize; } - public int size() - { + public int size() { return size; } - public void setSize(int newsize) - { + public void setSize(int newsize) { size = newsize; } - public boolean isEmpty() - { + public boolean isEmpty() { return (size == 0); } - public double[] values() - { - return getValueContainer(); - } - - public double[] getValueContainer() - { + public double[] values() { return values; } - public int[] indexes() - { - return getIndexContainer(); - } - - public int[] getIndexContainer() - { + public int[] indexes() { return indexes; } - public void setValueContainer(double[] d) { + public void setValues(double[] d) { values = d; } - public void setIndexContainer(int[] i) { + public void setIndexes(int[] i) { indexes = i; } - public int capacity() - { + public int capacity() { return values.length; }
