[SYSTEMML-378] New sparse block abstraction Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7c06ef60 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7c06ef60 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7c06ef60
Branch: refs/heads/master Commit: 7c06ef6068ee4fc3633d63041e4b5174d902d516 Parents: cf7d206 Author: Matthias Boehm <[email protected]> Authored: Sun Jan 17 23:00:41 2016 -0800 Committer: Matthias Boehm <[email protected]> Committed: Sun Jan 17 23:12:18 2016 -0800 ---------------------------------------------------------------------- .../sysml/runtime/matrix/data/SparseBlock.java | 350 +++++++++++++++++++ 1 file changed, 350 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c06ef60/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java new file mode 100644 index 0000000..478e900 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java @@ -0,0 +1,350 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + + +package org.apache.sysml.runtime.matrix.data; + +import java.io.Serializable; +import java.util.Iterator; + +/** + * This SparseBlock is an abstraction for different sparse matrix formats. + * Since the design is a tradeoff between performance and generality, we + * restrict this abstraction to row-major sparse representations for now. + * All sparse matrix block operations are supposed to be implemented + * against this abstraction in order to enable variability/extensibility. + * + * Example sparse format that can be implemented efficiently include + * CSR, MCSR, and - with performance drawbacks - COO. + * + */ +public abstract class SparseBlock implements Serializable +{ + private static final long serialVersionUID = -5008747088111141395L; + + //internal configuration parameters for all sparse blocks + protected static final int INIT_CAPACITY = 4; //initial array capacity + protected static final double RESIZE_FACTOR1 = 2; //factor until reaching est nnz + protected static final double RESIZE_FACTOR2 = 1.1; //factor after reaching est nnz + + public enum Type { + MCSR, + CSR, + COO, + } + + + //////////////////////// + //basic allocation + + /** + * Allocate the underlying data structure holding non-zero values + * of row r if necessary. + * + * @param r + */ + public abstract void allocate(int r); + + //////////////////////// + //obtain basic meta data + + /** + * Get the number of rows in the sparse block. + * + * @return + */ + public abstract int numRows(); + + /** + * Indicates if the underlying implementation allows thread-safe row + * updates if concurrent threads update disjoint rows. + * + * @return + */ + public abstract boolean isThreadSafe(); + + /** + * Get the number of non-zero values in the sparse block. + * + * @return + */ + public abstract long size(); + + /** + * Get the number of non-zero values in row r. + * + * @param r row index starting at 0 + * @return + */ + public abstract int size(int r); + + /** + * Get information if row r is empty, i.e., does not contain non-zero + * values. Equivalent to size(r)==0. Users should do this check if + * it is unknown if the underlying row data structure is allocated. + * + * @param r row index starting at 0 + * @return + */ + public abstract boolean isEmpty(int r); + + + //////////////////////// + //obtain indexes/values/positions + + /** + * Get the sorted array of column indexes of all non-zero entries in + * row r. Note that - for flexibility of the implementing format - the + * returned array may be larger, where the range for row r is given by + * [pos(r),pos(r)+size(r)). + * + * @param r row index starting at 0 + * @return + */ + public abstract int[] indexes(int r); + + /** + * Get the array of all non-zero entries in row r, sorted by their column + * indexes. Note that - for flexibility of the implementing format - the + * returned array may be larger, where the range for row r is given by + * [pos(r),pos(r)+size(r)). + * + * @param r row index starting at 0 + * @return + */ + public abstract double[] values(int r); + + /** + * Get the starting position of row r in the indexes/values arrays returned + * by indexes(r) and values(r). + * + * @param r row index starting at 0 + * @return + */ + public abstract int pos(int r); + + + //////////////////////// + //update operations + + /** + * Set the value of a matrix cell (r,c). This might update an existing + * non-zero value, insert a new non-zero value, or delete a non-zero value. + * + * @param r row index starting at 0 + * @param c column index starting at 0 + * @param v zero or non-zero value + * @return + */ + public abstract boolean set(int r, int c, double v); + + /** + * Append a value to the end of the physical representation. This should + * only be used for operations with sequential write pattern or if followed + * by a sort() operation. Note that this operation does not perform any + * matrix cell updates. + * + * @param r row index starting at 0 + * @param c column index starting at 0 + * @param v zero or non-zero value + */ + public abstract void append(int r, int c, double v); + + /** + * Sets a sorted array of non-zeros values into the column range [cl,cu) + * in row r. The passed value array may be larger and the relevant range + * is given by [vix,vix+len). + * + * @param r row index starting at 0 + * @param cl lower column index starting at 0 + * @param cu upper column index starting at 0 + * @param v value array + * @param vix start index in value array + * @param vlen number of relevant values + */ + public abstract void setIndexRange(int r, int cl, int cu, double[] v, int vix, int vlen); + + /** + * Deletes all non-zero values of the given column range [cl,cu) in row r. + * + * @param r row index starting at 0 + * @param cl lower column index starting at 0 + * @param cu upper column index starting at 0 + */ + public abstract void deleteIndexRange(int r, int cl, int cu); + + /** + * Sort all non-zero value/index pairs of the sparse block by row + * and column index. + */ + public abstract void sort(); + + /** + * Sort all non-zero value/index pairs of row r column index. + * + * @param r row index starting at 0 + */ + public abstract void sort(int r); + + + //////////////////////// + //search operations + + /** + * Get value of matrix cell (r,c). In case of non existing values + * this call returns 0. + * + * @param r row index starting at 0 + * @param c column index starting at 0 + * @return + */ + public abstract double get(int r, int c); + + /** + * Get position of first column index lower than or equal column c + * in row r. The position is relative to the indexes/values arrays + * returned by indexes(r) and values(r). If no such value exists, + * this call returns -1. + * + * @param r row index starting at 0 + * @param c column index starting at 0 + * @return + */ + public abstract int posFIndexLTE(int r, int c); + + /** + * Get position of first column index greater than or equal column c + * in row r. The position is relative to the indexes/values arrays + * returned by indexes(r) and values(r). If no such value exists, + * this call returns -1. + * + * @param r + * @param c + * @return + */ + public abstract int posFIndexGTE(int r, int c); + + /** + * Get position of first column index greater than column c in row r. + * The position is relative to the indexes/values arrays returned by + * indexes(r) and values(r). If no such value exists, this call + * returns -1. + * + * @param r + * @param c + * @return + */ + public abstract int posFIndexGT(int r, int c); + + + //////////////////////// + //iterators + + /** + * Get a non-zero iterator over the entire sparse block. Note that + * the returned IJV object is reused across next calls and should + * be directly consumed or deep copied. + * + * @return + */ + public Iterator<IJV> getIterator() { + //default generic iterator, override if necessary + return new SparseBlockIterator(numRows()); + } + + /** + * Get a non-zero iterator over the subblock [rl, ru). Note that + * the returned IJV object is reused across next calls and should + * be directly consumed or deep copied. + * + * @param rl inclusive lower row index starting at 0 + * @param ru exclusive upper row index starting at 0 + * @return + */ + public Iterator<IJV> getIterator(int rl, int ru) { + //default generic iterator, override if necessary + return new SparseBlockIterator(rl, Math.min(ru,numRows())); + } + + + /** + * Default sparse block iterator implemented against the sparse block + * api in an implementation-agnostic manner. + * + */ + private class SparseBlockIterator implements Iterator<IJV> + { + private int _rlen = 0; //row upper + private int _curRow = -1; //current row + private int _curColIx = -1; //current col index pos + private int[] _curIndexes = null; //current col indexes + private double[] _curValues = null; //current col values + private boolean _noNext = false; //end indicator + private IJV retijv = new IJV(); //reuse output tuple + + protected SparseBlockIterator(int ru) { + _rlen = ru; + _curRow = 0; + findNextNonZeroRow(); + } + + protected SparseBlockIterator(int rl, int ru) { + _rlen = ru; + _curRow = rl; + findNextNonZeroRow(); + } + + @Override + public boolean hasNext() { + return !_noNext; + } + + @Override + public IJV next( ) { + retijv.set(_curRow, _curIndexes[_curColIx], _curValues[_curColIx]); + if( ++_curColIx >= pos(_curRow)+size(_curRow) ) { + _curRow++; + findNextNonZeroRow(); + } + + return retijv; + } + + @Override + public void remove() { + throw new RuntimeException("SparseBlockIterator is unsupported!"); + } + + /** + * Moves cursor to next non-zero value or indicates that no more + * values are available. + */ + private void findNextNonZeroRow() { + while( _curRow<_rlen && isEmpty(_curRow)) + _curRow++; + if(_curRow >= _rlen) + _noNext = true; + else { + _curColIx = pos(_curRow); + _curIndexes = indexes(_curRow); + _curValues = values(_curRow); + } + } + } +}
