[SYSTEMML-378] New sparse block abstraction

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/7c06ef60
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/7c06ef60
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/7c06ef60

Branch: refs/heads/master
Commit: 7c06ef6068ee4fc3633d63041e4b5174d902d516
Parents: cf7d206
Author: Matthias Boehm <[email protected]>
Authored: Sun Jan 17 23:00:41 2016 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Sun Jan 17 23:12:18 2016 -0800

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/SparseBlock.java  | 350 +++++++++++++++++++
 1 file changed, 350 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/7c06ef60/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
new file mode 100644
index 0000000..478e900
--- /dev/null
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/SparseBlock.java
@@ -0,0 +1,350 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+
+package org.apache.sysml.runtime.matrix.data;
+
+import java.io.Serializable;
+import java.util.Iterator;
+
+/**
+ * This SparseBlock is an abstraction for different sparse matrix formats.
+ * Since the design is a tradeoff between performance and generality, we 
+ * restrict this abstraction to row-major sparse representations for now. 
+ * All sparse matrix block operations are supposed to be implemented 
+ * against this abstraction in order to enable variability/extensibility.
+ * 
+ * Example sparse format that can be implemented efficiently include
+ * CSR, MCSR, and - with performance drawbacks - COO.
+ * 
+ */
+public abstract class SparseBlock implements Serializable
+{
+       private static final long serialVersionUID = -5008747088111141395L;
+       
+       //internal configuration parameters for all sparse blocks
+       protected static final int INIT_CAPACITY = 4;       //initial array 
capacity
+       protected static final double RESIZE_FACTOR1 = 2;   //factor until 
reaching est nnz
+       protected static final double RESIZE_FACTOR2 = 1.1; //factor after 
reaching est nnz
+       
+       public enum Type {
+               MCSR,
+               CSR,
+               COO,
+       }
+       
+       
+       ////////////////////////
+       //basic allocation
+
+       /**
+        * Allocate the underlying data structure holding non-zero values
+        * of row r if necessary. 
+        * 
+        * @param r
+        */
+       public abstract void allocate(int r);
+       
+       ////////////////////////
+       //obtain basic meta data
+       
+       /**
+        * Get the number of rows in the sparse block.
+        * 
+        * @return
+        */
+       public abstract int numRows();
+       
+       /**
+        * Indicates if the underlying implementation allows thread-safe row
+        * updates if concurrent threads update disjoint rows. 
+        * 
+        * @return
+        */
+       public abstract boolean isThreadSafe();
+       
+       /**
+        * Get the number of non-zero values in the sparse block.
+        * 
+        * @return
+        */
+       public abstract long size();
+       
+       /**
+        * Get the number of non-zero values in row r.
+        * 
+        * @param r  row index starting at 0
+        * @return
+        */
+       public abstract int size(int r);
+       
+       /**
+        * Get information if row r is empty, i.e., does not contain non-zero 
+        * values. Equivalent to size(r)==0. Users should do this check if 
+        * it is unknown if the underlying row data structure is allocated. 
+        * 
+        * @param r  row index starting at 0
+        * @return
+        */
+       public abstract boolean isEmpty(int r); 
+       
+       
+       ////////////////////////
+       //obtain indexes/values/positions
+       
+       /**
+        * Get the sorted array of column indexes of all non-zero entries in 
+        * row r. Note that - for flexibility of the implementing format - the 
+        * returned array may be larger, where the range for row r is given by 
+        * [pos(r),pos(r)+size(r)).
+        * 
+        * @param r  row index starting at 0
+        * @return
+        */
+       public abstract int[] indexes(int r);
+       
+       /**
+        * Get the array of all non-zero entries in row r, sorted by their 
column
+        * indexes. Note that - for flexibility of the implementing format - 
the 
+        * returned array may be larger, where the range for row r is given by 
+        * [pos(r),pos(r)+size(r)).
+        * 
+        * @param r  row index starting at 0
+        * @return
+        */
+       public abstract double[] values(int r);
+       
+       /**
+        * Get the starting position of row r in the indexes/values arrays 
returned
+        * by indexes(r) and values(r). 
+        * 
+        * @param r  row index starting at 0
+        * @return
+        */
+       public abstract int pos(int r);
+       
+       
+       ////////////////////////
+       //update operations
+       
+       /**
+        * Set the value of a matrix cell (r,c). This might update an existing 
+        * non-zero value, insert a new non-zero value, or delete a non-zero 
value.
+        * 
+        * @param r  row index starting at 0
+        * @param c  column index starting at 0
+        * @param v  zero or non-zero value 
+        * @return
+        */
+       public abstract boolean set(int r, int c, double v);
+       
+       /**
+        * Append a value to the end of the physical representation. This 
should 
+        * only be used for operations with sequential write pattern or if 
followed
+        * by a sort() operation. Note that this operation does not perform any 
+        * matrix cell updates.  
+        * 
+        * @param r  row index starting at 0
+        * @param c  column index starting at 0
+        * @param v  zero or non-zero value
+        */
+       public abstract void append(int r, int c, double v);
+       
+       /**
+        * Sets a sorted array of non-zeros values into the column range 
[cl,cu) 
+        * in row r. The passed value array may be larger and the relevant 
range 
+        * is given by [vix,vix+len).
+        * 
+        * @param r    row index starting at 0
+        * @param cl   lower column index starting at 0
+        * @param cu   upper column index starting at 0
+        * @param v    value array
+        * @param vix  start index in value array
+        * @param vlen number of relevant values 
+        */
+       public abstract void setIndexRange(int r, int cl, int cu, double[] v, 
int vix, int vlen);
+       
+       /**
+        * Deletes all non-zero values of the given column range [cl,cu) in row 
r.
+        * 
+        * @param r   row index starting at 0
+        * @param cl  lower column index starting at 0
+        * @param cu  upper column index starting at 0
+        */
+       public abstract void deleteIndexRange(int r, int cl, int cu);
+       
+       /**
+        * Sort all non-zero value/index pairs of the sparse block by row 
+        * and column index. 
+        */
+       public abstract void sort();
+       
+       /**
+        * Sort all non-zero value/index pairs of row r column index.
+        * 
+        * @param r  row index starting at 0
+        */
+       public abstract void sort(int r);
+       
+       
+       ////////////////////////
+       //search operations
+       
+       /**
+        * Get value of matrix cell (r,c). In case of non existing values
+        * this call returns 0.
+        * 
+        * @param r  row index starting at 0
+        * @param c  column index starting at 0
+        * @return
+        */
+       public abstract double get(int r, int c);
+       
+       /**
+        * Get position of first column index lower than or equal column c 
+        * in row r. The position is relative to the indexes/values arrays 
+        * returned by indexes(r) and values(r). If no such value exists, 
+        * this call returns -1.
+        * 
+        * @param r  row index starting at 0
+        * @param c  column index starting at 0
+        * @return
+        */
+       public abstract int posFIndexLTE(int r, int c);
+       
+       /**
+        * Get position of first column index greater than or equal column c
+        * in row r. The position is relative to the indexes/values arrays 
+        * returned by indexes(r) and values(r). If no such value exists, 
+        * this call returns -1.
+        * 
+        * @param r
+        * @param c
+        * @return
+        */
+       public abstract int posFIndexGTE(int r, int c);
+       
+       /**
+        * Get position of first column index greater than column c in row r. 
+        * The position is relative to the indexes/values arrays returned by 
+        * indexes(r) and values(r). If no such value exists, this call 
+        * returns -1.
+        * 
+        * @param r
+        * @param c
+        * @return
+        */
+       public abstract int posFIndexGT(int r, int c);
+       
+       
+       ////////////////////////
+       //iterators
+       
+       /**
+        * Get a non-zero iterator over the entire sparse block. Note that
+        * the returned IJV object is reused across next calls and should 
+        * be directly consumed or deep copied. 
+        * 
+        * @return
+        */
+       public Iterator<IJV> getIterator() {
+               //default generic iterator, override if necessary
+               return new SparseBlockIterator(numRows());
+       }
+       
+       /**
+        * Get a non-zero iterator over the subblock [rl, ru). Note that
+        * the returned IJV object is reused across next calls and should 
+        * be directly consumed or deep copied. 
+        * 
+        * @param rl   inclusive lower row index starting at 0
+        * @param ru   exclusive upper row index starting at 0
+        * @return
+        */
+       public Iterator<IJV> getIterator(int rl, int ru) {
+               //default generic iterator, override if necessary
+               return new SparseBlockIterator(rl, Math.min(ru,numRows()));
+       }
+
+
+       /**
+        * Default sparse block iterator implemented against the sparse block
+        * api in an implementation-agnostic manner.
+        * 
+        */
+       private class SparseBlockIterator implements Iterator<IJV>
+       {
+               private int _rlen = 0; //row upper
+               private int _curRow = -1; //current row
+               private int _curColIx = -1; //current col index pos
+               private int[] _curIndexes = null; //current col indexes
+               private double[] _curValues = null; //current col values
+               private boolean _noNext = false; //end indicator                
+               private IJV retijv = new IJV(); //reuse output tuple
+
+               protected SparseBlockIterator(int ru) {
+                       _rlen = ru;
+                       _curRow = 0;
+                       findNextNonZeroRow();
+               }
+               
+               protected SparseBlockIterator(int rl, int ru) {
+                       _rlen = ru;
+                       _curRow = rl;
+                       findNextNonZeroRow();
+               }
+               
+               @Override
+               public boolean hasNext() {
+                       return !_noNext;
+               }
+
+               @Override
+               public IJV next( ) {
+                       retijv.set(_curRow, _curIndexes[_curColIx], 
_curValues[_curColIx]);
+                       if( ++_curColIx >= pos(_curRow)+size(_curRow) ) {
+                               _curRow++;
+                               findNextNonZeroRow();
+                       }
+                       
+                       return retijv;
+               }
+
+               @Override
+               public void remove() {
+                       throw new RuntimeException("SparseBlockIterator is 
unsupported!");                      
+               }               
+               
+               /**
+                * Moves cursor to next non-zero value or indicates that no 
more 
+                * values are available.
+                */
+               private void findNextNonZeroRow() {
+                       while( _curRow<_rlen && isEmpty(_curRow))
+                               _curRow++;
+                       if(_curRow >= _rlen)
+                               _noNext = true;
+                       else {
+                               _curColIx = pos(_curRow);
+                               _curIndexes = indexes(_curRow); 
+                               _curValues = values(_curRow);
+                       }
+               }               
+       }
+}

Reply via email to