Repository: incubator-systemml
Updated Branches:
  refs/heads/master b20727de5 -> df090f2b1


[SYSTEMML-923] Performance spark csv reblock of sparse matrices

Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ed072841
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ed072841
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ed072841

Branch: refs/heads/master
Commit: ed072841c25a0c527f78d66d087c3d047c2f95d6
Parents: b20727d
Author: Matthias Boehm <mbo...@us.ibm.com>
Authored: Fri Sep 16 01:17:31 2016 +0200
Committer: Matthias Boehm <mbo...@us.ibm.com>
Committed: Fri Sep 16 06:33:28 2016 +0200

----------------------------------------------------------------------
 .../instructions/spark/utils/RDDConverterUtils.java    | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ed072841/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
index 6fe4a50..ba1934a 100644
--- a/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
+++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/utils/RDDConverterUtils.java
@@ -50,6 +50,7 @@ import org.apache.spark.sql.types.StructField;
 import scala.Tuple2;
 
 import org.apache.sysml.conf.ConfigurationManager;
+import org.apache.sysml.hops.OptimizerUtils;
 import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.instructions.spark.data.SerLongWritable;
 import org.apache.sysml.runtime.instructions.spark.data.SerText;
@@ -597,9 +598,8 @@ public class RDDConverterUtils
                        //determine number of non-zeros of row (w/o string 
parsing)
                        long lnnz = 0;
                        for( String col : cols ) {
-                               if( !col.isEmpty() && !col.equals("0") && 
!col.equals("0.0") ) {
-                                       lnnz++;
-                               }
+                               lnnz += (!col.isEmpty() && !col.equals("0") 
+                                               && !col.equals("0.0")) ? 1 : 0;
                        }
                        
                        //update counters
@@ -626,6 +626,8 @@ public class RDDConverterUtils
                private long _clen = -1;
                private int _brlen = -1;
                private int _bclen = -1;
+               private double _sparsity = 1.0;
+               private boolean _sparse = false;
                private boolean _header = false;
                private String _delim = null;
                private boolean _fill = false;
@@ -637,6 +639,9 @@ public class RDDConverterUtils
                        _clen = mc.getCols();
                        _brlen = mc.getRowsPerBlock();
                        _bclen = mc.getColsPerBlock();
+                       _sparsity = OptimizerUtils.getSparsity(mc);
+                       _sparse = mc.nnzKnown() && 
MatrixBlock.evalSparseFormatInMemory(mc.getRows(), 
+                                       mc.getCols(), mc.getNonZeros()) && 
(!fill || fillValue==0);
                        _header = hasHeader;
                        _delim = delim;
                        _fill = fill;
@@ -710,7 +715,7 @@ public class RDDConverterUtils
                        for( int cix=1; cix<=ncblks; cix++ ) {
                                int lclen = 
(int)UtilFunctions.computeBlockSize(_clen, cix, _bclen);                        
    
                                ix[cix-1] = new MatrixIndexes(rix, cix);
-                               mb[cix-1] = new MatrixBlock(lrlen, lclen, 
false);               
+                               mb[cix-1] = new MatrixBlock(lrlen, lclen, 
_sparse, (int)(lrlen*lclen*_sparsity));               
                        }
                }
                

Reply via email to