(systemds) branch main updated: [SYSTEMDS-3860] Extended sparsity exploitation in codegen row templates

mboehm7 Wed, 20 Aug 2025 06:31:00 -0700

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git



The following commit(s) were added to refs/heads/main by this push:
     new b1c5d64d78 [SYSTEMDS-3860] Extended sparsity exploitation in codegen 
row templates
b1c5d64d78 is described below

commit b1c5d64d7884f3c63284bbfea6003d13faec4a33
Author: Frxms <tomoki.men...@gmail.com>
AuthorDate: Wed Aug 20 14:00:30 2025 +0200

    [SYSTEMDS-3860] Extended sparsity exploitation in codegen row templates
    
    Finalized runtime kernels, code generation, and optimization
    
    Closes #2297.
    Closes #2277.
    Closes #2276.
---
 src/main/java/org/apache/sysds/api/DMLOptions.java |   15 +-
 src/main/java/org/apache/sysds/api/DMLScript.java  |    4 +
 .../apache/sysds/hops/codegen/SpoofCompiler.java   |    2 +-
 .../org/apache/sysds/hops/codegen/cplan/CNode.java |   20 +-
 .../sysds/hops/codegen/cplan/CNodeBinary.java      |  197 +++-
 .../apache/sysds/hops/codegen/cplan/CNodeNary.java |    5 +-
 .../apache/sysds/hops/codegen/cplan/CNodeRow.java  |    4 +-
 .../sysds/hops/codegen/cplan/CNodeTernary.java     |    7 +-
 .../sysds/hops/codegen/cplan/CNodeUnary.java       |   65 +-
 .../sysds/hops/codegen/cplan/CodeTemplate.java     |   18 +-
 .../sysds/hops/codegen/cplan/java/Binary.java      |   62 +-
 .../sysds/hops/codegen/cplan/java/Ternary.java     |    2 +-
 .../sysds/hops/codegen/cplan/java/Unary.java       |   25 +-
 .../sysds/hops/codegen/template/TemplateRow.java   |   26 +-
 .../sysds/runtime/codegen/LibSpoofPrimitives.java  | 1226 ++++++++++++++++++++
 .../apache/sysds/runtime/codegen/SpoofRowwise.java |   45 +-
 .../codegen/CPlanVectorPrimitivesTest.java         |  461 ++++++++
 .../component/codegen/SparseVectorAllocTest.java   |  133 +++
 .../test/functions/codegen/RowAggTmplTest.java     |   12 +-
 .../scripts/functions/codegen/rowAggPattern49.R    |   54 +
 .../scripts/functions/codegen/rowAggPattern49.dml  |   52 +
 .../scripts/functions/codegen/rowAggPattern50.R    |   43 +
 .../scripts/functions/codegen/rowAggPattern50.dml  |   40 +
 23 files changed, 2436 insertions(+), 82 deletions(-)

diff --git a/src/main/java/org/apache/sysds/api/DMLOptions.java 
b/src/main/java/org/apache/sysds/api/DMLOptions.java
index 97d5f54a4a..917aecc4ab 100644
--- a/src/main/java/org/apache/sysds/api/DMLOptions.java
+++ b/src/main/java/org/apache/sysds/api/DMLOptions.java
@@ -86,6 +86,7 @@ public class DMLOptions {
        public boolean              federatedCompilation = false;     // 
Compile federated instructions based on input federation state and privacy 
constraints.
        public boolean              noFedRuntimeConversion = false;   // If 
activated, no runtime conversion of CP instructions to FED instructions will be 
performed.
        public int                  seed          = -1;               // The 
general seed for the execution, if -1 random (system time).
+       public boolean                          sparseIntermediate = false;     
  // whether SparseRowIntermediates should be used for rowwise operations
 
        public final static DMLOptions defaultOptions = new DMLOptions(null);
 
@@ -119,7 +120,8 @@ public class DMLOptions {
                        ", w=" + fedWorker +
                        ", federatedCompilation=" + federatedCompilation +
                        ", noFedRuntimeConversion=" + noFedRuntimeConversion +
-                       ", seed=" + seed + 
+                       ", seed=" + seed +
+                       ", sparseIntermediate=" + sparseIntermediate +
                        '}';
        }
        
@@ -353,6 +355,11 @@ public class DMLOptions {
                        dmlOptions.seed = 
Integer.parseInt(line.getOptionValue("seed"));
                }
 
+               //TODO move to systemds-config instead of command-line arg
+               if(line.hasOption("sparseIntermediate")){
+                       dmlOptions.sparseIntermediate = true;
+               }
+
                return dmlOptions;
        }
        
@@ -436,7 +443,10 @@ public class DMLOptions {
                Option commandlineSeed = OptionBuilder
                        .withDescription("A general seed for the execution 
through the commandline")
                        .hasArg().create("seed");
-               
+               Option sparseRowIntermediates = OptionBuilder
+                       .withDescription("If activated, sparseRowVector 
intermediates will be used to calculate rowwise operations.")
+                       .create("sparseIntermediate");
+
                options.addOption(configOpt);
                options.addOption(cleanOpt);
                options.addOption(statsOpt);
@@ -457,6 +467,7 @@ public class DMLOptions {
                options.addOption(federatedCompilation);
                options.addOption(noFedRuntimeConversion);
                options.addOption(commandlineSeed);
+               options.addOption(sparseRowIntermediates);
 
                // Either a clean(-clean), a file(-f), a script(-s) or 
help(-help) needs to be specified
                OptionGroup fileOrScriptOpt = new OptionGroup()
diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java 
b/src/main/java/org/apache/sysds/api/DMLScript.java
index 2bc8d3b816..65805b5c2e 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -155,6 +155,9 @@ public class DMLScript
        // Global seed 
        public static int               SEED                 = -1;
 
+       // Sparse row flag
+       public static boolean                   SPARSE_INTERMEDIATE = false;
+
        public static String MONITORING_ADDRESS = null;
 
        // flag that indicates whether or not to suppress any prints to stdout
@@ -278,6 +281,7 @@ public class DMLScript
                        LINEAGE_ESTIMATE      = dmlOptions.lineage_estimate;
                        LINEAGE_DEBUGGER      = dmlOptions.lineage_debugger;
                        SEED                  = dmlOptions.seed;
+                       SPARSE_INTERMEDIATE       = 
dmlOptions.sparseIntermediate;
 
 
                        String fnameOptConfig = dmlOptions.configFile;
diff --git a/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java 
b/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
index 34329ca64d..307205dbc8 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/SpoofCompiler.java
@@ -470,7 +470,7 @@ public class SpoofCompiler {
         * @param recompile true if invoked during dynamic recompilation
         * @return dag root nodes of modified dag 
         */
-       public static ArrayList<Hop> optimize(ArrayList<Hop> roots, boolean 
recompile) 
+       public static ArrayList<Hop> optimize(ArrayList<Hop> roots, boolean 
recompile)
        {
                if( roots == null || roots.isEmpty() )
                        return roots;
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
index 36cc8f4979..36ebd238ac 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNode.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysds.hops.codegen.cplan;
 
+import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Types.DataType;
 import org.apache.sysds.hops.codegen.SpoofCompiler.GeneratorAPI;
 import org.apache.sysds.hops.codegen.template.TemplateUtils;
@@ -77,6 +78,14 @@ public abstract class CNode
                        _genVar = "TMP"+_seqVar.getNextID();
                return _genVar; 
        }
+
+       /**
+        * Creates the variable name of this node, optionally marked as sparse.
+        *
+        * @param sparse if true, the name obtained from createVarname() is
+        *               prefixed with "S" and stored as the new variable name
+        * @return the plain name for dense, the "S"-prefixed name for sparse
+        */
+       public String createVarname(boolean sparse) {
+               String name = createVarname();
+               if(sparse)
+                       name = _genVar = "S" + name;
+               return name;
+       }
        
        public String getVarname() {
                return _genVar;
@@ -98,6 +107,8 @@ public abstract class CNode
                                return "len";
                        if(getVarname().startsWith("b"))
                                return getVarname() + ".clen";
+                       else if(getVarname().startsWith("STMP"))
+                               return "len";
                        else if(_dataType == DataType.MATRIX)
                                return getVarname() + ".length";
                }
@@ -222,8 +233,13 @@ public abstract class CNode
        
        protected String replaceUnaryPlaceholders(String tmp, String varj, 
boolean vectIn, GeneratorAPI api) {
                //replace sparse and dense inputs
-               tmp = tmp.replace("%IN1v%", varj+"vals");
-               tmp = tmp.replace("%IN1i%", varj+"ix");
+               if(DMLScript.SPARSE_INTERMEDIATE) {
+                       tmp = tmp.replace("%IN1v%", varj.startsWith("STMP") ? 
varj+".values()" : varj+"vals");
+                       tmp = tmp.replace("%IN1i%", varj.startsWith("STMP") ? 
varj+".indexes()" :varj+"ix");
+               } else {
+                       tmp = tmp.replace("%IN1v%", varj+"vals");
+                       tmp = tmp.replace("%IN1i%", varj+"ix");
+               }
                tmp = tmp.replace("%IN1%", 
                        (vectIn && TemplateUtils.isMatrix(_inputs.get(0))) ? 
                                ((api == GeneratorAPI.JAVA) ? varj + 
".values(rix)" : varj + ".vals(0)" ) :
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeBinary.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeBinary.java
index b29d586c38..6031d8492a 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeBinary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeBinary.java
@@ -22,6 +22,7 @@ package org.apache.sysds.hops.codegen.cplan;
 import java.util.Arrays;
 
 import org.apache.commons.lang3.StringUtils;
+import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Opcodes;
 import org.apache.sysds.hops.codegen.template.TemplateUtils;
 import org.apache.sysds.common.Types.DataType;
@@ -126,7 +127,8 @@ public class CNodeBinary extends CNode {
        }
        
        private final BinType _type;
-       
+       private boolean sparseTemplate;
+
        public CNodeBinary( CNode in1, CNode in2, BinType type ) {
                //canonicalize commutative matrix-scalar operations
                //to increase reuse potential
@@ -143,6 +145,23 @@ public class CNodeBinary extends CNode {
                setOutputDims();
        }
 
+       /**
+        * Creates a binary node and additionally decides, via getTemplateType,
+        * whether the sparse template is used for this operation.
+        *
+        * @param in1 first input
+        * @param in2 second input
+        * @param type binary operation type
+        * @param sparsityEst sparsity estimate handed to getTemplateType
+        * @param scalarVal scalar value handed to getTemplateType
+        */
+       public CNodeBinary( CNode in1, CNode in2, BinType type, double sparsityEst, double scalarVal ) {
+               //canonicalize commutative matrix-scalar operations to increase
+               //reuse potential: a scalar data node on the left moves to the right
+               final boolean swap = type.isCommutative() && in1 instanceof CNodeData
+                       && in1.getDataType()==DataType.SCALAR;
+               _inputs.add(swap ? in2 : in1);
+               _inputs.add(swap ? in1 : in2);
+               _type = type;
+               setOutputDims();
+               //the decision reads _type and _inputs, hence it comes last
+               sparseTemplate = getTemplateType(sparsityEst, scalarVal);
+       }
+
        public BinType getType() {
                return _type;
        }
@@ -157,60 +176,63 @@ public class CNodeBinary extends CNode {
                //generate children
                sb.append(_inputs.get(0).codegen(sparse, api));
                sb.append(_inputs.get(1).codegen(sparse, api));
-               
+
                //generate binary operation (use sparse template, if data input)
-               boolean lsparseLhs = sparse && _inputs.get(0) instanceof 
CNodeData 
-                       && _inputs.get(0).getVarname().startsWith("a");
-               boolean lsparseRhs = sparse && _inputs.get(1) instanceof 
CNodeData 
-                       && _inputs.get(1).getVarname().startsWith("a");
+               boolean lsparseLhs = sparse ? _inputs.get(0) instanceof 
CNodeData
+                       && _inputs.get(0).getVarname().startsWith("a") ||
+                       _inputs.get(0).getVarname().startsWith("STMP") : false;
+               boolean lsparseRhs = sparse ? _inputs.get(1) instanceof 
CNodeData
+                       && _inputs.get(1).getVarname().startsWith("a") ||
+                       _inputs.get(1).getVarname().startsWith("STMP") : false;
                boolean scalarInput = _inputs.get(0).getDataType().isScalar();
                boolean scalarVector = (_inputs.get(0).getDataType().isScalar()
                        && _inputs.get(1).getDataType().isMatrix());
                boolean vectorVector = _inputs.get(0).getDataType().isMatrix()
                        && _inputs.get(1).getDataType().isMatrix();
-               String var = createVarname();
+               String var = createVarname(sparse && sparseTemplate && 
getOutputType(scalarVector, lsparseLhs, lsparseRhs));
                String tmp = getLanguageTemplateClass(this, api)
-                       .getTemplate(_type, lsparseLhs, lsparseRhs, 
scalarVector, scalarInput, vectorVector);
+                       .getTemplate(_type, lsparseLhs, lsparseRhs, 
scalarVector, scalarInput, vectorVector, sparseTemplate);
 
                tmp = tmp.replace("%TMP%", var);
-               
+
                //replace input references and start indexes
                for( int j=0; j<2; j++ ) {
                        String varj = _inputs.get(j).getVarname(api);
-                       
                        //replace sparse and dense inputs
-                       tmp = tmp.replace("%IN"+(j+1)+"v%", varj+"vals");
-                       tmp = tmp.replace("%IN"+(j+1)+"i%", varj+"ix");
+                       tmp = tmp.replace("%IN"+(j+1)+"v%", 
varj.startsWith("STMP") ? varj+".values()" : varj+"vals");
+                       tmp = tmp.replace("%IN"+(j+1)+"i%", 
varj.startsWith("STMP") ? varj+".indexes()" : varj+"ix");
                        tmp = tmp.replace("%IN"+(j+1)+"%",
-                                       varj.startsWith("a") ? (api == 
GeneratorAPI.JAVA ? varj : 
-                                               (_inputs.get(j).getDataType() 
== DataType.MATRIX ? varj + ".vals(0)" : varj)) :
-                                               varj.startsWith("b") ? (api == 
GeneratorAPI.JAVA ? varj + ".values(rix)" : 
-                                                               (_type == 
BinType.VECT_MATRIXMULT ? varj : varj + ".vals(0)")) :
-                                                       
_inputs.get(j).getDataType() == DataType.MATRIX ? (api == GeneratorAPI.JAVA ? 
varj : varj + ".vals(0)") : varj);
-                       
+                               varj.startsWith("a") ? (api == 
GeneratorAPI.JAVA ? varj :
+                                       (_inputs.get(j).getDataType() == 
DataType.MATRIX ? varj + ".vals(0)" : varj)) :
+                                       varj.startsWith("b") ? (api == 
GeneratorAPI.JAVA ? varj + ".values(rix)" :
+                                               (_type == 
BinType.VECT_MATRIXMULT ? varj : varj + ".vals(0)")) :
+                                               _inputs.get(j).getDataType() == 
DataType.MATRIX ? (api == GeneratorAPI.JAVA ? varj : varj + ".vals(0)") : varj);
+
+                               tmp = tmp.replace("%SLEN"+(j+1)+"%", 
varj.startsWith("STMP") ? varj+".size()" : varj.startsWith("a") ? "alen" : 
"blen");
+
                        //replace start position of main input
-                       tmp = tmp.replace("%POS"+(j+1)+"%", (_inputs.get(j) 
instanceof CNodeData 
-                                       && 
_inputs.get(j).getDataType().isMatrix()) ? (!varj.startsWith("b")) ? varj+"i" : 
-                                       
((TemplateUtils.isMatrix(_inputs.get(j)) || (_type.isElementwise()
-                                               && 
TemplateUtils.isColVector(_inputs.get(j)))) && _type!=BinType.VECT_MATRIXMULT) ?
+                       tmp = tmp.replace("%POS"+(j+1)+"%", (_inputs.get(j) 
instanceof CNodeData
+                               && _inputs.get(j).getDataType().isMatrix()) ? 
(!varj.startsWith("b")) ? varj+"i" :
+                               ((TemplateUtils.isMatrix(_inputs.get(j)) || 
(_type.isElementwise()
+                                       && 
TemplateUtils.isColVector(_inputs.get(j)))) && _type!=BinType.VECT_MATRIXMULT) ?
                                        varj + ".pos(rix)" : "0" : "0");
                }
                //replace length information (e.g., after matrix mult)
-               if( _type == BinType.VECT_OUTERMULT_ADD || (_type == 
BinType.VECT_CBIND && vectorVector) ) {
+               if( _type == BinType.VECT_OUTERMULT_ADD || (_type == 
BinType.VECT_CBIND && vectorVector)) {
                        for( int j=0; j<2; j++ )
                                tmp = tmp.replace("%LEN"+(j+1)+"%", 
_inputs.get(j).getVectorLength(api));
                }
-               else { //general case 
+               else { //general case
                        CNode mInput = getIntermediateInputVector();
                        if( mInput != null )
                                tmp = tmp.replace("%LEN%", 
mInput.getVectorLength(api));
                }
-               
+
                sb.append(tmp);
-               
+
                //mark as generated
                _generated = true;
-               
+
                return sb.toString();
        }
        
@@ -219,7 +241,126 @@ public class CNodeBinary extends CNode {
                        if( getInput().get(i).getDataType().isMatrix() )
                                return getInput().get(i);
                return null;
-       } 
+       }
+
+       /**
+        * Decides whether the sparse code template is used for this operation.
+        * Always false unless DMLScript.SPARSE_INTERMEDIATE is set. Otherwise
+        * the decision depends on the operation type and the sparsity estimate
+        * and, for the comparison, min/max, and pow scalar operations, also on
+        * the scalar value and on which of the two inputs is the scalar.
+        *
+        * @param sparsityEst sparsity estimate, compared against per-type thresholds
+        * @param scalarVal value of the scalar operand; NaN yields false for all
+        *                  value-dependent scalar operations (presumably NaN marks
+        *                  an unknown scalar - confirm against the callers)
+        * @return true if the sparse template should be used
+        */
+       private boolean getTemplateType(double sparsityEst, double scalarVal) {
+               if(!DMLScript.SPARSE_INTERMEDIATE)
+                       return false;
+               //NaN compares unequal to every value (including itself), so a check
+               //of the form 'scalarVal != Double.NaN' is always true; use isNaN
+               final boolean validScalar = !Double.isNaN(scalarVal);
+               switch(_type) {
+                       case VECT_MULT:
+                       case VECT_DIV:
+                       case VECT_LESS:
+                       case VECT_MINUS:
+                       case VECT_PLUS:
+                       case VECT_XOR:
+                       case VECT_BITWAND:
+                       case VECT_BIASADD:
+                       case VECT_BIASMULT:
+                       case VECT_MIN:
+                       case VECT_MAX:
+                       case VECT_NOTEQUAL:
+                       case VECT_GREATER:
+                       case VECT_EQUAL:
+                       case VECT_LESSEQUAL:
+                       case VECT_GREATEREQUAL:
+                               return sparsityEst < 0.1;
+                       case VECT_MULT_SCALAR:
+                       case VECT_DIV_SCALAR:
+                       case VECT_XOR_SCALAR:
+                       case VECT_BITWAND_SCALAR:
+                               return sparsityEst < 0.3;
+                       //value-dependent scalar operations: first condition applies if the
+                       //second input is the scalar, second condition if the first input is
+                       case VECT_GREATER_SCALAR:
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal >= 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal < 0 && sparsityEst < 0.2);
+                       case VECT_GREATEREQUAL_SCALAR:
+                               //NOTE(review): with the scalar on the left, scalarVal == 0 gives
+                               //0 >= 0 (true) for zero entries - confirm this boundary is intended
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal > 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal <= 0 && sparsityEst < 0.2);
+                       case VECT_MIN_SCALAR:
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal >= 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal >= 0 && sparsityEst < 0.2);
+                       case VECT_LESS_SCALAR:
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal <= 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal > 0 && sparsityEst < 0.2);
+                       case VECT_LESSEQUAL_SCALAR:
+                               //NOTE(review): with the scalar on the left, scalarVal == 0 gives
+                               //0 <= 0 (true) for zero entries - confirm this boundary is intended
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal < 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal >= 0 && sparsityEst < 0.2);
+                       case VECT_MAX_SCALAR:
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal <= 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal <= 0 && sparsityEst < 0.2);
+                       case VECT_POW_SCALAR:
+                       case VECT_EQUAL_SCALAR:
+                               //NaN != 0 evaluates to true, so the NaN guard is essential here
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal != 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal != 0 && sparsityEst < 0.2);
+                       case VECT_NOTEQUAL_SCALAR:
+                               return validScalar && (_inputs.get(1).getDataType().isScalar() ? scalarVal == 0 && sparsityEst < 0.2
+                                       : _inputs.get(0).getDataType().isScalar() && scalarVal == 0 && sparsityEst < 0.2);
+                       default:
+                               return sparsityEst < 0.3;
+               }
+       }
+
+       /**
+        * Indicates, per operation type, whether the combination of sparse
+        * inputs qualifies for a sparse output; codegen uses the result when
+        * choosing between a dense and a sparse output variable name.
+        *
+        * @param scalarVector true if the first input is a scalar and the second a matrix
+        * @param lsparseLhs true if the left-hand-side input is sparse
+        * @param lsparseRhs true if the right-hand-side input is sparse
+        * @return true if the operation type and input flags qualify
+        */
+       public boolean getOutputType(boolean scalarVector, boolean lsparseLhs, boolean lsparseRhs) {
+               final boolean anySparse = lsparseLhs || lsparseRhs;
+               final boolean bothSparse = lsparseLhs && lsparseRhs;
+               boolean ret = false; //all types not listed below
+               switch(_type) {
+                       case VECT_POW_SCALAR:
+                               ret = !scalarVector && lsparseLhs;
+                               break;
+                       //vector-scalar operations: one sparse input suffices
+                       case VECT_MULT_SCALAR:
+                       case VECT_DIV_SCALAR:
+                       case VECT_XOR_SCALAR:
+                       case VECT_MIN_SCALAR:
+                       case VECT_MAX_SCALAR:
+                       case VECT_EQUAL_SCALAR:
+                       case VECT_NOTEQUAL_SCALAR:
+                       case VECT_LESS_SCALAR:
+                       case VECT_LESSEQUAL_SCALAR:
+                       case VECT_GREATER_SCALAR:
+                       case VECT_GREATEREQUAL_SCALAR:
+                       case VECT_BITWAND_SCALAR:
+                               ret = anySparse;
+                               break;
+                       //vector-vector operations: both inputs must be sparse
+                       case VECT_MULT:
+                       case VECT_DIV:
+                       case VECT_MINUS:
+                       case VECT_PLUS:
+                       case VECT_XOR:
+                       case VECT_BITWAND:
+                       case VECT_BIASADD:
+                       case VECT_BIASMULT:
+                       case VECT_MIN:
+                       case VECT_MAX:
+                       case VECT_NOTEQUAL:
+                       case VECT_LESS:
+                       case VECT_GREATER:
+                               ret = bothSparse;
+                               break;
+                       default:
+                               break;
+               }
+               return ret;
+       }
        
        @Override
        public String toString() {
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeNary.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeNary.java
index dcf18ec656..35c351546d 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeNary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeNary.java
@@ -60,7 +60,10 @@ public class CNodeNary extends CNode
                                                        sb.append( sparseInput ?
                                                                "    
LibSpoofPrimitives.vectWrite("+varj+"vals, %TMP%, "
                                                                        
+varj+"ix, "+pos+", "+off+", "+input._cols+");\n" :
-                                                               "    
LibSpoofPrimitives.vectWrite("+(varj.startsWith("b")?varj+".values(rix)":varj)
+                                                               
varj.startsWith("STMP") ?
+                                                                       "    
LibSpoofPrimitives.vectWrite("+varj+".values(), %TMP%, "
+                                                                               
+varj+".indexes(), "+pos+", "+off+", "+varj+".size());\n" :
+                                                                       "    
LibSpoofPrimitives.vectWrite("+(varj.startsWith("b")?varj+".values(rix)":varj)
                                                                        +", 
%TMP%, "+pos+", "+off+", "+input._cols+");\n");
                                                        off += input._cols;     
                                                }
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
index 77dec97cbe..c0d06b4bcb 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeRow.java
@@ -37,6 +37,7 @@ public class CNodeRow extends CNodeTpl
                + "import 
org.apache.sysds.runtime.codegen.SpoofOperator.SideInput;\n"
                + "import org.apache.sysds.runtime.codegen.SpoofRowwise;\n"
                + "import 
org.apache.sysds.runtime.codegen.SpoofRowwise.RowType;\n"
+           + "import org.apache.sysds.runtime.data.SparseRowVector;\n"
                + "import org.apache.commons.math3.util.FastMath;\n"
                + "\n"
                + "public final class %TMP% extends SpoofRowwise { \n"
@@ -162,7 +163,8 @@ private static final String TEMPLATE_ROWAGG_OUT_CUDA  = 
"\t\tif(threadIdx.x == 0
                        case NO_AGG_B1:
                        case NO_AGG_CONST:
                                if(api == GeneratorAPI.JAVA)
-                                       return 
TEMPLATE_NOAGG_OUT.replace("%IN%", varName).replace("%LEN%", 
_output.getVarname()+".length");
+                                       return 
TEMPLATE_NOAGG_OUT.replace("%IN%", 
varName.startsWith("STMP")?varName+".values(), 
"+varName+".indexes()":varName).replace("%LEN%",
+                                               varName.startsWith("STMP") ? 
varName+".size()" : _output.getVarname()+".length");
                                else
                                        return 
TEMPLATE_NOAGG_CONST_OUT_CUDA.replace("%IN%", varName + 
".vals(0)").replaceAll("%LEN%", _output.getVarname()+".length");
                        case FULL_AGG:
diff --git 
a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeTernary.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeTernary.java
index 5e81109283..c6ff9802b1 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeTernary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeTernary.java
@@ -82,10 +82,13 @@ public class CNodeTernary extends CNode
                        String varj = _inputs.get(j-1).getVarname();
                        //replace sparse and dense inputs
                        tmp = tmp.replace("%IN"+j+"v%", 
-                               varj+(varj.startsWith("a")?"vals":"") );
+                               varj+(varj.startsWith("a")?"vals" : 
varj.startsWith("STMP") ? ".values()" :"") );
                        tmp = tmp.replace("%IN"+j+"i%", 
-                               varj+(varj.startsWith("a")?"ix":"") );
+                               varj+(varj.startsWith("a")?"ix": 
varj.startsWith("STMP") ? ".indexes()" :"") );
                        tmp = tmp.replace("%IN"+j+"%", varj );
+                       tmp = tmp.replace("%POS%", varj.startsWith("a") ? 
varj+"i" : varj.startsWith("STMP") ? "0" : "");
+                       tmp = tmp.replace("%LEN%",
+                               varj.startsWith("a") ? "alen" : 
varj.startsWith("STMP") ? varj+".size()" : "");
                }
                sb.append(tmp);
                
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeUnary.java 
b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeUnary.java
index fe67995b6b..ffbe0087f1 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeUnary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CNodeUnary.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 
 import org.apache.commons.lang3.ArrayUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.common.Opcodes;
 import org.apache.sysds.common.Types.DataType;
 import org.apache.sysds.runtime.util.UtilFunctions;
@@ -85,13 +86,21 @@ public class CNodeUnary extends CNode
        }
        
        private UnaryType _type;
-       
+       private boolean sparseTemplate;
+
        public CNodeUnary( CNode in1, UnaryType type ) {
                _inputs.add(in1);
                _type = type;
                setOutputDims();
        }
-       
+
+       /**
+        * Creates a unary node and additionally decides, via getTemplateType,
+        * whether the sparse template is used for this operation.
+        *
+        * @param in1 input node
+        * @param type unary operation type
+        * @param sparsity sparsity value handed to getTemplateType
+        */
+       public CNodeUnary( CNode in1, UnaryType type, double sparsity ) {
+               //same initialization as the two-argument constructor
+               this(in1, type);
+               sparseTemplate = getTemplateType(sparsity);
+       }
+
        public UnaryType getType() {
                return _type;
        }
@@ -111,11 +120,13 @@ public class CNodeUnary extends CNode
                sb.append(_inputs.get(0).codegen(sparse, api));
                
                //generate unary operation
-               boolean lsparse = sparse && (_inputs.get(0) instanceof CNodeData
-                       && _inputs.get(0).getVarname().startsWith("a")
-                       && !_inputs.get(0).isLiteral());
-               String var = createVarname();
-               String tmp = getLanguageTemplateClass(this, api).getTemplate(_type, lsparse);
+               boolean lsparse = sparse &&
+                       ((_inputs.get(0) instanceof CNodeData
+                               && _inputs.get(0).getVarname().startsWith("a")
+                               && !_inputs.get(0).isLiteral())
+                               || _inputs.get(0).getVarname().startsWith("STMP"));
+               String var = createVarname(sparseTemplate && lsparse && getOutputType());
+               String tmp = getLanguageTemplateClass(this, api).getTemplate(_type, lsparse, sparseTemplate);
                tmp = tmp.replaceAll("%TMP%", var);
                
                //replace sparse and dense inputs
@@ -130,6 +141,46 @@ public class CNodeUnary extends CNode
                
                return sb.toString();
        }
+
+       public boolean getTemplateType(double sparsity) {
+               if(!DMLScript.SPARSE_INTERMEDIATE)
+                       return false;
+               else {
+                       switch(_type) {
+                               case VECT_SQRT:
+                               case VECT_ABS:
+                               case VECT_ROUND:
+                               case VECT_CEIL:
+                               case VECT_FLOOR:
+                               case VECT_SIN:
+                               case VECT_TAN:
+                               case VECT_ASIN:
+                               case VECT_ATAN:
+                               case VECT_SINH:
+                               case VECT_TANH:
+                               case VECT_SIGN: return sparsity <= 0.3;
+                               default: return false;
+                       }
+               }
+       }
+
+       public boolean getOutputType() {
+               switch(_type) {
+                       case VECT_SQRT:
+                       case VECT_ABS:
+                       case VECT_ROUND:
+                       case VECT_CEIL:
+                       case VECT_FLOOR:
+                       case VECT_SIN:
+                       case VECT_TAN:
+                       case VECT_ASIN:
+                       case VECT_ATAN:
+                       case VECT_SINH:
+                       case VECT_TANH:
+                       case VECT_SIGN: return true;
+                       default: return false;
+               }
+       }
        
        @Override
        public String toString() {
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/CodeTemplate.java b/src/main/java/org/apache/sysds/hops/codegen/cplan/CodeTemplate.java
index e29594f525..1ca8b4a3c1 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/CodeTemplate.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/CodeTemplate.java
@@ -31,9 +31,17 @@ public abstract class CodeTemplate {
        public String getTemplate() {
                throw new RuntimeException("Calling wrong getTemplate method on 
" + getClass().getCanonicalName());
        }
-       
+
+       /**
+        * @param sparseTemplate added to turn SparseRowVector intermediates on and off
+        */
        public String getTemplate(CNodeBinary.BinType type, boolean sparseLhs, boolean sparseRhs, boolean scalarVector,
-               boolean scalarInput, boolean vectorVector) {
+               boolean scalarInput, boolean vectorVector, boolean sparseTemplate) {
+               throw new RuntimeException("Calling wrong getTemplate method on " + getClass().getCanonicalName());
+       }
+
+       public String getTemplate(CNodeBinary.BinType type, boolean sparseLhs, boolean sparseRhs,
+               boolean scalarVector, boolean scalarInput, boolean vectorVector) {
                throw new RuntimeException("Calling wrong getTemplate method on " + getClass().getCanonicalName());
        }
        
@@ -44,6 +52,10 @@ public abstract class CodeTemplate {
        public String getTemplate(CNodeUnary.UnaryType type, boolean sparse) {
                throw new RuntimeException("Calling wrong getTemplate method on 
" + getClass().getCanonicalName());
        }
+
+       public String getTemplate(CNodeUnary.UnaryType type, boolean sparse, boolean sparseTemplate) {
+               throw new RuntimeException("Calling wrong getTemplate method on " + getClass().getCanonicalName());
+       }
        
        public static String getTemplate(String templateFileName) {
                try {
@@ -68,5 +80,5 @@ public abstract class CodeTemplate {
                        return null;
                }
        }
-       
+
 }
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Binary.java b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Binary.java
index 40496249e5..34c4ee9088 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Binary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Binary.java
@@ -25,7 +25,7 @@ import org.apache.sysds.hops.codegen.cplan.CodeTemplate;
 public class Binary extends CodeTemplate {
 
        public String getTemplate(BinType type, boolean sparseLhs, boolean 
sparseRhs,
-               boolean scalarVector, boolean scalarInput, boolean vectorVector)
+               boolean scalarVector, boolean scalarInput, boolean 
vectorVector, boolean sparseTemplate)
        {
                switch (type) {
                        case ROWMAXS_VECTMULT:
@@ -68,13 +68,22 @@ public class Binary extends CodeTemplate {
                        }
 
                        //vector-scalar operations
+                       case VECT_POW_SCALAR: {
+                               String vectName = type.getVectorPrimitiveName();
+                               if( scalarVector )
+                                       return sparseRhs ? "    double[] %TMP% 
= LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %IN2i%, %POS2%, alen, 
%LEN%);\n" :
+                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS2%, %LEN%);\n";
+                               else if(sparseTemplate) {
+                                       return sparseLhs ? "    SparseRowVector 
%TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%LEN%, %IN1v%, %IN2%, %IN1i%, 
%POS1%, %SLEN1%);\n" :
+                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
+                               } else {
+                                       return sparseLhs ? "    double[] %TMP% 
= LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, 
%LEN%);\n" :
+                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
+                               }
+                       }
                        case VECT_MULT_SCALAR:
                        case VECT_DIV_SCALAR:
-                       case VECT_MINUS_SCALAR:
-                       case VECT_PLUS_SCALAR:
-                       case VECT_POW_SCALAR:
                        case VECT_XOR_SCALAR:
-                       case VECT_BITWAND_SCALAR:
                        case VECT_MIN_SCALAR:
                        case VECT_MAX_SCALAR:
                        case VECT_EQUAL_SCALAR:
@@ -82,7 +91,22 @@ public class Binary extends CodeTemplate {
                        case VECT_LESS_SCALAR:
                        case VECT_LESSEQUAL_SCALAR:
                        case VECT_GREATER_SCALAR:
-                       case VECT_GREATEREQUAL_SCALAR: {
+                       case VECT_GREATEREQUAL_SCALAR:
+                       case VECT_BITWAND_SCALAR: {
+                               String vectName = type.getVectorPrimitiveName();
+                               if(scalarVector) {
+                                       if(sparseRhs)
+                                               return sparseTemplate ? "    
SparseRowVector %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%LEN%, %IN1%, 
%IN2v%, %IN2i%, %POS2%, %SLEN1%);\n" :
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %IN2i%, %POS2%, alen, 
%LEN%);\n";
+                               } else {
+                                       if(sparseLhs)
+                                               return sparseTemplate ? "    
SparseRowVector %TMP% = LibSpoofPrimitives.vect"+vectName+"Write(%LEN%, %IN1v%, 
%IN2%, %IN1i%, %POS1%, %SLEN1%);\n" :
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, alen, 
%LEN%);\n";
+                               }
+                               return  "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %LEN%);\n";
+                       }
+                       case VECT_MINUS_SCALAR:
+                       case VECT_PLUS_SCALAR: {
                                String vectName = type.getVectorPrimitiveName();
                                if( scalarVector )
                                        return sparseRhs ? "    double[] %TMP% 
= LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %IN2i%, %POS2%, alen, 
%LEN%);\n" :
@@ -115,20 +139,34 @@ public class Binary extends CodeTemplate {
                        case VECT_BIASMULT:
                        case VECT_MIN:
                        case VECT_MAX:
-                       case VECT_EQUAL:
                        case VECT_NOTEQUAL:
                        case VECT_LESS:
+                       case VECT_GREATER:{
+                               String vectName = type.getVectorPrimitiveName();
+                               if(sparseTemplate && sparseLhs && sparseRhs) {
+                                       return "    SparseRowVector %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%LEN%, %IN1v%, %IN2v%, %IN1i%, %IN2i%, 
%POS1%, %POS2%, %SLEN1%, %SLEN2%);\n";
+                               } else {
+                                       return sparseLhs ?
+                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, 
alen, %LEN%);\n" :
+                                               sparseRhs ?
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %POS1%, %IN2i%, %POS2%, 
alen, %LEN%);\n" :
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %POS2%, 
%LEN%);\n";
+                               }
+                       }
+                       case VECT_EQUAL:
                        case VECT_LESSEQUAL:
-                       case VECT_GREATER:
                        case VECT_GREATEREQUAL: {
                                String vectName = type.getVectorPrimitiveName();
-                               return sparseLhs ?
+                               if(sparseTemplate && sparseLhs && sparseRhs) {
+                                       return "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%LEN%, %IN1v%, %IN2v%, %IN1i%, %IN2i%, 
%POS1%, %POS2%, %SLEN1%, %SLEN2%);\n";
+                               } else {
+                                       return sparseLhs ?
                                                "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN2%, %IN1i%, %POS1%, %POS2%, 
alen, %LEN%);\n" :
                                                sparseRhs ?
-                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %POS1%, %IN2i%, %POS2%, 
alen, %LEN%);\n" :
-                                               "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %POS2%, 
%LEN%);\n";
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2v%, %POS1%, %IN2i%, %POS2%, 
alen, %LEN%);\n" :
+                                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %IN2%, %POS1%, %POS2%, 
%LEN%);\n";
+                               }
                        }
-
                        //scalar-scalar operations
                        case MULT:
                                return "    double %TMP% = %IN1% * %IN2%;\n";
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Ternary.java b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Ternary.java
index a86d51cca8..64d282bbb8 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Ternary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Ternary.java
@@ -51,7 +51,7 @@ public class Ternary extends CodeTemplate {
 
                        case LOOKUP_RC1:
                                return sparse ?
-                                       "    double %TMP% = getValue(%IN1v%, 
%IN1i%, ai, alen, %IN3%-1);\n" :
+                                       "    double %TMP% = getValue(%IN1v%, 
%IN1i%, %POS%, %LEN%, %IN3%-1);\n" :
                                        "    double %TMP% = getValue(%IN1%, 
%IN2%, rix, %IN3%-1);\n";
 
                        case LOOKUP_RVECT1:
diff --git a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Unary.java b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Unary.java
index d8a1085df5..ef256223b9 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Unary.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/cplan/java/Unary.java
@@ -25,7 +25,7 @@ import org.apache.sysds.hops.codegen.cplan.CodeTemplate;
 
 public class Unary extends CodeTemplate {
        @Override
-       public String getTemplate(UnaryType type, boolean sparse) {
+       public String getTemplate(UnaryType type, boolean sparse, boolean sparseTemplate) {
                switch( type ) {
                        case ROW_SUMS:
                        case ROW_SUMSQS:
@@ -38,25 +38,32 @@ public class Unary extends CodeTemplate {
                                return sparse ? "    double %TMP% = 
LibSpoofPrimitives.vect"+vectName+"(%IN1v%, %IN1i%, %POS1%, alen, len);\n":
                                                "    double %TMP% = 
LibSpoofPrimitives.vect"+vectName+"(%IN1%, %POS1%, %LEN%);\n";
                        }
-                       case VECT_EXP:
-                       case VECT_POW2:
-                       case VECT_MULT2:
+
                        case VECT_SQRT:
-                       case VECT_LOG:
                        case VECT_ABS:
                        case VECT_ROUND:
                        case VECT_CEIL:
                        case VECT_FLOOR:
-                       case VECT_SIGN:
                        case VECT_SIN:
-                       case VECT_COS:
                        case VECT_TAN:
                        case VECT_ASIN:
-                       case VECT_ACOS:
                        case VECT_ATAN:
                        case VECT_SINH:
-                       case VECT_COSH:
                        case VECT_TANH:
+                       case VECT_SIGN:{
+                               String vectName = type.getVectorPrimitiveName();
+                               return sparse ? sparseTemplate ?
+                                       "    SparseRowVector %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(len, %IN1v%, %IN1i%, %POS1%, alen);\n" 
:
+                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1v%, %IN1i%, %POS1%, alen, len);\n" 
:
+                                       "    double[] %TMP% = 
LibSpoofPrimitives.vect"+vectName+"Write(%IN1%, %POS1%, %LEN%);\n";
+                       }
+                       case VECT_EXP:
+                       case VECT_POW2:
+                       case VECT_MULT2:
+                       case VECT_LOG:
+                       case VECT_COS:
+                       case VECT_ACOS:
+                       case VECT_COSH:
                        case VECT_CUMSUM:
                        case VECT_CUMMIN:
                        case VECT_CUMMAX:
diff --git a/src/main/java/org/apache/sysds/hops/codegen/template/TemplateRow.java b/src/main/java/org/apache/sysds/hops/codegen/template/TemplateRow.java
index 955bf778b8..8efb0245e9 100644
--- a/src/main/java/org/apache/sysds/hops/codegen/template/TemplateRow.java
+++ b/src/main/java/org/apache/sysds/hops/codegen/template/TemplateRow.java
@@ -37,6 +37,7 @@ import org.apache.sysds.hops.NaryOp;
 import org.apache.sysds.hops.ParameterizedBuiltinOp;
 import org.apache.sysds.hops.TernaryOp;
 import org.apache.sysds.hops.UnaryOp;
+import org.apache.sysds.hops.OptimizerUtils;
 import org.apache.sysds.hops.codegen.cplan.CNode;
 import org.apache.sysds.hops.codegen.cplan.CNodeBinary;
 import org.apache.sysds.hops.codegen.cplan.CNodeData;
@@ -391,7 +392,7 @@ public class TemplateRow extends TemplateBase
                        {
                                if( HopRewriteUtils.isUnary(hop, 
SUPPORTED_VECT_UNARY) ) {
                                        String opname = 
"VECT_"+((UnaryOp)hop).getOp().name();
-                                       out = new CNodeUnary(cdata1, 
UnaryType.valueOf(opname));
+                                       out = new CNodeUnary(cdata1, 
UnaryType.valueOf(opname), hop.getInput(0).getSparsity());
                                        if( cdata1 instanceof CNodeData && 
!inHops2.containsKey("X") )
                                                inHops2.put("X", 
hop.getInput().get(0));
                                }
@@ -403,7 +404,7 @@ public class TemplateRow extends TemplateBase
                        {
                                cdata1 = 
TemplateUtils.wrapLookupIfNecessary(cdata1, hop.getInput().get(0));
                                String primitiveOpName = 
((UnaryOp)hop).getOp().name();
-                               out = new CNodeUnary(cdata1, 
UnaryType.valueOf(primitiveOpName));
+                               out = new CNodeUnary(cdata1, 
UnaryType.valueOf(primitiveOpName), hop.getInput(0).getSparsity());
                        }
                }
                else if(HopRewriteUtils.isBinary(hop, OpOp2.CBIND)) {
@@ -440,7 +441,14 @@ public class TemplateRow extends TemplateBase
                                                cdata1 = new CNodeUnary(cdata1, 
UnaryType.LOOKUP_R);
                                        if( TemplateUtils.isColVector(cdata2) )
                                                cdata2 = new CNodeUnary(cdata2, 
UnaryType.LOOKUP_R);
-                                       out = getVectorBinary(cdata1, cdata2, 
((BinaryOp)hop).getOp().name());
+                                       String opName = 
((BinaryOp)hop).getOp().name();
+                                       Hop hopIn1 = hop.getInput(0);
+                                       Hop hopIn2 = hop.getInput(1);
+                                       double sparsityEst = 
OptimizerUtils.getBinaryOpSparsity(
+                                               hopIn1.getSparsity(), 
hopIn2.getSparsity(), OpOp2.valueOf(opName), false);
+                                       double literalVal = hopIn1 instanceof 
LiteralOp ? ((LiteralOp) hopIn1).getDoubleValue()
+                                               : hopIn2 instanceof LiteralOp ? 
((LiteralOp) hopIn2).getDoubleValue() : Double.NaN;
+                                       out = getVectorBinary(cdata1, cdata2, 
opName, sparsityEst, literalVal);
                                        if( cdata1 instanceof CNodeData && 
!inHops2.containsKey("X")
                                                && 
!(cdata1.getDataType()==DataType.SCALAR) ) {
                                                inHops2.put("X", 
hop.getInput().get(0));
@@ -569,7 +577,17 @@ public class TemplateRow extends TemplateBase
                        return new CNodeBinary(cdata1, cdata2, 
BinType.valueOf("VECT_"+name+"_SCALAR"));
                }
        }
-       
+
+       private static CNodeBinary getVectorBinary(CNode cdata1, CNode cdata2, String name, double sparsity, double literalVal) {
+               if( TemplateUtils.isMatrix(cdata1) && (TemplateUtils.isMatrix(cdata2)
+                       || TemplateUtils.isRowVector(cdata2)) ) {
+                       return new CNodeBinary(cdata1, cdata2, BinType.valueOf("VECT_"+name), sparsity, literalVal);
+               }
+               else {
+                       return new CNodeBinary(cdata1, cdata2, BinType.valueOf("VECT_"+name+"_SCALAR"), sparsity, literalVal);
+               }
+       }
+
        /**
         * Comparator to order input hops of the row aggregate template. We try 
         * to order matrices-vectors-scalars via sorting by number of cells but 
diff --git a/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java b/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
index 6c0dc395c3..bc6ba19895 100644
--- a/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
+++ b/src/main/java/org/apache/sysds/runtime/codegen/LibSpoofPrimitives.java
@@ -23,6 +23,7 @@ import java.util.Arrays;
 
 import org.apache.commons.math3.util.FastMath;
 import org.apache.sysds.runtime.data.DenseBlockFP64;
+import org.apache.sysds.runtime.data.SparseRowVector;
 import org.apache.sysds.runtime.functionobjects.BitwAnd;
 import org.apache.sysds.runtime.functionobjects.IntegerDivide;
 import org.apache.sysds.runtime.functionobjects.Modulus;
@@ -51,6 +52,10 @@ public class LibSpoofPrimitives
                @Override protected VectorBuffer initialValue() { return new 
VectorBuffer(0,0,0); }
        };
 
+       private static ThreadLocal<SparseVectorBuffer> sparseMemPool = new ThreadLocal<>() {
+               @Override protected SparseVectorBuffer initialValue() { return new SparseVectorBuffer(0,0,0); }
+       };
+
        public static double rowMaxsVectMult(double[] a, double[] b, int ai, 
int bi, int len) {
                double val = Double.NEGATIVE_INFINITY;
                int j=0;
@@ -2164,6 +2169,1155 @@ public class LibSpoofPrimitives
                return (vectSum(avalsSqr, 0, len)-len*meanVal)/(len-1);
        }
 
+       /**
+        * Vector primitives with SparseRowVector intermediates
+        * Changes:
+        *      - Changed method signature to avoid method duplicate conflicts
+        *              e.g. (double[], double, int[], int, int, int) --> (int, double[], double, int[], int, int)
+        *  - Added blen for vector - vector calculations to be able to use both vectors as SparseRowVectors
+        *  - Implemented a new SparseVectorBuffer class that creates a ring buffer for SparseRowVectors in different sizes
+        */
+
+       public static SparseRowVector vectMultWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               SparseRowVector c = allocSparseVector(alen);
+               if( a == null ) return c;
+               int[] indexes = c.indexes();
+               double[] values = c.values();
+               for(int j = 0; j < alen; j++) {
+                       indexes[j] = aix[ai+j];
+                       values[j] = a[ai+j]*bval;
+               }
+               c.setSize(alen);
+               return c;
+       }
+
+       public static SparseRowVector vectMultWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               return vectMultWrite(len, a, bval, aix, ai, alen);
+       }
+
+       //old version with branching (not used)
+       public static SparseRowVector vectMultWriteB(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               SparseRowVector c = allocSparseVector(Math.min(alen, blen));
+               if( a == null || b == null ) return c;
+               int aItr = ai;
+               int bItr = bi;
+               int index = 0;
+               int[] indexes = c.indexes();
+               double[] values = c.values();
+               while(aItr < ai+alen && bItr < bi+blen) {
+                       int aIdx = aix[aItr];
+                       int bIdx = bix[bItr];
+                       if(aIdx == bIdx) {
+                               indexes[index] = aIdx;
+                               values[index] = a[aItr] * b[bItr];
+                               aItr++;
+                               bItr++;
+                               index++;
+                       } else if(aIdx < bIdx)
+                               aItr++;
+                       else
+                               bItr++;
+               }
+               c.setSize(index);
+               return c;
+       }
+
+       //version without branching
+       public static SparseRowVector vectMultWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               SparseRowVector c = allocSparseVector(Math.min(alen, blen));
+               int index = 0;
+               int aItr = ai;
+               int bItr = bi;
+               int[] indexes = c.indexes();
+               double[] values = c.values();
+               while(aItr < ai+alen && bItr < bi+blen) {
+                       int aIdx = aix[aItr];
+                       int bIdx = bix[bItr];
+                       indexes[index] = aIdx;
+                       values[index] = a[aItr] * b[bItr];
+                       index += aIdx == bIdx ? 1 : 0;
+                       aItr += aIdx <= bIdx ? 1 : 0;
+                       bItr += aIdx >= bIdx ? 1 : 0;
+               }
+               c.setSize(index);
+               return c;
+       }
+
+       public static void vectWrite(double[] a, int[] aix, double[] c, int ci, int len) {
+               if( a == null ) return;
+               for(int j = 0; j < len; j++)
+                       c[ci+aix[j]] = a[j];
+       }
+
+       public static void vectWrite(double[] a, double[] c, int[] aix, int ai, int ci, int alen) {
+               if( a == null ) return;
+               for(int j = 0; j < alen; j++)
+                       c[ci+aix[ai+j]] = a[ai+j];
+       }
+
+       /**
+        * Divides every stored entry of a sparse vector by the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries; len is not used. Indexes are copied unchanged.
+        *
+        * @return sparse vector with alen entries holding a[k] / bval
+        */
+       public static SparseRowVector vectDivWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               SparseRowVector out = allocSparseVector(alen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               for( int k = ai, pos = 0; pos < alen; k++, pos++ ) {
+                       outIx[pos] = aix[k];
+                       outVals[pos] = a[k] / bval;
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Divides the scalar bval by every stored entry of a sparse vector.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries; len is not used. Only stored entries produce output, so
+        * positions without a stored entry are not materialized.
+        *
+        * @return sparse vector with alen entries holding bval / a[k]
+        */
+       public static SparseRowVector vectDivWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               SparseRowVector out = allocSparseVector(alen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               for( int k = ai, pos = 0; pos < alen; k++, pos++ ) {
+                       outIx[pos] = aix[k];
+                       outVals[pos] = bval / a[k];
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Element-wise division of two sparse vectors, old version with branching
+        * (not used). Both inputs are given as value arrays (a, b) with matching
+        * index arrays (aix, bix), read from offsets ai/bi for alen/blen entries;
+        * len is not used. The merge assumes ascending indexes in both inputs.
+        *
+        * An output entry is produced for every stored entry of a: where b has an
+        * entry at the same index the result is a/b, otherwise the divisor is an
+        * implicit zero and the result follows IEEE division by zero (+/-Infinity,
+        * or NaN for a stored 0 or NaN). Indexes only present in b are skipped.
+        *
+        * @return sparse vector with at most alen entries
+        */
+       public static SparseRowVector vectDivWriteB(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               SparseRowVector c = allocSparseVector(alen);
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               int aItr = ai;
+               int bItr = bi;
+               int index = 0;
+               int[] indexes = c.indexes();
+               double[] values = c.values();
+               while(aItr < aEnd && bItr < bEnd) {
+                       int aIdx = aix[aItr];
+                       int bIdx = bix[bItr];
+                       if(aIdx == bIdx) {
+                               indexes[index] = aIdx;
+                               values[index] = a[aItr] / b[bItr];
+                               aItr++;
+                               bItr++;
+                               index++;
+                       } else if(aIdx < bIdx) {
+                               //divisor is an implicit zero
+                               indexes[index] = aIdx;
+                               values[index] = a[aItr] / 0.0;
+                               aItr++;
+                               index++;
+                       } else {
+                               //dividend is an implicit zero, nothing to materialize
+                               bItr++;
+                       }
+               }
+               //entries of a behind the last entry of b are divided by an implicit
+               //zero as well; previously they were silently dropped
+               for(; aItr < aEnd; aItr++, index++) {
+                       indexes[index] = aix[aItr];
+                       values[index] = a[aItr] / 0.0;
+               }
+               c.setSize(index);
+               return c;
+       }
+
+       /**
+        * Divides two sparse vectors element-wise (version without branching).
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * Only indexes present in both inputs produce an output entry; the merge
+        * assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector holding a/b at the shared indexes
+        */
+       public static SparseRowVector vectDivWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(Math.min(alen, blen));
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //speculative write: it is only kept if nnz advances below
+                       outIx[nnz] = acol;
+                       outVals[nnz] = a[apos] / b[bpos];
+                       nnz += (acol == bcol) ? 1 : 0;
+                       apos += (acol <= bcol) ? 1 : 0;
+                       bpos += (acol >= bcol) ? 1 : 0;
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Subtracts two sparse vectors element-wise (a - b), branching variant.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets; the merge assumes
+        * ascending indexes in both inputs.
+        *
+        * @return sparse vector with at most alen+blen entries
+        */
+       public static SparseRowVector vectMinusWriteB(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       if( acol < bcol ) {
+                               //only a has an entry here
+                               outIx[nnz] = acol;
+                               outVals[nnz] = a[apos++];
+                       }
+                       else if( acol > bcol ) {
+                               //only b has an entry here
+                               outIx[nnz] = bcol;
+                               outVals[nnz] = -b[bpos++];
+                       }
+                       else {
+                               outIx[nnz] = acol;
+                               outVals[nnz] = a[apos++] - b[bpos++];
+                       }
+                       nnz++;
+               }
+               //remaining entries of a are copied, remaining entries of b negated
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = a[apos++];
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = -b[bpos++];
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Subtracts two sparse vectors element-wise (a - b).
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector with at most alen+blen entries
+        */
+       public static SparseRowVector vectMinusWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = av - bv;
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = a[apos++];
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = -b[bpos++];
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Adds two sparse vectors element-wise.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector with at most alen+blen entries
+        */
+       public static SparseRowVector vectPlusWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = av + bv;
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //whatever is left of either input is copied as is
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = a[apos++];
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = b[bpos++];
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Logical XOR of a sparse vector with the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). If bval is non-zero, positions
+        * without a stored entry evaluate to 1, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector of 0/1 values with len (bval != 0) or alen entries
+        */
+       public static SparseRowVector vectXorWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval != 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = (a[aItr] == 0) ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = (a[ai+j] != 0) ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector XOR. The operand order does not matter for XOR, so the
+        * call is forwarded to the vector-scalar variant with swapped arguments.
+        *
+        * @return result of vectXorWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectXorWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector res = vectXorWrite(len, a, bval, aix, ai, alen);
+               return res;
+       }
+
+       /**
+        * Logical XOR of two sparse vectors; a value counts as true if it is non-zero.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector of 0/1 values with at most alen+blen entries
+        */
+       public static SparseRowVector vectXorWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = ((av != 0) ^ (bv != 0)) ? 1 : 0;
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //the other operand is an implicit zero for all remaining entries
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = (a[apos++] != 0) ? 1 : 0;
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = (b[bpos++] != 0) ? 1 : 0;
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Raises every element of a sparse vector to the power bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval == 0 every position
+        * evaluates to 1 (x^0 = 1, also for implicit zeros), so all len positions
+        * are materialized; otherwise only the alen stored entries are written.
+        *
+        * NOTE(review): for negative bval, implicit zeros would evaluate to
+        * Infinity, which the sparse branch does not materialize -- confirm that
+        * callers never pass bval < 0 here.
+        *
+        * @return sparse vector with len (bval == 0) or alen entries
+        */
+       public static SparseRowVector vectPowWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval == 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       //plain power without the former "- 1", which
+                                       //turned x^0 into 0 for all stored entries
+                                       values[j] = Math.pow(a[aItr], bval);
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = Math.pow(a[ai+j], bval);
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Element-wise minimum of a sparse vector and the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval < 0, positions
+        * without a stored entry evaluate to bval, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector with len (bval < 0) or alen entries
+        */
+       public static SparseRowVector vectMinWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval < 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = Math.min(a[aItr], bval);
+                                       aItr++;
+                               } else {
+                                       values[j] = bval;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = Math.min(a[ai+j], bval);
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector minimum. The operand order does not matter for min, so the
+        * call is forwarded to the vector-scalar variant with swapped arguments.
+        *
+        * @return result of vectMinWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectMinWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector res = vectMinWrite(len, a, bval, aix, ai, alen);
+               return res;
+       }
+
+       /**
+        * Element-wise minimum of two sparse vectors.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector with at most alen+blen entries
+        */
+       public static SparseRowVector vectMinWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = Math.min(av, bv);
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //the other operand is an implicit zero for all remaining entries
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = Math.min(a[apos++], 0.0);
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = Math.min(b[bpos++], 0.0);
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Element-wise maximum of a sparse vector and the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval > 0, positions
+        * without a stored entry evaluate to bval, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector with len (bval > 0) or alen entries
+        */
+       public static SparseRowVector vectMaxWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval > 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = Math.max(a[aItr], bval);
+                                       aItr++;
+                               } else {
+                                       values[j] = bval;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = Math.max(a[ai+j], bval);
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector maximum. The operand order does not matter for max, so the
+        * call is forwarded to the vector-scalar variant with swapped arguments.
+        *
+        * @return result of vectMaxWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectMaxWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector res = vectMaxWrite(len, a, bval, aix, ai, alen);
+               return res;
+       }
+
+       /**
+        * Element-wise maximum of two sparse vectors.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector with at most alen+blen entries
+        */
+       public static SparseRowVector vectMaxWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = Math.max(av, bv);
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //the other operand is an implicit zero for all remaining entries
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = Math.max(a[apos++], 0.0);
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = Math.max(b[bpos++], 0.0);
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Element-wise equality test of a sparse vector against the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval == 0, positions
+        * without a stored entry evaluate to 1, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector of 0/1 values with len (bval == 0) or alen entries
+        */
+       public static SparseRowVector vectEqualWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval == 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = a[aItr] == bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] == bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector equality test. The operand order does not matter for
+        * equality, so the call is forwarded to the vector-scalar variant.
+        *
+        * @return result of vectEqualWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectEqualWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector res = vectEqualWrite(len, a, bval, aix, ai, alen);
+               return res;
+       }
+
+       /**
+        * Element-wise equality test of two sparse vectors. Doesn't return a
+        * SparseRowVector, but still uses two sparse vectors as inputs: the result
+        * is a dense array obtained from allocVector(len, true, 1), in which only
+        * positions with a stored entry in a or b are overwritten (presumably the
+        * array is pre-filled with 1 -- confirm against allocVector).
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; a missing
+        * operand counts as 0.0 and the merge assumes ascending indexes.
+        *
+        * @return dense array of 0/1 values
+        */
+       public static double[] vectEqualWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               double[] c = allocVector(len, true, 1);
+               int aEnd = ai+alen;
+               int bEnd = bi+blen;
+               int aItr = ai;
+               int bItr = bi;
+               while(aItr < aEnd && bItr < bEnd) {
+                       int aIdx = aix[aItr];
+                       int bIdx = bix[bItr];
+                       int useA = (aIdx <= bIdx) ? 1 : 0;
+                       int useB = (aIdx >= bIdx) ? 1 : 0;
+                       int index = (useA == 1) ? aIdx : bIdx;
+                       double av = (useA == 1) ? a[aItr] : 0.0;
+                       double bv = (useB == 1) ? b[bItr] : 0.0;
+
+                       c[index] = av == bv ? 1 : 0;
+                       aItr += useA;
+                       bItr += useB;
+               }
+               //remaining entries are compared against an implicit zero, exactly as
+               //in the main loop; an explicitly stored 0 therefore still yields 1
+               for (; aItr < aEnd; aItr++) c[aix[aItr]] = (a[aItr] == 0) ? 1 : 0;
+               for (; bItr < bEnd; bItr++) c[bix[bItr]] = (b[bItr] == 0) ? 1 : 0;
+               return c;
+       }
+
+       /**
+        * Element-wise inequality test of a sparse vector against the scalar bval.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval != 0, positions
+        * without a stored entry evaluate to 1, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector of 0/1 values with len (bval != 0) or alen entries
+        */
+       public static SparseRowVector vectNotequalWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval != 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = a[aItr] != bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] != bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector inequality test. The operand order does not matter for
+        * inequality, so the call is forwarded to the vector-scalar variant.
+        *
+        * @return result of vectNotequalWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectNotequalWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector res = vectNotequalWrite(len, a, bval, aix, ai, alen);
+               return res;
+       }
+
+       /**
+        * Element-wise inequality test of two sparse vectors.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector of 0/1 values with at most alen+blen entries
+        */
+       public static SparseRowVector vectNotequalWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = (av != bv) ? 1 : 0;
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //the other operand is an implicit zero for all remaining entries
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = (a[apos++] != 0) ? 1 : 0;
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = (b[bpos++] != 0) ? 1 : 0;
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Element-wise test a &lt; bval of a sparse vector against a scalar.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval > 0, positions
+        * without a stored entry evaluate to 1, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector of 0/1 values with len (bval > 0) or alen entries
+        */
+       public static SparseRowVector vectLessWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval > 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = a[aItr] < bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] < bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector test bval &lt; a. This is the strict test a &gt; bval with
+        * swapped operands, hence the call is forwarded to vectGreaterWrite. The
+        * former forwarding to the greater-or-equal variant reported 1 on ties,
+        * e.g. for every implicit zero when bval is 0.
+        *
+        * @return result of vectGreaterWrite(len, a, bval, aix, ai, alen)
+        */
+       public static SparseRowVector vectLessWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               return vectGreaterWrite(len, a, bval, aix, ai, alen);
+       }
+
+       /**
+        * Element-wise test a &lt; b of two sparse vectors.
+        * Both inputs are given as value arrays (a, b) with matching index arrays
+        * (aix, bix), read from offsets ai/bi for alen/blen entries; len is not used.
+        * The output covers the union of both index sets, a missing operand counts
+        * as 0.0; the merge assumes ascending indexes in both inputs.
+        *
+        * @return sparse vector of 0/1 values with at most alen+blen entries
+        */
+       public static SparseRowVector vectLessWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final int aEnd = ai + alen;
+               final int bEnd = bi + blen;
+               SparseRowVector out = allocSparseVector(alen + blen);
+               int[] outIx = out.indexes();
+               double[] outVals = out.values();
+               int apos = ai;
+               int bpos = bi;
+               int nnz = 0;
+               while( apos < aEnd && bpos < bEnd ) {
+                       final int acol = aix[apos];
+                       final int bcol = bix[bpos];
+                       //both flags are set if the indexes are equal
+                       final boolean takeA = acol <= bcol;
+                       final boolean takeB = acol >= bcol;
+                       final double av = takeA ? a[apos] : 0.0;
+                       final double bv = takeB ? b[bpos] : 0.0;
+                       outIx[nnz] = takeA ? acol : bcol;
+                       outVals[nnz] = (av < bv) ? 1 : 0;
+                       apos += takeA ? 1 : 0;
+                       bpos += takeB ? 1 : 0;
+                       nnz++;
+               }
+               //the other operand is an implicit zero for all remaining entries
+               while( apos < aEnd ) {
+                       outIx[nnz] = aix[apos];
+                       outVals[nnz++] = (a[apos++] < 0) ? 1 : 0;
+               }
+               while( bpos < bEnd ) {
+                       outIx[nnz] = bix[bpos];
+                       outVals[nnz++] = (0 < b[bpos++]) ? 1 : 0;
+               }
+               out.setSize(nnz);
+               return out;
+       }
+
+       /**
+        * Element-wise test a &lt;= bval of a sparse vector against a scalar.
+        * The input is given as values a with indexes aix, read from offset ai for
+        * alen entries (ascending indexes assumed). For bval >= 0, positions
+        * without a stored entry evaluate to 1, so all len positions are
+        * materialized; otherwise only the alen stored entries are written.
+        *
+        * @return sparse vector of 0/1 values with len (bval >= 0) or alen entries
+        */
+       public static SparseRowVector vectLessequalWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval >= 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       final int aEnd = ai + alen;
+                       //the cursor must start at the offset ai (not 0), otherwise
+                       //entries in front of this vector are read for ai > 0
+                       int aItr = ai;
+                       for(int j = 0; j < len; j++) {
+                               indexes[j] = j;
+                               if(aItr < aEnd && aix[aItr] == j) {
+                                       values[j] = a[aItr] <= bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       values[j] = 1;
+                               }
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] <= bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector less-equal on a sparse row, i.e., {@code bval <= a[i]}.
+        *
+        * @param len  logical length of the row
+        * @param bval scalar left-hand side
+        * @param a    values of the sparse right-hand side
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries of the row
+        * @return sparse vector of 0/1 indicators
+        */
+       public static SparseRowVector vectLessequalWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               //bval <= a[i] mirrors to a[i] >= bval; the strict greater-than
+               //would drop entries (and implicit zeros) that are equal to bval
+               return vectGreaterequalWrite(len, a, bval, aix, ai, alen);
+       }
+
+       /**
+        * Element-wise {@code a[i] <= b[i]} for two sparse rows. In contrast to the
+        * other sparse-sparse kernels the result is a dense array, obtained from
+        * {@code allocVector(len, true, 1)}; only positions stored in at least one
+        * input are overwritten.
+        *
+        * @param len  length of the dense result
+        * @param a    values of the left input
+        * @param b    values of the right input
+        * @param aix  column indexes belonging to a
+        * @param bix  column indexes belonging to b
+        * @param ai   start offset into a and aix
+        * @param bi   start offset into b and bix
+        * @param alen number of entries of the left row
+        * @param blen number of entries of the right row
+        * @return dense vector of 0/1 indicators
+        */
+       public static double[] vectLessequalWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final double[] out = allocVector(len, true, 1);
+               final int aStop = ai+alen;
+               final int bStop = bi+blen;
+               int pa = ai;
+               int pb = bi;
+               //merge phase: both inputs still have entries
+               while( pa < aStop && pb < bStop ) {
+                       final int ia = aix[pa];
+                       final int ib = bix[pb];
+                       if( ia < ib ) {
+                               //index only in a: right side is 0
+                               out[ia] = (a[pa] <= 0.0) ? 1 : 0;
+                               pa++;
+                       }
+                       else if( ia > ib ) {
+                               //index only in b: left side is 0
+                               out[ib] = (0.0 <= b[pb]) ? 1 : 0;
+                               pb++;
+                       }
+                       else {
+                               out[ia] = (a[pa] <= b[pb]) ? 1 : 0;
+                               pa++;
+                               pb++;
+                       }
+               }
+               //leftovers of either input are compared against 0
+               while( pa < aStop ) {
+                       out[aix[pa]] = (a[pa] <= 0) ? 1 : 0;
+                       pa++;
+               }
+               while( pb < bStop ) {
+                       out[bix[pb]] = (0 <= b[pb]) ? 1 : 0;
+                       pb++;
+               }
+               return out;
+       }
+
+       /**
+        * Vector-scalar greater-than on a sparse row, i.e., {@code a[i] > bval}.
+        * <p>
+        * For {@code bval < 0} positions without a stored entry evaluate to 1, so
+        * the result carries an entry for every position in [0,len). Otherwise only
+        * the stored entries of the input are evaluated.
+        *
+        * @param len  logical length of the row
+        * @param a    values of the sparse input
+        * @param bval scalar right-hand side
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries of the row
+        * @return sparse vector of 0/1 indicators
+        */
+       public static SparseRowVector vectGreaterWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval < 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       int index = 0;
+                       //the row starts at offset ai (not 0); starting the cursor at 0
+                       //would match positions against entries outside of this row
+                       int aItr = ai;
+                       int aEnd = ai+alen;
+                       while(aItr < aEnd && index < len) {
+                               indexes[index] = index;
+                               if(aix[aItr] == index) {
+                                       values[index] = a[aItr] > bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       //no stored entry: 0 > bval holds
+                                       values[index] = 1;
+                               }
+                               index++;
+                       }
+                       //positions behind the last stored entry
+                       for(; index < len; index++) {
+                               indexes[index] = index;
+                               values[index] = 1;
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] > bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector greater-than on a sparse row, i.e., {@code bval > a[i]}.
+        *
+        * @param len  logical length of the row
+        * @param bval scalar left-hand side
+        * @param a    values of the sparse right-hand side
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries of the row
+        * @return sparse vector of 0/1 indicators
+        */
+       public static SparseRowVector vectGreaterWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               //bval > a[i] mirrors to the strict a[i] < bval; less-equal would
+               //also flag entries (and implicit zeros) that are equal to bval
+               return vectLessWrite(len, a, bval, aix, ai, alen);
+       }
+
+       /**
+        * Element-wise {@code a[i] > b[i]} for two sparse rows. Every index that is
+        * stored in at least one input gets an output entry (1 or 0); an index
+        * missing from one side is compared against 0.
+        *
+        * @param len  logical length of the rows (unused here)
+        * @param a    values of the left input
+        * @param b    values of the right input
+        * @param aix  column indexes belonging to a
+        * @param bix  column indexes belonging to b
+        * @param ai   start offset into a and aix
+        * @param bi   start offset into b and bix
+        * @param alen number of entries of the left row
+        * @param blen number of entries of the right row
+        * @return sparse vector of 0/1 indicators over the union of indexes
+        */
+       public static SparseRowVector vectGreaterWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final SparseRowVector out = allocSparseVector(alen+blen);
+               final int aStop = ai+alen;
+               final int bStop = bi+blen;
+               int pa = ai;
+               int pb = bi;
+               int k = 0;
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               //merge phase: both inputs still have entries
+               while( pa < aStop && pb < bStop ) {
+                       final int ia = aix[pa];
+                       final int ib = bix[pb];
+                       if( ia < ib ) {
+                               //index only in a: right side is 0
+                               oix[k] = ia;
+                               ovals[k] = (a[pa] > 0.0) ? 1 : 0;
+                               pa++;
+                       }
+                       else if( ia > ib ) {
+                               //index only in b: left side is 0
+                               oix[k] = ib;
+                               ovals[k] = (0.0 > b[pb]) ? 1 : 0;
+                               pb++;
+                       }
+                       else {
+                               //index in both inputs
+                               oix[k] = ia;
+                               ovals[k] = (a[pa] > b[pb]) ? 1 : 0;
+                               pa++;
+                               pb++;
+                       }
+                       k++;
+               }
+               //remaining entries of a (right side is 0)
+               while( pa < aStop ) {
+                       oix[k] = aix[pa];
+                       ovals[k] = (a[pa] > 0) ? 1 : 0;
+                       pa++;
+                       k++;
+               }
+               //remaining entries of b (left side is 0)
+               while( pb < bStop ) {
+                       oix[k] = bix[pb];
+                       ovals[k] = (0 > b[pb]) ? 1 : 0;
+                       pb++;
+                       k++;
+               }
+               out.setSize(k);
+               return out;
+       }
+
+       /**
+        * Vector-scalar greater-equal on a sparse row, i.e., {@code a[i] >= bval}.
+        * <p>
+        * For {@code bval <= 0} positions without a stored entry evaluate to 1, so
+        * the result carries an entry for every position in [0,len). Otherwise only
+        * the stored entries of the input are evaluated.
+        *
+        * @param len  logical length of the row
+        * @param a    values of the sparse input
+        * @param bval scalar right-hand side
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries of the row
+        * @return sparse vector of 0/1 indicators
+        */
+       public static SparseRowVector vectGreaterequalWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               if(bval <= 0) {
+                       SparseRowVector c = allocSparseVector(len);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       int index = 0;
+                       //the row starts at offset ai (not 0); starting the cursor at 0
+                       //would match positions against entries outside of this row
+                       int aItr = ai;
+                       int aEnd = ai+alen;
+                       while(aItr < aEnd && index < len) {
+                               indexes[index] = index;
+                               if(aix[aItr] == index) {
+                                       values[index] = a[aItr] >= bval ? 1 : 0;
+                                       aItr++;
+                               } else {
+                                       //no stored entry: 0 >= bval holds
+                                       values[index] = 1;
+                               }
+                               index++;
+                       }
+                       //positions behind the last stored entry
+                       for(; index < len; index++) {
+                               indexes[index] = index;
+                               values[index] = 1;
+                       }
+                       c.setSize(len);
+                       return c;
+               } else {
+                       SparseRowVector c = allocSparseVector(alen);
+                       int[] indexes = c.indexes();
+                       double[] values = c.values();
+                       for(int j = 0; j < alen; j++) {
+                               indexes[j] = aix[ai+j];
+                               values[j] = a[ai+j] >= bval ? 1 : 0;
+                       }
+                       c.setSize(alen);
+                       return c;
+               }
+       }
+
+       /**
+        * Scalar-vector greater-equal on a sparse row, i.e., {@code bval >= a[i]}.
+        *
+        * @param len  logical length of the row
+        * @param bval scalar left-hand side
+        * @param a    values of the sparse right-hand side
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries of the row
+        * @return sparse vector of 0/1 indicators
+        */
+       public static SparseRowVector vectGreaterequalWrite(int len, double bval, double[] a, int[] aix, int ai, int alen) {
+               //bval >= a[i] mirrors to a[i] <= bval; the strict less-than
+               //would drop entries (and implicit zeros) that are equal to bval
+               return vectLessequalWrite(len, a, bval, aix, ai, alen);
+       }
+
+       /**
+        * Element-wise {@code a[i] >= b[i]} for two sparse rows. In contrast to the
+        * other sparse-sparse kernels the result is a dense array, obtained from
+        * {@code allocVector(len, true, 1)}; only positions stored in at least one
+        * input are overwritten.
+        *
+        * @param len  length of the dense result
+        * @param a    values of the left input
+        * @param b    values of the right input
+        * @param aix  column indexes belonging to a
+        * @param bix  column indexes belonging to b
+        * @param ai   start offset into a and aix
+        * @param bi   start offset into b and bix
+        * @param alen number of entries of the left row
+        * @param blen number of entries of the right row
+        * @return dense vector of 0/1 indicators
+        */
+       public static double[] vectGreaterequalWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               final double[] out = allocVector(len, true, 1);
+               final int aStop = ai+alen;
+               final int bStop = bi+blen;
+               int pa = ai;
+               int pb = bi;
+               //merge phase: both inputs still have entries
+               while( pa < aStop && pb < bStop ) {
+                       final int ia = aix[pa];
+                       final int ib = bix[pb];
+                       if( ia < ib ) {
+                               //index only in a: right side is 0
+                               out[ia] = (a[pa] >= 0.0) ? 1 : 0;
+                               pa++;
+                       }
+                       else if( ia > ib ) {
+                               //index only in b: left side is 0
+                               out[ib] = (0.0 >= b[pb]) ? 1 : 0;
+                               pb++;
+                       }
+                       else {
+                               out[ia] = (a[pa] >= b[pb]) ? 1 : 0;
+                               pa++;
+                               pb++;
+                       }
+               }
+               //leftovers of either input are compared against 0
+               while( pa < aStop ) {
+                       out[aix[pa]] = (a[pa] >= 0) ? 1 : 0;
+                       pa++;
+               }
+               while( pb < bStop ) {
+                       out[bix[pb]] = (0 >= b[pb]) ? 1 : 0;
+                       pb++;
+               }
+               return out;
+       }
+
+       /**
+        * Vector-scalar bitwise-and on a sparse row: applies {@code bwAnd} to every
+        * stored entry and keeps the column indexes of the input.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param bval scalar operand, truncated to int before use
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectBitwandWrite(int len, double[] a, double bval, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               //the scalar is cast to int once, outside of the loop
+               final int mask = (int) bval;
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = bwAnd(a[pos], mask);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       //scalar-vector variant of bitwise-and; forwards to the vector-scalar
+       //overload with the same scalar and sparse row
+       public static SparseRowVector vectBitwandWrite(int len, double bval, 
double[] a, int[] aix, int ai, int alen) {
+               return vectBitwandWrite(len, a, bval, aix, ai, alen);
+       }
+
+       /**
+        * Element-wise bitwise-and of two sparse rows. Only indexes stored in both
+        * inputs produce an output entry.
+        *
+        * @param len  logical length of the rows (unused here)
+        * @param a    values of the left input
+        * @param b    values of the right input
+        * @param aix  column indexes belonging to a
+        * @param bix  column indexes belonging to b
+        * @param ai   start offset into a and aix
+        * @param bi   start offset into b and bix
+        * @param alen number of entries of the left row
+        * @param blen number of entries of the right row
+        * @return sparse vector over the intersection of indexes
+        */
+       public static SparseRowVector vectBitwandWrite(int len, double[] a, double[] b, int[] aix, int[] bix, int ai, int bi, int alen, int blen) {
+               //the intersection cannot exceed the shorter input
+               SparseRowVector c = allocSparseVector(Math.min(alen, blen));
+               int index = 0;
+               int aItr = ai;
+               int bItr = bi;
+               int aEnd = ai+alen;
+               int bEnd = bi+blen;
+               int[] indexes = c.indexes();
+               double[] values = c.values();
+               while(aItr < aEnd && bItr < bEnd) {
+                       int aIdx = aix[aItr];
+                       int bIdx = bix[bItr];
+                       if(aIdx == bIdx) {
+                               //compute and store only on matching indexes instead of
+                               //writing a scratch entry on every merge step
+                               indexes[index] = aIdx;
+                               values[index] = bwAnd(a[aItr], b[bItr]);
+                               index++;
+                               aItr++;
+                               bItr++;
+                       }
+                       else if(aIdx < bIdx)
+                               aItr++;
+                       else
+                               bItr++;
+               }
+               c.setSize(index);
+               return c;
+       }
+
+       /**
+        * Applies {@code Math.sqrt} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectSqrtWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.sqrt(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.abs} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectAbsWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.abs(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.round} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectRoundWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       //long result is widened back to double on assignment
+                       ovals[k] = Math.round(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.ceil} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectCeilWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.ceil(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.floor} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectFloorWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.floor(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.sin} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectSinWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.sin(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.tan} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectTanWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.tan(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.asin} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectAsinWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.asin(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.atan} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectAtanWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.atan(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.sinh} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectSinhWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.sinh(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.tanh} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectTanhWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.tanh(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
+       /**
+        * Applies {@code Math.signum} to the stored entries of a sparse row and
+        * returns them under the same column indexes.
+        *
+        * @param len  logical length of the row (unused here)
+        * @param a    values of the sparse input
+        * @param aix  column indexes belonging to a
+        * @param ai   start offset into a and aix
+        * @param alen number of entries to process
+        * @return sparse vector with one entry per stored input entry
+        */
+       public static SparseRowVector vectSignWrite(int len, double[] a, int[] aix, int ai, int alen) {
+               final SparseRowVector out = allocSparseVector(alen);
+               final int[] oix = out.indexes();
+               final double[] ovals = out.values();
+               for( int k = 0, pos = ai; k < alen; k++, pos++ ) {
+                       oix[k] = aix[pos];
+                       ovals[k] = Math.signum(a[pos]);
+               }
+               out.setSize(alen);
+               return out;
+       }
+
        //complex builtin functions that are not directly generated
        //(included here in order to reduce the number of imports)
        
@@ -2194,10 +3348,19 @@ public class LibSpoofPrimitives
                if( numVectors > 0 )
                        memPool.set(new VectorBuffer(numVectors, len, len2));
        }
+
+       /**
+        * Installs a fresh SparseVectorBuffer in sparseMemPool; a non-positive
+        * number of vectors leaves the pool untouched.
+        *
+        * @param numVectors number of vectors to pre-allocate
+        * @param len        first vector capacity
+        * @param len2       second vector capacity
+        */
+       public static void setupSparseThreadLocalMemory(int numVectors, int len, int len2) {
+               if( numVectors <= 0 )
+                       return;
+               SparseVectorBuffer buffer = new SparseVectorBuffer(numVectors, len, len2);
+               sparseMemPool.set(buffer);
+       }
        
        //discards the value currently held in memPool (counterpart of
        //setupThreadLocalMemory)
        public static void cleanupThreadLocalMemory() {
                memPool.remove();
        }
+
+       //discards the value currently held in sparseMemPool (counterpart of
+       //setupSparseThreadLocalMemory)
+       public static void cleanupSparseThreadLocalMemory() {
+               sparseMemPool.remove();
+       }
        
        public static double[] allocVector(int len, boolean reset) {
                return allocVector(len, reset, 0);
@@ -2217,6 +3380,21 @@ public class LibSpoofPrimitives
                        Arrays.fill(vect, resetVal);
                return vect;
        }
+
+       /**
+        * Hands out a sparse vector for an intermediate result: the next fitting
+        * vector from the buffer in sparseMemPool, or a newly allocated one if the
+        * buffer returns none.
+        *
+        * @param len requested capacity
+        * @return vector taken from the buffer or newly created
+        */
+       public static SparseRowVector allocSparseVector(int len) {
+               //find next matching vector in ring buffer
+               SparseRowVector pooled = sparseMemPool.get().next(len);
+
+               //allocate new vector if no vector was returned
+               if( pooled == null )
+                       return new SparseRowVector(len);
+
+               //a reused vector that still reports entries is reset first
+               if( pooled.size() != 0 )
+                       pooled.reset(len, len);
+               return pooled;
+       }
        
        /**
         * Simple ring buffer of allocated vectors, where
@@ -2265,4 +3443,52 @@ public class LibSpoofPrimitives
                                && _data.length == lnum);
                }
        }
+
+       /**
+        * Simple ring buffer of pre-allocated SparseRowVectors, where
+        * vectors of the two configured capacities are interspersed.
+        */
+       private static class SparseVectorBuffer {
+               //max capacity (entries) per pooled vector: 512K doubles are 4MB
+               //of values, the int index array comes on top of that
+               private static final int MAX_SIZE = 512*1024;
+               private final SparseRowVector[] _data;
+               private int _pos;
+               private final int _len1;
+               private final int _len2;
+
+               /**
+                * @param num  number of vectors per capacity
+                * @param len1 first capacity (capped at MAX_SIZE)
+                * @param len2 second capacity (capped at MAX_SIZE); only allocated
+                *             if positive and different from len1
+                */
+               public SparseVectorBuffer(int num, int len1, int len2) {
+                       //best effort size restriction since large intermediates
+                       //not necessarily used (num refers to the total number)
+                       len1 = Math.min(len1, MAX_SIZE);
+                       len2 = Math.min(len2, MAX_SIZE);
+                       //pre-allocate ring buffer
+                       int lnum = (len2>0 && len1!=len2) ? 2*num : num;
+                       _data = new SparseRowVector[lnum];
+                       for( int i=0; i<num; i++ ) {
+                               if( lnum > num ) {
+                                       _data[2*i] = new SparseRowVector(len1);
+                                       _data[2*i+1] = new SparseRowVector(len2);
+                               }
+                               else {
+                                       _data[i] = new SparseRowVector(len1);
+                               }
+                       }
+                       _pos = -1;
+                       _len1 = len1;
+                       _len2 = len2;
+               }
+
+               /**
+                * Advances the ring position to the next vector whose value array
+                * holds at least len entries.
+                *
+                * @param len required capacity
+                * @return pooled vector, or null if the buffer is empty or neither
+                *         configured capacity is large enough
+                */
+               public SparseRowVector next(int len) {
+                       //an empty buffer has nothing to hand out; without this guard
+                       //a request with len<=0 would index into the empty array
+                       if( _data.length == 0 || (_len1<len && _len2<len) )
+                               return null;
+                       do {
+                               _pos = (_pos+1>=_data.length) ? 0 : _pos+1;
+                       } while( _data[_pos].values().length<len );
+                       return _data[_pos];
+               }
+
+               /**
+                * @param num  requested number of vectors
+                * @param len1 requested first capacity
+                * @param len2 requested second capacity
+                * @return true if this buffer was created with an equal configuration
+                */
+               @SuppressWarnings("unused")
+               public boolean isReusable(int num, int len1, int len2) {
+                       int lnum = (len2>0 && len1!=len2) ? 2*num : num;
+                       return (_len1 == len1 && _len2 == len2
+                               && _data.length == lnum);
+               }
+       }
 }
diff --git a/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java 
b/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java
index eab223bc97..e48d44f33f 100644
--- a/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java
+++ b/src/main/java/org/apache/sysds/runtime/codegen/SpoofRowwise.java
@@ -27,6 +27,7 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Future;
 import java.util.stream.IntStream;
 
+import org.apache.sysds.api.DMLScript;
 import org.apache.sysds.runtime.DMLRuntimeException;
 import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
 import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
@@ -187,7 +188,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                
                //setup thread-local memory if necessary
                if( allocTmp &&_reqVectMem > 0 )
-                       LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, 
n, n2);
+                       if(inputs.get(0).isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                               
LibSpoofPrimitives.setupSparseThreadLocalMemory(_reqVectMem, n, n2);
+                               
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);
+                       } else {
+                               
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, n, n2);
+                       }
                
                //core sequential execute
                MatrixBlock a = inputs.get(0);
@@ -201,7 +207,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                
                //post-processing
                if( allocTmp &&_reqVectMem > 0 )
-                       LibSpoofPrimitives.cleanupThreadLocalMemory();
+                       if(inputs.get(0).isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                               
LibSpoofPrimitives.cleanupSparseThreadLocalMemory();
+                               LibSpoofPrimitives.cleanupThreadLocalMemory();
+                       } else {
+                               LibSpoofPrimitives.cleanupThreadLocalMemory();
+                       }
                if( flipOut ) {
                        fixTransposeDimensions(out);
                        out = LibMatrixReorg.transpose(out, new MatrixBlock(
@@ -431,7 +442,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                        
                        //allocate vector intermediates and partial output
                        if( _reqVectMem > 0 )
-                               
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               if(_a.isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                                       
LibSpoofPrimitives.setupSparseThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                                       
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               } else {
+                                       
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               }
                        DenseBlock c = DenseBlockFactory.createDenseBlock(1, 
_outLen);
                        
                        if( !_a.isInSparseFormat() )
@@ -440,7 +456,12 @@ public abstract class SpoofRowwise extends SpoofOperator
                                executeSparse(_a.getSparseBlock(), _b, 
_scalars, c, _clen, _rl, _ru, 0);
                        
                        if( _reqVectMem > 0 )
-                               LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               if(_a.isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                                       
LibSpoofPrimitives.cleanupSparseThreadLocalMemory();
+                                       
LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               } else {
+                                       
LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               }
                        return c;
                }
        }
@@ -474,15 +495,25 @@ public abstract class SpoofRowwise extends SpoofOperator
                public Long call() {
                        //allocate vector intermediates
                        if( _reqVectMem > 0 )
-                               
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               if(_a.isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                                       
LibSpoofPrimitives.setupSparseThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                                       
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               } else {
+                                       
LibSpoofPrimitives.setupThreadLocalMemory(_reqVectMem, _clen, _clen2);
+                               }
                        
                        if( !_a.isInSparseFormat() )
                                executeDense(_a.getDenseBlock(), _b, _scalars, 
_c.getDenseBlock(), _clen, _rl, _ru, 0);
                        else
                                executeSparse(_a.getSparseBlock(), _b, 
_scalars, _c.getDenseBlock(), _clen, _rl, _ru, 0);
-                       
+
                        if( _reqVectMem > 0 )
-                               LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               if(_a.isInSparseFormat() && 
DMLScript.SPARSE_INTERMEDIATE) {
+                                       
LibSpoofPrimitives.cleanupSparseThreadLocalMemory();
+                                       
LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               } else {
+                                       
LibSpoofPrimitives.cleanupThreadLocalMemory();
+                               }
                        
                        //maintain nnz for row partition
                        return _c.recomputeNonZeros(_rl, _ru-1, 0, 
_c.getNumColumns()-1);
diff --git 
a/src/test/java/org/apache/sysds/test/component/codegen/CPlanVectorPrimitivesTest.java
 
b/src/test/java/org/apache/sysds/test/component/codegen/CPlanVectorPrimitivesTest.java
index 7d8b4c9096..bae22b1c38 100644
--- 
a/src/test/java/org/apache/sysds/test/component/codegen/CPlanVectorPrimitivesTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/codegen/CPlanVectorPrimitivesTest.java
@@ -21,6 +21,7 @@ package org.apache.sysds.test.component.codegen;
 
 import java.lang.reflect.Method;
 
+import org.apache.sysds.runtime.data.SparseRowVector;
 import org.junit.Test;
 import org.apache.sysds.common.Types.OpOp2;
 import org.apache.sysds.hops.codegen.cplan.CNodeBinary.BinType;
@@ -716,6 +717,287 @@ public class CPlanVectorPrimitivesTest extends 
AutomatedTestBase
                testVectorBinaryPrimitive(BinType.VECT_BITWAND, 
InputType.VECTOR_SPARSE, InputType.VECTOR_DENSE);
        }
 
+       //********************testing with sparse intermediates********************//
+       //vector - scalar
+
+       @Test
+       public void testVectorScalarMultSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MULT_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarDivSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_DIV_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarMinSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MIN_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarMaxSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MAX_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarPowSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_POW_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_EQUAL_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarNotEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_NOTEQUAL_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarLessSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESS_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarLessEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESSEQUAL_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarGreaterSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_GREATER_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarGreaterEqualSparseToSparse() {
+               
testVectorBinarySparsePrimitive(BinType.VECT_GREATEREQUAL_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       //TODO: this test only makes sense when bval is 0
+       @Test
+       public void testVectorScalarXorSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_XOR_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       @Test
+       public void testVectorScalarBitwAndSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_BITWAND_SCALAR, 
InputType.VECTOR_SPARSE, InputType.SCALAR);
+       }
+
+       //scalar - vector
+
+       @Test
+       public void testScalarVectorMultSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MULT_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorDivSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_DIV_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorMinSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MIN_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorMaxSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MAX_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_EQUAL_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorNotEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_NOTEQUAL_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorLessSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESS_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorLessEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESSEQUAL_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorGreaterSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_GREATER_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorGreaterEqualSparseToSparse() {
+               
testVectorBinarySparsePrimitive(BinType.VECT_GREATEREQUAL_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorXorSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_XOR_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testScalarVectorBitwAndSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_BITWAND_SCALAR, 
InputType.SCALAR, InputType.VECTOR_SPARSE);
+       }
+
+       //special binary
+
+       //      @Test
+       //      public void testVectorPow2SparseToSparse() {
+       //              testVectorUnarySparsePrimitive(UnaryType.VECT_POW2, InputType.VECTOR_SPARSE);
+       //      }
+       //
+       //      @Test
+       //      public void testVectorMult2SparseToSparse() {
+       //              testVectorUnarySparsePrimitive(UnaryType.VECT_MULT2, InputType.VECTOR_SPARSE);
+       //      }
+
+       //vector - vector
+
+       @Test
+       public void testVectorVectorMultSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MULT, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorDivSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_DIV, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorPlusSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_PLUS, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorMinusSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MINUS, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorMinSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MIN, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+       @Test
+       public void testVectorVectorMaxSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_MAX, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_EQUAL, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorNotEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_NOTEQUAL, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorLessSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESS, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorLessEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_LESSEQUAL, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorGreaterSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_GREATER, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorGreaterEqualSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_GREATEREQUAL, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorXorSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_XOR, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorVectorBitwAndSparseToSparse() {
+               testVectorBinarySparsePrimitive(BinType.VECT_BITWAND, 
InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       }
+
+       //      @Test
+       //      public void testVectorVectorMatrixMultSparseToSparse() {
+       //              testVectorBinarySparsePrimitive(BinType.VECT_MATRIXMULT, InputType.VECTOR_SPARSE, InputType.VECTOR_SPARSE);
+       //      }
+
+       //unary primitives with sparse intermediates
+
+       @Test
+       public void testVectorSqrtSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_SQRT, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorAbsSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_ABS, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorRoundSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_ROUND, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorCeilSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_CEIL, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorFloorSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_FLOOR, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorSinSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_SIN, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorTanSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_TAN, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorAsinSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_ASIN, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorAtanSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_ATAN, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorSinhSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_SINH, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorTanhSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_TANH, 
InputType.VECTOR_SPARSE);
+       }
+
+       @Test
+       public void testVectorSignSparseToSparse() {
+               testVectorUnarySparsePrimitive(UnaryType.VECT_SIGN, 
InputType.VECTOR_SPARSE);
+       }
+
        @SuppressWarnings("incomplete-switch")
        private static void testVectorAggPrimitive(UnaryType aggtype, InputType 
type1)
        {
@@ -792,6 +1074,45 @@ public class CPlanVectorPrimitivesTest extends 
AutomatedTestBase
                        throw new RuntimeException(ex);
                }
        }
+
+       /**
+        * Checks a unary vector primitive with sparse output, row by row, against the
+        * result of the corresponding MatrixBlock unary operation.
+        *
+        * @param utype unary vector type; the part of its name after the underscore selects
+        *              both the vect&lt;Op&gt;Write primitive and the builtin opcode
+        * @param type1 input type, only used to choose the sparsity of the generated input
+        */
+       private static void testVectorUnarySparsePrimitive(UnaryType utype, InputType type1)
+       {
+               try {
+                       //operation name, e.g., VECT_SQRT -> SQRT
+                       final String opName = utype.name().split("_")[1];
+
+                       //generate input data
+                       final double sp = (type1 == InputType.VECTOR_DENSE) ? sparsity1 : sparsity2;
+                       final MatrixBlock input = MatrixBlock.randOperations(m, n, sp, -1, 1, "uniform", 7);
+
+                       //look up the vector primitive via reflection
+                       final Method primitive = LibSpoofPrimitives.class.getMethod(
+                               "vect"+StringUtils.camelize(opName)+"Write",
+                               int.class, double[].class, int[].class, int.class, int.class);
+
+                       for( int row=0; row<m; row++ ) {
+                               //execute vector primitive via reflection
+                               final SparseRowVector sparseOut = (SparseRowVector) primitive.invoke(null, n,
+                                       input.getSparseBlock().values(row), input.getSparseBlock().indexes(row),
+                                       input.getSparseBlock().pos(row), input.getSparseBlock().size(row));
+
+                               //scatter the sparse result into a dense vector
+                               final double[] actual = new double[n];
+                               final int[] outIx = sparseOut.indexes();
+                               for( int k=0; k<sparseOut.size(); k++ ) {
+                                       final int col = outIx[k];
+                                       actual[col] = sparseOut.get(col);
+                               }
+
+                               //compute the reference result with the regular unary operator
+                               final UnaryOperator refOp = new UnaryOperator(
+                                       Builtin.getBuiltinFnObject(opName.toLowerCase()));
+                               final MatrixBlock rowBlock = input.slice(row, row, 0, n-1, new MatrixBlock());
+                               final double[] expected = DataConverter.convertToDoubleVector(
+                                       rowBlock.unaryOperations(refOp, new MatrixBlock()), false);
+
+                               //compare results
+                               TestUtils.compareMatrices(actual, expected, eps);
+                       }
+               }
+               catch( Exception ex ) {
+                       throw new RuntimeException(ex);
+               }
+       }
        
        private static void testVectorBinaryPrimitive(BinType bintype, 
InputType type1, InputType type2)
        {
@@ -868,4 +1189,144 @@ public class CPlanVectorPrimitivesTest extends 
AutomatedTestBase
                        throw new RuntimeException(ex);
                }
        }
+
+       /**
+        * Checks a binary vector primitive on sparse inputs (vector-scalar, scalar-vector, or
+        * vector-vector) row by row against the corresponding MatrixBlock operation. The
+        * primitive vect&lt;Op&gt;Write is looked up via reflection from the name of the binary type.
+        * If one input is a scalar, the scalar is the maximum of the matrix generated for that
+        * side; on the last row it is instead 0 (test type 0) or the minimum of that matrix
+        * (test type 1), see getTestType.
+        *
+        * @param bintype binary vector type under test
+        * @param type1 type of the left input
+        * @param type2 type of the right input
+        */
+       private static void testVectorBinarySparsePrimitive(BinType bintype, InputType type1, InputType type2) {
+               try {
+                       //generate input data (scalar later derived if needed)
+                       double sparsityA = (type1 == InputType.VECTOR_DENSE) ? sparsity1 : sparsity2;
+                       MatrixBlock inA = MatrixBlock.randOperations(m, n, sparsityA, -5, 5, "uniform", 3);
+                       double sparsityB = (type2 == InputType.VECTOR_DENSE) ? sparsity1 : sparsity2;
+                       MatrixBlock inB = MatrixBlock.randOperations(m, n, sparsityB, -5, 5, "uniform", 7);
+
+                       boolean sparse = getOutputType(bintype);
+                       int testType = getTestType(bintype);
+                       boolean vectScalar = (type1==InputType.VECTOR_SPARSE && type2==InputType.SCALAR);
+                       boolean scalarVect = (type1==InputType.SCALAR && type2==InputType.VECTOR_SPARSE);
+                       boolean vectVect = (type1==InputType.VECTOR_SPARSE && type2==InputType.VECTOR_SPARSE);
+
+                       //get vector primitive via reflection
+                       String meName = "vect"+StringUtils.camelize(bintype.name().split("_")[1])+"Write";
+                       final Method me;
+                       if( vectScalar ) {
+                               me = LibSpoofPrimitives.class.getMethod(meName,
+                                       int.class, double[].class, double.class, int[].class, int.class, int.class);
+                       }
+                       else if( scalarVect ) {
+                               me = LibSpoofPrimitives.class.getMethod(meName,
+                                       int.class, double.class, double[].class, int[].class, int.class, int.class);
+                       }
+                       else { //vector-vector signature
+                               me = LibSpoofPrimitives.class.getMethod(meName,
+                                       int.class, double[].class, double[].class, int[].class, int[].class,
+                                       int.class, int.class, int.class, int.class);
+                       }
+
+                       //loop-invariant parts: scalar candidates (derived once from the matrix
+                       //of the scalar side) and the opcode of the reference operation
+                       MatrixBlock scalarSrc = (type1 == InputType.SCALAR) ? inA :
+                               (type2 == InputType.SCALAR) ? inB : null;
+                       double scalarMin = (scalarSrc != null) ? scalarSrc.min() : Double.NaN;
+                       double scalarMax = (scalarSrc != null) ? scalarSrc.max() : Double.NaN;
+                       String opcode = OpOp2.valueOf(bintype.name().split("_")[1]).toString();
+
+                       for( int i=0; i<m; i++ ) {
+                               //scalar of this row, shared by primitive call and reference operation
+                               //(only used if one of the inputs is a scalar)
+                               boolean specialRow = (testType >= 0 && i == m-1);
+                               double scalar = !specialRow ? scalarMax : (testType == 0) ? 0 : scalarMin;
+
+                               //execute vector primitive via reflection
+                               double[] ret1 = new double[n];
+                               SparseRowVector retX = null;
+                               if( vectScalar ) {
+                                       retX = (SparseRowVector) me.invoke(null, n, inA.getSparseBlock().values(i), scalar,
+                                               inA.getSparseBlock().indexes(i), inA.getSparseBlock().pos(i), inA.getSparseBlock().size(i));
+                               }
+                               else if( scalarVect ) {
+                                       retX = (SparseRowVector) me.invoke(null, n, scalar, inB.getSparseBlock().values(i),
+                                               inB.getSparseBlock().indexes(i), inB.getSparseBlock().pos(i), inB.getSparseBlock().size(i));
+                               }
+                               else if( vectVect ) {
+                                       Object out = me.invoke(null, n, inA.getSparseBlock().values(i), inB.getSparseBlock().values(i),
+                                               inA.getSparseBlock().indexes(i), inB.getSparseBlock().indexes(i),
+                                               inA.getSparseBlock().pos(i), inB.getSparseBlock().pos(i),
+                                               inA.getSparseBlock().size(i), inB.getSparseBlock().size(i));
+                                       //some vector-vector primitives return a dense array, see getOutputType
+                                       if( sparse )
+                                               retX = (SparseRowVector) out;
+                                       else
+                                               ret1 = (double[]) out;
+                               }
+
+                               //scatter a sparse result into the dense comparison vector
+                               if( sparse ) {
+                                       int[] indexes = retX.indexes();
+                                       for( int j = 0; j < retX.size(); j++ ) {
+                                               ret1[indexes[j]] = retX.get(indexes[j]);
+                                       }
+                               }
+
+                               //execute comparison operation
+                               double[] ret2 = null;
+                               if( type1 == InputType.SCALAR ) {
+                                       ScalarOperator bop = InstructionUtils
+                                               .parseScalarBinaryOperator(opcode, true).setConstant(scalar);
+                                       MatrixBlock in2 = inB.slice(i, i, 0, n-1, new MatrixBlock());
+                                       ret2 = DataConverter.convertToDoubleVector(
+                                               in2.scalarOperations(bop, new MatrixBlock()), false);
+                               }
+                               else if( type2 == InputType.SCALAR ) {
+                                       ScalarOperator bop = InstructionUtils
+                                               .parseScalarBinaryOperator(opcode, false).setConstant(scalar);
+                                       MatrixBlock in1 = inA.slice(i, i, 0, n-1, new MatrixBlock());
+                                       ret2 = DataConverter.convertToDoubleVector(
+                                               in1.scalarOperations(bop, new MatrixBlock()), false);
+                               }
+                               else { //vector-vector
+                                       BinaryOperator bop = InstructionUtils.parseBinaryOperator(opcode);
+                                       MatrixBlock in1 = inA.slice(i, i, 0, n-1, new MatrixBlock());
+                                       MatrixBlock in2 = inB.slice(i, i, 0, n-1, new MatrixBlock());
+                                       ret2 = DataConverter.convertToDoubleVector(
+                                               in1.binaryOperations(bop, in2, new MatrixBlock()), false);
+                               }
+
+                               //compare results
+                               TestUtils.compareMatrices(ret2, ret1, eps);
+                       }
+               }
+               catch( Exception ex ) {
+                       throw new RuntimeException(ex);
+               }
+       }
+
+
+       /**
+        * Indicates whether the primitive matching the given binary type has a sparse return.
+        *
+        * @param type binary vector type under test
+        * @return {@code false} for VECT_EQUAL, VECT_LESSEQUAL, and VECT_GREATEREQUAL;
+        *         {@code true} for all other types
+        */
+       private static boolean getOutputType(BinType type) {
+               boolean sparseOutput = true;
+               switch(type) {
+                       case VECT_EQUAL:
+                       case VECT_LESSEQUAL:
+                       case VECT_GREATEREQUAL:
+                               sparseOutput = false;
+                               break;
+                       default:
+                               //all remaining types keep the sparse return
+                               break;
+               }
+               return sparseOutput;
+       }
+
+       /**
+        * Selects the kind of scalar used for testing the given binary type.
+        *
+        * @param type binary vector type under test
+        * @return {@code -1}, for normal testing;<br>
+        *         {@code 0}, for testing with 0 and non-zeros;<br>
+        *         {@code 1}, for testing with negative and positive numbers
+        */
+       private static int getTestType(BinType type) {
+               int testType = -1;
+               switch(type) {
+                       //note: VECT_POW_SCALAR is currently not part of this group
+                       case VECT_DIV_SCALAR:
+                       case VECT_EQUAL_SCALAR:
+                       case VECT_NOTEQUAL_SCALAR:
+                       case VECT_XOR_SCALAR:
+                               testType = 0;
+                               break;
+                       case VECT_GREATER_SCALAR:
+                       case VECT_GREATEREQUAL_SCALAR:
+                       case VECT_LESS_SCALAR:
+                       case VECT_LESSEQUAL_SCALAR:
+                       case VECT_MIN_SCALAR:
+                       case VECT_MAX_SCALAR:
+                               testType = 1;
+                               break;
+                       default:
+                               //normal testing
+                               break;
+               }
+               return testType;
+       }
 }
diff --git 
a/src/test/java/org/apache/sysds/test/component/codegen/SparseVectorAllocTest.java
 
b/src/test/java/org/apache/sysds/test/component/codegen/SparseVectorAllocTest.java
new file mode 100644
index 0000000000..e777e4ef6d
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/codegen/SparseVectorAllocTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.codegen;
+
+import org.apache.sysds.runtime.codegen.LibSpoofPrimitives;
+import org.apache.sysds.runtime.data.SparseRowVector;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * This is the component test for the ring buffer used in LibSpoofPrimitives.java,
+ * that allocates a vector with a certain size.
+ * Every allocation method is tested to achieve the needed coverage.
+ */
+public class SparseVectorAllocTest extends AutomatedTestBase
+{
+       //NOTE(review): these fixtures are not referenced anywhere in this class;
+       //kept unchanged in case they are intended for upcoming tests
+       double[] val1 = new double[]{1.5, 5.7, 9.1, 3.7, 5.3};
+       double[] val2 = new double[]{9.6, 7.1, 2.7};
+       int[] indexes1 = new int[]{3, 7, 14, 20, 81};
+       int[] indexes2 = new int[]{20, 30, 90};
+
+       @Override
+       public void setUp() {
+               TestUtils.clearAssertionInformation();
+       }
+
+       @Test
+       public void testBasicAllocationSameLen() {
+               testBasicSparseVectorAllocation(1, 10, 10);
+       }
+
+       @Test
+       public void testBasicAllocationLongerExp() {
+               testBasicSparseVectorAllocation(1, 10, 15);
+       }
+
+       @Test
+       public void testBasicAllocationShorterExp() {
+               testBasicSparseVectorAllocation(1, 10, 7);
+       }
+
+       @Test
+       public void testVectorReuse1() {
+               testBufferReuse(3, 10, 5, 5);
+       }
+
+       @Test
+       public void testVectorReuse2() {
+               testBufferReuse(3, 10, -1, 5);
+       }
+
+       /** tests the allocation of an empty vector
+        * @param numVectors number of vectors that should be pre-allocated
+        * @param len the length of the vector
+        * @param expLen the expected length of the allocated vector
+        */
+       public void testBasicSparseVectorAllocation(int numVectors, int len, int expLen) {
+               //test the basic allocation of an empty vector
+               LibSpoofPrimitives.setupSparseThreadLocalMemory(numVectors, len, -1);
+               try {
+                       SparseRowVector sparseVec = LibSpoofPrimitives.allocSparseVector(expLen);
+
+                       Assert.assertTrue("Vector capacity should be initialized correctly", expLen <= sparseVec.capacity());
+                       Assert.assertEquals("Vector size should be initialized with 0", 0, sparseVec.size());
+               }
+               finally {
+                       //always release the thread-local buffers, even on assertion failures,
+                       //so that a failing test does not leak state into subsequent tests
+                       LibSpoofPrimitives.cleanupSparseThreadLocalMemory();
+               }
+       }
+
+       /** tests the allocation of a vector that is reused multiple times
+        * @param numVectors number of vectors that should be pre-allocated
+        * @param len1 length of the first vector
+        * @param len2 length of the second vector
+        * @param expLen expected length of allocated vector
+        */
+       public void testBufferReuse(int numVectors, int len1, int len2, int expLen) {
+               //test the reuse of the vectors in the ring buffer
+               LibSpoofPrimitives.setupSparseThreadLocalMemory(numVectors, len1, len2);
+               try {
+                       //allocate first vector
+                       SparseRowVector vec1 = LibSpoofPrimitives.allocSparseVector(expLen);
+                       vec1.set(0, 1.0);
+                       vec1.set(2, 2.0);
+
+                       //allocate second vector
+                       SparseRowVector vec2 = LibSpoofPrimitives.allocSparseVector(expLen);
+                       Assert.assertEquals("Reused vector should be reset to size 0", 0, vec2.size());
+                       scaleEntries(vec2);
+
+                       SparseRowVector vec3 = LibSpoofPrimitives.allocSparseVector(expLen);
+                       Assert.assertEquals("Reused vector should be reset to size 0", 0, vec3.size());
+
+                       //each vector is now only accessed through its own indexes
+                       //(previously vec4 was read via the indexes of vec3)
+                       SparseRowVector vec4 = LibSpoofPrimitives.allocSparseVector(expLen);
+                       scaleEntries(vec4);
+                       Assert.assertEquals("Reused vector should be reset to size 0", 0, vec4.size());
+
+                       //(previously the scaled values of vec5 were written into vec2)
+                       SparseRowVector vec5 = LibSpoofPrimitives.allocSparseVector(expLen);
+                       scaleEntries(vec5);
+                       Assert.assertEquals("Reused vector should be reset to size 0", 0, vec5.size());
+               }
+               finally {
+                       //always release the thread-local buffers, even on assertion failures
+                       LibSpoofPrimitives.cleanupSparseThreadLocalMemory();
+               }
+       }
+
+       /** multiplies every stored entry of the given vector by 32, in place;
+        * for an empty vector the loop does not execute
+        * @param vec the vector to modify
+        */
+       private static void scaleEntries(SparseRowVector vec) {
+               for(int j = 0; j < vec.size(); j++) {
+                       int ix = vec.indexes()[j];
+                       vec.set(ix, vec.get(ix) * 32);
+               }
+       }
+}
diff --git 
a/src/test/java/org/apache/sysds/test/functions/codegen/RowAggTmplTest.java 
b/src/test/java/org/apache/sysds/test/functions/codegen/RowAggTmplTest.java
index 73dbace862..03fcb40bef 100644
--- a/src/test/java/org/apache/sysds/test/functions/codegen/RowAggTmplTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/codegen/RowAggTmplTest.java
@@ -89,6 +89,8 @@ public class RowAggTmplTest extends AutomatedTestBase
        private static final String TEST_NAME46 = TEST_NAME+"46"; //conv2d(X - 
mean(X), F1) + conv2d(X - mean(X), F2);
        private static final String TEST_NAME47 = TEST_NAME+"47"; //sum(X + 
rowVars(X))
        private static final String TEST_NAME48 = TEST_NAME+"48"; 
//sum(rowVars(X))
+       private static final String TEST_NAME49 = TEST_NAME+"49"; 
//X*rowSums(K*v)*X
+       private static final String TEST_NAME50 = TEST_NAME+"50"; 
//(abs(A)*B)+(B*v)
 
        private static final String TEST_DIR = "functions/codegen/";
        private static final String TEST_CLASS_DIR = TEST_DIR + 
RowAggTmplTest.class.getSimpleName() + "/";
@@ -100,7 +102,7 @@ public class RowAggTmplTest extends AutomatedTestBase
        @Override
        public void setUp() {
                TestUtils.clearAssertionInformation();
-               for(int i=1; i<=48; i++)
+               for(int i=1; i<=50; i++)
                        addTestConfiguration( TEST_NAME+i, new 
TestConfiguration(TEST_CLASS_DIR, TEST_NAME+i, new String[] { String.valueOf(i) 
}) );
        }
        
@@ -829,6 +831,12 @@ public class RowAggTmplTest extends AutomatedTestBase
                testCodegenIntegration( TEST_NAME48, false, ExecType.SPARK );
        }
 
+       @Test
+       public void testCodegenRowAgg49CP() {
+               //pattern 49: X*rowSums(K*v)*X
+               testCodegenIntegration( TEST_NAME49, false, ExecType.CP );
+       }
+
+       @Test
+       public void testCodegenRowAgg50CP() {
+               //pattern 50: (abs(A)*B)+(B*v)
+               testCodegenIntegration( TEST_NAME50, false, ExecType.CP );
+       }
+
        private void testCodegenIntegration( String testname, boolean rewrites, 
ExecType instType )
        {
                boolean oldFlag = OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION;
@@ -841,7 +849,7 @@ public class RowAggTmplTest extends AutomatedTestBase
                        
                        String HOME = SCRIPT_DIR + TEST_DIR;
                        fullDMLScriptName = HOME + testname + ".dml";
-                       programArgs = new String[]{"-explain", "codegen", "-stats", "-args", output("S") };
+                       programArgs = new String[]{"-explain", "codegen", "-sparseIntermediate", "-stats", "-args", output("S") };
                        
                        fullRScriptName = HOME + testname + ".R";
                        rCmd = getRCmd(inputDir(), expectedDir());
diff --git a/src/test/scripts/functions/codegen/rowAggPattern49.R b/src/test/scripts/functions/codegen/rowAggPattern49.R
new file mode 100644
index 0000000000..06d5c63144
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern49.R
@@ -0,0 +1,54 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+# library("matrixStats")
+
+W = matrix(seq(28,29), 1, 2)
+J = matrix(0, 1, 8)
+Z = cbind(J, W, J)
+Y = matrix(0, 10, 18)
+X = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+v = seq(1,81)
+v1 = seq(20, 37)
+W = matrix(seq(13,14), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+K = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+
+# S = (X < rowSums(X*K))
+# S = X*rowMins(K)*X
+S = X*rowSums(K*v)*X
+# S = (X*v)/rowSums(X*v)
+# S = abs((X*v)/rowSums(X*v))
+# S = (X/v)+rowMeans(X-v)
+# S = (X*v)+rowSums(X*v)
+# S = (X*rowSums(X*v))/(X*v)
+
+# S = X*rowSums(X*K)
+# S = rowSums((X*v)/K)*v
+# S = (K*v)/(rowSums(X*v))
+
+
+writeMM(as(S, "CsparseMatrix"), paste(args[2], "S", sep=""));
diff --git a/src/test/scripts/functions/codegen/rowAggPattern49.dml b/src/test/scripts/functions/codegen/rowAggPattern49.dml
new file mode 100644
index 0000000000..8c0d1db225
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern49.dml
@@ -0,0 +1,52 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+W = matrix(seq(28,29), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+X = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+v = seq(1,81)
+v1 = seq(20, 37)
+W = matrix(seq(13,14), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+K = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+while(FALSE) { }
+
+# B = X < rowSums(X*K)
+# S = abs(21) * B
+# S = B * rowSums(v)
+# S = (X < rowSums(X*K))
+# S = X*(k>1)*X
+S = X*rowSums(K*v)*X
+# S = (X*v)/rowSums(X*v)
+# S = abs((X*v)/rowSums(X*v))
+# S = (X/v)+rowMeans(X-v)
+# S = (X*v)+rowSums(X*v)
+# S = (X*rowSums(X*v))/(X*v)
+
+# S = X*rowSums(X*K)
+# S = rowSums((X*v)/K)*v
+# S = (K*v)/(rowSums(X*v))
+
+write(S,$1)
diff --git a/src/test/scripts/functions/codegen/rowAggPattern50.R b/src/test/scripts/functions/codegen/rowAggPattern50.R
new file mode 100644
index 0000000000..af2c3301a7
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern50.R
@@ -0,0 +1,43 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args<-commandArgs(TRUE)
+options(digits=22)
+library("Matrix")
+# library("matrixStats")
+
+W = matrix(seq(28,29), 1, 2)
+J = matrix(0, 1, 8)
+Z = cbind(J, W, J)
+Y = matrix(0, 10, 18)
+X = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+v = seq(1,81)
+v1 = seq(20, 37)
+W = matrix(seq(13,14), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+K = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+
+S = (abs(X)*K)+rowSums(X*v)
+
+
+writeMM(as(S, "CsparseMatrix"), paste(args[2], "S", sep=""));
diff --git a/src/test/scripts/functions/codegen/rowAggPattern50.dml b/src/test/scripts/functions/codegen/rowAggPattern50.dml
new file mode 100644
index 0000000000..64efe460f9
--- /dev/null
+++ b/src/test/scripts/functions/codegen/rowAggPattern50.dml
@@ -0,0 +1,40 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+W = matrix(seq(28,29), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+X = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+v = seq(1,81)
+v1 = seq(20, 37)
+W = matrix(seq(13,14), 1, 2)
+J = matrix(0, 1, 8)
+Z= cbind(J, W, J)
+Y = matrix(0, 10, 18)
+K = rbind(Z, Y, Y, Y, Y, Y, Y, Y, Y)
+while(FALSE) { }
+
+
+S = (abs(X)*K)+rowSums(X*v)
+
+
+write(S,$1)

(systemds) branch main updated: [SYSTEMDS-3860] Extended sparsity exploitation in codegen row templates

Reply via email to