Repository: incubator-systemml
Updated Branches:
  refs/heads/master 2b5b12557 -> 6f8cea9bc


[SYSTEMML-446] Add support for cublas daxpy operation

Closes #330.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6f8cea9b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6f8cea9b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6f8cea9b

Branch: refs/heads/master
Commit: 6f8cea9bc2d42913d0cf1c917c1aada9aa55bee1
Parents: 2b5b125
Author: Niketan Pansare <[email protected]>
Authored: Fri Jan 6 10:28:54 2017 -0800
Committer: Niketan Pansare <[email protected]>
Committed: Fri Jan 6 10:28:54 2017 -0800

----------------------------------------------------------------------
 .../java/org/apache/sysml/hops/TernaryOp.java   |  11 +-
 .../java/org/apache/sysml/lops/PlusMult.java    |   4 +-
 .../instructions/GPUInstructionParser.java      |   9 +-
 .../gpu/MatrixMatrixAxpyGPUInstruction.java     | 112 +++++++++++++++++++
 .../runtime/matrix/data/LibMatrixCUDA.java      |  32 ++++++
 .../misc/RewriteFuseBinaryOpChainTest.java      |   8 +-
 6 files changed, 168 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/hops/TernaryOp.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/TernaryOp.java 
b/src/main/java/org/apache/sysml/hops/TernaryOp.java
index f3b490d..b8be594 100644
--- a/src/main/java/org/apache/sysml/hops/TernaryOp.java
+++ b/src/main/java/org/apache/sysml/hops/TernaryOp.java
@@ -19,6 +19,7 @@
 
 package org.apache.sysml.hops;
 
+import org.apache.sysml.api.DMLScript;
 import org.apache.sysml.conf.ConfigurationManager;
 import org.apache.sysml.hops.rewrite.HopRewriteUtils;
 import org.apache.sysml.lops.Aggregate;
@@ -635,10 +636,14 @@ public class TernaryOp extends Hop
                if ( _op != OpOp3.PLUS_MULT && _op != OpOp3.MINUS_MULT )
                        throw new HopsException("Unexpected operation: " + _op 
+ ", expecting " + OpOp3.PLUS_MULT + " or" +  OpOp3.MINUS_MULT);
                
-               ExecType et = optFindExecType();
+               ExecType et = null;
+               if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || 
getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET) )
+                       et = ExecType.GPU;
+               else
+                       et = optFindExecType();
                PlusMult plusmult = null;
                
-               if( et == ExecType.CP || et == ExecType.SPARK ) {
+               if( et == ExecType.CP || et == ExecType.SPARK || et == 
ExecType.GPU ) {
                        plusmult = new PlusMult(
                                        getInput().get(0).constructLops(),
                                        getInput().get(1).constructLops(),
@@ -1081,4 +1086,4 @@ public class TernaryOp extends Hop
                
                return ret;
        }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/lops/PlusMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/lops/PlusMult.java 
b/src/main/java/org/apache/sysml/lops/PlusMult.java
index 8ee8625..7052185 100644
--- a/src/main/java/org/apache/sysml/lops/PlusMult.java
+++ b/src/main/java/org/apache/sysml/lops/PlusMult.java
@@ -45,7 +45,7 @@ public class PlusMult extends Lop
                boolean aligner = false;
                boolean definesMRJob = false;
                
-               if ( et == ExecType.CP ||  et == ExecType.SPARK ){
+               if ( et == ExecType.CP ||  et == ExecType.SPARK || et == 
ExecType.GPU ){
                        lps.addCompatibility(JobType.INVALID);
                        lps.setProperties( inputs, et, 
ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob );
                }
@@ -135,4 +135,4 @@ public class PlusMult extends Lop
                
                return sb.toString();
        }
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
index 76d900d..c1d884e 100644
--- 
a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java
@@ -26,6 +26,7 @@ import 
org.apache.sysml.runtime.instructions.gpu.ArithmeticBinaryGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.BuiltinUnaryGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.ConvolutionGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.GPUInstruction;
+import 
org.apache.sysml.runtime.instructions.gpu.MatrixMatrixAxpyGPUInstruction;
 import 
org.apache.sysml.runtime.instructions.gpu.GPUInstruction.GPUINSTRUCTION_TYPE;
 import org.apache.sysml.runtime.instructions.gpu.MMTSJGPUInstruction;
 import org.apache.sysml.runtime.instructions.gpu.ReorgGPUInstruction;
@@ -121,10 +122,14 @@ public class GPUInstructionParser  extends 
InstructionParser
                                return 
ReorgGPUInstruction.parseInstruction(str);
                                
                        case ArithmeticBinary:
-                               return 
ArithmeticBinaryGPUInstruction.parseInstruction(str);
+                               String opcode = InstructionUtils.getOpCode(str);
+                               if( opcode.equals("+*") || opcode.equals("-*")  
)
+                                       return 
MatrixMatrixAxpyGPUInstruction.parseInstruction(str);
+                               else
+                                       return 
ArithmeticBinaryGPUInstruction.parseInstruction(str);
                                
                        default: 
                                throw new DMLRuntimeException("Invalid GPU 
Instruction Type: " + gputype );
                }
        }       
-}
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java
 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java
new file mode 100644
index 0000000..cc8ff9f
--- /dev/null
+++ 
b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.runtime.instructions.gpu;
+
+import org.apache.sysml.parser.Expression.DataType;
+import org.apache.sysml.runtime.DMLRuntimeException;
+import org.apache.sysml.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysml.runtime.controlprogram.context.ExecutionContext;
+import org.apache.sysml.runtime.instructions.InstructionUtils;
+import org.apache.sysml.runtime.instructions.cp.CPOperand;
+import org.apache.sysml.runtime.instructions.cp.ScalarObject;
+import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA;
+import org.apache.sysml.runtime.matrix.operators.Operator;
+import org.apache.sysml.utils.Statistics;
+
+public class MatrixMatrixAxpyGPUInstruction extends 
ArithmeticBinaryGPUInstruction
+{
+       
+       CPOperand constant = null;
+       int multiplier = 1;
+       public MatrixMatrixAxpyGPUInstruction(Operator op, 
+                          CPOperand in1, 
+                          CPOperand constant, 
+                          int multiplier,
+                          CPOperand in2, 
+                          CPOperand out, 
+                          String opcode,
+                          String istr){
+               super(op, in1, in2, out, opcode, istr);
+               this.constant = constant;
+               this.multiplier = multiplier;
+       }
+       
+       public static MatrixMatrixAxpyGPUInstruction parseInstruction ( String 
str ) throws DMLRuntimeException {
+               String[] parts = 
InstructionUtils.getInstructionPartsWithValueType(str);
+               InstructionUtils.checkNumFields ( parts, 4 );
+               
+               String opcode = parts[0];
+               int multiplier = 1;
+               if(opcode.equals("-*"))
+                       multiplier = -1;
+               CPOperand in1 = new CPOperand(parts[1]);
+               CPOperand constant = new CPOperand(parts[2]);
+               if(constant.getDataType() != DataType.SCALAR)
+                       throw new DMLRuntimeException("Expected second operand 
to be a scalar");
+               CPOperand in2 = new CPOperand(parts[3]);
+               CPOperand out = new CPOperand(parts[4]);
+               
+               DataType dt1 = in1.getDataType();
+               DataType dt2 = in2.getDataType();
+               DataType dt3 = out.getDataType();
+        
+               Operator operator = (dt1 != dt2) ?
+                               
InstructionUtils.parseScalarBinaryOperator(opcode, (dt1 == DataType.SCALAR)) : 
+                               InstructionUtils.parseBinaryOperator(opcode);
+               
+               if(dt1 == DataType.MATRIX && dt2 == DataType.MATRIX && dt3 == 
DataType.MATRIX) {
+                       return new MatrixMatrixAxpyGPUInstruction(operator, 
in1, constant, multiplier, in2, out, opcode, str);  
+               }
+               else if( dt3 == DataType.MATRIX && ((dt1 == DataType.SCALAR && 
dt2 == DataType.MATRIX) || (dt1 == DataType.MATRIX && dt2 == DataType.SCALAR)) 
) {
+                       throw new DMLRuntimeException("Unsupported GPU 
PlusMult/MinusMult ArithmeticInstruction.");
+                       // return new 
ScalarMatrixArithmeticGPUInstruction(operator, in1, in2, out, opcode, str);
+               }
+               else
+                       throw new DMLRuntimeException("Unsupported GPU 
ArithmeticInstruction.");
+       }
+
+       
+       @Override
+       public void processInstruction(ExecutionContext ec) throws 
DMLRuntimeException {
+               Statistics.incrementNoOfExecutedGPUInst();
+               
+               MatrixObject in1 = 
ec.getMatrixInputForGPUInstruction(_input1.getName());
+               MatrixObject in2 = 
ec.getMatrixInputForGPUInstruction(_input2.getName());
+               ScalarObject scalar = ec.getScalarInput(constant.getName(), 
constant.getValueType(), constant.isLiteral());
+               
+               long rlen1 = in1.getNumRows();
+               long clen1 = in1.getNumColumns();
+               long rlen2 = in2.getNumRows();
+               long clen2 = in2.getNumColumns();
+               if (rlen1 != rlen2 || clen1 != clen2){
+                       // TODO: We donot support matrix-vector axpy operation 
+                       throw new DMLRuntimeException("The dimensions of inputs 
in GPU axpy operation should match:"+
+                                       rlen1 + " != " +  rlen2 + " || " +  
clen1 + " != " + clen2);
+               }
+
+               ec.setMetaData(_output.getName(), (int)rlen1, (int)clen1);
+               
+               LibMatrixCUDA.axpy(ec, in1, in2, _output.getName(), 
multiplier*scalar.getDoubleValue());
+               
+               ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+               ec.releaseMatrixInputForGPUInstruction(_input2.getName());
+        ec.releaseMatrixOutputForGPUInstruction(_output.getName());
+       }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index ca3ccd3..7da2891 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -56,6 +56,7 @@ import static jcuda.runtime.JCuda.cudaMalloc;
 import static jcuda.runtime.JCuda.cudaMemcpy;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost;
 import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice;
+import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice;
 import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -1722,6 +1723,37 @@ public class LibMatrixCUDA {
                                
ExecutionConfig.getConfigForSimpleMatrixOperations(rlen, clen),
                                src, dest, rlen, clen);
        }
+       
+       /**
+        * Performs daxpy operation
+        * 
+        * @param ec
+        * @param in1
+        * @param in2
+        * @param outputName
+        * @param constant
+        * @throws DMLRuntimeException
+        */
+       public static void axpy(ExecutionContext ec, MatrixObject in1, 
MatrixObject in2,
+                       String outputName,  double constant) throws 
DMLRuntimeException {
+               if(isInSparseFormat(in1)) 
+                       ((JCudaObject)in1.getGPUObject()).sparseToDense();
+               if(isInSparseFormat(in2)) 
+                       ((JCudaObject)in2.getGPUObject()).sparseToDense();
+               Pointer A = 
((JCudaObject)in1.getGPUObject()).jcudaDenseMatrixPtr;
+               Pointer B = 
((JCudaObject)in2.getGPUObject()).jcudaDenseMatrixPtr;
+               MatrixObject out = ec.getMatrixObject(outputName);
+               ec.getDenseMatrixOutputForGPUInstruction(outputName);   // 
Allocated the dense output matrix
+           Pointer C = ((JCudaObject)out.getGPUObject()).jcudaDenseMatrixPtr;
+           Pointer alphaPtr = pointerTo(constant);
+           long n = (in1.getNumRows()*in1.getNumColumns());
+           // C <- A + alpha*B 
+           // becomes
+           // C <- A
+           // C <- alpha*B + C
+           cudaMemcpy(C, A, n*((long)jcuda.Sizeof.DOUBLE), 
cudaMemcpyDeviceToDevice);
+           JCublas2.cublasDaxpy(cublasHandle, (int) n, alphaPtr, B, 1, C, 1);
+       }
 
        /**
         * Performs elementwise operation specified by op of two input matrices 
in1 and in2

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java
----------------------------------------------------------------------
diff --git 
a/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java
 
b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java
index ff85ebc..4c21587 100644
--- 
a/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java
+++ 
b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java
@@ -153,6 +153,7 @@ public class RewriteFuseBinaryOpChainTest extends 
AutomatedTestBase
         * @param rewrites
         * @param instType
         */
+       @SuppressWarnings("unused")
        private void testFuseBinaryChain( String testname, boolean rewrites, 
ExecType instType )
        {       
                RUNTIME_PLATFORM platformOld = rtplatform;
@@ -191,7 +192,12 @@ public class RewriteFuseBinaryOpChainTest extends 
AutomatedTestBase
                        
                        //check for applies rewrites
                        if( rewrites && instType!=ExecType.MR  ) {
-                               String prefix = (instType==ExecType.SPARK) ? 
Instruction.SP_INST_PREFIX  : "";
+                               String prefix = "";
+                               if((instType == ExecType.SPARK || 
instType==ExecType.CP) && AutomatedTestBase.TEST_GPU)
+                                       prefix = Instruction.GPU_INST_PREFIX;
+                               else if(instType == ExecType.SPARK)
+                                       prefix = Instruction.SP_INST_PREFIX;
+                               
                                String opcode = 
(testname.equals(TEST_NAME1)||testname.equals(TEST_NAME3)) ? prefix+"+*" : 
prefix+"-*";
                                Assert.assertTrue("Rewrite not 
applied.",Statistics.getCPHeavyHitterOpCodes().contains(opcode));
                        }

Reply via email to