Repository: incubator-systemml Updated Branches: refs/heads/master 2b5b12557 -> 6f8cea9bc
[SYSTEMML-446] Add support for cublas daxpy operation Closes #330. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6f8cea9b Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6f8cea9b Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6f8cea9b Branch: refs/heads/master Commit: 6f8cea9bc2d42913d0cf1c917c1aada9aa55bee1 Parents: 2b5b125 Author: Niketan Pansare <[email protected]> Authored: Fri Jan 6 10:28:54 2017 -0800 Committer: Niketan Pansare <[email protected]> Committed: Fri Jan 6 10:28:54 2017 -0800 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/TernaryOp.java | 11 +- .../java/org/apache/sysml/lops/PlusMult.java | 4 +- .../instructions/GPUInstructionParser.java | 9 +- .../gpu/MatrixMatrixAxpyGPUInstruction.java | 112 +++++++++++++++++++ .../runtime/matrix/data/LibMatrixCUDA.java | 32 ++++++ .../misc/RewriteFuseBinaryOpChainTest.java | 8 +- 6 files changed, 168 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/hops/TernaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/TernaryOp.java b/src/main/java/org/apache/sysml/hops/TernaryOp.java index f3b490d..b8be594 100644 --- a/src/main/java/org/apache/sysml/hops/TernaryOp.java +++ b/src/main/java/org/apache/sysml/hops/TernaryOp.java @@ -19,6 +19,7 @@ package org.apache.sysml.hops; +import org.apache.sysml.api.DMLScript; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.hops.rewrite.HopRewriteUtils; import org.apache.sysml.lops.Aggregate; @@ -635,10 +636,14 @@ public class TernaryOp extends Hop if ( _op != OpOp3.PLUS_MULT && _op != OpOp3.MINUS_MULT ) throw new HopsException("Unexpected operation: " + _op + ", expecting " + OpOp3.PLUS_MULT + " or" + OpOp3.MINUS_MULT); - ExecType et = optFindExecType(); + ExecType et = null; + if(DMLScript.USE_ACCELERATOR && (DMLScript.FORCE_ACCELERATOR || getMemEstimate() < OptimizerUtils.GPU_MEMORY_BUDGET) ) + et = ExecType.GPU; + else + et = optFindExecType(); PlusMult plusmult = null; - if( et == ExecType.CP || et == ExecType.SPARK ) { + if( et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU ) { plusmult = new PlusMult( getInput().get(0).constructLops(), getInput().get(1).constructLops(), @@ -1081,4 +1086,4 @@ public class TernaryOp extends Hop return ret; } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/lops/PlusMult.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/PlusMult.java b/src/main/java/org/apache/sysml/lops/PlusMult.java index 8ee8625..7052185 100644 --- a/src/main/java/org/apache/sysml/lops/PlusMult.java +++ b/src/main/java/org/apache/sysml/lops/PlusMult.java @@ -45,7 +45,7 @@ public class PlusMult extends Lop boolean aligner = false; boolean definesMRJob = false; - if ( et == ExecType.CP || et == ExecType.SPARK ){ + if ( et == ExecType.CP || et == ExecType.SPARK || et == ExecType.GPU ){ lps.addCompatibility(JobType.INVALID); lps.setProperties( inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob ); } @@ -135,4 +135,4 @@ public class PlusMult extends Lop return sb.toString(); } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index 76d900d..c1d884e 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java @@ -26,6 +26,7 @@ import org.apache.sysml.runtime.instructions.gpu.ArithmeticBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.BuiltinUnaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.ConvolutionGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; +import org.apache.sysml.runtime.instructions.gpu.MatrixMatrixAxpyGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.GPUInstruction.GPUINSTRUCTION_TYPE; import org.apache.sysml.runtime.instructions.gpu.MMTSJGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.ReorgGPUInstruction; @@ -121,10 +122,14 @@ public class GPUInstructionParser extends InstructionParser return ReorgGPUInstruction.parseInstruction(str); case ArithmeticBinary: - return ArithmeticBinaryGPUInstruction.parseInstruction(str); + String opcode = InstructionUtils.getOpCode(str); + if( opcode.equals("+*") || opcode.equals("-*") ) + return MatrixMatrixAxpyGPUInstruction.parseInstruction(str); + else + return ArithmeticBinaryGPUInstruction.parseInstruction(str); default: throw new DMLRuntimeException("Invalid GPU Instruction Type: " + gputype ); } } -} +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java new file mode 100644 index 0000000..cc8ff9f --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/instructions/gpu/MatrixMatrixAxpyGPUInstruction.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.instructions.gpu; + +import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; +import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; +import org.apache.sysml.runtime.instructions.InstructionUtils; +import org.apache.sysml.runtime.instructions.cp.CPOperand; +import org.apache.sysml.runtime.instructions.cp.ScalarObject; +import org.apache.sysml.runtime.matrix.data.LibMatrixCUDA; +import org.apache.sysml.runtime.matrix.operators.Operator; +import org.apache.sysml.utils.Statistics; + +public class MatrixMatrixAxpyGPUInstruction extends ArithmeticBinaryGPUInstruction +{ + + CPOperand constant = null; + int multiplier = 1; + public MatrixMatrixAxpyGPUInstruction(Operator op, + CPOperand in1, + CPOperand constant, + int multiplier, + CPOperand in2, + CPOperand out, + String opcode, + String istr){ + super(op, in1, in2, out, opcode, istr); + this.constant = constant; + this.multiplier = multiplier; + } + + public static MatrixMatrixAxpyGPUInstruction parseInstruction ( String str ) throws DMLRuntimeException { + String[] parts = InstructionUtils.getInstructionPartsWithValueType(str); + InstructionUtils.checkNumFields ( parts, 4 ); + + String opcode = parts[0]; + int multiplier = 1; + if(opcode.equals("-*")) + multiplier = -1; + CPOperand in1 = new CPOperand(parts[1]); + CPOperand constant = new CPOperand(parts[2]); + if(constant.getDataType() != DataType.SCALAR) + throw new DMLRuntimeException("Expected second operand to be a scalar"); + CPOperand in2 = new CPOperand(parts[3]); + CPOperand out = new CPOperand(parts[4]); + + DataType dt1 = in1.getDataType(); + DataType dt2 = in2.getDataType(); + DataType dt3 = out.getDataType(); + + Operator operator = (dt1 != dt2) ? + InstructionUtils.parseScalarBinaryOperator(opcode, (dt1 == DataType.SCALAR)) : + InstructionUtils.parseBinaryOperator(opcode); + + if(dt1 == DataType.MATRIX && dt2 == DataType.MATRIX && dt3 == DataType.MATRIX) { + return new MatrixMatrixAxpyGPUInstruction(operator, in1, constant, multiplier, in2, out, opcode, str); + } + else if( dt3 == DataType.MATRIX && ((dt1 == DataType.SCALAR && dt2 == DataType.MATRIX) || (dt1 == DataType.MATRIX && dt2 == DataType.SCALAR)) ) { + throw new DMLRuntimeException("Unsupported GPU PlusMult/MinusMult ArithmeticInstruction."); + // return new ScalarMatrixArithmeticGPUInstruction(operator, in1, in2, out, opcode, str); + } + else + throw new DMLRuntimeException("Unsupported GPU ArithmeticInstruction."); + } + + + @Override + public void processInstruction(ExecutionContext ec) throws DMLRuntimeException { + Statistics.incrementNoOfExecutedGPUInst(); + + MatrixObject in1 = ec.getMatrixInputForGPUInstruction(_input1.getName()); + MatrixObject in2 = ec.getMatrixInputForGPUInstruction(_input2.getName()); + ScalarObject scalar = ec.getScalarInput(constant.getName(), constant.getValueType(), constant.isLiteral()); + + long rlen1 = in1.getNumRows(); + long clen1 = in1.getNumColumns(); + long rlen2 = in2.getNumRows(); + long clen2 = in2.getNumColumns(); + if (rlen1 != rlen2 || clen1 != clen2){ + // TODO: We donot support matrix-vector axpy operation + throw new DMLRuntimeException("The dimensions of inputs in GPU axpy operation should match:"+ + rlen1 + " != " + rlen2 + " || " + clen1 + " != " + clen2); + } + + ec.setMetaData(_output.getName(), (int)rlen1, (int)clen1); + + LibMatrixCUDA.axpy(ec, in1, in2, _output.getName(), multiplier*scalar.getDoubleValue()); + + ec.releaseMatrixInputForGPUInstruction(_input1.getName()); + ec.releaseMatrixInputForGPUInstruction(_input2.getName()); + ec.releaseMatrixOutputForGPUInstruction(_output.getName()); + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java index ca3ccd3..7da2891 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java @@ -56,6 +56,7 @@ import static jcuda.runtime.JCuda.cudaMalloc; import static jcuda.runtime.JCuda.cudaMemcpy; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToHost; import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyHostToDevice; +import static jcuda.runtime.cudaMemcpyKind.cudaMemcpyDeviceToDevice; import static jcuda.jcudnn.cudnnActivationMode.CUDNN_ACTIVATION_RELU; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -1722,6 +1723,37 @@ public class LibMatrixCUDA { ExecutionConfig.getConfigForSimpleMatrixOperations(rlen, clen), src, dest, rlen, clen); } + + /** + * Performs daxpy operation + * + * @param ec + * @param in1 + * @param in2 + * @param outputName + * @param constant + * @throws DMLRuntimeException + */ + public static void axpy(ExecutionContext ec, MatrixObject in1, MatrixObject in2, + String outputName, double constant) throws DMLRuntimeException { + if(isInSparseFormat(in1)) + ((JCudaObject)in1.getGPUObject()).sparseToDense(); + if(isInSparseFormat(in2)) + ((JCudaObject)in2.getGPUObject()).sparseToDense(); + Pointer A = ((JCudaObject)in1.getGPUObject()).jcudaDenseMatrixPtr; + Pointer B = ((JCudaObject)in2.getGPUObject()).jcudaDenseMatrixPtr; + MatrixObject out = ec.getMatrixObject(outputName); + ec.getDenseMatrixOutputForGPUInstruction(outputName); // Allocated the dense output matrix + Pointer C = ((JCudaObject)out.getGPUObject()).jcudaDenseMatrixPtr; + Pointer alphaPtr = pointerTo(constant); + long n = (in1.getNumRows()*in1.getNumColumns()); + // C <- A + alpha*B + // becomes + // C <- A + // C <- alpha*B + C + cudaMemcpy(C, A, n*((long)jcuda.Sizeof.DOUBLE), cudaMemcpyDeviceToDevice); + JCublas2.cublasDaxpy(cublasHandle, (int) n, alphaPtr, B, 1, C, 1); + } /** * Performs elementwise operation specified by op of two input matrices in1 and in2 http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6f8cea9b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java index ff85ebc..4c21587 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/misc/RewriteFuseBinaryOpChainTest.java @@ -153,6 +153,7 @@ public class RewriteFuseBinaryOpChainTest extends AutomatedTestBase * @param rewrites * @param instType */ + @SuppressWarnings("unused") private void testFuseBinaryChain( String testname, boolean rewrites, ExecType instType ) { RUNTIME_PLATFORM platformOld = rtplatform; @@ -191,7 +192,12 @@ public class RewriteFuseBinaryOpChainTest extends AutomatedTestBase //check for applies rewrites if( rewrites && instType!=ExecType.MR ) { - String prefix = (instType==ExecType.SPARK) ? Instruction.SP_INST_PREFIX : ""; + String prefix = ""; + if((instType == ExecType.SPARK || instType==ExecType.CP) && AutomatedTestBase.TEST_GPU) + prefix = Instruction.GPU_INST_PREFIX; + else if(instType == ExecType.SPARK) + prefix = Instruction.SP_INST_PREFIX; + String opcode = (testname.equals(TEST_NAME1)||testname.equals(TEST_NAME3)) ? prefix+"+*" : prefix+"-*"; Assert.assertTrue("Rewrite not applied.",Statistics.getCPHeavyHitterOpCodes().contains(opcode)); }
