This is an automated email from the ASF dual-hosted git repository.
arnabp20 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new b1bdeae [SYSTEMDS-2913] Refactor GPUInstruction to support reuse better
b1bdeae is described below
commit b1bdeaed3e7c4124e15e297e4cfd37176c0071f9
Author: arnabp <[email protected]>
AuthorDate: Fri Apr 2 13:38:10 2021 +0200
[SYSTEMDS-2913] Refactor GPUInstruction to support reuse better
This patch refactors the subclasses of GPUInstruction by moving
the input and output operands into the parent class.
It also extends the infrastructure for reusing GPU intermediates
so that it supports all GPU instructions.
---
.../gpu/AggregateBinaryGPUInstruction.java | 19 +----
.../gpu/AggregateUnaryGPUInstruction.java | 99 +++++++++++-----------
.../gpu/ArithmeticBinaryGPUInstruction.java | 21 +----
.../gpu/BuiltinBinaryGPUInstruction.java | 21 +----
.../gpu/BuiltinUnaryGPUInstruction.java | 6 +-
.../runtime/instructions/gpu/GPUInstruction.java | 30 ++++++-
.../instructions/gpu/MMTSJGPUInstruction.java | 10 +--
.../gpu/MatrixAppendGPUInstruction.java | 21 ++---
.../gpu/MatrixBuiltinGPUInstruction.java | 4 +-
.../gpu/MatrixIndexingGPUInstruction.java | 29 ++++---
.../gpu/MatrixMatrixBuiltinGPUInstruction.java | 14 +--
.../gpu/MatrixReshapeGPUInstruction.java | 19 +++--
.../gpu/RelationalBinaryGPUInstruction.java | 9 +-
.../instructions/gpu/ReorgGPUInstruction.java | 22 +----
.../gpu/ScalarMatrixBuiltinGPUInstruction.java | 58 +++++++------
.../instructions/gpu/context/GPUContext.java | 9 ++
.../instructions/gpu/context/GPUObject.java | 8 +-
.../apache/sysds/runtime/lineage/LineageCache.java | 49 +++++++----
.../sysds/runtime/lineage/LineageCacheConfig.java | 10 ++-
.../test/functions/lineage/GPUFullReuseTest.java | 11 ++-
.../scripts/functions/lineage/FullReuseGPU1.dml | 4 +-
21 files changed, 234 insertions(+), 239 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java
index 6445135..e737d52 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateBinaryGPUInstruction.java
@@ -18,7 +18,6 @@
*/
package org.apache.sysds.runtime.instructions.gpu;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
@@ -27,9 +26,6 @@ import org.apache.sysds.runtime.functionobjects.Plus;
import org.apache.sysds.runtime.functionobjects.SwapIndex;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
-import org.apache.sysds.runtime.lineage.LineageItem;
-import org.apache.sysds.runtime.lineage.LineageItemUtils;
-import org.apache.sysds.runtime.lineage.LineageTraceable;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysds.runtime.matrix.data.LibMatrixCuMatMult;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
@@ -38,20 +34,14 @@ import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.ReorgOperator;
import org.apache.sysds.utils.GPUStatistics;
-public class AggregateBinaryGPUInstruction extends GPUInstruction implements
LineageTraceable {
- private CPOperand _input1 = null;
- private CPOperand _input2 = null;
- public CPOperand _output = null;
+public class AggregateBinaryGPUInstruction extends GPUInstruction {
private boolean _isLeftTransposed;
private boolean _isRightTransposed;
private AggregateBinaryGPUInstruction(Operator op, CPOperand in1,
CPOperand in2, CPOperand out, String opcode,
String istr, boolean leftTranspose, boolean
rightTranspose) {
- super(op, opcode, istr);
+ super(op, in1, in2, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.AggregateBinary;
- _input1 = in1;
- _input2 = in2;
- _output = out;
_isLeftTransposed = leftTranspose;
_isRightTransposed = rightTranspose;
}
@@ -102,9 +92,4 @@ public class AggregateBinaryGPUInstruction extends
GPUInstruction implements Lin
return LibMatrixCUDA.isInSparseFormat(ec.getGPUContext(0), mo);
}
- @Override
- public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
- return Pair.of(_output.getName(), new LineageItem(getOpcode(),
- LineageItemUtils.getLineage(ec, _input1, _input2)));
- }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateUnaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateUnaryGPUInstruction.java
index 905a94a..12f76b0 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateUnaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/AggregateUnaryGPUInstruction.java
@@ -36,67 +36,68 @@ import org.apache.sysds.utils.GPUStatistics;
* Implements aggregate unary instructions for CUDA
*/
public class AggregateUnaryGPUInstruction extends GPUInstruction {
- private CPOperand _input1 = null;
- private CPOperand _output = null;
private AggregateUnaryGPUInstruction(Operator op, CPOperand in1,
CPOperand out, String opcode, String istr) {
- super(op, opcode, istr);
+ super(op, in1, null, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.AggregateUnary;
- _input1 = in1;
- _output = out;
}
- public static AggregateUnaryGPUInstruction parseInstruction(String str ) {
- String[] parts = InstructionUtils.getInstructionPartsWithValueType(str);
- String opcode = parts[0];
- CPOperand in1 = new CPOperand(parts[1]);
- CPOperand out = new CPOperand(parts[2]);
+ public static AggregateUnaryGPUInstruction parseInstruction(String str)
{
+ String[] parts =
InstructionUtils.getInstructionPartsWithValueType(str);
+ String opcode = parts[0];
+ CPOperand in1 = new CPOperand(parts[1]);
+ CPOperand out = new CPOperand(parts[2]);
- // This follows logic similar to AggregateUnaryCPInstruction.
- // nrow, ncol & length should either read or refresh metadata
- Operator aggop = null;
- if(opcode.equalsIgnoreCase("nrow") || opcode.equalsIgnoreCase("ncol") ||
opcode.equalsIgnoreCase("length")) {
- throw new DMLRuntimeException("nrow, ncol & length should not be
compiled as GPU instructions!");
- } else {
- aggop = InstructionUtils.parseBasicAggregateUnaryOperator(opcode);
- }
- return new AggregateUnaryGPUInstruction(aggop, in1, out, opcode, str);
- }
+ // This follows logic similar to AggregateUnaryCPInstruction.
+ // nrow, ncol & length should either read or refresh metadata
+ Operator aggop = null;
+ if (opcode.equalsIgnoreCase("nrow") ||
opcode.equalsIgnoreCase("ncol") || opcode.equalsIgnoreCase("length")) {
+ throw new DMLRuntimeException("nrow, ncol & length
should not be compiled as GPU instructions!");
+ }
+ else {
+ aggop =
InstructionUtils.parseBasicAggregateUnaryOperator(opcode);
+ }
+ return new AggregateUnaryGPUInstruction(aggop, in1, out,
opcode, str);
+ }
- @Override
- public void processInstruction(ExecutionContext ec) {
- GPUStatistics.incrementNoOfExecutedGPUInst();
+ @Override
+ public void processInstruction(ExecutionContext ec) {
+ GPUStatistics.incrementNoOfExecutedGPUInst();
- String opcode = getOpcode();
+ String opcode = getOpcode();
- // nrow, ncol & length should either read or refresh metadata
- if(opcode.equalsIgnoreCase("nrow") || opcode.equalsIgnoreCase("ncol") ||
opcode.equalsIgnoreCase("length")) {
- throw new DMLRuntimeException("nrow, ncol & length should not be
compiled as GPU instructions!");
- }
+ // nrow, ncol & length should either read or refresh metadata
+ if (opcode.equalsIgnoreCase("nrow") ||
opcode.equalsIgnoreCase("ncol") || opcode.equalsIgnoreCase("length")) {
+ throw new DMLRuntimeException("nrow, ncol & length
should not be compiled as GPU instructions!");
+ }
- //get inputs
- MatrixObject in1 = getMatrixInputForGPUInstruction(ec, _input1.getName());
+ // get inputs
+ MatrixObject in1 = getMatrixInputForGPUInstruction(ec,
_input1.getName());
- int rlen = (int)in1.getNumRows();
- int clen = (int)in1.getNumColumns();
+ int rlen = (int) in1.getNumRows();
+ int clen = (int) in1.getNumColumns();
- IndexFunction indexFunction = ((AggregateUnaryOperator) _optr).indexFn;
- if (indexFunction instanceof ReduceRow){ // COL{SUM, MAX...}
- ec.setMetaData(_output.getName(), 1, clen);
- } else if (indexFunction instanceof ReduceCol) { // ROW{SUM, MAX,...}
- ec.setMetaData(_output.getName(), rlen, 1);
- }
+ IndexFunction indexFunction = ((AggregateUnaryOperator)
_optr).indexFn;
+ if (indexFunction instanceof ReduceRow) { // COL{SUM, MAX...}
+ ec.setMetaData(_output.getName(), 1, clen);
+ }
+ else if (indexFunction instanceof ReduceCol) { // ROW{SUM,
MAX,...}
+ ec.setMetaData(_output.getName(), rlen, 1);
+ }
- LibMatrixCUDA.unaryAggregate(ec, ec.getGPUContext(0), getExtendedOpcode(),
in1, _output.getName(), (AggregateUnaryOperator)_optr);
+ LibMatrixCUDA.unaryAggregate(ec, ec.getGPUContext(0),
getExtendedOpcode(),
+ in1, _output.getName(),
(AggregateUnaryOperator) _optr);
- //release inputs/outputs
- ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+ // release inputs/outputs
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
- // If the unary aggregate is a row reduction or a column reduction, it
results in a vector
- // which needs to be released. Otherwise a scala is produced and it is
copied back to the host
- // and set in the execution context by invoking the setScalarOutput
- if (indexFunction instanceof ReduceRow || indexFunction instanceof
ReduceCol) {
- ec.releaseMatrixOutputForGPUInstruction(_output.getName());
- }
- }
-}
+ // If the unary aggregate is a row reduction or a column
reduction, it results
+ // in a vector
+ // which needs to be released. Otherwise a scala is produced
and it is copied
+ // back to the host
+ // and set in the execution context by invoking the
setScalarOutput
+ if (indexFunction instanceof ReduceRow || indexFunction
instanceof ReduceCol) {
+
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
+ }
+ }
+}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ArithmeticBinaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ArithmeticBinaryGPUInstruction.java
index f451910..5c7f6b9 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ArithmeticBinaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ArithmeticBinaryGPUInstruction.java
@@ -19,29 +19,18 @@
package org.apache.sysds.runtime.instructions.gpu;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.runtime.DMLRuntimeException;
-import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
-import org.apache.sysds.runtime.lineage.LineageItem;
-import org.apache.sysds.runtime.lineage.LineageItemUtils;
-import org.apache.sysds.runtime.lineage.LineageTraceable;
import org.apache.sysds.runtime.matrix.operators.Operator;
-public abstract class ArithmeticBinaryGPUInstruction extends GPUInstruction
implements LineageTraceable {
- protected CPOperand _input1;
- protected CPOperand _input2;
- protected CPOperand _output;
+public abstract class ArithmeticBinaryGPUInstruction extends GPUInstruction {
protected ArithmeticBinaryGPUInstruction(Operator op, CPOperand in1,
CPOperand in2, CPOperand out, String opcode,
String istr) {
- super(op, opcode, istr);
+ super(op, in1, in2, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.ArithmeticBinary;
- _input1 = in1;
- _input2 = in2;
- _output = out;
}
public static ArithmeticBinaryGPUInstruction parseInstruction ( String
str ) {
@@ -70,10 +59,4 @@ public abstract class ArithmeticBinaryGPUInstruction extends
GPUInstruction impl
else
throw new DMLRuntimeException("Unsupported GPU
ArithmeticInstruction.");
}
-
- @Override
- public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
- return Pair.of(_output.getName(), new LineageItem(getOpcode(),
- LineageItemUtils.getLineage(ec, _input1, _input2)));
- }
}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
index 82d3222..de604ba 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinBinaryGPUInstruction.java
@@ -19,35 +19,24 @@
package org.apache.sysds.runtime.instructions.gpu;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.DMLRuntimeException;
-import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.functionobjects.Builtin;
import org.apache.sysds.runtime.functionobjects.ValueFunction;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
-import org.apache.sysds.runtime.lineage.LineageItem;
-import org.apache.sysds.runtime.lineage.LineageItemUtils;
-import org.apache.sysds.runtime.lineage.LineageTraceable;
import org.apache.sysds.runtime.matrix.operators.BinaryOperator;
import org.apache.sysds.runtime.matrix.operators.Operator;
-public abstract class BuiltinBinaryGPUInstruction extends GPUInstruction
implements LineageTraceable {
+public abstract class BuiltinBinaryGPUInstruction extends GPUInstruction {
@SuppressWarnings("unused")
private int _arity;
- CPOperand output;
- CPOperand input1, input2;
-
protected BuiltinBinaryGPUInstruction(Operator op, CPOperand input1,
CPOperand input2, CPOperand output,
String opcode, String istr, int _arity) {
- super(op, opcode, istr);
+ super(op, input1, input2, output, opcode, istr);
this._arity = _arity;
- this.output = output;
- this.input1 = input1;
- this.input2 = input2;
}
public static BuiltinBinaryGPUInstruction parseInstruction(String str) {
@@ -88,10 +77,4 @@ public abstract class BuiltinBinaryGPUInstruction extends
GPUInstruction impleme
"GPU : Unsupported GPU builtin operations on a
matrix and a scalar:" + opcode);
}
- @Override
- public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
- return Pair.of(output.getName(), new LineageItem(getOpcode(),
- LineageItemUtils.getLineage(ec, input1, input2)));
- }
-
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
index 8ad6d98..0812c81 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/BuiltinUnaryGPUInstruction.java
@@ -29,16 +29,12 @@ import org.apache.sysds.runtime.matrix.operators.Operator;
public abstract class BuiltinUnaryGPUInstruction extends GPUInstruction {
int _arity;
- CPOperand _input;
- CPOperand _output;
protected BuiltinUnaryGPUInstruction(Operator op, CPOperand in,
CPOperand out, int _arity, String opcode,
String istr) {
- super(op, opcode, istr);
+ super(op, in, null, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.BuiltinUnary;
this._arity = _arity;
- _input = in;
- _output = out;
}
public int getArity() {
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/GPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/GPUInstruction.java
index 615e194..4c51c70 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/GPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/GPUInstruction.java
@@ -19,6 +19,7 @@
package org.apache.sysds.runtime.instructions.gpu;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.api.DMLScript;
@@ -27,12 +28,18 @@ import
org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.instructions.GPUInstructionParser;
import org.apache.sysds.runtime.instructions.Instruction;
import org.apache.sysds.runtime.instructions.cp.CPInstruction;
+import org.apache.sysds.runtime.instructions.cp.CPOperand;
import org.apache.sysds.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysds.runtime.lineage.LineageItem;
+import org.apache.sysds.runtime.lineage.LineageItemUtils;
+import org.apache.sysds.runtime.lineage.LineageTraceable;
import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.utils.Statistics;
-public abstract class GPUInstruction extends Instruction {
+public abstract class GPUInstruction extends Instruction implements
LineageTraceable {
private static final Log LOG =
LogFactory.getLog(GPUInstruction.class.getName());
+ public final CPOperand _output;
+ public final CPOperand _input1, _input2;
public enum GPUINSTRUCTION_TYPE {
AggregateUnary,
@@ -152,14 +159,27 @@ public abstract class GPUInstruction extends Instruction {
protected boolean _requiresLabelUpdate = false;
- protected GPUInstruction(Operator op, String opcode, String istr) {
+ protected GPUInstruction(Operator op, CPOperand in1, CPOperand in2,
CPOperand out, String opcode, String istr) {
super(op);
+ _input1 = in1;
+ _input2 = in2;
+ _output = out;
instString = istr;
// prepare opcode and update requirement for repeated usage
instOpcode = opcode;
_requiresLabelUpdate = super.requiresLabelUpdate();
}
+
+ protected GPUInstruction(Operator op, String opcode, String istr) {
+ super(op);
+ _input1 = null;
+ _input2 = null;
+ _output = null;
+ instString = istr;
+ instOpcode = opcode;
+ _requiresLabelUpdate = super.requiresLabelUpdate();
+ }
@Override
public IType getType() {
@@ -231,4 +251,10 @@ public abstract class GPUInstruction extends Instruction {
protected MatrixObject
getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String name, long
numRows, long numCols) {
return ec.getDenseMatrixOutputForGPUInstruction(name, numRows,
numCols).getKey();
}
+
+ @Override
+ public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
+ return Pair.of(_output.getName(), new LineageItem(getOpcode(),
+ LineageItemUtils.getLineage(ec, _input1, _input2)));
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MMTSJGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MMTSJGPUInstruction.java
index 0a435d7..29e50e0 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MMTSJGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MMTSJGPUInstruction.java
@@ -31,8 +31,6 @@ import org.apache.sysds.utils.GPUStatistics;
public class MMTSJGPUInstruction extends GPUInstruction {
private MMTSJType _type = null;
- CPOperand _input;
- CPOperand _output;
/**
* MMTSJGPUInstruction constructor.
@@ -51,11 +49,9 @@ public class MMTSJGPUInstruction extends GPUInstruction {
* ?
*/
private MMTSJGPUInstruction(Operator op, CPOperand in1, MMTSJType type,
CPOperand out, String opcode, String istr) {
- super(op, opcode, istr);
+ super(op, in1, null, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.MMTSJ;
_type = type;
- _input = in1;
- _output = out;
}
public static MMTSJGPUInstruction parseInstruction ( String str )
@@ -77,14 +73,14 @@ public class MMTSJGPUInstruction extends GPUInstruction {
@Override
public void processInstruction(ExecutionContext ec) {
GPUStatistics.incrementNoOfExecutedGPUInst();
- MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input.getName());
+ MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input1.getName());
boolean isLeftTransposed = ( _type == MMTSJType.LEFT);
int rlen = (int) (isLeftTransposed? mat.getNumColumns() :
mat.getNumRows());
int clen = rlen;
//execute operations
ec.setMetaData(_output.getName(), rlen, clen);
LibMatrixCUDA.matmultTSMM(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat, _output.getName(), isLeftTransposed);
- ec.releaseMatrixInputForGPUInstruction(_input.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixAppendGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixAppendGPUInstruction.java
index a4084a4..e539e69 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixAppendGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixAppendGPUInstruction.java
@@ -37,16 +37,11 @@ import org.apache.sysds.utils.GPUStatistics;
*/
public class MatrixAppendGPUInstruction extends GPUInstruction {
- CPOperand output;
- CPOperand input1, input2;
AppendCPInstruction.AppendType atype;
private MatrixAppendGPUInstruction(Operator op, CPOperand in1,
CPOperand in2, CPOperand out,
AppendCPInstruction.AppendType type, String opcode,
String istr) {
- super(op, opcode, istr);
- this.output = out;
- this.input1 = in1;
- this.input2 = in2;
+ super(op, in1, in2, out, opcode, istr);
this.atype = type;
}
@@ -75,16 +70,16 @@ public class MatrixAppendGPUInstruction extends
GPUInstruction {
public void processInstruction(ExecutionContext ec) {
GPUStatistics.incrementNoOfExecutedGPUInst();
String opcode = getOpcode();
- MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
input1.getName());
- MatrixObject mat2 = getMatrixInputForGPUInstruction(ec,
input2.getName());
+ MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
_input1.getName());
+ MatrixObject mat2 = getMatrixInputForGPUInstruction(ec,
_input2.getName());
if(atype == AppendCPInstruction.AppendType.CBIND)
- LibMatrixCUDA.cbind(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, output.getName());
+ LibMatrixCUDA.cbind(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, _output.getName());
else if (atype == AppendCPInstruction.AppendType.RBIND )
- LibMatrixCUDA.rbind(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, output.getName());
+ LibMatrixCUDA.rbind(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, _output.getName());
else
throw new DMLRuntimeException("Unsupported GPU
operator:" + opcode);
- ec.releaseMatrixInputForGPUInstruction(input1.getName());
- ec.releaseMatrixInputForGPUInstruction(input2.getName());
- ec.releaseMatrixOutputForGPUInstruction(output.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input2.getName());
+ ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
index 9a3ae12..c6ce962 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixBuiltinGPUInstruction.java
@@ -44,7 +44,7 @@ public class MatrixBuiltinGPUInstruction extends
BuiltinUnaryGPUInstruction {
GPUStatistics.incrementNoOfExecutedGPUInst();
String opcode = getOpcode();
- MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input.getName());
+ MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input1.getName());
if(opcode != "ucumk+*")
ec.setMetaData(_output.getName(), mat.getNumRows(),
mat.getNumColumns());
@@ -119,7 +119,7 @@ public class MatrixBuiltinGPUInstruction extends
BuiltinUnaryGPUInstruction {
LOG.trace("processInstruction() " + getExtendedOpcode()
+ " executed in " + duration + "ms.");
}
- ec.releaseMatrixInputForGPUInstruction(_input.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixIndexingGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixIndexingGPUInstruction.java
index 8eb4567..eab4afa 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixIndexingGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixIndexingGPUInstruction.java
@@ -20,12 +20,15 @@ package org.apache.sysds.runtime.instructions.gpu;
import org.apache.sysds.lops.LeftIndex;
import org.apache.sysds.lops.RightIndex;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.common.Types.DataType;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
+import org.apache.sysds.runtime.lineage.LineageItem;
+import org.apache.sysds.runtime.lineage.LineageItemUtils;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.SimpleOperator;
@@ -34,33 +37,25 @@ import org.apache.sysds.utils.GPUStatistics;
public class MatrixIndexingGPUInstruction extends GPUInstruction {
CPOperand rowLower, rowUpper, colLower, colUpper;
- CPOperand input1;
- CPOperand input2;
- CPOperand output;
private MatrixIndexingGPUInstruction(CPOperand in, CPOperand rl,
CPOperand ru, CPOperand cl,
CPOperand cu, CPOperand out, String opcode, String
istr) {
- super(null, opcode, istr);
+ super(null, in, null, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.MatrixIndexing;
rowLower = rl;
rowUpper = ru;
colLower = cl;
colUpper = cu;
- input1 = in;
- output = out;
}
private MatrixIndexingGPUInstruction(Operator op, CPOperand lhsInput,
CPOperand rhsInput, CPOperand rl,
CPOperand ru, CPOperand cl, CPOperand cu, CPOperand
out, String opcode, String istr) {
- super(op, opcode, istr);
+ super(op, lhsInput, rhsInput, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.MatrixIndexing;
rowLower = rl;
rowUpper = ru;
colLower = cl;
colUpper = cu;
- input1 = lhsInput;
- input2 = rhsInput;
- output = out;
}
public static MatrixIndexingGPUInstruction parseInstruction ( String
str ) {
@@ -123,10 +118,10 @@ public class MatrixIndexingGPUInstruction extends
GPUInstruction {
IndexRange ixrange = getIndexRange(ec);
if ( opcode.equalsIgnoreCase(RightIndex.OPCODE) ) {
- MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
input1.getName());
- LibMatrixCUDA.sliceOperations(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, ixrange, output.getName());
-
ec.releaseMatrixInputForGPUInstruction(input1.getName());
-
ec.releaseMatrixOutputForGPUInstruction(output.getName());
+ MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
_input1.getName());
+ LibMatrixCUDA.sliceOperations(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, ixrange, _output.getName());
+
ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
else {
throw new DMLRuntimeException("Unsupported GPU
operator:" + opcode);
@@ -140,4 +135,10 @@ public class MatrixIndexingGPUInstruction extends
GPUInstruction {
(int)(ec.getScalarInput(colLower).getLongValue()-1),
(int)(ec.getScalarInput(colUpper).getLongValue()-1));
}
+
+ @Override
+ public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
+ return Pair.of(_output.getName(), new LineageItem(getOpcode(),
+ LineageItemUtils.getLineage(ec,
_input1,rowLower,rowUpper,colLower,colUpper)));
+ }
}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixMatrixBuiltinGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixMatrixBuiltinGPUInstruction.java
index d1c6a9b..ff49b0d 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixMatrixBuiltinGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixMatrixBuiltinGPUInstruction.java
@@ -40,20 +40,20 @@ public class MatrixMatrixBuiltinGPUInstruction extends
BuiltinBinaryGPUInstructi
GPUStatistics.incrementNoOfExecutedGPUInst();
String opcode = getOpcode();
- MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
input1.getName());
- MatrixObject mat2 = getMatrixInputForGPUInstruction(ec,
input2.getName());
+ MatrixObject mat1 = getMatrixInputForGPUInstruction(ec,
_input1.getName());
+ MatrixObject mat2 = getMatrixInputForGPUInstruction(ec,
_input2.getName());
if (opcode.equals("solve")) {
- ec.setMetaData(output.getName(), mat1.getNumColumns(),
1);
- LibMatrixCUDA.solve(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, output.getName());
+ ec.setMetaData(_output.getName(), mat1.getNumColumns(),
1);
+ LibMatrixCUDA.solve(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat1, mat2, _output.getName());
}
else {
throw new DMLRuntimeException("Unsupported GPU
operator:" + opcode);
}
- ec.releaseMatrixInputForGPUInstruction(input1.getName());
- ec.releaseMatrixInputForGPUInstruction(input2.getName());
- ec.releaseMatrixOutputForGPUInstruction(output.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input2.getName());
+ ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixReshapeGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixReshapeGPUInstruction.java
index aa5ee9e..97d39cc 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixReshapeGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/MatrixReshapeGPUInstruction.java
@@ -18,6 +18,7 @@
*/
package org.apache.sysds.runtime.instructions.gpu;
+import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
@@ -28,6 +29,8 @@ import org.apache.sysds.runtime.instructions.cp.BooleanObject;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
import org.apache.sysds.runtime.instructions.gpu.context.ExecutionConfig;
import org.apache.sysds.runtime.instructions.gpu.context.GPUContext;
+import org.apache.sysds.runtime.lineage.LineageItem;
+import org.apache.sysds.runtime.lineage.LineageItemUtils;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.ReorgOperator;
@@ -37,20 +40,16 @@ import jcuda.Pointer;
public class MatrixReshapeGPUInstruction extends GPUInstruction {
- private final CPOperand _input;
- private final CPOperand _output;
private final CPOperand _opRows;
private final CPOperand _opCols;
private final CPOperand _opByRow;
protected MatrixReshapeGPUInstruction(Operator op, String opcode,
String istr,
CPOperand in1, CPOperand in2, CPOperand in3, CPOperand
in4, CPOperand out) {
- super(op, opcode, istr);
- _input = in1;
+ super(op, in1, null, out, opcode, istr);
_opRows = in2;
_opCols = in3;
_opByRow = in4;
- _output = out;
}
public static MatrixReshapeGPUInstruction parseInstruction ( String str
) {
@@ -80,7 +79,7 @@ public class MatrixReshapeGPUInstruction extends
GPUInstruction {
GPUStatistics.incrementNoOfExecutedGPUInst();
String instName = getExtendedOpcode();
GPUContext gCtx = ec.getGPUContext(0);
- MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input.getName());
+ MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input1.getName());
if(rows*cols != mat.getNumRows()*mat.getNumColumns()) {
throw new DMLRuntimeException("Incorrect number of rows
and cols in rshape instruction");
}
@@ -100,8 +99,14 @@ public class MatrixReshapeGPUInstruction extends
GPUInstruction {
LibMatrixCUDA.toInt(mat.getNumRows()),
LibMatrixCUDA.toInt(mat.getNumColumns()),
rows, cols);
}
- ec.releaseMatrixInputForGPUInstruction(_input.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
+ @Override
+ public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
+ return Pair.of(_output.getName(), new LineageItem(getOpcode(),
+ LineageItemUtils.getLineage(ec, _input1, _opRows,
_opCols, _opByRow)));
+ }
+
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/RelationalBinaryGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/RelationalBinaryGPUInstruction.java
index adeb13b..ae97fe9 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/RelationalBinaryGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/RelationalBinaryGPUInstruction.java
@@ -27,17 +27,10 @@ import org.apache.sysds.runtime.matrix.operators.Operator;
public abstract class RelationalBinaryGPUInstruction extends GPUInstruction {
- protected CPOperand _input1;
- protected CPOperand _input2;
- protected CPOperand _output;
-
protected RelationalBinaryGPUInstruction(Operator op, CPOperand in1,
CPOperand in2, CPOperand out, String opcode,
String istr) {
- super(op, opcode, istr);
+ super(op, in1, in2, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.RelationalBinary;
- _input1 = in1;
- _input2 = in2;
- _output = out;
}
public static RelationalBinaryGPUInstruction parseInstruction ( String
str ) {
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ReorgGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ReorgGPUInstruction.java
index a3e36d0..f55e16c 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ReorgGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ReorgGPUInstruction.java
@@ -19,24 +19,18 @@
package org.apache.sysds.runtime.instructions.gpu;
-import org.apache.commons.lang3.tuple.Pair;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
import org.apache.sysds.runtime.functionobjects.SwapIndex;
import org.apache.sysds.runtime.instructions.InstructionUtils;
import org.apache.sysds.runtime.instructions.cp.CPOperand;
-import org.apache.sysds.runtime.lineage.LineageItem;
-import org.apache.sysds.runtime.lineage.LineageItemUtils;
-import org.apache.sysds.runtime.lineage.LineageTraceable;
import org.apache.sysds.runtime.matrix.data.LibMatrixCUDA;
import org.apache.sysds.runtime.matrix.operators.Operator;
import org.apache.sysds.runtime.matrix.operators.ReorgOperator;
import org.apache.sysds.utils.GPUStatistics;
-public class ReorgGPUInstruction extends GPUInstruction implements
LineageTraceable {
- private CPOperand _input;
- private CPOperand _output;
+public class ReorgGPUInstruction extends GPUInstruction {
/**
* for opcodes r'
@@ -53,10 +47,8 @@ public class ReorgGPUInstruction extends GPUInstruction
implements LineageTracea
* instruction string
*/
private ReorgGPUInstruction(Operator op, CPOperand in, CPOperand out,
String opcode, String istr) {
- super(op, opcode, istr);
+ super(op, in, null, out, opcode, istr);
_gputype = GPUINSTRUCTION_TYPE.Reorg;
- _input = in;
- _output = out;
}
public static ReorgGPUInstruction parseInstruction ( String str ) {
@@ -74,20 +66,14 @@ public class ReorgGPUInstruction extends GPUInstruction
implements LineageTracea
@Override
public void processInstruction(ExecutionContext ec) {
GPUStatistics.incrementNoOfExecutedGPUInst();
- MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input.getName());
+ MatrixObject mat = getMatrixInputForGPUInstruction(ec,
_input1.getName());
int rlen = (int) mat.getNumColumns();
int clen = (int) mat.getNumRows();
//execute operation
ec.setMetaData(_output.getName(), rlen, clen);
LibMatrixCUDA.transpose(ec, ec.getGPUContext(0),
getExtendedOpcode(), mat, _output.getName());
//release inputs/outputs
- ec.releaseMatrixInputForGPUInstruction(_input.getName());
+ ec.releaseMatrixInputForGPUInstruction(_input1.getName());
ec.releaseMatrixOutputForGPUInstruction(_output.getName());
}
-
- @Override
- public Pair<String, LineageItem> getLineageItem(ExecutionContext ec) {
- return Pair.of(_output.getName(), new LineageItem(getOpcode(),
- LineageItemUtils.getLineage(ec, _input)));
- }
}
\ No newline at end of file
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ScalarMatrixBuiltinGPUInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ScalarMatrixBuiltinGPUInstruction.java
index 4329189..233408e 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/ScalarMatrixBuiltinGPUInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/ScalarMatrixBuiltinGPUInstruction.java
@@ -39,34 +39,36 @@ public class ScalarMatrixBuiltinGPUInstruction extends
BuiltinBinaryGPUInstructi
_gputype = GPUINSTRUCTION_TYPE.BuiltinUnary;
}
- @Override
- public void processInstruction(ExecutionContext ec) {
- GPUStatistics.incrementNoOfExecutedGPUInst();
+ @Override
+ public void processInstruction(ExecutionContext ec) {
+ GPUStatistics.incrementNoOfExecutedGPUInst();
- String opcode = getOpcode();
- CPOperand mat = ( input1.getDataType() == DataType.MATRIX ) ? input1 :
input2;
- CPOperand scalar = ( input1.getDataType() == DataType.MATRIX ) ? input2
: input1;
- MatrixObject in1 = getMatrixInputForGPUInstruction(ec, mat.getName());
- ScalarObject constant = ec.getScalarInput(scalar);
-
- if(opcode.equals("max")) {
- ec.setMetaData(output.getName(), in1.getNumRows(), in1.getNumColumns());
- double constVal = constant.getDoubleValue();
- if(constVal == 0)
- LibMatrixCuDNN.relu(ec, ec.getGPUContext(0),
getExtendedOpcode(), in1, output.getName());
- else
- LibMatrixCUDA.matrixScalarOp(ec, ec.getGPUContext(0),
getExtendedOpcode(), in1, output.getName(), false,
-
InstructionUtils.parseScalarBinaryOperator(opcode, false, constVal));
- } else if(opcode.equals("min")) {
- ec.setMetaData(output.getName(), in1.getNumRows(), in1.getNumColumns());
- double constVal = constant.getDoubleValue();
- LibMatrixCUDA.matrixScalarOp(ec, ec.getGPUContext(0),
getExtendedOpcode(), in1, output.getName(), false,
-
InstructionUtils.parseScalarBinaryOperator(opcode, false, constVal));
- } else {
- throw new DMLRuntimeException("Unsupported GPU operator:" + opcode);
- }
- ec.releaseMatrixInputForGPUInstruction(mat.getName());
- ec.releaseMatrixOutputForGPUInstruction(output.getName());
- }
+ String opcode = getOpcode();
+ CPOperand mat = (_input1.getDataType() == DataType.MATRIX) ?
_input1 : _input2;
+ CPOperand scalar = (_input1.getDataType() == DataType.MATRIX) ?
_input2 : _input1;
+ MatrixObject in1 = getMatrixInputForGPUInstruction(ec,
mat.getName());
+ ScalarObject constant = ec.getScalarInput(scalar);
+
+ if (opcode.equals("max")) {
+ ec.setMetaData(_output.getName(), in1.getNumRows(),
in1.getNumColumns());
+ double constVal = constant.getDoubleValue();
+ if (constVal == 0)
+ LibMatrixCuDNN.relu(ec, ec.getGPUContext(0),
getExtendedOpcode(), in1, _output.getName());
+ else
+ LibMatrixCUDA.matrixScalarOp(ec,
ec.getGPUContext(0), getExtendedOpcode(), in1,
+ _output.getName(), false,
InstructionUtils.parseScalarBinaryOperator(opcode, false, constVal));
+ }
+ else if (opcode.equals("min")) {
+ ec.setMetaData(_output.getName(), in1.getNumRows(),
in1.getNumColumns());
+ double constVal = constant.getDoubleValue();
+ LibMatrixCUDA.matrixScalarOp(ec, ec.getGPUContext(0),
getExtendedOpcode(), in1,
+ _output.getName(), false,
InstructionUtils.parseScalarBinaryOperator(opcode, false, constVal));
+ }
+ else {
+ throw new DMLRuntimeException("Unsupported GPU
operator:" + opcode);
+ }
+ ec.releaseMatrixInputForGPUInstruction(mat.getName());
+ ec.releaseMatrixOutputForGPUInstruction(_output.getName());
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUContext.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUContext.java
index b00b12c..16e3c7e 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUContext.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUContext.java
@@ -254,6 +254,15 @@ public class GPUContext {
return ret;
}
+ /**
+ * Shallow copy the given source {@link GPUObject} to a new {@link
GPUObject} and
+ * assign that to the given {@link MatrixObject}.
+ * This copy doesn't memcopy the device memory.
+ *
+ * @param source a {@link GPUObject} which is the source of the copy
+ * @param mo a {@link MatrixObject} to associate with the new {@link
GPUObject}
+ * @return a new {@link GPUObject} instance
+ */
public GPUObject shallowCopyGPUObject(GPUObject source, MatrixObject
mo) {
GPUObject ret = new GPUObject(this, source, mo);
getMemoryManager().getGPUMatrixMemoryManager().addGPUObject(ret);
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
index d55f934..df8e2c0 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/GPUObject.java
@@ -159,6 +159,10 @@ public class GPUObject {
jcudaSparseMatrixPtr = null;
}
}
+
+ public void setDirty(boolean flag) {
+ dirty = flag;
+ }
//
----------------------------------------------------------------------
@@ -452,9 +456,9 @@ public class GPUObject {
timestamp = new AtomicLong(that.timestamp.get());
isSparse = that.isSparse;
isLineageCached = that.isLineageCached;
- if (isDensePointerNull())
+ if (!that.isDensePointerNull())
setDensePointer(that.getDensePointer());
- if (getJcudaSparseMatrixPtr() != null)
+ if (that.getJcudaSparseMatrixPtr() != null)
setSparseMatrixCudaPointer(that.getSparseMatrixCudaPointer());
gpuContext = gCtx;
this.mat = mat;
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 20fcdc2..6d5254a 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -46,7 +46,6 @@ import
org.apache.sysds.runtime.instructions.cp.MultiReturnBuiltinCPInstruction;
import
org.apache.sysds.runtime.instructions.cp.ParameterizedBuiltinCPInstruction;
import org.apache.sysds.runtime.instructions.cp.ScalarObject;
import org.apache.sysds.runtime.instructions.fed.ComputationFEDInstruction;
-import org.apache.sysds.runtime.instructions.gpu.AggregateBinaryGPUInstruction;
import org.apache.sysds.runtime.instructions.gpu.GPUInstruction;
import org.apache.sysds.runtime.instructions.gpu.context.GPUObject;
import org.apache.sysds.runtime.lineage.LineageCacheConfig.LineageCacheStatus;
@@ -93,10 +92,11 @@ public class LineageCache
if (LineageCacheConfig.isReusable(inst, ec)) {
ComputationCPInstruction cinst = inst instanceof
ComputationCPInstruction ? (ComputationCPInstruction)inst : null;
ComputationFEDInstruction cfinst = inst instanceof
ComputationFEDInstruction ? (ComputationFEDInstruction)inst : null;
+ GPUInstruction gpuinst = inst instanceof GPUInstruction
? (GPUInstruction)inst : null;
LineageItem instLI = (cinst != null) ?
cinst.getLineageItem(ec).getValue()
: (cfinst != null) ?
cfinst.getLineageItem(ec).getValue()
- :
((LineageTraceable)inst).getLineageItem(ec).getValue(); //GPU instruction
+ : gpuinst.getLineageItem(ec).getValue();
List<MutablePair<LineageItem, LineageCacheEntry>>
liList = null;
if (inst instanceof MultiReturnBuiltinCPInstruction) {
liList = new ArrayList<>();
@@ -134,8 +134,8 @@ public class LineageCache
putIntern(item.getKey(), cinst.output.getDataType(), null, null, 0);
else if (cfinst != null)
putIntern(item.getKey(), cfinst.output.getDataType(), null, null, 0);
- else if (inst instanceof
AggregateBinaryGPUInstruction)
-
putIntern(item.getKey(),
((AggregateBinaryGPUInstruction)inst)._output.getDataType(), null, null, 0);
+ else if (gpuinst != null)
+
putIntern(item.getKey(), gpuinst._output.getDataType(), null, null, 0);
//FIXME: different o/p
datatypes for MultiReturnBuiltins.
}
}
@@ -154,16 +154,20 @@ public class LineageCache
outName =
cinst.output.getName();
else if (inst instanceof
ComputationFEDInstruction)
outName =
cfinst.output.getName();
- else if (inst instanceof
AggregateBinaryGPUInstruction)
- outName =
((AggregateBinaryGPUInstruction) inst)._output.getName();
+ else if (inst instanceof GPUInstruction)
+ outName =
gpuinst._output.getName();
if (e.isMatrixValue() && e._gpuPointer
== null)
ec.setMatrixOutput(outName,
e.getMBValue());
else if (e.isScalarValue())
ec.setScalarOutput(outName,
e.getSOValue());
- else //TODO handle locks on gpu objects
+ else { //TODO handle locks on gpu
objects
+ //shallow copy the cached
GPUObj to the output MatrixObject
ec.getMatrixObject(outName).setGPUObject(ec.getGPUContext(0),
ec.getGPUContext(0).shallowCopyGPUObject(e._gpuPointer,
ec.getMatrixObject(outName)));
+ //Set dirty to true, so that it
is later copied to the host
+
ec.getMatrixObject(outName).getGPUObject(ec.getGPUContext(0)).setDirty(true);
+ }
reuse = true;
@@ -418,15 +422,27 @@ public class LineageCache
liData.add(Pair.of(li, value));
}
}
- else if (inst instanceof AggregateBinaryGPUInstruction)
- liGpuObj =
ec.getMatrixObject(((AggregateBinaryGPUInstruction)
inst)._output).getGPUObject(ec.getGPUContext(0));
+ else if (inst instanceof GPUInstruction) {
+ // TODO: gpu multireturn instructions
+ Data gpudata = ec.getVariable(((GPUInstruction)
inst)._output);
+ liGpuObj = gpudata instanceof MatrixObject ?
+
ec.getMatrixObject(((GPUInstruction)inst)._output).getGPUObject(ec.getGPUContext(0))
: null;
+
+ // Scalar gpu intermediates are always copied
back to host.
+ // No need to cache the GPUobj for scalar
intermediates.
+ if (liGpuObj == null)
+ liData = Arrays.asList(Pair.of(instLI,
ec.getVariable(((GPUInstruction)inst)._output)));
+ }
else
liData = inst instanceof
ComputationCPInstruction ?
Arrays.asList(Pair.of(instLI,
ec.getVariable(((ComputationCPInstruction) inst).output))) :
Arrays.asList(Pair.of(instLI,
ec.getVariable(((ComputationFEDInstruction) inst).output)));
synchronized( _cache ) {
if (liGpuObj != null) {
+ // No need to make space as the entry
is in gpu
+ // TODO: account gpu memory. Eviction
LineageCacheEntry centry =
_cache.get(instLI);
+ // Cache the GPUObj for future reuse
liGpuObj.setIsLinCached(true);
centry._gpuPointer = liGpuObj;
centry._computeTime = computetime;
@@ -664,16 +680,13 @@ public class LineageCache
if (!LineageCacheConfig.getCompAssRW())
return true;
- if (inst instanceof GPUInstruction)
- return true;
-
- CPOperand output = inst instanceof ComputationCPInstruction ?
- ((ComputationCPInstruction)inst).output :
- ((ComputationFEDInstruction)inst).output;
+ CPOperand output = inst instanceof ComputationCPInstruction ?
((ComputationCPInstruction)inst).output
+ : inst instanceof ComputationFEDInstruction ?
((ComputationFEDInstruction)inst).output
+ : ((GPUInstruction)inst)._output;
if (output.isMatrix()) {
- MatrixObject mo = inst instanceof
ComputationCPInstruction ?
-
ec.getMatrixObject(((ComputationCPInstruction)inst).output) :
-
ec.getMatrixObject(((ComputationFEDInstruction)inst).output);
+ MatrixObject mo = inst instanceof
ComputationCPInstruction ?
ec.getMatrixObject(((ComputationCPInstruction)inst).output)
+ : inst instanceof ComputationFEDInstruction ?
ec.getMatrixObject(((ComputationFEDInstruction)inst).output)
+ :
ec.getMatrixObject(((GPUInstruction)inst)._output);
//limit this to full reuse as partial reuse is
applicable even for loop dependent operation
return !(LineageCacheConfig.getCacheType() ==
ReuseCacheType.REUSE_FULL
&& !mo.isMarked());
diff --git
a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
index 680a283..5ceb647 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
@@ -210,7 +210,7 @@ public class LineageCacheConfig
long c2 =
ec.getMatrixObject(fedinst.input2).getNumColumns();
return(c1 == 1 || c2 == 1);
}
- else { //CPInstruction
+ else if (inst instanceof ComputationCPInstruction) {
//CPInstruction
ComputationCPInstruction cpinst =
(ComputationCPInstruction) inst;
if( !cpinst.input1.isMatrix() ||
!cpinst.input2.isMatrix() )
return false;
@@ -218,6 +218,14 @@ public class LineageCacheConfig
long c2 =
ec.getMatrixObject(cpinst.input2).getNumColumns();
return(c1 == 1 || c2 == 1);
}
+ else { //GPUInstruction
+ GPUInstruction gpuinst = (GPUInstruction)inst;
+ if( !gpuinst._input1.isMatrix() ||
!gpuinst._input2.isMatrix() )
+ return false;
+ long c1 =
ec.getMatrixObject(gpuinst._input1).getNumColumns();
+ long c2 =
ec.getMatrixObject(gpuinst._input2).getNumColumns();
+ return(c1 == 1 || c2 == 1);
+ }
}
public static boolean isOutputFederated(Instruction inst, Data data) {
diff --git
a/src/test/java/org/apache/sysds/test/functions/lineage/GPUFullReuseTest.java
b/src/test/java/org/apache/sysds/test/functions/lineage/GPUFullReuseTest.java
index a5d1cd2..6451487 100644
---
a/src/test/java/org/apache/sysds/test/functions/lineage/GPUFullReuseTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/lineage/GPUFullReuseTest.java
@@ -38,6 +38,7 @@ public class GPUFullReuseTest extends AutomatedTestBase{
protected static final String TEST_DIR = "functions/lineage/";
protected static final String TEST_NAME1 = "FullReuseGPU1";
+ protected static final String TEST_NAME2 = "LineageTraceGPU1";
protected String TEST_CLASS_DIR = TEST_DIR +
GPUFullReuseTest.class.getSimpleName() + "/";
@BeforeClass
@@ -50,17 +51,24 @@ public class GPUFullReuseTest extends AutomatedTestBase{
public void setUp() {
TestUtils.clearAssertionInformation();
addTestConfiguration( TEST_NAME1, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] {"R"}) );
+ addTestConfiguration( TEST_NAME2, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME2, new String[] {"R"}) );
}
@Test
- public void ReuseSingleInst() { //reuse ba+*
+ public void ReuseAggBin() { //reuse AggregateBinary and sum
testLineageTraceExec(TEST_NAME1);
}
+
+ @Test
+ public void ReuseSimpleHLM() { //hyper-parameter tuning over LM
(simple)
+ testLineageTraceExec(TEST_NAME2);
+ }
private void testLineageTraceExec(String testname) {
System.out.println("------------ BEGIN " + testname +
"------------");
getAndLoadTestConfiguration(testname);
+ AutomatedTestBase.TEST_GPU = true; //adds '-gpu'
List<String> proArgs = new ArrayList<>();
proArgs.add("-stats");
proArgs.add("-args");
@@ -72,7 +80,6 @@ public class GPUFullReuseTest extends AutomatedTestBase{
runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
HashMap<MatrixValue.CellIndex, Double> R_orig =
readDMLMatrixFromOutputDir("R");
- AutomatedTestBase.TEST_GPU = true; //adds '-gpu'
proArgs.add("-stats");
proArgs.add("-lineage");
proArgs.add("reuse_full");
diff --git a/src/test/scripts/functions/lineage/FullReuseGPU1.dml
b/src/test/scripts/functions/lineage/FullReuseGPU1.dml
index 58307dc..9e37a4f 100644
--- a/src/test/scripts/functions/lineage/FullReuseGPU1.dml
+++ b/src/test/scripts/functions/lineage/FullReuseGPU1.dml
@@ -20,10 +20,12 @@
#-------------------------------------------------------------
X = rand(rows=1000, cols=100, sparsity=1, seed=42);
y = rand(rows=100, cols=100, sparsity=1, seed=42);
+R = matrix(0, rows=1, cols=100);
+
for (i in 1:10) {
tmp = X %*% y;
+ R[1,i] = sum(tmp);
}
-R = tmp;
write(R, $1, format="text");