http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java b/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java deleted file mode 100644 index bccb1ea..0000000 --- a/src/main/java/org/apache/sysml/lops/ConvolutionTransform.java +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.lops; - -import org.apache.sysml.lops.LopProperties.ExecLocation; -import org.apache.sysml.lops.LopProperties.ExecType; -import org.apache.sysml.lops.compile.JobType; -import org.apache.sysml.parser.Expression.DataType; -import org.apache.sysml.parser.Expression.ValueType; - -public class ConvolutionTransform extends Lop -{ - public enum OperationTypes { - MAX_POOL, MAX_POOL_BACKWARD, AVG_POOL, AVG_POOL_BACKWARD, - RELU_MAX_POOLING, RELU_MAX_POOLING_BACKWARD, RELU_BACKWARD, - CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA, - BIAS_ADD, CONV2D_BIAS_ADD, BIAS_MULTIPLY, CHANNEL_SUMS - } - - private OperationTypes operation; - private double intermediateMemBudget; - private final int numThreads; - - /** - * Constructor when we have one input. - * - * @param input low-level operator - * @param op convolution transform operation type - * @param dt data type - * @param vt value type - * @param et execution type - * @param k number of threads - * @param intermediateMemBudget intermediate memory budget - */ - public ConvolutionTransform(Lop input, ConvolutionTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k, double intermediateMemBudget) - { - super(Lop.Type.Transform, dt, vt); - init(input, op, dt, vt, et); - numThreads = k; - this.intermediateMemBudget = intermediateMemBudget; - } - - public ConvolutionTransform(Lop input1, Lop input2, ConvolutionTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k) - { - super(Lop.Type.Transform, dt, vt); - init(input1, op, dt, vt, et); - numThreads = k; - this.addInput(input2); - input2.addOutput(this); - setLevel(); - } - - public ConvolutionTransform(Lop input1, Lop input2, Lop input3, ConvolutionTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k) - { - super(Lop.Type.Transform, dt, vt); - init(input1, op, dt, vt, et); - numThreads = k; - this.addInput(input2); - input2.addOutput(this); - this.addInput(input3); - input3.addOutput(this); - setLevel(); - } - - private void init (Lop input, ConvolutionTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et) - { - operation = op; - - this.addInput(input); - input.addOutput(this); - - boolean breaksAlignment = true; - boolean aligner = false; - boolean definesMRJob = false; - if ( et == ExecType.MR ) { - throw new RuntimeException("The execution type is not supported: " + et.name()); - } - else //CP/SPARK - { - // <code>breaksAlignment</code> is not meaningful when <code>Transform</code> executes in CP. - breaksAlignment = false; - lps.addCompatibility(JobType.INVALID); - lps.setProperties( inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob ); - } - } - - public void updateLopProperties() { - lps.setLevel(inputs); - } - - @Override - public String toString() { - - return " Operation: " + operation; - } - - /** - * method to get operation type - * @return operation type - */ - - public OperationTypes getOperationType() - { - return operation; - } - - private String getOpcode() { - switch(operation) { - - case MAX_POOL: - return "maxpooling"; - - case RELU_MAX_POOLING: - return "relu_maxpooling"; - - case RELU_MAX_POOLING_BACKWARD: - return "relu_maxpooling_backward"; - - case RELU_BACKWARD: - return "relu_backward"; - - case MAX_POOL_BACKWARD: - return "maxpooling_backward"; - - case AVG_POOL: - return "avgpooling"; - - case AVG_POOL_BACKWARD: - return "avgpooling_backward"; - - case CONV2D: - return "conv2d"; - - case CONV2D_BIAS_ADD: - return "conv2d_bias_add"; - - case BIAS_ADD: - return "bias_add"; - - case BIAS_MULTIPLY: - return "bias_multiply"; - - case CONV2D_BACKWARD_FILTER: - return "conv2d_backward_filter"; - - case CONV2D_BACKWARD_DATA: - return "conv2d_backward_data"; - - case CHANNEL_SUMS: - return "channel_sums"; - - default: - throw new UnsupportedOperationException(this.printErrorLocation() + "Instruction is not defined for Transform operation " + operation); - - } - } - - @Override - public String getInstructions(String input, String bias, String output) { - if(operation == OperationTypes.BIAS_ADD || operation == OperationTypes.BIAS_MULTIPLY || operation == OperationTypes.RELU_BACKWARD) { - StringBuilder sb = new StringBuilder(); - sb.append( getExecType() ); - - sb.append( OPERAND_DELIMITOR ); - sb.append( getOpcode() ); - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(0).prepInputOperand(input)); - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(0).prepInputOperand(bias)); - //output - sb.append( OPERAND_DELIMITOR ); - sb.append( this.prepOutputOperand(output)); - - //append degree of parallelism - if( getExecType()==ExecType.CP ) { - sb.append( OPERAND_DELIMITOR ); - sb.append( numThreads ); - } - - sb.append( OPERAND_DELIMITOR ); - sb.append( intermediateMemBudget ); - return sb.toString(); - } - else { - throw new LopsException("The operation is not supported with two operands:" + operation.name()); - } - } - - @Override - public String getInstructions(String input, String C, String HW, String output) { - if(operation == OperationTypes.CHANNEL_SUMS) { - StringBuilder sb = new StringBuilder(); - sb.append( getExecType() ); - - sb.append( OPERAND_DELIMITOR ); - sb.append( getOpcode() ); - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(0).prepInputOperand(input)); - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(1).prepInputOperand(C)); - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(2).prepInputOperand(HW)); - //output - sb.append( OPERAND_DELIMITOR ); - sb.append( this.prepOutputOperand(output)); - - return sb.toString(); - } - else { - throw new LopsException("The operation is not supported with three operands:" + operation.name()); - } - } - - @Override - public String getInstructions(String[] inputs, String output) { - StringBuilder sb = new StringBuilder(); - appendOpcode(sb); - - for( int i=0; i<inputs.length-12; i++ ) { - if( i > 0 ) - sb.append( OPERAND_DELIMITOR ); - sb.append( getInputs().get(i).prepInputOperand(inputs[i])); - } - appendOperands(inputs.length-12, inputs.length, output, sb); - - return sb.toString(); - } - - public void appendOpcode(StringBuilder sb) { - sb.append( getExecType() ); - sb.append( OPERAND_DELIMITOR ); - sb.append( getOpcode() ); - sb.append( OPERAND_DELIMITOR ); - } - - public void appendOperands(int startInputIndex, int endInputIndex, String output, StringBuilder sb) { - for( int i=startInputIndex; i < endInputIndex; i++ ) { - Lop ltmp = getInputs().get(i); - sb.append( OPERAND_DELIMITOR ); - sb.append( ltmp.prepScalarInputOperand(getExecType())); - } - - //output - sb.append( OPERAND_DELIMITOR ); - sb.append( this.prepOutputOperand(output)); - - //append degree of parallelism - if( getExecType()==ExecType.CP ) { - sb.append( OPERAND_DELIMITOR ); - sb.append( numThreads ); - } - - sb.append( OPERAND_DELIMITOR ); - sb.append( intermediateMemBudget ); - } - -}
http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/lops/DnnTransform.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/lops/DnnTransform.java b/src/main/java/org/apache/sysml/lops/DnnTransform.java new file mode 100644 index 0000000..02dcec1 --- /dev/null +++ b/src/main/java/org/apache/sysml/lops/DnnTransform.java @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.lops; + +import org.apache.sysml.lops.LopProperties.ExecLocation; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.lops.compile.JobType; +import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.parser.Expression.ValueType; + +public class DnnTransform extends Lop +{ + public enum OperationTypes { + MAX_POOL, MAX_POOL_BACKWARD, AVG_POOL, AVG_POOL_BACKWARD, + RELU_MAX_POOLING, RELU_MAX_POOLING_BACKWARD, RELU_BACKWARD, + CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA, + BIAS_ADD, CONV2D_BIAS_ADD, BIAS_MULTIPLY, CHANNEL_SUMS + } + + private OperationTypes operation; + private double intermediateMemBudget; + private final int numThreads; + + /** + * Constructor when we have one input. + * + * @param input low-level operator + * @param op convolution transform operation type + * @param dt data type + * @param vt value type + * @param et execution type + * @param k number of threads + * @param intermediateMemBudget intermediate memory budget + */ + public DnnTransform(Lop input, DnnTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k, double intermediateMemBudget) + { + super(Lop.Type.Transform, dt, vt); + init(input, op, dt, vt, et); + numThreads = k; + this.intermediateMemBudget = intermediateMemBudget; + } + + public DnnTransform(Lop input1, Lop input2, DnnTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k) + { + super(Lop.Type.Transform, dt, vt); + init(input1, op, dt, vt, et); + numThreads = k; + this.addInput(input2); + input2.addOutput(this); + setLevel(); + } + + public DnnTransform(Lop input1, Lop input2, Lop input3, DnnTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et, int k) + { + super(Lop.Type.Transform, dt, vt); + init(input1, op, dt, vt, et); + numThreads = k; + this.addInput(input2); + input2.addOutput(this); + this.addInput(input3); + input3.addOutput(this); + setLevel(); + } + + private void init (Lop input, DnnTransform.OperationTypes op, DataType dt, ValueType vt, ExecType et) + { + operation = op; + + this.addInput(input); + input.addOutput(this); + + boolean breaksAlignment = true; + boolean aligner = false; + boolean definesMRJob = false; + if ( et == ExecType.MR ) { + throw new RuntimeException("The execution type is not supported: " + et.name()); + } + else //CP/SPARK + { + // <code>breaksAlignment</code> is not meaningful when <code>Transform</code> executes in CP. + breaksAlignment = false; + lps.addCompatibility(JobType.INVALID); + lps.setProperties( inputs, et, ExecLocation.ControlProgram, breaksAlignment, aligner, definesMRJob ); + } + } + + public void updateLopProperties() { + lps.setLevel(inputs); + } + + @Override + public String toString() { + + return " Operation: " + operation; + } + + /** + * method to get operation type + * @return operation type + */ + + public OperationTypes getOperationType() + { + return operation; + } + + private String getOpcode() { + switch(operation) { + + case MAX_POOL: + return "maxpooling"; + + case RELU_MAX_POOLING: + return "relu_maxpooling"; + + case RELU_MAX_POOLING_BACKWARD: + return "relu_maxpooling_backward"; + + case RELU_BACKWARD: + return "relu_backward"; + + case MAX_POOL_BACKWARD: + return "maxpooling_backward"; + + case AVG_POOL: + return "avgpooling"; + + case AVG_POOL_BACKWARD: + return "avgpooling_backward"; + + case CONV2D: + return "conv2d"; + + case CONV2D_BIAS_ADD: + return "conv2d_bias_add"; + + case BIAS_ADD: + return "bias_add"; + + case BIAS_MULTIPLY: + return "bias_multiply"; + + case CONV2D_BACKWARD_FILTER: + return "conv2d_backward_filter"; + + case CONV2D_BACKWARD_DATA: + return "conv2d_backward_data"; + + case CHANNEL_SUMS: + return "channel_sums"; + + default: + throw new UnsupportedOperationException(this.printErrorLocation() + "Instruction is not defined for Transform operation " + operation); + + } + } + + @Override + public String getInstructions(String input, String bias, String output) { + if(operation == OperationTypes.BIAS_ADD || operation == OperationTypes.BIAS_MULTIPLY || operation == OperationTypes.RELU_BACKWARD) { + StringBuilder sb = new StringBuilder(); + sb.append( getExecType() ); + + sb.append( OPERAND_DELIMITOR ); + sb.append( getOpcode() ); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(0).prepInputOperand(input)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(0).prepInputOperand(bias)); + //output + sb.append( OPERAND_DELIMITOR ); + sb.append( this.prepOutputOperand(output)); + + //append degree of parallelism + if( getExecType()==ExecType.CP ) { + sb.append( OPERAND_DELIMITOR ); + sb.append( numThreads ); + } + + sb.append( OPERAND_DELIMITOR ); + sb.append( intermediateMemBudget ); + return sb.toString(); + } + else { + throw new LopsException("The operation is not supported with two operands:" + operation.name()); + } + } + + @Override + public String getInstructions(String input, String C, String HW, String output) { + if(operation == OperationTypes.CHANNEL_SUMS) { + StringBuilder sb = new StringBuilder(); + sb.append( getExecType() ); + + sb.append( OPERAND_DELIMITOR ); + sb.append( getOpcode() ); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(0).prepInputOperand(input)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(1).prepInputOperand(C)); + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(2).prepInputOperand(HW)); + //output + sb.append( OPERAND_DELIMITOR ); + sb.append( this.prepOutputOperand(output)); + + return sb.toString(); + } + else { + throw new LopsException("The operation is not supported with three operands:" + operation.name()); + } + } + + @Override + public String getInstructions(String[] inputs, String output) { + StringBuilder sb = new StringBuilder(); + appendOpcode(sb); + + for( int i=0; i<inputs.length-12; i++ ) { + if( i > 0 ) + sb.append( OPERAND_DELIMITOR ); + sb.append( getInputs().get(i).prepInputOperand(inputs[i])); + } + appendOperands(inputs.length-12, inputs.length, output, sb); + + return sb.toString(); + } + + public void appendOpcode(StringBuilder sb) { + sb.append( getExecType() ); + sb.append( OPERAND_DELIMITOR ); + sb.append( getOpcode() ); + sb.append( OPERAND_DELIMITOR ); + } + + public void appendOperands(int startInputIndex, int endInputIndex, String output, StringBuilder sb) { + for( int i=startInputIndex; i < endInputIndex; i++ ) { + Lop ltmp = getInputs().get(i); + sb.append( OPERAND_DELIMITOR ); + sb.append( ltmp.prepScalarInputOperand(getExecType())); + } + + //output + sb.append( OPERAND_DELIMITOR ); + sb.append( this.prepOutputOperand(output)); + + //append degree of parallelism + if( getExecType()==ExecType.CP ) { + sb.append( OPERAND_DELIMITOR ); + sb.append( numThreads ); + } + + sb.append( OPERAND_DELIMITOR ); + sb.append( intermediateMemBudget ); + } + +} http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java index 3ca8e1d..ca78106 100644 --- a/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java +++ b/src/main/java/org/apache/sysml/parser/BuiltinFunctionExpression.java @@ -29,7 +29,7 @@ import org.apache.commons.lang.ArrayUtils; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.parser.LanguageException.LanguageErrorCodes; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; -import org.apache.sysml.runtime.util.ConvolutionUtils; +import org.apache.sysml.runtime.util.DnnUtils; import org.apache.sysml.runtime.util.UtilFunctions; public class BuiltinFunctionExpression extends DataIdentifier @@ -40,7 +40,7 @@ public class BuiltinFunctionExpression extends DataIdentifier public BuiltinFunctionExpression(ParserRuleContext ctx, BuiltinFunctionOp bifop, ArrayList<ParameterExpression> args, String fname) { _opcode = bifop; setCtxValuesAndFilename(ctx, fname); - args = expandConvolutionArguments(args); + args = expandDnnArguments(args); _args = new Expression[args.size()]; for(int i=0; i < args.size(); i++) { _args[i] = args.get(i).getExpr(); @@ -391,7 +391,7 @@ public class BuiltinFunctionExpression extends DataIdentifier out.setBlockDimensions(exp.getOutput().getRowsInBlock(), exp.getOutput().getColumnsInBlock()); } - private static ArrayList<ParameterExpression> orderConvolutionParams(ArrayList<ParameterExpression> paramExpression, int skip) { + private static ArrayList<ParameterExpression> orderDnnParams(ArrayList<ParameterExpression> paramExpression, int skip) { ArrayList<ParameterExpression> newParams = new ArrayList<>(); for(int i = 0; i < skip; i++) @@ -458,14 +458,14 @@ public class BuiltinFunctionExpression extends DataIdentifier return newParamExpressions; } - private ArrayList<ParameterExpression> expandConvolutionArguments(ArrayList<ParameterExpression> paramExpression) { + private ArrayList<ParameterExpression> expandDnnArguments(ArrayList<ParameterExpression> paramExpression) { try { if(_opcode == BuiltinFunctionOp.CONV2D || _opcode == BuiltinFunctionOp.CONV2D_BACKWARD_FILTER || _opcode == BuiltinFunctionOp.CONV2D_BACKWARD_DATA) { HashSet<String> expand = new HashSet<>(); expand.add("input_shape"); expand.add("filter_shape"); expand.add("stride"); expand.add("padding"); paramExpression = expandListParams(paramExpression, expand); - paramExpression = orderConvolutionParams(paramExpression, 2); + paramExpression = orderDnnParams(paramExpression, 2); } else if(_opcode == BuiltinFunctionOp.MAX_POOL || _opcode == BuiltinFunctionOp.AVG_POOL || _opcode == BuiltinFunctionOp.MAX_POOL_BACKWARD || _opcode == BuiltinFunctionOp.AVG_POOL_BACKWARD) { @@ -476,9 +476,9 @@ public class BuiltinFunctionExpression extends DataIdentifier paramExpression.add(new ParameterExpression("filter_shape2", new IntIdentifier(1, this))); paramExpression = replaceListParams(paramExpression, "pool_size", "filter_shape", 3); if(_opcode == BuiltinFunctionOp.MAX_POOL_BACKWARD || _opcode == BuiltinFunctionOp.AVG_POOL_BACKWARD) - paramExpression = orderConvolutionParams(paramExpression, 2); + paramExpression = orderDnnParams(paramExpression, 2); else - paramExpression = orderConvolutionParams(paramExpression, 1); + paramExpression = orderDnnParams(paramExpression, 1); } } catch(LanguageException e) { @@ -1393,8 +1393,8 @@ public class BuiltinFunctionExpression extends DataIdentifier output.setDimensions(N, C*H*W); } else if(H > 0 && W > 0 && stride_h > 0 && stride_w > 0 && pad_h >= 0 && pad_w >= 0 && R > 0 && S > 0) { - long P = ConvolutionUtils.getP(H, R, stride_h, pad_h); - long Q = ConvolutionUtils.getQ(W, S, stride_w, pad_w); + long P = DnnUtils.getP(H, R, stride_h, pad_h); + long Q = DnnUtils.getQ(W, S, stride_w, pad_w); // Try to set both rows and columns if(this.getOpCode() == BuiltinFunctionOp.CONV2D) http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/parser/DMLTranslator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/DMLTranslator.java b/src/main/java/org/apache/sysml/parser/DMLTranslator.java index d29a8f4..08df72f 100644 --- a/src/main/java/org/apache/sysml/parser/DMLTranslator.java +++ b/src/main/java/org/apache/sysml/parser/DMLTranslator.java @@ -34,14 +34,14 @@ import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.hops.AggBinaryOp; import org.apache.sysml.hops.AggUnaryOp; import org.apache.sysml.hops.BinaryOp; -import org.apache.sysml.hops.ConvolutionOp; +import org.apache.sysml.hops.DnnOp; import org.apache.sysml.hops.DataGenOp; import org.apache.sysml.hops.DataOp; import org.apache.sysml.hops.FunctionOp; import org.apache.sysml.hops.FunctionOp.FunctionType; import org.apache.sysml.hops.Hop; import org.apache.sysml.hops.Hop.AggOp; -import org.apache.sysml.hops.Hop.ConvOp; +import org.apache.sysml.hops.Hop.OpOpDnn; import org.apache.sysml.hops.Hop.DataGenMethod; import org.apache.sysml.hops.Hop.DataOpTypes; import org.apache.sysml.hops.Hop.Direction; @@ -2689,30 +2689,30 @@ public class DMLTranslator ArrayList<Hop> inHops1 = new ArrayList<>(); inHops1.add(expr); inHops1.add(expr2); - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), - ConvOp.valueOf(source.getOpCode().name()), inHops1); + currBuiltinOp = new DnnOp(target.getName(), target.getDataType(), target.getValueType(), + OpOpDnn.valueOf(source.getOpCode().name()), inHops1); setBlockSizeAndRefreshSizeInfo(expr, currBuiltinOp); break; } case AVG_POOL: case MAX_POOL: { - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), - ConvOp.valueOf(source.getOpCode().name()), getALHopsForPoolingForwardIM2COL(expr, source, 1, hops)); + currBuiltinOp = new DnnOp(target.getName(), target.getDataType(), target.getValueType(), + OpOpDnn.valueOf(source.getOpCode().name()), getALHopsForPoolingForwardIM2COL(expr, source, 1, hops)); setBlockSizeAndRefreshSizeInfo(expr, currBuiltinOp); break; } case AVG_POOL_BACKWARD: case MAX_POOL_BACKWARD: { - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), - ConvOp.valueOf(source.getOpCode().name()), getALHopsForConvOpPoolingCOL2IM(expr, source, 1, hops)); + currBuiltinOp = new DnnOp(target.getName(), target.getDataType(), target.getValueType(), + OpOpDnn.valueOf(source.getOpCode().name()), getALHopsForConvOpPoolingCOL2IM(expr, source, 1, hops)); setBlockSizeAndRefreshSizeInfo(expr, currBuiltinOp); break; } case CONV2D: case CONV2D_BACKWARD_FILTER: case CONV2D_BACKWARD_DATA: { - currBuiltinOp = new ConvolutionOp(target.getName(), target.getDataType(), target.getValueType(), - ConvOp.valueOf(source.getOpCode().name()), getALHopsForConvOp(expr, source, 1, hops)); + currBuiltinOp = new DnnOp(target.getName(), target.getDataType(), target.getValueType(), + OpOpDnn.valueOf(source.getOpCode().name()), getALHopsForConvOp(expr, source, 1, hops)); setBlockSizeAndRefreshSizeInfo(expr, currBuiltinOp); break; } http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java index f01d3ae..fcc27e9 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/CPInstructionParser.java @@ -39,7 +39,7 @@ import org.apache.sysml.runtime.instructions.cp.CPInstruction; import org.apache.sysml.runtime.instructions.cp.CPInstruction.CPType; import org.apache.sysml.runtime.instructions.cp.CentralMomentCPInstruction; import org.apache.sysml.runtime.instructions.cp.CompressionCPInstruction; -import org.apache.sysml.runtime.instructions.cp.ConvolutionCPInstruction; +import org.apache.sysml.runtime.instructions.cp.DnnCPInstruction; import org.apache.sysml.runtime.instructions.cp.CovarianceCPInstruction; import org.apache.sysml.runtime.instructions.cp.DataGenCPInstruction; import org.apache.sysml.runtime.instructions.cp.DataPartitionCPInstruction; @@ -235,22 +235,22 @@ public class CPInstructionParser extends InstructionParser String2CPInstructionType.put( "rsort" , CPType.Reorg); // Opcodes related to convolutions - String2CPInstructionType.put( "relu_backward" , CPType.Convolution); - String2CPInstructionType.put( "relu_maxpooling" , CPType.Convolution); - String2CPInstructionType.put( "relu_maxpooling_backward" , CPType.Convolution); - String2CPInstructionType.put( "maxpooling" , CPType.Convolution); - String2CPInstructionType.put( "maxpooling_backward" , CPType.Convolution); - String2CPInstructionType.put( "avgpooling" , CPType.Convolution); - String2CPInstructionType.put( "avgpooling_backward" , CPType.Convolution); - String2CPInstructionType.put( "conv2d" , CPType.Convolution); - String2CPInstructionType.put( "conv2d_bias_add" , CPType.Convolution); - String2CPInstructionType.put( "conv2d_backward_filter" , CPType.Convolution); - String2CPInstructionType.put( "conv2d_backward_data" , CPType.Convolution); - String2CPInstructionType.put( "bias_add" , CPType.Convolution); - String2CPInstructionType.put( "bias_multiply" , CPType.Convolution); - String2CPInstructionType.put( "channel_sums" , CPType.Convolution); - String2CPInstructionType.put( "batch_norm2d", CPType.Convolution); - String2CPInstructionType.put( "batch_norm2d_backward", CPType.Convolution); + String2CPInstructionType.put( "relu_backward" , CPType.Dnn); + String2CPInstructionType.put( "relu_maxpooling" , CPType.Dnn); + String2CPInstructionType.put( "relu_maxpooling_backward" , CPType.Dnn); + String2CPInstructionType.put( "maxpooling" , CPType.Dnn); + String2CPInstructionType.put( "maxpooling_backward" , CPType.Dnn); + String2CPInstructionType.put( "avgpooling" , CPType.Dnn); + String2CPInstructionType.put( "avgpooling_backward" , CPType.Dnn); + String2CPInstructionType.put( "conv2d" , CPType.Dnn); + String2CPInstructionType.put( "conv2d_bias_add" , CPType.Dnn); + String2CPInstructionType.put( "conv2d_backward_filter" , CPType.Dnn); + String2CPInstructionType.put( "conv2d_backward_data" , CPType.Dnn); + String2CPInstructionType.put( "bias_add" , CPType.Dnn); + String2CPInstructionType.put( "bias_multiply" , CPType.Dnn); + String2CPInstructionType.put( "channel_sums" , CPType.Dnn); + String2CPInstructionType.put( "batch_norm2d", CPType.Dnn); + String2CPInstructionType.put( "batch_norm2d_backward", CPType.Dnn); // Quaternary instruction opcodes String2CPInstructionType.put( "wsloss" , CPType.Quaternary); @@ -344,8 +344,8 @@ public class CPInstructionParser extends InstructionParser case Reorg: return ReorgCPInstruction.parseInstruction(str); - case Convolution: - return ConvolutionCPInstruction.parseInstruction(str); + case Dnn: + return DnnCPInstruction.parseInstruction(str); case UaggOuterChain: return UaggOuterChainCPInstruction.parseInstruction(str); http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java index 8e9bb47..59c7350 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/GPUInstructionParser.java @@ -26,7 +26,7 @@ import org.apache.sysml.runtime.instructions.gpu.AggregateBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.ArithmeticBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.BuiltinBinaryGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.BuiltinUnaryGPUInstruction; -import org.apache.sysml.runtime.instructions.gpu.ConvolutionGPUInstruction; +import org.apache.sysml.runtime.instructions.gpu.DnnGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.GPUInstruction; import org.apache.sysml.runtime.instructions.gpu.MatrixIndexingGPUInstruction; import org.apache.sysml.runtime.instructions.gpu.MatrixMatrixAxpyGPUInstruction; @@ -44,22 +44,22 @@ public class GPUInstructionParser extends InstructionParser String2GPUInstructionType = new HashMap<>(); // Neural Network Operators - String2GPUInstructionType.put( "relu_backward", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "conv2d", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "conv2d_bias_add", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "conv2d_backward_filter", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "conv2d_backward_data", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "maxpooling", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "maxpooling_backward", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "avgpooling", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "avgpooling_backward", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "bias_add", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "bias_multiply", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "channel_sums", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "lstm", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "lstm_backward", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "batch_norm2d", GPUINSTRUCTION_TYPE.Convolution); - String2GPUInstructionType.put( "batch_norm2d_backward", GPUINSTRUCTION_TYPE.Convolution); + String2GPUInstructionType.put( "relu_backward", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "conv2d", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "conv2d_bias_add", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "conv2d_backward_filter", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "conv2d_backward_data", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "maxpooling", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "maxpooling_backward", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "avgpooling", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "avgpooling_backward", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "bias_add", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "bias_multiply", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "channel_sums", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "lstm", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "lstm_backward", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "batch_norm2d", GPUINSTRUCTION_TYPE.Dnn); + String2GPUInstructionType.put( "batch_norm2d_backward", GPUINSTRUCTION_TYPE.Dnn); // Matrix Multiply Operators String2GPUInstructionType.put( "ba+*", GPUINSTRUCTION_TYPE.AggregateBinary); @@ -182,8 +182,8 @@ public class GPUInstructionParser extends InstructionParser case Append: return MatrixAppendGPUInstruction.parseInstruction(str); - case Convolution: - return ConvolutionGPUInstruction.parseInstruction(str); + case Dnn: + return DnnGPUInstruction.parseInstruction(str); case MMTSJ: return MMTSJGPUInstruction.parseInstruction(str); http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java b/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java index dd91b9f..efec463 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/SPInstructionParser.java @@ -51,7 +51,7 @@ import org.apache.sysml.runtime.instructions.spark.CastSPInstruction; import org.apache.sysml.runtime.instructions.spark.CentralMomentSPInstruction; import org.apache.sysml.runtime.instructions.spark.CheckpointSPInstruction; import org.apache.sysml.runtime.instructions.spark.CompressionSPInstruction; -import org.apache.sysml.runtime.instructions.spark.ConvolutionSPInstruction; +import org.apache.sysml.runtime.instructions.spark.DnnSPInstruction; import org.apache.sysml.runtime.instructions.spark.CovarianceSPInstruction; import org.apache.sysml.runtime.instructions.spark.CpmmSPInstruction; import org.apache.sysml.runtime.instructions.spark.CumulativeAggregateSPInstruction; @@ -138,10 +138,10 @@ public class SPInstructionParser extends InstructionParser String2SPInstructionType.put( "tack+*" , SPType.AggregateTernary); // Neural network operators - String2SPInstructionType.put( "conv2d", SPType.Convolution); - String2SPInstructionType.put( "conv2d_bias_add", SPType.Convolution); - String2SPInstructionType.put( "maxpooling", SPType.Convolution); - String2SPInstructionType.put( "relu_maxpooling", SPType.Convolution); + String2SPInstructionType.put( "conv2d", SPType.Dnn); + String2SPInstructionType.put( "conv2d_bias_add", SPType.Dnn); + String2SPInstructionType.put( "maxpooling", SPType.Dnn); + String2SPInstructionType.put( "relu_maxpooling", SPType.Dnn); String2SPInstructionType.put( RightIndex.OPCODE, SPType.MatrixIndexing); String2SPInstructionType.put( LeftIndex.OPCODE, SPType.MatrixIndexing); @@ -370,8 +370,8 @@ public class SPInstructionParser extends InstructionParser case AggregateTernary: return AggregateTernarySPInstruction.parseInstruction(str); - case Convolution: - return ConvolutionSPInstruction.parseInstruction(str); + case Dnn: + return DnnSPInstruction.parseInstruction(str); case MatrixIndexing: return IndexingSPInstruction.parseInstruction(str); http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java index afad85f..52da951 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/CPInstruction.java @@ -34,7 +34,7 @@ public abstract class CPInstruction extends Instruction MultiReturnParameterizedBuiltin, ParameterizedBuiltin, MultiReturnBuiltin, Builtin, Reorg, Variable, External, Append, Rand, QSort, QPick, MatrixIndexing, MMTSJ, PMMJ, MMChain, MatrixReshape, Partition, Compression, SpoofFused, - StringInit, CentralMoment, Covariance, UaggOuterChain, Convolution } + StringInit, CentralMoment, Covariance, UaggOuterChain, Dnn } protected final CPType _cptype; protected final Operator _optr; http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java deleted file mode 100644 index 97d050d..0000000 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/ConvolutionCPInstruction.java +++ /dev/null @@ -1,645 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.instructions.cp; - -import java.util.ArrayList; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.sysml.api.DMLScript; -import org.apache.sysml.hops.OptimizerUtils; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; -import org.apache.sysml.runtime.instructions.InstructionUtils; -import org.apache.sysml.runtime.matrix.data.ConvolutionParameters; -import org.apache.sysml.runtime.matrix.data.LibMatrixDNN; -import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.PoolingType; -import org.apache.sysml.runtime.matrix.data.LibMatrixNative; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.util.ConvolutionUtils; -import org.apache.sysml.utils.NativeHelper; - -public class ConvolutionCPInstruction extends UnaryCPInstruction { - private static final Log LOG = LogFactory.getLog(ConvolutionCPInstruction.class.getName()); - private static boolean warnedUnderUtilitization = false; - - private final CPOperand _in2; - private final CPOperand _in3; - private final CPOperand _in4; - private final CPOperand _in5; - private final CPOperand _in6; - private final CPOperand _in7; - private final CPOperand _in8; - private final CPOperand _out2; - private final CPOperand _out3; - private final CPOperand _out4; - private final CPOperand _out5; - private final ArrayList<CPOperand> _input_shape; - private final ArrayList<CPOperand> _filter_shape; - private final ArrayList<CPOperand> _stride; - private final ArrayList<CPOperand> _padding; - private final int _numThreads; - private final double _intermediateMemoryBudget; - - public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand in3, CPOperand out, - ArrayList<CPOperand> stride, ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape, - ArrayList<CPOperand> filter_shape, int numThreads, double intermediateMemoryBudget, String opcode, String istr) { - super(CPType.Convolution, null, in, out, opcode, istr); - _in2 = in2; - _in3 = in3; - _in4 = null; _in5 = null; _in6 = null; _in7 = null; _in8 = null; - _out2 = null; _out3 = null; _out4 = null; _out5 = null; - _stride = stride; - _padding = padding; - _input_shape = input_shape; - _filter_shape = filter_shape; - _numThreads = numThreads; - _intermediateMemoryBudget = intermediateMemoryBudget; - } - - public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand out, String opcode, String istr, int numThreads, double intermediateMemoryBudget) { - this(in, in2, null, out, null, null, null, null, numThreads, intermediateMemoryBudget, opcode, istr); - if( !(opcode.equals("bias_add") || opcode.equals("relu_backward") || opcode.equals("bias_multiply") ) ) { - throw new DMLRuntimeException("Incorrect usage. Expected the opcode to be bias_add or bias_multiply or relu_backward, but found " + opcode); - } - } - - public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand in3, CPOperand out, String opcode, String istr, int numThreads, double intermediateMemoryBudget) { - this(in, in2, in3, out, null, null, null, null, numThreads, intermediateMemoryBudget, opcode, istr); - if( !opcode.equals("channel_sums") ) { - throw new DMLRuntimeException("Incorrect usage. Expected the opcode to be channel_sums, but found " + opcode); - } - } - - private ConvolutionCPInstruction(CPOperand in, CPOperand out, String opcode, String istr, - ArrayList<CPOperand> stride, ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape, - ArrayList<CPOperand> filter_shape, int numThreads, double intermediateMemoryBudget) { - this(in, null, null, out, stride, padding, input_shape, filter_shape, numThreads, intermediateMemoryBudget, opcode, istr); - } - - public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand out, String opcode, - String istr, ArrayList<CPOperand> stride, - ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape, - ArrayList<CPOperand> filter_shape, int numThreads, double intermediateMemoryBudget) { - this(in, in2, null, out, stride, padding, input_shape, filter_shape, numThreads, intermediateMemoryBudget, opcode, istr); - } - - public ConvolutionCPInstruction(CPOperand in, CPOperand in2, CPOperand in3, CPOperand out, String opcode, - String istr, ArrayList<CPOperand> stride, - ArrayList<CPOperand> padding, ArrayList<CPOperand> input_shape, - ArrayList<CPOperand> filter_shape, int numThreads, double intermediateMemoryBudget) { - this(in, in2, in3, out, stride, padding, input_shape, filter_shape, numThreads, intermediateMemoryBudget, opcode, istr); - } - - public ConvolutionCPInstruction(CPOperand in1, CPOperand in2, CPOperand in3, CPOperand in4, CPOperand in5, - CPOperand in6, CPOperand in7, CPOperand in8, - CPOperand out, CPOperand out2, CPOperand out3, CPOperand out4, CPOperand out5, String opcode, String istr, - double intermediateMemoryBudget) throws DMLRuntimeException { - super(CPType.Convolution, null, in1, out, opcode, istr); - _in2 = in2; - _in3 = in3; - _in4 = in4; - _in5 = in5; - _in6 = in6; - _in7 = in7; - _in8 = in8; - _out2 = out2; - _out3 = out3; - _out4 = out4; - _out5 = out5; - _stride = null; - _padding = null; - _input_shape = null; - _filter_shape = null; - _numThreads = 0; - _intermediateMemoryBudget = intermediateMemoryBudget; - } - - public static ConvolutionCPInstruction parseInstruction(String str) { - - String[] parts = InstructionUtils.getInstructionPartsWithValueType(str); - String opcode = parts[0]; - if (opcode.equalsIgnoreCase("maxpooling") || opcode.equalsIgnoreCase("relu_maxpooling") || - opcode.equalsIgnoreCase("avgpooling")) { - InstructionUtils.checkNumFields(parts, 16); - // stride1, stride2, padding1, padding2 - // input_shape1, input_shape2, input_shape3, input_shape4, - // filter_shape1, filter_shape2, filter_shape3, filter_shape4, k - CPOperand in = new CPOperand(parts[1]); - CPOperand out = new CPOperand(parts[14]); - - ArrayList<CPOperand> stride = new ArrayList<>(); - ArrayList<CPOperand> padding = new ArrayList<>(); - ArrayList<CPOperand> input_shape = new ArrayList<>(); - ArrayList<CPOperand> filter_shape = new ArrayList<>(); - stride.add(new CPOperand(parts[2])); - stride.add(new CPOperand(parts[3])); - padding.add(new CPOperand(parts[4])); - padding.add(new CPOperand(parts[5])); - input_shape.add(new CPOperand(parts[6])); - input_shape.add(new CPOperand(parts[7])); - input_shape.add(new CPOperand(parts[8])); - input_shape.add(new CPOperand(parts[9])); - filter_shape.add(new CPOperand(parts[10])); - filter_shape.add(new CPOperand(parts[11])); - filter_shape.add(new CPOperand(parts[12])); - filter_shape.add(new CPOperand(parts[13])); - int k = Integer.parseInt(parts[15]); - - return new ConvolutionCPInstruction(in, out, opcode, str, stride, - padding, input_shape, filter_shape, k, Double.parseDouble(parts[16])); - } - else if (opcode.equalsIgnoreCase("maxpooling_backward") || opcode.equalsIgnoreCase("relu_maxpooling_backward") - || opcode.equalsIgnoreCase("avgpooling_backward") - || opcode.equalsIgnoreCase("conv2d") - || opcode.equalsIgnoreCase("conv2d_backward_filter") - || opcode.equalsIgnoreCase("conv2d_backward_data")) { - InstructionUtils.checkNumFields(parts, 17); - // dout, stride1, stride2, padding1, padding2 - // input_shape1, input_shape2, input_shape3, input_shape4, - // filter_shape1, filter_shape2, filter_shape3, filter_shape4, k - CPOperand in = new CPOperand(parts[1]); - CPOperand in2 = new CPOperand(parts[2]); - CPOperand out = new CPOperand(parts[15]); - - ArrayList<CPOperand> stride = new ArrayList<>(); - ArrayList<CPOperand> padding = new ArrayList<>(); - ArrayList<CPOperand> input_shape = new ArrayList<>(); - ArrayList<CPOperand> filter_shape = new ArrayList<>(); - stride.add(new CPOperand(parts[3])); - stride.add(new CPOperand(parts[4])); - padding.add(new CPOperand(parts[5])); - padding.add(new CPOperand(parts[6])); - input_shape.add(new CPOperand(parts[7])); - input_shape.add(new CPOperand(parts[8])); - input_shape.add(new CPOperand(parts[9])); - input_shape.add(new CPOperand(parts[10])); - filter_shape.add(new CPOperand(parts[11])); - filter_shape.add(new CPOperand(parts[12])); - filter_shape.add(new CPOperand(parts[13])); - filter_shape.add(new CPOperand(parts[14])); - int k = Integer.parseInt(parts[16]); - - return new ConvolutionCPInstruction(in, in2, out, opcode, str, stride, - padding, input_shape, filter_shape, k, Double.parseDouble(parts[17])); - } - else if (opcode.equalsIgnoreCase("conv2d_bias_add")) { - InstructionUtils.checkNumFields(parts, 18); - // dout, stride1, stride2, padding1, padding2 - // input_shape1, input_shape2, input_shape3, input_shape4, - // filter_shape1, filter_shape2, filter_shape3, filter_shape4, k - CPOperand in = new CPOperand(parts[1]); - CPOperand in2 = new CPOperand(parts[2]); - CPOperand in3 = new CPOperand(parts[3]); - CPOperand out = new CPOperand(parts[16]); - - ArrayList<CPOperand> stride = new ArrayList<>(); - ArrayList<CPOperand> padding = new ArrayList<>(); - ArrayList<CPOperand> input_shape = new ArrayList<>(); - ArrayList<CPOperand> filter_shape = new ArrayList<>(); - stride.add(new CPOperand(parts[4])); - stride.add(new CPOperand(parts[5])); - padding.add(new CPOperand(parts[6])); - padding.add(new CPOperand(parts[7])); - input_shape.add(new CPOperand(parts[8])); - input_shape.add(new CPOperand(parts[9])); - input_shape.add(new CPOperand(parts[10])); - input_shape.add(new CPOperand(parts[11])); - filter_shape.add(new CPOperand(parts[12])); - filter_shape.add(new CPOperand(parts[13])); - filter_shape.add(new CPOperand(parts[14])); - filter_shape.add(new CPOperand(parts[15])); - int k = Integer.parseInt(parts[17]); - - return new ConvolutionCPInstruction(in, in2, in3, out, opcode, str, stride, - padding, input_shape, filter_shape, k, Double.parseDouble(parts[18])); - } - else if (opcode.equalsIgnoreCase("bias_add") || opcode.equals("relu_backward") || opcode.equalsIgnoreCase("bias_multiply") ) { - InstructionUtils.checkNumFields(parts, 5); - CPOperand in = new CPOperand(parts[1]); - CPOperand in2 = new CPOperand(parts[2]); - CPOperand out = new CPOperand(parts[3]); - int k = Integer.parseInt(parts[4]); - return new ConvolutionCPInstruction(in, in2, out, opcode, str, k, Double.parseDouble(parts[5])); - } - else if (opcode.equalsIgnoreCase("channel_sums")) { - InstructionUtils.checkNumFields(parts, 4); - CPOperand in = new CPOperand(parts[1]); - CPOperand in2 = new CPOperand(parts[2]); - CPOperand in3 = new CPOperand(parts[3]); - CPOperand out = new CPOperand(parts[4]); - return new ConvolutionCPInstruction(in, in2, in3, out, opcode, str, -1, 0); - } - else if (opcode.equalsIgnoreCase("batch_norm2d")) { - InstructionUtils.checkNumFields(parts, 13); - CPOperand in1 = new CPOperand(parts[1]); // image - CPOperand in2 = new CPOperand(parts[2]); // scale - CPOperand in3 = new CPOperand(parts[3]); // bias - CPOperand in4 = new CPOperand(parts[4]); // runningMean - CPOperand in5 = new CPOperand(parts[5]); // runningVar - CPOperand in6 = new CPOperand(parts[6]); // mode - CPOperand in7 = new CPOperand(parts[7]); // epsilon - CPOperand in8 = new CPOperand(parts[8]); // exponentialAverageFactor - CPOperand out = new CPOperand(parts[9]); // ret - CPOperand out2 = new CPOperand(parts[10]); // retRunningMean - CPOperand out3 = new CPOperand(parts[11]); // retRunningVar - CPOperand out4 = new CPOperand(parts[12]); // resultSaveMean - CPOperand out5 = new CPOperand(parts[13]); // resultSaveInvVariance - return new ConvolutionCPInstruction(in1, in2, in3, in4, in5, in6, in7, in8, out, out2, out3, out4, out5, opcode, str, 0); - } - else if (opcode.equalsIgnoreCase("batch_norm2d_backward")) { - InstructionUtils.checkNumFields(parts, 9); - CPOperand in1 = new CPOperand(parts[1]); // image - CPOperand in2 = new CPOperand(parts[2]); // dout - CPOperand in3 = new CPOperand(parts[3]); // scale - CPOperand in4 = new CPOperand(parts[4]); // epsilon - CPOperand in5 = new CPOperand(parts[5]); // resultSaveMean - CPOperand in6 = new CPOperand(parts[6]); // resultSaveInvVariance - CPOperand out = new CPOperand(parts[7]); // dX - CPOperand out2 = new CPOperand(parts[8]); // dScale - CPOperand out3 = new CPOperand(parts[9]); // dBias - return new ConvolutionCPInstruction(in1, in2, in3, in4, in5, in6, null, null, out, out2, out3, null, null, opcode, str, 0); - } - else { - throw new DMLRuntimeException("Unknown opcode while parsing a ConvolutionCPInstruction: " + str); - } - } - - private static int getScalarInput(ExecutionContext ec, ArrayList<CPOperand> aL, int index) { - return (int) ec.getScalarInput(aL.get(index).getName(), - aL.get(index).getValueType(), aL.get(index).isLiteral()).getLongValue(); - } - - public void processReluBackwardInstruction(ExecutionContext ec) { - // (X > 0) * dout - MatrixBlock input = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - MatrixBlock dout = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - MatrixBlock outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), - input.isInSparseFormat() || dout.isInSparseFormat() ); - - if( !input.isEmpty() && !dout.isEmpty() ) { //sparse-safe - outputBlock.allocateBlock(); - LibMatrixDNN.reluBackward(input, dout, outputBlock, _numThreads); - } - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - ec.setMatrixOutput(getOutputVariableName(), outputBlock, getExtendedOpcode()); - } - - public void processBiasAddInstruction(ExecutionContext ec) { - MatrixBlock input = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - MatrixBlock bias = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - MatrixBlock outputBlock = null; - - if(bias.getNumColumns() != 1) { - throw new DMLRuntimeException("Expected the number of columns of bias matrix to be 1, but found " + bias.getNumColumns()); - } - - if(input.isEmpty() && bias.isEmpty()) { - outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), true); - } - else if(bias.isEmpty()) { - outputBlock = new MatrixBlock(input); - } - else { - // As we always fill the output first with bias - outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), false); - outputBlock.allocateDenseBlock(); - LibMatrixDNN.biasAdd(input, bias, outputBlock, _numThreads); - } - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - ec.setMatrixOutput(getOutputVariableName(), outputBlock, getExtendedOpcode()); - } - - public void processBiasMultiplyInstruction(ExecutionContext ec) { - MatrixBlock input = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - MatrixBlock bias = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - MatrixBlock outputBlock = null; - - if(bias.getNumColumns() != 1) { - throw new DMLRuntimeException("Expected the number of columns of bias matrix to be 1, but found " + bias.getNumColumns()); - } - - if(bias.isEmpty()) { - // Anything multiplied by zero is zero - outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), true); - } - else { - // As we always fill the output first with bias - outputBlock = new MatrixBlock(input.getNumRows(), input.getNumColumns(), - input.isInSparseFormat()).allocateBlock(); - LibMatrixDNN.biasMultiply(input, bias, outputBlock, _numThreads); - } - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - ec.setMatrixOutput(getOutputVariableName(), outputBlock, getExtendedOpcode()); - } - - public void processChannelSumsInstruction(ExecutionContext ec) { - MatrixBlock input = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - int C = (int) ec.getScalarInput(_in2.getName(), _in2.getValueType(), _in2.isLiteral()).getLongValue(); - int HW = (int) ec.getScalarInput(_in3.getName(), _in3.getValueType(), _in3.isLiteral()).getLongValue(); - if(C*HW != input.getNumColumns()) { - throw new DMLRuntimeException("Expected rows*cols" + C + "*" + HW + " to be equal to number of columns of input " + input.getNumColumns()); - } - MatrixBlock outputBlock = null; - if(input.isEmpty()) { - outputBlock = new MatrixBlock(C, 1, true); - } - else { - outputBlock = new MatrixBlock(C, 1, false).allocateBlock(); - LibMatrixDNN.channelSums(input, outputBlock, C, HW); - } - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.setMatrixOutput(getOutputVariableName(), outputBlock, getExtendedOpcode()); - } - - - - public void processBatchNorm2dInstruction(ExecutionContext ec) { - MatrixBlock image = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - MatrixBlock scale = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - MatrixBlock bias = ec.getMatrixInput(_in3.getName(), getExtendedOpcode()); - MatrixBlock runningMean = ec.getMatrixInput(_in4.getName(), getExtendedOpcode()); - MatrixBlock runningVar = ec.getMatrixInput(_in5.getName(), getExtendedOpcode()); - String phase = ec.getScalarInput(_in6.getName(), _in6.getValueType(), _in6.isLiteral()).getStringValue(); - double epsilon = ec.getScalarInput(_in7.getName(), _in7.getValueType(), _in7.isLiteral()).getDoubleValue(); - double mu = ec.getScalarInput(_in8.getName(), _in8.getValueType(), _in8.isLiteral()).getDoubleValue(); - - MatrixBlock ret = new MatrixBlock(image.getNumRows(), image.getNumColumns(), false).allocateBlock(); - MatrixBlock retRunningMean = new MatrixBlock(runningMean.getNumRows(), runningMean.getNumColumns(), false).allocateBlock(); - MatrixBlock retRunningVar = new MatrixBlock(runningVar.getNumRows(), runningVar.getNumColumns(), false).allocateBlock(); - MatrixBlock resultSaveMean = new MatrixBlock(runningMean.getNumRows(), runningMean.getNumColumns(), false).allocateBlock(); - MatrixBlock resultSaveInvVariance = new MatrixBlock(runningVar.getNumRows(), runningVar.getNumColumns(), false).allocateBlock(); - - LibMatrixDNN.batchNorm2D(image, scale, bias, runningMean, runningVar, phase, epsilon, mu, ret, - retRunningMean, retRunningVar, resultSaveMean, resultSaveInvVariance); - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in3.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in4.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in5.getName(), getExtendedOpcode()); - ec.setMatrixOutput(output.getName(), ret, getExtendedOpcode()); - ec.setMatrixOutput(_out2.getName(), retRunningMean, getExtendedOpcode()); - ec.setMatrixOutput(_out3.getName(), retRunningVar, getExtendedOpcode()); - ec.setMatrixOutput(_out4.getName(), resultSaveMean, getExtendedOpcode()); - ec.setMatrixOutput(_out5.getName(), resultSaveInvVariance, getExtendedOpcode()); - } - - public void processBatchNorm2dBackwardInstruction(ExecutionContext ec) { - MatrixBlock image = ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - MatrixBlock dout = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - MatrixBlock scale = ec.getMatrixInput(_in3.getName(), getExtendedOpcode()); - double epsilon = ec.getScalarInput(_in4.getName(), _in4.getValueType(), _in4.isLiteral()).getDoubleValue(); - MatrixBlock resultSaveMean = ec.getMatrixInput(_in5.getName(), getExtendedOpcode()); - MatrixBlock resultSaveInvVariance = ec.getMatrixInput(_in6.getName(), getExtendedOpcode()); - - MatrixBlock dX = new MatrixBlock(image.getNumRows(), image.getNumColumns(), false).allocateBlock(); - MatrixBlock dScale = new MatrixBlock(scale.getNumRows(), scale.getNumColumns(), false).allocateBlock(); - MatrixBlock dBias = new MatrixBlock(scale.getNumRows(), scale.getNumColumns(), false).allocateBlock(); - - LibMatrixDNN.batchNorm2DBackward(image, dout, scale, epsilon, resultSaveMean, resultSaveInvVariance, dX, dScale, dBias); - - // release inputs/outputs - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in3.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in5.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in6.getName(), getExtendedOpcode()); - ec.setMatrixOutput(output.getName(), dX, getExtendedOpcode()); - ec.setMatrixOutput(_out2.getName(), dScale, getExtendedOpcode()); - ec.setMatrixOutput(_out3.getName(), dBias, getExtendedOpcode()); - } - - - // Assumption: enableNative && NativeHelper.isNativeLibraryLoaded() is true - // This increases the number of native calls. For example:the cases where filter is sparse but input is dense - private static boolean isFilterSparse(MatrixBlock filter) { - long numElems = filter.getNumRows()*filter.getNumColumns(); - // if filter is less than 10 MB in dense format (which handles almost all the cases). - // In fact, using threshold of 1 MB is still sufficient for common CNNs. - if(filter.isInSparseFormat() && numElems < 10e+6) - filter.sparseToDense(); - return filter.isInSparseFormat(); - } - - - @Override - public void processInstruction(ExecutionContext ec) { - - if (instOpcode.equalsIgnoreCase("bias_add")) { - processBiasAddInstruction(ec); - return; - } - else if (instOpcode.equalsIgnoreCase("bias_multiply")) { - processBiasMultiplyInstruction(ec); - return; - } - else if (instOpcode.equalsIgnoreCase("relu_backward")) { - processReluBackwardInstruction(ec); - return; - } - else if (instOpcode.equalsIgnoreCase("channel_sums")) { - processChannelSumsInstruction(ec); - return; - } - else if (instOpcode.equalsIgnoreCase("batch_norm2d")) { - processBatchNorm2dInstruction(ec); - return; - } - else if (instOpcode.equalsIgnoreCase("batch_norm2d_backward")) { - processBatchNorm2dBackwardInstruction(ec); - return; - } - - // acquire inputs - MatrixBlock outputBlock = null; - MatrixBlock matBlock = instOpcode.equalsIgnoreCase("avgpooling_backward") ? null : ec.getMatrixInput(input1.getName(), getExtendedOpcode()); - int pad_h = getScalarInput(ec, _padding, 0); - int pad_w = getScalarInput(ec, _padding, 1); - int stride_h = getScalarInput(ec, _stride, 0); - int stride_w = getScalarInput(ec, _stride, 1); - - int N = getScalarInput(ec, _input_shape, 0); - int C = getScalarInput(ec, _input_shape, 1); - int H = getScalarInput(ec, _input_shape, 2); - int W = getScalarInput(ec, _input_shape, 3); - - int K = getScalarInput(ec, _filter_shape, 0); - - int R = getScalarInput(ec, _filter_shape, 2); - int S = getScalarInput(ec, _filter_shape, 3); - int P = (int) ConvolutionUtils.getP(H, R, stride_h, pad_h); - int Q = (int) ConvolutionUtils.getQ(W, S, stride_w, pad_w); - - ConvolutionParameters params = new ConvolutionParameters(N, C, H, W, K, R, S, stride_h, stride_w, pad_h, pad_w, _numThreads); - params.enableNative = NativeHelper.isNativeLibraryLoaded(); - if (instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling") || - instOpcode.equalsIgnoreCase("avgpooling")) { - if(matBlock.isEmpty()) { - outputBlock = new MatrixBlock(N, C*P*Q, true); - } - else { - outputBlock = new MatrixBlock(N, C*P*Q, false).allocateBlock(); - - PoolingType poolType = (instOpcode.equalsIgnoreCase("maxpooling") || instOpcode.equalsIgnoreCase("relu_maxpooling")) ? PoolingType.MAX : PoolingType.AVG; - if(instOpcode.equalsIgnoreCase("relu_maxpooling")) - params.minValForMaxPoolOperations = 0; - LibMatrixDNN.pooling(matBlock, outputBlock, params, poolType); - } - } - else if (instOpcode.equalsIgnoreCase("maxpooling_backward") || instOpcode.equalsIgnoreCase("relu_maxpooling_backward") || - instOpcode.equalsIgnoreCase("avgpooling_backward")) { - MatrixBlock dout = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - boolean isEmpty = instOpcode.equalsIgnoreCase("avgpooling_backward") ? dout.isEmpty() : (matBlock.isEmpty() || dout.isEmpty()); - if(isEmpty) { - outputBlock = new MatrixBlock(N, C*H*W, true); - } - else { - outputBlock = new MatrixBlock(N, C*H*W, false).allocateBlock(); - PoolingType poolType = (instOpcode.equalsIgnoreCase("maxpooling_backward") || instOpcode.equalsIgnoreCase("relu_maxpooling_backward")) ? PoolingType.MAX : PoolingType.AVG; - boolean performReLUBackward = instOpcode.equalsIgnoreCase("relu_maxpooling_backward"); - if(performReLUBackward) - params.minValForMaxPoolOperations = 0; - LibMatrixDNN.poolingBackward(matBlock, dout, outputBlock, params, performReLUBackward, poolType); - } - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - } - else if (instOpcode.equalsIgnoreCase("conv2d")) { - resetNumThreads(params, C*R*S, P*Q, matBlock.getNonZeros() / (matBlock.getNumRows()*matBlock.getNumColumns())); - MatrixBlock filter = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - if(filter.isEmpty() || matBlock.isEmpty()) { - outputBlock = new MatrixBlock(N, K*P*Q, true); - } - else { - boolean sparse = matBlock.isUltraSparse(false) && params.bias == null - && matBlock.getInMemorySize() < MatrixBlock.estimateSizeDenseInMemory(N, K*P*Q); - outputBlock = new MatrixBlock(N, K*P*Q, sparse).allocateBlock(); - if(params.enableNative && !isFilterSparse(filter) && !matBlock.isInSparseFormat()) - LibMatrixNative.conv2d(matBlock, filter, outputBlock, params); - else - LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params); - } - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - } - else if (instOpcode.equalsIgnoreCase("conv2d_bias_add")) { - resetNumThreads(params, C*R*S, P*Q, matBlock.getNonZeros() / (matBlock.getNumRows()*matBlock.getNumColumns())); - MatrixBlock filter = ec.getMatrixInput(_in3.getName(), getExtendedOpcode()); - MatrixBlock bias = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - if(bias.getNumRows() != params.K || bias.getNumColumns() != 1) { - throw new DMLRuntimeException("Incorrect shape of bias matrix: [" + bias.getNumRows() + " " + bias.getNumColumns() + "]. " - + "Expected: [" + params.K + ", 1]"); - } - boolean isOutputConvEmpty = filter.isEmpty() || matBlock.isEmpty(); - if(isOutputConvEmpty && bias.isEmpty()) { - // bias_add(empty mb, empty mb) = empty mb - outputBlock = new MatrixBlock(N, K*P*Q, true); - } - else if(isOutputConvEmpty && !bias.isEmpty()) { - // Add bias to empty output block - // bias_add(empty mb, bias) - outputBlock = new MatrixBlock(N, K*P*Q, false).allocateBlock(); - for(int n = 0; n < params.N; n++) - ConvolutionUtils.fillBias(bias, outputBlock.getDenseBlockValues(), - n, n+1, params.N, params.K, params.P*params.Q); - } - else { - outputBlock = new MatrixBlock(N, K*P*Q, false).allocateBlock(); - if(!bias.isEmpty()) { - // Handle situation where both input and filter are non empty, but bias is empty - params.bias = bias; - } - if(params.enableNative && !isFilterSparse(filter) && !matBlock.isInSparseFormat()) - LibMatrixNative.conv2d(matBlock, filter, outputBlock, params); - else - LibMatrixDNN.conv2d(matBlock, filter, outputBlock, params); - } - ec.releaseMatrixInput(_in3.getName(), getExtendedOpcode()); - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - } - else if (instOpcode.equalsIgnoreCase("conv2d_backward_filter")) { - MatrixBlock dout = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - if(dout.isEmpty() || matBlock.isEmpty()) { - outputBlock = new MatrixBlock(K, C*R*S, true); - } - else { - outputBlock = new MatrixBlock(K, C*R*S, false).allocateBlock(); - if(params.enableNative && !matBlock.isInSparseFormat() && !dout.isInSparseFormat()) - LibMatrixNative.conv2dBackwardFilter(matBlock, dout, outputBlock, params); - else - LibMatrixDNN.conv2dBackwardFilter(matBlock, dout, outputBlock, params); - } - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - } - else if (instOpcode.equalsIgnoreCase("conv2d_backward_data")) { - MatrixBlock dout = ec.getMatrixInput(_in2.getName(), getExtendedOpcode()); - if(dout.isEmpty() || matBlock.isEmpty()) { - outputBlock = new MatrixBlock(N, C * H * W, true); - } - else { - outputBlock = new MatrixBlock(N, C * H * W, false).allocateBlock(); - if(params.enableNative && !isFilterSparse(matBlock) && !dout.isInSparseFormat()) - LibMatrixNative.conv2dBackwardData(matBlock, dout, outputBlock, params); - else - LibMatrixDNN.conv2dBackwardData(matBlock, dout, outputBlock, params); - } - ec.releaseMatrixInput(_in2.getName(), getExtendedOpcode()); - } - else { - throw new DMLRuntimeException("Unsupported op code " + instOpcode); - } - - // release inputs/outputs - if(!instOpcode.equalsIgnoreCase("avgpooling_backward")) - ec.releaseMatrixInput(input1.getName(), getExtendedOpcode()); - ec.setMatrixOutput(getOutputVariableName(), outputBlock, getExtendedOpcode()); - } - - /** - * Reset the number of thread to respect the intermediate CP memory budget - * - * @param params convolution parameters - * @param numRows number of rows of intermediate matrix used per thread - * @param numCols number of rows of intermediate matrix used per thread - * @param sparsity sparsity of intermediate matrix used per thread - */ - private void resetNumThreads(ConvolutionParameters params, int numRows, int numCols, double sparsity) { - if(DMLScript.USE_ACCELERATOR) { - double memBudget1Thread = OptimizerUtils.estimateSizeExactSparsity(numRows, numCols, sparsity); - int limitedDegreeOfParallelism = (int) Math.floor(_intermediateMemoryBudget / memBudget1Thread); - if(params.numThreads > limitedDegreeOfParallelism) { - params.numThreads = limitedDegreeOfParallelism; - if(!warnedUnderUtilitization) - LOG.warn("CPU Under-utilization to respect the intermediate memory budget. To avoid this, please try reducing the mini-batch or forcing gpu execution."); - warnedUnderUtilitization = true; - } - } - } -}
