[MINOR] Refactoring: rename Convolution to Dnn hops/lops/instructions
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9fa5a09b Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9fa5a09b Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9fa5a09b Branch: refs/heads/master Commit: 9fa5a09b4dc45fba79f736f5db17fef46d308ddf Parents: 23df148 Author: Matthias Boehm <[email protected]> Authored: Sat Jun 16 12:06:54 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat Jun 16 12:55:34 2018 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/hops/AggUnaryOp.java | 6 +- .../java/org/apache/sysml/hops/BinaryOp.java | 6 +- .../org/apache/sysml/hops/ConvolutionOp.java | 928 ------------------- src/main/java/org/apache/sysml/hops/DnnOp.java | 925 ++++++++++++++++++ src/main/java/org/apache/sysml/hops/Hop.java | 24 +- .../sysml/hops/rewrite/HopRewriteUtils.java | 11 + .../apache/sysml/lops/ConvolutionTransform.java | 274 ------ .../org/apache/sysml/lops/DnnTransform.java | 274 ++++++ .../sysml/parser/BuiltinFunctionExpression.java | 18 +- .../org/apache/sysml/parser/DMLTranslator.java | 20 +- .../instructions/CPInstructionParser.java | 38 +- .../instructions/GPUInstructionParser.java | 38 +- .../instructions/SPInstructionParser.java | 14 +- .../runtime/instructions/cp/CPInstruction.java | 2 +- .../cp/ConvolutionCPInstruction.java | 645 ------------- .../instructions/cp/DnnCPInstruction.java | 645 +++++++++++++ .../gpu/ConvolutionGPUInstruction.java | 750 --------------- .../instructions/gpu/DnnGPUInstruction.java | 750 +++++++++++++++ .../instructions/gpu/GPUInstruction.java | 2 +- .../spark/ConvolutionSPInstruction.java | 402 -------- .../instructions/spark/DnnSPInstruction.java | 402 ++++++++ .../instructions/spark/SPInstruction.java | 2 +- .../matrix/data/ConvolutionParameters.java | 162 ---- .../runtime/matrix/data/DnnParameters.java | 160 ++++ .../sysml/runtime/matrix/data/LibMatrixDNN.java | 28 +- .../runtime/matrix/data/LibMatrixDNNConv2d.java | 46 +- .../runtime/matrix/data/LibMatrixDNNHelper.java | 2 +- .../runtime/matrix/data/LibMatrixDNNIm2Col.java | 6 +- .../matrix/data/LibMatrixDNNPooling.java | 38 +- .../runtime/matrix/data/LibMatrixDNNRelu.java | 6 +- .../matrix/data/LibMatrixDNNRotate180.java | 10 +- .../runtime/matrix/data/LibMatrixNative.java | 8 +- .../sysml/runtime/util/ConvolutionUtils.java | 99 -- .../org/apache/sysml/runtime/util/DnnUtils.java | 99 ++ .../org/apache/sysml/utils/NativeHelper.java | 6 +- .../org/apache/sysml/api/dl/CaffeLayer.scala | 10 +- .../sysml/test/gpu/NeuralNetworkOpTests.java | 30 +- .../tensor/Conv2DBackwardDataTest.java | 4 +- .../functions/tensor/Conv2DBackwardTest.java | 4 +- .../functions/tensor/PoolBackwardTest.java | 4 +- 40 files changed, 3452 insertions(+), 3446 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java index 116ed91..1c39787 100644 --- a/src/main/java/org/apache/sysml/hops/AggUnaryOp.java +++ b/src/main/java/org/apache/sysml/hops/AggUnaryOp.java @@ -25,7 +25,7 @@ import org.apache.sysml.hops.rewrite.HopRewriteUtils; import org.apache.sysml.lops.Aggregate; import org.apache.sysml.lops.Aggregate.OperationTypes; import 
org.apache.sysml.lops.Binary; -import org.apache.sysml.lops.ConvolutionTransform; +import org.apache.sysml.lops.DnnTransform; import org.apache.sysml.lops.Group; import org.apache.sysml.lops.Lop; import org.apache.sysml.lops.PartialAggregate; @@ -146,11 +146,11 @@ public class AggUnaryOp extends MultiThreadedHop // Apply channel sums only if rewrite is applicable and if the dimension of C is known at compile time // and if numChannels is less than 8 MB. ReorgOp in = ((ReorgOp)getInput().get(0)); - agg1 = new ConvolutionTransform( + agg1 = new DnnTransform( in.getInput().get(0).getInput().get(0).constructLops(), in.getInput().get(1).constructLops(), in.getInput().get(2).constructLops(), - ConvolutionTransform.OperationTypes.CHANNEL_SUMS, getDataType(), getValueType(), et, -1); + DnnTransform.OperationTypes.CHANNEL_SUMS, getDataType(), getValueType(), et, -1); agg1.getOutputParameters().setDimensions(numChannels, 1, getRowsInBlock(), getColsInBlock(), -1); setLineNumbers(agg1); setLops(agg1); http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/BinaryOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/BinaryOp.java b/src/main/java/org/apache/sysml/hops/BinaryOp.java index 0c3f96c..3624db8 100644 --- a/src/main/java/org/apache/sysml/hops/BinaryOp.java +++ b/src/main/java/org/apache/sysml/hops/BinaryOp.java @@ -37,7 +37,7 @@ import org.apache.sysml.lops.CoVariance; import org.apache.sysml.lops.CombineBinary; import org.apache.sysml.lops.CombineBinary.OperationTypes; import org.apache.sysml.lops.CombineUnary; -import org.apache.sysml.lops.ConvolutionTransform; +import org.apache.sysml.lops.DnnTransform; import org.apache.sysml.lops.Data; import org.apache.sysml.lops.DataPartition; import org.apache.sysml.lops.Group; @@ -653,8 +653,8 @@ public class BinaryOp extends MultiThreadedHop if(op == OpOp2.MULT && isLeftXGt0 && !getInput().get(0).isVector() && !getInput().get(1).isVector() && getInput().get(0).dimsKnown() && getInput().get(1).dimsKnown()) { - binary = new ConvolutionTransform(getInput().get(0).getInput().get(0).constructLops(), - getInput().get(1).constructLops(), ConvolutionTransform.OperationTypes.RELU_BACKWARD, + binary = new DnnTransform(getInput().get(0).getInput().get(0).constructLops(), + getInput().get(1).constructLops(), DnnTransform.OperationTypes.RELU_BACKWARD, getDataType(), getValueType(), et, OptimizerUtils.getConstrainedNumThreads(_maxNumThreads)); } else http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java b/src/main/java/org/apache/sysml/hops/ConvolutionOp.java deleted file mode 100644 index 20ab041..0000000 --- a/src/main/java/org/apache/sysml/hops/ConvolutionOp.java +++ /dev/null @@ -1,928 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.hops; - -import org.apache.sysml.api.DMLScript; -import org.apache.sysml.hops.rewrite.HopRewriteUtils; -import org.apache.sysml.lops.ConvolutionTransform; -import org.apache.sysml.lops.ConvolutionTransform.OperationTypes; -import org.apache.sysml.lops.Lop; -import org.apache.sysml.lops.LopProperties.ExecType; -import org.apache.sysml.parser.Expression.DataType; -import org.apache.sysml.parser.Expression.ValueType; -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; -import org.apache.sysml.runtime.matrix.MatrixCharacteristics; -import org.apache.sysml.runtime.matrix.data.ConvolutionParameters; -import java.util.ArrayList; - -public class ConvolutionOp extends MultiThreadedHop -{ - // ------------------------------------------------------------------------- - // This flag allows us to compile plans with less unknowns and also serves as future tensorblock integration. - // By default, these flags are turned on. - - // When this flag is turned on, we attempt to check the parent convolution hop for unknown dimensions. - // For example: in case of conv -> maxpool, the input channel/height/width of maxpool will match output channel/height/width of conv. - private static final boolean INFER_TENSOR_SHAPE_FROM_PARENT_CONV_OP = true; - // This guards us from cases where the user provides incorrect C,H,W parameters. 
- private static final boolean THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH = true; - // ------------------------------------------------------------------------- - - // Specifies the type of this hop - private Hop.ConvOp op; - - private ConvolutionOp() { - //default constructor for clone - } - - /** - * Create a hop from the builtin expression - * - * @param l name of the hop - * @param dt datatype (only supports matrix datatype) - * @param vt valuetype (only supports matrix valuetype) - * @param o type of this hop - * @param inp input hops - */ - public ConvolutionOp(String l, DataType dt, ValueType vt, ConvOp o, ArrayList<Hop> inp) - { - super(l, dt, vt); - op = o; - - for( int i=0; i<inp.size(); i++ ) { - Hop in = inp.get(i); - getInput().add(i, in); - in.getParent().add(this); - } - - //compute unknown dims and nnz - refreshSizeInformation(); - } - - @Override - public void checkArity() { - HopsException.check(_input.size() >= 1, this, "should have at least one input but has %d inputs", _input.size()); - } - - public ConvOp getOp() { - return op; - } - - @Override - public String getOpString() { - return "" + HopsConv2Lops.get(op); - } - - private static boolean isEligibleForSpark() { - return false; - } - - @Override - public boolean isGPUEnabled() { - if(!DMLScript.USE_ACCELERATOR) - return false; - return true; - } - - @Override - public Lop constructLops() - { - //return already created lops - if( getLops() != null ) - return getLops(); - - ExecType et = optFindExecType(); - - ArrayList<Hop> inputs = getInput(); - switch( op ) - { - case MAX_POOL: - case MAX_POOL_BACKWARD: - case AVG_POOL: - case AVG_POOL_BACKWARD: - case CONV2D: - case CONV2D_BACKWARD_DATA: - case CONV2D_BACKWARD_FILTER: - case BIAS_ADD: - case BIAS_MULTIPLY: - { - if(et == ExecType.CP || et == ExecType.GPU) { - setLops(constructConvolutionLops(et, inputs)); - break; - } - else { - throw new HopsException("Unimplemented ConvolutionOp for execution type: " + et.name()); - } - // break; - } - default: - throw new HopsException("Unsupported lops construction for operation type '"+op+"'."); - } - - //add reblock/checkpoint lops if necessary - constructAndSetLopsDataFlowProperties(); - - return getLops(); - } - - public void setOp(ConvOp op) { - this.op = op; - } - - private int getNumExpectedInputs() { - switch(op) { - case MAX_POOL_BACKWARD: - case AVG_POOL_BACKWARD: - case CONV2D: - case CONV2D_BACKWARD_FILTER: - case CONV2D_BACKWARD_DATA: - return 14; - case BIAS_ADD: - case BIAS_MULTIPLY: - return 2; - default: - return 13; - } - } - - /** - * Returns parent matrix X or null - * @param input input hop - * @return either null or X if input is max(X,0) or max(0,X) - */ - private static Hop isInputReLU(Hop input) { - if(HopRewriteUtils.isBinary(input, OpOp2.MAX)) { - if(HopRewriteUtils.isLiteralOfValue(input.getInput().get(0), 0)) { - return input.getInput().get(1); - } - else if(HopRewriteUtils.isLiteralOfValue(input.getInput().get(1), 0)) { - return input.getInput().get(0); - } - else - return null; - } - else - return null; - } - - private static boolean isInputConv2d(Hop input) { - return input instanceof ConvolutionOp && ((ConvolutionOp) input).getOp() == ConvOp.CONV2D; - } - - /** - * Compares the input parameters for max_pool/max_pool_backward operations - * - * @return true if the following parameters match: stride=[stride, stride], padding=[pad, pad], input_shape=[numImg, numChannels, imgSize, imgSize], pool_size=[poolSize1, poolSize2] - */ - private static boolean 
isPoolingParametersEqualAndKnown(ConvolutionParameters param1, ConvolutionParameters param2) { - return isEqualAndKnown(param1.stride_h, param2.stride_h) && isEqualAndKnown(param1.stride_w, param2.stride_w) && - isEqualAndKnown(param1.pad_h, param2.pad_h) && isEqualAndKnown(param1.pad_w, param2.pad_w) && - isEqualAndKnown(param1.R, param2.R) && isEqualAndKnown(param1.S, param2.S) && - isEqualAndKnown(param1.N, param2.N) && isEqualAndKnown(param1.C, param2.C) && - isEqualAndKnown(param1.H, param2.H) && isEqualAndKnown(param1.W, param2.W); - } - - private static boolean isEqualAndKnown(int val1, int val2) { - return val1 >= 0 && val2 >= 0 && val1 == val2; - } - - /** - * Returns the output lop of max_pool/avg_pool operation with same parameters as this hop. - * If corresponding output lop is not found or if this is not a max_pool_backward operation, this function returns null - * - * @return output lop of max_pool/avg_pool operation with same parameters as this hop - */ - private Lop getMaxPoolOutputLop() { - if(op == ConvOp.MAX_POOL_BACKWARD || op == ConvOp.AVG_POOL_BACKWARD) { - ConvOp opType = (op == ConvOp.MAX_POOL_BACKWARD) ? ConvOp.MAX_POOL : ConvOp.AVG_POOL; - Hop inputImage = getInput().get(0); - for(Hop tmpParent : inputImage.getParent()) { - if(!(tmpParent instanceof ConvolutionOp)) - continue; - ConvolutionOp parent = (ConvolutionOp) tmpParent; - if(parent.getOp() == opType && isPoolingParametersEqualAndKnown(parent._cachedParams, _cachedParams)) { - return parent.constructLops(); - } - } - } - return null; - } - - public Lop constructConvolutionLops(ExecType et, ArrayList<Hop> inputs) { - if(inputs.size() != getNumExpectedInputs()) - throw new HopsException("Incorrect number of inputs for " + op.name()); - - // --------------------------------------------------------------- - // Deal with fused operators and contruct lhsInputLop/optionalRhsInputLop - Lop lhsInputLop = null; Lop optionalRhsInputLop = null; - ArrayList<Hop> inputsOfPotentiallyFusedOp = inputs; - OperationTypes lopOp = HopsConv2Lops.get(op); - - // RELU_MAX_POOLING and RELU_MAX_POOLING_BACKWARD is extremely useful for CP backend - // by reducing unnecessary sparse-to-dense-to-sparse conversion. - // For other backends, this operators is not necessary as it reduces an additional relu operator. 
- Hop parentReLU = isInputReLU(inputs.get(0)); - if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOL && parentReLU != null) { - lhsInputLop = parentReLU.constructLops(); - lopOp = OperationTypes.RELU_MAX_POOLING; - } - else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == ConvOp.MAX_POOL_BACKWARD && parentReLU != null) { - lhsInputLop = parentReLU.constructLops(); - lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD; - } - else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && op == ConvOp.BIAS_ADD && isInputConv2d(inputs.get(0))) { - lopOp = OperationTypes.CONV2D_BIAS_ADD; - - // the first lop is image - lhsInputLop = inputs.get(0).getInput().get(0).constructLops(); - // the second lop is bias - optionalRhsInputLop = inputs.get(1).constructLops(); - - // Use the inputs from conv2d rather than bias_add - inputsOfPotentiallyFusedOp = inputs.get(0).getInput(); - } - else { - lhsInputLop = inputs.get(0).constructLops(); - } - // --------------------------------------------------------------- - - // --------------------------------------------------------------- - // Compute intermediate memory budget that can be passed to GPU operators - // for better CuDNN operator selection at runtime - double intermediateMemEstimate = computeIntermediateMemEstimate(-1, -1, -1 ); - if(et == ExecType.GPU && _dim1 >= 0 && _dim2 >= 0) { - // This enables us to compile more efficient matrix-matrix CuDNN operation instead of - // row-by-row invocation of multiple vector-matrix CuDNN operations. - // This is possible as the operations on GPU are single-threaded - double optimisticIntermediateMemEstimate = GPUContextPool.initialGPUMemBudget() - getOutputMemEstimate() - inputs.get(0).getOutputMemEstimate(); - if(optionalRhsInputLop != null) { - optimisticIntermediateMemEstimate -= inputs.get(1).getOutputMemEstimate(); - } - intermediateMemEstimate = Math.max(intermediateMemEstimate, optimisticIntermediateMemEstimate); - } - // --------------------------------------------------------------- - - // Construct the lop - Lop optionalMaxPoolOutput = (et == ExecType.GPU) ? 
getMaxPoolOutputLop() : null; - Lop[] l2inputs = new Lop[inputsOfPotentiallyFusedOp.size()-1]; - for( int i=1; i < inputsOfPotentiallyFusedOp.size(); i++ ) - l2inputs[i-1] = inputsOfPotentiallyFusedOp.get(i).constructLops(); - ConvolutionTransform convolutionLop = new ConvolutionTransform( - lhsInputLop, lopOp, getDataType(), getValueType(), et, - OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), intermediateMemEstimate); - setOutputDimensions(convolutionLop); - setLineNumbers(convolutionLop); - - // --------------------------------------------------------------- - // Add input/output for parent lops of convolutionLop - lhsInputLop.addOutput(convolutionLop); - if(optionalRhsInputLop != null) { - convolutionLop.addInput(optionalRhsInputLop); - optionalRhsInputLop.addOutput(convolutionLop); - } - for( int i=0; i < l2inputs.length; i++ ) { - convolutionLop.addInput(l2inputs[i]); - l2inputs[i].addOutput(convolutionLop); - } - // Only valid for MAX_POOLING_BACKWARD on GPU - if(optionalMaxPoolOutput != null) { - convolutionLop.addInput(optionalMaxPoolOutput); - optionalMaxPoolOutput.addOutput(convolutionLop); - } - convolutionLop.updateLopProperties(); - - // TODO double check that optionalMaxPoolOutput adheres to proper - // ID ordering of constructed lops (previously hidden by setLevel) - - // --------------------------------------------------------------- - - return convolutionLop; - } - - - @Override - protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) - { - if(getOp() == ConvOp.BIAS_MULTIPLY) { - // in non-gpu mode, the worst case size of bias multiply operation is same as that of input. - if(DMLScript.USE_ACCELERATOR) - return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0); - else - return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, getInput().get(0).getSparsity()); - } - else { - double sparsity = 1.0; - return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity); - } - } - - // --------------------------------------------------------------- - // Utility methods to guard the computation of memory estimates in presense of unknowns - private static class IntermediateDimensions { - int dim1; int dim2; double sp; - public IntermediateDimensions(ConvolutionOp h, String dim1Str, String dim2Str, double sp) { - dim1 = (int) h.getDim(dim1Str); - dim2 = (int) h.getDim(dim2Str); - this.sp = sp; - } - public IntermediateDimensions(ConvolutionOp h, String dim1Str, String dim2Str) { - dim1 = (int) h.getDim(dim1Str); - dim2 = (int) h.getDim(dim2Str); - sp = 1; - } - public IntermediateDimensions(ConvolutionOp h, int dim1, String dim2Str) { - this.dim1 = dim1; - dim2 = (int) h.getDim(dim2Str); - sp = 1; - } - - /** - * Add two computed memory estimates - * - * @param val1 memory estimate 1 - * @param val2 memory estimate 2 - * @return sum of memory estimates - */ - static double guardedAdd(double val1, double val2) { - if(val1 < 0 || val2 < 0) return OptimizerUtils.DEFAULT_SIZE; - double ret = val1 + val2; - if(ret >= OptimizerUtils.DEFAULT_SIZE) return OptimizerUtils.DEFAULT_SIZE; - else return ret; - } - - /** - * Compute memory estimates for given intermediate matrices - * - * @param intermediates list of intermediates - * @param numWorkers number of workers - * @return memory estimate - */ - public static double addEstimateSizes(ArrayList<IntermediateDimensions> intermediates, int numWorkers) { - double memBudget = 0; - for(int i = 0; i < intermediates.size(); i++) { - memBudget = guardedAdd(memBudget, 
OptimizerUtils.estimateSizeExactSparsity( - intermediates.get(i).dim1, intermediates.get(i).dim2, intermediates.get(i).sp)*numWorkers); - } - return memBudget; - } - - /** - * Compute max of two computed memory estimates - * @param val1 memory estimate 1 - * @param val2 memory estimate 2 - * @return max of memory estimates - */ - public static double guardedMax(double val1, double val2) { - if(val1 < 0 || val2 < 0) return OptimizerUtils.DEFAULT_SIZE; - double ret = Math.max(val1, val2); - if(ret >= OptimizerUtils.DEFAULT_SIZE) return OptimizerUtils.DEFAULT_SIZE; - else return ret; - } - } - - /** - * Helper utility to compute intermediate memory estimate - * - * @param gpuIntermediates intermediates for GPU - * @param cpIntermediates intermediates for CP - * @return memory estimates - */ - private double computeIntermediateMemEstimateHelper( - ArrayList<IntermediateDimensions> gpuIntermediates, - ArrayList<IntermediateDimensions> cpIntermediates) { - // Since CP operators use row-level parallelism by default - int numWorkers = (int) Math.min(OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), Math.max(getDim("N"), 1)); - if(DMLScript.USE_ACCELERATOR) { - // Account for potential sparse-to-dense conversion - double gpuMemBudget = IntermediateDimensions.addEstimateSizes(gpuIntermediates, 1); - double cpMemoryBudget = IntermediateDimensions.addEstimateSizes(cpIntermediates, numWorkers); - if(cpMemoryBudget > gpuMemBudget) { - double oneThreadCPMemBudget = IntermediateDimensions.addEstimateSizes(cpIntermediates, 1); - if(oneThreadCPMemBudget <= gpuMemBudget) { - // Why limit CPU ? in-order to give more opportunity to compile GPU operators - cpMemoryBudget = oneThreadCPMemBudget; - } - } - // Finally, use the maximum of CP and GPU memory budget - return IntermediateDimensions.guardedMax(cpMemoryBudget, gpuMemBudget); - } - else { - // When -gpu flag is not provided, the memory estimates for CP are not affected. - return IntermediateDimensions.addEstimateSizes(cpIntermediates, numWorkers); - } - } - - @Override - protected double computeIntermediateMemEstimate( long ignoreDim1, long ignoreDim2, long ignoreNnz ) - { - ArrayList<IntermediateDimensions> gpuIntermediates = new ArrayList<>(); - ArrayList<IntermediateDimensions> cpIntermediates = new ArrayList<>(); - if(getOp() == ConvOp.CONV2D) { - // Assumption: To compile a GPU conv2d operator, following should fit on the GPU: - // 1. output in dense format (i.e. computeOutputMemEstimate) - // 2. input in any format - // 3. atleast one input row in dense format - // 4. filter in dense format - - // Account for potential sparse-to-dense conversion of atleast 1 input row and filter - gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); - gpuIntermediates.add(new IntermediateDimensions(this, "K", "CRS")); - - // im2col operation preserves the worst-case sparsity of the input. - cpIntermediates.add(new IntermediateDimensions(this, "CRS", "PQ", getInput().get(0).getSparsity())); - } - else if(getOp() == ConvOp.CONV2D_BACKWARD_DATA) { - // Assumption: To compile a GPU conv2d_backward_data operator, following should fit on the GPU: - // 1. output in dense format (i.e. computeOutputMemEstimate) - // 2. dout in any format - // 3. atleast one dout row in dense format - // 4. 
filter in dense format - - // Account for potential sparse-to-dense conversion of atleast 1 input row and filter - gpuIntermediates.add(new IntermediateDimensions(this, 1, "KPQ")); - gpuIntermediates.add(new IntermediateDimensions(this, "K", "CRS")); - - // There are 2 intermediates: rotate180 and input to col2im for conv2d_backward_data - // rotate180 preserves the "exact" sparsity of the dout matrix - cpIntermediates.add(new IntermediateDimensions(this, "PQ", "K", getInput().get(1).getSparsity())); - // Note: worst-case sparsity for the input of col2im (of size NPQ x CRS where N is determined by degree of parallelism) - cpIntermediates.add(new IntermediateDimensions(this, "PQ", "CRS")); - } - else if(getOp() == ConvOp.CONV2D_BACKWARD_FILTER) { - // Assumption: To compile a GPU conv2d_backward_filter operator, following should fit on the GPU: - // 1. output in dense format (i.e. computeOutputMemEstimate) - // 2. dout in any format - // 3. atleast one dout and input row in dense format - - // Account for potential sparse-to-dense conversion of atleast 1 input + dout row - gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); - gpuIntermediates.add(new IntermediateDimensions(this, 1, "KPQ")); - - // There are 2 intermediates: im2col and rotate180 for conv2d_backward_filter - // rotate180 preserves the "exact" sparsity of the dout matrix - cpIntermediates.add(new IntermediateDimensions(this, "PQ", "K", getInput().get(1).getSparsity())); - // im2col operation preserves the worst-case sparsity of the input. - cpIntermediates.add(new IntermediateDimensions(this, "CRS", "PQ", getInput().get(0).getSparsity())); - } - else if(getOp() == ConvOp.MAX_POOL || getOp() == ConvOp.AVG_POOL) { - // Account for potential sparse-to-dense conversion of atleast 1 input row - gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); - } - else if(getOp() == ConvOp.MAX_POOL_BACKWARD || getOp() == ConvOp.AVG_POOL_BACKWARD) { - // Account for potential sparse-to-dense conversion of atleast 1 input + dout row - gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); - gpuIntermediates.add(new IntermediateDimensions(this, 1, "CPQ")); - } - - if(gpuIntermediates.size() > 0 || cpIntermediates.size() > 0) - return computeIntermediateMemEstimateHelper(gpuIntermediates, cpIntermediates); - else - return 0; - } - - - @Override - protected long[] inferOutputCharacteristics( MemoTable memo ) - { - // [numRows, numCols, NNZ] - long[] ret = new long[3]; - - if(op == ConvOp.BIAS_ADD || op == ConvOp.BIAS_MULTIPLY) { - MatrixCharacteristics[] mc = memo.getAllInputStats(getInput()); - ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1; - ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1; - ret[2] = -1; - return (ret[0]>=0 && ret[1]>=0) ? ret : null; - } - - refreshSizeInformation(); - ret[0] = _dim1; ret[1] = _dim2; ret[2] = _nnz; - - //safe return (create entry only if at least dims known) - return (ret[0]>0 && ret[1]>0) ? ret : null; - } - - - @Override - public boolean allowsAllExecTypes() - { - return true; - } - - @Override - protected ExecType optFindExecType() { - - checkAndSetForcedPlatform(); - - ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ? 
ExecType.SPARK : ExecType.MR; - - if( _etypeForced != null ) { - _etype = _etypeForced; - } - else { - if ( OptimizerUtils.isMemoryBasedOptLevel() ) { - _etype = findExecTypeByMemEstimate(); - } - else { - _etype = REMOTE; - } - - //check for valid CP dimensions and matrix size - checkAndSetInvalidCPDimsAndSize(); - } - - // TODO: Fix this after adding remaining spark instructions - _etype = !isEligibleForSpark() && _etype == REMOTE ? ExecType.CP : _etype; - - //mark for recompile (forever) - setRequiresRecompileIfNecessary(); - - return _etype; - } - - // Parameters recomputed in refreshSizeInformation and passed across many calls of getDim - private ConvolutionParameters _cachedParams = new ConvolutionParameters(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, _maxNumThreads); - - // stride1, stride2, padding1, padding2 - // input_shape1, input_shape2, input_shape3, input_shape4, - // filter_shape1, filter_shape2, filter_shape3, filter_shape4 - ConvolutionParameters parseInput() { - - Hop imageHeightHop = null; Hop filterHeightHop = null; - if(op == ConvOp.MAX_POOL_BACKWARD || op == ConvOp.AVG_POOL_BACKWARD - || op == ConvOp.CONV2D - || op == ConvOp.CONV2D_BACKWARD_FILTER - || op == ConvOp.CONV2D_BACKWARD_DATA) { - imageHeightHop = getInput().get(8); - filterHeightHop = getInput().get(12); - _cachedParams.setIfUnknown( - getInput().get(6), // N - getInput().get(7), // C - imageHeightHop, // H - getInput().get(9), // W - getInput().get(10), // K - filterHeightHop, // R - getInput().get(13), // S - getInput().get(2), // stride_h - getInput().get(3), // stride_w - getInput().get(4), // pad+h - getInput().get(5), _maxNumThreads); - } - else { - imageHeightHop = getInput().get(7); - filterHeightHop = getInput().get(11); - _cachedParams.setIfUnknown( - getInput().get(5), - getInput().get(6), - imageHeightHop, - getInput().get(8), - getInput().get(9), - filterHeightHop, - getInput().get(12), - getInput().get(1), - getInput().get(2), - getInput().get(3), - getInput().get(4), _maxNumThreads); - } - - if(INFER_TENSOR_SHAPE_FROM_PARENT_CONV_OP) { - boolean isPool = (getOp() == ConvOp.MAX_POOL || getOp() == ConvOp.AVG_POOL); - boolean isConv = getOp() == ConvOp.CONV2D; - boolean unknownCHWPQ = _cachedParams.C < 0 || _cachedParams.H < 0 || _cachedParams.W < 0 || _cachedParams.P < 0 || _cachedParams.Q < 0; - if((isPool || isConv) && unknownCHWPQ) { - // Only infer input shape for convolution and maxpool - inferCHWPQFromParentOp(); - } - } - - if(imageHeightHop == filterHeightHop && _cachedParams.R < 0 && _cachedParams.H > 0) { - // Unknown R, but known H and both are equal - // This happens for one-dimensional conv2d where H=R and H can be inferred from the parent hop - _cachedParams.R = _cachedParams.H; - } - - // Compute P and Q if unknown. 
At script level, they are computed using following script: - // P = as.integer(floor((H + 2*pad_h - R)/stride_h + 1)) - // Q = as.integer(floor((W + 2*pad_w - S)/stride_w + 1)) - if(_cachedParams.P < 0 && _cachedParams.H >= 0 && _cachedParams.R >= 0 && _cachedParams.stride_h >= 0 && _cachedParams.pad_h >= 0) { - _cachedParams.P = (int) org.apache.sysml.runtime.util.ConvolutionUtils.getP(_cachedParams.H, _cachedParams.R, _cachedParams.stride_h, _cachedParams.pad_h); - } - if(_cachedParams.Q < 0 && _cachedParams.W >= 0 && _cachedParams.S >= 0 && _cachedParams.stride_w >= 0 && _cachedParams.pad_w >= 0) { - _cachedParams.Q = (int) org.apache.sysml.runtime.util.ConvolutionUtils.getQ(_cachedParams.W, _cachedParams.S, _cachedParams.stride_w, _cachedParams.pad_w); - } - - return _cachedParams; - } - - /** - * Utility method to check if the given hop is a BIAS_ADD hop - * - * @param hop the given hop - * @return true if the given hop is BIAS_ADD - */ - private static boolean isInputBiasAdd(Hop hop) { - if(hop instanceof ConvolutionOp && ((ConvolutionOp) hop).getOp() == ConvOp.BIAS_ADD) { - return true; - } - return false; - } - - /** - * Utility method to check if the inferred shapes are equal to the given shape with a guard for unknown - * - * @param dim1 inferred shape - * @param dim2 given shape - * @param paramType string denoting the parameter for pretty printing of the error message - */ - private void throwExceptionIfNotEqual(int dim1, int dim2, String paramType) { - if(dim1 >= 0 && dim2 >= 0 && dim1 != dim2) { - throw new DMLRuntimeException("Inferred " + paramType + " from parent doesn't match with given " + paramType + ":" + dim1 + " != " + dim2); - } - } - - /** - * Gets the values for the parameters C, H, W, P, Q from parent hops - */ - private void inferCHWPQFromParentOp() { - Hop tmp = getInput().get(0); - // Skip bias_add and go to its parent - tmp = isInputBiasAdd(tmp) ? tmp.getInput().get(0) : tmp; - Hop parentReLU = isInputReLU(tmp); - // Skip ReLU and go to its parent - tmp = (parentReLU != null) ? parentReLU : tmp; - - // Cast tmp as parent - ConvolutionOp parentOp = (tmp instanceof ConvolutionOp) ? ((ConvolutionOp) tmp) : null; - - if(parentOp == null) - return; - else if(parentOp.getOp() == ConvOp.MAX_POOL || parentOp.getOp() == ConvOp.AVG_POOL) { - ConvolutionParameters parentParam = parentOp.parseInput(); - int prevC = _cachedParams.C; int prevH = _cachedParams.H; int prevW = _cachedParams.W; - // [C, P, Q] from maxpool becomes [C, H, W] of next op - _cachedParams.C = (_cachedParams.C < 0) ? parentParam.C : _cachedParams.C; - _cachedParams.H = (_cachedParams.H < 0) ? parentParam.P : _cachedParams.H; - _cachedParams.W = (_cachedParams.W < 0) ? parentParam.Q : _cachedParams.W; - if(LOG.isDebugEnabled()) { - LOG.debug("Inferring [C,H,W] from maxpool parent: [" + prevC + "," + prevH + "," + prevW + "]-> [" + _cachedParams.C + "," + _cachedParams.H + "," + _cachedParams.W + "]"); - } - if(THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH) { - throwExceptionIfNotEqual(prevC, _cachedParams.C, "C"); - throwExceptionIfNotEqual(prevH, _cachedParams.H, "H"); - throwExceptionIfNotEqual(prevW, _cachedParams.W, "W"); - } - } - else if(parentOp.getOp() == ConvOp.CONV2D) { - ConvolutionParameters parentParam = parentOp.parseInput(); - int prevC = _cachedParams.C; int prevH = _cachedParams.H; int prevW = _cachedParams.W; - // [K, P, Q] from convolution becomes [C, H, W] of next op - _cachedParams.C = (_cachedParams.C < 0) ? parentParam.K : _cachedParams.C; - _cachedParams.H = (_cachedParams.H < 0) ? 
parentParam.P : _cachedParams.H; - _cachedParams.W = (_cachedParams.W < 0) ? parentParam.Q : _cachedParams.W; - if(LOG.isDebugEnabled()) { - LOG.debug("Inferring [C,H,W] from maxpool parent: [" + prevC + "," + prevH + "," + prevW + "]-> [" + _cachedParams.C + "," + _cachedParams.H + "," + _cachedParams.W + "]"); - } - if(THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH) { - throwExceptionIfNotEqual(prevC, _cachedParams.C, "C"); - throwExceptionIfNotEqual(prevH, _cachedParams.H, "H"); - throwExceptionIfNotEqual(prevW, _cachedParams.W, "W"); - } - } - } - - @Override - public void refreshSizeInformation() - { - if(op == ConvOp.BIAS_ADD || op == ConvOp.BIAS_MULTIPLY) { - Hop input1 = getInput().get(0); - setDim1(input1.getDim1()); - setDim2(input1.getDim2()); - _nnz = -1; // cannot infer stats - return; - } - - // Reset the _cachedParams to avoid incorrect sizes - _cachedParams = new ConvolutionParameters(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, _maxNumThreads); - - switch(op) - { - case MAX_POOL: - case AVG_POOL: - { - _dim1 = getDim("N"); - _dim2 = getDim("CPQ"); - _nnz = -1; // cannot infer stats - break; - } - case MAX_POOL_BACKWARD: - case AVG_POOL_BACKWARD: - { - _dim1 = getDim("N"); - _dim2 = getDim("CHW"); - _nnz = -1; - break; - } - case CONV2D: - { - _dim1 = getDim("N"); - _dim2 = getDim("KPQ"); - _nnz = -1; // cannot infer stats - break; - } - case CONV2D_BACKWARD_DATA: - { - _dim1 = getDim("N"); - _dim2 = getDim("CHW"); - _nnz = -1; // cannot infer stats - break; - } - case CONV2D_BACKWARD_FILTER: - { - _dim1 = getDim("K"); - _dim2 = getDim("CRS"); - _nnz = -1; // cannot infer stats - break; - } - default: - throw new RuntimeException("The sizes are not refreshed for " + op.name()); - } - } - - @Override - public Object clone() throws CloneNotSupportedException - { - ConvolutionOp ret = new ConvolutionOp(); - - //copy generic attributes - ret.clone(this, false); - - //copy specific attributes - ret.op = op; - ret._maxNumThreads = _maxNumThreads; - return ret; - } - - @Override - public boolean compare( Hop that ) - { - if( !(that instanceof ConvolutionOp) ) - return false; - - ConvolutionOp that2 = (ConvolutionOp)that; - - boolean ret = (op == that2.op) - && (getInput().size()==that.getInput().size()) - && _maxNumThreads == that2._maxNumThreads; - - //compare all childs - if( ret ) //sizes matched - for( int i=0; i<_input.size(); i++ ) - ret &= getInput().get(i) == that2.getInput().get(i); - - return ret; - } - - // ------------------------------------------------------------------------------------------------------ - // Utility methods to get the dimensions taking into account unknown dimensions - - /** - * Convenient method to get the dimensions required by ConvolutionOp. 
- * - * @param dimString can be K, CRS, N, CHW, KPQ, PQ - * @return either -1 or value associated with the dimString - */ - private long getDim(String dimString) { - if(op == ConvOp.BIAS_ADD || op == ConvOp.BIAS_MULTIPLY) { - throw new RuntimeException("getDim method should not be invoked for bias_add and bias_multiply"); - } - try { - parseInput(); - } catch (DMLRuntimeException e) { - throw new RuntimeException(e); - } - Hop filter = null; // shape: K x CRS - Hop input = null; // shape: N x CHW - Hop dout = null; // shape: N x KPQ - Hop dout1 = null; // shape: N x CPQ - - if(getOp() == ConvOp.CONV2D) { - input = getInput().get(0); - filter = getInput().get(1); - } - else if(getOp() == ConvOp.CONV2D_BACKWARD_DATA) { - filter = getInput().get(0); - dout = getInput().get(1); - } - else if(getOp() == ConvOp.CONV2D_BACKWARD_FILTER) { - input = getInput().get(0); - dout = getInput().get(1); - } - else if(getOp() == ConvOp.MAX_POOL || getOp() == ConvOp.AVG_POOL) { - input = getInput().get(0); - } - else if(getOp() == ConvOp.MAX_POOL_BACKWARD || getOp() == ConvOp.AVG_POOL_BACKWARD) { - input = getInput().get(0); - dout1 = getInput().get(1); - } - - long ret = -1; - if(dimString.equals("K") && filter != null) { - ret = getNonNegative(ret, getNonNegative(_cachedParams.K, filter._dim1)); - } - else if(dimString.equals("CRS") && filter != null) { - ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.R, _cachedParams.S), filter._dim2)); - } - else if(dimString.equals("N") && input != null) { - ret = getNonNegative(ret, getNonNegative(_cachedParams.N, input._dim1)); - } - else if(dimString.equals("CHW") && input != null) { - ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.H, _cachedParams.W), input._dim2)); - } - else if(dimString.equals("N") && dout != null) { - ret = getNonNegative(ret, getNonNegative(_cachedParams.N, dout._dim1)); - } - else if(dimString.equals("KPQ") && dout != null) { - ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.K, _cachedParams.P, _cachedParams.Q), dout._dim2)); - } - else if(dimString.equals("N") && dout1 != null) { - ret = getNonNegative(ret, getNonNegative(_cachedParams.N, dout1._dim1)); - } - else if(dimString.equals("CPQ") && dout1 != null) { - ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.P, _cachedParams.Q), dout1._dim2)); - } - else if(dimString.equals("K")) { - ret = getNonNegative(ret, _cachedParams.K >= 0 ? _cachedParams.K : -1); - } - else if(dimString.equals("CRS")) { - ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.R, _cachedParams.S)); - } - else if(dimString.equals("N")) { - ret = getNonNegative(ret, _cachedParams.N >= 0 ? 
_cachedParams.N : -1); - } - else if(dimString.equals("CHW")) { - ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.H, _cachedParams.W)); - } - else if(dimString.equals("KPQ")) { - ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.K, _cachedParams.P, _cachedParams.Q)); - } - else if(dimString.equals("PQ")) { - ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.P, _cachedParams.Q)); - } - else if(dimString.equals("CPQ")) { - ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.P, _cachedParams.Q)); - } - else { - throw new RuntimeException("Unsupported dimension:" + dimString + " for operator " + getOp().name()); - } - - if(LOG.isDebugEnabled() && ret < 0) { - LOG.debug("Unknown dimension " + dimString + " for ConvolutionOp:" + op.name() + - " img_dim=[" + _cachedParams.N + " " + _cachedParams.C + " " + _cachedParams.H + " " + _cachedParams.W + "]" + - " filter_dim=[" + _cachedParams.K + " " + _cachedParams.C + " " + _cachedParams.R + " " + _cachedParams.S + "]" + - " output_feature_map=[" + _cachedParams.P + " " + _cachedParams.Q + "] stride=[" + _cachedParams.stride_h + " " + _cachedParams.stride_w + "]" + - " pad=[" + _cachedParams.pad_h + " " + _cachedParams.pad_w + "]"); - } - return ret; - } - - private static long nonNegativeMultiply(long val1, long val2, long val3) { - if(val1 >= 0 && val2 >= 0 && val3 >= 0) { - return val1 * val2 * val3; - } - else return -1; - } - private static long nonNegativeMultiply(long val1, long val2) { - if(val1 >= 0 && val2 >= 0) { - return val1 * val2; - } - else return -1; - } - private static long getNonNegative(long val1, long val2) { - if(val1 >= 0 && val2 >= 0) { - if(val1 == val2) return val1; - else throw new RuntimeException("Incorrect dimensions in Convolution Hop: " + val1 + " != " + val2); - } - else if(val1 >= 0) return val1; - else if(val2 >= 0) return val2; - else return -1; - } - // ------------------------------------------------------------------------------------------------------ -} http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/DnnOp.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/DnnOp.java b/src/main/java/org/apache/sysml/hops/DnnOp.java new file mode 100644 index 0000000..26f8f8c --- /dev/null +++ b/src/main/java/org/apache/sysml/hops/DnnOp.java @@ -0,0 +1,925 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.hops; + +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.hops.rewrite.HopRewriteUtils; +import org.apache.sysml.lops.DnnTransform; +import org.apache.sysml.lops.DnnTransform.OperationTypes; +import org.apache.sysml.lops.Lop; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.parser.Expression.ValueType; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.instructions.gpu.context.GPUContextPool; +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; +import org.apache.sysml.runtime.matrix.data.DnnParameters; +import java.util.ArrayList; + +public class DnnOp extends MultiThreadedHop +{ + // ------------------------------------------------------------------------- + // This flag allows us to compile plans with less unknowns and also serves as future tensorblock integration. + // By default, these flags are turned on. + + // When this flag is turned on, we attempt to check the parent convolution hop for unknown dimensions. + // For example: in case of conv -> maxpool, the input channel/height/width of maxpool will match output channel/height/width of conv. + private static final boolean INFER_TENSOR_SHAPE_FROM_PARENT_CONV_OP = true; + // This guards us from cases where the user provides incorrect C,H,W parameters. + private static final boolean THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH = true; + // ------------------------------------------------------------------------- + + // Specifies the type of this hop + private Hop.OpOpDnn op; + + private DnnOp() { + //default constructor for clone + } + + /** + * Create a hop from the builtin expression + * + * @param l name of the hop + * @param dt datatype (only supports matrix datatype) + * @param vt valuetype (only supports matrix valuetype) + * @param o type of this hop + * @param inp input hops + */ + public DnnOp(String l, DataType dt, ValueType vt, OpOpDnn o, ArrayList<Hop> inp) + { + super(l, dt, vt); + op = o; + + for( int i=0; i<inp.size(); i++ ) { + Hop in = inp.get(i); + getInput().add(i, in); + in.getParent().add(this); + } + + //compute unknown dims and nnz + refreshSizeInformation(); + } + + @Override + public void checkArity() { + HopsException.check(_input.size() >= 1, this, "should have at least one input but has %d inputs", _input.size()); + } + + public OpOpDnn getOp() { + return op; + } + + @Override + public String getOpString() { + return "" + HopsConv2Lops.get(op); + } + + private static boolean isEligibleForSpark() { + return false; + } + + @Override + public boolean isGPUEnabled() { + if(!DMLScript.USE_ACCELERATOR) + return false; + return true; + } + + @Override + public Lop constructLops() + { + //return already created lops + if( getLops() != null ) + return getLops(); + + ExecType et = optFindExecType(); + + ArrayList<Hop> inputs = getInput(); + switch( op ) + { + case MAX_POOL: + case MAX_POOL_BACKWARD: + case AVG_POOL: + case AVG_POOL_BACKWARD: + case CONV2D: + case CONV2D_BACKWARD_DATA: + case CONV2D_BACKWARD_FILTER: + case BIAS_ADD: + case BIAS_MULTIPLY: + { + if(et == ExecType.CP || et == ExecType.GPU) { + setLops(constructDnnLops(et, inputs)); + break; + } + else { + throw new HopsException("Unimplemented DnnOp for execution type: " + et.name()); + } + // break; + } + default: + throw new HopsException("Unsupported lops construction for operation type '"+op+"'."); + } + + //add reblock/checkpoint lops if necessary + 
constructAndSetLopsDataFlowProperties(); + + return getLops(); + } + + public void setOp(OpOpDnn op) { + this.op = op; + } + + private int getNumExpectedInputs() { + switch(op) { + case MAX_POOL_BACKWARD: + case AVG_POOL_BACKWARD: + case CONV2D: + case CONV2D_BACKWARD_FILTER: + case CONV2D_BACKWARD_DATA: + return 14; + case BIAS_ADD: + case BIAS_MULTIPLY: + return 2; + default: + return 13; + } + } + + /** + * Returns parent matrix X or null + * @param input input hop + * @return either null or X if input is max(X,0) or max(0,X) + */ + private static Hop isInputReLU(Hop input) { + if(HopRewriteUtils.isBinary(input, OpOp2.MAX)) { + if(HopRewriteUtils.isLiteralOfValue(input.getInput().get(0), 0)) { + return input.getInput().get(1); + } + else if(HopRewriteUtils.isLiteralOfValue(input.getInput().get(1), 0)) { + return input.getInput().get(0); + } + else + return null; + } + else + return null; + } + + private static boolean isInputConv2d(Hop input) { + return HopRewriteUtils.isDnn(input, OpOpDnn.CONV2D); + } + + /** + * Compares the input parameters for max_pool/max_pool_backward operations + * + * @return true if the following parameters match: stride=[stride, stride], padding=[pad, pad], input_shape=[numImg, numChannels, imgSize, imgSize], pool_size=[poolSize1, poolSize2] + */ + private static boolean isPoolingParametersEqualAndKnown(DnnParameters param1, DnnParameters param2) { + return isEqualAndKnown(param1.stride_h, param2.stride_h) && isEqualAndKnown(param1.stride_w, param2.stride_w) && + isEqualAndKnown(param1.pad_h, param2.pad_h) && isEqualAndKnown(param1.pad_w, param2.pad_w) && + isEqualAndKnown(param1.R, param2.R) && isEqualAndKnown(param1.S, param2.S) && + isEqualAndKnown(param1.N, param2.N) && isEqualAndKnown(param1.C, param2.C) && + isEqualAndKnown(param1.H, param2.H) && isEqualAndKnown(param1.W, param2.W); + } + + private static boolean isEqualAndKnown(int val1, int val2) { + return val1 >= 0 && val2 >= 0 && val1 == val2; + } + + /** + * Returns the output lop of max_pool/avg_pool operation with same parameters as this hop. + * If corresponding output lop is not found or if this is not a max_pool_backward operation, this function returns null + * + * @return output lop of max_pool/avg_pool operation with same parameters as this hop + */ + private Lop getMaxPoolOutputLop() { + if(op == OpOpDnn.MAX_POOL_BACKWARD || op == OpOpDnn.AVG_POOL_BACKWARD) { + OpOpDnn opType = (op == OpOpDnn.MAX_POOL_BACKWARD) ? OpOpDnn.MAX_POOL : OpOpDnn.AVG_POOL; + Hop inputImage = getInput().get(0); + for(Hop tmpParent : inputImage.getParent()) { + if(!(tmpParent instanceof DnnOp)) + continue; + DnnOp parent = (DnnOp) tmpParent; + if(parent.getOp() == opType && isPoolingParametersEqualAndKnown(parent._cachedParams, _cachedParams)) { + return parent.constructLops(); + } + } + } + return null; + } + + public Lop constructDnnLops(ExecType et, ArrayList<Hop> inputs) { + if(inputs.size() != getNumExpectedInputs()) + throw new HopsException("Incorrect number of inputs for " + op.name()); + + // --------------------------------------------------------------- + // Deal with fused operators and contruct lhsInputLop/optionalRhsInputLop + Lop lhsInputLop = null; Lop optionalRhsInputLop = null; + ArrayList<Hop> inputsOfPotentiallyFusedOp = inputs; + OperationTypes lopOp = HopsConv2Lops.get(op); + + // RELU_MAX_POOLING and RELU_MAX_POOLING_BACKWARD is extremely useful for CP backend + // by reducing unnecessary sparse-to-dense-to-sparse conversion. 
+ // For other backends, this operators is not necessary as it reduces an additional relu operator. + Hop parentReLU = isInputReLU(inputs.get(0)); + if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == OpOpDnn.MAX_POOL && parentReLU != null) { + lhsInputLop = parentReLU.constructLops(); + lopOp = OperationTypes.RELU_MAX_POOLING; + } + else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && et == ExecType.CP && op == OpOpDnn.MAX_POOL_BACKWARD && parentReLU != null) { + lhsInputLop = parentReLU.constructLops(); + lopOp = OperationTypes.RELU_MAX_POOLING_BACKWARD; + } + else if(OptimizerUtils.ALLOW_OPERATOR_FUSION && op == OpOpDnn.BIAS_ADD && isInputConv2d(inputs.get(0))) { + lopOp = OperationTypes.CONV2D_BIAS_ADD; + + // the first lop is image + lhsInputLop = inputs.get(0).getInput().get(0).constructLops(); + // the second lop is bias + optionalRhsInputLop = inputs.get(1).constructLops(); + + // Use the inputs from conv2d rather than bias_add + inputsOfPotentiallyFusedOp = inputs.get(0).getInput(); + } + else { + lhsInputLop = inputs.get(0).constructLops(); + } + // --------------------------------------------------------------- + + // --------------------------------------------------------------- + // Compute intermediate memory budget that can be passed to GPU operators + // for better CuDNN operator selection at runtime + double intermediateMemEstimate = computeIntermediateMemEstimate(-1, -1, -1 ); + if(et == ExecType.GPU && _dim1 >= 0 && _dim2 >= 0) { + // This enables us to compile more efficient matrix-matrix CuDNN operation instead of + // row-by-row invocation of multiple vector-matrix CuDNN operations. + // This is possible as the operations on GPU are single-threaded + double optimisticIntermediateMemEstimate = GPUContextPool.initialGPUMemBudget() - getOutputMemEstimate() - inputs.get(0).getOutputMemEstimate(); + if(optionalRhsInputLop != null) { + optimisticIntermediateMemEstimate -= inputs.get(1).getOutputMemEstimate(); + } + intermediateMemEstimate = Math.max(intermediateMemEstimate, optimisticIntermediateMemEstimate); + } + // --------------------------------------------------------------- + + // Construct the lop + Lop optionalMaxPoolOutput = (et == ExecType.GPU) ? 
getMaxPoolOutputLop() : null; + Lop[] l2inputs = new Lop[inputsOfPotentiallyFusedOp.size()-1]; + for( int i=1; i < inputsOfPotentiallyFusedOp.size(); i++ ) + l2inputs[i-1] = inputsOfPotentiallyFusedOp.get(i).constructLops(); + DnnTransform convolutionLop = new DnnTransform( + lhsInputLop, lopOp, getDataType(), getValueType(), et, + OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), intermediateMemEstimate); + setOutputDimensions(convolutionLop); + setLineNumbers(convolutionLop); + + // --------------------------------------------------------------- + // Add input/output for parent lops of convolutionLop + lhsInputLop.addOutput(convolutionLop); + if(optionalRhsInputLop != null) { + convolutionLop.addInput(optionalRhsInputLop); + optionalRhsInputLop.addOutput(convolutionLop); + } + for( int i=0; i < l2inputs.length; i++ ) { + convolutionLop.addInput(l2inputs[i]); + l2inputs[i].addOutput(convolutionLop); + } + // Only valid for MAX_POOLING_BACKWARD on GPU + if(optionalMaxPoolOutput != null) { + convolutionLop.addInput(optionalMaxPoolOutput); + optionalMaxPoolOutput.addOutput(convolutionLop); + } + convolutionLop.updateLopProperties(); + + // TODO double check that optionalMaxPoolOutput adheres to proper + // ID ordering of constructed lops (previously hidden by setLevel) + + // --------------------------------------------------------------- + + return convolutionLop; + } + + + @Override + protected double computeOutputMemEstimate( long dim1, long dim2, long nnz ) + { + if(getOp() == OpOpDnn.BIAS_MULTIPLY) { + // in non-gpu mode, the worst case size of bias multiply operation is same as that of input. + if(DMLScript.USE_ACCELERATOR) + return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, 1.0); + else + return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, getInput().get(0).getSparsity()); + } + else { + double sparsity = 1.0; + return OptimizerUtils.estimateSizeExactSparsity(dim1, dim2, sparsity); + } + } + + // --------------------------------------------------------------- + // Utility methods to guard the computation of memory estimates in presense of unknowns + private static class IntermediateDimensions { + int dim1; int dim2; double sp; + public IntermediateDimensions(DnnOp h, String dim1Str, String dim2Str, double sp) { + dim1 = (int) h.getDim(dim1Str); + dim2 = (int) h.getDim(dim2Str); + this.sp = sp; + } + public IntermediateDimensions(DnnOp h, String dim1Str, String dim2Str) { + dim1 = (int) h.getDim(dim1Str); + dim2 = (int) h.getDim(dim2Str); + sp = 1; + } + public IntermediateDimensions(DnnOp h, int dim1, String dim2Str) { + this.dim1 = dim1; + dim2 = (int) h.getDim(dim2Str); + sp = 1; + } + + /** + * Add two computed memory estimates + * + * @param val1 memory estimate 1 + * @param val2 memory estimate 2 + * @return sum of memory estimates + */ + static double guardedAdd(double val1, double val2) { + if(val1 < 0 || val2 < 0) return OptimizerUtils.DEFAULT_SIZE; + double ret = val1 + val2; + if(ret >= OptimizerUtils.DEFAULT_SIZE) return OptimizerUtils.DEFAULT_SIZE; + else return ret; + } + + /** + * Compute memory estimates for given intermediate matrices + * + * @param intermediates list of intermediates + * @param numWorkers number of workers + * @return memory estimate + */ + public static double addEstimateSizes(ArrayList<IntermediateDimensions> intermediates, int numWorkers) { + double memBudget = 0; + for(int i = 0; i < intermediates.size(); i++) { + memBudget = guardedAdd(memBudget, OptimizerUtils.estimateSizeExactSparsity( + intermediates.get(i).dim1, 
intermediates.get(i).dim2, intermediates.get(i).sp)*numWorkers); + } + return memBudget; + } + + /** + * Compute max of two computed memory estimates + * @param val1 memory estimate 1 + * @param val2 memory estimate 2 + * @return max of memory estimates + */ + public static double guardedMax(double val1, double val2) { + if(val1 < 0 || val2 < 0) return OptimizerUtils.DEFAULT_SIZE; + double ret = Math.max(val1, val2); + if(ret >= OptimizerUtils.DEFAULT_SIZE) return OptimizerUtils.DEFAULT_SIZE; + else return ret; + } + } + + /** + * Helper utility to compute intermediate memory estimate + * + * @param gpuIntermediates intermediates for GPU + * @param cpIntermediates intermediates for CP + * @return memory estimates + */ + private double computeIntermediateMemEstimateHelper( + ArrayList<IntermediateDimensions> gpuIntermediates, + ArrayList<IntermediateDimensions> cpIntermediates) { + // Since CP operators use row-level parallelism by default + int numWorkers = (int) Math.min(OptimizerUtils.getConstrainedNumThreads(_maxNumThreads), Math.max(getDim("N"), 1)); + if(DMLScript.USE_ACCELERATOR) { + // Account for potential sparse-to-dense conversion + double gpuMemBudget = IntermediateDimensions.addEstimateSizes(gpuIntermediates, 1); + double cpMemoryBudget = IntermediateDimensions.addEstimateSizes(cpIntermediates, numWorkers); + if(cpMemoryBudget > gpuMemBudget) { + double oneThreadCPMemBudget = IntermediateDimensions.addEstimateSizes(cpIntermediates, 1); + if(oneThreadCPMemBudget <= gpuMemBudget) { + // Why limit CPU ? in-order to give more opportunity to compile GPU operators + cpMemoryBudget = oneThreadCPMemBudget; + } + } + // Finally, use the maximum of CP and GPU memory budget + return IntermediateDimensions.guardedMax(cpMemoryBudget, gpuMemBudget); + } + else { + // When -gpu flag is not provided, the memory estimates for CP are not affected. + return IntermediateDimensions.addEstimateSizes(cpIntermediates, numWorkers); + } + } + + @Override + protected double computeIntermediateMemEstimate( long ignoreDim1, long ignoreDim2, long ignoreNnz ) + { + ArrayList<IntermediateDimensions> gpuIntermediates = new ArrayList<>(); + ArrayList<IntermediateDimensions> cpIntermediates = new ArrayList<>(); + if(getOp() == OpOpDnn.CONV2D) { + // Assumption: To compile a GPU conv2d operator, following should fit on the GPU: + // 1. output in dense format (i.e. computeOutputMemEstimate) + // 2. input in any format + // 3. atleast one input row in dense format + // 4. filter in dense format + + // Account for potential sparse-to-dense conversion of atleast 1 input row and filter + gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); + gpuIntermediates.add(new IntermediateDimensions(this, "K", "CRS")); + + // im2col operation preserves the worst-case sparsity of the input. + cpIntermediates.add(new IntermediateDimensions(this, "CRS", "PQ", getInput().get(0).getSparsity())); + } + else if(getOp() == OpOpDnn.CONV2D_BACKWARD_DATA) { + // Assumption: To compile a GPU conv2d_backward_data operator, following should fit on the GPU: + // 1. output in dense format (i.e. computeOutputMemEstimate) + // 2. dout in any format + // 3. atleast one dout row in dense format + // 4. 
filter in dense format + + // Account for potential sparse-to-dense conversion of at least 1 dout row and the filter + gpuIntermediates.add(new IntermediateDimensions(this, 1, "KPQ")); + gpuIntermediates.add(new IntermediateDimensions(this, "K", "CRS")); + + // There are two intermediates: rotate180 and the input to col2im for conv2d_backward_data + // rotate180 preserves the "exact" sparsity of the dout matrix + cpIntermediates.add(new IntermediateDimensions(this, "PQ", "K", getInput().get(1).getSparsity())); + // Note: worst-case sparsity for the input of col2im (of size NPQ x CRS where N is determined by the degree of parallelism) + cpIntermediates.add(new IntermediateDimensions(this, "PQ", "CRS")); + } + else if(getOp() == OpOpDnn.CONV2D_BACKWARD_FILTER) { + // Assumption: To compile a GPU conv2d_backward_filter operator, the following should fit on the GPU: + // 1. output in dense format (i.e. computeOutputMemEstimate) + // 2. dout in any format + // 3. at least one dout and input row in dense format + + // Account for potential sparse-to-dense conversion of at least 1 input + dout row + gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); + gpuIntermediates.add(new IntermediateDimensions(this, 1, "KPQ")); + + // There are two intermediates: im2col and rotate180 for conv2d_backward_filter + // rotate180 preserves the "exact" sparsity of the dout matrix + cpIntermediates.add(new IntermediateDimensions(this, "PQ", "K", getInput().get(1).getSparsity())); + // The im2col operation preserves the worst-case sparsity of the input. + cpIntermediates.add(new IntermediateDimensions(this, "CRS", "PQ", getInput().get(0).getSparsity())); + } + else if(getOp() == OpOpDnn.MAX_POOL || getOp() == OpOpDnn.AVG_POOL) { + // Account for potential sparse-to-dense conversion of at least 1 input row + gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); + } + else if(getOp() == OpOpDnn.MAX_POOL_BACKWARD || getOp() == OpOpDnn.AVG_POOL_BACKWARD) { + // Account for potential sparse-to-dense conversion of at least 1 input + dout row + gpuIntermediates.add(new IntermediateDimensions(this, 1, "CHW")); + gpuIntermediates.add(new IntermediateDimensions(this, 1, "CPQ")); + } + + if(gpuIntermediates.size() > 0 || cpIntermediates.size() > 0) + return computeIntermediateMemEstimateHelper(gpuIntermediates, cpIntermediates); + else + return 0; + } + + + @Override + protected long[] inferOutputCharacteristics( MemoTable memo ) + { + // [numRows, numCols, NNZ] + long[] ret = new long[3]; + + if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) { + MatrixCharacteristics[] mc = memo.getAllInputStats(getInput()); + ret[0] = mc[0].rowsKnown() ? mc[0].getRows() : -1; + ret[1] = mc[0].colsKnown() ? mc[0].getCols() : -1; + ret[2] = -1; + return (ret[0]>=0 && ret[1]>=0) ? ret : null; + } + + refreshSizeInformation(); + ret[0] = _dim1; ret[1] = _dim2; ret[2] = _nnz; + + //safe return (create entry only if at least dims known) + return (ret[0]>0 && ret[1]>0) ? ret : null; + } + + + @Override + public boolean allowsAllExecTypes() + { + return true; + } + + @Override + protected ExecType optFindExecType() { + + checkAndSetForcedPlatform(); + + ExecType REMOTE = OptimizerUtils.isSparkExecutionMode() ?
ExecType.SPARK : ExecType.MR; + + if( _etypeForced != null ) { + _etype = _etypeForced; + } + else { + if ( OptimizerUtils.isMemoryBasedOptLevel() ) { + _etype = findExecTypeByMemEstimate(); + } + else { + _etype = REMOTE; + } + + //check for valid CP dimensions and matrix size + checkAndSetInvalidCPDimsAndSize(); + } + + // TODO: Fix this after adding remaining spark instructions + _etype = !isEligibleForSpark() && _etype == REMOTE ? ExecType.CP : _etype; + + //mark for recompile (forever) + setRequiresRecompileIfNecessary(); + + return _etype; + } + + // Parameters recomputed in refreshSizeInformation and passed across many calls to getDim + private DnnParameters _cachedParams = new DnnParameters(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, _maxNumThreads); + + // stride1, stride2, padding1, padding2 + // input_shape1, input_shape2, input_shape3, input_shape4, + // filter_shape1, filter_shape2, filter_shape3, filter_shape4 + DnnParameters parseInput() { + + Hop imageHeightHop = null; Hop filterHeightHop = null; + if(op == OpOpDnn.MAX_POOL_BACKWARD || op == OpOpDnn.AVG_POOL_BACKWARD + || op == OpOpDnn.CONV2D + || op == OpOpDnn.CONV2D_BACKWARD_FILTER + || op == OpOpDnn.CONV2D_BACKWARD_DATA) { + imageHeightHop = getInput().get(8); + filterHeightHop = getInput().get(12); + _cachedParams.setIfUnknown( + getInput().get(6), // N + getInput().get(7), // C + imageHeightHop, // H + getInput().get(9), // W + getInput().get(10), // K + filterHeightHop, // R + getInput().get(13), // S + getInput().get(2), // stride_h + getInput().get(3), // stride_w + getInput().get(4), // pad_h + getInput().get(5), // pad_w + _maxNumThreads); + } + else { + imageHeightHop = getInput().get(7); + filterHeightHop = getInput().get(11); + _cachedParams.setIfUnknown( + getInput().get(5), // N + getInput().get(6), // C + imageHeightHop, // H + getInput().get(8), // W + getInput().get(9), // K + filterHeightHop, // R + getInput().get(12), // S + getInput().get(1), // stride_h + getInput().get(2), // stride_w + getInput().get(3), // pad_h + getInput().get(4), // pad_w + _maxNumThreads); + } + + if(INFER_TENSOR_SHAPE_FROM_PARENT_CONV_OP) { + boolean isPool = (getOp() == OpOpDnn.MAX_POOL || getOp() == OpOpDnn.AVG_POOL); + boolean isConv = getOp() == OpOpDnn.CONV2D; + boolean unknownCHWPQ = _cachedParams.C < 0 || _cachedParams.H < 0 || _cachedParams.W < 0 || _cachedParams.P < 0 || _cachedParams.Q < 0; + if((isPool || isConv) && unknownCHWPQ) { + // Only infer the input shape for convolution and pooling + inferCHWPQFromParentOp(); + } + } + + if(imageHeightHop == filterHeightHop && _cachedParams.R < 0 && _cachedParams.H > 0) { + // Unknown R, but known H and both are equal + // This happens for one-dimensional conv2d where H=R and H can be inferred from the parent hop + _cachedParams.R = _cachedParams.H; + } + + // Compute P and Q if unknown.
At script level, they are computed using the following script: + // P = as.integer(floor((H + 2*pad_h - R)/stride_h + 1)) + // Q = as.integer(floor((W + 2*pad_w - S)/stride_w + 1)) + if(_cachedParams.P < 0 && _cachedParams.H >= 0 && _cachedParams.R >= 0 && _cachedParams.stride_h >= 0 && _cachedParams.pad_h >= 0) { + _cachedParams.P = (int) org.apache.sysml.runtime.util.DnnUtils.getP(_cachedParams.H, _cachedParams.R, _cachedParams.stride_h, _cachedParams.pad_h); + } + if(_cachedParams.Q < 0 && _cachedParams.W >= 0 && _cachedParams.S >= 0 && _cachedParams.stride_w >= 0 && _cachedParams.pad_w >= 0) { + _cachedParams.Q = (int) org.apache.sysml.runtime.util.DnnUtils.getQ(_cachedParams.W, _cachedParams.S, _cachedParams.stride_w, _cachedParams.pad_w); + } + + return _cachedParams; + } + + /** + * Utility method to check if the given hop is a BIAS_ADD hop + * + * @param hop the given hop + * @return true if the given hop is BIAS_ADD + */ + private static boolean isInputBiasAdd(Hop hop) { + return HopRewriteUtils.isDnn(hop, OpOpDnn.BIAS_ADD); + } + + /** + * Utility method to check if the inferred shapes are equal to the given shapes, with a guard for unknowns + * + * @param dim1 inferred shape + * @param dim2 given shape + * @param paramType string denoting the parameter, for pretty-printing the error message + */ + private void throwExceptionIfNotEqual(int dim1, int dim2, String paramType) { + if(dim1 >= 0 && dim2 >= 0 && dim1 != dim2) { + throw new DMLRuntimeException("Inferred " + paramType + " from parent does not match the given " + paramType + ": " + dim1 + " != " + dim2); + } + } + + /** + * Gets the values for the parameters C, H, W, P, Q from parent hops + */ + private void inferCHWPQFromParentOp() { + Hop tmp = getInput().get(0); + // Skip bias_add and go to its parent + tmp = isInputBiasAdd(tmp) ? tmp.getInput().get(0) : tmp; + Hop parentReLU = isInputReLU(tmp); + // Skip ReLU and go to its parent + tmp = (parentReLU != null) ? parentReLU : tmp; + + // Cast tmp to a DnnOp if possible + DnnOp parentOp = (tmp instanceof DnnOp) ? ((DnnOp) tmp) : null; + + if(parentOp == null) + return; + else if(parentOp.getOp() == OpOpDnn.MAX_POOL || parentOp.getOp() == OpOpDnn.AVG_POOL) { + DnnParameters parentParam = parentOp.parseInput(); + int prevC = _cachedParams.C; int prevH = _cachedParams.H; int prevW = _cachedParams.W; + // [C, P, Q] from pooling becomes [C, H, W] of the next op + _cachedParams.C = (_cachedParams.C < 0) ? parentParam.C : _cachedParams.C; + _cachedParams.H = (_cachedParams.H < 0) ? parentParam.P : _cachedParams.H; + _cachedParams.W = (_cachedParams.W < 0) ? parentParam.Q : _cachedParams.W; + if(LOG.isDebugEnabled()) { + LOG.debug("Inferring [C,H,W] from pooling parent: [" + prevC + "," + prevH + "," + prevW + "] -> [" + _cachedParams.C + "," + _cachedParams.H + "," + _cachedParams.W + "]"); + } + if(THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH) { + throwExceptionIfNotEqual(prevC, _cachedParams.C, "C"); + throwExceptionIfNotEqual(prevH, _cachedParams.H, "H"); + throwExceptionIfNotEqual(prevW, _cachedParams.W, "W"); + } + } + else if(parentOp.getOp() == OpOpDnn.CONV2D) { + DnnParameters parentParam = parentOp.parseInput(); + int prevC = _cachedParams.C; int prevH = _cachedParams.H; int prevW = _cachedParams.W; + // [K, P, Q] from convolution becomes [C, H, W] of the next op + _cachedParams.C = (_cachedParams.C < 0) ? parentParam.K : _cachedParams.C; + _cachedParams.H = (_cachedParams.H < 0) ? parentParam.P : _cachedParams.H; + _cachedParams.W = (_cachedParams.W < 0) ?
parentParam.Q : _cachedParams.W; + if(LOG.isDebugEnabled()) { + LOG.debug("Inferring [C,H,W] from conv2d parent: [" + prevC + "," + prevH + "," + prevW + "] -> [" + _cachedParams.C + "," + _cachedParams.H + "," + _cachedParams.W + "]"); + } + if(THROW_ERROR_IF_INFERRED_SHAPE_MISMATCH) { + throwExceptionIfNotEqual(prevC, _cachedParams.C, "C"); + throwExceptionIfNotEqual(prevH, _cachedParams.H, "H"); + throwExceptionIfNotEqual(prevW, _cachedParams.W, "W"); + } + } + } + + @Override + public void refreshSizeInformation() + { + if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) { + Hop input1 = getInput().get(0); + setDim1(input1.getDim1()); + setDim2(input1.getDim2()); + _nnz = -1; // cannot infer stats + return; + } + + // Reset the _cachedParams to avoid incorrect sizes + _cachedParams = new DnnParameters(-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, _maxNumThreads); + + switch(op) + { + case MAX_POOL: + case AVG_POOL: + { + _dim1 = getDim("N"); + _dim2 = getDim("CPQ"); + _nnz = -1; // cannot infer stats + break; + } + case MAX_POOL_BACKWARD: + case AVG_POOL_BACKWARD: + { + _dim1 = getDim("N"); + _dim2 = getDim("CHW"); + _nnz = -1; + break; + } + case CONV2D: + { + _dim1 = getDim("N"); + _dim2 = getDim("KPQ"); + _nnz = -1; // cannot infer stats + break; + } + case CONV2D_BACKWARD_DATA: + { + _dim1 = getDim("N"); + _dim2 = getDim("CHW"); + _nnz = -1; // cannot infer stats + break; + } + case CONV2D_BACKWARD_FILTER: + { + _dim1 = getDim("K"); + _dim2 = getDim("CRS"); + _nnz = -1; // cannot infer stats + break; + } + default: + throw new RuntimeException("The sizes are not refreshed for " + op.name()); + } + } + + @Override + public Object clone() throws CloneNotSupportedException + { + DnnOp ret = new DnnOp(); + + //copy generic attributes + ret.clone(this, false); + + //copy specific attributes + ret.op = op; + ret._maxNumThreads = _maxNumThreads; + return ret; + } + + @Override + public boolean compare( Hop that ) + { + if( !(that instanceof DnnOp) ) + return false; + + DnnOp that2 = (DnnOp)that; + + boolean ret = (op == that2.op) + && (getInput().size()==that.getInput().size()) + && _maxNumThreads == that2._maxNumThreads; + + //compare all children + if( ret ) //sizes matched + for( int i=0; i<_input.size(); i++ ) + ret &= getInput().get(i) == that2.getInput().get(i); + + return ret; + } + + // ------------------------------------------------------------------------------------------------------ + // Utility methods to get the dimensions taking into account unknown dimensions + + /** + * Convenience method to get the dimensions required by DnnOp.
+ * + * @param dimString can be K, CRS, N, CHW, KPQ, CPQ, PQ + * @return either -1 or the value associated with dimString + */ + private long getDim(String dimString) { + if(op == OpOpDnn.BIAS_ADD || op == OpOpDnn.BIAS_MULTIPLY) { + throw new RuntimeException("getDim method should not be invoked for bias_add and bias_multiply"); + } + try { + parseInput(); + } catch (DMLRuntimeException e) { + throw new RuntimeException(e); + } + Hop filter = null; // shape: K x CRS + Hop input = null; // shape: N x CHW + Hop dout = null; // shape: N x KPQ + Hop dout1 = null; // shape: N x CPQ + + if(getOp() == OpOpDnn.CONV2D) { + input = getInput().get(0); + filter = getInput().get(1); + } + else if(getOp() == OpOpDnn.CONV2D_BACKWARD_DATA) { + filter = getInput().get(0); + dout = getInput().get(1); + } + else if(getOp() == OpOpDnn.CONV2D_BACKWARD_FILTER) { + input = getInput().get(0); + dout = getInput().get(1); + } + else if(getOp() == OpOpDnn.MAX_POOL || getOp() == OpOpDnn.AVG_POOL) { + input = getInput().get(0); + } + else if(getOp() == OpOpDnn.MAX_POOL_BACKWARD || getOp() == OpOpDnn.AVG_POOL_BACKWARD) { + input = getInput().get(0); + dout1 = getInput().get(1); + } + + long ret = -1; + if(dimString.equals("K") && filter != null) { + ret = getNonNegative(ret, getNonNegative(_cachedParams.K, filter._dim1)); + } + else if(dimString.equals("CRS") && filter != null) { + ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.R, _cachedParams.S), filter._dim2)); + } + else if(dimString.equals("N") && input != null) { + ret = getNonNegative(ret, getNonNegative(_cachedParams.N, input._dim1)); + } + else if(dimString.equals("CHW") && input != null) { + ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.H, _cachedParams.W), input._dim2)); + } + else if(dimString.equals("N") && dout != null) { + ret = getNonNegative(ret, getNonNegative(_cachedParams.N, dout._dim1)); + } + else if(dimString.equals("KPQ") && dout != null) { + ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.K, _cachedParams.P, _cachedParams.Q), dout._dim2)); + } + else if(dimString.equals("N") && dout1 != null) { + ret = getNonNegative(ret, getNonNegative(_cachedParams.N, dout1._dim1)); + } + else if(dimString.equals("CPQ") && dout1 != null) { + ret = getNonNegative(ret, getNonNegative(nonNegativeMultiply(_cachedParams.C, _cachedParams.P, _cachedParams.Q), dout1._dim2)); + } + else if(dimString.equals("K")) { + ret = getNonNegative(ret, _cachedParams.K >= 0 ? _cachedParams.K : -1); + } + else if(dimString.equals("CRS")) { + ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.R, _cachedParams.S)); + } + else if(dimString.equals("N")) { + ret = getNonNegative(ret, _cachedParams.N >= 0 ?
_cachedParams.N : -1); + } + else if(dimString.equals("CHW")) { + ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.H, _cachedParams.W)); + } + else if(dimString.equals("KPQ")) { + ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.K, _cachedParams.P, _cachedParams.Q)); + } + else if(dimString.equals("PQ")) { + ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.P, _cachedParams.Q)); + } + else if(dimString.equals("CPQ")) { + ret = getNonNegative(ret, nonNegativeMultiply(_cachedParams.C, _cachedParams.P, _cachedParams.Q)); + } + else { + throw new RuntimeException("Unsupported dimension:" + dimString + " for operator " + getOp().name()); + } + + if(LOG.isDebugEnabled() && ret < 0) { + LOG.debug("Unknown dimension " + dimString + " for DnnOp:" + op.name() + + " img_dim=[" + _cachedParams.N + " " + _cachedParams.C + " " + _cachedParams.H + " " + _cachedParams.W + "]" + + " filter_dim=[" + _cachedParams.K + " " + _cachedParams.C + " " + _cachedParams.R + " " + _cachedParams.S + "]" + + " output_feature_map=[" + _cachedParams.P + " " + _cachedParams.Q + "] stride=[" + _cachedParams.stride_h + " " + _cachedParams.stride_w + "]" + + " pad=[" + _cachedParams.pad_h + " " + _cachedParams.pad_w + "]"); + } + return ret; + } + + private static long nonNegativeMultiply(long val1, long val2, long val3) { + if(val1 >= 0 && val2 >= 0 && val3 >= 0) { + return val1 * val2 * val3; + } + else return -1; + } + private static long nonNegativeMultiply(long val1, long val2) { + if(val1 >= 0 && val2 >= 0) { + return val1 * val2; + } + else return -1; + } + private static long getNonNegative(long val1, long val2) { + if(val1 >= 0 && val2 >= 0) { + if(val1 == val2) return val1; + else throw new RuntimeException("Incorrect dimensions in DnnOp: " + val1 + " != " + val2); + } + else if(val1 >= 0) return val1; + else if(val2 >= 0) return val2; + else return -1; + } + // ------------------------------------------------------------------------------------------------------ +} http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/Hop.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/Hop.java b/src/main/java/org/apache/sysml/hops/Hop.java index 563c693..53cfdfa 100644 --- a/src/main/java/org/apache/sysml/hops/Hop.java +++ b/src/main/java/org/apache/sysml/hops/Hop.java @@ -141,7 +141,7 @@ public abstract class Hop implements ParseInfo * Check whether this Hop has a correct number of inputs. * * (Some Hops can have a variable number of inputs, such as DataOp, DataGenOp, ParameterizedBuiltinOp, - * ReorgOp, TernaryOp, QuaternaryOp, MultipleOp, ConvolutionOp, and SpoofFusedOp.) + * ReorgOp, TernaryOp, QuaternaryOp, MultipleOp, DnnOp, and SpoofFusedOp.) * * Parameterized Hops (such as DataOp) can check that the number of parameters matches the number of inputs. 
* @@ -1096,7 +1096,7 @@ public abstract class Hop implements ParseInfo //DIAG_V2M, DIAG_M2V, } - public enum ConvOp { + public enum OpOpDnn { MAX_POOL, MAX_POOL_BACKWARD, AVG_POOL, AVG_POOL_BACKWARD, CONV2D, CONV2D_BACKWARD_FILTER, CONV2D_BACKWARD_DATA, BIAS_ADD, BIAS_MULTIPLY @@ -1160,18 +1160,18 @@ public abstract class Hop implements ParseInfo } - protected static final HashMap<ConvOp, org.apache.sysml.lops.ConvolutionTransform.OperationTypes> HopsConv2Lops; + protected static final HashMap<OpOpDnn, org.apache.sysml.lops.DnnTransform.OperationTypes> HopsConv2Lops; static { HopsConv2Lops = new HashMap<>(); - HopsConv2Lops.put(ConvOp.MAX_POOL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOL); - HopsConv2Lops.put(ConvOp.MAX_POOL_BACKWARD, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.MAX_POOL_BACKWARD); - HopsConv2Lops.put(ConvOp.AVG_POOL, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.AVG_POOL); - HopsConv2Lops.put(ConvOp.AVG_POOL_BACKWARD, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.AVG_POOL_BACKWARD); - HopsConv2Lops.put(ConvOp.CONV2D, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.CONV2D); - HopsConv2Lops.put(ConvOp.BIAS_ADD, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.BIAS_ADD); - HopsConv2Lops.put(ConvOp.BIAS_MULTIPLY, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.BIAS_MULTIPLY); - HopsConv2Lops.put(ConvOp.CONV2D_BACKWARD_FILTER, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.CONV2D_BACKWARD_FILTER); - HopsConv2Lops.put(ConvOp.CONV2D_BACKWARD_DATA, org.apache.sysml.lops.ConvolutionTransform.OperationTypes.CONV2D_BACKWARD_DATA); + HopsConv2Lops.put(OpOpDnn.MAX_POOL, org.apache.sysml.lops.DnnTransform.OperationTypes.MAX_POOL); + HopsConv2Lops.put(OpOpDnn.MAX_POOL_BACKWARD, org.apache.sysml.lops.DnnTransform.OperationTypes.MAX_POOL_BACKWARD); + HopsConv2Lops.put(OpOpDnn.AVG_POOL, org.apache.sysml.lops.DnnTransform.OperationTypes.AVG_POOL); + HopsConv2Lops.put(OpOpDnn.AVG_POOL_BACKWARD, org.apache.sysml.lops.DnnTransform.OperationTypes.AVG_POOL_BACKWARD); + HopsConv2Lops.put(OpOpDnn.CONV2D, org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D); + HopsConv2Lops.put(OpOpDnn.BIAS_ADD, org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_ADD); + HopsConv2Lops.put(OpOpDnn.BIAS_MULTIPLY, org.apache.sysml.lops.DnnTransform.OperationTypes.BIAS_MULTIPLY); + HopsConv2Lops.put(OpOpDnn.CONV2D_BACKWARD_FILTER, org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D_BACKWARD_FILTER); + HopsConv2Lops.put(OpOpDnn.CONV2D_BACKWARD_DATA, org.apache.sysml.lops.DnnTransform.OperationTypes.CONV2D_BACKWARD_DATA); } protected static final HashMap<Hop.Direction, org.apache.sysml.lops.PartialAggregate.DirectionTypes> HopsDirection2Lops; http://git-wip-us.apache.org/repos/asf/systemml/blob/9fa5a09b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java index 9765fc8..7872c91 100644 --- a/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java +++ b/src/main/java/org/apache/sysml/hops/rewrite/HopRewriteUtils.java @@ -34,6 +34,7 @@ import org.apache.sysml.hops.AggBinaryOp; import org.apache.sysml.hops.AggUnaryOp; import org.apache.sysml.hops.BinaryOp; import org.apache.sysml.hops.DataOp; +import org.apache.sysml.hops.DnnOp; import org.apache.sysml.hops.Hop; import 
org.apache.sysml.hops.Hop.AggOp; import org.apache.sysml.hops.Hop.DataGenMethod; @@ -43,6 +44,7 @@ import org.apache.sysml.hops.Hop.Direction; import org.apache.sysml.hops.Hop.FileFormatTypes; import org.apache.sysml.hops.Hop.OpOp2; import org.apache.sysml.hops.Hop.OpOp3; +import org.apache.sysml.hops.Hop.OpOpDnn; import org.apache.sysml.hops.Hop.OpOpN; import org.apache.sysml.hops.Hop.ParamBuiltinOp; import org.apache.sysml.hops.Hop.ReOrgOp; @@ -1041,6 +1043,15 @@ public class HopRewriteUtils && ArrayUtils.contains(types, ((NaryOp) hop).getOp())); } + public static boolean isDnn(Hop hop, OpOpDnn type) { + return hop instanceof DnnOp && ((DnnOp)hop).getOp()==type; + } + + public static boolean isDnn(Hop hop, OpOpDnn... types) { + return ( hop instanceof DnnOp + && ArrayUtils.contains(types, ((DnnOp) hop).getOp())); + } + public static boolean isNonZeroIndicator(Hop pred, Hop hop ) { if( pred instanceof BinaryOp && ((BinaryOp)pred).getOp()==OpOp2.NOTEQUAL
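
For illustration, a minimal usage sketch of the new HopRewriteUtils.isDnn predicates added above. The sketch is not part of this commit: the class DnnRewriteSketch and its two methods are hypothetical, while isDnn, Hop.getInput(), and OpOpDnn come from the code in this diff.

    import org.apache.sysml.hops.Hop;
    import org.apache.sysml.hops.Hop.OpOpDnn;
    import org.apache.sysml.hops.rewrite.HopRewriteUtils;

    public class DnnRewriteSketch {
        // True if hop is a conv2d whose first input is a bias_add; this mirrors
        // the parent-skipping pattern used by DnnOp.inferCHWPQFromParentOp().
        public static boolean isConv2dOverBiasAdd(Hop hop) {
            return HopRewriteUtils.isDnn(hop, OpOpDnn.CONV2D)
                && HopRewriteUtils.isDnn(hop.getInput().get(0), OpOpDnn.BIAS_ADD);
        }
        // The varargs overload tests membership in a set of ops.
        public static boolean isPooling(Hop hop) {
            return HopRewriteUtils.isDnn(hop, OpOpDnn.MAX_POOL, OpOpDnn.AVG_POOL);
        }
    }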

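The P and Q computation in DnnOp.parseInput() above follows the standard convolution output-shape formula quoted in its comments: P = floor((H + 2*pad_h - R)/stride_h + 1) and Q = floor((W + 2*pad_w - S)/stride_w + 1). A minimal standalone sketch of that arithmetic follows; the class and method names are hypothetical (the actual code delegates to DnnUtils.getP and DnnUtils.getQ), and the example values are illustrative only.

    public class ConvShapeSketch {
        // P = floor((H + 2*pad_h - R)/stride_h + 1); for non-negative operands,
        // Java integer division already floors the quotient.
        public static long computeP(long H, long R, long strideH, long padH) {
            return (H + 2 * padH - R) / strideH + 1;
        }
        // Q = floor((W + 2*pad_w - S)/stride_w + 1)
        public static long computeQ(long W, long S, long strideW, long padW) {
            return (W + 2 * padW - S) / strideW + 1;
        }
        public static void main(String[] args) {
            // 28x28 input, 5x5 filter, stride 1, pad 2 => 28x28 output ("same" padding)
            System.out.println(computeP(28, 5, 1, 2)); // prints 28
            System.out.println(computeQ(28, 5, 1, 2)); // prints 28
        }
    }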