http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
index 63c57c9..82a76b6 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCUDA.java
@@ -108,9 +108,8 @@ public class LibMatrixCUDA {
        
        /**
         * Sets the internal state based on the DMLScript.FLOATING_POINT_PRECISION
-        * @throws DMLRuntimeException if error
         */
-       public static void resetFloatingPointPrecision() throws DMLRuntimeException {
+       public static void resetFloatingPointPrecision() {
                if(DMLScript.FLOATING_POINT_PRECISION.equalsIgnoreCase("double")) {
                        LibMatrixCUDA.CUDNN_DATA_TYPE = jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
                        LibMatrixCUDA.cudaSupportFunctions = new DoublePrecisionCudaSupportFunctions();
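
Note: the change above is safe because DMLRuntimeException is an unchecked exception (it extends RuntimeException in SystemML), so dropping the throws clause changes no call sites. A minimal sketch of the full selection logic; the "single" branch and its SinglePrecisionCudaSupportFunctions counterpart are assumed, not shown in this hunk:

    public static void resetFloatingPointPrecision() {
        if(DMLScript.FLOATING_POINT_PRECISION.equalsIgnoreCase("double")) {
            // 64-bit path: cuDNN descriptors and helper kernels use doubles
            LibMatrixCUDA.CUDNN_DATA_TYPE = jcuda.jcudnn.cudnnDataType.CUDNN_DATA_DOUBLE;
            LibMatrixCUDA.cudaSupportFunctions = new DoublePrecisionCudaSupportFunctions();
        }
        else if(DMLScript.FLOATING_POINT_PRECISION.equalsIgnoreCase("single")) {
            // 32-bit path: halves device memory per element (assumed branch)
            LibMatrixCUDA.CUDNN_DATA_TYPE = jcuda.jcudnn.cudnnDataType.CUDNN_DATA_FLOAT;
            LibMatrixCUDA.cudaSupportFunctions = new SinglePrecisionCudaSupportFunctions();
        }
        else {
            // Still throws; no declaration is needed for an unchecked exception
            throw new DMLRuntimeException("Unsupported precision: " + DMLScript.FLOATING_POINT_PRECISION);
        }
    }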
@@ -143,19 +142,12 @@ public class LibMatrixCUDA {
        //***************************** UTILS ********************************/
        //********************************************************************/
 
-       /*
-       static GPUContext gCtx throws DMLRuntimeException {
-                       return GPUContext.gCtx;
-       }
-        */
-
        /**
         * Utility function to get maximum number of threads supported by the active CUDA device.
         * @param gCtx a valid {@link GPUContext}
         * @return max threads
-        * @throws DMLRuntimeException if exception occurs
         */
-       static int getMaxThreads(GPUContext gCtx) throws DMLRuntimeException{
+       static int getMaxThreads(GPUContext gCtx){
                if (_MAX_THREADS == -1){
                        _MAX_THREADS = gCtx.getMaxThreadsPerBlock();
                }
@@ -166,9 +158,8 @@ public class LibMatrixCUDA {
         * Utility function to get maximum number of blocks supported by the active CUDA device.
         * @param gCtx a valid {@link GPUContext}
         * @return max blocks
-        * @throws DMLRuntimeException if exception occurs
         */
-       static int getMaxBlocks(GPUContext gCtx) throws DMLRuntimeException{
+       static int getMaxBlocks(GPUContext gCtx) {
                if (_MAX_BLOCKS == -1){
                        _MAX_BLOCKS = gCtx.getMaxBlocks();
                }
@@ -179,9 +170,8 @@ public class LibMatrixCUDA {
         * Utility function to get the warp size supported by the active CUDA device.
         * @param gCtx a valid {@link GPUContext}
         * @return warp size
-        * @throws DMLRuntimeException if exception occurs
         */
-       static int getWarpSize(GPUContext gCtx) throws DMLRuntimeException {
+       static int getWarpSize(GPUContext gCtx) {
                if (_WARP_SIZE == -1) {
                        _WARP_SIZE = gCtx.getWarpSize();
                }
@@ -202,9 +192,8 @@ public class LibMatrixCUDA {
         * @param mo matrix object
         * @param recomputeDenseNNZ recompute NNZ if dense
         * @return number of non-zeroes
-        * @throws DMLRuntimeException if error
         */
-       public static long getNnz(GPUContext gCtx, String instName, MatrixObject mo, boolean recomputeDenseNNZ) throws DMLRuntimeException {
+       public static long getNnz(GPUContext gCtx, String instName, MatrixObject mo, boolean recomputeDenseNNZ) {
                if(mo.getGPUObject(gCtx) != null && mo.getGPUObject(gCtx).isAllocated()) {
                        return mo.getGPUObject(gCtx).getNnz(instName, recomputeDenseNNZ);
                }
@@ -214,25 +203,25 @@ public class LibMatrixCUDA {
        }
 
 
-       protected static cusparseHandle getCusparseHandle(GPUContext gCtx) throws DMLRuntimeException{
+       protected static cusparseHandle getCusparseHandle(GPUContext gCtx) {
                return gCtx.getCusparseHandle();
        }
 
-       protected static cublasHandle getCublasHandle(GPUContext gCtx) throws DMLRuntimeException {
+       protected static cublasHandle getCublasHandle(GPUContext gCtx) {
                return gCtx.getCublasHandle();
        }
 
-       protected static JCudaKernels getCudaKernels(GPUContext gCtx) throws DMLRuntimeException {
+       protected static JCudaKernels getCudaKernels(GPUContext gCtx) {
                return gCtx.getKernels();
        }
        
-       public static Pointer double2float(GPUContext gCtx, Pointer A, Pointer ret, int numElems) throws DMLRuntimeException {
+       public static Pointer double2float(GPUContext gCtx, Pointer A, Pointer ret, int numElems) {
                getCudaKernels(gCtx).launchKernel("double2float", ExecutionConfig.getConfigForSimpleVectorOperations(numElems),
                                A, ret, numElems);
                return ret;
        }
        
-       public static Pointer float2double(GPUContext gCtx, Pointer A, Pointer ret, int numElems) throws DMLRuntimeException {
+       public static Pointer float2double(GPUContext gCtx, Pointer A, Pointer ret, int numElems) {
                getCudaKernels(gCtx).launchKernel("float2double", ExecutionConfig.getConfigForSimpleVectorOperations(numElems),
                                A, ret, numElems);
                return ret;
@@ -278,9 +267,8 @@ public class LibMatrixCUDA {
         * @param input input matrix object
         * @param instName  the invoking instruction's name for record {@link Statistics}.
         * @return jcuda pointer
-        * @throws DMLRuntimeException if error occurs while sparse to dense conversion
         */
-       protected static Pointer getDensePointer(GPUContext gCtx, MatrixObject input, String instName) throws DMLRuntimeException {
+       protected static Pointer getDensePointer(GPUContext gCtx, MatrixObject input, String instName) {
                if(isInSparseFormat(gCtx, input)) {
                        input.getGPUObject(gCtx).sparseToDense(instName);
                }
@@ -293,9 +281,8 @@ public class LibMatrixCUDA {
         * @param input input matrix
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @return a sparse matrix pointer
-        * @throws DMLRuntimeException if error occurs
         */
-       protected static CSRPointer getSparsePointer(GPUContext gCtx, MatrixObject input, String instName) throws DMLRuntimeException {
+       protected static CSRPointer getSparsePointer(GPUContext gCtx, MatrixObject input, String instName) {
                if(!isInSparseFormat(gCtx, input)) {
                        input.getGPUObject(gCtx).denseToSparse();
                }
@@ -322,9 +309,8 @@ public class LibMatrixCUDA {
         * @param input input image
         * @param dout  next layer error propagation
         * @param outputBlock output
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void reluBackward(GPUContext gCtx, String instName, MatrixObject input, MatrixObject dout, MatrixObject outputBlock) throws DMLRuntimeException {
+       public static void reluBackward(GPUContext gCtx, String instName, MatrixObject input, MatrixObject dout, MatrixObject outputBlock) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : reluBackward" + ", GPUContext=" + gCtx);
                }
@@ -352,9 +338,8 @@ public class LibMatrixCUDA {
         * @param outputBlock output
         * @param C number of channels
         * @param HW height*width
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void channelSums(GPUContext gCtx, String instName, MatrixObject input, MatrixObject outputBlock, long C, long HW) throws DMLRuntimeException {
+       public static void channelSums(GPUContext gCtx, String instName, MatrixObject input, MatrixObject outputBlock, long C, long HW) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : channelSums" + ", GPUContext=" + gCtx);
                }
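
channelSums reduces an N x (C*HW) input (one image per row, channels stored contiguously) to a C x 1 vector of per-channel totals. A host-side reference sketch of the intended semantics, assuming dense row-major input:

    static double[] channelSumsReference(double[] in, int N, int C, int HW) {
        double[] out = new double[C];              // one total per channel
        for(int n = 0; n < N; n++)
            for(int c = 0; c < C; c++)
                for(int i = 0; i < HW; i++)
                    out[c] += in[n * C * HW + c * HW + i];
        return out;
    }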
@@ -384,9 +369,8 @@ public class LibMatrixCUDA {
         * @param input input image
         * @param bias bias
         * @param outputBlock output
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void biasMultiply(GPUContext gCtx, String instName, MatrixObject input, MatrixObject bias, MatrixObject outputBlock) throws DMLRuntimeException {
+       public static void biasMultiply(GPUContext gCtx, String instName, MatrixObject input, MatrixObject bias, MatrixObject outputBlock) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : biasMultiply" + ", GPUContext=" + gCtx);
                }
@@ -425,9 +409,8 @@ public class LibMatrixCUDA {
         * @param input input image
         * @param bias bias
         * @param outputBlock output
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void biasAdd(GPUContext gCtx, String instName, MatrixObject input, MatrixObject bias, MatrixObject outputBlock) throws DMLRuntimeException {
+       public static void biasAdd(GPUContext gCtx, String instName, MatrixObject input, MatrixObject bias, MatrixObject outputBlock) {
                Pointer imagePointer = getDensePointer(gCtx, input, instName);
                Pointer biasPointer = getDensePointer(gCtx, bias, instName);
                Pointer outputPointer = getDensePointer(gCtx, outputBlock, instName);
@@ -453,9 +436,8 @@ public class LibMatrixCUDA {
         * @param rows rows in input image
         * @param cols cols in input image
         * @param k rows in bias
-        * @throws DMLRuntimeException
         */
-       private static void biasAdd(GPUContext gCtx, String instName, Pointer image, Pointer bias, Pointer output, int rows, int cols, int k) throws DMLRuntimeException {
+       private static void biasAdd(GPUContext gCtx, String instName, Pointer image, Pointer bias, Pointer output, int rows, int cols, int k) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : biasAdd" + ", GPUContext=" + gCtx);
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : biasAdd" + ", GPUContext=" + gCtx);
                }
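
The private biasAdd broadcasts a k x 1 bias over a rows x cols image in which each bias entry covers cols/k consecutive columns (one channel). A host-side sketch of that broadcast, under the assumption that cols is a multiple of k and storage is row-major:

    static void biasAddReference(double[] image, double[] bias, double[] out, int rows, int cols, int k) {
        int pq = cols / k;                         // spatial positions per channel (assumed layout)
        for(int r = 0; r < rows; r++)
            for(int j = 0; j < cols; j++)
                out[r * cols + j] = image[r * cols + j] + bias[j / pq];
    }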
@@ -491,10 +473,9 @@ public class LibMatrixCUDA {
         * @param left             input matrix, as in a tsmm expression like A %*% A' or A' %*% A, we just need to check whether the left one is transposed or not, I named it 'left'
         * @param outputName       output matrix name
         * @param isLeftTransposed if true, left transposed
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void matmultTSMM(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject left, String outputName,
-                       boolean isLeftTransposed) throws DMLRuntimeException {
+                       boolean isLeftTransposed) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : matmultTSMM" + ", GPUContext=" + gCtx);
                }
@@ -550,9 +531,8 @@ public class LibMatrixCUDA {
         * @param gCtx     a valid {@link GPUContext}
         * @param instName instruction name
         * @param ret      upper triangular matrix
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static void copyUpperToLowerTriangle(GPUContext gCtx, String instName, MatrixObject ret) throws DMLRuntimeException {
+       private static void copyUpperToLowerTriangle(GPUContext gCtx, String instName, MatrixObject ret) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : copyUpperToLowerTriangle" + ", GPUContext=" + gCtx);
                }
@@ -589,10 +569,8 @@ public class LibMatrixCUDA {
         * @param in1      input matrix
         * @param output   output matrix/scalar name
         * @param op       Instance of {@link AggregateUnaryOperator} which encapsulates the direction of reduction/aggregation and the reduction operation.
-        * @throws DMLRuntimeException if {@link DMLRuntimeException} occurs
         */
-       public static void unaryAggregate(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String output, AggregateUnaryOperator op)
-                       throws DMLRuntimeException {
+       public static void unaryAggregate(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String output, AggregateUnaryOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(LOG.isTraceEnabled()) {
@@ -923,9 +901,8 @@ public class LibMatrixCUDA {
         * @param out           output matrix on GPU
         * @param rlen  row length
         * @param clen  column length
-        * @throws DMLRuntimeException if error
         */
-       private static void squareMatrix(GPUContext gCtx, String instName, Pointer in, Pointer out, int rlen, int clen) throws DMLRuntimeException {
+       private static void squareMatrix(GPUContext gCtx, String instName, Pointer in, Pointer out, int rlen, int clen) {
                ScalarOperator power2op = new RightScalarOperator(Power.getPowerFnObject(), 2);
                matrixScalarOp(gCtx, instName, in, 2, rlen, clen, out, power2op);
        }
@@ -937,9 +914,8 @@ public class LibMatrixCUDA {
         * @param in                                                    {@link Pointer} to matrix in device memory
         * @param n                                                             size of array
         * @return      the reduced value
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static double reduceAll(GPUContext gCtx, String instName, String kernelFunction, Pointer in, int n) throws DMLRuntimeException {
+       private static double reduceAll(GPUContext gCtx, String instName, String kernelFunction, Pointer in, int n) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : reduceAll for " + kernelFunction + ", GPUContext=" + gCtx);
                }
@@ -981,9 +957,8 @@ public class LibMatrixCUDA {
         * @param out                                                   {@link Pointer} to output matrix in device memory (size - rows * 1)
         * @param rows                                          number of rows in input matrix
         * @param cols                                          number of columns in input matrix
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static void reduceRow(GPUContext gCtx, String instName, String kernelFunction, Pointer in, Pointer out, int rows, int cols) throws DMLRuntimeException {
+       private static void reduceRow(GPUContext gCtx, String instName, String kernelFunction, Pointer in, Pointer out, int rows, int cols) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : reduceRow for " + kernelFunction + ", GPUContext=" + gCtx);
                }
@@ -1009,9 +984,8 @@ public class LibMatrixCUDA {
         * @param out                                                   {@link Pointer} to output matrix in device memory (size - 1 * cols)
         * @param rows                                          number of rows in input matrix
         * @param cols                                          number of columns in input matrix
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static void reduceCol(GPUContext gCtx, String instName, String kernelFunction, Pointer in, Pointer out, int rows, int cols) throws DMLRuntimeException {
+       private static void reduceCol(GPUContext gCtx, String instName, String kernelFunction, Pointer in, Pointer out, int rows, int cols) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : reduceCol for " + kernelFunction + ", GPUContext=" + gCtx);
                }
@@ -1033,7 +1007,7 @@ public class LibMatrixCUDA {
         * @param n size of input array
         * @return integer array containing {blocks, threads, shared memory}
         */
-       private static int[] getKernelParamsForReduceAll(GPUContext gCtx, int n) throws DMLRuntimeException{
+       private static int[] getKernelParamsForReduceAll(GPUContext gCtx, int n) {
                final int MAX_THREADS = getMaxThreads(gCtx);
                final int MAX_BLOCKS = getMaxBlocks(gCtx);
                final int WARP_SIZE = getWarpSize(gCtx);
@@ -1056,7 +1030,7 @@ public class LibMatrixCUDA {
         * @param cols number of columns in input matrix
         * @return integer array containing {blocks, threads, shared memory}
         */
-       private static int[] getKernelParamsForReduceByRow(GPUContext gCtx, int rows, int cols) throws DMLRuntimeException {
+       private static int[] getKernelParamsForReduceByRow(GPUContext gCtx, int rows, int cols) {
                final int WARP_SIZE = getWarpSize(gCtx);
                final int MAX_THREADS = getMaxThreads(gCtx);
                int threads = (cols < MAX_THREADS *2) ? nextPow2((cols + 1)/ 2) : MAX_THREADS;
@@ -1068,7 +1042,7 @@ public class LibMatrixCUDA {
                return new int[] {blocks, threads, sharedMemSize};
        }
 
-       private static int[] getKernelParamsForReduceByCol(GPUContext gCtx, int rows, int cols) throws DMLRuntimeException {
+       private static int[] getKernelParamsForReduceByCol(GPUContext gCtx, int rows, int cols) {
                final int MAX_THREADS = getMaxThreads(gCtx);
                final int MAX_BLOCKS = getMaxBlocks(gCtx);
                final int WARP_SIZE = getWarpSize(gCtx);
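
The sizing logic above follows the standard CUDA tree-reduction idiom: each thread pre-reduces two elements, so the thread count is the next power of two of ceil(cols/2), capped at the device maximum, with one block per output row and one shared-memory accumulator per thread. A standalone sketch of the row-wise case (the real code may additionally pad shared memory for sub-warp thread counts):

    static int[] reduceByRowParams(int rows, int cols, int maxThreads, int sizeOfDataType) {
        int threads = (cols < maxThreads * 2) ? nextPow2((cols + 1) / 2) : maxThreads;
        int blocks = rows;                          // one thread block per output row
        int sharedMem = threads * sizeOfDataType;   // one accumulator per thread
        return new int[] { blocks, threads, sharedMem };
    }

    static int nextPow2(int x) {                    // smallest power of two >= x
        int p = 1;
        while(p < x) p <<= 1;
        return p;
    }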
@@ -1112,9 +1086,8 @@ public class LibMatrixCUDA {
         * @param in         input matrix
         * @param outputName output matrix name
         * @param op         scalar operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void matrixScalarRelational(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, ScalarOperator op) throws DMLRuntimeException {
+       public static void matrixScalarRelational(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, ScalarOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                double constant = op.getConstant();
@@ -1149,9 +1122,8 @@ public class LibMatrixCUDA {
         * @param outputName        output matrix name
         * @param isInputTransposed true if input transposed
         * @param op                scalar operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void matrixScalarArithmetic(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, boolean isInputTransposed, ScalarOperator op) throws DMLRuntimeException {
+       public static void matrixScalarArithmetic(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, boolean isInputTransposed, ScalarOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                double constant = op.getConstant();
@@ -1233,10 +1205,9 @@ public class LibMatrixCUDA {
         * @param in2 input matrix 2
         * @param outputName output matrix name
         * @param op binary operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void matrixMatrixRelational(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2,
-                       String outputName, BinaryOperator op) throws DMLRuntimeException {
+                       String outputName, BinaryOperator op) {
 
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
@@ -1270,10 +1241,9 @@ public class LibMatrixCUDA {
         * @param isLeftTransposed  true if left-transposed
         * @param isRightTransposed true if right-transposed
         * @param op                binary operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void matrixMatrixArithmetic(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2,
-                       String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) throws DMLRuntimeException {
+                       String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                boolean isCUDALibAvailable = (op.fn instanceof Plus || op.fn instanceof Minus) && !isSparseAndEmpty(gCtx, in1) && !isSparseAndEmpty(gCtx, in2) && !isVector(in1) && !isVector(in2);
@@ -1309,10 +1279,9 @@ public class LibMatrixCUDA {
         * @param outputName        output variable name
         * @param isInputTransposed true if input is transposed
         * @param op                operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void matrixScalarOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName, boolean isInputTransposed,
-                       ScalarOperator op) throws DMLRuntimeException {
+                       ScalarOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(isInputTransposed)
@@ -1341,9 +1310,8 @@ public class LibMatrixCUDA {
         * @param clenA    column length of matrix a
         * @param c        the dense output matrix
         * @param op       operation to perform
-        * @throws DMLRuntimeException throws runtime exception
         */
-       private static void matrixScalarOp(GPUContext gCtx, String instName, Pointer a, double scalar, int rlenA, int clenA, Pointer c, ScalarOperator op) throws DMLRuntimeException {
+       private static void matrixScalarOp(GPUContext gCtx, String instName, Pointer a, double scalar, int rlenA, int clenA, Pointer c, ScalarOperator op) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : matrix_scalar_op" + ", GPUContext=" + gCtx);
                }
@@ -1369,10 +1337,9 @@ public class LibMatrixCUDA {
         * @param isLeftTransposed  true if left matrix is transposed
         * @param isRightTransposed true if right matrix is transposed
         * @param op                operator
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void matrixMatrixOp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2,
-                       String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) throws DMLRuntimeException {
+                       String outputName, boolean isLeftTransposed, boolean isRightTransposed, BinaryOperator op) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                boolean isEmpty1 = isSparseAndEmpty(gCtx, in1);
@@ -1447,9 +1414,8 @@ public class LibMatrixCUDA {
         * @param vecStatusB    if matrix B is a vector
         * @param c                                             output matrix of size (maxRlen, maxClen) allocated on GPU
         * @param op                                    the operation to perform
-        * @throws DMLRuntimeException
         */
-       private static void matrixMatrixOp(GPUContext gCtx, String instName, Pointer a, Pointer b, int maxRlen, int maxClen, int vecStatusA, int vecStatusB, Pointer c, BinaryOperator op) throws DMLRuntimeException {
+       private static void matrixMatrixOp(GPUContext gCtx, String instName, Pointer a, Pointer b, int maxRlen, int maxClen, int vecStatusA, int vecStatusB, Pointer c, BinaryOperator op) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : matrix_matrix_cellwise_op" + ", GPUContext=" + gCtx);
                }
@@ -1502,7 +1468,7 @@ public class LibMatrixCUDA {
                return isEmpty1;
        }
 
-       private static void deviceCopy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject src, String outputName, boolean isInputTransposed) throws DMLRuntimeException {
+       private static void deviceCopy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject src, String outputName, boolean isInputTransposed) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(!isInputTransposed)
@@ -1518,9 +1484,8 @@ public class LibMatrixCUDA {
         * @param instName   the invoking instruction's name for record {@link Statistics}.
         * @param src        source matrix
         * @param outputName destination variable name
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static void deviceCopy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject src, String outputName) throws DMLRuntimeException {
+       private static void deviceCopy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject src, String outputName) {
                Pointer srcPtr = getDensePointer(gCtx, src, instName); // TODO: FIXME: Implement sparse kernel
                MatrixObject out = ec.getMatrixObject(outputName);
                getDenseMatrixOutputForGPUInstruction(ec, instName, outputName, toInt(src.getNumRows()), toInt(src.getNumColumns()));   // Allocated the dense output matrix
@@ -1537,9 +1502,8 @@ public class LibMatrixCUDA {
         * @param outputName    (internal) name of the matrix that is to be filled
         * @param numRows number of rows of output matrix object
         * @param numCols number of columns of output matrix object
-        * @throws DMLRuntimeException if error
         */
-       private static void setOutputToConstant(ExecutionContext ec, GPUContext gCtx, String instName, double constant, String outputName, long numRows, long numCols) throws DMLRuntimeException {
+       private static void setOutputToConstant(ExecutionContext ec, GPUContext gCtx, String instName, double constant, String outputName, long numRows, long numCols) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(constant == 0) {
@@ -1566,9 +1530,8 @@ public class LibMatrixCUDA {
         * @param dest destination matrix
         * @param rlen number of rows
         * @param clen number of columns
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       private static void deviceCopy(String instName, Pointer src, Pointer dest, int rlen, int clen) throws DMLRuntimeException {
+       private static void deviceCopy(String instName, Pointer src, Pointer dest, int rlen, int clen) {
                long t0=0;
                if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
                int size = rlen * clen * sizeOfDataType;
@@ -1585,7 +1548,7 @@ public class LibMatrixCUDA {
         * 11=min, 12=max, 13=and, 14=or, 15=minus1multiply, 16=minusnz,
         * 17=modulus, 18=integer division}
         */
-       private static int getBinaryOp(ValueFunction fn) throws DMLRuntimeException {
+       private static int getBinaryOp(ValueFunction fn) {
                if(fn instanceof Plus) return 0;
                else if(fn instanceof Minus) return 1;
                else if(fn instanceof Multiply) return 2;
@@ -1626,10 +1589,9 @@ public class LibMatrixCUDA {
         * @param isRightTransposed true if right matrix is transposed
         * @param alpha alpha
         * @param beta beta
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void dgeam(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName,
-                       boolean isLeftTransposed, boolean isRightTransposed, double alpha, double beta) throws DMLRuntimeException {
+                       boolean isLeftTransposed, boolean isRightTransposed, double alpha, double beta) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(LOG.isTraceEnabled()) {
@@ -1757,9 +1719,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in input matrix
         * @param outputName output matrix name
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void transpose(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName) throws DMLRuntimeException {
+       public static void transpose(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in, String outputName) {
                // C = alpha* op( A ) + beta* op ( B )
                // = 1.0 * A^T + 0.0 * A^T
                if (ec.getGPUContext(0) != gCtx)
@@ -1771,7 +1732,7 @@ public class LibMatrixCUDA {
        //******************* End of Re-org Functions ************************/
        //********************************************************************/
 
-       public static int toInt(long num) throws DMLRuntimeException {
+       public static int toInt(long num) {
                if(num >= Integer.MAX_VALUE || num <= Integer.MIN_VALUE) {
                        throw new DMLRuntimeException("GPU : Exceeded supported size " + num);
                }
@@ -1791,10 +1752,9 @@ public class LibMatrixCUDA {
         * @param in1 input matrix object
         * @param ixrange index range (0-based)
         * @param outputName output matrix object
-        * @throws DMLRuntimeException if error occurs
         */
        public static void sliceOperations(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1,
-                       IndexRange ixrange, String outputName) throws DMLRuntimeException {
+                       IndexRange ixrange, String outputName) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException(
                                        "GPU : Invalid internal state, the 
GPUContext set with the ExecutionContext is not the same used to run this 
LibMatrixCUDA function");
@@ -1842,10 +1802,9 @@ public class LibMatrixCUDA {
         * @param cl column lower
         * @param cu column upper
         * @param inClen input number of columns
-        * @throws DMLRuntimeException if error occurs
         */
        protected static void sliceDenseDense(GPUContext gCtx, String instName, Pointer inPointer, Pointer outPointer, 
-                       int rl, int ru, int cl, int cu, int inClen) throws DMLRuntimeException {
+                       int rl, int ru, int cl, int cu, int inClen) {
                long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
                long retClen = cu - cl + 1;
                if (inClen == retClen) {
@@ -1871,10 +1830,9 @@ public class LibMatrixCUDA {
         * @param cl column lower
         * @param cu column upper
         * @param inClen number of columns of input matrix
-        * @throws DMLRuntimeException if error
         */
        protected static void sliceSparseDense(GPUContext gCtx, String instName, CSRPointer inPointer, Pointer outPointer, 
-                       int rl, int ru, int cl, int cu, int inClen) throws DMLRuntimeException {
+                       int rl, int ru, int cl, int cu, int inClen) {
                int size = getNnz(inPointer, rl, ru);
                // Return since nnz of the output is 0 as outPointer is expected to be zeroed out.
                if(size == 0) return;
@@ -1920,7 +1878,7 @@ public class LibMatrixCUDA {
                return ruPtr[0] - rlPtr[0];
        }
 
-       public static void cbind(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) throws DMLRuntimeException {
+       public static void cbind(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(LOG.isTraceEnabled()) {
@@ -1955,7 +1913,7 @@ public class LibMatrixCUDA {
 
        }
 
-       public static void rbind(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) throws DMLRuntimeException {
+       public static void rbind(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                if(LOG.isTraceEnabled()) {
@@ -2007,9 +1965,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void exp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void exp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : exp" + ", GPUContext=" + gCtx);
                }
@@ -2024,9 +1981,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void sqrt(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void sqrt(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : sqrt" + ", GPUContext=" + gCtx);
                }
@@ -2041,9 +1997,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void round(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void round(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : round" + ", GPUContext=" + gCtx);
                }
@@ -2058,9 +2013,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void abs(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void abs(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : abs" + ", GPUContext=" + gCtx);
                }
@@ -2075,9 +2029,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void log(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void log(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : log" + ", GPUContext=" + gCtx);
                }
@@ -2092,9 +2045,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void floor(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void floor(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : floor" + ", GPUContext=" + gCtx);
                }
@@ -2109,9 +2061,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void ceil(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void ceil(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : ceil" + ", GPUContext=" + gCtx);
                }
@@ -2126,9 +2077,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void sin(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void sin(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : sin" + ", GPUContext=" + gCtx);
                }
@@ -2143,9 +2093,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void cos(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void cos(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : cos" + ", GPUContext=" + gCtx);
                }
@@ -2160,9 +2109,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void tan(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void tan(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : tan" + ", GPUContext=" + gCtx);
                }
@@ -2177,9 +2125,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void sinh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void sinh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : sinh" + ", GPUContext=" + gCtx);
                }
@@ -2194,9 +2141,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void cosh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void cosh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : cosh" + ", GPUContext=" + gCtx);
                }
@@ -2211,9 +2157,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void tanh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void tanh(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : tanh" + ", GPUContext=" + gCtx);
                }
@@ -2228,9 +2173,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void asin(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void asin(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : asin" + ", GPUContext=" + gCtx);
                }
@@ -2245,9 +2189,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void acos(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void acos(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : acos" + ", GPUContext=" + gCtx);
                }
@@ -2262,9 +2205,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void atan(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void atan(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : atan" + ", GPUContext=" + gCtx);
                }
@@ -2279,9 +2221,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void sign(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void sign(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : sign" + ", GPUContext=" + gCtx);
                }
@@ -2296,9 +2237,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void sigmoid(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void sigmoid(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : sigmoid" + ", GPUContext=" + gCtx);
                }
@@ -2317,9 +2257,8 @@ public class LibMatrixCUDA {
         * @param outputName output matrix name
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param kernelTimer the name of the timer to measure the kernel invocation
-        * @throws DMLRuntimeException
         */
-       private static void unaryOp(ExecutionContext ec, GPUContext gCtx, MatrixObject in1, String kernel, double sparseAndEmptyFillValue, String outputName, String instName, String kernelTimer) throws DMLRuntimeException {
+       private static void unaryOp(ExecutionContext ec, GPUContext gCtx, MatrixObject in1, String kernel, double sparseAndEmptyFillValue, String outputName, String instName, String kernelTimer) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                GPUObject in = in1.getGPUObject(gCtx);
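
Each of the element-wise wrappers in the hunks above (exp, sqrt, sin, ..., sigmoid) delegates to this shared unaryOp helper; removing throws from the helper is what lets every wrapper shed its own clause. The shape of a wrapper, with the kernel name and timer constant assumed from the exp case rather than quoted from this diff:

    public static void exp(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
        if(LOG.isTraceEnabled()) {
            LOG.trace("GPU : exp" + ", GPUContext=" + gCtx);
        }
        // exp(0) = 1, so a sparse-and-empty input is filled with 1.0
        unaryOp(ec, gCtx, in1, "matrix_exp", 1.0, outputName, instName, GPUInstruction.MISC_TIMER_EXP_KERNEL);
    }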
@@ -2352,10 +2291,9 @@ public class LibMatrixCUDA {
         * @param in2 input matrix 2
         * @param outputName output matrix name
         * @param constant pointer constant
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void axpy(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2,
-                       String outputName,  double constant) throws DMLRuntimeException {
+                       String outputName,  double constant) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
                Pointer A = getDensePointer(gCtx, in1, instName);
@@ -2412,9 +2350,8 @@ public class LibMatrixCUDA {
         * @param in1        input matrix A
         * @param in2        input matrix B
         * @param outputName name of the output matrix
-        * @throws DMLRuntimeException if an error occurs
         */
-       public static void solve(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) throws DMLRuntimeException {
+       public static void solve(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, MatrixObject in2, String outputName) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal state, the GPUContext set with the ExecutionContext is not the same used to run this LibMatrixCUDA function");
 
@@ -2533,9 +2470,8 @@ public class LibMatrixCUDA {
         * @param numRows number of rows of output matrix object
         * @param numCols number of columns of output matrix object
         * @return      the matrix object
-        * @throws DMLRuntimeException  if an error occurs
         */
-       protected static MatrixObject getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String instName, String name, long numRows, long numCols) throws DMLRuntimeException {
+       protected static MatrixObject getDenseMatrixOutputForGPUInstruction(ExecutionContext ec, String instName, String name, long numRows, long numCols) {
                long t0=0;
                if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
                Pair<MatrixObject, Boolean> mb = ec.getDenseMatrixOutputForGPUInstruction(name, numRows, numCols);
@@ -2555,9 +2491,8 @@ public class LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param name  name of input matrix (that the {@link ExecutionContext} is aware of)
         * @return      the matrix object
-        * @throws DMLRuntimeException  if an error occurs
         */
-       private static MatrixObject getSparseMatrixOutputForGPUInstruction(ExecutionContext ec, long numRows, long numCols, long nnz, String instName, String name) throws DMLRuntimeException {
+       private static MatrixObject getSparseMatrixOutputForGPUInstruction(ExecutionContext ec, long numRows, long numCols, long nnz, String instName, String name) {
                long t0=0;
                if (DMLScript.FINEGRAINED_STATISTICS) t0 = System.nanoTime();
                Pair<MatrixObject, Boolean> mb = ec.getSparseMatrixOutputForGPUInstruction(name, numRows, numCols, nnz);

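The whole LibMatrixCUDA diff above is one mechanical change: DMLRuntimeException extends RuntimeException, so every throws DMLRuntimeException clause and its matching @throws javadoc tag was redundant, and the guard checks keep throwing exactly as before. A minimal self-contained illustration of the pattern, mirroring the toInt hunk above (the one-line exception class is a stand-in for the real SystemML class):

    class DMLRuntimeException extends RuntimeException {   // unchecked, as in SystemML
        DMLRuntimeException(String msg) { super(msg); }
    }

    static int toInt(long num) {                           // no throws clause required
        if(num >= Integer.MAX_VALUE || num <= Integer.MIN_VALUE) {
            throw new DMLRuntimeException("GPU : Exceeded supported size " + num);
        }
        return (int) num;                                  // safe after the range check
    }
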
http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
index 6642ee0..26a4d2e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNN.java
@@ -72,7 +72,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
        protected static int CONVOLUTION_PREFERENCE = cudnnConvolutionFwdPreference.CUDNN_CONVOLUTION_FWD_NO_WORKSPACE;
        private static final Log LOG = LogFactory.getLog(LibMatrixCuDNN.class.getName());
 
-       protected static cudnnHandle getCudnnHandle(GPUContext gCtx) throws DMLRuntimeException {
+       protected static cudnnHandle getCudnnHandle(GPUContext gCtx) {
                return gCtx.getCudnnHandle();
        }
        
@@ -99,11 +99,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param P        output height
         * @param Q        output width
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if error
         */
        public static void conv2dBiasAdd(GPUContext gCtx, String instName, MatrixObject image, MatrixObject bias, MatrixObject filter, MatrixObject output, int N, int C, int H, int W,
-                       int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget)
-                                       throws DMLRuntimeException {
+                       int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget) {
                conv2d(gCtx, instName, image, filter, output, N, C, H, W, K, R, S, pad_h, pad_w, stride_h, stride_w, P, Q, intermediateMemoryBudget);
                //cudaDeviceSynchronize;
                biasAdd(gCtx, instName, output, bias, output);
@@ -129,10 +127,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param P        output height
         * @param Q        output width
         * @return output im2col pointer (the caller is expected to free this pointer) or null if image is an empty matrix
-        * @throws DMLRuntimeException if error
         */
        private static Pointer denseIm2col(GPUContext gCtx, String instName, MatrixObject image, boolean isSparseImage, long N, long C, long H, long W,
-                       int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) throws DMLRuntimeException {
+                       int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q) {
                Pointer im2colPointer = null;
                long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() : 0;
                if(isSparseImage) {
@@ -185,10 +182,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param P        output height
         * @param Q        output width
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if error
         */
        public static void conv2d(GPUContext gCtx, String instName, MatrixObject image, MatrixObject filter, MatrixObject outputBlock, int N, int C, int H, int W,
-                       int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget) throws DMLRuntimeException {
+                       int K, int R, int S, int pad_h, int pad_w, int stride_h, int stride_w, int P, int Q, double intermediateMemoryBudget) {
 
                long CHW = C*H*W; long KPQ = K*P*Q; long CRS = C*R*S; 
                long NCHW = N*CHW; long NKPQ = N*KPQ; long KCRS = K*CRS;
@@ -272,9 +268,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link Statistics}.
         * @param in1   input matrix
         * @param outputName    output matrix name
-        * @throws DMLRuntimeException  if DMLRuntimeException occurs
         */
-       public static void softmax(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) throws DMLRuntimeException {
+       public static void softmax(ExecutionContext ec, GPUContext gCtx, String instName, MatrixObject in1, String outputName) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : softmax" + ", GPUContext=" + gCtx);
                }
@@ -297,9 +292,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param H height
         * @param W width
         * @return cudnn tensor descriptor
-        * @throws DMLRuntimeException if the input descriptor and matrix 
dimensions don't match
         */
-       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) throws DMLRuntimeException {
+       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) {
                cudnnTensorDescriptor tensorDescriptor = new 
cudnnTensorDescriptor();
                cudnnCreateTensorDescriptor(tensorDescriptor);
                cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, 
LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W);
@@ -316,9 +310,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param dim4 input2 number of columns
         * @param dim5 output number of rows
         * @param dim6 output number of columns
-        * @throws DMLRuntimeException the exception with the appropriate 
message
         */
-       private static void throwCuDNNDimensionError(long dim1, long dim2, long 
dim3, long dim4) throws DMLRuntimeException {
+       private static void throwCuDNNDimensionError(long dim1, long dim2, long 
dim3, long dim4) {
                throw new DMLRuntimeException("The dimensions of input/output 
matrices is too large to execute a CuDNN kernel. "
                                + "Max CuDNN matrix size:" + 
maxNumElementsOfCuDNNTensor + ". "
                                + "Given input matrix dimensions: [" + dim1 + 
"," + dim2 + "]. Output dimension:  [" + dim3 + "," + dim4 + "].");
@@ -333,9 +326,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param dim4 input2 number of columns
         * @param dim5 output number of rows
         * @param dim6 output number of columns
-        * @throws DMLRuntimeException the exception with the appropriate 
message
         */
-       private static void throwCuDNNDimensionError(long dim1, long dim2, long 
dim3, long dim4, long dim5, long dim6) throws DMLRuntimeException {
+       private static void throwCuDNNDimensionError(long dim1, long dim2, long 
dim3, long dim4, long dim5, long dim6) {
                throw new DMLRuntimeException("The dimensions of input/output 
matrices is too large to execute a CuDNN kernel. "
                                + "Max CuDNN matrix size:" + 
maxNumElementsOfCuDNNTensor + ". "
                                + "Given input matrix dimensions: [" + dim1 + 
"," + dim2 + "], [" + dim3 + "," + dim4 + "]. Output dimension: [" + dim5 + "," 
+ dim6 + "]");
@@ -352,11 +344,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param filter   the filter allocated on the GPU
         * @param output   the output matrix allocated on the GPU
         * @param algo     cudnn algorithm wrapper
-        * @throws DMLRuntimeException if error
         */
        private static void cudnnConv2d(GPUContext gCtx, String instName, 
Pointer image, Pointer filter, Pointer output, 
-                       LibMatrixCuDNNConvolutionAlgorithm algo)
-                                       throws DMLRuntimeException {
+                       LibMatrixCuDNNConvolutionAlgorithm algo) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : conv2d" + ", GPUContext=" + gCtx);
                }
@@ -400,12 +390,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param P output activation height
         * @param Q output activation width
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardFilter(GPUContext gCtx, String 
instName, MatrixObject image, MatrixObject dout,
                        MatrixObject outputBlock, int N, int C, int H, int W, 
int K, int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, double intermediateMemoryBudget) throws 
DMLRuntimeException {
+                       int Q, double intermediateMemoryBudget) {
                long CHW = C*H*W; long KPQ = K*P*Q; long CRS = C*R*S; 
                long NCHW = N*CHW; long NKPQ = N*KPQ; long KCRS = K*CRS;
                
@@ -474,10 +463,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param doutPointer pointer to errors from next layer
         * @param dwPointer  output errors
         * @param algo     cudnn algorithm wrapper
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void cudnnConv2dBackwardFilter(GPUContext gCtx, String 
instName, Pointer imagePointer, Pointer doutPointer,
-                       Pointer dwPointer, LibMatrixCuDNNConvolutionAlgorithm 
algo) throws DMLRuntimeException {
+                       Pointer dwPointer, LibMatrixCuDNNConvolutionAlgorithm 
algo) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : conv2dBackwardFilter" + ", 
GPUContext=" + gCtx);
                }
@@ -517,12 +505,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param P output activation height
         * @param Q output activation width
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void conv2dBackwardData(GPUContext gCtx, String instName, 
MatrixObject filter, MatrixObject dout,
                        MatrixObject output, int N, int C, int H, int W, int K, 
int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, double intermediateMemoryBudget) throws 
DMLRuntimeException {
+                       int Q, double intermediateMemoryBudget) {
                long CHW = C*H*W; long KPQ = K*P*Q; long CRS = C*R*S; 
                long NCHW = N*CHW; long NKPQ = N*KPQ; long KCRS = K*CRS;
 
@@ -579,10 +566,9 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param dy pointer to errors from next layer
         * @param dx pointer to  output errors
         * @param algo cudnn algorithm wrapper
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        private static void cudnnConv2dBackwardData(GPUContext gCtx, String 
instName, Pointer w, Pointer dy,
-                       Pointer dx, LibMatrixCuDNNConvolutionAlgorithm algo) 
throws DMLRuntimeException {
+                       Pointer dx, LibMatrixCuDNNConvolutionAlgorithm algo) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : conv2dBackwardData" + ", GPUContext=" 
+ gCtx);
                }
@@ -621,12 +607,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param Q                             (W - S + 1 + 2*pad_w)/stride_w
         * @param poolingType   type of pooling
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void pooling(GPUContext gCtx, String instName, 
MatrixObject image,
                        MatrixObject outputBlock, int N, int C, int H, int W, 
int K, int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType, double 
intermediateMemoryBudget) throws DMLRuntimeException {
+                       int Q, PoolingType poolingType, double 
intermediateMemoryBudget) {
                long CHW = C*H*W; long CPQ = C*P*Q;  
                long NCHW = N*CHW; long NCPQ = N*CPQ; 
 
@@ -654,7 +639,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
        private static void cudnnPoolingHelper(GPUContext gCtx, String 
instName, Pointer x,
                        Pointer y, int N, int C, int H, int W, int K, int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType) throws 
DMLRuntimeException {
+                       int Q, PoolingType poolingType) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : perform pooling" + ", GPUContext=" + 
gCtx);
                }
@@ -700,12 +685,11 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param Q                             (W - S + 1 + 2*pad_w)/stride_w
         * @param poolingType   type of pooling
         * @param intermediateMemoryBudget intermediate memory budget
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void poolingBackward(GPUContext gCtx, String instName, 
MatrixObject image, MatrixObject dout,
                        MatrixObject maxpoolOutput, MatrixObject outputBlock, 
int N, int C, int H, int W, int K, int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType, double 
intermediateMemoryBudget) throws DMLRuntimeException {
+                       int Q, PoolingType poolingType, double 
intermediateMemoryBudget) {
                long CHW = C*H*W; long CPQ = C*P*Q;  
                long NCHW = N*CHW; long NCPQ = N*CPQ; 
 
@@ -750,7 +734,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                        Pointer x, Pointer dy, Pointer y, Pointer dx, 
                        int N, int C, int H, int W, int K, int R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType) throws 
DMLRuntimeException {
+                       int Q, PoolingType poolingType) {
                if(LOG.isTraceEnabled()) {
                        LOG.trace("GPU : maxpoolingBackward" + ", GPUContext=" 
+ gCtx);
                }
@@ -793,7 +777,7 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
                }
        }
 
-       private static void cudnnReLU(GPUContext gCtx, String instName, 
MatrixObject in, Pointer dstData, cudnnTensorDescriptor srcTensorDesc) throws 
DMLRuntimeException {
+       private static void cudnnReLU(GPUContext gCtx, String instName, 
MatrixObject in, Pointer dstData, cudnnTensorDescriptor srcTensorDesc) {
                long t0=0;
                try {
                        if(LOG.isTraceEnabled()) {
@@ -828,9 +812,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param instName the invoking instruction's name for record {@link 
Statistics}.
         * @param in input matrix
         * @param outputName    name of the output matrix
-        * @throws DMLRuntimeException  if an error occurs
         */
-       public static void relu(ExecutionContext ec, GPUContext gCtx, String 
instName, MatrixObject in, String outputName) throws DMLRuntimeException {
+       public static void relu(ExecutionContext ec, GPUContext gCtx, String 
instName, MatrixObject in, String outputName) {
                if (ec.getGPUContext(0) != gCtx)
                        throw new DMLRuntimeException("GPU : Invalid internal 
state, the GPUContext set with the ExecutionContext is not the same used to run 
this LibMatrixCUDA function");
                long N = in.getNumRows();
@@ -867,9 +850,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * @param image input matrix object
         * @param instName name of the instruction
         * @return jcuda pointer
-        * @throws DMLRuntimeException if error occurs while sparse to dense 
conversion
         */
-       protected static Pointer getDensePointerForCuDNN(GPUContext gCtx, 
MatrixObject image, String instName) throws DMLRuntimeException {
+       protected static Pointer getDensePointerForCuDNN(GPUContext gCtx, 
MatrixObject image, String instName) {
                long numElems = image.getNumRows()*image.getNumColumns();
                if(numElems > maxNumElementsOfCuDNNTensor) {
                        throw new DMLRuntimeException("CuDNN restriction: the 
size of input tensor cannot have greater than 2 giga-elements, but has " + 
numElems + " (i.e. [" + image.getNumRows() + " X " + image.getNumColumns() + 
"]). Hint: try reducing the mini-batch size.");
@@ -881,9 +863,8 @@ public class LibMatrixCuDNN extends LibMatrixCUDA {
         * Convenience method for checking the status of CuDNN kernel.
         *
         * @param status status returned by CuDNN
-        * @throws DMLRuntimeException if status is not CUDNN_STATUS_SUCCESS
         */
-       protected static void checkStatus(int status) throws 
DMLRuntimeException {
+       protected static void checkStatus(int status) {
                if(status != cudnnStatus.CUDNN_STATUS_SUCCESS)
                        throw new DMLRuntimeException("Error status returned by 
CuDNN:" + jcuda.jcudnn.cudnnStatus.stringFor(status));
        }
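
The changes in this file all follow one pattern: DMLRuntimeException is dropped from the throws clauses while the throw sites stay untouched, which only compiles if the exception is unchecked. A minimal sketch of that migration, assuming DMLRuntimeException now extends RuntimeException (the nested class below is a hypothetical stand-in, not the real org.apache.sysml class):

// Sketch of the checked-to-unchecked migration, under the assumption that
// DMLRuntimeException now extends RuntimeException. Stand-in class only.
public class UncheckedStatusCheck {

    static class DMLRuntimeException extends RuntimeException {
        DMLRuntimeException(String message) { super(message); }
    }

    // Mirrors checkStatus(int): no throws clause, yet failures still propagate.
    static void checkStatus(int status) {
        if (status != 0)
            throw new DMLRuntimeException("Error status returned by CuDNN:" + status);
    }

    public static void main(String[] args) {
        checkStatus(0); // success path needs no throws clause anywhere up the stack
        try {
            checkStatus(3); // failures remain catchable where the caller wants them
        } catch (DMLRuntimeException e) {
            System.out.println("caught: " + e.getMessage());
        }
    }
}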

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
index 835cb15..574e332 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNConvolutionAlgorithm.java
@@ -69,7 +69,7 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
        GPUContext gCtx = null; String instName = null;
        
        private LibMatrixCuDNNConvolutionAlgorithm(GPUContext gCtx, String 
instName, int N, int C, int H, int W, int K, int R, int S, 
-                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q) throws DMLRuntimeException {
+                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q) {
                int padding[] = {pad_h, pad_w};
                int strides[] = {stride_h, stride_w};
                convDesc = allocateConvolutionDescriptor(padding, strides);
@@ -126,11 +126,10 @@ public class LibMatrixCuDNNConvolutionAlgorithm 
implements java.lang.AutoCloseab
         * @param Q        output width
         * @param workspaceLimit maximum intermediate memory to use
         * @return algorithm wrapper
-        * @throws DMLRuntimeException if error occurs
         */
        public static LibMatrixCuDNNConvolutionAlgorithm 
cudnnGetConvolutionForwardAlgorithm(
                        GPUContext gCtx, String instName, int N, int C, int H, 
int W, int K, int R, int S, 
-                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) throws DMLRuntimeException {
+                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) {
                long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
                LibMatrixCuDNNConvolutionAlgorithm ret = new 
LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
                                pad_h, pad_w, stride_h, stride_w, P, Q);
@@ -170,11 +169,10 @@ public class LibMatrixCuDNNConvolutionAlgorithm 
implements java.lang.AutoCloseab
         * @param Q        output width
         * @param workspaceLimit maximum intermediate memory to use
         * @return algorithm wrapper
-        * @throws DMLRuntimeException if error occurs
         */
        public static LibMatrixCuDNNConvolutionAlgorithm 
cudnnGetConvolutionBackwardFilterAlgorithm(
                        GPUContext gCtx, String instName, int N, int C, int H, 
int W, int K, int R, int S, 
-                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) throws DMLRuntimeException {
+                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) {
                long t1 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
                LibMatrixCuDNNConvolutionAlgorithm ret = new 
LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
                                pad_h, pad_w, stride_h, stride_w, P, Q);
@@ -217,11 +215,10 @@ public class LibMatrixCuDNNConvolutionAlgorithm 
implements java.lang.AutoCloseab
         * @param Q        output width
         * @param workspaceLimit maximum intermediate memory to use
         * @return algorithm wrapper
-        * @throws DMLRuntimeException if error occurs
         */
        public static LibMatrixCuDNNConvolutionAlgorithm 
cudnnGetConvolutionBackwardDataAlgorithm(
                        GPUContext gCtx, String instName, int N, int C, int H, 
int W, int K, int R, int S, 
-                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) throws DMLRuntimeException {
+                       int pad_h, int pad_w, int stride_h, int stride_w, int 
P, int Q, long workspaceLimit) {
                LibMatrixCuDNNConvolutionAlgorithm ret = new 
LibMatrixCuDNNConvolutionAlgorithm(gCtx, instName, N, C, H, W, K, R, S, 
                                pad_h, pad_w, stride_h, stride_w, P, Q);
                if(H == R || W == S) {
@@ -258,9 +255,8 @@ public class LibMatrixCuDNNConvolutionAlgorithm implements 
java.lang.AutoCloseab
         * @param H height
         * @param W width
         * @return cudnn tensor descriptor
-        * @throws DMLRuntimeException if the input descriptor and matrix 
dimensions don't match
         */
-       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) throws DMLRuntimeException {
+       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) {
                cudnnTensorDescriptor tensorDescriptor = new 
cudnnTensorDescriptor();
                cudnnCreateTensorDescriptor(tensorDescriptor);
                cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, 
LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W);
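
LibMatrixCuDNNConvolutionAlgorithm implements java.lang.AutoCloseable, so the descriptors and workspace it allocates are meant to be released via try-with-resources. A sketch of that wrapper pattern under illustrative names (the println calls stand in for the actual cuDNN allocate/free calls):

// Illustrative AutoCloseable wrapper; the "resources" are stand-ins for the
// cuDNN convolution descriptors and workspace managed by the real class.
public class AlgorithmWrapperSketch {

    static class ConvolutionAlgorithm implements AutoCloseable {
        ConvolutionAlgorithm() { System.out.println("allocate descriptors + workspace"); }
        void launch() { System.out.println("launch convolution kernel"); }
        @Override public void close() { System.out.println("free descriptors + workspace"); }
    }

    public static void main(String[] args) {
        // close() runs even if launch() throws an unchecked DMLRuntimeException.
        try (ConvolutionAlgorithm algo = new ConvolutionAlgorithm()) {
            algo.launch();
        }
    }
}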

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
index 33f2cb5..f52da30 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNInputRowFetcher.java
@@ -43,9 +43,8 @@ public class LibMatrixCuDNNInputRowFetcher extends 
LibMatrixCUDA implements java
         * @param gCtx current gpu context
         * @param instName name of the instruction
         * @param image input matrix object.
-        * @throws DMLRuntimeException if error
         */
-       public LibMatrixCuDNNInputRowFetcher(GPUContext gCtx, String instName, 
MatrixObject image) throws DMLRuntimeException {
+       public LibMatrixCuDNNInputRowFetcher(GPUContext gCtx, String instName, 
MatrixObject image) {
                this.gCtx = gCtx; this.instName = instName;
                numColumns = LibMatrixCUDA.toInt(image.getNumColumns());
                isInputInSparseFormat = LibMatrixCUDA.isInSparseFormat(gCtx, 
image);
@@ -56,9 +55,8 @@ public class LibMatrixCuDNNInputRowFetcher extends 
LibMatrixCUDA implements java
         * Copy the nth row and return the dense pointer
         * @param n zero-based row index
         * @return dense pointer containing the nth row. This row is reused in 
the next iteration
-        * @throws DMLRuntimeException ?
         */
-       public Pointer getNthRow(int n) throws DMLRuntimeException {
+       public Pointer getNthRow(int n) {
                if(isInputInSparseFormat) {
                        jcuda.runtime.JCuda.cudaDeviceSynchronize();
                        long t0 = DMLScript.FINEGRAINED_STATISTICS ? 
System.nanoTime() : 0;
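
getNthRow documents that the returned row "is reused in the next iteration", i.e. one device buffer is overwritten on every call. A host-side sketch of that reuse contract, with a plain double[] standing in for the dense GPU pointer (all names hypothetical):

// One shared output buffer, overwritten per call: the caller must finish with
// row n before fetching row n+1, exactly the contract stated in the javadoc.
public class RowFetcherSketch {
    private final double[][] matrix;
    private final double[] rowBuffer; // reused across getNthRow calls

    RowFetcherSketch(double[][] matrix) {
        this.matrix = matrix;
        this.rowBuffer = new double[matrix[0].length];
    }

    double[] getNthRow(int n) {
        System.arraycopy(matrix[n], 0, rowBuffer, 0, rowBuffer.length);
        return rowBuffer; // same array instance every time
    }

    public static void main(String[] args) {
        RowFetcherSketch f = new RowFetcherSketch(new double[][] { {1, 2}, {3, 4} });
        double[] r0 = f.getNthRow(0);
        System.out.println(r0[0] + "," + r0[1]);                    // 1.0,2.0
        double[] r1 = f.getNthRow(1);                               // overwrites r0
        System.out.println((r0 == r1) + " " + r1[0] + "," + r1[1]); // true 3.0,4.0
    }
}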

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNPoolingDescriptors.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNPoolingDescriptors.java
 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNPoolingDescriptors.java
index 8c9dea4..ffa7c22 100644
--- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNPoolingDescriptors.java
+++ 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuDNNPoolingDescriptors.java
@@ -29,7 +29,6 @@ import static jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_MAX;
 import static 
jcuda.jcudnn.cudnnPoolingMode.CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING;
 import static jcuda.jcudnn.cudnnTensorFormat.CUDNN_TENSOR_NCHW;
 
-import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.runtime.instructions.gpu.context.GPUContext;
 import org.apache.sysml.runtime.matrix.data.LibMatrixDNN.PoolingType;
 
@@ -84,12 +83,11 @@ public class LibMatrixCuDNNPoolingDescriptors implements 
java.lang.AutoCloseable
         * @param Q                             (W - S + 1 + 2*pad_w)/stride_w
         * @param poolingType   type of pooling
         * @return descriptor wrapper
-        * @throws DMLRuntimeException if error occurs
         */
        public static LibMatrixCuDNNPoolingDescriptors 
cudnnPoolingBackwardDescriptors(GPUContext gCtx, 
                        String instName, int N, int C, int H, int W, int K, int 
R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType) throws 
DMLRuntimeException {
+                       int Q, PoolingType poolingType) {
                LibMatrixCuDNNPoolingDescriptors ret = new 
LibMatrixCuDNNPoolingDescriptors();
                ret.xDesc = allocateTensorDescriptor(N, C, H, W);
                ret.yDesc = allocateTensorDescriptor(N, C, P, Q);
@@ -119,12 +117,11 @@ public class LibMatrixCuDNNPoolingDescriptors implements 
java.lang.AutoCloseable
         * @param Q                             (W - S + 1 + 2*pad_w)/stride_w
         * @param poolingType   type of pooling
         * @return descriptor wrapper
-        * @throws DMLRuntimeException if error occurs
         */
        public static LibMatrixCuDNNPoolingDescriptors 
cudnnPoolingDescriptors(GPUContext gCtx, 
                        String instName, int N, int C, int H, int W, int K, int 
R,
                        int S, int pad_h, int pad_w, int stride_h, int 
stride_w, int P,
-                       int Q, PoolingType poolingType) throws 
DMLRuntimeException {
+                       int Q, PoolingType poolingType) {
                LibMatrixCuDNNPoolingDescriptors ret = new 
LibMatrixCuDNNPoolingDescriptors();
                ret.xDesc = allocateTensorDescriptor(N, C, H, W);
                ret.yDesc = allocateTensorDescriptor(N, C, P, Q);
@@ -139,9 +136,8 @@ public class LibMatrixCuDNNPoolingDescriptors implements 
java.lang.AutoCloseable
         * @param H height
         * @param W width
         * @return cudnn tensor descriptor
-        * @throws DMLRuntimeException if the input descriptor and matrix 
dimensions don't match
         */
-       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) throws DMLRuntimeException {
+       private static cudnnTensorDescriptor allocateTensorDescriptor(int N, 
int C, int H, int W) {
                cudnnTensorDescriptor tensorDescriptor = new 
cudnnTensorDescriptor();
                cudnnCreateTensorDescriptor(tensorDescriptor);
                cudnnSetTensor4dDescriptor(tensorDescriptor, CUDNN_TENSOR_NCHW, 
LibMatrixCUDA.CUDNN_DATA_TYPE, N, C, H, W);
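
The P and Q parameters documented above are the pooled output height and width. The general relation, of which the javadoc's (H - R + 1 + 2*pad_h)/stride_h shorthand is the stride-1 case, is P = (H + 2*pad_h - R)/stride_h + 1, and Q analogously. A quick numeric check:

// Pooled output dimensions: P = (H + 2*pad_h - R)/stride_h + 1, Q analogous.
// Values below are illustrative (a 2x2 max-pool with stride 2 on 28x28).
public class PoolingDimsSketch {
    public static void main(String[] args) {
        int H = 28, W = 28, R = 2, S = 2;
        int pad_h = 0, pad_w = 0, stride_h = 2, stride_w = 2;
        int P = (H + 2 * pad_h - R) / stride_h + 1;
        int Q = (W + 2 * pad_w - S) / stride_w + 1;
        System.out.println("P=" + P + " Q=" + Q); // P=14 Q=14
    }
}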

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
index 21d9fd1..f476dfe 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixCuMatMult.java
@@ -70,7 +70,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                private boolean isRightTransposed; // is op(B) = t(B)
 
                public CuMatMultParameters(long leftNumRows1, long 
leftNumCols1, long rightNumRows1, long rightNumCols1,
-                               boolean isLeftTransposed1, boolean 
isRightTransposed1) throws DMLRuntimeException {
+                               boolean isLeftTransposed1, boolean 
isRightTransposed1) {
                        leftNumRows = leftNumRows1;
                        leftNumCols = leftNumCols1;
                        rightNumRows = rightNumRows1;
@@ -80,7 +80,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                        setDimensions();
                }
 
-               public void rowToColumnMajor() throws DMLRuntimeException {
+               public void rowToColumnMajor() {
                        // To compensate for the input matrices being in 
row-major format
                        // instead of column-major (the way cublas expects)
                        isRightTransposed = swap(isLeftTransposed, 
isLeftTransposed = isRightTransposed);
@@ -89,7 +89,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                        setDimensions();
                }
 
-               private void validate() throws DMLRuntimeException {
+               private void validate() {
                        int k1 = toInt(isRightTransposed ? rightNumCols : 
rightNumRows);
                        if (k != k1)
                                throw new DMLRuntimeException("Dimension 
mismatch: " + k + " != " + k1 + " [" + leftNumRows + ","
@@ -97,7 +97,7 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
                                                + isRightTransposed);
                }
 
-               private void setDimensions() throws DMLRuntimeException {
+               private void setDimensions() {
                        // Validate the dimensions
                        m = toInt(isLeftTransposed ? leftNumCols : leftNumRows);
                        n = toInt(isRightTransposed ? rightNumRows : 
rightNumCols);
@@ -134,13 +134,10 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         *            op for A, transposed or not
         * @param isRightTransposed
         *            op for B, transposed or not
-        * @throws DMLRuntimeException
-        *             if DMLRuntimeException occurs
         * @return output of matrix multiply
         */
        public static MatrixObject matmult(ExecutionContext ec, GPUContext 
gCtx, String instName, MatrixObject left,
-                       MatrixObject right, String outputName, boolean 
isLeftTransposed, boolean isRightTransposed)
-                       throws DMLRuntimeException {
+                       MatrixObject right, String outputName, boolean 
isLeftTransposed, boolean isRightTransposed) {
                boolean isM1Sparse = isInSparseFormat(gCtx, left);
                boolean isM2Sparse = isInSparseFormat(gCtx, right);
                MatrixObject output = ec.getMatrixObject(outputName);
@@ -265,12 +262,10 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         *            is op(A) = t(A)
         * @param isRightTransposed
         *            is op(B) = t(B)
-        * @throws DMLRuntimeException
-        *             if error
         */
        static void sparseDenseMatMult(GPUContext gCtx, String instName, 
Pointer C, CSRPointer A, Pointer B,
                        long leftNumRows, long leftNumColumns, long 
rightNumRows, long rightNumColumns, long outRLen, long outCLen,
-                       boolean isLeftTransposed, boolean isRightTransposed) 
throws DMLRuntimeException {
+                       boolean isLeftTransposed, boolean isRightTransposed) {
                // t(C) = t(B) %*% t(A)
                Pointer output = null;
                if (outRLen != 1 && outCLen != 1) {
@@ -314,11 +309,9 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         *            right matrix pointer
         * @param param
         *            BLAS parameters
-        * @throws DMLRuntimeException
-        *             if error
         */
        private static void denseSparseMatMult(cusparseHandle handle, String 
instName, Pointer C, Pointer A, CSRPointer B,
-                       CuMatMultParameters param) throws DMLRuntimeException {
+                       CuMatMultParameters param) {
                long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
                String kernel = 
GPUInstruction.MISC_TIMER_SPARSE_MATRIX_DENSE_MATRIX_LIB;
                // Ignoring sparse vector dense matrix multiplication and dot 
product
@@ -365,11 +358,9 @@ public class LibMatrixCuMatMult extends LibMatrixCUDA {
         *            right matrix pointer
         * @param param
         *            BLAS parameters
-        * @throws DMLRuntimeException
-        *             if error
         */
        private static void denseDenseMatMult(cublasHandle handle, String 
instName, Pointer C, Pointer A, Pointer B,
-                       CuMatMultParameters param) throws DMLRuntimeException {
+                       CuMatMultParameters param) {
                long t0 = DMLScript.FINEGRAINED_STATISTICS ? System.nanoTime() 
: 0;
                String kernel = null;
                param.rowToColumnMajor();
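
rowToColumnMajor() compensates for SystemML's row-major storage: a row-major matrix reinterpreted in cublas's column-major convention is its transpose, so C = A %*% B is computed as t(C) = t(B) %*% t(A) with the operands and their transpose flags exchanged. The one-line exchange above relies on Java's left-to-right argument evaluation; a standalone demonstration (this swap helper is a hypothetical reconstruction):

// swap(a, a = b): the first argument captures the old value of 'a' before the
// embedded assignment runs, because Java evaluates arguments left to right.
public class SwapIdiomSketch {

    static boolean swap(boolean oldValue, boolean ignored) {
        return oldValue;
    }

    public static void main(String[] args) {
        boolean isLeftTransposed = true, isRightTransposed = false;
        isRightTransposed = swap(isLeftTransposed, isLeftTransposed = isRightTransposed);
        // values exchanged without a temporary variable
        System.out.println(isLeftTransposed + " " + isRightTransposed); // false true
    }
}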

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
index 4cb154b..17dbdc0 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNN.java
@@ -140,9 +140,8 @@ public class LibMatrixDNN {
         * @param filter filter
         * @param outputBlock output of convolution
         * @param params convolution parameters
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void conv2d(MatrixBlock input, MatrixBlock filter, 
MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
+       public static void conv2d(MatrixBlock input, MatrixBlock filter, 
MatrixBlock outputBlock, ConvolutionParameters params) {
                LibMatrixDNN.checkInputsConv2d(input, filter, outputBlock, 
params);
                if(params.bias != null && params.bias.isInSparseFormat())
                        params.bias.sparseToDense(); // Since bias is extremely 
small array
@@ -161,9 +160,8 @@ public class LibMatrixDNN {
         * @param dout errors from next layer
         * @param outputBlock  output errors
         * @param params convolution parameters
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void conv2dBackwardData(MatrixBlock filter, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
+       public static void conv2dBackwardData(MatrixBlock filter, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) {
                checkInputsConv2dBackwardData(filter, dout, outputBlock, 
params);
                
                long nnz = 
execute(LibMatrixDNNConv2d.getConv2dBackwardDataWorkers(params), params);
@@ -180,9 +179,8 @@ public class LibMatrixDNN {
         * @param dout errors from next layer
         * @param outputBlock  output errors
         * @param params convolution parameters
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void conv2dBackwardFilter(MatrixBlock input, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
+       public static void conv2dBackwardFilter(MatrixBlock input, MatrixBlock 
dout, MatrixBlock outputBlock, ConvolutionParameters params) {
                checkInputsConv2dBackwardFilter(input, dout, outputBlock, 
params);
                
                
execute(LibMatrixDNNConv2d.getConv2dBackwardFilterWorkers(params), params);
@@ -192,7 +190,7 @@ public class LibMatrixDNN {
                outputBlock.examSparsity();
        }
        
-       public static void pooling(MatrixBlock input, MatrixBlock output, 
ConvolutionParameters params, PoolingType poolType) throws DMLRuntimeException {
+       public static void pooling(MatrixBlock input, MatrixBlock output, 
ConvolutionParameters params, PoolingType poolType) {
                params.input1 = input;
                params.output = output;
                
@@ -222,10 +220,9 @@ public class LibMatrixDNN {
         * @param params convolution parameters
         * @param performReluBackward perform ReLU backward
         * @param poolType type of pooling
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
        public static void poolingBackward(MatrixBlock input, MatrixBlock dout, 
MatrixBlock outputBlock, 
-                       ConvolutionParameters params, boolean 
performReluBackward, PoolingType poolType) throws DMLRuntimeException {
+                       ConvolutionParameters params, boolean 
performReluBackward, PoolingType poolType) {
                params.input1 = input;
                params.input2 = dout;
                params.output = outputBlock;
@@ -269,9 +266,8 @@ public class LibMatrixDNN {
         * @param dout errors from next layer
         * @param outputBlock output matrix
         * @param numThreads number of threads
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void reluBackward(MatrixBlock input, MatrixBlock dout, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
+       public static void reluBackward(MatrixBlock input, MatrixBlock dout, 
MatrixBlock outputBlock, int numThreads) {
                int N = input.getNumRows();
                ConvolutionParameters params = new ConvolutionParameters(N, -1, 
-1, -1, -1, -1, -1, -1, -1, -1, -1, numThreads);
                params.input1 = input;
@@ -299,9 +295,8 @@ public class LibMatrixDNN {
         * @param bias bias matrix
         * @param outputBlock output matrix
         * @param numThreads number of threads
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void biasAdd(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
+       public static void biasAdd(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) {
                int N = input.getNumRows();
                int K = bias.getNumRows();
                int PQ = input.getNumColumns() / K;
@@ -348,9 +343,8 @@ public class LibMatrixDNN {
         * @param bias bias matrix
         * @param outputBlock output matrix
         * @param numThreads number of threads
-        * @throws DMLRuntimeException if DMLRuntimeException occurs
         */
-       public static void biasMultiply(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) throws DMLRuntimeException {
+       public static void biasMultiply(MatrixBlock input, MatrixBlock bias, 
MatrixBlock outputBlock, int numThreads) {
                int N = input.getNumRows();
                int K = bias.getNumRows();
                int PQ = input.getNumColumns() / K;
@@ -423,9 +417,8 @@ public class LibMatrixDNN {
         *  
         * @param tasks deep learning related tasks
         * @param params convolution parameters
-        * @throws DMLRuntimeException if the error occurs
         */
-       private static long execute(ArrayList<Callable<Long>> tasks, 
ConvolutionParameters params) throws DMLRuntimeException {
+       private static long execute(ArrayList<Callable<Long>> tasks, 
ConvolutionParameters params) {
                int k = 
OptimizerUtils.getConstrainedNumThreads(params.numThreads);
                long lnnz = 0;
                try {
@@ -451,16 +444,16 @@ public class LibMatrixDNN {
                return lnnz;
        }
        
-       private static void checkOrThrowException(String msg, long lhs, long 
rhs) throws DMLRuntimeException {
+       private static void checkOrThrowException(String msg, long lhs, long 
rhs) {
                if(lhs != rhs)
                        throw new DMLRuntimeException(msg + ":" + lhs + " != " 
+ rhs);
        }
-       private static void checkOrThrowException(String msg, long lhs, long 
rhs1, long rhs2, long rhs3) throws DMLRuntimeException {
+       private static void checkOrThrowException(String msg, long lhs, long 
rhs1, long rhs2, long rhs3) {
                if(lhs != (rhs1*rhs2*rhs3))
                        throw new DMLRuntimeException(msg + ":" + lhs + " != (" 
+ rhs1 + " * " + rhs2 + " * " + rhs3);
        }
        
-       static void checkInputsConv2dBackwardData(MatrixBlock filter, 
MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params)  
throws DMLRuntimeException {
+       static void checkInputsConv2dBackwardData(MatrixBlock filter, 
MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) {
                params.input1 = filter;
                params.input2 = dout;
                params.output = outputBlock;
@@ -485,7 +478,7 @@ public class LibMatrixDNN {
                }
        }
        
-       static void checkInputsConv2dBackwardFilter(MatrixBlock input, 
MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params)  
throws DMLRuntimeException {
+       static void checkInputsConv2dBackwardFilter(MatrixBlock input, 
MatrixBlock dout, MatrixBlock outputBlock, ConvolutionParameters params) {
                params.input1 = input;
                params.input2 = dout;
                params.output = outputBlock;
@@ -510,7 +503,7 @@ public class LibMatrixDNN {
                }
        }
        
-       static void checkInputsConv2d(MatrixBlock input, MatrixBlock filter, 
MatrixBlock outputBlock, ConvolutionParameters params) throws 
DMLRuntimeException {
+       static void checkInputsConv2d(MatrixBlock input, MatrixBlock filter, 
MatrixBlock outputBlock, ConvolutionParameters params) {
                params.input1 = input;
                params.input2 = filter;
                params.output = outputBlock;
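
The checkInputsConv2d* methods reduce to a few checkOrThrowException guards comparing a block's actual shape against N, C*H*W and friends, with both sides of the comparison embedded in the message. A compact sketch of that fail-fast style (numbers illustrative):

// Fail-fast shape guard in the style of checkOrThrowException: the message
// carries both operands so a dimension mismatch is immediately diagnosable.
public class DimensionGuardSketch {

    static void checkOrThrowException(String msg, long lhs, long rhs) {
        if (lhs != rhs)
            throw new RuntimeException(msg + ":" + lhs + " != " + rhs);
    }

    public static void main(String[] args) {
        int N = 32, C = 3, H = 28, W = 28;
        long inputColumns = 3L * 28 * 28; // columns of the N x CHW input block
        checkOrThrowException("Incorrect input to conv2d", inputColumns, (long) C * H * W);
        System.out.println("conv2d input shape OK: " + N + " x " + inputColumns);
    }
}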

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
index 436735e..4221ace 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNConv2d.java
@@ -39,9 +39,8 @@ public class LibMatrixDNNConv2d
         * 
         * @param params convolution parameters
         * @return list of callable tasks for performing conv2d
-        * @throws DMLRuntimeException if error occurs
         */
-       public static ArrayList<Callable<Long>> 
getConv2dWorkers(ConvolutionParameters params) throws DMLRuntimeException {
+       public static ArrayList<Callable<Long>> 
getConv2dWorkers(ConvolutionParameters params) {
                ArrayList<Callable<Long>> ret = new ArrayList<>();
                
                // Try to create twice as many tasks as threads for improved 
load balance
@@ -85,9 +84,8 @@ public class LibMatrixDNNConv2d
         * 
         * @param params convolution parameters
         * @return list of callable tasks for performing conv2d backward filter
-        * @throws DMLRuntimeException if error occurs
         */
-       public static ArrayList<Callable<Long>> 
getConv2dBackwardFilterWorkers(ConvolutionParameters params) throws 
DMLRuntimeException {
+       public static ArrayList<Callable<Long>> 
getConv2dBackwardFilterWorkers(ConvolutionParameters params) {
                ArrayList<Callable<Long>> ret = new ArrayList<>();
                // Try to create as many tasks as threads. 
                // Creating more tasks will help in tail, but would have 
additional overhead of maintaining the intermediate
@@ -122,9 +120,8 @@ public class LibMatrixDNNConv2d
         * 
         * @param params convolution parameters
         * @return list of callable tasks for performing conv2d backward data
-        * @throws DMLRuntimeException if error occurs
         */
-       public static ArrayList<Callable<Long>> 
getConv2dBackwardDataWorkers(ConvolutionParameters params) throws 
DMLRuntimeException {
+       public static ArrayList<Callable<Long>> 
getConv2dBackwardDataWorkers(ConvolutionParameters params) {
                ArrayList<Callable<Long>> ret = new ArrayList<>();
                
                // Try to create as many tasks as threads. 
@@ -619,7 +616,7 @@ public class LibMatrixDNNConv2d
                }
        }
        
-       private static void getRowInDenseFormat(MatrixBlock input, int n, 
double []  ret) throws DMLRuntimeException {
+       private static void getRowInDenseFormat(MatrixBlock input, int n, 
double []  ret) {
                if(input.getNumColumns() != ret.length) {
                        throw new DMLRuntimeException("Invalid parameters");
                }
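
The comment above ("twice as many tasks as threads for improved load balance") is a standard executor pattern: with more, smaller tasks the pool can even out stragglers near the tail of the computation. A self-contained sketch with a trivial row counter standing in for a conv2d worker:

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

// Partition N rows into ~2k tasks for k threads; each task returns the number
// of rows it processed, mirroring the Callable<Long> workers used above.
public class WorkerPartitionSketch {
    public static void main(String[] args) throws Exception {
        int N = 100, k = 4; // rows and available threads (illustrative)
        int taskSize = (int) Math.ceil((double) N / (2 * k));
        List<Callable<Long>> tasks = new ArrayList<>();
        for (int lo = 0; lo < N; lo += taskSize) {
            final int from = lo, to = Math.min(N, lo + taskSize);
            tasks.add(() -> (long) (to - from)); // stand-in for a row-range worker
        }
        ExecutorService pool = Executors.newFixedThreadPool(k);
        long rows = 0;
        for (Future<Long> f : pool.invokeAll(tasks))
            rows += f.get();
        pool.shutdown();
        System.out.println("tasks=" + tasks.size() + " rows=" + rows); // tasks=8 rows=100
    }
}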

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
index b985c42..6d92313 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNHelper.java
@@ -19,7 +19,6 @@
 package org.apache.sysml.runtime.matrix.data;
 
 
-import org.apache.sysml.runtime.DMLRuntimeException;
 import org.apache.sysml.utils.NativeHelper;
 
 
@@ -58,7 +57,7 @@ public class LibMatrixDNNHelper
        }
        
        protected static void singleThreadedMatMult(MatrixBlock m1, MatrixBlock 
m2, MatrixBlock ret, 
-               boolean recomputeNNZM1, boolean recomputeNNZM2, 
ConvolutionParameters params) throws DMLRuntimeException {
+               boolean recomputeNNZM1, boolean recomputeNNZM2, 
ConvolutionParameters params) {
                if( !params.enableNative || m1.sparse || m2.sparse ) {
                        prepNonZerosForMatrixMult(m1, recomputeNNZM1);
                        prepNonZerosForMatrixMult(m2, recomputeNNZM2);
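
singleThreadedMatMult falls back to the pure-Java multiply whenever native BLAS is disabled or either operand is sparse, since the native kernel handles only dense inputs. A sketch of that dispatch guard (method names and return values are placeholders):

// Dense-only native kernel with a Java fallback, mirroring the guard above:
// !enableNative || m1.sparse || m2.sparse selects the Java path.
public class MatMultDispatchSketch {

    static String javaMatMult() { return "java"; }
    static String nativeMatMult() { return "native"; }

    static String dispatch(boolean enableNative, boolean m1Sparse, boolean m2Sparse) {
        if (!enableNative || m1Sparse || m2Sparse)
            return javaMatMult();
        return nativeMatMult();
    }

    public static void main(String[] args) {
        System.out.println(dispatch(true, false, false));  // native
        System.out.println(dispatch(true, true, false));   // java (sparse operand)
        System.out.println(dispatch(false, false, false)); // java (native disabled)
    }
}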

http://git-wip-us.apache.org/repos/asf/systemml/blob/1f323976/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2Col.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2Col.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2Col.java
index 5c72c4e..65f53d9 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2Col.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixDNNIm2Col.java
@@ -193,7 +193,7 @@ public class LibMatrixDNNIm2Col
        // Therefore, it is provided as utility function rather than an 
operator (like im2col or rotate180)
        
        //Converts input: PQ X CRS matrix and writes to 1 X CHW
-       public static void col2imOverSingleImage(int outputN, MatrixBlock 
input, ConvolutionParameters params) throws DMLRuntimeException {
+       public static void col2imOverSingleImage(int outputN, MatrixBlock 
input, ConvolutionParameters params) {
                if(input.rlen != params.P*params.Q || input.clen != 
params.C*params.R*params.S) {
                        throw new DMLRuntimeException("Incorrect input 
dimensions");
                }
@@ -242,7 +242,7 @@ public class LibMatrixDNNIm2Col
        
        // Converts input: PQ X CRS matrix and writes to 1 X CHW if inputN == 0
        // Or converts input: NPQ X CRS matrix and writes to N X CHW 
-       private static void col2IMDenseInput(int inputN, int outputN, double [] 
inputArray, double [] outputArray, ConvolutionParameters params) throws 
DMLRuntimeException {
+       private static void col2IMDenseInput(int inputN, int outputN, double [] 
inputArray, double [] outputArray, ConvolutionParameters params) {
                final int outputNOffset = outputN*params.C*params.H*params.W;
                final int HW = params.H*params.W;
                final int inputNPQ = inputN*params.P*params.Q;
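
col2imOverSingleImage inverts im2col: each row of the P*Q x C*R*S patch matrix is scattered back into a 1 x C*H*W image, and entries covered by overlapping patches accumulate. A toy single-channel version (sizes and loop order are illustrative, not SystemML's implementation):

// Toy col2im over one image: scatter a P*Q x C*R*S patch matrix into a
// 1 x C*H*W image, accumulating where patches overlap. No padding here.
public class Col2ImSketch {
    public static void main(String[] args) {
        int C = 1, H = 3, W = 3, R = 2, S = 2, strideH = 1, strideW = 1;
        int P = (H - R) / strideH + 1, Q = (W - S) / strideW + 1;
        double[][] patches = new double[P * Q][C * R * S];
        for (double[] row : patches) java.util.Arrays.fill(row, 1.0);

        double[] image = new double[C * H * W]; // the 1 x CHW output
        for (int p = 0; p < P; p++)
            for (int q = 0; q < Q; q++)
                for (int c = 0; c < C; c++)
                    for (int r = 0; r < R; r++)
                        for (int s = 0; s < S; s++) {
                            int h = p * strideH + r, w = q * strideW + s;
                            image[c * H * W + h * W + w] +=
                                patches[p * Q + q][c * R * S + r * S + s];
                        }
        // corners touched by 1 patch, edges by 2, the center by all 4:
        // [1.0, 2.0, 1.0, 2.0, 4.0, 2.0, 1.0, 2.0, 1.0]
        System.out.println(java.util.Arrays.toString(image));
    }
}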
