Repository: systemml Updated Branches: refs/heads/master 59e9e4d8b -> a2ec38da6
[SYSTEMML-2187] Fix robustness of native MKL single/double conv2d ops This patch makes two fixes to increase the robustness of native MKL single-/double-precision conv2d bias add operations: 1) Report all caught errors, including the error code for unhandled codes. 2) Reset the output block on fallback to Java conv2d after failures; this ensures correctness because these operations might not overwrite all values for sparse operations. This fix also avoids unnecessary output buffer copies after a failed native call. Finally, this patch also includes a consolidated build script to simplify the build of the MKL and OpenBLAS shared libraries. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/a2ec38da Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/a2ec38da Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/a2ec38da Branch: refs/heads/master Commit: a2ec38da6c320f1e90057f2596afe12e698294d6 Parents: 59e9e4d Author: Matthias Boehm <[email protected]> Authored: Fri Mar 16 21:10:25 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Fri Mar 16 21:10:54 2018 -0700 ---------------------------------------------------------------------- src/main/cpp/build.sh | 55 +++++++++++++++++++ src/main/cpp/check-dependency-linux-x86_64.sh | 45 --------------- .../cpp/lib/libsystemml_mkl-Linux-x86_64.so | Bin 32104 -> 32168 bytes .../lib/libsystemml_openblas-Linux-x86_64.so | Bin 36192 -> 31240 bytes src/main/cpp/libmatrixdnn.cpp | 9 +-- .../runtime/matrix/data/LibMatrixNative.java | 54 ++++++++---------- 6 files changed, 84 insertions(+), 79 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/cpp/build.sh ---------------------------------------------------------------------- diff --git a/src/main/cpp/build.sh b/src/main/cpp/build.sh new file mode 100644 index 0000000..fc8e25c --- /dev/null +++ 
b/src/main/cpp/build.sh @@ -0,0 +1,55 @@ +#!/bin/bash +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# This shell script compiles the required shared libraries for 64-bit Linux on x86 machine + +# yum whatprovides libgcc_s.so.1 +# GNU Standard C++ Library: libstdc++.so.6 +# GCC version 4.8 shared support library: libgcc_s.so.1 +# The GNU libc libraries: libm.so.6, libdl.so.2, libc.so.6, libpthread.so.0 +# GCC OpenMP v3.0 shared support library: libgomp.so.1 +gcc_toolkit="libgcc_s.so\|libm.so\|libstdc++\|libc.so\|libdl.so\|libgomp.so\|libpthread.so" +linux_loader="linux-vdso.so\|ld-linux-x86-64.so" +intel_mkl="libmkl_rt.so" + +# Fortran runtime: libgfortran.so.3 +# GCC __float128 shared support library: libquadmath.so.0 +openblas="libopenblas.so\|libgfortran.so\|libquadmath.so" + +# configure and compile INTEL MKL +mkdir INTEL && cd INTEL +cmake -DUSE_INTEL_MKL=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS="-DUSE_GNU_THREADING -m64" .. +make install && cd .. 
&& rm -R INTEL + +# configure and compile OPENBLAS +mkdir OPENBLAS && cd OPENBLAS +cmake -DUSE_OPEN_BLAS=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_C_COMPILER=gcc -DCMAKE_CXX_COMPILER=g++ -DCMAKE_CXX_FLAGS="-m64" .. +make install && cd .. && rm -R OPENBLAS + +# check dependencies linux x86_64 +echo "-----------------------------------------------------------------------" +echo "Check for unexpected dependencies added after code change or new setup:" +echo "Non-standard dependencies for libsystemml_mkl-linux-x86_64.so" +ldd lib/libsystemml_mkl-Linux-x86_64.so | grep -v $gcc_toolkit"\|"$linux_loader"\|"$intel_mkl +echo "Non-standard dependencies for libsystemml_openblas-linux-x86_64.so" +ldd lib/libsystemml_openblas-Linux-x86_64.so | grep -v $gcc_toolkit"\|"$linux_loader"\|"$openblas +echo "-----------------------------------------------------------------------" http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/cpp/check-dependency-linux-x86_64.sh ---------------------------------------------------------------------- diff --git a/src/main/cpp/check-dependency-linux-x86_64.sh b/src/main/cpp/check-dependency-linux-x86_64.sh deleted file mode 100755 index 40f0bb0..0000000 --- a/src/main/cpp/check-dependency-linux-x86_64.sh +++ /dev/null @@ -1,45 +0,0 @@ -#!/bin/bash -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- -# This shell script compiles the required shared libraries for 64-bit Linux on x86 machine - -# yum whatprovides libgcc_s.so.1 -# GNU Standard C++ Library: libstdc++.so.6 -# GCC version 4.8 shared support library: libgcc_s.so.1 -# The GNU libc libraries: libm.so.6, libdl.so.2, libc.so.6, libpthread.so.0 -# GCC OpenMP v3.0 shared support library: libgomp.so.1 -gcc_toolkit="libgcc_s.so\|libm.so\|libstdc++\|libc.so\|libdl.so\|libgomp.so\|libpthread.so" -linux_loader="linux-vdso.so\|ld-linux-x86-64.so" -intel_mkl="libmkl_rt.so" - -# Fortran runtime: libgfortran.so.3 -# GCC __float128 shared support library: libquadmath.so.0 -openblas="libopenblas.so\|libgfortran.so\|libquadmath.so" - -echo "-----------------------------------------------------------------------" -echo "Check for unexpected dependencies added after code change or new setup:" -echo "Non-standard dependencies for libpreload_systemml-linux-x86_64.so" -ldd lib/libpreload_systemml-Linux-x86_64.so | grep -v $gcc_toolkit"\|"$linux_loader -echo "Non-standard dependencies for libsystemml_mkl-linux-x86_64.so" -ldd lib/libsystemml_mkl-Linux-x86_64.so | grep -v $gcc_toolkit"\|"$linux_loader"\|"$intel_mkl -echo "Non-standard dependencies for libsystemml_openblas-linux-x86_64.so" -ldd lib/libsystemml_openblas-Linux-x86_64.so | grep -v $gcc_toolkit"\|"$linux_loader"\|"$openblas -echo "-----------------------------------------------------------------------" 
http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so ---------------------------------------------------------------------- diff --git a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so index db96497..faaf5f4 100755 Binary files a/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so and b/src/main/cpp/lib/libsystemml_mkl-Linux-x86_64.so differ http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so ---------------------------------------------------------------------- diff --git a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so index 2fdcddf..16b0b5d 100755 Binary files a/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so and b/src/main/cpp/lib/libsystemml_openblas-Linux-x86_64.so differ http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/cpp/libmatrixdnn.cpp ---------------------------------------------------------------------- diff --git a/src/main/cpp/libmatrixdnn.cpp b/src/main/cpp/libmatrixdnn.cpp index 717cb26..b00c015 100644 --- a/src/main/cpp/libmatrixdnn.cpp +++ b/src/main/cpp/libmatrixdnn.cpp @@ -143,10 +143,11 @@ template<class FP> void im2col(FP* inputArray, FP* outputArray, int N, int C, in // Returns true if error bool MKL_DNN_ERROR(dnnError_t code) { if(code == E_SUCCESS) return false; - else if(code == E_INCORRECT_INPUT_PARAMETER) std::cerr << "ERROR: Incorrect input parameter\n"; - else if(code == E_MEMORY_ERROR) std::cerr << "ERROR: Memory error\n"; - else if(code == E_UNSUPPORTED_DIMENSION) std::cerr << "ERROR: Unsupported dimensions\n"; - else if(code == E_UNIMPLEMENTED) std::cerr << "ERROR: Unimplemented operation\n"; + else if(code == E_INCORRECT_INPUT_PARAMETER) std::cerr << "MKL ERROR: Incorrect input parameter.\n"; + else if(code == E_MEMORY_ERROR) std::cerr << "MKL ERROR: Memory error.\n"; + 
else if(code == E_UNSUPPORTED_DIMENSION) std::cerr << "MKL ERROR: Unsupported dimensions.\n"; + else if(code == E_UNIMPLEMENTED) std::cerr << "MKL ERROR: Unimplemented operation.\n"; + else std::cerr << "MKL ERROR: Unhandled error code = " << code << ".\n"; return true; } #endif http://git-wip-us.apache.org/repos/asf/systemml/blob/a2ec38da/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java index 7e0a6d7..15007b3 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java @@ -24,6 +24,8 @@ import java.nio.FloatBuffer; import java.util.Arrays; import java.util.stream.IntStream; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.sysml.api.DMLScript; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.conf.DMLConfig; @@ -34,6 +36,8 @@ import org.apache.sysml.utils.Statistics; public class LibMatrixNative { + private static final Log LOG = LogFactory.getLog(LibMatrixNative.class.getName()); + // ThreadLocal reuse of direct buffers for inputs/outputs (extended on demand). // note: since we anyway have to convert from double to float, we use // preallocated direct buffers (with thread-local reuse and resizing on demand) @@ -128,31 +132,17 @@ public class LibMatrixNative params.numThreads = params.numThreads <= 0 ? NativeHelper.getMaxNumThreads() : params.numThreads; if(NativeHelper.isNativeLibraryLoaded() && !input.isInSparseFormat() && !filter.isInSparseFormat()) { setNumThreads(params); + long start = DMLScript.STATISTICS ? System.nanoTime() : 0; + int nnz = 0; if(params.bias == null) { - long start = DMLScript.STATISTICS ? 
System.nanoTime() : 0; - int nnz = NativeHelper.conv2dDense(input.getDenseBlockValues(), filter.getDenseBlockValues(), + nnz = NativeHelper.conv2dDense(input.getDenseBlockValues(), filter.getDenseBlockValues(), outputBlock.getDenseBlockValues(), params.N, params.C, params.H, params.W, params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w, params.P, params.Q, params.numThreads); - if(nnz != -1) { - if(DMLScript.STATISTICS) { - Statistics.nativeConv2dTime += System.nanoTime() - start; - Statistics.numNativeConv2dCalls.increment(); - } - // post-processing: maintain nnz - outputBlock.setNonZeros(nnz); - return; - } - else { - // Fall back to Java when failures - Statistics.incrementNativeFailuresCounter(); - } } else { if(params.bias.isInSparseFormat()) params.bias.sparseToDense(); // Bias matrix is usually extremely small - long start = DMLScript.STATISTICS ? System.nanoTime() : 0; - int nnz = -1; if( isSinglePrecision() ) { FloatBuffer finput = toFloatBuffer(input.getDenseBlockValues(), inBuff, true); FloatBuffer fbias = toFloatBuffer(params.bias.getDenseBlockValues(), biasBuff, true); @@ -162,7 +152,8 @@ public class LibMatrixNative params.N, params.C, params.H, params.W, params.K, params.R, params.S, params.stride_h, params.stride_w, params.pad_h, params.pad_w, params.P, params.Q, params.numThreads); - fromFloatBuffer(outBuff.get(), outputBlock.getDenseBlockValues()); + if( nnz != -1 ) + fromFloatBuffer(outBuff.get(), outputBlock.getDenseBlockValues()); } else { //Double nnz = NativeHelper.dconv2dBiasAddDense(input.getDenseBlockValues(), params.bias.getDenseBlockValues(), @@ -171,19 +162,22 @@ public class LibMatrixNative params.stride_h, params.stride_w, params.pad_h, params.pad_w, params.P, params.Q, params.numThreads); } - if(nnz != -1) { - if(DMLScript.STATISTICS) { - Statistics.nativeConv2dTime += System.nanoTime() - start; - Statistics.numNativeConv2dCalls.increment(); - } - // post-processing: maintain nnz - 
outputBlock.setNonZeros(nnz); - return; - } - else { - // Fall back to Java when failures - Statistics.incrementNativeFailuresCounter(); + } + //post processing and error handling + if(nnz != -1) { + if(DMLScript.STATISTICS) { + Statistics.nativeConv2dTime += System.nanoTime() - start; + Statistics.numNativeConv2dCalls.increment(); } + outputBlock.setNonZeros(nnz); + return; + } + else { + // Fall back to Java in case of failures, reset output to ensure correctness + LOG.warn("Native conv2d call returned with error - falling back to java operator."); + if( !(isSinglePrecision() && params.bias!=null) ) + outputBlock.reset(); + Statistics.incrementNativeFailuresCounter(); } }
