Repository: systemml Updated Branches: refs/heads/master a2ec38da6 -> 9080dec7d
[SYSTEMML-2187] Disable single-precision MKL conv2d_bias_add This patch temporarily disables the single-precision MKL conv2d_bias_add operations (which call dnnExecute_F32) due to incorrect results on newer processors with the AVX2 or AVX-512 instruction sets (tested on Xeon E5-2620 v3 and Xeon Gold 6138). Interestingly, this issue does not show up on older processors (tested on Xeon E5-2440) or with double precision in general, so in these cases we fall back to the default double-precision MKL kernels, which are generally faster than the single-precision OpenMP-based conv2d kernels. For example, on a Xeon Gold 6138, lenet with 2000 iterations executed 2243 conv2d_bias_add operations, which took 49s with dnnExecute_F64, while they took 77s with the single-precision OpenMP kernel. Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/9080dec7 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/9080dec7 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/9080dec7 Branch: refs/heads/master Commit: 9080dec7d2ca406cda96bee9df15fcb176487867 Parents: a2ec38d Author: Matthias Boehm <[email protected]> Authored: Sun Mar 18 15:04:39 2018 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Mar 18 15:04:39 2018 -0700 ---------------------------------------------------------------------- .../org/apache/sysml/runtime/matrix/data/LibMatrixNative.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/9080dec7/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java index 15007b3..4eb284e 100644 --- 
a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixNative.java @@ -143,7 +143,11 @@ public class LibMatrixNative else { if(params.bias.isInSparseFormat()) params.bias.sparseToDense(); // Bias matrix is usually extremely small - if( isSinglePrecision() ) { + //NOTE: We temporarily disable MKL FP32 conv2d_bias_add due to incorrect results on + //newer processors with AVX2 and AVX-512 instruction set (library bug or alignment issue) + //Experiments have shown that falling back to the MKL FP64 primitives is generally faster + //than falling back to the custom openmp FP32 implementation. + if( isSinglePrecision() && !NativeHelper.getCurrentBLAS().equalsIgnoreCase("mkl") ) { FloatBuffer finput = toFloatBuffer(input.getDenseBlockValues(), inBuff, true); FloatBuffer fbias = toFloatBuffer(params.bias.getDenseBlockValues(), biasBuff, true); FloatBuffer ffilter = toFloatBuffer(filter.getDenseBlockValues(), filterBuff, true);
