Hello community, here is the log from the commit of package libclc for openSUSE:Factory checked in at 2017-04-17 10:20:29 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/libclc (Old) and /work/SRC/openSUSE:Factory/.libclc.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "libclc" Mon Apr 17 10:20:29 2017 rev:5 rq:487862 version:0.2.0+git.20170225 Changes: -------- --- /work/SRC/openSUSE:Factory/libclc/libclc.changes 2017-01-11 11:55:20.509641033 +0100 +++ /work/SRC/openSUSE:Factory/.libclc.new/libclc.changes 2017-04-17 10:20:33.683072434 +0200 @@ -1,0 +2,20 @@ +Wed Apr 12 19:37:03 UTC 2017 - [email protected] + +- Update rpmlintrc to include both lib dir .pc files. + +------------------------------------------------------------------- +Mon Apr 10 15:44:21 UTC 2017 - [email protected] + +- Update to version 0.2.0+git.20170225: + * Fix build since llvm r286566 and require at least llvm 4.0 + * Fix build since r286752. + * math: Add expm1 builtin function + * math: Add logb builtin + * math: Add native_rsqrt builtin function + * Add the correct prefixes to the cl_khr_fp64 pragma + * Move BufferPtr into the block where it it being used + * math: Add native_tan as wrapper to tan + * .gitignore: Ignore amdgcn-mesa object directory + * math: Implement sinh function + +------------------------------------------------------------------- Old: ---- libclc-0.2.0+git.20160921.tar.xz New: ---- libclc-0.2.0+git.20170225.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ libclc.spec ++++++ --- /var/tmp/diff_new_pack.rP6WPu/_old 2017-04-17 10:20:34.430966514 +0200 +++ /var/tmp/diff_new_pack.rP6WPu/_new 2017-04-17 10:20:34.430966514 +0200 @@ -1,7 +1,7 @@ # # spec file for package libclc # -# Copyright (c) 2016 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,7 +17,7 @@ Name: libclc -Version: 0.2.0+git.20160921 +Version: 0.2.0+git.20170225 Release: 0 Summary: OpenCL C programming language library License: BSD-3-Clause or MIT @@ -27,8 +27,8 @@ Source1: %{name}-rpmlintrc BuildRequires: gcc BuildRequires: libstdc++-devel >= 3.9 -BuildRequires: llvm-clang-devel >= 3.9 -BuildRequires: llvm-devel >= 3.9 +BuildRequires: llvm-clang-devel >= 4.0 +BuildRequires: llvm-devel >= 4.0 BuildRequires: ncurses-devel BuildRequires: pkgconfig BuildRequires: python ++++++ _service ++++++ --- /var/tmp/diff_new_pack.rP6WPu/_old 2017-04-17 10:20:34.462961982 +0200 +++ /var/tmp/diff_new_pack.rP6WPu/_new 2017-04-17 10:20:34.462961982 +0200 @@ -4,7 +4,7 @@ <param name="url">https://github.com/llvm-mirror/libclc.git</param> <param name="submodules">enable</param> <param name="changesgenerate">enable</param> - <param name="revision">520743b0b72862a987ead6213dc1a5321a2010f9</param> + <param name="revision">17648cd846390e294feafef21c32c7106eac1e24</param> <param name="versionformat">0.2.0+git.%cd</param> </service> <service name="recompress" mode="disabled"> ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.rP6WPu/_old 2017-04-17 10:20:34.482959150 +0200 +++ /var/tmp/diff_new_pack.rP6WPu/_new 2017-04-17 10:20:34.482959150 +0200 @@ -1,4 +1,6 @@ <servicedata> -<service name="tar_scm"> - <param name="url">https://github.com/llvm-mirror/libclc.git</param> - <param name="changesrevision">520743b0b72862a987ead6213dc1a5321a2010f9</param></service></servicedata> \ No newline at end of file + <service name="tar_scm"> + <param name="url">https://github.com/llvm-mirror/libclc.git</param> + <param name="changesrevision">17648cd846390e294feafef21c32c7106eac1e24</param> + </service> +</servicedata> ++++++ libclc-0.2.0+git.20160921.tar.xz -> libclc-0.2.0+git.20170225.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/configure.py new/libclc-0.2.0+git.20170225/configure.py --- old/libclc-0.2.0+git.20160921/configure.py 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/configure.py 2017-02-25 03:46:53.000000000 +0100 @@ -69,8 +69,8 @@ llvm_int_version = int(llvm_version[0]) * 100 + int(llvm_version[1]) * 10 llvm_string_version = 'LLVM' + llvm_version[0] + '.' + llvm_version[1] -if llvm_int_version < 390: - print "libclc requires LLVM >= 3.9" +if llvm_int_version < 400: + print "libclc requires LLVM >= 4.0" sys.exit(1) llvm_system_libs = llvm_config(['--system-libs']) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/clc.h new/libclc-0.2.0+git.20170225/generic/include/clc/clc.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/clc.h 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/clc.h 2017-02-25 03:46:53.000000000 +0100 @@ -53,6 +53,7 @@ #include <clc/math/erf.h> #include <clc/math/erfc.h> #include <clc/math/exp.h> +#include <clc/math/expm1.h> #include <clc/math/exp10.h> #include <clc/math/exp2.h> #include <clc/math/fabs.h> @@ -75,6 +76,7 @@ #include <clc/math/log10.h> #include <clc/math/log1p.h> #include <clc/math/log2.h> +#include <clc/math/logb.h> #include <clc/math/mad.h> #include <clc/math/modf.h> #include <clc/math/nextafter.h> @@ -84,6 +86,7 @@ #include <clc/math/round.h> #include <clc/math/sin.h> #include <clc/math/sincos.h> +#include <clc/math/sinh.h> #include <clc/math/sinpi.h> #include <clc/math/sqrt.h> #include <clc/math/tan.h> @@ -100,6 +103,8 @@ #include <clc/math/native_powr.h> #include <clc/math/native_sin.h> #include <clc/math/native_sqrt.h> +#include <clc/math/native_rsqrt.h> +#include <clc/math/native_tan.h> #include <clc/math/rsqrt.h> /* 6.11.2.1 Floating-point macros */ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/expm1.h new/libclc-0.2.0+git.20170225/generic/include/clc/math/expm1.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/expm1.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/expm1.h 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,9 @@ +#undef exp + +#define __CLC_BODY <clc/math/unary_decl.inc> +#define __CLC_FUNCTION expm1 + +#include <clc/math/gentype.inc> + +#undef __CLC_BODY +#undef __CLC_FUNCTION diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/logb.h new/libclc-0.2.0+git.20170225/generic/include/clc/math/logb.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/logb.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/logb.h 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,2 @@ +#define __CLC_BODY <clc/math/logb.inc> +#include <clc/math/gentype.inc> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/logb.inc new/libclc-0.2.0+git.20170225/generic/include/clc/math/logb.inc --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/logb.inc 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/logb.inc 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1 @@ +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE logb(__CLC_GENTYPE a); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/native_rsqrt.h new/libclc-0.2.0+git.20170225/generic/include/clc/math/native_rsqrt.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/native_rsqrt.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/native_rsqrt.h 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1 @@ +#define native_rsqrt rsqrt diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/native_tan.h new/libclc-0.2.0+git.20170225/generic/include/clc/math/native_tan.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/native_tan.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/native_tan.h 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,10 @@ +//===-- generic/include/clc/math/native_tan.h -----------------------------===// + +// +// The LLVM Compiler Infrastructure +// +// This file is dual licensed under both the University of Illinois Open Source +// License and the MIT license. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +#define native_tan tan diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/sinh.h new/libclc-0.2.0+git.20170225/generic/include/clc/math/sinh.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/sinh.h 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/sinh.h 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#define __CLC_BODY <clc/math/sinh.inc> +#include <clc/math/gentype.inc> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/math/sinh.inc new/libclc-0.2.0+git.20170225/generic/include/clc/math/sinh.inc --- old/libclc-0.2.0+git.20160921/generic/include/clc/math/sinh.inc 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/math/sinh.inc 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2014, 2015 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE sinh(__CLC_GENTYPE x); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/include/clc/shared/vstore.h new/libclc-0.2.0+git.20170225/generic/include/clc/shared/vstore.h --- old/libclc-0.2.0+git.20160921/generic/include/clc/shared/vstore.h 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/generic/include/clc/shared/vstore.h 2017-02-25 03:46:53.000000000 +0100 @@ -29,7 +29,7 @@ _CLC_VECTOR_VSTORE_PRIM3(_half, half, float) #ifdef cl_khr_fp64 -#pragma cl_khr_fp64: enable +#pragma OPENCL EXTENSION cl_khr_fp64: enable _CLC_VECTOR_VSTORE_PRIM1(double) _CLC_VECTOR_VSTORE_PRIM3(_half, half, double) _CLC_VSTORE_DECL(_half, half, double, , __private) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/SOURCES new/libclc-0.2.0+git.20170225/generic/lib/SOURCES --- old/libclc-0.2.0+git.20160921/generic/lib/SOURCES 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/generic/lib/SOURCES 2017-02-25 03:46:53.000000000 +0100 @@ -83,6 +83,7 @@ math/erfc.cl math/exp.cl math/exp_helper.cl +math/expm1.cl math/exp2.cl math/exp10.cl math/fdim.cl @@ -103,6 +104,7 @@ math/log10.cl math/log1p.cl math/log2.cl +math/logb.cl math/mad.cl math/modf.cl math/native_log.cl @@ -114,6 +116,7 @@ math/sin.cl math/sincos.cl math/sincos_helpers.cl +math/sinh.cl math/sinpi.cl math/clc_sqrt.cl math/sqrt.cl diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/math/expm1.cl new/libclc-0.2.0+git.20170225/generic/lib/math/expm1.cl --- old/libclc-0.2.0+git.20160921/generic/lib/math/expm1.cl 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/lib/math/expm1.cl 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,142 @@ +#include <clc/clc.h> + +#include "math.h" +#include "tables.h" +#include "../clcmacro.h" + +/* Refer to the exp routine for the underlying algorithm */ + +_CLC_OVERLOAD _CLC_DEF float expm1(float x) { + const float X_MAX = 0x1.62e42ep+6f; // 128*log2 : 88.722839111673 + const float X_MIN = -0x1.9d1da0p+6f; // -149*log2 : -103.27892990343184 + + const float R_64_BY_LOG2 = 0x1.715476p+6f; // 64/log2 : 92.332482616893657 + const float R_LOG2_BY_64_LD = 0x1.620000p-7f; // log2/64 lead: 0.0108032227 + const float R_LOG2_BY_64_TL = 0x1.c85fdep-16f; // log2/64 tail: 0.0000272020388 + + uint xi = as_uint(x); + int n = (int)(x * R_64_BY_LOG2); + float fn = (float)n; + + int j = n & 0x3f; + int m = n >> 6; + + float r = mad(fn, -R_LOG2_BY_64_TL, mad(fn, -R_LOG2_BY_64_LD, x)); + + // Truncated Taylor series + float z2 = mad(r*r, mad(r, mad(r, 0x1.555556p-5f, 0x1.555556p-3f), 0.5f), r); + + float m2 = as_float((m + EXPBIAS_SP32) << EXPSHIFTBITS_SP32); + float2 tv = USE_TABLE(exp_tbl_ep, j); + + float two_to_jby64_h = tv.s0 * m2; + float two_to_jby64_t = tv.s1 * m2; + float two_to_jby64 = two_to_jby64_h + two_to_jby64_t; + + z2 = mad(z2, two_to_jby64, two_to_jby64_t) + (two_to_jby64_h - 1.0f); + //Make subnormals work + z2 = x == 0.f ? x : z2; + z2 = x < X_MIN | m < -24 ? -1.0f : z2; + z2 = x > X_MAX ? as_float(PINFBITPATT_SP32) : z2; + z2 = isnan(x) ? x : z2; + + return z2; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, expm1, float) + +#ifdef cl_khr_fp64 + +#include "exp_helper.h" + +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double expm1(double x) { + const double max_expm1_arg = 709.8; + const double min_expm1_arg = -37.42994775023704; + const double log_OnePlus_OneByFour = 0.22314355131420976; //0x3FCC8FF7C79A9A22 = log(1+1/4) + const double log_OneMinus_OneByFour = -0.28768207245178096; //0xBFD269621134DB93 = log(1-1/4) + const double sixtyfour_by_lnof2 = 92.33248261689366; //0x40571547652b82fe + const double lnof2_by_64_head = 0.010830424696223417; //0x3f862e42fefa0000 + const double lnof2_by_64_tail = 2.5728046223276688e-14; //0x3d1cf79abc9e3b39 + + // First, assume log(1-1/4) < x < log(1+1/4) i.e -0.28768 < x < 0.22314 + double u = as_double(as_ulong(x) & 0xffffffffff000000UL); + double v = x - u; + double y = u * u * 0.5; + double z = v * (x + u) * 0.5; + + double q = fma(x, + fma(x, + fma(x, + fma(x, + fma(x, + fma(x, + fma(x, + fma(x,2.4360682937111612e-8, 2.7582184028154370e-7), + 2.7558212415361945e-6), + 2.4801576918453420e-5), + 1.9841269447671544e-4), + 1.3888888890687830e-3), + 8.3333333334012270e-3), + 4.1666666666665560e-2), + 1.6666666666666632e-1); + q *= x * x * x; + + double z1g = (u + y) + (q + (v + z)); + double z1 = x + (y + (q + z)); + z1 = y >= 0x1.0p-7 ? z1g : z1; + + // Now assume outside interval around 0 + int n = (int)(x * sixtyfour_by_lnof2); + int j = n & 0x3f; + int m = n >> 6; + + double2 tv = USE_TABLE(two_to_jby64_ep_tbl, j); + double f1 = tv.s0; + double f2 = tv.s1; + double f = f1 + f2; + + double dn = -n; + double r = fma(dn, lnof2_by_64_tail, fma(dn, lnof2_by_64_head, x)); + + q = fma(r, + fma(r, + fma(r, + fma(r, 1.38889490863777199667e-03, 8.33336798434219616221e-03), + 4.16666666662260795726e-02), + 1.66666666665260878863e-01), + 5.00000000000000008883e-01); + q = fma(r*r, q, r); + + double twopm = as_double((long)(m + EXPBIAS_DP64) << EXPSHIFTBITS_DP64); + double twopmm = as_double((long)(EXPBIAS_DP64 - m) << EXPSHIFTBITS_DP64); + + // Computations for m > 52, including where result is close to Inf + ulong uval = as_ulong(0x1.0p+1023 * (f1 + (f * q + (f2)))); + int e = (int)(uval >> EXPSHIFTBITS_DP64) + 1; + + double zme1024 = as_double(((long)e << EXPSHIFTBITS_DP64) | (uval & MANTBITS_DP64)); + zme1024 = e == 2047 ? as_double(PINFBITPATT_DP64) : zme1024; + + double zmg52 = twopm * (f1 + fma(f, q, f2 - twopmm)); + zmg52 = m == 1024 ? zme1024 : zmg52; + + // For m < 53 + double zml53 = twopm * ((f1 - twopmm) + fma(f1, q, f2*(1.0 + q))); + + // For m < -7 + double zmln7 = fma(twopm, f1 + fma(f, q, f2), -1.0); + + z = m < 53 ? zml53 : zmg52; + z = m < -7 ? zmln7 : z; + z = x > log_OneMinus_OneByFour & x < log_OnePlus_OneByFour ? z1 : z; + z = x > max_expm1_arg ? as_double(PINFBITPATT_DP64) : z; + z = x < min_expm1_arg ? -1.0 : z; + + return z; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, expm1, double) + +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/math/logb.cl new/libclc-0.2.0+git.20170225/generic/lib/math/logb.cl --- old/libclc-0.2.0+git.20160921/generic/lib/math/logb.cl 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/lib/math/logb.cl 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,31 @@ +#include <clc/clc.h> +#include "math.h" +#include "../clcmacro.h" + +_CLC_OVERLOAD _CLC_DEF float logb(float x) { + int ax = as_int(x) & EXSIGNBIT_SP32; + float s = -118 - clz(ax); + float r = (ax >> EXPSHIFTBITS_SP32) - EXPBIAS_SP32; + r = ax >= PINFBITPATT_SP32 ? as_float(ax) : r; + r = ax < 0x00800000 ? s : r; + r = ax == 0 ? as_float(NINFBITPATT_SP32) : r; + return r; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, logb, float); + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double logb(double x) { + long ax = as_long(x) & EXSIGNBIT_DP64; + double s = -1011L - clz(ax); + double r = (int) (ax >> EXPSHIFTBITS_DP64) - EXPBIAS_DP64; + r = ax >= PINFBITPATT_DP64 ? as_double(ax) : r; + r = ax < 0x0010000000000000L ? s : r; + r = ax == 0L ? as_double(NINFBITPATT_DP64) : r; + return r; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, logb, double) +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/math/sinh.cl new/libclc-0.2.0+git.20170225/generic/lib/math/sinh.cl --- old/libclc-0.2.0+git.20160921/generic/lib/math/sinh.cl 1970-01-01 01:00:00.000000000 +0100 +++ new/libclc-0.2.0+git.20170225/generic/lib/math/sinh.cl 2017-02-25 03:46:53.000000000 +0100 @@ -0,0 +1,191 @@ +/* + * Copyright (c) 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include <clc/clc.h> + +#include "math.h" +#include "tables.h" +#include "../clcmacro.h" + +_CLC_OVERLOAD _CLC_DEF float sinh(float x) +{ + // After dealing with special cases the computation is split into regions as follows. + // abs(x) >= max_sinh_arg: + // sinh(x) = sign(x)*Inf + // abs(x) >= small_threshold: + // sinh(x) = sign(x)*exp(abs(x))/2 computed using the splitexp and scaleDouble functions as for exp_amd(). + // abs(x) < small_threshold: + // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) + // sinh(x) is then sign(x)*z. + + const float max_sinh_arg = 0x1.65a9fap+6f; + const float small_threshold = 0x1.0a2b24p+3f; + + uint ux = as_uint(x); + uint aux = ux & EXSIGNBIT_SP32; + uint xs = ux ^ aux; + float y = as_float(aux); + + // We find the integer part y0 of y and the increment dy = y - y0. We then compute + // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) + // where sinh(y0) and cosh(y0) are tabulated above. + int ind = (int) y; + ind = (uint)ind > 36U ? 0 : ind; + + float dy = y - ind; + float dy2 = dy * dy; + + float sdy = mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, 0.7746188980094184251527126e-12f, 0.160576793121939886190847e-9f), + 0.250521176994133472333666e-7f), + 0.275573191913636406057211e-5f), + 0.198412698413242405162014e-3f), + 0.833333333333329931873097e-2f), + 0.166666666666666667013899e0f); + sdy = mad(sdy, dy*dy2, dy); + + float cdy = mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, + mad(dy2, 0.1163921388172173692062032e-10f, 0.208744349831471353536305e-8f), + 0.275573350756016588011357e-6f), + 0.248015872460622433115785e-4f), + 0.138888888889814854814536e-2f), + 0.416666666666660876512776e-1f), + 0.500000000000000005911074e0f); + cdy = mad(cdy, dy2, 1.0f); + + float2 tv = USE_TABLE(sinhcosh_tbl, ind); + float z = mad(tv.s1, sdy, tv.s0 * cdy); + z = as_float(xs | as_uint(z)); + + // When y is large enough so that the negative exponential is negligible, + // so sinh(y) is approximated by sign(x)*exp(y)/2. + float t = exp(y - 0x1.62e500p-1f); + float zsmall = mad(0x1.a0210ep-18f, t, t); + zsmall = as_float(xs | as_uint(zsmall)); + z = y >= small_threshold ? zsmall : z; + + // Corner cases + float zinf = as_float(PINFBITPATT_SP32 | xs); + z = y >= max_sinh_arg ? zinf : z; + z = aux > PINFBITPATT_SP32 | aux < 0x38800000U ? x : z; + + return z; +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, sinh, float); + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable + +_CLC_OVERLOAD _CLC_DEF double sinh(double x) +{ + // After dealing with special cases the computation is split into + // regions as follows: + // + // abs(x) >= max_sinh_arg: + // sinh(x) = sign(x)*Inf + // + // abs(x) >= small_threshold: + // sinh(x) = sign(x)*exp(abs(x))/2 computed using the + // splitexp and scaleDouble functions as for exp_amd(). + // + // abs(x) < small_threshold: + // compute p = exp(y) - 1 and then z = 0.5*(p+(p/(p+1.0))) + // sinh(x) is then sign(x)*z. + + const double max_sinh_arg = 7.10475860073943977113e+02; // 0x408633ce8fb9f87e + + // This is where exp(-x) is insignificant compared to exp(x) = ln(2^27) + const double small_threshold = 0x1.2b708872320e2p+4; + + double y = fabs(x); + + // In this range we find the integer part y0 of y + // and the increment dy = y - y0. We then compute + // z = sinh(y) = sinh(y0)cosh(dy) + cosh(y0)sinh(dy) + // where sinh(y0) and cosh(y0) are obtained from tables + + int ind = min((int)y, 36); + double dy = y - ind; + double dy2 = dy * dy; + + double sdy = dy * dy2 * + fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, 0.7746188980094184251527126e-12, 0.160576793121939886190847e-9), + 0.250521176994133472333666e-7), + 0.275573191913636406057211e-5), + 0.198412698413242405162014e-3), + 0.833333333333329931873097e-2), + 0.166666666666666667013899e0); + + double cdy = dy2 * fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, + fma(dy2, 0.1163921388172173692062032e-10, 0.208744349831471353536305e-8), + 0.275573350756016588011357e-6), + 0.248015872460622433115785e-4), + 0.138888888889814854814536e-2), + 0.416666666666660876512776e-1), + 0.500000000000000005911074e0); + + // At this point sinh(dy) is approximated by dy + sdy. + // Shift some significant bits from dy to sdy. + double sdy1 = as_double(as_ulong(dy) & 0xfffffffff8000000UL); + double sdy2 = sdy + (dy - sdy1); + + double2 tv = USE_TABLE(cosh_tbl, ind); + double cl = tv.s0; + double ct = tv.s1; + tv = USE_TABLE(sinh_tbl, ind); + double sl = tv.s0; + double st = tv.s1; + + double z = fma(cl, sdy1, fma(sl, cdy, fma(cl, sdy2, fma(ct, sdy1, fma(st, cdy, ct*sdy2)) + st))) + sl; + + // Other cases + z = (y < 0x1.0p-28) | isnan(x) | isinf(x) ? y : z; + + double t = exp(y - 0x1.62e42fefa3800p-1); + t = fma(t, -0x1.ef35793c76641p-45, t); + z = y >= small_threshold ? t : z; + z = y >= max_sinh_arg ? as_double(PINFBITPATT_DP64) : z; + + return copysign(z, x); +} + +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, sinh, double) + +#endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/math/tables.cl new/libclc-0.2.0+git.20170225/generic/lib/math/tables.cl --- old/libclc-0.2.0+git.20160921/generic/lib/math/tables.cl 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/generic/lib/math/tables.cl 2017-02-25 03:46:53.000000000 +0100 @@ -608,6 +608,142 @@ (float2)(0x1.428000p+0f, 0x1.45f31ap-13f) }; +DECLARE_TABLE(float, EXP_TBL, 65) = { + 0x1.000000p+0f, + 0x1.02c9a4p+0f, + 0x1.059b0ep+0f, + 0x1.087452p+0f, + 0x1.0b5586p+0f, + 0x1.0e3ec4p+0f, + 0x1.11301ep+0f, + 0x1.1429aap+0f, + 0x1.172b84p+0f, + 0x1.1a35bep+0f, + 0x1.1d4874p+0f, + 0x1.2063b8p+0f, + 0x1.2387a6p+0f, + 0x1.26b456p+0f, + 0x1.29e9e0p+0f, + 0x1.2d285ap+0f, + 0x1.306fe0p+0f, + 0x1.33c08cp+0f, + 0x1.371a74p+0f, + 0x1.3a7db4p+0f, + 0x1.3dea64p+0f, + 0x1.4160a2p+0f, + 0x1.44e086p+0f, + 0x1.486a2cp+0f, + 0x1.4bfdaep+0f, + 0x1.4f9b28p+0f, + 0x1.5342b6p+0f, + 0x1.56f474p+0f, + 0x1.5ab07ep+0f, + 0x1.5e76f2p+0f, + 0x1.6247ecp+0f, + 0x1.662388p+0f, + 0x1.6a09e6p+0f, + 0x1.6dfb24p+0f, + 0x1.71f75ep+0f, + 0x1.75feb6p+0f, + 0x1.7a1148p+0f, + 0x1.7e2f34p+0f, + 0x1.82589ap+0f, + 0x1.868d9ap+0f, + 0x1.8ace54p+0f, + 0x1.8f1aeap+0f, + 0x1.93737cp+0f, + 0x1.97d82ap+0f, + 0x1.9c4918p+0f, + 0x1.a0c668p+0f, + 0x1.a5503cp+0f, + 0x1.a9e6b6p+0f, + 0x1.ae89fap+0f, + 0x1.b33a2cp+0f, + 0x1.b7f770p+0f, + 0x1.bcc1eap+0f, + 0x1.c199bep+0f, + 0x1.c67f12p+0f, + 0x1.cb720ep+0f, + 0x1.d072d4p+0f, + 0x1.d5818ep+0f, + 0x1.da9e60p+0f, + 0x1.dfc974p+0f, + 0x1.e502eep+0f, + 0x1.ea4afap+0f, + 0x1.efa1bep+0f, + 0x1.f50766p+0f, + 0x1.fa7c18p+0f, + 0x1.000000p+1f, +}; + +DECLARE_TABLE(float2, EXP_TBL_EP, 65) = { + (float2) (0x1.000000p+0f, 0x0.000000p+0f), + (float2) (0x1.02c000p+0f, 0x1.347ceep-13f), + (float2) (0x1.058000p+0f, 0x1.b0d314p-12f), + (float2) (0x1.084000p+0f, 0x1.a28c3ap-11f), + (float2) (0x1.0b4000p+0f, 0x1.586cf8p-12f), + (float2) (0x1.0e0000p+0f, 0x1.f61968p-11f), + (float2) (0x1.110000p+0f, 0x1.80e808p-11f), + (float2) (0x1.140000p+0f, 0x1.4d5754p-11f), + (float2) (0x1.170000p+0f, 0x1.5c1e3ep-11f), + (float2) (0x1.1a0000p+0f, 0x1.adf5b6p-11f), + (float2) (0x1.1d4000p+0f, 0x1.0e62d0p-13f), + (float2) (0x1.204000p+0f, 0x1.1dc430p-11f), + (float2) (0x1.238000p+0f, 0x1.e9b9d4p-14f), + (float2) (0x1.268000p+0f, 0x1.a2b2f0p-11f), + (float2) (0x1.29c000p+0f, 0x1.4efa8ep-11f), + (float2) (0x1.2d0000p+0f, 0x1.42d372p-11f), + (float2) (0x1.304000p+0f, 0x1.7f0518p-11f), + (float2) (0x1.33c000p+0f, 0x1.164c82p-17f), + (float2) (0x1.370000p+0f, 0x1.a7373ap-12f), + (float2) (0x1.3a4000p+0f, 0x1.ed9a72p-11f), + (float2) (0x1.3dc000p+0f, 0x1.532608p-11f), + (float2) (0x1.414000p+0f, 0x1.0510fap-11f), + (float2) (0x1.44c000p+0f, 0x1.043030p-11f), + (float2) (0x1.484000p+0f, 0x1.515ae0p-11f), + (float2) (0x1.4bc000p+0f, 0x1.ed6a9ap-11f), + (float2) (0x1.4f8000p+0f, 0x1.b2769cp-12f), + (float2) (0x1.534000p+0f, 0x1.5ab4eap-15f), + (float2) (0x1.56c000p+0f, 0x1.a39b5ap-11f), + (float2) (0x1.5a8000p+0f, 0x1.83eea4p-11f), + (float2) (0x1.5e4000p+0f, 0x1.b78ad6p-11f), + (float2) (0x1.624000p+0f, 0x1.fac0e8p-14f), + (float2) (0x1.660000p+0f, 0x1.1c412ap-11f), + (float2) (0x1.6a0000p+0f, 0x1.3cccfep-13f), + (float2) (0x1.6dc000p+0f, 0x1.d91e32p-11f), + (float2) (0x1.71c000p+0f, 0x1.baf476p-11f), + (float2) (0x1.75c000p+0f, 0x1.f5ab20p-11f), + (float2) (0x1.7a0000p+0f, 0x1.1473eap-12f), + (float2) (0x1.7e0000p+0f, 0x1.799b66p-11f), + (float2) (0x1.824000p+0f, 0x1.89994cp-12f), + (float2) (0x1.868000p+0f, 0x1.b33688p-13f), + (float2) (0x1.8ac000p+0f, 0x1.ca8454p-13f), + (float2) (0x1.8f0000p+0f, 0x1.ae9914p-12f), + (float2) (0x1.934000p+0f, 0x1.9bd866p-11f), + (float2) (0x1.97c000p+0f, 0x1.829fdep-12f), + (float2) (0x1.9c4000p+0f, 0x1.230546p-13f), + (float2) (0x1.a0c000p+0f, 0x1.99ed76p-14f), + (float2) (0x1.a54000p+0f, 0x1.03b23ep-12f), + (float2) (0x1.a9c000p+0f, 0x1.35aabcp-11f), + (float2) (0x1.ae8000p+0f, 0x1.3f32b4p-13f), + (float2) (0x1.b30000p+0f, 0x1.d15c26p-11f), + (float2) (0x1.b7c000p+0f, 0x1.bb797cp-11f), + (float2) (0x1.bcc000p+0f, 0x1.e904bcp-16f), + (float2) (0x1.c18000p+0f, 0x1.9bdd84p-12f), + (float2) (0x1.c64000p+0f, 0x1.f8972ap-11f), + (float2) (0x1.cb4000p+0f, 0x1.906e76p-11f), + (float2) (0x1.d04000p+0f, 0x1.96a502p-11f), + (float2) (0x1.d58000p+0f, 0x1.8dcfbap-16f), + (float2) (0x1.da8000p+0f, 0x1.e603dap-12f), + (float2) (0x1.dfc000p+0f, 0x1.2e66f6p-13f), + (float2) (0x1.e50000p+0f, 0x1.773c58p-15f), + (float2) (0x1.ea4000p+0f, 0x1.5f4548p-13f), + (float2) (0x1.ef8000p+0f, 0x1.0df730p-11f), + (float2) (0x1.f50000p+0f, 0x1.d96db8p-14f), + (float2) (0x1.fa4000p+0f, 0x1.e0c0cep-11f), + (float2) (0x1.000000p+1f, 0x0.000000p+0f), +}; + TABLE_FUNCTION(float2, LOGE_TBL, loge_tbl); TABLE_FUNCTION(float, LOG_INV_TBL, log_inv_tbl); TABLE_FUNCTION(float2, LOG2_TBL, log2_tbl); @@ -618,6 +754,8 @@ TABLE_FUNCTION(float2, SINHCOSH_TBL, sinhcosh_tbl); TABLE_FUNCTION(float2, CBRT_TBL, cbrt_tbl); +TABLE_FUNCTION(float, EXP_TBL, exp_tbl); +TABLE_FUNCTION(float2, EXP_TBL_EP, exp_tbl_ep); #ifdef cl_khr_fp64 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/generic/lib/math/tables.h new/libclc-0.2.0+git.20170225/generic/lib/math/tables.h --- old/libclc-0.2.0+git.20160921/generic/lib/math/tables.h 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/generic/lib/math/tables.h 2017-02-25 03:46:53.000000000 +0100 @@ -44,6 +44,8 @@ TABLE_FUNCTION_DECL(uint4, pibits_tbl); TABLE_FUNCTION_DECL(float2, sinhcosh_tbl); TABLE_FUNCTION_DECL(float2, cbrt_tbl); +TABLE_FUNCTION_DECL(float, exp_tbl); +TABLE_FUNCTION_DECL(float2, exp_tbl_ep); #ifdef cl_khr_fp64 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/libclc-0.2.0+git.20160921/utils/prepare-builtins.cpp new/libclc-0.2.0+git.20170225/utils/prepare-builtins.cpp --- old/libclc-0.2.0+git.20160921/utils/prepare-builtins.cpp 2016-09-21 22:15:55.000000000 +0200 +++ new/libclc-0.2.0+git.20170225/utils/prepare-builtins.cpp 2017-02-25 03:46:53.000000000 +0100 @@ -1,4 +1,5 @@ -#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LLVMContext.h" @@ -35,12 +36,13 @@ { ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = MemoryBuffer::getFile(InputFilename); - std::unique_ptr<MemoryBuffer> &BufferPtr = BufferOrErr.get(); - if (std::error_code ec = BufferOrErr.getError()) + if (std::error_code ec = BufferOrErr.getError()) { ErrorMessage = ec.message(); - else { + } else { + std::unique_ptr<MemoryBuffer> &BufferPtr = BufferOrErr.get(); ErrorOr<std::unique_ptr<Module>> ModuleOrErr = - parseBitcodeFile(BufferPtr.get()->getMemBufferRef(), Context); + expectedToErrorOrAndEmitErrors(Context, + parseBitcodeFile(BufferPtr.get()->getMemBufferRef(), Context)); if (std::error_code ec = ModuleOrErr.getError()) ErrorMessage = ec.message(); ++++++ libclc-rpmlintrc ++++++ --- /var/tmp/diff_new_pack.rP6WPu/_old 2017-04-17 10:20:34.730924033 +0200 +++ /var/tmp/diff_new_pack.rP6WPu/_new 2017-04-17 10:20:34.730924033 +0200 @@ -3,4 +3,4 @@ # Files required at runtime by applications using OpenCL. addFilter("devel-file-in-non-devel-package.*/usr/include/clc/.*") -addFilter("devel-file-in-non-devel-package.*/usr/lib64/pkgconfig/libclc.pc") +addFilter("devel-file-in-non-devel-package.*/usr/lib[^/]*/pkgconfig/libclc.pc")
