Hello community, here is the log from the commit of package clpeak for openSUSE:Factory checked in at 2017-05-31 12:19:42 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/clpeak (Old) and /work/SRC/openSUSE:Factory/.clpeak.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "clpeak" Wed May 31 12:19:42 2017 rev:3 rq:499612 version:1.0+git.20170525 Changes: -------- --- /work/SRC/openSUSE:Factory/clpeak/clpeak.changes 2017-02-03 17:34:45.197734924 +0100 +++ /work/SRC/openSUSE:Factory/.clpeak.new/clpeak.changes 2017-05-31 12:20:18.079664600 +0200 @@ -1,0 +2,14 @@ +Tue May 30 08:35:31 UTC 2017 - [email protected] + +- Update to version 1.0+git.20170525: + * Add GTX 1080 Ti results + * Remove non printable chars from log file + * Add One Plus 3T results + * Macbook pro results submitted by Harry Mallon + * Rename Macbook_Pro_2013.log to Mac_Pro_Late_2013_.log + * Not passing half as kernel argument + * Fix uninitialized variable + * Ignore ignored-attributes warning in g++, clang++ + * Enable support for FreeBSD. + +------------------------------------------------------------------- Old: ---- clpeak-1.0+git.20160805.tar.xz New: ---- _servicedata clpeak-1.0+git.20170525.tar.xz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ clpeak.spec ++++++ --- /var/tmp/diff_new_pack.wMk4Ey/_old 2017-05-31 12:20:19.291493532 +0200 +++ /var/tmp/diff_new_pack.wMk4Ey/_new 2017-05-31 12:20:19.295492968 +0200 @@ -18,7 +18,7 @@ Name: clpeak -Version: 1.0+git.20160805 +Version: 1.0+git.20170525 Release: 0 Summary: Find peak OpenCL capacities like bandwidth & compute License: SUSE-Public-Domain @@ -51,7 +51,7 @@ %files %defattr (-,root,root) -%doc README.md LICENSE STATUS +%doc README.md LICENSE STATUS results %{_bindir}/%{name} %changelog ++++++ _servicedata ++++++ <servicedata> <service name="tar_scm"> <param name="url">git://github.com/krrishnarraj/clpeak</param> <param name="changesrevision">c23d2f77a9154d155eefee0538e453272921e5a5</param></service></servicedata>++++++ clpeak-1.0+git.20160805.tar.xz -> clpeak-1.0+git.20170525.tar.xz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/.gitmodules new/clpeak-1.0+git.20170525/.gitmodules --- old/clpeak-1.0+git.20160805/.gitmodules 1970-01-01 01:00:00.000000000 +0100 +++ new/clpeak-1.0+git.20170525/.gitmodules 2017-05-25 06:45:41.000000000 +0200 @@ -0,0 +1,3 @@ +[submodule "android/app/src/main/jni/libopencl-stub"] + path = android/app/src/main/jni/libopencl-stub + url = https://github.com/krrishnarraj/libopencl-stub diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/CMakeLists.txt new/clpeak-1.0+git.20170525/CMakeLists.txt --- old/clpeak-1.0+git.20160805/CMakeLists.txt 2016-08-05 08:23:31.000000000 +0200 +++ new/clpeak-1.0+git.20170525/CMakeLists.txt 2017-05-25 06:45:41.000000000 +0200 @@ -89,7 +89,7 @@ add_definitions("-march=native") endif() - add_definitions("-fPIC -Wall -Wextra -Wno-deprecated-declarations -Wno-unused-parameter") + add_definitions("-fPIC -Wall -Wextra -Wno-deprecated-declarations -Wno-unused-parameter -Wno-ignored-attributes") endif() # override cl.hp from deps. Its buggy or not present in come systems diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/include/common.h new/clpeak-1.0+git.20170525/include/common.h --- old/clpeak-1.0+git.20160805/include/common.h 2016-08-05 08:23:31.000000000 +0200 +++ new/clpeak-1.0+git.20170525/include/common.h 2017-05-25 06:45:41.000000000 +0200 @@ -2,7 +2,7 @@ #define COMMON_H #include <CL/cl.hpp> -#if defined(__APPLE__) || defined(__MACOSX) +#if defined(__APPLE__) || defined(__MACOSX) || defined(__FreeBSD__) #include <sys/types.h> #endif @@ -12,7 +12,9 @@ #define TAB " " #define NEWLINE "\n" +#ifndef __FreeBSD__ #define uint unsigned int +#endif #define MAX(X, Y) \ (X > Y)? X: Y; @@ -39,6 +41,8 @@ #elif defined(__arm__) #define OS_NAME "Linux ARM" #endif +#elif defined(__FreeBSD__) +#define OS_NAME "FreeBSD" #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/results/Apple/Mac_Pro_Late_2013_.log new/clpeak-1.0+git.20170525/results/Apple/Mac_Pro_Late_2013_.log --- old/clpeak-1.0+git.20160805/results/Apple/Mac_Pro_Late_2013_.log 1970-01-01 01:00:00.000000000 +0100 +++ new/clpeak-1.0+git.20170525/results/Apple/Mac_Pro_Late_2013_.log 2017-05-25 06:45:41.000000000 +0200 @@ -0,0 +1,76 @@ +Platform: Apple + Device: AMD Radeon HD - FirePro D700 Compute Engine + Driver version : 1.2 (Mar 16 2017 18:19:56) (Macintosh) + Compute units : 32 + Clock frequency : 850 MHz + + Global memory bandwidth (GBPS) + float : 182.39 + float2 : 189.53 + float4 : 195.73 + float8 : 101.98 + float16 : 53.33 + + Single-precision compute (GFLOPS) + float : 2593.97 + float2 : 2591.22 + float4 : 2584.96 + float8 : 2572.53 + float16 : 2543.34 + + No half precision support! Skipped + + Double-precision compute (GFLOPS) + double : 654.65 + double2 : 654.62 + double4 : 653.86 + double8 : 652.82 + double16 : 650.63 + + Transfer bandwidth (GBPS) + enqueueWriteBuffer : 11.12 + enqueueReadBuffer : 11.92 + enqueueMapBuffer(for read) : 94.12 + memcpy from mapped ptr : 6.79 + enqueueUnmap(after write) : 7550.93 + memcpy to mapped ptr : 7.64 + + Kernel launch latency : 9.63 us + + Device: AMD Radeon HD - FirePro D700 Compute Engine + Driver version : 1.2 (Mar 16 2017 18:19:56) (Macintosh) + Compute units : 32 + Clock frequency : 850 MHz + + Global memory bandwidth (GBPS) + float : 184.77 + float2 : 191.74 + float4 : 197.64 + float8 : 102.48 + float16 : 53.56 + + Single-precision compute (GFLOPS) + float : 2599.49 + float2 : 2594.67 + float4 : 2590.64 + float8 : 2576.32 + float16 : 2547.82 + + No half precision support! Skipped + + Double-precision compute (GFLOPS) + double : 654.91 + double2 : 654.89 + double4 : 654.75 + double8 : 654.34 + double16 : 651.34 + + Transfer bandwidth (GBPS) + enqueueWriteBuffer : 10.05 + enqueueReadBuffer : 9.31 + enqueueMapBuffer(for read) : 85.90 + memcpy from mapped ptr : 6.73 + enqueueUnmap(after write) : 7389.83 + memcpy to mapped ptr : 7.70 + + Kernel launch latency : 10.21 us diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/results/NVIDIA_CUDA/GeForce_GTX_1080_Ti.log new/clpeak-1.0+git.20170525/results/NVIDIA_CUDA/GeForce_GTX_1080_Ti.log --- old/clpeak-1.0+git.20160805/results/NVIDIA_CUDA/GeForce_GTX_1080_Ti.log 1970-01-01 01:00:00.000000000 +0100 +++ new/clpeak-1.0+git.20170525/results/NVIDIA_CUDA/GeForce_GTX_1080_Ti.log 2017-05-25 06:45:41.000000000 +0200 @@ -0,0 +1,45 @@ + +Platform: NVIDIA CUDA + Device: Graphics Device + Driver version : 378.13 (Linux x64) + Compute units : 28 + Clock frequency : 1683 MHz + + Global memory bandwidth (GBPS) + float : 389.99 + float2 : 394.86 + float4 : 410.15 + float8 : 388.05 + float16 : 263.58 + + Single-precision compute (GFLOPS) + float : 11675.87 + float2 : 13240.07 + float4 : 13317.21 + float8 : 13151.05 + float16 : 12939.08 + + Double-precision compute (GFLOPS) + double : 425.21 + double2 : 432.63 + double4 : 425.45 + double8 : 420.62 + double16 : 409.39 + + Integer compute (GIOPS) + int : 3507.68 + int2 : 3801.87 + int4 : 3772.84 + int8 : 3774.45 + int16 : 3748.59 + + Transfer bandwidth (GBPS) + enqueueWriteBuffer : 9.96 + enqueueReadBuffer : 8.95 + enqueueMapBuffer(for read) : 11.11 + memcpy from mapped ptr : 12.16 + enqueueUnmap(after write) : 12.40 + memcpy to mapped ptr : 12.48 + + Kernel launch latency : 4.22 us + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/results/Qualcomm_Snapdragon/OnePlus_3T.log new/clpeak-1.0+git.20170525/results/Qualcomm_Snapdragon/OnePlus_3T.log --- old/clpeak-1.0+git.20160805/results/Qualcomm_Snapdragon/OnePlus_3T.log 1970-01-01 01:00:00.000000000 +0100 +++ new/clpeak-1.0+git.20170525/results/Qualcomm_Snapdragon/OnePlus_3T.log 2017-05-25 06:45:41.000000000 +0200 @@ -0,0 +1,45 @@ +Platform: QUALCOMM Snapdragon(TM) + Device: QUALCOMM Adreno(TM) + Driver version : OpenCL 2.0 QUALCOMM build: commit #6ff34ae changeid #I0ac3940325 Date: 09/23/16 Fri Local Branch: Remote Branch: refs/tags/AU_LINUX_ANDROID_LA.HB.1.3.2.06.00.01.214.261 Compiler E031.31.00.01 (Android) + Compute units : 4 + Clock frequency : 1 MHz + + Global memory bandwidth (GBPS) + float : 15.28 + float2 : 11.46 + float4 : 16.31 + float8 : 20.20 + float16 : 20.43 + + Single-precision compute (GFLOPS) + float : 249.85 + float2 : 249.99 + float4 : 237.66 + float8 : 263.18 + float16 : 202.15 + + half-precision compute (GFLOPS) + half : 260.10 + half2 : 391.85 + half4 : 383.45 + half8 : 270.69 + half16 : 202.19 + + No double precision support! Skipped + + Integer compute (GIOPS) + int : 52.21 + int2 : 55.46 + int4 : 71.81 + int8 : 69.74 + int16 : 69.22 + + Transfer bandwidth (GBPS) + enqueueWriteBuffer : 18.17 + enqueueReadBuffer : 8.72 + enqueueMapBuffer(for read) : 2839.08 + memcpy from mapped ptr : 8.45 + enqueueUnmap(after write) : 816.78 + memcpy to mapped ptr : 9.77 + + Kernel launch latency : 296.08 us diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/src/common.cpp new/clpeak-1.0+git.20170525/src/common.cpp --- old/clpeak-1.0+git.20160805/src/common.cpp 2016-08-05 08:23:31.000000000 +0200 +++ new/clpeak-1.0+git.20170525/src/common.cpp 2017-05-25 06:45:41.000000000 +0200 @@ -34,6 +34,7 @@ devInfo.maxGlobalSize = (uint)d.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>(); devInfo.maxClockFreq = (uint)d.getInfo<CL_DEVICE_MAX_CLOCK_FREQUENCY>(); devInfo.doubleSupported = false; + devInfo.halfSupported = false; std::string extns = d.getInfo<CL_DEVICE_EXTENSIONS>(); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/src/compute_hp.cpp new/clpeak-1.0+git.20170525/src/compute_hp.cpp --- old/clpeak-1.0+git.20160805/src/compute_hp.cpp 2016-08-05 08:23:31.000000000 +0200 +++ new/clpeak-1.0+git.20170525/src/compute_hp.cpp 2017-05-25 06:45:41.000000000 +0200 @@ -6,7 +6,7 @@ float timed, gflops; cl_uint workPerWI; cl::NDRange globalSize, localSize; - cl_half A = 0x1234; + cl_float A = 1.3f; int iters = devInfo.computeIters; if(!isComputeDP) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/clpeak-1.0+git.20160805/src/kernels/compute_hp_kernels.cl new/clpeak-1.0+git.20170525/src/kernels/compute_hp_kernels.cl --- old/clpeak-1.0+git.20160805/src/kernels/compute_hp_kernels.cl 2016-08-05 08:23:31.000000000 +0200 +++ new/clpeak-1.0+git.20170525/src/kernels/compute_hp_kernels.cl 2017-05-25 06:45:41.000000000 +0200 @@ -21,8 +21,9 @@ \n -__kernel void compute_hp_v1(__global half *ptr, half _A) +__kernel void compute_hp_v1(__global half *ptr, float _B) { + half _A = (half)_B; half x = _A; half y = (half)get_local_id(0); @@ -48,8 +49,9 @@ } -__kernel void compute_hp_v2(__global half *ptr, half _A) +__kernel void compute_hp_v2(__global half *ptr, float _B) { + half _A = (half)_B; half2 x = (half2)(_A, (_A+1)); half2 y = (half2)get_local_id(0); @@ -65,8 +67,9 @@ ptr[get_global_id(0)] = (y.S0) + (y.S1); } -__kernel void compute_hp_v4(__global half *ptr, half _A) +__kernel void compute_hp_v4(__global half *ptr, float _B) { + half _A = (half)_B; half4 x = (half4)(_A, (_A+1), (_A+2), (_A+3)); half4 y = (half4)get_local_id(0); @@ -83,8 +86,9 @@ } -__kernel void compute_hp_v8(__global half *ptr, half _A) +__kernel void compute_hp_v8(__global half *ptr, float _B) { + half _A = (half)_B; half8 x = (half8)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7)); half8 y = (half8)get_local_id(0); @@ -96,8 +100,9 @@ ptr[get_global_id(0)] = (y.S0) + (y.S1) + (y.S2) + (y.S3) + (y.S4) + (y.S5) + (y.S6) + (y.S7); } -__kernel void compute_hp_v16(__global half *ptr, half _A) +__kernel void compute_hp_v16(__global half *ptr, float _B) { + half _A = (half)_B; half16 x = (half16)(_A, (_A+1), (_A+2), (_A+3), (_A+4), (_A+5), (_A+6), (_A+7), (_A+8), (_A+9), (_A+10), (_A+11), (_A+12), (_A+13), (_A+14), (_A+15)); half16 y = (half16)get_local_id(0);
