Hi!
I am new to the OpenCL/GPU world, and I probably expected too much from it. I
am computing the 1-norm (norm_1) of a vector on both the CPU and the GPU, and
I got these results:
Platform Apple
Version OpenCL 1.2 (Jan 4 2017 22:35:59)
> Device type CPU
Version OpenCL 1.2 (Jan 4 2017 22:35:59)
> Device type GPU
Version OpenCL 1.2 (Jan 4 2017 22:35:59)
STARTING, TIMES ARE IN MILLISECONDS
Reserving CPU vector
Reserving CPU vector 16
Filling CPU vector
Filling CPU vector 9
Reserving GPU vector
Reserving GPU vector 82
Copying to GPU
Copying to GPU 158310
Computing norm_1 on GPU
Computing norm_1 on GPU 333
Computing norm_1 on CPU
Computing norm_1 on CPU 8
GPU: 5e+11 CPU: 5.00000e+11
Program ended with exit code: 0
As you can see, the GPU times are far higher than the CPU ones. My code is
really simple, and I am following the recommended practice of building the
values on the CPU and then copying them to the GPU.
Is this bad performance due to my platform? I am running on a MacBook Pro now.
My code follows.
Thanks!
#include <iostream>
#include <vector>
#include <algorithm>
#include <cstdlib>
#include <numeric>
#include <chrono>
#define CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define __CL_ENABLE_EXCEPTIONS
#define VIENNACL_WITH_OPENCL
#include "cl.hpp"
#include "viennacl/scalar.hpp"
#include "viennacl/vector.hpp"
#include "viennacl/ocl/backend.hpp"
#include "viennacl/linalg/norm_1.hpp"
int main(int argc, const char * argv[])
{
// This is what vienna sees
auto viennaplatforms = viennacl::ocl::get_platforms();
auto viennadevices = viennacl::ocl::platform().devices();
// See what standard OpenCL sees
std::vector<cl::Platform> platforms;
// Get platform
cl::Platform::get(&platforms);
// Temp
std::string s;
// Where the GPU lies
cl::Device gpudevice;
// Found a GPU
bool gpufound = false;
// See if we have a GPU
for (auto p : platforms)
{
s.clear();
p.getInfo(CL_PLATFORM_NAME, &s);
std::cout << "Platform " << s << std::endl;
s.clear();
p.getInfo(CL_PLATFORM_VERSION, &s);
std::cout << "Version " << s << std::endl;
std::cout << std::endl;
std::vector<cl::Device> devices;
p.getDevices(CL_DEVICE_TYPE_ALL, &devices);
for (auto d : devices)
{
std::size_t i = 4;
d.getInfo(CL_DEVICE_TYPE, &i);
std::cout << "> Device type " <<
(i & CL_DEVICE_TYPE_CPU ? "CPU" : "") <<
(i & CL_DEVICE_TYPE_GPU ? "GPU" : "") <<
(i & CL_DEVICE_TYPE_ACCELERATOR ? "ACCELERATOR" : "") <<
std::endl;
if (i & CL_DEVICE_TYPE_GPU)
{
gpudevice = d;
gpufound = true;
}
std::cout << "Version " << s << std::endl;
}
}
if (!gpufound)
{
std::cout << "NO GPU FOUND. ABORTING." << std::endl;
return 1;
}
// Size
int size = 1 * 1000 * 1000;
// Measuring time
auto start = std::chrono::steady_clock::now();
std::cout << std::endl << "STARTING, TIMES ARE IN MILLISECONDS" <<
std::endl << std::endl;
std::cout << "Reserving CPU vector " << std::endl;
start = std::chrono::steady_clock::now();
std::vector<double> cpuv;
cpuv.resize(size);
std::cout << "Reserving CPU vector " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "Filling CPU vector " << std::endl;
start = std::chrono::steady_clock::now();
std::iota(cpuv.begin(), cpuv.end(), 1.0 );
std::cout << "Filling CPU vector " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "Reserving GPU vector " << std::endl;
start = std::chrono::steady_clock::now();
viennacl::vector<float> gpuv;
gpuv.resize(size);
std::cout << "Reserving GPU vector " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "Copying to GPU " << std::endl;
start = std::chrono::steady_clock::now();
std::copy(cpuv.begin(), cpuv.end(), gpuv.begin());
std::cout << "Copying to GPU " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "Computing norm_1 on GPU " << std::endl;
start = std::chrono::steady_clock::now();
double gpunorm1 = viennacl::linalg::norm_1(gpuv);
std::cout << "Computing norm_1 on GPU " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "Computing norm_1 on CPU " << std::endl;
start = std::chrono::steady_clock::now();
double cpunorm1 = std::accumulate(cpuv.begin(), cpuv.end(), 0.0,
[](double a, double b){ return a +
std::abs(b); });
std::cout << "Computing norm_1 on CPU " <<
std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now()-start).count()
<< std::endl << std::endl;
std::cout << "GPU: " << gpunorm1 << " CPU: " << cpunorm1 << std::endl;
return 0;
}
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
ViennaCL-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/viennacl-devel