Dear ViennaCL users and developers, I was playing with the tutorial example custom-cuda.cu and found a very strange result.
|#include <iostream> #include <string> #ifndef VIENNACL_WITH_CUDA #define VIENNACL_WITH_CUDA #endif // ViennaCL headers #include "viennacl/vector.hpp" #include "viennacl/matrix.hpp" #include "viennacl/linalg/matrix_operations.hpp" #include "viennacl/linalg/norm_2.hpp" #include "viennacl/linalg/prod.hpp // Some helper functions for this tutorial: #include "Random.hpp" template<typename NumericT> __global__ void elementwise_multiplication(const NumericT * vec1, const NumericT * vec2, NumericT * result, unsigned int size) { for (unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; i < size; i += gridDim.x * blockDim.x) result[i] = vec1[i] * vec2[i]; } int main() { typedef double NumericType; std::size_t N = 5; NumericType * device_vec1; NumericType * device_vec2; NumericType * device_result; cudaMalloc(&device_vec1, N * sizeof(NumericType)); cudaMalloc(&device_vec2, N * sizeof(NumericType)); cudaMalloc(&device_result, N * sizeof(NumericType)); std::vector<NumericType> temp(N); for (std::size_t i=0; i<temp.size(); ++i) temp[i] = NumericType(i); cudaMemcpy(device_vec1, &(temp[0]), temp.size() * sizeof(NumericType), cudaMemcpyHostToDevice ); for (std::size_t i=0; i<temp.size(); ++i) temp[i] = NumericType(2*i); cudaMemcpy(device_vec2, &(temp[0]), temp.size() * sizeof(NumericType), cudaMemcpyHostToDevice ); viennacl::vector<NumericType> vcl_vec1(device_vec1, viennacl::CUDA_MEMORY, N); viennacl::vector<NumericType> vcl_vec2(device_vec2, viennacl::CUDA_MEMORY, N); viennacl::vector<NumericType> vcl_result(device_result, viennacl::CUDA_MEMORY, N); std::cout << "Using existing kernel within ViennaCL:" << std::endl; elementwise_multiplication<<<128, 128>>>(vcl_vec1.handle().cuda_handle().get(), vcl_vec2.handle().cuda_handle().get(), vcl_result.handle().cuda_handle().get(), N); std::cout << "vec1 : " << vcl_vec1 << std::endl; std::cout << "vec2 : " << vcl_vec2 << std::endl; std::cout << "result: " << vcl_result << std::endl; std::cout << "!!!! 
TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl; return EXIT_SUCCESS; }| After compiling and running this program, I get the following output: | Using existing kernel within ViennaCL: vec1 : [5](0,1,2,3,4) vec2 : [5](0,2,4,6,8) result: [5](60,160,260,360,460) !!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!| Shouldn't the kernel produce the following result instead, i.e. the element-wise product of vec1 and vec2? | result: [5](0,2,8,18,32)| Am I missing something? Thank you in advance. With best regards, Arijit Hazra
------------------------------------------------------------------------------ One dashboard for servers and applications across Physical-Virtual-Cloud Widest out-of-the-box monitoring support with 50+ applications Performance metrics, stats and reports that give you Actionable Insights Deep dive visibility with transaction tracing using APM Insight. http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
_______________________________________________ ViennaCL-support mailing list ViennaCL-support@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/viennacl-support