Dear ViennaCL users and developers,

I was playing with the tutorial problem custom-cuda.cu. I found a very strange result.

|#include <iostream>
#include <string>

 #ifndef VIENNACL_WITH_CUDA
      #define VIENNACL_WITH_CUDA
#endif

  // ViennaCL headers
 #include "viennacl/vector.hpp"
 #include "viennacl/matrix.hpp"
#include "viennacl/linalg/matrix_operations.hpp"
 #include "viennacl/linalg/norm_2.hpp"
 #include "viennacl/linalg/prod.hpp

 // Some helper functions for this tutorial:
  #include "Random.hpp"

template<typename NumericT>
    __global__ void elementwise_multiplication(const NumericT * vec1,
                                                                         const 
NumericT * vec2,
                                                                         
NumericT * result,
                                                                         
unsigned int size)
 {
         for (unsigned int i = blockDim.x * blockIdx.x + threadIdx.x;
                           i < size;
                         i += gridDim.x * blockDim.x)
                  result[i] = vec1[i] * vec2[i];
 }
int main()
{
      typedef double       NumericType;
      std::size_t N = 5;

     NumericType * device_vec1;
     NumericType * device_vec2;
     NumericType * device_result;

    cudaMalloc(&device_vec1,   N * sizeof(NumericType));
    cudaMalloc(&device_vec2,   N * sizeof(NumericType));
    cudaMalloc(&device_result, N * sizeof(NumericType));

    std::vector<NumericType> temp(N);
    for (std::size_t i=0; i<temp.size(); ++i)
         temp[i] = NumericType(i);
    cudaMemcpy(device_vec1, &(temp[0]), temp.size() * sizeof(NumericType),
                            cudaMemcpyHostToDevice );

  for (std::size_t i=0; i<temp.size(); ++i)
           temp[i] = NumericType(2*i);
  cudaMemcpy(device_vec2, &(temp[0]), temp.size() * sizeof(NumericType),
                        cudaMemcpyHostToDevice );

   viennacl::vector<NumericType> vcl_vec1(device_vec1, viennacl::CUDA_MEMORY, 
N);
   viennacl::vector<NumericType> vcl_vec2(device_vec2, viennacl::CUDA_MEMORY, 
N);
   viennacl::vector<NumericType> vcl_result(device_result, 
viennacl::CUDA_MEMORY, N);

   std::cout << "Using existing kernel within ViennaCL:" << std::endl;
   elementwise_multiplication<<<128, 
128>>>(vcl_vec1.handle().cuda_handle().get(),
                                   vcl_vec2.handle().cuda_handle().get(),
                                   vcl_result.handle().cuda_handle().get(),
                                   N);

   std::cout << "vec1  : " << vcl_vec1 << std::endl;
   std::cout << "vec2  : " << vcl_vec2 << std::endl;
   std::cout << "result: " << vcl_result << std::endl;

  std::cout << "!!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!" << std::endl;

  return EXIT_SUCCESS;|

}

And after compiling the result I am getting the following result

|        Using existing kernel within ViennaCL:
      vec1  : [5](0,1,2,3,4)
      vec2  : [5](0,2,4,6,8)
      result: [5](60,160,260,360,460)
      !!!! TUTORIAL COMPLETED SUCCESSFULLY !!!!|

|
|Should not kernel produce the following result?

|  result: [5](0,2,8,18,32)|



Am I missing something ? Thank you in advance.

With Best Regards,
Arijit Hazra
------------------------------------------------------------------------------
One dashboard for servers and applications across Physical-Virtual-Cloud 
Widest out-of-the-box monitoring support with 50+ applications
Performance metrics, stats and reports that give you Actionable Insights
Deep dive visibility with transaction tracing using APM Insight.
http://ad.doubleclick.net/ddm/clk/290420510;117567292;y
_______________________________________________
ViennaCL-support mailing list
ViennaCL-support@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/viennacl-support

Reply via email to