[GLO] Re: Nuevo CUDA Tool-kit 3.2 II tutorial

Joel Rodriguez Thu, 09 Dec 2010 17:18:38 -0800

Hola AFE´s, GLO´s, ELUG´s

Los dos programas de abajo, en Fortran y C, calculan el cuadrado
de cada elemento de un arreglo de números complejos:
primero de forma normal, con un procesador de la computadora,
y después con la tarjeta de video nVidia CUDA de hasta 512 procesadores.


compilar como:

r...@pc-joel:~/NVIDIA_GPU_Computing_SDK/C/src/Fortran_Cuda# /usr/local/cuda/bin/nvcc -c -O3 Cuda_function.cu
r...@pc-joel:~/NVIDIA_GPU_Computing_SDK/C/src/Fortran_Cuda# gfortran -o complex_mul main.f90 Cuda_function.o -L/usr/local/cuda/lib -lcudart



para ejecutar:

r...@pc-joel:~/NVIDIA_GPU_Computing_SDK/C/src/Fortran_Cuda# ./complex_mul
 Results from Fortran

1 ( 1.0000000 , 2.0000000 ) ( -3.0000000 ,4.0000000 )
2 ( 2.0000000 , 4.0000000 ) ( -12.000000 ,16.000000 )
3 ( 3.0000000 , 6.0000000 ) ( -27.000000 ,36.000000 )
4 ( 4.0000000 , 8.0000000 ) ( -48.000000 ,64.000000 )
5 ( 5.0000000 , 10.000000 ) ( -75.000000 ,100.00000 )
6 ( 6.0000000 , 12.000000 ) ( -108.00000 ,144.00000 )
7 ( 7.0000000 , 14.000000 ) ( -147.00000 ,196.00000 )
8 ( 8.0000000 , 16.000000 ) ( -192.00000 ,256.00000 )

 Results from CUDA

r...@pc-joel:~/NVIDIA_GPU_Computing_SDK/C/src/Fortran_Cuda#



main.f90  (programa en Fortran)
-------------------------------------------------------------------------------------
program main

implicit none

! Working precision: the kind of a default real literal
integer, parameter :: fp_kind = kind(0.0)

! Number of complex samples
integer, parameter :: N=8

complex(fp_kind), dimension(N) :: c, c2
integer :: i

! Fill c with (k, 2k) for k = 1..N, then square it element-wise on the host
c  = (/ (cmplx(i,2*i), i = 1, N) /)
c2 = c*c

call show_results("Results from Fortran")

! Zero c2 so that what is printed afterwards is whatever cudafunction wrote
c2 = (0.0, 0.0)

! Same computation through the external routine cudafunction
! (Fortran -> C -> CUDA -> C -> Fortran)
call cudafunction(c,c2,N)

call show_results("Results from CUDA")

contains

  ! Print a heading, then one line per element: index, c(k), c2(k)
  subroutine show_results(heading)
    character(len=*), intent(in) :: heading
    integer :: k

    print *, heading
    do k = 1, N
      print *,k, c(k),c2(k)
    end do
  end subroutine show_results

end program main



Cuda_function.cu  (programa en C)
-------------------------------------------------------------------------------------------------
#include <stdio.h>
#include <cuComplex.h>
#include "cuda.h"


/* Device helper: product of two single-precision complex numbers
   (x holds the real part, y the imaginary part). */
__device__ cuComplex ComplexMul(cuComplex a, cuComplex b)
{
    const float re = a.x * b.x - a.y * b.y;
    const float im = a.x * b.y + a.y * b.x;
    return make_cuFloatComplex(re, im);
}

/*
   CUDA kernel: writes out[i] = in[i] * in[i] for every i in [0, N).

   Expects a 1-D launch with at least N threads in total; threads whose
   global index is >= N do nothing. Each thread reads and writes only its
   own element, so in and out may refer to the same buffer.
*/
__global__ void  square_complex(cuComplex *in, cuComplex *out, int N)
{
 /* A non-positive N would become a huge value in the unsigned comparison
    below and let every thread through, so reject it explicitly. */
 if( N<=0 ) return;

 const unsigned int index = blockIdx.x*blockDim.x+threadIdx.x;
 if( index < (unsigned int)N )
  {
   out[index] = ComplexMul(in[index], in[index]);
  }

}


/*
   Fortran-callable entry point: squares *Np single-precision complex
   numbers on the GPU.

   Fortran subroutine arguments are passed by reference, so
       call cudafunction(array_a, array_b, N)
   is mapped to
       cudafunction_(*a, *b, *N);
   (the trailing underscore matches the external symbol name emitted by the
   Fortran compiler used here).

   a   host input array of *Np elements (not modified)
   b   host output array of *Np elements; receives a[i]*a[i] on success
   Np  pointer to the element count

   If *Np <= 0 the call is a no-op. On any CUDA failure a message is written
   to stderr, the device buffer is released, and the contents of b must not
   be relied upon.
*/

extern "C" void cudafunction_(cuComplex *a, cuComplex *b,  int *Np)
{
  /* NOTE(review): 4 threads per block keeps the tutorial launch multi-block
     for small N, but it is far below a typical block size and caps the
     usable N at 4 * (maximum grid dimension). */
  const int block_size = 4;
  cuComplex *a_d = NULL;
  const char *step = "cudaMalloc";
  cudaError_t err;

  if( a == NULL || b == NULL || Np == NULL )
  {
    fprintf(stderr, "cudafunction: null argument\n");
    return;
  }

  const int N = *Np;
  if( N <= 0 ) return;   /* nothing to square; a zero-block grid is an invalid launch */

  const size_t bytes = sizeof(cuComplex) * (size_t)N;

  /* Allocate complex array on device */
  err = cudaMalloc((void **) &a_d, bytes);

  /* Copy array from host memory to device memory */
  if( err == cudaSuccess )
  {
    step = "cudaMemcpy (host to device)";
    err  = cudaMemcpy(a_d, a, bytes, cudaMemcpyHostToDevice);
  }

  if( err == cudaSuccess )
  {
    /* Execution configuration: enough blocks to cover N elements (ceiling division) */
    dim3 dimBlock(block_size);
    dim3 dimGrid (N/block_size + (N % block_size != 0 ? 1 : 0));

    /* Execute the kernel in place on the device buffer */
    step = "square_complex launch";
    square_complex<<<dimGrid,dimBlock>>>(a_d,a_d,N);

    /* A kernel launch returns no status; a bad configuration shows up here */
    err = cudaGetLastError();
  }

  /* Copy the result back; this blocking copy also surfaces kernel execution errors */
  if( err == cudaSuccess )
  {
    step = "cudaMemcpy (device to host)";
    err  = cudaMemcpy(b, a_d, bytes, cudaMemcpyDeviceToHost);
  }

  if( err != cudaSuccess )
  {
    fprintf(stderr, "cudafunction: %s failed: %s\n", step, cudaGetErrorString(err));
  }

  /* Free memory on the device (cudaFree accepts a null pointer) */
  cudaFree(a_d);

  return;
}






saludos,
:)

Joel Rodríguez

[GLO] Re: Nuevo CUDA Tool-kit 3.2 II tutorial

Responder a