Re: [PyCUDA] My modification to PyCUDA

2010-01-07 Thread Andreas Klöckner
On Wednesday, 30 December 2009, Ying Wai (Daniel) Fan wrote:
 Andreas,
 
 I have done some changes to make arithmetic operations work with complex
 GPUArray objects. The patch is attached.

I don't quite agree with your treatment of the complex scalars.
Couple possibilities:

1) We ship a fixed version of the struct module that can serialize
complex numbers.

2) struct can be fooled into serializing "2f", i.e. the real and imaginary
components separately.

3) We serialize numpy complex byte-for-byte via
buffer(numpy.complex(3j)), circumventing the need for
struct to understand complex.
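
For what it's worth, a minimal sketch of option 3, serializing the scalar
through numpy rather than struct (pack_complex_scalar is only an
illustrative name, not existing PyCUDA API):

import numpy

def pack_complex_scalar(value):
    # Byte-for-byte serialization of a complex scalar as a single-precision
    # float2: real part first, then imaginary part.
    return numpy.array(value, dtype=numpy.complex64).tostring()

# 3j serializes to the two float32 values 0.0 and 3.0.
assert numpy.fromstring(pack_complex_scalar(3j),
                        dtype=numpy.float32).tolist() == [0.0, 3.0]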

What do you think?

Andreas




Re: [PyCUDA] My modification to PyCUDA

2009-12-23 Thread Ying Wai (Daniel) Fan

Hi,

Regarding my complex arithmetic wrapper (complex.py) posted previously
on this mailing list, the following changes need to be made to make it
work. One usage scenario of the wrapper is arithmetic operations on the
output of CUFFT, which is in the float2 datatype on the GPU.


diff --git a/pycuda/tools.py b/pycuda/tools.py
index f91a6d5..d7af3a9 100644
--- a/pycuda/tools.py
+++ b/pycuda/tools.py
@@ -371,6 +371,8 @@ def dtype_to_ctype(dtype, with_fp_tex_hack=False):
             return "fp_tex_double"
         else:
             return "double"
+    elif dtype == numpy.complex64:
+        return "float2"
     else:
         raise ValueError, "unable to map dtype '%s'" % dtype

@@ -447,6 +449,7 @@ def parse_c_arg(c_arg):
     elif tp in ["char"]: dtype = numpy.int8
     elif tp in ["unsigned char"]: dtype = numpy.uint8
     elif tp in ["bool"]: dtype = numpy.bool
+    elif tp in ["float2"]: dtype = numpy.complex64
     else: raise ValueError, "unknown type '%s'" % tp
 
     return arg_class(dtype, name)
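
To illustrate what the patch enables, a quick host-side check (assuming the
patched pycuda.tools is on the path):

import numpy
from pycuda.tools import dtype_to_ctype, parse_c_arg

# complex64 arrays now map to CUDA's float2 in generated kernel source...
assert dtype_to_ctype(numpy.complex64) == "float2"

# ...and a float2 pointer in a kernel signature maps back to complex64.
arg = parse_c_arg("float2 *out")
assert arg.dtype == numpy.complex64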

Daniel
"""This code is written by Ying Wai (Daniel) Fan, Emory University, Dec 2009.
   Add complex number (single precision) support to some of PyCUDA functions.
"""
import numpy
from pytools import memoize
import pycuda
from pycuda.elementwise import get_elwise_kernel

def _all_complex(dtype_x, dtype_y, dtype_z):
    """Check whether all three datatypes are complex64."""
    complex_arg_count = [dtype_x, dtype_y, dtype_z].count(numpy.complex64)
    if complex_arg_count == 0:
        return False
    elif complex_arg_count == 3:
        return True
    else:
        raise TypeError('Mixed operations between real and complex types not implemented.')

##########
# axpbyz #
##########
get_axpbyz_kernel_original = pycuda.elementwise.get_axpbyz_kernel

@memoize
def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z):
    if _all_complex(dtype_x, dtype_y, dtype_z):
        return get_elwise_kernel(
                "float a, float2 *x, float b, float2 *y, float2 *z",
                "z[i].x = a*x[i].x + b*y[i].x; z[i].y = a*x[i].y + b*y[i].y",
                "axpbyz")
    else:
        return get_axpbyz_kernel_original(dtype_x, dtype_y, dtype_z)

pycuda.elementwise.get_axpbyz_kernel = get_axpbyz_kernel
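
With get_axpbyz_kernel patched as above, ordinary GPUArray addition should
work on complex64 arrays. A usage sketch (assuming the pycuda/tools.py patch
from earlier in this thread is also applied so float2 arguments can be
parsed, that this wrapper file is importable as complex.py, and that
GPUArray looks its kernels up through pycuda.elementwise at call time,
which is what the monkey-patching relies on):

import numpy
import pycuda.autoinit
import pycuda.gpuarray as gpuarray
import complex   # the wrapper module above; importing it installs the patches

a = (numpy.random.randn(4) + 1j*numpy.random.randn(4)).astype(numpy.complex64)
b = (numpy.random.randn(4) + 1j*numpy.random.randn(4)).astype(numpy.complex64)
a_gpu = gpuarray.to_gpu(a)
b_gpu = gpuarray.to_gpu(b)

# a_gpu + b_gpu goes through the patched axpbyz kernel (with a = b = 1).
assert numpy.allclose((a_gpu + b_gpu).get(), a + b)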


############
# multiply #
############

get_multiply_kernel_original = pycuda.elementwise.get_multiply_kernel

@memoize
def get_multiply_kernel(dtype_x, dtype_y, dtype_z):
    if _all_complex(dtype_x, dtype_y, dtype_z):
        return get_elwise_kernel(
                "float2 *x, float2 *y, float2 *z",
                "z[i].x = x[i].x * y[i].x - x[i].y * y[i].y; \
                 z[i].y = x[i].x * y[i].y + x[i].y * y[i].x",
                "multiply")
    else:
        return get_multiply_kernel_original(dtype_x, dtype_y, dtype_z)

pycuda.elementwise.get_multiply_kernel = get_multiply_kernel

def multiply(x, y, z, add_timer=None, stream=None):
    """Compute ``z = x * y``."""
    assert x.shape == y.shape

    func = get_multiply_kernel(x.dtype, y.dtype, z.dtype)
    func.set_block_shape(*x._block)

    if add_timer is not None:
        add_timer(3*x.size, func.prepared_timed_call(x._grid,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size))
    else:
        func.prepared_async_call(x._grid, stream,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size)
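
Continuing the GPUArray example above, the multiply helper takes a
preallocated output array (a sketch under the same assumptions):

z_gpu = gpuarray.empty_like(a_gpu)
complex.multiply(a_gpu, b_gpu, z_gpu)
assert numpy.allclose(z_gpu.get(), a * b)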

##########
# divide #
##########
get_divide_kernel_original = pycuda.elementwise.get_divide_kernel

@memoize
def get_divide_kernel(dtype_x, dtype_y, dtype_z):
    if _all_complex(dtype_x, dtype_y, dtype_z):
        return get_elwise_kernel(
                "float2 *x, float2 *y, float2 *z",
                "norm_squared = y[i].x * y[i].x + y[i].y * y[i].y; \
                 z[i].x = (x[i].x * y[i].x + x[i].y * y[i].y) / norm_squared; \
                 z[i].y = (-x[i].x * y[i].y + x[i].y * y[i].x) / norm_squared",
                "divide", loop_prep="float norm_squared;")
    else:
        return get_divide_kernel_original(dtype_x, dtype_y, dtype_z)

pycuda.elementwise.get_divide_kernel = get_divide_kernel
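
The kernel uses the usual identity x/y = x*conj(y)/|y|^2. A quick numpy
check of that formula itself (host-side only, no GPU involved):

import numpy
x = numpy.complex64(1+2j)
y = numpy.complex64(3-4j)
norm_squared = y.real*y.real + y.imag*y.imag
z_real = ( x.real*y.real + x.imag*y.imag) / norm_squared
z_imag = (-x.real*y.imag + x.imag*y.real) / norm_squared
assert numpy.allclose(z_real + 1j*z_imag, x/y)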

######################
# conjugate_multiply #
######################
@memoize
def get_conjugate_multiply_kernel(dtype_x, dtype_y, dtype_z):
    return get_elwise_kernel(
            "float2 *x, float2 *y, float2 *z",
            "z[i].x = x[i].x * y[i].x + x[i].y * y[i].y; \
             z[i].y = x[i].y * y[i].x - x[i].x * y[i].y",
            "conjugate_multiply")

def conjugate_multiply(x, y, z, add_timer=None, stream=None):
    """Compute ``z = conj(x) * y``."""
    assert x.shape == y.shape

    func = get_conjugate_multiply_kernel(x.dtype, y.dtype, z.dtype)
    func.set_block_shape(*x._block)

    if add_timer is not None:
        add_timer(3*x.size, func.prepared_timed_call(x._grid,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size))
    else:
        func.prepared_async_call(x._grid, stream,
                x.gpudata, y.gpudata, z.gpudata, x.mem_size)
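
As with multiply above, conjugate_multiply can be checked against numpy
(same assumed setup as the earlier usage sketch):

z_gpu = gpuarray.empty_like(a_gpu)
complex.conjugate_multiply(a_gpu, b_gpu, z_gpu)
assert numpy.allclose(z_gpu.get(), numpy.conj(a) * b)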


####################
# divide_by_scalar #
####################

@memoize
def get_divide_by_scalar_kernel():
    return get_elwise_kernel(
            "float2 *x, float y, float2 *z",
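            # [The archived message is cut off above. The lines below are a
            #  guessed completion, by analogy with the kernels earlier in this
            #  file -- they are not part of the original posting.]
            "z[i].x = x[i].x / y; \
             z[i].y = x[i].y / y",
            "divide_by_scalar")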

Re: [PyCUDA] My modification to PyCUDA

2009-12-14 Thread Ying Wai (Daniel) Fan

Hi Andreas,

 def platform_bits():
-    from struct import calcsize
-    return calcsize('l') * 8
+    return tuple.__itemsize__ * 8

Why is this better?
  

I did not make any change there. It must be from an older version of PyCUDA.


+    elif dtype == numpy.complex64:
+        return "float2"

This is incorrect.
  
float2 is a structure defined in CUDA. It has an x and a y field. This
structure is used by CUFFT to represent complex numbers. These two
additional lines are needed for moving complex arrays to and from the
GPU, and for my CUFFT and complex arithmetic wrappers.
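
For illustration, numpy.complex64 already has exactly the float2 memory
layout, i.e. two packed 32-bit floats, real part first:

>>> import numpy
>>> a = numpy.array([3+4j], dtype=numpy.complex64)
>>> a.itemsize
8
>>> a.view(numpy.float32)
array([ 3.,  4.], dtype=float32)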



+    elif tp in ["bool"]: return "?"

"?" is not a documented type character in Python's struct module.
  
It is documented in the table at
http://docs.python.org/library/struct.html
and also in the docstring of numpy.bool_.
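
For reference, "?" maps to the C99 _Bool type and is accepted by struct
as of Python 2.6:

>>> import struct
>>> struct.calcsize('?')
1
>>> struct.pack('?', True)
'\x01'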


I have also written some code for doing fast Fourier transforms and
complex number arithmetic with PyCUDA. If you think these can be
included in PyCUDA, I can send you those files too.



Please do, I think this could be very useful.
  

Should I send the code as attachments or inline to the mailing list?

Daniel
