Hi,
I'm modifying Theano to allow it to use the code generated by pycuda.
While doing so I needed 2 modifications to pycuda.
1) elemwise1.patch: This modification allows passing the block and grid
to the function generated by ElementwiseKernel. If they are not provided,
it continues to behave as before.
2) tools1.patch: recognize the npy_[u]int{8,16,32,64} and
npy_float{32,64} data types.
Do you have any questions or comments about those patches?
I don't use the gpuarray class instances that are passed to the pycuda
functions. I modified my own class to mimic their interface. While doing
so, I saw that you use the attributes size and mem_size, which seem to
always have the same value. Is that true? If so, why both?
thanks
Frédéric Bastien
--- /home/bastienf/repos/pycuda.git/pycuda/elementwise.py 2010-06-14 13:32:19.000000000 -0400
+++ elementwise.py 2010-07-15 12:25:13.000000000 -0400
@@ -114,7 +114,7 @@
"ElementwiseKernel can only be used with functions that have at least one " \
"vector argument"
- def __call__(self, *args):
+ def __call__(self, *args, **kwargs):
vectors = []
invocation_args = []
@@ -127,8 +127,17 @@
repr_vec = vectors[0]
invocation_args.append(repr_vec.mem_size)
- self.func.set_block_shape(*repr_vec._block)
- self.func.prepared_call(repr_vec._grid, *invocation_args)
+ _block = kwargs.get('block')
+ if _block:
+ self.func.set_block_shape(*_block)
+ else:
+ self.func.set_block_shape(*repr_vec._block)
+
+ _grid = kwargs.get('grid')
+ if _grid:
+ self.func.prepared_call(_grid, *invocation_args)
+ else:
+ self.func.prepared_call(repr_vec._grid, *invocation_args)
--- /home/bastienf/repos/pycuda.git/pycuda/tools.py 2010-06-14 13:32:19.000000000 -0400
+++ tools.py 2010-07-15 10:25:10.000000000 -0400
@@ -432,12 +432,14 @@
tp = c_arg[:decl_match.start()]
tp = " ".join(tp.split())
- if tp == "float": dtype = numpy.float32
- elif tp == "double": dtype = numpy.float64
+ if tp in ["float", "npy_float32"]: dtype = numpy.float32
+ elif tp in ["double", "npy_float64"]: dtype = numpy.float64
elif tp == "pycuda::complex<float>": dtype = numpy.complex64
elif tp == "pycuda::complex<double>": dtype = numpy.complex128
- elif tp in ["int", "signed int"]: dtype = numpy.int32
- elif tp in ["unsigned", "unsigned int"]: dtype = numpy.uint32
+ elif tp in ["int", "signed int", "npy_int32"]: dtype = numpy.int32
+ elif tp in ["unsigned", "unsigned int", "npy_uint32", "npy_ucs4"]: dtype = numpy.uint32
+ elif tp in ["npy_int64"]: dtype = numpy.int64
+ elif tp in ["npy_uint64"]: dtype = numpy.uint64
elif tp in ["long", "long int"]:
if platform_bits() == 64:
dtype = numpy.int64
@@ -448,12 +450,14 @@
dtype = numpy.uint64
else:
dtype = numpy.uint32
- elif tp in ["short", "short int"]: dtype = numpy.int16
- elif tp in ["unsigned short", "unsigned short int"]: dtype = numpy.uint16
- elif tp in ["char"]: dtype = numpy.int8
- elif tp in ["unsigned char"]: dtype = numpy.uint8
+ elif tp in ["short", "short int", "npy_int16"]: dtype = numpy.int16
+ elif tp in ["unsigned short", "unsigned short int", "npy_uint16"]: dtype = numpy.uint16
+ elif tp in ["char", "npy_int8"]: dtype = numpy.int8
+ elif tp in ["unsigned char", "npy_uint8"]: dtype = numpy.uint8
elif tp in ["bool"]: dtype = numpy.bool
- else: raise ValueError, "unknown type '%s'" % tp
+ else:
+ import pdb;pdb.set_trace()
+ raise ValueError, "unknown type '%s'" % tp
return arg_class(dtype, name)
_______________________________________________
PyCUDA mailing list
[email protected]
http://lists.tiker.net/listinfo/pycuda