I tried running test_driver.py and got the following errors:
-----
============================= test session starts ==============================
python: platform win32 -- Python 2.6.2 -- pytest-1.2.1
test object 1: test_driver.py
test_driver.py ..F..F.FF.F....F..
=================================== FAILURES ===================================
___________________________ TestDriver.test_gpuarray ___________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x0110D418>
@mark_cuda_test
def test_gpuarray(self):
import numpy
a = numpy.arange(200000, dtype=numpy.float32)
b = a + 17
import pycuda.gpuarray as gpuarray
a_g = gpuarray.to_gpu(a)
b_g = gpuarray.to_gpu(b)
> diff = (a_g-3*b_g+(-a_g)).get() - (a-3*b+(-a))
test_driver.py:139:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
def __sub__(self, other):
"""Substract an array from an array or a scalar from an array."""
if isinstance(other, GPUArray):
result = self._new_like_me(_get_common_dtype(self, other))
> return self._axpbyz(1, other, -1, result)
c:\python26\lib\site-packages\pycuda\gpuarray.py:241:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
selffac = 1
other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
otherfac = -1
out = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
add_timer = None, stream = None
def _axpbyz(self, selffac, other, otherfac, out, add_timer=None,
stream=None):
"""Compute ``out = selffac * self + otherfac*other``,
where `other` is a vector.."""
assert self.shape == other.shape
> func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype,
out.dtype)
c:\python26\lib\site-packages\pycuda\gpuarray.py:144:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dtype_x = dtype('float32'), dtype_y = dtype('float32')
dtype_z = dtype('float32')
> ???
<string>:1:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
func = <function get_axpbyz_kernel at 0x00E5F430>
@decorator
def context_dependent_memoize(func, *args):
try:
ctx_dict = func._pycuda_ctx_dep_memoize_dic
except AttributeError:
# FIXME: This may keep contexts alive longer than desired.
# But I guess since the memory in them is freed, who cares.
ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}
cur_ctx = cuda.Context.get_current()
try:
return ctx_dict[cur_ctx][args]
except KeyError:
arg_dict = ctx_dict.setdefault(cur_ctx, {})
> result = func(*args)
c:\python26\lib\site-packages\pycuda\tools.py:478:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
dtype_x = dtype('float32'), dtype_y = dtype('float32')
dtype_z = dtype('float32')
@context_dependent_memoize
def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z):
return get_elwise_kernel(
"%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y,
%(tp_z)s *z" % {
"tp_x": dtype_to_ctype(dtype_x),
"tp_y": dtype_to_ctype(dtype_y),
"tp_z": dtype_to_ctype(dtype_z),
},
"z[i] = a*x[i] + b*y[i]",
> "axpbyz")
c:\python26\lib\site-packages\pycuda\elementwise.py:313:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arguments = 'float a, float *x, float b, float *y, float *z'
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = []
def get_elwise_kernel(arguments, operation,
name="kernel", keep=False, options=[], **kwargs):
"""Return a L{pycuda.driver.Function} that performs the same
scalar operation
on one or several vectors.
"""
func, arguments = get_elwise_kernel_and_types(
> arguments, operation, name, keep, options, **kwargs)
c:\python26\lib\site-packages\pycuda\elementwise.py:97:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = []
def get_elwise_kernel_and_types(arguments, operation,
name="kernel", keep=False, options=[], **kwargs):
if isinstance(arguments, str):
from pycuda.tools import parse_c_arg
arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
arguments.append(ScalarArg(numpy.uintp, "n"))
mod = get_elwise_module(arguments, operation, name,
> keep, options, **kwargs)
c:\python26\lib\site-packages\pycuda\elementwise.py:83:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = [], preamble = '', loop_prep = '', after_loop = ''
def get_elwise_module(arguments, operation,
name="kernel", keep=False, options=[],
preamble="", loop_prep="", after_loop=""):
from pycuda.compiler import SourceModule
return SourceModule("""
%(preamble)s
__global__ void %(name)s(%(arguments)s)
{
unsigned tid = threadIdx.x;
unsigned total_threads = gridDim.x*blockDim.x;
unsigned cta_start = blockDim.x*blockIdx.x;
unsigned i;
%(loop_prep)s;
for (i = cta_start + tid; i < n; i += total_threads)
{
%(operation)s;
}
%(after_loop)s;
}
""" % {
"arguments": ", ".join(arg.declarator() for arg in
arguments),
"operation": operation,
"name": name,
"preamble": preamble,
"loop_prep": loop_prep,
"after_loop": after_loop,
},
> options=options, keep=keep)
def get_elwise_kernel_and_types(arguments, operation,
name="kernel", keep=False, options=[], **kwargs):
c:\python26\lib\site-packages\pycuda\elementwise.py:72:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x01115C50>
source = '\n \n\n __global__ void axpbyz(float a, float *x, float b, float *y, float *z, unsigned int n)\n ...i += total_threads)\n {\n z[i] = a*x[i] + b*y[i];\n }\n\n ;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
__________________________ TestDriver.test_3d_texture __________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x0263EE18>
@mark_cuda_test
def test_3d_texture(self):
# adapted from code by Nicolas Pinto
w = 2
h = 4
d = 8
shape = (w, h, d)
a = numpy.asarray(
numpy.random.randn(*shape),
dtype=numpy.float32, order="F")
descr = drv.ArrayDescriptor3D()
descr.width = w
descr.height = h
descr.depth = d
descr.format = drv.dtype_to_array_format(a.dtype)
descr.num_channels = 1
descr.flags = 0
ary = drv.Array(descr)
copy = drv.Memcpy3D()
copy.set_src_host(a)
copy.set_dst_array(ary)
copy.width_in_bytes = copy.src_pitch = a.strides[1]
copy.src_height = copy.height = h
copy.depth = d
copy()
mod = SourceModule("""
texture<float, 3, cudaReadModeElementType> mtx_tex;
__global__ void copy_texture(float *dest)
{
int x = threadIdx.x;
int y = threadIdx.y;
int z = threadIdx.z;
int dx = blockDim.x;
int dy = blockDim.y;
int i = (z*dy + y)*dx + x;
dest[i] = tex3D(mtx_tex, x, y, z);
//dest[i] = x;
}
> """)
copy_texture = mod.get_function("copy_texture")
mtx_tex = mod.get_texref("mtx_tex")
mtx_tex.set_array(ary)
dest = numpy.zeros(shape, dtype=numpy.float32, order="F")
copy_texture(drv.Out(dest), block=shape, texrefs=[mtx_tex])
assert la.norm(dest-a) == 0
@mark_cuda_test
def test_prepared_invocation(self):
test_driver.py:421:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x01121410>
source = '\n texture<float, 3, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...nt i = (z*dy + y)*dx + x;\n dest[i] = tex3D(mtx_tex, x, y, z);\n //dest[i] = x;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
___________________ TestDriver.test_multichannel_2d_texture ____________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x0263E710>
@mark_cuda_test
def test_multichannel_2d_texture(self):
mod = SourceModule("""
#define CHANNELS 4
texture<float4, 2, cudaReadModeElementType> mtx_tex;
__global__ void copy_texture(float *dest)
{
int row = threadIdx.x;
int col = threadIdx.y;
int w = blockDim.y;
float4 texval = tex2D(mtx_tex, row, col);
dest[(row*w+col)*CHANNELS + 0] = texval.x;
dest[(row*w+col)*CHANNELS + 1] = texval.y;
dest[(row*w+col)*CHANNELS + 2] = texval.z;
dest[(row*w+col)*CHANNELS + 3] = texval.w;
}
> """)
copy_texture = mod.get_function("copy_texture")
mtx_tex = mod.get_texref("mtx_tex")
shape = (5,6)
channels = 4
a = numpy.asarray(
numpy.random.randn(*((channels,)+shape)),
dtype=numpy.float32, order="F")
drv.bind_array_to_texref(
drv.make_multichannel_2d_array(a, order="F"), mtx_tex)
dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
copy_texture(drv.Out(dest),
block=shape+(1,),
texrefs=[mtx_tex]
)
reshaped_a = a.transpose(1,2,0)
#print reshaped_a
#print dest
assert la.norm(dest-reshaped_a) == 0
@mark_cuda_test
def test_multichannel_linear_texture(self):
test_driver.py:238:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x0111BEF0>
source = '\n #define CHANNELS 4\n texture<float4, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void... dest[(row*w+col)*CHANNELS + 2] = texval.z;\n dest[(row*w+col)*CHANNELS + 3] = texval.w;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
__________________________ TestDriver.test_2d_texture __________________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x02627F30>
@mark_cuda_test
def test_2d_texture(self):
mod = SourceModule("""
texture<float, 2, cudaReadModeElementType> mtx_tex;
__global__ void copy_texture(float *dest)
{
int row = threadIdx.x;
int col = threadIdx.y;
int w = blockDim.y;
dest[row*w+col] = tex2D(mtx_tex, row, col);
}
> """)
copy_texture = mod.get_function("copy_texture")
mtx_tex = mod.get_texref("mtx_tex")
shape = (3,4)
a = numpy.random.randn(*shape).astype(numpy.float32)
drv.matrix_to_texref(a, mtx_tex, order="F")
dest = numpy.zeros(shape, dtype=numpy.float32)
copy_texture(drv.Out(dest),
block=shape+(1,),
texrefs=[mtx_tex]
)
assert la.norm(dest-a) == 0
@mark_cuda_test
def test_multiple_2d_textures(self):
test_driver.py:170:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x01115C90>
source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...hreadIdx.y;\n int w = blockDim.y;\n dest[row*w+col] = tex2D(mtx_tex, row, col);\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
_____________________ TestDriver.test_multiple_2d_textures _____________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x02645620>
@mark_cuda_test
def test_multiple_2d_textures(self):
mod = SourceModule("""
texture<float, 2, cudaReadModeElementType> mtx_tex;
texture<float, 2, cudaReadModeElementType> mtx2_tex;
__global__ void copy_texture(float *dest)
{
int row = threadIdx.x;
int col = threadIdx.y;
int w = blockDim.y;
dest[row*w+col] =
tex2D(mtx_tex, row, col)
+
tex2D(mtx2_tex, row, col);
}
> """)
copy_texture = mod.get_function("copy_texture")
mtx_tex = mod.get_texref("mtx_tex")
mtx2_tex = mod.get_texref("mtx2_tex")
shape = (3,4)
a = numpy.random.randn(*shape).astype(numpy.float32)
b = numpy.random.randn(*shape).astype(numpy.float32)
drv.matrix_to_texref(a, mtx_tex, order="F")
drv.matrix_to_texref(b, mtx2_tex, order="F")
dest = numpy.zeros(shape, dtype=numpy.float32)
copy_texture(drv.Out(dest),
block=shape+(1,),
texrefs=[mtx_tex, mtx2_tex]
)
assert la.norm(dest-a-b) < 1e-6
@mark_cuda_test
def test_multichannel_2d_texture(self):
test_driver.py:202:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x011215F0>
source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n texture<float, 2, cudaReadModeElementType> mtx... tex2D(mtx_tex, row, col)\n +\n tex2D(mtx2_tex, row, col);\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
_________________ TestDriver.test_multichannel_linear_texture __________________
def f(*args, **kwargs):
import pycuda.driver
# appears to be idempotent, i.e. no harm in calling it more than once
pycuda.driver.init()
ctx = make_default_context()
try:
assert isinstance(ctx.get_device().name(), str)
assert isinstance(ctx.get_device().compute_capability(), tuple)
assert isinstance(ctx.get_device().get_attributes(), dict)
> inner_f(*args, **kwargs)
c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <test_driver.TestDriver instance at 0x02645710>
@mark_cuda_test
def test_multichannel_linear_texture(self):
mod = SourceModule("""
#define CHANNELS 4
texture<float4, 1, cudaReadModeElementType> mtx_tex;
__global__ void copy_texture(float *dest)
{
int i = threadIdx.x+blockDim.x*threadIdx.y;
float4 texval = tex1Dfetch(mtx_tex, i);
dest[i*CHANNELS + 0] = texval.x;
dest[i*CHANNELS + 1] = texval.y;
dest[i*CHANNELS + 2] = texval.z;
dest[i*CHANNELS + 3] = texval.w;
}
> """)
copy_texture = mod.get_function("copy_texture")
mtx_tex = mod.get_texref("mtx_tex")
shape = (16, 16)
channels = 4
a = numpy.random.randn(*(shape+(channels,))).astype(numpy.float32)
a_gpu = drv.to_device(a)
mtx_tex.set_address(a_gpu, a.nbytes)
mtx_tex.set_format(drv.array_format.FLOAT, 4)
dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
copy_texture(drv.Out(dest),
block=shape+(1,),
texrefs=[mtx_tex]
)
#print a
#print dest
assert la.norm(dest-a) == 0
@mark_cuda_test
def test_large_smem(self):
test_driver.py:276:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
self = <pycuda.compiler.SourceModule object at 0x0111BFD0>
source = '\n #define CHANNELS 4\n texture<float4, 1, cudaReadModeElementType> mtx_tex;\n\n __global__ void...texval.y;\n dest[i*CHANNELS + 2] = texval.z;\n dest[i*CHANNELS + 3] = texval.w;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []
def __init__(self, source, nvcc="nvcc", options=[], keep=False,
no_extern_c=False, arch=None, code=None, cache_dir=None,
include_dirs=[]):
if arch is not None:
try:
from pycuda.driver import Context
capability = Context.get_device().compute_capability()
if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
from warnings import warn
warn("trying to compile for a compute capability "
"higher than selected GPU")
except:
pass
cubin = compile(source, nvcc, options, keep, no_extern_c,
arch, code, cache_dir, include_dirs)
from pycuda.driver import module_from_buffer
> self.module = module_from_buffer(cubin)
E LogicError: cuModuleLoadDataEx failed: invalid image -
c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
===================== 6 failed, 12 passed in 1.38 seconds ======================
-----
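All six failures die at the same spot: module_from_buffer rejects the cubin that nvcc just produced ("cuModuleLoadDataEx failed: invalid image"), so no kernel ever runs. Below is a minimal sketch that isolates just that compile-and-load step, going through the same pycuda.compiler.compile / module_from_buffer calls as the traceback (it assumes device 0 and a trivial do-nothing kernel):

# Minimal repro of the failing step: compile a trivial kernel with nvcc
# and hand the resulting cubin to the driver, which is where every test dies.
import pycuda.driver as drv
from pycuda.compiler import compile

drv.init()
dev = drv.Device(0)
ctx = dev.make_context()
try:
    print "CUDA version:", drv.get_version()
    print "device:", dev.name(), "compute capability:", dev.compute_capability()
    cubin = compile("__global__ void noop() { }")  # runs nvcc
    print "cubin size:", len(cubin), "bytes"
    mod = drv.module_from_buffer(cubin)  # cuModuleLoadDataEx happens here
    print "module loaded OK"
finally:
    ctx.pop()

If this minimal case also fails in module_from_buffer, the cubin/driver combination itself is at fault (for instance, an nvcc from a newer CUDA toolkit than the installed display driver supports) rather than anything in test_driver.py.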
Any help appreciated.
Thanks!