I tried running PyCUDA's test_driver.py and got the failures below: six tests fail, each ending in "LogicError: cuModuleLoadDataEx failed: invalid image" when the compiled module is loaded.

-----

============================= test session starts ==============================
python: platform win32 -- Python 2.6.2 -- pytest-1.2.1
test object 1: test_driver.py

test_driver.py ..F..F.FF.F....F..

=================================== FAILURES ===================================
___________________________ TestDriver.test_gpuarray ___________________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x0110D418>

    @mark_cuda_test
    def test_gpuarray(self):
        import numpy
        a = numpy.arange(200000, dtype=numpy.float32)
        b = a + 17
        import pycuda.gpuarray as gpuarray
        a_g = gpuarray.to_gpu(a)
        b_g = gpuarray.to_gpu(b)
>       diff = (a_g-3*b_g+(-a_g)).get() - (a-3*b+(-a))

test_driver.py:139:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>

    def __sub__(self, other):
        """Substract an array from an array or a scalar from an array."""

        if isinstance(other, GPUArray):
            result = self._new_like_me(_get_common_dtype(self, other))
>           return self._axpbyz(1, other, -1, result)

c:\python26\lib\site-packages\pycuda\gpuarray.py:241:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
selffac = 1
other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
otherfac = -1
out = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
add_timer = None, stream = None

    def _axpbyz(self, selffac, other, otherfac, out, add_timer=None, stream=None):
        """Compute ``out = selffac * self + otherfac*other``,
            where `other` is a vector.."""
        assert self.shape == other.shape

>       func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype, out.dtype)

c:\python26\lib\site-packages\pycuda\gpuarray.py:144:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

dtype_x = dtype('float32'), dtype_y = dtype('float32')
dtype_z = dtype('float32')

>   ???

<string>:1:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

func = <function get_axpbyz_kernel at 0x00E5F430>

    @decorator
    def context_dependent_memoize(func, *args):
        try:
            ctx_dict = func._pycuda_ctx_dep_memoize_dic
        except AttributeError:
            # FIXME: This may keep contexts alive longer than desired.
            # But I guess since the memory in them is freed, who cares.
            ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}

        cur_ctx = cuda.Context.get_current()

        try:
            return ctx_dict[cur_ctx][args]
        except KeyError:
            arg_dict = ctx_dict.setdefault(cur_ctx, {})
>           result = func(*args)

c:\python26\lib\site-packages\pycuda\tools.py:478:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

dtype_x = dtype('float32'), dtype_y = dtype('float32')
dtype_z = dtype('float32')

    @context_dependent_memoize
    def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z):
        return get_elwise_kernel(
"%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z" % {
                    "tp_x": dtype_to_ctype(dtype_x),
                    "tp_y": dtype_to_ctype(dtype_y),
                    "tp_z": dtype_to_ctype(dtype_z),
                    },
                "z[i] = a*x[i] + b*y[i]",
>               "axpbyz")

c:\python26\lib\site-packages\pycuda\elementwise.py:313:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

arguments = 'float a, float *x, float b, float *y, float *z'
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = []

    def get_elwise_kernel(arguments, operation,
            name="kernel", keep=False, options=[], **kwargs):
"""Return a L{pycuda.driver.Function} that performs the same scalar operation
        on one or several vectors.
        """
        func, arguments = get_elwise_kernel_and_types(
>               arguments, operation, name, keep, options, **kwargs)

c:\python26\lib\site-packages\pycuda\elementwise.py:97:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = []

    def get_elwise_kernel_and_types(arguments, operation,
            name="kernel", keep=False, options=[], **kwargs):
        if isinstance(arguments, str):
            from pycuda.tools import parse_c_arg
            arguments = [parse_c_arg(arg) for arg in arguments.split(",")]

        arguments.append(ScalarArg(numpy.uintp, "n"))

        mod = get_elwise_module(arguments, operation, name,
>               keep, options, **kwargs)

c:\python26\lib\site-packages\pycuda\elementwise.py:83:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
options = [], preamble = '', loop_prep = '', after_loop = ''

    def get_elwise_module(arguments, operation,
            name="kernel", keep=False, options=[],
            preamble="", loop_prep="", after_loop=""):
        from pycuda.compiler import SourceModule
        return SourceModule("""
            %(preamble)s

            __global__ void %(name)s(%(arguments)s)
            {

              unsigned tid = threadIdx.x;
              unsigned total_threads = gridDim.x*blockDim.x;
              unsigned cta_start = blockDim.x*blockIdx.x;
              unsigned i;

              %(loop_prep)s;

              for (i = cta_start + tid; i < n; i += total_threads)
              {
                %(operation)s;
              }

              %(after_loop)s;
            }
            """ % {
"arguments": ", ".join(arg.declarator() for arg in arguments),
                "operation": operation,
                "name": name,
                "preamble": preamble,
                "loop_prep": loop_prep,
                "after_loop": after_loop,
                },
>           options=options, keep=keep)

    def get_elwise_kernel_and_types(arguments, operation,
            name="kernel", keep=False, options=[], **kwargs):

c:\python26\lib\site-packages\pycuda\elementwise.py:72:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x01115C50>
source = '\n \n\n __global__ void axpbyz(float a, float *x, float b, float *y, float *z, unsigned int n)\n ...i += total_threads)\n {\n z[i] = a*x[i] + b*y[i];\n }\n\n ;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
__________________________ TestDriver.test_3d_texture __________________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x0263EE18>

    @mark_cuda_test
    def test_3d_texture(self):
        # adapted from code by Nicolas Pinto
        w = 2
        h = 4
        d = 8
        shape = (w, h, d)

        a = numpy.asarray(
                numpy.random.randn(*shape),
                dtype=numpy.float32, order="F")

        descr = drv.ArrayDescriptor3D()
        descr.width = w
        descr.height = h
        descr.depth = d
        descr.format = drv.dtype_to_array_format(a.dtype)
        descr.num_channels = 1
        descr.flags = 0

        ary = drv.Array(descr)

        copy = drv.Memcpy3D()
        copy.set_src_host(a)
        copy.set_dst_array(ary)
        copy.width_in_bytes = copy.src_pitch = a.strides[1]
        copy.src_height = copy.height = h
        copy.depth = d

        copy()

        mod = SourceModule("""
            texture<float, 3, cudaReadModeElementType> mtx_tex;

            __global__ void copy_texture(float *dest)
            {
              int x = threadIdx.x;
              int y = threadIdx.y;
              int z = threadIdx.z;
              int dx = blockDim.x;
              int dy = blockDim.y;
              int i = (z*dy + y)*dx + x;
              dest[i] = tex3D(mtx_tex, x, y, z);
              //dest[i] = x;
            }
>           """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        mtx_tex.set_array(ary)

        dest = numpy.zeros(shape, dtype=numpy.float32, order="F")
        copy_texture(drv.Out(dest), block=shape, texrefs=[mtx_tex])
        assert la.norm(dest-a) == 0

    @mark_cuda_test
    def test_prepared_invocation(self):

test_driver.py:421:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x01121410>
source = '\n texture<float, 3, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...nt i = (z*dy + y)*dx + x;\n dest[i] = tex3D(mtx_tex, x, y, z);\n //dest[i] = x;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
___________________ TestDriver.test_multichannel_2d_texture ____________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x0263E710>

    @mark_cuda_test
    def test_multichannel_2d_texture(self):
        mod = SourceModule("""
            #define CHANNELS 4
            texture<float4, 2, cudaReadModeElementType> mtx_tex;

            __global__ void copy_texture(float *dest)
            {
              int row = threadIdx.x;
              int col = threadIdx.y;
              int w = blockDim.y;
              float4 texval = tex2D(mtx_tex, row, col);
              dest[(row*w+col)*CHANNELS + 0] = texval.x;
              dest[(row*w+col)*CHANNELS + 1] = texval.y;
              dest[(row*w+col)*CHANNELS + 2] = texval.z;
              dest[(row*w+col)*CHANNELS + 3] = texval.w;
            }
>           """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        shape = (5,6)
        channels = 4
        a = numpy.asarray(
                numpy.random.randn(*((channels,)+shape)),
                dtype=numpy.float32, order="F")
        drv.bind_array_to_texref(
            drv.make_multichannel_2d_array(a, order="F"), mtx_tex)

        dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
        copy_texture(drv.Out(dest),
                block=shape+(1,),
                texrefs=[mtx_tex]
                )
        reshaped_a = a.transpose(1,2,0)
        #print reshaped_a
        #print dest
        assert la.norm(dest-reshaped_a) == 0

    @mark_cuda_test
    def test_multichannel_linear_texture(self):

test_driver.py:238:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x0111BEF0>
source = '\n #define CHANNELS 4\n texture<float4, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void... dest[(row*w+col)*CHANNELS + 2] = texval.z;\n dest[(row*w+col)*CHANNELS + 3] = texval.w;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
__________________________ TestDriver.test_2d_texture __________________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x02627F30>

    @mark_cuda_test
    def test_2d_texture(self):
        mod = SourceModule("""
            texture<float, 2, cudaReadModeElementType> mtx_tex;

            __global__ void copy_texture(float *dest)
            {
              int row = threadIdx.x;
              int col = threadIdx.y;
              int w = blockDim.y;
              dest[row*w+col] = tex2D(mtx_tex, row, col);
            }
>           """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        shape = (3,4)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        drv.matrix_to_texref(a, mtx_tex, order="F")

        dest = numpy.zeros(shape, dtype=numpy.float32)
        copy_texture(drv.Out(dest),
                block=shape+(1,),
                texrefs=[mtx_tex]
                )
        assert la.norm(dest-a) == 0

    @mark_cuda_test
    def test_multiple_2d_textures(self):

test_driver.py:170:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x01115C90>
source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...hreadIdx.y;\n int w = blockDim.y;\n dest[row*w+col] = tex2D(mtx_tex, row, col);\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
_____________________ TestDriver.test_multiple_2d_textures _____________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x02645620>

    @mark_cuda_test
    def test_multiple_2d_textures(self):
        mod = SourceModule("""
            texture<float, 2, cudaReadModeElementType> mtx_tex;
            texture<float, 2, cudaReadModeElementType> mtx2_tex;

            __global__ void copy_texture(float *dest)
            {
              int row = threadIdx.x;
              int col = threadIdx.y;
              int w = blockDim.y;
              dest[row*w+col] =
                  tex2D(mtx_tex, row, col)
                  +
                  tex2D(mtx2_tex, row, col);
            }
>           """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")
        mtx2_tex = mod.get_texref("mtx2_tex")

        shape = (3,4)
        a = numpy.random.randn(*shape).astype(numpy.float32)
        b = numpy.random.randn(*shape).astype(numpy.float32)
        drv.matrix_to_texref(a, mtx_tex, order="F")
        drv.matrix_to_texref(b, mtx2_tex, order="F")

        dest = numpy.zeros(shape, dtype=numpy.float32)
        copy_texture(drv.Out(dest),
                block=shape+(1,),
                texrefs=[mtx_tex, mtx2_tex]
                )
        assert la.norm(dest-a-b) < 1e-6

    @mark_cuda_test
    def test_multichannel_2d_texture(self):

test_driver.py:202:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x011215F0>
source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n texture<float, 2, cudaReadModeElementType> mtx... tex2D(mtx_tex, row, col)\n +\n tex2D(mtx2_tex, row, col);\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
_________________ TestDriver.test_multichannel_linear_texture __________________

    def f(*args, **kwargs):
        import pycuda.driver
        # appears to be idempotent, i.e. no harm in calling it more than once
        pycuda.driver.init()

        ctx = make_default_context()
        try:
            assert isinstance(ctx.get_device().name(), str)
            assert isinstance(ctx.get_device().compute_capability(), tuple)
            assert isinstance(ctx.get_device().get_attributes(), dict)
>           inner_f(*args, **kwargs)

c:\python26\lib\site-packages\pycuda\tools.py:496:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <test_driver.TestDriver instance at 0x02645710>

    @mark_cuda_test
    def test_multichannel_linear_texture(self):
        mod = SourceModule("""
            #define CHANNELS 4
            texture<float4, 1, cudaReadModeElementType> mtx_tex;

            __global__ void copy_texture(float *dest)
            {
              int i = threadIdx.x+blockDim.x*threadIdx.y;
              float4 texval = tex1Dfetch(mtx_tex, i);
              dest[i*CHANNELS + 0] = texval.x;
              dest[i*CHANNELS + 1] = texval.y;
              dest[i*CHANNELS + 2] = texval.z;
              dest[i*CHANNELS + 3] = texval.w;
            }
>           """)

        copy_texture = mod.get_function("copy_texture")
        mtx_tex = mod.get_texref("mtx_tex")

        shape = (16, 16)
        channels = 4
        a = numpy.random.randn(*(shape+(channels,))).astype(numpy.float32)
        a_gpu = drv.to_device(a)
        mtx_tex.set_address(a_gpu, a.nbytes)
        mtx_tex.set_format(drv.array_format.FLOAT, 4)

        dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
        copy_texture(drv.Out(dest),
                block=shape+(1,),
                texrefs=[mtx_tex]
                )
        #print a
        #print dest
        assert la.norm(dest-a) == 0

    @mark_cuda_test
    def test_large_smem(self):

test_driver.py:276:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <pycuda.compiler.SourceModule object at 0x0111BFD0>
source = '\n #define CHANNELS 4\n texture<float4, 1, cudaReadModeElementType> mtx_tex;\n\n __global__ void...texval.y;\n dest[i*CHANNELS + 2] = texval.z;\n dest[i*CHANNELS + 3] = texval.w;\n }\n '
nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
code = None, cache_dir = None, include_dirs = []

    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
            no_extern_c=False, arch=None, code=None, cache_dir=None,
            include_dirs=[]):
        if arch is not None:
            try:
                from pycuda.driver import Context
                capability = Context.get_device().compute_capability()
                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
                    from warnings import warn
                    warn("trying to compile for a compute capability "
                            "higher than selected GPU")
            except:
                pass

        cubin = compile(source, nvcc, options, keep, no_extern_c,
                arch, code, cache_dir, include_dirs)

        from pycuda.driver import module_from_buffer
>       self.module = module_from_buffer(cubin)
E       LogicError: cuModuleLoadDataEx failed: invalid image -

c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
===================== 6 failed, 12 passed in 1.38 seconds ======================

-----
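
In case a smaller test case is useful: as far as I can tell, any SourceModule compile should hit the same code path (nvcc compiles the source, then module_from_buffer calls cuModuleLoadDataEx on the resulting cubin, which is where the LogicError is raised above). Something like the following minimal sketch should reproduce it outside the test suite; the "scale" kernel is just an illustration, not taken from test_driver.py:

    # Minimal sketch: compile a trivial kernel and load it; the module load
    # is the cuModuleLoadDataEx call that fails in the tests above.
    import numpy
    import pycuda.autoinit          # creates a context on the default device
    import pycuda.driver as drv
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
    __global__ void scale(float *x, float a)
    {
      int i = threadIdx.x;
      x[i] = a * x[i];
    }
    """)

    # If the module loads, run the kernel once as a sanity check.
    scale = mod.get_function("scale")
    x = numpy.arange(16, dtype=numpy.float32)
    scale(drv.InOut(x), numpy.float32(2.0), block=(16, 1, 1))
    print x  # Python 2.6 syntax, per the session header above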

Any help is appreciated.

Thanks!


