Paul:

Thanks for your reply.

   Boost version 1.38 (following the example in the wiki:
http://wiki.tiker.net/PyCuda/Installation/Windows#Using_Visual_Studio_2008_.28alternative_on_January_2010.29)

   CUDA 3.0
   NVIDIA driver version 197.03 (Quadro NVS 295, 256 MB of memory)
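
   For reference, the figures above can be double-checked with a quick
snippet along these lines (a minimal sketch; get_version() reports the
CUDA version PyCUDA was built against):

    import pycuda.driver as drv

    drv.init()
    dev = drv.Device(0)
    # CUDA version PyCUDA was compiled against, as a tuple
    print "PyCUDA built against CUDA:", drv.get_version()
    print "Device:", dev.name()
    print "Compute capability: %d.%d" % dev.compute_capability()
    print "Total memory: %d MB" % (dev.total_memory() // (1024 * 1024))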

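In case it helps narrow things down: all six failures below die at the
same point, module_from_buffer() raising "LogicError: cuModuleLoadDataEx
failed: invalid image" right after nvcc compiles the kernel source; the
twelve tests that never build a SourceModule pass. That step can be
reproduced in isolation with something like the sketch below (the kernel
itself is arbitrary; keep=True preserves nvcc's working directory so its
output can be inspected by hand):

    import pycuda.autoinit
    from pycuda.compiler import SourceModule

    # the failure occurs while loading the compiled module,
    # not while running it, so any trivial kernel will do
    mod = SourceModule("""
    __global__ void scale(float *a)
    {
      a[threadIdx.x] *= 2.0f;
    }
    """, keep=True)
    print "module loaded OK"
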
Any help appreciated.

On Wed, Mar 24, 2010 at 12:03 AM, Paul Rigor (uci) <paul.ri...@uci.edu> wrote:
> What versions of boost, cuda and nvidia drivers do you currently have
> installed?
> Paul
>
> On Tue, Mar 23, 2010 at 8:44 PM, reckoner <recko...@gmail.com> wrote:
>>
>> I tried running test_driver.py and got the following errors:
>>
>> -----
>>
>> ============================= test session starts ==============================
>> python: platform win32 -- Python 2.6.2 -- pytest-1.2.1
>> test object 1: test_driver.py
>>
>> test_driver.py ..F..F.FF.F....F..
>>
>> =================================== FAILURES ===================================
>> ___________________________ TestDriver.test_gpuarray ___________________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0110D418>
>>
>>    @mark_cuda_test
>>    def test_gpuarray(self):
>>        import numpy
>>        a = numpy.arange(200000, dtype=numpy.float32)
>>        b = a + 17
>>        import pycuda.gpuarray as gpuarray
>>        a_g = gpuarray.to_gpu(a)
>>        b_g = gpuarray.to_gpu(b)
>> >       diff = (a_g-3*b_g+(-a_g)).get() - (a-3*b+(-a))
>>
>> test_driver.py:139:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>>
>>    def __sub__(self, other):
>>        """Subtract an array from an array or a scalar from an array."""
>>
>>        if isinstance(other, GPUArray):
>>            result = self._new_like_me(_get_common_dtype(self, other))
>> >           return self._axpbyz(1, other, -1, result)
>>
>> c:\python26\lib\site-packages\pycuda\gpuarray.py:241:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> selffac = 1
>> other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> otherfac = -1
>> out = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> add_timer = None, stream = None
>>
>>    def _axpbyz(self, selffac, other, otherfac, out, add_timer=None,
>>            stream=None):
>>        """Compute ``out = selffac * self + otherfac*other``,
>>            where `other` is a vector."""
>>        assert self.shape == other.shape
>>
>> >       func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype,
>> >               out.dtype)
>>
>> c:\python26\lib\site-packages\pycuda\gpuarray.py:144:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> dtype_x = dtype('float32'), dtype_y = dtype('float32')
>> dtype_z = dtype('float32')
>>
>> >   ???
>>
>> <string>:1:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> func = <function get_axpbyz_kernel at 0x00E5F430>
>>
>>    @decorator
>>    def context_dependent_memoize(func, *args):
>>        try:
>>            ctx_dict = func._pycuda_ctx_dep_memoize_dic
>>        except AttributeError:
>>            # FIXME: This may keep contexts alive longer than desired.
>>            # But I guess since the memory in them is freed, who cares.
>>            ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}
>>
>>        cur_ctx = cuda.Context.get_current()
>>
>>        try:
>>            return ctx_dict[cur_ctx][args]
>>        except KeyError:
>>            arg_dict = ctx_dict.setdefault(cur_ctx, {})
>> >           result = func(*args)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:478:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> dtype_x = dtype('float32'), dtype_y = dtype('float32')
>> dtype_z = dtype('float32')
>>
>>    @context_dependent_memoize
>>    def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z):
>>        return get_elwise_kernel(
>>                "%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z" % {
>>                    "tp_x": dtype_to_ctype(dtype_x),
>>                    "tp_y": dtype_to_ctype(dtype_y),
>>                    "tp_z": dtype_to_ctype(dtype_z),
>>                    },
>>                "z[i] = a*x[i] + b*y[i]",
>> >               "axpbyz")
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:313:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = 'float a, float *x, float b, float *y, float *z'
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = []
>>
>>    def get_elwise_kernel(arguments, operation,
>>            name="kernel", keep=False, options=[], **kwargs):
>>        """Return a L{pycuda.driver.Function} that performs the same scalar operation
>>        on one or several vectors.
>>        """
>>        func, arguments = get_elwise_kernel_and_types(
>> >               arguments, operation, name, keep, options, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:97:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = []
>>
>>    def get_elwise_kernel_and_types(arguments, operation,
>>            name="kernel", keep=False, options=[], **kwargs):
>>        if isinstance(arguments, str):
>>            from pycuda.tools import parse_c_arg
>>            arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
>>
>>        arguments.append(ScalarArg(numpy.uintp, "n"))
>>
>>        mod = get_elwise_module(arguments, operation, name,
>> >               keep, options, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:83:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = [], preamble = '', loop_prep = '', after_loop = ''
>>
>>    def get_elwise_module(arguments, operation,
>>            name="kernel", keep=False, options=[],
>>            preamble="", loop_prep="", after_loop=""):
>>        from pycuda.compiler import SourceModule
>>        return SourceModule("""
>>            %(preamble)s
>>
>>            __global__ void %(name)s(%(arguments)s)
>>            {
>>
>>              unsigned tid = threadIdx.x;
>>              unsigned total_threads = gridDim.x*blockDim.x;
>>              unsigned cta_start = blockDim.x*blockIdx.x;
>>              unsigned i;
>>
>>              %(loop_prep)s;
>>
>>              for (i = cta_start + tid; i < n; i += total_threads)
>>              {
>>                %(operation)s;
>>              }
>>
>>              %(after_loop)s;
>>            }
>>            """ % {
>>                "arguments": ", ".join(arg.declarator() for arg in arguments),
>>                "operation": operation,
>>                "name": name,
>>                "preamble": preamble,
>>                "loop_prep": loop_prep,
>>                "after_loop": after_loop,
>>                },
>> >           options=options, keep=keep)
>>
>>    def get_elwise_kernel_and_types(arguments, operation,
>>            name="kernel", keep=False, options=[], **kwargs):
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:72:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01115C50>
>> source = '\n        \n\n        __global__ void axpbyz(float a, float *x, float b, float *y, float *z, unsigned int n)\n       ...i += total_threads)\n          {\n            z[i] = a*x[i] + b*y[i];\n     }\n\n          ;\n        }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> __________________________ TestDriver.test_3d_texture __________________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0263EE18>
>>
>>    @mark_cuda_test
>>    def test_3d_texture(self):
>>        # adapted from code by Nicolas Pinto
>>        w = 2
>>        h = 4
>>        d = 8
>>        shape = (w, h, d)
>>
>>        a = numpy.asarray(
>>                numpy.random.randn(*shape),
>>                dtype=numpy.float32, order="F")
>>
>>        descr = drv.ArrayDescriptor3D()
>>        descr.width = w
>>        descr.height = h
>>        descr.depth = d
>>        descr.format = drv.dtype_to_array_format(a.dtype)
>>        descr.num_channels = 1
>>        descr.flags = 0
>>
>>        ary = drv.Array(descr)
>>
>>        copy = drv.Memcpy3D()
>>        copy.set_src_host(a)
>>        copy.set_dst_array(ary)
>>        copy.width_in_bytes = copy.src_pitch = a.strides[1]
>>        copy.src_height = copy.height = h
>>        copy.depth = d
>>
>>        copy()
>>
>>        mod = SourceModule("""
>>            texture<float, 3, cudaReadModeElementType> mtx_tex;
>>
>>            __global__ void copy_texture(float *dest)
>>            {
>>              int x = threadIdx.x;
>>              int y = threadIdx.y;
>>              int z = threadIdx.z;
>>              int dx = blockDim.x;
>>              int dy = blockDim.y;
>>              int i = (z*dy + y)*dx + x;
>>              dest[i] = tex3D(mtx_tex, x, y, z);
>>              //dest[i] = x;
>>            }
>> >           """)
>>
>>        copy_texture = mod.get_function("copy_texture")
>>        mtx_tex = mod.get_texref("mtx_tex")
>>
>>        mtx_tex.set_array(ary)
>>
>>        dest = numpy.zeros(shape, dtype=numpy.float32, order="F")
>>        copy_texture(drv.Out(dest), block=shape, texrefs=[mtx_tex])
>>        assert la.norm(dest-a) == 0
>>
>>    @mark_cuda_test
>>    def test_prepared_invocation(self):
>>
>> test_driver.py:421:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01121410>
>> source = '\n        texture<float, 3, cudaReadModeElementType> mtx_tex;\n\n        __global__ void copy_texture(float *dest)\n ...nt i = (z*dy + y)*dx + x;\n          dest[i] = tex3D(mtx_tex, x, y, z);\n //dest[i] = x;\n        }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> ___________________ TestDriver.test_multichannel_2d_texture ____________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0263E710>
>>
>>    @mark_cuda_test
>>    def test_multichannel_2d_texture(self):
>>        mod = SourceModule("""
>>            #define CHANNELS 4
>>            texture<float4, 2, cudaReadModeElementType> mtx_tex;
>>
>>            __global__ void copy_texture(float *dest)
>>            {
>>              int row = threadIdx.x;
>>              int col = threadIdx.y;
>>              int w = blockDim.y;
>>              float4 texval = tex2D(mtx_tex, row, col);
>>              dest[(row*w+col)*CHANNELS + 0] = texval.x;
>>              dest[(row*w+col)*CHANNELS + 1] = texval.y;
>>              dest[(row*w+col)*CHANNELS + 2] = texval.z;
>>              dest[(row*w+col)*CHANNELS + 3] = texval.w;
>>            }
>> >           """)
>>
>>        copy_texture = mod.get_function("copy_texture")
>>        mtx_tex = mod.get_texref("mtx_tex")
>>
>>        shape = (5,6)
>>        channels = 4
>>        a = numpy.asarray(
>>                numpy.random.randn(*((channels,)+shape)),
>>                dtype=numpy.float32, order="F")
>>        drv.bind_array_to_texref(
>>            drv.make_multichannel_2d_array(a, order="F"), mtx_tex)
>>
>>        dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
>>        copy_texture(drv.Out(dest),
>>                block=shape+(1,),
>>                texrefs=[mtx_tex]
>>                )
>>        reshaped_a = a.transpose(1,2,0)
>>        #print reshaped_a
>>        #print dest
>>        assert la.norm(dest-reshaped_a) == 0
>>
>>    @mark_cuda_test
>>    def test_multichannel_linear_texture(self):
>>
>> test_driver.py:238:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x0111BEF0>
>> source = '\n        #define CHANNELS 4\n        texture<float4, 2, cudaReadModeElementType> mtx_tex;\n\n        __global__ void... dest[(row*w+col)*CHANNELS + 2] = texval.z;\n dest[(row*w+col)*CHANNELS + 3] = texval.w;\n        }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> __________________________ TestDriver.test_2d_texture __________________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02627F30>
>>
>>    @mark_cuda_test
>>    def test_2d_texture(self):
>>        mod = SourceModule("""
>>            texture<float, 2, cudaReadModeElementType> mtx_tex;
>>
>>            __global__ void copy_texture(float *dest)
>>            {
>>              int row = threadIdx.x;
>>              int col = threadIdx.y;
>>              int w = blockDim.y;
>>              dest[row*w+col] = tex2D(mtx_tex, row, col);
>>            }
>> >           """)
>>
>>        copy_texture = mod.get_function("copy_texture")
>>        mtx_tex = mod.get_texref("mtx_tex")
>>
>>        shape = (3,4)
>>        a = numpy.random.randn(*shape).astype(numpy.float32)
>>        drv.matrix_to_texref(a, mtx_tex, order="F")
>>
>>        dest = numpy.zeros(shape, dtype=numpy.float32)
>>        copy_texture(drv.Out(dest),
>>                block=shape+(1,),
>>                texrefs=[mtx_tex]
>>                )
>>        assert la.norm(dest-a) == 0
>>
>>    @mark_cuda_test
>>    def test_multiple_2d_textures(self):
>>
>> test_driver.py:170:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01115C90>
>> source = '\n        texture<float, 2, cudaReadModeElementType> mtx_tex;\n\n        __global__ void copy_texture(float *dest)\n ...hreadIdx.y;\n          int w = blockDim.y;\n          dest[row*w+col] = tex2D(mtx_tex, row, col);\n        }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> _____________________ TestDriver.test_multiple_2d_textures _____________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02645620>
>>
>>    @mark_cuda_test
>>    def test_multiple_2d_textures(self):
>>        mod = SourceModule("""
>>            texture<float, 2, cudaReadModeElementType> mtx_tex;
>>            texture<float, 2, cudaReadModeElementType> mtx2_tex;
>>
>>            __global__ void copy_texture(float *dest)
>>            {
>>              int row = threadIdx.x;
>>              int col = threadIdx.y;
>>              int w = blockDim.y;
>>              dest[row*w+col] =
>>                  tex2D(mtx_tex, row, col)
>>                  +
>>                  tex2D(mtx2_tex, row, col);
>>            }
>> >           """)
>>
>>        copy_texture = mod.get_function("copy_texture")
>>        mtx_tex = mod.get_texref("mtx_tex")
>>        mtx2_tex = mod.get_texref("mtx2_tex")
>>
>>        shape = (3,4)
>>        a = numpy.random.randn(*shape).astype(numpy.float32)
>>        b = numpy.random.randn(*shape).astype(numpy.float32)
>>        drv.matrix_to_texref(a, mtx_tex, order="F")
>>        drv.matrix_to_texref(b, mtx2_tex, order="F")
>>
>>        dest = numpy.zeros(shape, dtype=numpy.float32)
>>        copy_texture(drv.Out(dest),
>>                block=shape+(1,),
>>                texrefs=[mtx_tex, mtx2_tex]
>>                )
>>        assert la.norm(dest-a-b) < 1e-6
>>
>>    @mark_cuda_test
>>    def test_multichannel_2d_texture(self):
>>
>> test_driver.py:202:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x011215F0>
>> source = '\n        texture<float, 2, cudaReadModeElementType> mtx_tex;\n        texture<float, 2, cudaReadModeElementType> mtx... tex2D(mtx_tex, row, col)\n              +\n tex2D(mtx2_tex, row, col);\n      }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> _________________ TestDriver.test_multichannel_linear_texture __________________
>>
>>    def f(*args, **kwargs):
>>        import pycuda.driver
>>        # appears to be idempotent, i.e. no harm in calling it more than once
>>        pycuda.driver.init()
>>
>>        ctx = make_default_context()
>>        try:
>>            assert isinstance(ctx.get_device().name(), str)
>>            assert isinstance(ctx.get_device().compute_capability(), tuple)
>>            assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02645710>
>>
>>    @mark_cuda_test
>>    def test_multichannel_linear_texture(self):
>>        mod = SourceModule("""
>>            #define CHANNELS 4
>>            texture<float4, 1, cudaReadModeElementType> mtx_tex;
>>
>>            __global__ void copy_texture(float *dest)
>>            {
>>              int i = threadIdx.x+blockDim.x*threadIdx.y;
>>              float4 texval = tex1Dfetch(mtx_tex, i);
>>              dest[i*CHANNELS + 0] = texval.x;
>>              dest[i*CHANNELS + 1] = texval.y;
>>              dest[i*CHANNELS + 2] = texval.z;
>>              dest[i*CHANNELS + 3] = texval.w;
>>            }
>> >           """)
>>
>>        copy_texture = mod.get_function("copy_texture")
>>        mtx_tex = mod.get_texref("mtx_tex")
>>
>>        shape = (16, 16)
>>        channels = 4
>>        a = numpy.random.randn(*(shape+(channels,))).astype(numpy.float32)
>>        a_gpu = drv.to_device(a)
>>        mtx_tex.set_address(a_gpu, a.nbytes)
>>        mtx_tex.set_format(drv.array_format.FLOAT, 4)
>>
>>        dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
>>        copy_texture(drv.Out(dest),
>>                block=shape+(1,),
>>                texrefs=[mtx_tex]
>>                )
>>        #print a
>>        #print dest
>>        assert la.norm(dest-a) == 0
>>
>>    @mark_cuda_test
>>    def test_large_smem(self):
>>
>> test_driver.py:276:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x0111BFD0>
>> source = '\n        #define CHANNELS 4\n        texture<float4, 1, cudaReadModeElementType> mtx_tex;\n\n        __global__ void...texval.y;\n        dest[i*CHANNELS + 2] = texval.z;\n dest[i*CHANNELS + 3] = texval.w;\n        }\n        '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>    def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>            no_extern_c=False, arch=None, code=None, cache_dir=None,
>>            include_dirs=[]):
>>        if arch is not None:
>>            try:
>>                from pycuda.driver import Context
>>                capability = Context.get_device().compute_capability()
>>                if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                    from warnings import warn
>>                    warn("trying to compile for a compute capability "
>>                            "higher than selected GPU")
>>            except:
>>                pass
>>
>>        cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                arch, code, cache_dir, include_dirs)
>>
>>        from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> ===================== 6 failed, 12 passed in 1.38 seconds ======================
>>
>> -----
>>
>> Any help appreciated.
>>
>> Thanks!
>>
>
>
>
> --
> Paul Rigor
> Pre-doctoral BIT Fellow and Graduate Student
> Institute for Genomics and Bioinformatics
> Donald Bren School of Information and Computer Sciences
> University of California, Irvine
> http://www.ics.uci.edu/~prigor
>

_______________________________________________
PyCUDA mailing list
pyc...@host304.hostmonster.com
http://host304.hostmonster.com/mailman/listinfo/pycuda_tiker.net
