Paul: Thanks for your reply.
Boost version 1.38 (installed by following the example in the wiki:
http://wiki.tiker.net/PyCuda/Installation/Windows#Using_Visual_Studio_2008_.28alternative_on_January_2010.29)
CUDA 3.0
NVIDIA driver version 197.03 (Quadro NVS 295, 256 MB of memory)

Any help appreciated.
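For reference, here is a quick check of what the stack reports from Python (a
minimal sketch on my side; it assumes this PyCUDA build exposes
drv.get_version() and drv.get_driver_version(), which current versions do, but
I haven't confirmed against every release):

    # Sanity-check the CUDA stack as PyCUDA sees it.
    import pycuda.driver as drv

    drv.init()
    dev = drv.Device(0)
    print "CUDA version PyCUDA was built against:", drv.get_version()  # e.g. (3, 0, 0)
    print "Installed driver version:", drv.get_driver_version()        # e.g. 3000
    print "Device:", dev.name()
    print "Compute capability:", dev.compute_capability()             # NVS 295 should be (1, 1)
    print "Total memory (MB):", dev.total_memory() // (1024 * 1024)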

On Wed, Mar 24, 2010 at 12:03 AM, Paul Rigor (uci) <paul.ri...@uci.edu> wrote:
> What versions of boost, cuda and nvidia drivers do you currently have
> installed?
> Paul
>
> On Tue, Mar 23, 2010 at 8:44 PM, reckoner <recko...@gmail.com> wrote:
>>
>> I tried running test_driver.py and got the following errors:
>>
>> -----
>>
>> ============================= test session starts ==============================
>> python: platform win32 -- Python 2.6.2 -- pytest-1.2.1
>> test object 1: test_driver.py
>>
>> test_driver.py ..F..F.FF.F....F..
>>
>> =================================== FAILURES ===================================
>> ___________________________ TestDriver.test_gpuarray ___________________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0110D418>
>>
>>     @mark_cuda_test
>>     def test_gpuarray(self):
>>         import numpy
>>         a = numpy.arange(200000, dtype=numpy.float32)
>>         b = a + 17
>>         import pycuda.gpuarray as gpuarray
>>         a_g = gpuarray.to_gpu(a)
>>         b_g = gpuarray.to_gpu(b)
>> >       diff = (a_g-3*b_g+(-a_g)).get() - (a-3*b+(-a))
>>
>> test_driver.py:139:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>>
>>     def __sub__(self, other):
>>         """Substract an array from an array or a scalar from an array."""
>>
>>         if isinstance(other, GPUArray):
>>             result = self._new_like_me(_get_common_dtype(self, other))
>> >           return self._axpbyz(1, other, -1, result)
>>
>> c:\python26\lib\site-packages\pycuda\gpuarray.py:241:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> selffac = 1
>> other = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> otherfac = -1
>> out = <[LogicError("cuMemcpyDtoH failed: invalid value") raised in repr()] SafeRepr object at 0xf07058>
>> add_timer = None, stream = None
>>
>>     def _axpbyz(self, selffac, other, otherfac, out, add_timer=None,
>>             stream=None):
>>         """Compute ``out = selffac * self + otherfac*other``,
>>         where `other` is a vector.."""
>>         assert self.shape == other.shape
>>
>> >       func = elementwise.get_axpbyz_kernel(self.dtype, other.dtype,
>> >           out.dtype)
>>
>> c:\python26\lib\site-packages\pycuda\gpuarray.py:144:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> dtype_x = dtype('float32'), dtype_y = dtype('float32')
>> dtype_z = dtype('float32')
>>
>> >   ???
>>
>> <string>:1:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> func = <function get_axpbyz_kernel at 0x00E5F430>
>>
>>     @decorator
>>     def context_dependent_memoize(func, *args):
>>         try:
>>             ctx_dict = func._pycuda_ctx_dep_memoize_dic
>>         except AttributeError:
>>             # FIXME: This may keep contexts alive longer than desired.
>>             # But I guess since the memory in them is freed, who cares.
>>             ctx_dict = func._pycuda_ctx_dep_memoize_dic = {}
>>
>>         cur_ctx = cuda.Context.get_current()
>>
>>         try:
>>             return ctx_dict[cur_ctx][args]
>>         except KeyError:
>>             arg_dict = ctx_dict.setdefault(cur_ctx, {})
>> >           result = func(*args)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:478:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> dtype_x = dtype('float32'), dtype_y = dtype('float32')
>> dtype_z = dtype('float32')
>>
>>     @context_dependent_memoize
>>     def get_axpbyz_kernel(dtype_x, dtype_y, dtype_z):
>>         return get_elwise_kernel(
>>                 "%(tp_x)s a, %(tp_x)s *x, %(tp_y)s b, %(tp_y)s *y, %(tp_z)s *z" % {
>>                     "tp_x": dtype_to_ctype(dtype_x),
>>                     "tp_y": dtype_to_ctype(dtype_y),
>>                     "tp_z": dtype_to_ctype(dtype_z),
>>                     },
>>                 "z[i] = a*x[i] + b*y[i]",
>> >           "axpbyz")
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:313:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = 'float a, float *x, float b, float *y, float *z'
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = []
>>
>>     def get_elwise_kernel(arguments, operation,
>>             name="kernel", keep=False, options=[], **kwargs):
>>         """Return a L{pycuda.driver.Function} that performs the same scalar operation
>>         on one or several vectors.
>>         """
>>         func, arguments = get_elwise_kernel_and_types(
>> >           arguments, operation, name, keep, options, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:97:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = []
>>
>>     def get_elwise_kernel_and_types(arguments, operation,
>>             name="kernel", keep=False, options=[], **kwargs):
>>         if isinstance(arguments, str):
>>             from pycuda.tools import parse_c_arg
>>             arguments = [parse_c_arg(arg) for arg in arguments.split(",")]
>>
>>         arguments.append(ScalarArg(numpy.uintp, "n"))
>>
>>         mod = get_elwise_module(arguments, operation, name,
>> >           keep, options, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:83:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> arguments = [ScalarArg('a', float32), VectorArg('x', float32), ScalarArg('b', float32), VectorArg('y', float32), VectorArg('z', float32), ScalarArg('n', uint32)]
>> operation = 'z[i] = a*x[i] + b*y[i]', name = 'axpbyz', keep = False
>> options = [], preamble = '', loop_prep = '', after_loop = ''
>>
>>     def get_elwise_module(arguments, operation,
>>             name="kernel", keep=False, options=[],
>>             preamble="", loop_prep="", after_loop=""):
>>         from pycuda.compiler import SourceModule
>>         return SourceModule("""
>>             %(preamble)s
>>
>>             __global__ void %(name)s(%(arguments)s)
>>             {
>>
>>               unsigned tid = threadIdx.x;
>>               unsigned total_threads = gridDim.x*blockDim.x;
>>               unsigned cta_start = blockDim.x*blockIdx.x;
>>               unsigned i;
>>
>>               %(loop_prep)s;
>>
>>               for (i = cta_start + tid; i < n; i += total_threads)
>>               {
>>                 %(operation)s;
>>               }
>>
>>               %(after_loop)s;
>>             }
>>             """ % {
>>                 "arguments": ", ".join(arg.declarator() for arg in arguments),
>>                 "operation": operation,
>>                 "name": name,
>>                 "preamble": preamble,
>>                 "loop_prep": loop_prep,
>>                 "after_loop": after_loop,
>>                 },
>> >           options=options, keep=keep)
>>
>>     def get_elwise_kernel_and_types(arguments, operation,
>>             name="kernel", keep=False, options=[], **kwargs):
>>
>> c:\python26\lib\site-packages\pycuda\elementwise.py:72:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01115C50>
>> source = '\n \n\n __global__ void axpbyz(float a, float *x, float b, float *y, float *z, unsigned int n)\n ...i += total_threads)\n {\n z[i] = a*x[i] + b*y[i];\n }\n\n ;\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
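Every failure below dies at this same spot: nvcc itself succeeds, but the
driver then rejects the resulting cubin at cuModuleLoadDataEx. A minimal
sketch (mine, not from the test suite) that exercises the same
compile-and-load path with a no-op kernel:

    # Smallest compile-and-load cycle; if this raises the same "invalid
    # image" LogicError, the problem is between nvcc's output and the
    # driver, not anything in test_driver.py.
    import pycuda.autoinit  # creates a context on the first device
    from pycuda.compiler import SourceModule

    mod = SourceModule("""
    __global__ void noop() { }
    """, keep=True)  # keep=True preserves nvcc's working directory for inspection
    print "module loaded OK:", mod.get_function("noop")

If even this no-op module fails to load, the tests themselves can be ruled
out.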
>> __________________________ TestDriver.test_3d_texture __________________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0263EE18>
>>
>>     @mark_cuda_test
>>     def test_3d_texture(self):
>>         # adapted from code by Nicolas Pinto
>>         w = 2
>>         h = 4
>>         d = 8
>>         shape = (w, h, d)
>>
>>         a = numpy.asarray(
>>                 numpy.random.randn(*shape),
>>                 dtype=numpy.float32, order="F")
>>
>>         descr = drv.ArrayDescriptor3D()
>>         descr.width = w
>>         descr.height = h
>>         descr.depth = d
>>         descr.format = drv.dtype_to_array_format(a.dtype)
>>         descr.num_channels = 1
>>         descr.flags = 0
>>
>>         ary = drv.Array(descr)
>>
>>         copy = drv.Memcpy3D()
>>         copy.set_src_host(a)
>>         copy.set_dst_array(ary)
>>         copy.width_in_bytes = copy.src_pitch = a.strides[1]
>>         copy.src_height = copy.height = h
>>         copy.depth = d
>>
>>         copy()
>>
>>         mod = SourceModule("""
>>             texture<float, 3, cudaReadModeElementType> mtx_tex;
>>
>>             __global__ void copy_texture(float *dest)
>>             {
>>               int x = threadIdx.x;
>>               int y = threadIdx.y;
>>               int z = threadIdx.z;
>>               int dx = blockDim.x;
>>               int dy = blockDim.y;
>>               int i = (z*dy + y)*dx + x;
>>               dest[i] = tex3D(mtx_tex, x, y, z);
>>               //dest[i] = x;
>>             }
>> >           """)
>>
>>         copy_texture = mod.get_function("copy_texture")
>>         mtx_tex = mod.get_texref("mtx_tex")
>>
>>         mtx_tex.set_array(ary)
>>
>>         dest = numpy.zeros(shape, dtype=numpy.float32, order="F")
>>         copy_texture(drv.Out(dest), block=shape, texrefs=[mtx_tex])
>>         assert la.norm(dest-a) == 0
>>
>>     @mark_cuda_test
>>     def test_prepared_invocation(self):
>>
>> test_driver.py:421:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01121410>
>> source = '\n texture<float, 3, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...nt i = (z*dy + y)*dx + x;\n dest[i] = tex3D(mtx_tex, x, y, z);\n //dest[i] = x;\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> ___________________ TestDriver.test_multichannel_2d_texture ____________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x0263E710>
>>
>>     @mark_cuda_test
>>     def test_multichannel_2d_texture(self):
>>         mod = SourceModule("""
>>             #define CHANNELS 4
>>             texture<float4, 2, cudaReadModeElementType> mtx_tex;
>>
>>             __global__ void copy_texture(float *dest)
>>             {
>>               int row = threadIdx.x;
>>               int col = threadIdx.y;
>>               int w = blockDim.y;
>>               float4 texval = tex2D(mtx_tex, row, col);
>>               dest[(row*w+col)*CHANNELS + 0] = texval.x;
>>               dest[(row*w+col)*CHANNELS + 1] = texval.y;
>>               dest[(row*w+col)*CHANNELS + 2] = texval.z;
>>               dest[(row*w+col)*CHANNELS + 3] = texval.w;
>>             }
>> >           """)
>>
>>         copy_texture = mod.get_function("copy_texture")
>>         mtx_tex = mod.get_texref("mtx_tex")
>>
>>         shape = (5,6)
>>         channels = 4
>>         a = numpy.asarray(
>>                 numpy.random.randn(*((channels,)+shape)),
>>                 dtype=numpy.float32, order="F")
>>         drv.bind_array_to_texref(
>>                 drv.make_multichannel_2d_array(a, order="F"), mtx_tex)
>>
>>         dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
>>         copy_texture(drv.Out(dest),
>>                 block=shape+(1,),
>>                 texrefs=[mtx_tex]
>>                 )
>>         reshaped_a = a.transpose(1,2,0)
>>         #print reshaped_a
>>         #print dest
>>         assert la.norm(dest-reshaped_a) == 0
>>
>>     @mark_cuda_test
>>     def test_multichannel_linear_texture(self):
>>
>> test_driver.py:238:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x0111BEF0>
>> source = '\n #define CHANNELS 4\n texture<float4, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void... dest[(row*w+col)*CHANNELS + 2] = texval.z;\n dest[(row*w+col)*CHANNELS + 3] = texval.w;\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> __________________________ TestDriver.test_2d_texture __________________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02627F30>
>>
>>     @mark_cuda_test
>>     def test_2d_texture(self):
>>         mod = SourceModule("""
>>             texture<float, 2, cudaReadModeElementType> mtx_tex;
>>
>>             __global__ void copy_texture(float *dest)
>>             {
>>               int row = threadIdx.x;
>>               int col = threadIdx.y;
>>               int w = blockDim.y;
>>               dest[row*w+col] = tex2D(mtx_tex, row, col);
>>             }
>> >           """)
>>
>>         copy_texture = mod.get_function("copy_texture")
>>         mtx_tex = mod.get_texref("mtx_tex")
>>
>>         shape = (3,4)
>>         a = numpy.random.randn(*shape).astype(numpy.float32)
>>         drv.matrix_to_texref(a, mtx_tex, order="F")
>>
>>         dest = numpy.zeros(shape, dtype=numpy.float32)
>>         copy_texture(drv.Out(dest),
>>                 block=shape+(1,),
>>                 texrefs=[mtx_tex]
>>                 )
>>         assert la.norm(dest-a) == 0
>>
>>     @mark_cuda_test
>>     def test_multiple_2d_textures(self):
>>
>> test_driver.py:170:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x01115C90>
>> source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n\n __global__ void copy_texture(float *dest)\n ...hreadIdx.y;\n int w = blockDim.y;\n dest[row*w+col] = tex2D(mtx_tex, row, col);\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> _____________________ TestDriver.test_multiple_2d_textures _____________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02645620>
>>
>>     @mark_cuda_test
>>     def test_multiple_2d_textures(self):
>>         mod = SourceModule("""
>>             texture<float, 2, cudaReadModeElementType> mtx_tex;
>>             texture<float, 2, cudaReadModeElementType> mtx2_tex;
>>
>>             __global__ void copy_texture(float *dest)
>>             {
>>               int row = threadIdx.x;
>>               int col = threadIdx.y;
>>               int w = blockDim.y;
>>               dest[row*w+col] =
>>                   tex2D(mtx_tex, row, col)
>>                   +
>>                   tex2D(mtx2_tex, row, col);
>>             }
>> >           """)
>>
>>         copy_texture = mod.get_function("copy_texture")
>>         mtx_tex = mod.get_texref("mtx_tex")
>>         mtx2_tex = mod.get_texref("mtx2_tex")
>>
>>         shape = (3,4)
>>         a = numpy.random.randn(*shape).astype(numpy.float32)
>>         b = numpy.random.randn(*shape).astype(numpy.float32)
>>         drv.matrix_to_texref(a, mtx_tex, order="F")
>>         drv.matrix_to_texref(b, mtx2_tex, order="F")
>>
>>         dest = numpy.zeros(shape, dtype=numpy.float32)
>>         copy_texture(drv.Out(dest),
>>                 block=shape+(1,),
>>                 texrefs=[mtx_tex, mtx2_tex]
>>                 )
>>         assert la.norm(dest-a-b) < 1e-6
>>
>>     @mark_cuda_test
>>     def test_multichannel_2d_texture(self):
>>
>> test_driver.py:202:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x011215F0>
>> source = '\n texture<float, 2, cudaReadModeElementType> mtx_tex;\n texture<float, 2, cudaReadModeElementType> mtx... tex2D(mtx_tex, row, col)\n +\n tex2D(mtx2_tex, row, col);\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> _________________ TestDriver.test_multichannel_linear_texture __________________
>>
>>     def f(*args, **kwargs):
>>         import pycuda.driver
>>         # appears to be idempotent, i.e. no harm in calling it more than once
>>         pycuda.driver.init()
>>
>>         ctx = make_default_context()
>>         try:
>>             assert isinstance(ctx.get_device().name(), str)
>>             assert isinstance(ctx.get_device().compute_capability(), tuple)
>>             assert isinstance(ctx.get_device().get_attributes(), dict)
>> >           inner_f(*args, **kwargs)
>>
>> c:\python26\lib\site-packages\pycuda\tools.py:496:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <test_driver.TestDriver instance at 0x02645710>
>>
>>     @mark_cuda_test
>>     def test_multichannel_linear_texture(self):
>>         mod = SourceModule("""
>>             #define CHANNELS 4
>>             texture<float4, 1, cudaReadModeElementType> mtx_tex;
>>
>>             __global__ void copy_texture(float *dest)
>>             {
>>               int i = threadIdx.x+blockDim.x*threadIdx.y;
>>               float4 texval = tex1Dfetch(mtx_tex, i);
>>               dest[i*CHANNELS + 0] = texval.x;
>>               dest[i*CHANNELS + 1] = texval.y;
>>               dest[i*CHANNELS + 2] = texval.z;
>>               dest[i*CHANNELS + 3] = texval.w;
>>             }
>> >           """)
>>
>>         copy_texture = mod.get_function("copy_texture")
>>         mtx_tex = mod.get_texref("mtx_tex")
>>
>>         shape = (16, 16)
>>         channels = 4
>>         a = numpy.random.randn(*(shape+(channels,))).astype(numpy.float32)
>>         a_gpu = drv.to_device(a)
>>         mtx_tex.set_address(a_gpu, a.nbytes)
>>         mtx_tex.set_format(drv.array_format.FLOAT, 4)
>>
>>         dest = numpy.zeros(shape+(channels,), dtype=numpy.float32)
>>         copy_texture(drv.Out(dest),
>>                 block=shape+(1,),
>>                 texrefs=[mtx_tex]
>>                 )
>>         #print a
>>         #print dest
>>         assert la.norm(dest-a) == 0
>>
>>     @mark_cuda_test
>>     def test_large_smem(self):
>>
>> test_driver.py:276:
>> _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
>>
>> self = <pycuda.compiler.SourceModule object at 0x0111BFD0>
>> source = '\n #define CHANNELS 4\n texture<float4, 1, cudaReadModeElementType> mtx_tex;\n\n __global__ void...texval.y;\n dest[i*CHANNELS + 2] = texval.z;\n dest[i*CHANNELS + 3] = texval.w;\n }\n '
>> nvcc = 'nvcc', options = [], keep = False, no_extern_c = False, arch = None
>> code = None, cache_dir = None, include_dirs = []
>>
>>     def __init__(self, source, nvcc="nvcc", options=[], keep=False,
>>             no_extern_c=False, arch=None, code=None, cache_dir=None,
>>             include_dirs=[]):
>>         if arch is not None:
>>             try:
>>                 from pycuda.driver import Context
>>                 capability = Context.get_device().compute_capability()
>>                 if tuple(map(int, tuple(arch.split("_")[1]))) > capability:
>>                     from warnings import warn
>>                     warn("trying to compile for a compute capability "
>>                             "higher than selected GPU")
>>             except:
>>                 pass
>>
>>         cubin = compile(source, nvcc, options, keep, no_extern_c,
>>                 arch, code, cache_dir, include_dirs)
>>
>>         from pycuda.driver import module_from_buffer
>> >       self.module = module_from_buffer(cubin)
>> E       LogicError: cuModuleLoadDataEx failed: invalid image -
>>
>> c:\python26\lib\site-packages\pycuda\compiler.py:216: LogicError
>> ===================== 6 failed, 12 passed in 1.38 seconds ======================
>>
>> -----
>>
>> any help appreciated.
>>
>> Thanks!
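If the trivial module in the sketch further up also fails to load, the next
thing I plan to try is pinning the nvcc target architecture explicitly; this
assumes (unverified on my part) that the NVS 295 is compute capability 1.1:

    # Try the same no-op kernel under explicit nvcc targets; if the
    # default fails but an explicit sm_10/sm_11 build loads, the default
    # codegen target is the likely culprit.
    import pycuda.autoinit
    from pycuda.compiler import SourceModule

    src = "__global__ void noop() { }"
    for arch in (None, "sm_10", "sm_11"):
        try:
            SourceModule(src, arch=arch)
            print "arch =", arch, "-> module loads OK"
        except Exception, e:
            print "arch =", arch, "->", e

The other thing worth ruling out on Windows is a 32-bit/64-bit mismatch: if
nvcc emits a 64-bit cubin while the Python process (and hence the CUDA
context) is 32-bit, the driver will reportedly refuse the image in exactly
this way.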
>
> --
> Paul Rigor
> Pre-doctoral BIT Fellow and Graduate Student
> Institute for Genomics and Bioinformatics
> Donald Bren School of Information and Computer Sciences
> University of California, Irvine
> http://www.ics.uci.edu/~prigor

_______________________________________________
PyCUDA mailing list
pyc...@host304.hostmonster.com
http://host304.hostmonster.com/mailman/listinfo/pycuda_tiker.net