________________________________
From: Tom Fogal
Sent: Saturday, July 4, 2015 12:42 PM
To: [email protected]
Subject: 'pyfr run' KeyError for CUDA kernel


Hi all,


I've been upgrading/downgrading CUDA for other reasons on my system, and when I 
came back to my previously-working PyFR installation something seems to have 
broken.  Now when I run, I get errors like in the attached 'pyfr.log', a 
KeyError on a CUDA kernel.  This always happens about 1.5 minutes into the run, 
and the CUDA kernel within the KeyError exception is different each time.


I have tried with v1.0.0 and now 4ce740ff4d7ff3bd93b6f3ad00ee46815b6b810c to no 
avail.


I am presently running with CUDA v6.5, PyCUDA 2014.1.


Any ideas as to what I might have messed up?


Thanks,


-tom

-----------------------------------------------------------------------------------
This email message is for the sole use of the intended recipient(s) and may contain
confidential information.  Any unauthorized review, use, disclosure or distribution
is prohibited.  If you are not the intended recipient, please contact the sender by
reply email and destroy all copies of the original message.
-----------------------------------------------------------------------------------

-- 
You received this message because you are subscribed to the Google Groups "PyFR 
Mailing List" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To post to this group, send an email to [email protected].
Visit this group at http://groups.google.com/group/pyfrmailinglist.
For more options, visit https://groups.google.com/d/optout.
Mesh import ...
Mesh partitioning...
Simulation...
Traceback (most recent call last):
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/util.py", line 32, in __call__
KeyError: (<function CUDAKernelProvider._build_kernel at 0x7fcc0d4d3f28>, b'(X\x05\x00\x00\x00tfluxq\x00X\x90\x1d\x00\x00\n\n// Typedefs\ntypedef float fpdtype_t;\n\n\n\n\n\n\n\n\n\n\n\n\n__global__ void tflux(int _ny, int _nx, fpdtype_t* __restrict__ f_v, int lsdf, const fpdtype_t* __restrict__ smats_v, int lsdsmats, const fpdtype_t* __restrict__ u_v, int lsdu)\n               {\n                   int _x = blockIdx.x*blockDim.x + threadIdx.x;\n                   for (int _y = 0; _y < _ny && _x < _nx; ++_y)\n                   {\n                       \n    // Compute the flux (F = Fi + Fv)\n    fpdtype_t ftemp[3][5];\n    fpdtype_t p, v[3];\n    {\n\n    fpdtype_t invrho_ = 1.0f/u_v[(_y*5 + 0)*lsdu + _x], E_ = u_v[(_y*5 + 4)*lsdu + _x];\n\n    // Compute the velocities\n    fpdtype_t rhov_[3];\n    rhov_[0] = u_v[(_y*5 + 1)*lsdu + _x];\n    v[0] = invrho_*rhov_[0];\n    rhov_[1] = u_v[(_y*5 + 2)*lsdu + _x];\n    v[1] = invrho_*rhov_[1];\n    rhov_[2] = u_v[(_y*5 + 3)*lsdu + _x];\n    v[2] = invrho_*rhov_[2];\n\n    // Compute the pressure\n    p = 0.3999999999999999f*(E_ - 0.5f*invrho_*((rhov_[0])*(rhov_[0]) + (rhov_[1])*(rhov_[1]) + (rhov_[2])*(rhov_[2])));\n\n    // Density and energy fluxes\n    ftemp[0][0] = rhov_[0];\n    ftemp[0][4] = (E_ + p)*v[0];\n    ftemp[1][0] = rhov_[1];\n    ftemp[1][4] = (E_ + p)*v[1];\n    ftemp[2][0] = rhov_[2];\n    ftemp[2][4] = (E_ + p)*v[2];\n\n    // Momentum fluxes\n    ftemp[0][1] = rhov_[0]*v[0] + p;\n    ftemp[0][2] = rhov_[0]*v[1];\n    ftemp[0][3] = rhov_[0]*v[2];\n    ftemp[1][1] = rhov_[1]*v[0];\n    ftemp[1][2] = rhov_[1]*v[1] + p;\n    ftemp[1][3] = rhov_[1]*v[2];\n    ftemp[2][1] = rhov_[2]*v[0];\n    ftemp[2][2] = rhov_[2]*v[1];\n    ftemp[2][3] = rhov_[2]*v[2] + p;\n\n};\n    {\n\n    fpdtype_t rho_  = u_v[(_y*5 + 0)*lsdu + _x];\n    fpdtype_t rhou_ = u_v[(_y*5 + 1)*lsdu + _x], rhov_ = u_v[(_y*5 + 2)*lsdu + _x], rhow_ = u_v[(_y*5 + 3)*lsdu + _x];\n    fpdtype_t E_    = u_v[(_y*5 + 4)*lsdu + _x];\n\n    
fpdtype_t rcprho_ = 1.0f/rho_;\n    fpdtype_t u_ = rcprho_*rhou_, v_ = rcprho_*rhov_, w_ = rcprho_*rhow_;\n\n    fpdtype_t rho_x_ = f_v[((0*_ny + _y)*5 + 0)*lsdf + _x];\n    fpdtype_t rho_y_ = f_v[((1*_ny + _y)*5 + 0)*lsdf + _x];\n    fpdtype_t rho_z_ = f_v[((2*_ny + _y)*5 + 0)*lsdf + _x];\n\n    // Velocity derivatives (rho_*grad[u_,v_,w_])\n    fpdtype_t u_x_ = f_v[((0*_ny + _y)*5 + 1)*lsdf + _x] - u_*rho_x_;\n    fpdtype_t u_y_ = f_v[((1*_ny + _y)*5 + 1)*lsdf + _x] - u_*rho_y_;\n    fpdtype_t u_z_ = f_v[((2*_ny + _y)*5 + 1)*lsdf + _x] - u_*rho_z_;\n    fpdtype_t v_x_ = f_v[((0*_ny + _y)*5 + 2)*lsdf + _x] - v_*rho_x_;\n    fpdtype_t v_y_ = f_v[((1*_ny + _y)*5 + 2)*lsdf + _x] - v_*rho_y_;\n    fpdtype_t v_z_ = f_v[((2*_ny + _y)*5 + 2)*lsdf + _x] - v_*rho_z_;\n    fpdtype_t w_x_ = f_v[((0*_ny + _y)*5 + 3)*lsdf + _x] - w_*rho_x_;\n    fpdtype_t w_y_ = f_v[((1*_ny + _y)*5 + 3)*lsdf + _x] - w_*rho_y_;\n    fpdtype_t w_z_ = f_v[((2*_ny + _y)*5 + 3)*lsdf + _x] - w_*rho_z_;\n\n    fpdtype_t E_x_ = f_v[((0*_ny + _y)*5 + 4)*lsdf + _x];\n    fpdtype_t E_y_ = f_v[((1*_ny + _y)*5 + 4)*lsdf + _x];\n    fpdtype_t E_z_ = f_v[((2*_ny + _y)*5 + 4)*lsdf + _x];\n\n    fpdtype_t mu_c_ = 7.39509972887452e-05f;\n\n\n    // Compute temperature derivatives (c_v*dT/d[x,y,z])\n    fpdtype_t T_x_ = rcprho_*(E_x_ - (rcprho_*rho_x_*E_ + u_*u_x_ + v_*v_x_ + w_*w_x_));\n    fpdtype_t T_y_ = rcprho_*(E_y_ - (rcprho_*rho_y_*E_ + u_*u_y_ + v_*v_y_ + w_*w_y_));\n    fpdtype_t T_z_ = rcprho_*(E_z_ - (rcprho_*rho_z_*E_ + u_*u_z_ + v_*v_z_ + w_*w_z_));\n\n    // Negated stress tensor elements\n    fpdtype_t t_xx_ = -2*mu_c_*rcprho_*(u_x_ - 0.3333333333333333f*(u_x_ + v_y_ + w_z_));\n    fpdtype_t t_yy_ = -2*mu_c_*rcprho_*(v_y_ - 0.3333333333333333f*(u_x_ + v_y_ + w_z_));\n    fpdtype_t t_zz_ = -2*mu_c_*rcprho_*(w_z_ - 0.3333333333333333f*(u_x_ + v_y_ + w_z_));\n    fpdtype_t t_xy_ = -mu_c_*rcprho_*(v_x_ + u_y_);\n    fpdtype_t t_xz_ = -mu_c_*rcprho_*(u_z_ + w_x_);\n    fpdtype_t t_yz_ = 
-mu_c_*rcprho_*(w_y_ + v_z_);\n\n    ftemp[0][1] += t_xx_;     ftemp[1][1] += t_xy_;     ftemp[2][1] += t_xz_;\n    ftemp[0][2] += t_xy_;     ftemp[1][2] += t_yy_;     ftemp[2][2] += t_yz_;\n    ftemp[0][3] += t_xz_;     ftemp[1][3] += t_yz_;     ftemp[2][3] += t_zz_;\n\n    ftemp[0][4] += u_*t_xx_ + v_*t_xy_ + w_*t_xz_ + -mu_c_*1.971830985915493f*T_x_;\n    ftemp[1][4] += u_*t_xy_ + v_*t_yy_ + w_*t_yz_ + -mu_c_*1.971830985915493f*T_y_;\n    ftemp[2][4] += u_*t_xz_ + v_*t_yz_ + w_*t_zz_ + -mu_c_*1.971830985915493f*T_z_;\n\n};\n\n    // Transform the fluxes\n    f_v[((0*_ny + _y)*5 + 0)*lsdf + _x] = smats_v[((0*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][0] + smats_v[((0*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][0] + smats_v[((0*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][0];\n    f_v[((0*_ny + _y)*5 + 1)*lsdf + _x] = smats_v[((0*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][1] + smats_v[((0*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][1] + smats_v[((0*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][1];\n    f_v[((0*_ny + _y)*5 + 2)*lsdf + _x] = smats_v[((0*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][2] + smats_v[((0*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][2] + smats_v[((0*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][2];\n    f_v[((0*_ny + _y)*5 + 3)*lsdf + _x] = smats_v[((0*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][3] + smats_v[((0*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][3] + smats_v[((0*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][3];\n    f_v[((0*_ny + _y)*5 + 4)*lsdf + _x] = smats_v[((0*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][4] + smats_v[((0*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][4] + smats_v[((0*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][4];\n    f_v[((1*_ny + _y)*5 + 0)*lsdf + _x] = smats_v[((1*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][0] + smats_v[((1*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][0] + smats_v[((1*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][0];\n    f_v[((1*_ny + _y)*5 + 1)*lsdf + _x] = smats_v[((1*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][1] + smats_v[((1*_ny + _y)*3 + 1)*lsdsmats + 
_x]*ftemp[1][1] + smats_v[((1*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][1];\n    f_v[((1*_ny + _y)*5 + 2)*lsdf + _x] = smats_v[((1*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][2] + smats_v[((1*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][2] + smats_v[((1*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][2];\n    f_v[((1*_ny + _y)*5 + 3)*lsdf + _x] = smats_v[((1*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][3] + smats_v[((1*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][3] + smats_v[((1*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][3];\n    f_v[((1*_ny + _y)*5 + 4)*lsdf + _x] = smats_v[((1*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][4] + smats_v[((1*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][4] + smats_v[((1*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][4];\n    f_v[((2*_ny + _y)*5 + 0)*lsdf + _x] = smats_v[((2*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][0] + smats_v[((2*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][0] + smats_v[((2*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][0];\n    f_v[((2*_ny + _y)*5 + 1)*lsdf + _x] = smats_v[((2*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][1] + smats_v[((2*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][1] + smats_v[((2*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][1];\n    f_v[((2*_ny + _y)*5 + 2)*lsdf + _x] = smats_v[((2*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][2] + smats_v[((2*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][2] + smats_v[((2*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][2];\n    f_v[((2*_ny + _y)*5 + 3)*lsdf + _x] = smats_v[((2*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][3] + smats_v[((2*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][3] + smats_v[((2*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][3];\n    f_v[((2*_ny + _y)*5 + 4)*lsdf + _x] = smats_v[((2*_ny + _y)*3 + 0)*lsdsmats + _x]*ftemp[0][4] + smats_v[((2*_ny + _y)*3 + 1)*lsdsmats + _x]*ftemp[1][4] + smats_v[((2*_ny + _y)*3 + 2)*lsdsmats + _x]*ftemp[2][4];\n\n                   }\n               }\n\nq\x01]q\x02(cnumpy\nint32\nq\x03h\x03cnumpy\nint64\nq\x04h\x03h\x04h\x03h\x04h\x03etq\x05.', b'}q\x00.')

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/usr/lib/python3/dist-packages/pytools/__init__.py", line 424, in _deco
    return func._memoize_dic[args]
AttributeError: 'function' object has no attribute '_memoize_dic'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/tfogal/sw/bin/pyfr", line 9, in <module>
    load_entry_point('pyfr==1.0.0', 'console_scripts', 'pyfr')()
  File "/home/tfogal/sw/lib/python3.4/site-packages/mpmath-0.19-py3.4.egg/mpmath/ctx_mp.py", line 1301, in g
    return f(*args, **kwargs)
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 126, in main
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 247, in process_run
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 231, in _process_common
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/solvers/__init__.py", line 14, in get_solver
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/__init__.py", line 29, in get_integrator
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/controllers.py", line 120, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/controllers.py", line 14, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/steppers.py", line 190, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/steppers.py", line 9, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/writers.py", line 15, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/integrators/base.py", line 47, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/solvers/base/system.py", line 59, in __init__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/solvers/base/system.py", line 158, in _gen_kernels
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/solvers/navstokes/elements.py", line 121, in <lambda>
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/backends/base/backend.py", line 173, in kernel
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/backends/base/kernels.py", line 162, in kernel_meth
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/util.py", line 34, in __call__
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/backends/cuda/provider.py", line 20, in _build_kernel
  File "/usr/lib/python3/dist-packages/pycuda/compiler.py", line 251, in __init__
    arch, code, cache_dir, include_dirs)
  File "/usr/lib/python3/dist-packages/pycuda/compiler.py", line 241, in compile
    return compile_plain(source, options, keep, nvcc, cache_dir)
  File "/usr/lib/python3/dist-packages/pycuda/compiler.py", line 79, in compile_plain
    checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
  File "<string>", line 2, in get_nvcc_version
  File "/usr/lib/python3/dist-packages/pytools/__init__.py", line 427, in _deco
    result = func(*args)
  File "/usr/lib/python3/dist-packages/pycuda/compiler.py", line 13, in get_nvcc_version
    result, stdout, stderr = call_capture_output(cmdline)
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 196, in call_capture_output
    return forker[0].call_capture_output(cmdline, cwd, error_on_nonzero)
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 155, in call_capture_output
    error_on_nonzero)
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 135, in _remote_invoke
    who="Prefork client", partner="prefork server")
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 74, in _recv_packet
    % (who, partner))
ValueError: incomplete format
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD 
with errorcode 1.

NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
Traceback (most recent call last):
  File "/home/tfogal/sw/bin/pyfr", line 9, in <module>
    load_entry_point('pyfr==1.0.0', 'console_scripts', 'pyfr')()
  File "/home/tfogal/sw/lib/python3.4/site-packages/mpmath-0.19-py3.4.egg/mpmath/ctx_mp.py", line 1301, in g
    return f(*args, **kwargs)
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 126, in main
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 247, in process_run
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 213, in _process_common
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 173, in enable_prefork
    _fork_server(s_child)
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 109, in _fork_server
    who="Prefork server", partner="parent")
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 81, in _recv_packet
    packet += sock.recv(size)
TypeError: Can't convert 'bytes' object to str implicitly
Traceback (most recent call last):
  File "/home/tfogal/sw/bin/pyfr", line 9, in <module>
    load_entry_point('pyfr==1.0.0', 'console_scripts', 'pyfr')()
  File "/home/tfogal/sw/lib/python3.4/site-packages/mpmath-0.19-py3.4.egg/mpmath/ctx_mp.py", line 1301, in g
    return f(*args, **kwargs)
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 126, in main
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 247, in process_run
  File "/home/tfogal/sw/lib/python3.4/site-packages/pyfr-1.0.0-py3.4.egg/pyfr/scripts/main.py", line 213, in _process_common
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 173, in enable_prefork
    _fork_server(s_child)
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 109, in _fork_server
    who="Prefork server", partner="parent")
  File "/usr/lib/python3/dist-packages/pytools/prefork.py", line 74, in _recv_packet
    % (who, partner))
ValueError: incomplete format
--------------------------------------------------------------------------
mpirun has exited due to process rank 0 with PID 1332 on
node takeo exiting improperly. There are two reasons this could occur:

1. this process did not call "init" before exiting, but others in
the job did. This can cause a job to hang indefinitely while it waits
for all processes to call "init". By rule, if one process calls "init",
then ALL processes must call "init" prior to termination.

2. this process called "init", but exited without calling "finalize".
By rule, all processes that call "init" MUST call "finalize" prior to
exiting or it will be considered an "abnormal termination"

This may have caused other processes in the application to be
terminated by signals sent by mpirun (as reported here).
--------------------------------------------------------------------------

Attachment: run.sh
Description: run.sh

Reply via email to