Dear PyFR Developers,

I am trying to run a simulation with CUDA backend on 12 GPUs. However I get 
the following error:
Traceback (most recent call last):
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py",
 
line 33, in __call__
    res = cache[key]
KeyError: (<function CUDAKernelProvider._build_kernel at 0x2b76ca4d9268>, 
b'\x80\x03X\x08\x00\x00\x00mpicfluxq\x00X3&\x00\x00\n\n// AoSoA 
macros\n#define SOA_SZ 32\n#define SOA_IX(a, v, nv) ((((a) / SOA_SZ)*(nv) + 
(v))*SOA_SZ + (a) % SOA_SZ)\n\n// Typedefs\ntypedef double 
fpdtype_t;\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n__global__ void 
mpicflux(int _nx, fpdtype_t* __restrict__ gradul_v, const int* __restrict__ 
gradul_vix, const int* __restrict__ gradul_vrstri, const fpdtype_t* 
__restrict__ magnl_v, const fpdtype_t* __restrict__ nl_v, int ldnl, 
fpdtype_t* __restrict__ ul_v, const int* __restrict__ ul_vix, fpdtype_t* 
__restrict__ ur_v)\n               {\n                   int _x = 
blockIdx.x*blockDim.x + threadIdx.x;\n                   #define X_IDX 
(_x)\n                   #define X_IDX_AOSOA(v, nv) SOA_IX(X_IDX, v, nv)\n  
                 if (_x < _nx)\n                   {\n                      
 \n    // Perform the Riemann solve\n    fpdtype_t ficomm[5], fvcomm;\n    
{\n\n    // Compute the left and right fluxes + velocities and pressures\n  
  fpdtype_t fl_[3][5], fr_[3][5];\n    fpdtype_t vl_[3], vr_[3];\n    
fpdtype_t pl_, pr_;\n\n    {\n\n    fpdtype_t invrho__ = 
1.0/ul_v[ul_vix[X_IDX] + SOA_SZ*(0)], E__ = ul_v[ul_vix[X_IDX] + 
SOA_SZ*(4)];\n\n    // Compute the velocities\n    fpdtype_t rhov__[3];\n  
  rhov__[0] = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)];\n    vl_[0] = 
invrho__*rhov__[0];\n    rhov__[1] = ul_v[ul_vix[X_IDX] + SOA_SZ*(2)];\n    
vl_[1] = invrho__*rhov__[1];\n    rhov__[2] = ul_v[ul_vix[X_IDX] + 
SOA_SZ*(3)];\n    vl_[2] = invrho__*rhov__[2];\n\n    // Compute the 
pressure\n    pl_ = 0.3999999999999999*(E__ - 
0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1]) + 
(rhov__[2])*(rhov__[2])));\n\n    // Density and energy fluxes\n    
fl_[0][0] = rhov__[0];\n    fl_[0][4] = (E__ + pl_)*vl_[0];\n    fl_[1][0] 
= rhov__[1];\n    fl_[1][4] = (E__ + pl_)*vl_[1];\n    fl_[2][0] = 
rhov__[2];\n    fl_[2][4] = (E__ + pl_)*vl_[2];\n\n    // Momentum 
fluxes\n    fl_[0][1] = rhov__[0]*vl_[0] + pl_;\n    fl_[0][2] = 
rhov__[0]*vl_[1];\n    fl_[0][3] = rhov__[0]*vl_[2];\n    fl_[1][1] = 
rhov__[1]*vl_[0];\n    fl_[1][2] = rhov__[1]*vl_[1] + pl_;\n    fl_[1][3] = 
rhov__[1]*vl_[2];\n    fl_[2][1] = rhov__[2]*vl_[0];\n    fl_[2][2] = 
rhov__[2]*vl_[1];\n    fl_[2][3] = rhov__[2]*vl_[2] + pl_;\n\n};\n    
{\n\n    fpdtype_t invrho__ = 1.0/ur_v[_nx*(0) + X_IDX], E__ = ur_v[_nx*(4) 
+ X_IDX];\n\n    // Compute the velocities\n    fpdtype_t rhov__[3];\n    
rhov__[0] = ur_v[_nx*(1) + X_IDX];\n    vr_[0] = invrho__*rhov__[0];\n    
rhov__[1] = ur_v[_nx*(2) + X_IDX];\n    vr_[1] = invrho__*rhov__[1];\n    
rhov__[2] = ur_v[_nx*(3) + X_IDX];\n    vr_[2] = invrho__*rhov__[2];\n\n    
// Compute the pressure\n    pr_ = 0.3999999999999999*(E__ - 
0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1]) + 
(rhov__[2])*(rhov__[2])));\n\n    // Density and energy fluxes\n    
fr_[0][0] = rhov__[0];\n    fr_[0][4] = (E__ + pr_)*vr_[0];\n    fr_[1][0] 
= rhov__[1];\n    fr_[1][4] = (E__ + pr_)*vr_[1];\n    fr_[2][0] = 
rhov__[2];\n    fr_[2][4] = (E__ + pr_)*vr_[2];\n\n    // Momentum 
fluxes\n    fr_[0][1] = rhov__[0]*vr_[0] + pr_;\n    fr_[0][2] = 
rhov__[0]*vr_[1];\n    fr_[0][3] = rhov__[0]*vr_[2];\n    fr_[1][1] = 
rhov__[1]*vr_[0];\n    fr_[1][2] = rhov__[1]*vr_[1] + pr_;\n    fr_[1][3] = 
rhov__[1]*vr_[2];\n    fr_[2][1] = rhov__[2]*vr_[0];\n    fr_[2][2] = 
rhov__[2]*vr_[1];\n    fr_[2][3] = rhov__[2]*vr_[2] + pr_;\n\n};\n\n    // 
Sum the left and right velocities and take the normal\n    fpdtype_t nv_ = 
((nl_v[ldnl*(0) + X_IDX])*(vl_[0] + vr_[0]) + (nl_v[ldnl*(1) + 
X_IDX])*(vl_[1] + vr_[1]) + (nl_v[ldnl*(2) + X_IDX])*(vl_[2] + 
vr_[2]));\n\n    // Estimate the maximum wave speed / 2\n    fpdtype_t a_ = 
sqrt(0.35*(pl_ + pr_)/(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] + ur_v[_nx*(0) + 
X_IDX]))\n                + 0.25*fabs(nv_);\n\n    // Output\n    ficomm[0] 
= 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][0] + fr_[0][0]) + nl_v[ldnl*(1) + 
X_IDX]*(fl_[1][0] + fr_[1][0]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][0] + 
fr_[2][0]))\n             + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] - 
ur_v[_nx*(0) + X_IDX]);\n    ficomm[1] = 0.5*(nl_v[ldnl*(0) + 
X_IDX]*(fl_[0][1] + fr_[0][1]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][1] + 
fr_[1][1]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][1] + fr_[2][1]))\n            
 + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] - ur_v[_nx*(1) + X_IDX]);\n    
ficomm[2] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][2] + fr_[0][2]) + 
nl_v[ldnl*(1) + X_IDX]*(fl_[1][2] + fr_[1][2]) + nl_v[ldnl*(2) + 
X_IDX]*(fl_[2][2] + fr_[2][2]))\n             + a_*(ul_v[ul_vix[X_IDX] + 
SOA_SZ*(2)] - ur_v[_nx*(2) + X_IDX]);\n    ficomm[3] = 0.5*(nl_v[ldnl*(0) + 
X_IDX]*(fl_[0][3] + fr_[0][3]) + nl_v[ldnl*(1) + X_IDX]*(fl_[1][3] + 
fr_[1][3]) + nl_v[ldnl*(2) + X_IDX]*(fl_[2][3] + fr_[2][3]))\n            
 + a_*(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - ur_v[_nx*(3) + X_IDX]);\n    
ficomm[4] = 0.5*(nl_v[ldnl*(0) + X_IDX]*(fl_[0][4] + fr_[0][4]) + 
nl_v[ldnl*(1) + X_IDX]*(fl_[1][4] + fr_[1][4]) + nl_v[ldnl*(2) + 
X_IDX]*(fl_[2][4] + fr_[2][4]))\n             + a_*(ul_v[ul_vix[X_IDX] + 
SOA_SZ*(4)] - ur_v[_nx*(4) + X_IDX]);\n\n};\n\n    fpdtype_t fvl[3][5] = 
{{0}};\n    {\n\n    fpdtype_t rho_  = ul_v[ul_vix[X_IDX] + SOA_SZ*(0)];\n  
  fpdtype_t rhou_ = ul_v[ul_vix[X_IDX] + SOA_SZ*(1)], rhov_ = 
ul_v[ul_vix[X_IDX] + SOA_SZ*(2)], rhow_ = ul_v[ul_vix[X_IDX] + 
SOA_SZ*(3)];\n    fpdtype_t E_    = ul_v[ul_vix[X_IDX] + SOA_SZ*(4)];\n\n  
  fpdtype_t rcprho_ = 1.0/rho_;\n    fpdtype_t u_ = rcprho_*rhou_, v_ = 
rcprho_*rhov_, w_ = rcprho_*rhow_;\n\n    fpdtype_t rho_x_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(0)];\n    
fpdtype_t rho_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + 
SOA_SZ*(0)];\n    fpdtype_t rho_z_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(2) + SOA_SZ*(0)];\n\n    // Velocity derivatives 
(rho_*grad[u_,v_,w_])\n    fpdtype_t u_x_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(0) + SOA_SZ*(1)] - u_*rho_x_;\n    fpdtype_t u_y_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(1)] - 
u_*rho_y_;\n    fpdtype_t u_z_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(2) + SOA_SZ*(1)] - u_*rho_z_;\n    fpdtype_t v_x_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(2)] - 
v_*rho_x_;\n    fpdtype_t v_y_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(1) + SOA_SZ*(2)] - v_*rho_y_;\n    fpdtype_t v_z_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(2) + SOA_SZ*(2)] - 
v_*rho_z_;\n    fpdtype_t w_x_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(0) + SOA_SZ*(3)] - w_*rho_x_;\n    fpdtype_t w_y_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + SOA_SZ*(3)] - 
w_*rho_y_;\n    fpdtype_t w_z_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(2) + SOA_SZ*(3)] - w_*rho_z_;\n\n    fpdtype_t E_x_ = 
gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(0) + SOA_SZ*(4)];\n    
fpdtype_t E_y_ = gradul_v[gradul_vix[X_IDX] + gradul_vrstri[X_IDX]*(1) + 
SOA_SZ*(4)];\n    fpdtype_t E_z_ = gradul_v[gradul_vix[X_IDX] + 
gradul_vrstri[X_IDX]*(2) + SOA_SZ*(4)];\n\n    // Compute the temperature 
and viscosity\n    fpdtype_t cpT_ = 1.4*(rcprho_*E_ - 0.5*(u_*u_ + v_*v_ + 
w_*w_));\n    fpdtype_t Trat_ = 0.4*cpT_;\n    fpdtype_t mu_c_ = 
4.59985915432e-05*Trat_*sqrt(Trat_)\n                   / (cpT_ + 
1.270376356);\n\n    // Compute temperature derivatives 
(c_v*dT/d[x,y,z])\n    fpdtype_t T_x_ = rcprho_*(E_x_ - (rcprho_*rho_x_*E_ 
+ u_*u_x_ + v_*v_x_ + w_*w_x_));\n    fpdtype_t T_y_ = rcprho_*(E_y_ - 
(rcprho_*rho_y_*E_ + u_*u_y_ + v_*v_y_ + w_*w_y_));\n    fpdtype_t T_z_ = 
rcprho_*(E_z_ - (rcprho_*rho_z_*E_ + u_*u_z_ + v_*v_z_ + w_*w_z_));\n\n    
// Negated stress tensor elements\n    fpdtype_t t_xx_ = 
-2*mu_c_*rcprho_*(u_x_ - 0.3333333333333333*(u_x_ + v_y_ + w_z_));\n    
fpdtype_t t_yy_ = -2*mu_c_*rcprho_*(v_y_ - 0.3333333333333333*(u_x_ + v_y_ 
+ w_z_));\n    fpdtype_t t_zz_ = -2*mu_c_*rcprho_*(w_z_ - 
0.3333333333333333*(u_x_ + v_y_ + w_z_));\n    fpdtype_t t_xy_ = 
-mu_c_*rcprho_*(v_x_ + u_y_);\n    fpdtype_t t_xz_ = -mu_c_*rcprho_*(u_z_ + 
w_x_);\n    fpdtype_t t_yz_ = -mu_c_*rcprho_*(w_y_ + v_z_);\n\n    
fvl[0][1] += t_xx_;     fvl[1][1] += t_xy_;     fvl[2][1] += t_xz_;\n    
fvl[0][2] += t_xy_;     fvl[1][2] += t_yy_;     fvl[2][2] += t_yz_;\n    
fvl[0][3] += t_xz_;     fvl[1][3] += t_yz_;     fvl[2][3] += t_zz_;\n\n    
fvl[0][4] += u_*t_xx_ + v_*t_xy_ + w_*t_xz_ + 
-mu_c_*1.9444444444444444*T_x_;\n    fvl[1][4] += u_*t_xy_ + v_*t_yy_ + 
w_*t_yz_ + -mu_c_*1.9444444444444444*T_y_;\n    fvl[2][4] += u_*t_xz_ + 
v_*t_yz_ + w_*t_zz_ + -mu_c_*1.9444444444444444*T_z_;\n\n};\n    
{\n\n\n};\n\n\n    fvcomm = nl_v[ldnl*(0) + X_IDX]*fvl[0][0] + 
nl_v[ldnl*(1) + X_IDX]*fvl[1][0] + nl_v[ldnl*(2) + X_IDX]*fvl[2][0];\n    
fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] - ur_v[_nx*(0) + 
X_IDX]);\n\n    ul_v[ul_vix[X_IDX] + SOA_SZ*(0)] = 
magnl_v[X_IDX]*(ficomm[0] + fvcomm);\n    fvcomm = nl_v[ldnl*(0) + 
X_IDX]*fvl[0][1] + nl_v[ldnl*(1) + X_IDX]*fvl[1][1] + nl_v[ldnl*(2) + 
X_IDX]*fvl[2][1];\n    fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] - 
ur_v[_nx*(1) + X_IDX]);\n\n    ul_v[ul_vix[X_IDX] + SOA_SZ*(1)] = 
magnl_v[X_IDX]*(ficomm[1] + fvcomm);\n    fvcomm = nl_v[ldnl*(0) + 
X_IDX]*fvl[0][2] + nl_v[ldnl*(1) + X_IDX]*fvl[1][2] + nl_v[ldnl*(2) + 
X_IDX]*fvl[2][2];\n    fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] - 
ur_v[_nx*(2) + X_IDX]);\n\n    ul_v[ul_vix[X_IDX] + SOA_SZ*(2)] = 
magnl_v[X_IDX]*(ficomm[2] + fvcomm);\n    fvcomm = nl_v[ldnl*(0) + 
X_IDX]*fvl[0][3] + nl_v[ldnl*(1) + X_IDX]*fvl[1][3] + nl_v[ldnl*(2) + 
X_IDX]*fvl[2][3];\n    fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] - 
ur_v[_nx*(3) + X_IDX]);\n\n    ul_v[ul_vix[X_IDX] + SOA_SZ*(3)] = 
magnl_v[X_IDX]*(ficomm[3] + fvcomm);\n    fvcomm = nl_v[ldnl*(0) + 
X_IDX]*fvl[0][4] + nl_v[ldnl*(1) + X_IDX]*fvl[1][4] + nl_v[ldnl*(2) + 
X_IDX]*fvl[2][4];\n    fvcomm += 0.1*(ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] - 
ur_v[_nx*(4) + X_IDX]);\n\n    ul_v[ul_vix[X_IDX] + SOA_SZ*(4)] = 
magnl_v[X_IDX]*(ficomm[4] + fvcomm);\n\n                   }\n              
     #undef X_IDX\n                   #undef X_IDX_AOSOA\n              
 
}\n\nq\x01]q\x02(cnumpy\nint32\nq\x03cnumpy\nint64\nq\x04h\x04h\x04h\x04h\x04h\x03h\x04h\x04h\x04e\x87q\x05.',
 
b'\x80\x03}q\x00.')

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/bin/pyfr", line 11, in 
<module>
    sys.exit(main())
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py",
 
line 112, in main
    args.process(args)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py",
 
line 263, in process_restart
    _process_common(args, mesh, soln, cfg)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/__main__.py",
 
line 226, in _process_common
    solver = get_solver(backend, rallocs, mesh, soln, cfg)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/__init__.py",
 
line 16, in get_solver
    return get_integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/__init__.py",
 
line 36, in get_integrator
    return integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py",
 
line 80, in __init__
    super().__init__(*args, **kwargs)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/controllers.py",
 
line 14, in __init__
    super().__init__(*args, **kwargs)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/steppers.py",
 
line 159, in __init__
    super().__init__(*args, **kwargs)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/integrators/std/base.py",
 
line 19, in __init__
    nregs=self.nregs, cfg=cfg)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py",
 
line 64, in __init__
    self._gen_kernels(eles, int_inters, mpi_inters, bc_inters)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/base/system.py",
 
line 193, in _gen_kernels
    kernels[pn, kn].append(kgetter())
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/solvers/navstokes/inters.py",
 
line 69, in <lambda>
    magnl=self._mag_pnorm_lhs, nl=self._norm_pnorm_lhs
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/backend.py",
 
line 163, in kernel
    return kern(*args, **kwargs)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/base/kernels.py",
 
line 162, in kernel_meth
    fun = self._build_kernel(name, src, list(it.chain(*argt)))
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/util.py",
 
line 35, in __call__
    res = cache[key] = self.func(*args, **kwargs)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pyfr/backends/cuda/provider.py",
 
line 20, in _build_kernel
    fun = compiler.SourceModule(src).get_function(name)
  File 
"/rwthfs/rz/cluster/home/am073751/PyFR/ENV4/lib/python3.7/site-packages/pycuda/compiler.py",
 
line 294, in __init__
    self.module = module_from_buffer(cubin)
pycuda._driver.LogicError: cuModuleLoadDataEx failed: device kernel image 
is invalid - error   : Binary format for key='0', ident='' is not recognized
application called MPI_Abort(MPI_COMM_WORLD, 1) - process 7

 The same case works just fine with OpenMP backend. I was wondering if you 
could help me with the error.


Regards 
Amir

-- 
You received this message because you are subscribed to the Google Groups "PyFR 
Mailing List" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
To view this discussion on the web, visit 
https://groups.google.com/d/msgid/pyfrmailinglist/67674790-5689-4ab7-a1b4-6314759bc956n%40googlegroups.com.

Reply via email to