I am getting the following error when using either a simple dense-layer model 
or a CNN model with Keras. Please help.

GpuArrayException: ('The following error happened while compiling the 
node', GpuAdvancedIncSubtensor1_dev20{inplace=True, 
set_instead_of_inc=False}(GpuAlloc<None>{memset_0=True}.0, GpuReshape{2}.0, 
GpuReshape{1}.0), '\n', u'CUDA kernel compile failure ::\n0001\t#define 
local_barrier() __syncthreads()\n0002\t#define WITHIN_KERNEL extern "C" 
__device__\n0003\t#define KERNEL extern "C" __global__\n0004\t#define 
GLOBAL_MEM /* empty */\n0005\t#define LOCAL_MEM __shared__\n0006\t#define 
LOCAL_MEM_ARG /* empty */\n0007\t#ifdef NAN\n0008\t#undef 
NAN\n0009\t#endif\n0010\t#define NAN 
__int_as_float(0x7fffffff)\n0011\t#ifdef INFINITY\n0012\t#undef 
INFINITY\n0013\t#endif\n0014\t#define INFINITY 
__int_as_float(0x7f800000)\n0015\t#define LID_0 threadIdx.x\n0016\t#define 
LID_1 threadIdx.y\n0017\t#define LID_2 threadIdx.z\n0018\t#define LDIM_0 
blockDim.x\n0019\t#define LDIM_1 blockDim.y\n0020\t#define LDIM_2 
blockDim.z\n0021\t#define GID_0 blockIdx.x\n0022\t#define GID_1 
blockIdx.y\n0023\t#define GID_2 blockIdx.z\n0024\t#define GDIM_0 
gridDim.x\n0025\t#define GDIM_1 gridDim.y\n0026\t#define GDIM_2 
gridDim.z\n0027\t#define ga_bool unsigned char\n0028\t#define ga_byte 
signed char\n0029\t#define ga_ubyte unsigned char\n0030\t#define ga_short 
short\n0031\t#define ga_ushort unsigned short\n0032\t#define ga_int 
int\n0033\t#define ga_uint unsigned int\n0034\t#define ga_long long 
long\n0035\t#define ga_ulong unsigned long long\n0036\t#define ga_float 
float\n0037\t#define ga_double double\n0038\t#define ga_half 
ga_ushort\n0039\t#define ga_size size_t\n0040\t#define ga_ssize 
ptrdiff_t\n0041\t#define load_half(p) __half2float(*(p))\n0042\t#define 
store_half(p, v) (*(p) = __float2half_rn(v))\n0043\t#define 
GA_DECL_SHARED_PARAM(type, name)\n0044\t#define GA_DECL_SHARED_BODY(type, 
name) extern __shared__ type name[];\n0045\t#define GA_WARP_SIZE 
warpSize\n0046\t#line 1\n0047\t\n0048\t/*\n0049\t * This is an atomicAdd 
that works for doubles since that is not provided\n0050\t * natively by 
cuda.\n0051\t */\n0052\t__device__ ga_double atomicAdd(ga_double* address, 
ga_double val) {\n0053\t    unsigned long long int* address_as_ull 
=\n0054\t                                          (unsigned long long 
int*)address;\n0055\t    unsigned long long int old = *address_as_ull, 
assumed;\n0056\t    do {\n0057\t        assumed = old;\n0058\t        old = 
atomicCAS(address_as_ull, assumed,\n0059\t                        
__double_as_longlong(val +\n0060\t                        
__longlong_as_double(assumed)));\n0061\t    } while (assumed != 
old);\n0062\t    return 
__longlong_as_double(old);\n0063\t}\n0064\t\n0065\t__device__ ga_double 
atomicExch(ga_double *address, ga_double val) {\n0066\t    return 
atomicExch((unsigned long long int *)address,\n0067\t                      
__double_as_longlong(val));\n0068\t}\n0069\t\n0070\t/*\n0071\t * This is a 
version of atomicAdd that works for half-floats.  It may\n0072\t * read and 
write 2 bytes more than the size of the array if the array\n0073\t * has an 
uneven number of elements.  The actual value at that spot\n0074\t * will 
not be modified.\n0075\t */\n0076\t\n0077\t__device__ ga_half 
atomicAdd(ga_half *addr, ga_half val) {\n0078\t  ga_uint *base = (ga_uint 
*)((ga_size)addr & ~2);\n0079\t  ga_uint old, assumed, sum, new_;\n0080\t  
old = *base;\n0081\t  do {\n0082\t    assumed = old;\n0083\t    sum = 
__float2half_rn(\n0084\t      __half2float(val) +\n0085\t      
__half2float((ga_half)__byte_perm(old, 0,\n0086\t                    
 ((ga_size)addr & 2) ? 0x4432 : 0x4410)));\n0087\t    new_ = 
__byte_perm(old, sum, ((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0088\t    
old = atomicCAS(base, assumed, new_);\n0089\t  } while (assumed != 
old);\n0090\t  return (ga_half)__byte_perm(old, 0,\n0091\t                  
                ((ga_size)addr & 2) ? 0x4432 : 
0x4410);\n0092\t}\n0093\t\n0094\t__device__ ga_half atomicExch(ga_half 
*addr, ga_half val) {\n0095\t  ga_uint *base = (ga_uint *)((ga_size)addr & 
~2);\n0096\t  ga_uint old, assumed, new_;\n0097\t  old = *base;\n0098\t  do 
{\n0099\t    assumed = old;\n0100\t    new_ = __byte_perm(old, val, 
((ga_size)addr & 2) ? 0x5410 : 0x3254);\n0101\t    old = atomicCAS(base, 
assumed, new_);\n0102\t  } while (assumed != old);\n0103\t  return 
(ga_half)__byte_perm(old, 0,\n0104\t                                  
((ga_size)addr & 2) ? 0x4432 : 0x4410);\n0105\t}\n0106\t\n0107\t        
KERNEL void k_vector_add_fast(const ga_size numRowsX,\n0108\t              
                        const ga_size numColsX,\n0109\t                    
                  const ga_ssize stridesX0,\n0110\t                        
              const ga_ssize stridesX1,\n0111\t                            
          ga_float *X,\n0112\t                                      const 
ga_size offset_X,\n0113\t                                      const 
ga_size numRowsY,\n0114\t                                      const 
ga_size numColsY,\n0115\t                                      const 
ga_ssize stridesY0,\n0116\t                                      const 
ga_ssize stridesY1,\n0117\t                                      ga_float 
*Y,\n0118\t                                      const ga_size 
offset_Y,\n0119\t                                      const ga_size 
numIndices,\n0120\t                                      const ga_ssize 
stridesIndices,\n0121\t                                      ga_int 
*indices_arr,\n0122\t                                      const ga_size 
offset_indices_arr,\n0123\t                                      const int 
set_instead_of_inc,\n0124\t                                      ga_int 
*err)\n0125\t        {\n0126\t             X = (ga_float *)(((char 
*)X)+offset_X);\n0127\t             Y = (ga_float *)(((char 
*)Y)+offset_Y);\n0128\t             indices_arr = (ga_int *)(((char 
*)indices_arr)+offset_indices_arr);\n0129\t             for (int i = 
(blockIdx.x); i < numIndices; i += gridDim.x)\n0130\t            
 {\n0131\t                  for(int j = (threadIdx.x); j < numColsX;j += 
blockDim.x)\n0132\t                  {\n0133\t                      
ga_ssize x_row = indices_arr[i * stridesIndices];\n0134\t                  
    if (x_row < 0)\n0135\t                          x_row += 
numRowsX;\n0136\t                      ga_ssize y_row = i;\n0137\t          
            if (x_row < numRowsX && x_row >= 0) {\n0138\t                  
      if (set_instead_of_inc) {\n0139\t                          
atomicExch(&X[(x_row * stridesX0) + (j * stridesX1)],\n0140\t              
                     Y[(y_row * stridesY0) + (j * stridesY1)]);\n0141\t    
                    } else {\n0142\t                          
atomicAdd(&X[(x_row * stridesX0) + (j * stridesX1)],\n0143\t                
                    Y[(y_row * stridesY0) + (j * stridesY1)]);\n0144\t      
                  }\n0145\t                      } else {\n0146\t          
              *err = 1;\n0147\t                      }\n0148\t              
    }\n0149\t             }\n0150\t             return;\n0151\t        
}\n0152\t        \n\nCompile log:\nNVRTC compile 
log::\ndefault_program(38): error: identifier "__half2float" is 
undefined\n\ndefault_program(37): error: identifier "__float2half_rn" is 
undefined\n\n2 errors detected in the compilation of 
"default_program".\n\n')

-- 

--- 
You received this message because you are subscribed to the Google Groups 
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email 
to [email protected].
For more options, visit https://groups.google.com/d/optout.

Reply via email to