Pascal,
Thank you. The FAIL definition seems to be the only thing preventing the
compilation of that file. I updated the _lquote_macro function (in op.py)
to output everything as a single line:
def _lquote_macro(self, txt):
    res = []
    spl = txt.split('\n')
    for l in spl[:-1]:
        res.append(l)
    res.append(spl[-1])
    return ' '.join(res)
Everything compiled with this change.
--Dmitry
On Thursday, September 22, 2016 at 7:40:15 PM UTC+1, Pascal Lamblin wrote:
>
> Can you let us know which compiler (and which version) you are using?
>
> On Thu, Sep 22, 2016, D. Kit wrote:
> > Hello,
> >
> > I'm attempting to use conv3D2D in Theano, but when I get to the function
> > compilation stage it fails with the following errors:
> >
> > mod.cpp:684:1: error: expected unqualified-id before '{' token
> >
> > mod.cpp:1345:1: error: expected '}' at end of input
> > mod.cpp: In destructor '{anonymous}::__struct_compiled_op_d7db26d1cc9f
> >
> > mod.cpp:573:21: error: 'cleanup' was not declared in this scope
> > mod.cpp: At global scope:
> > mod.cpp:678:1: error: expected unqualified-id at end of input
> > #undef ITEMSIZE_INPUT_2
> > ^
> > mod.cpp:678:1: error: expected '}' at end of input
> >
> > I'm attaching the mod.cpp file that was generated by cc.py, op.py,
> dnn.py,
> > etc. The initialization code from dnn_base.c starts at line 684, but
> there
> > is no function name defined, so the code sits outside any useful code
> > block. The "#section cleanup_code_struct" code section also appears to
> have
> > been inserted into the wrong spot.
> >
> > Can someone tell me how to fix this?
> >
> > --Dmitry
> >
> > --
> >
> > ---
> > You received this message because you are subscribed to the Google
> Groups "theano-users" group.
> > To unsubscribe from this group and stop receiving emails from it, send
> an email to [email protected] <javascript:>.
> > For more options, visit https://groups.google.com/d/optout.
>
> > #include <Python.h>
> > #include <iostream>
> > #include "theano_mod_helper.h"
> > #include <gpuarray/array.h>
> > #include <gpuarray/kernel.h>
> > #include <gpuarray/error.h>
> > #include <gpuarray/buffer.h>
> > #include <gpuarray/buffer_blas.h>
> > #include <numpy/arrayobject.h>
> > #include <gpuarray_api.h>
> > #include <math.h>
> > #include <numpy/arrayscalars.h>
> > #include "cudnn.h"
> > #include "cudnn_helper.h"
> > #include "gpuarray_helper.h"
> > #include "gpuarray/types.h"
> > #include "gpuarray/array.h"
> > #include "gpuarray/util.h"
> > #include "gpuarray/ext_cuda.h"
> > #include "gpuarray_api.h"
> > #include "numpy_compat.h"
> > //////////////////////
> > //// Support Code
> > //////////////////////
> >
> > void _capsule_destructor(PyObject *o) {
> > void *d = PyCapsule_GetContext(o);
> > void *p = PyCapsule_GetPointer(o, NULL);
> > void (*f)(void *) = (void (*)(void *))d;
> > if (f != NULL) f(p);
> > }
> >
> >
> >
> >
> > static int
> > c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
> > cudnnDataType_t dt;
> > size_t ds;
> > switch (var->ga.typecode) {
> > case GA_FLOAT:
> > dt = CUDNN_DATA_FLOAT;
> > break;
> > case GA_DOUBLE:
> > dt = CUDNN_DATA_DOUBLE;
> > break;
> > case GA_HALF:
> > dt = CUDNN_DATA_HALF;
> > break;
> > default:
> > PyErr_SetString(PyExc_TypeError, "Non-float datatype in
> c_set_tensorNd");
> > return -1;
> > }
> > ds = gpuarray_get_elsize(var->ga.typecode);
> >
> > int strs[8], dims[8], default_stride = 1;
> > unsigned int nd = PyGpuArray_NDIM(var);
> >
> > if (nd > 8) {
> > PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
> > return -1;
> > }
> >
> > for (unsigned int _i = nd; _i > 0; _i--) {
> > unsigned int i = _i - 1;
> > strs[i] = PyGpuArray_STRIDE(var, i) ?
> > PyGpuArray_STRIDE(var, i)/ds : default_stride;
> > default_stride *= PyGpuArray_DIM(var, i);
> > dims[i] = PyGpuArray_DIM(var, i);
> > }
> >
> > /* Tensors can't be smaller than 3d for cudnn so we pad the
> > * descriptor if they are */
> > for (unsigned int i = nd; i < 3; i++) {
> > strs[i] = 1;
> > dims[i] = 1;
> > }
> >
> > cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 :
> nd,
> > dims, strs);
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "Could not set tensorNd descriptor: %s",
> > cudnnGetErrorString(err));
> > return -1;
> > }
> > return 0;
> > }
> >
> > static int c_make_tensorNd(PyGpuArrayObject *var,
> cudnnTensorDescriptor_t *desc) {
> > cudnnStatus_t err;
> > err = cudnnCreateTensorDescriptor(desc);
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "Could not create tensor descriptor: %s",
> > cudnnGetErrorString(err));
> > return -1;
> > }
> > if (c_set_tensorNd(var, *desc) != 0) {
> > cudnnDestroyTensorDescriptor(*desc);
> > return -1;
> > }
> > return 0;
> > }
> >
> > static int
> > c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
> > cudnnDataType_t dt;
> > cudnnStatus_t err;
> >
> > if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
> > PyErr_SetString(PyExc_ValueError,
> > "Only contiguous filters (kernels) are supported.");
> > return -1;
> > }
> > switch (var->ga.typecode) {
> > case GA_FLOAT:
> > dt = CUDNN_DATA_FLOAT;
> > break;
> > case GA_DOUBLE:
> > dt = CUDNN_DATA_DOUBLE;
> > break;
> > case GA_HALF:
> > dt = CUDNN_DATA_HALF;
> > break;
> > default:
> > PyErr_SetString(PyExc_TypeError, "Non-float datatype in
> c_set_filter");
> > return -1;
> > }
> >
> > int dims[8];
> > unsigned int nd = PyGpuArray_NDIM(var);
> >
> > if (nd > 8) {
> > PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
> > return -1;
> > }
> >
> > for (unsigned int _i = nd; _i > 0; _i--) {
> > unsigned int i = _i - 1;
> > dims[i] = PyGpuArray_DIM(var, i);
> > }
> >
> > /* Filters can't be less than 3d so we pad */
> > for (unsigned int i = nd; i < 3; i++)
> > dims[i] = 1;
> >
> > if (nd < 3)
> > nd = 3;
> >
> > #if CUDNN_VERSION >= 5000
> > err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd,
> dims);
> > #else
> > err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
> > #endif
> >
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "Could not set filter descriptor: %s.",
> > cudnnGetErrorString(err));
> > return -1;
> > }
> > return 0;
> > }
> >
> > static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t
> *desc) {
> > cudnnStatus_t err;
> > err = cudnnCreateFilterDescriptor(desc);
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "Could not create tensor descriptor: %s",
> > cudnnGetErrorString(err));
> > return -1;
> > }
> > if (c_set_filter(var, *desc) != 0) {
> > cudnnDestroyFilterDescriptor(*desc);
> > return -1;
> > }
> > return 0;
> > }
> >
> >
> >
> > namespace {
> > struct __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577 {
> > PyObject* __ERROR;
> >
> > PyObject* storage_V3;
> > PyObject* storage_V5;
> > PyObject* storage_V7;
> > PyObject* storage_V9;
> > PyObject* storage_V11;
> > PyObject* storage_V13;
> > PyObject* storage_V1;
> > PyObject* storage_V15;
> >
> > PyObject* py_V15;
> > PyGpuContextObject *V15;
> >
> > #define DTYPE_INPUT_0 npy_float32
> >
> > #define TYPENUM_INPUT_0 11
> >
> > #define ITEMSIZE_INPUT_0 4
> >
> > #define DTYPE_INPUT_1 npy_float32
> >
> > #define TYPENUM_INPUT_1 11
> >
> > #define ITEMSIZE_INPUT_1 4
> >
> > #define DTYPE_INPUT_2 npy_float32
> >
> > #define TYPENUM_INPUT_2 11
> >
> > #define ITEMSIZE_INPUT_2 4
> >
> > #define DTYPE_INPUT_4 npy_float32
> >
> > #define TYPENUM_INPUT_4 11
> >
> > #define ITEMSIZE_INPUT_4 4
> >
> > #define DTYPE_INPUT_5 npy_float32
> >
> > #define TYPENUM_INPUT_5 11
> >
> > #define ITEMSIZE_INPUT_5 4
> >
> > #define DTYPE_OUTPUT_0 npy_float32
> >
> > #define TYPENUM_OUTPUT_0 11
> >
> > #define ITEMSIZE_OUTPUT_0 4
> >
> > #define APPLY_SPECIFIC(str)
> str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
> >
> > #define CONV_INPLACE 1
> >
> > #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
> >
> >
> >
> > PyGpuContextObject *ctx;
> > cudnnHandle_t APPLY_SPECIFIC(_handle);
> >
> >
> > cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
> > cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
> > cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
> >
> >
> >
> > #ifdef CHOOSE_ALGO
> > int reuse_algo;
> > cudnnConvolutionFwdAlgo_t prev_algo;
> > #ifndef CHOOSE_ONCE
> > size_t prev_img_dims[5];
> > size_t prev_kern_dims[5];
> > #endif
> > #endif
> >
> > int
> > APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject
> *kerns,
> > PyGpuArrayObject *om,
> > cudnnConvolutionDescriptor_t desc,
> > double alpha, double beta,
> > PyGpuArrayObject **output,
> > PyGpuContextObject *c) {
> > cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
> > float af = alpha, bf = beta;
> > void *alpha_p;
> > void *beta_p;
> >
> > if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
> > PyErr_SetString(PyExc_ValueError,
> > "images and kernel must have the same stack size");
> > return 1;
> > }
> >
> > if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
> > return 1;
> > if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
> > return 1;
> >
> > switch (input->ga.typecode) {
> > case GA_DOUBLE:
> > alpha_p = (void *)&alpha;
> > beta_p = (void *)&beta;
> > break;
> > case GA_FLOAT:
> > case GA_HALF:
> > alpha_p = (void *)&af;
> > beta_p = (void *)&bf;
> > break;
> > default:
> > PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
> > return 1;
> > }
> >
> > #ifdef CONV_INPLACE
> > Py_XDECREF(*output);
> > *output = om;
> > Py_INCREF(*output);
> > #else
> > if (theano_prep_output(output, PyGpuArray_NDIM(om),
> PyGpuArray_DIMS(om),
> > om->ga.typecode, GA_C_ORDER, c) != 0)
> > return 1;
> > if (beta != 0.0 && pygpu_move(*output, om))
> > return 1;
> > #endif
> >
> > if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
> > return 1;
> >
> > cudnnConvolutionFwdAlgo_t algo = CONV_ALGO;
> >
> > cuda_enter(c->ctx);
> > #ifdef CHOOSE_ALGO
> > #ifndef CHOOSE_ONCE
> > reuse_algo = 1;
> > for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
> > reuse_algo = (reuse_algo &&
> > PyGpuArray_DIM(input, i) == prev_img_dims[i]);
> > reuse_algo = (reuse_algo &&
> > PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
> > }
> > #endif
> >
> > if (!reuse_algo) {
> > #ifdef CHOOSE_TIME
> > int count;
> > cudnnConvolutionFwdAlgoPerf_t choice;
> > err = cudnnFindConvolutionForwardAlgorithm(
> > APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input),
> APPLY_SPECIFIC(kerns),
> > desc, APPLY_SPECIFIC(output), 1, &count, &choice);
> >
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "error selecting convolution algo: %s",
> > cudnnGetErrorString(err));
> > cuda_exit(c->ctx);
> > return 1;
> > }
> > algo = choice.algo;
> > #else
> > size_t free;
> > int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM,
> &free);
> >
> > if (err2 != GA_NO_ERROR) {
> > PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
> > "memory information on the GPU");
> > cuda_exit(c->ctx);
> > return 1;
> > }
> >
> > err = cudnnGetConvolutionForwardAlgorithm(
> > APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input),
> APPLY_SPECIFIC(kerns),
> > desc, APPLY_SPECIFIC(output),
> > CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "error selecting convolution algo: %s",
> > cudnnGetErrorString(err));
> > cuda_exit(c->ctx);
> > return 1;
> > }
> > #endif
> > prev_algo = algo;
> > } else {
> > algo = prev_algo;
> > }
> >
> > #ifdef CHOOSE_ONCE
> > reuse_algo = 1;
> > #else
> > for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
> > prev_img_dims[i] = PyGpuArray_DIM(input, i);
> > prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
> > }
> > #endif
> >
> > #endif
> >
> > /* These two algos are not supported for 3d conv */
> > if (PyGpuArray_NDIM(input) == 5 &&
> > (algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM ||
> > algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
> > algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> >
> > // The FFT implementation does not support strides, 1x1 filters or
> inputs
> > // with a spatial dimension larger than 1024. The tiled-FFT
> implementation
> > // does not support strides.
> > // If the chosen implementation is FFT or tiled-FFT, validate that it
> can
> > // be used on the current data and default to a safe implementation if
> it
> > // can't.
> > // The following code is 2d-specific but it is fine as FFT and
> tiled-FFT are
> > // defined only for 2d filters
> > if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
> > algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) &&
> PyGpuArray_NDIM(input) == 4) {
> >
> > // Extract the properties of the convolution descriptor
> > int nd;
> > int pad[2];
> > int stride[2];
> > int upscale[2];
> > cudnnConvolutionMode_t mode;
> > cudnnDataType_t data_type;
> > err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
> > upscale, &mode,
> &data_type);
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "error getting convolution properties: %s",
> > cudnnGetErrorString(err));
> > cuda_exit(c->ctx);
> > return 1;
> > }
> >
> > if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
> > {
> > if (stride[0] != 1 || stride[1] != 1 ||
> > PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) >
> 1024 ||
> > (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) ==
> 1))
> > {
> > algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> > }
> > }
> > else
> > {
> > // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
> > if (stride[0] != 1 || stride[1] != 1)
> > {
> > algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> > }
> > }
> > }
> >
> > {
> > size_t worksize;
> > gpudata *workspace;
> > err =
> cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
> > APPLY_SPECIFIC(input),
> > APPLY_SPECIFIC(kerns),
> > desc,
> >
> APPLY_SPECIFIC(output),
> > algo,
> > &worksize);
> >
> > if (err == CUDNN_STATUS_NOT_SUPPORTED) {
> > // Fallback to none algo if not supported
> > // TODO: Print a warning
> > algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> >
> > err =
> cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
> >
> APPLY_SPECIFIC(input),
> >
> APPLY_SPECIFIC(kerns),
> > desc,
> >
> APPLY_SPECIFIC(output),
> > algo,
> > &worksize);
> > }
> >
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError,
> > "error getting worksize: %s",
> > cudnnGetErrorString(err));
> > cuda_exit(c->ctx);
> > return 1;
> > }
> >
> > /*
> > * This is less than ideal since we need to free it after (which
> > * introduces a synchronization point. But we don't have a module
> > * to place a nice get_work_mem() function in.
> > */
> > if (worksize != 0) {
> > workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
> > if (workspace == NULL) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Could not allocate working memory");
> > cuda_exit(c->ctx);
> > return 1;
> > }
> > }
> >
> > cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
> > cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
> > cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
> >
> > err = cudnnConvolutionForward(
> > APPLY_SPECIFIC(_handle),
> > alpha_p,
> > APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
> > APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
> > desc, algo,
> > worksize == 0 ? NULL : *(void **)workspace, worksize,
> > beta_p,
> > APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
> >
> > if (worksize != 0)
> > gpudata_release(workspace);
> >
> > cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
> > cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
> > cuda_record((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
> > }
> > cuda_exit(c->ctx);
> >
> > if (err != CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError, "error doing operation: %s",
> > cudnnGetErrorString(err));
> > return 1;
> > }
> > return 0;
> > }
> >
> >
> > #undef DTYPE_INPUT_0
> >
> > #undef TYPENUM_INPUT_0
> >
> > #undef ITEMSIZE_INPUT_0
> >
> > #undef DTYPE_INPUT_1
> >
> > #undef TYPENUM_INPUT_1
> >
> > #undef ITEMSIZE_INPUT_1
> >
> > #undef DTYPE_INPUT_2
> >
> > #undef TYPENUM_INPUT_2
> >
> > #undef ITEMSIZE_INPUT_2
> >
> > #undef DTYPE_INPUT_4
> >
> > #undef TYPENUM_INPUT_4
> >
> > #undef ITEMSIZE_INPUT_4
> >
> > #undef DTYPE_INPUT_5
> >
> > #undef TYPENUM_INPUT_5
> >
> > #undef ITEMSIZE_INPUT_5
> >
> > #undef DTYPE_OUTPUT_0
> >
> > #undef TYPENUM_OUTPUT_0
> >
> > #undef ITEMSIZE_OUTPUT_0
> >
> > #undef APPLY_SPECIFIC
> >
> > #undef CONV_INPLACE
> >
> > #undef CONV_ALGO
> >
> > __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577() {
> > // This is only somewhat safe because we:
> > // 1) Are not a virtual class
> > // 2) Do not use any virtual classes in the members
> > // 3) Deal with mostly POD and pointers
> >
> > // If this changes, we would have to revise this, but for
> > // now I am tired of chasing segfaults because
> > // initialization code had an error and some pointer has
> > // a junk value.
> > memset(this, 0, sizeof(*this));
> > }
> > ~__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577(void) {
> > cleanup();
> > }
> >
> > int init(PyObject* __ERROR, PyObject* storage_V3, PyObject*
> storage_V5, PyObject* storage_V7, PyObject* storage_V9, PyObject*
> storage_V11, PyObject* storage_V13, PyObject* storage_V1, PyObject*
> storage_V15) {
> > Py_XINCREF(storage_V3);
> > Py_XINCREF(storage_V5);
> > Py_XINCREF(storage_V7);
> > Py_XINCREF(storage_V9);
> > Py_XINCREF(storage_V11);
> > Py_XINCREF(storage_V13);
> > Py_XINCREF(storage_V1);
> > Py_XINCREF(storage_V15);
> > this->storage_V3 = storage_V3;
> > this->storage_V5 = storage_V5;
> > this->storage_V7 = storage_V7;
> > this->storage_V9 = storage_V9;
> > this->storage_V11 = storage_V11;
> > this->storage_V13 = storage_V13;
> > this->storage_V1 = storage_V1;
> > this->storage_V15 = storage_V15;
> >
> >
> >
> >
> >
> >
> >
> >
> >
> > py_V15 = PyList_GET_ITEM(storage_V15, 0);
> > {Py_XINCREF(py_V15);}
> >
> > if (!PyObject_TypeCheck(py_V15, &PyGpuContextType)) {
> > PyErr_SetString(PyExc_TypeError, "expected a GpuContext");
> > {
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > return 15;
> > }
> > }
> >
> > V15 = (PyGpuContextObject *)py_V15;
> > Py_INCREF(V15);
> >
> >
> >
> > #define DTYPE_INPUT_0 npy_float32
> >
> > #define TYPENUM_INPUT_0 11
> >
> > #define ITEMSIZE_INPUT_0 4
> >
> > #define DTYPE_INPUT_1 npy_float32
> >
> > #define TYPENUM_INPUT_1 11
> >
> > #define ITEMSIZE_INPUT_1 4
> >
> > #define DTYPE_INPUT_2 npy_float32
> >
> > #define TYPENUM_INPUT_2 11
> >
> > #define ITEMSIZE_INPUT_2 4
> >
> > #define DTYPE_INPUT_4 npy_float32
> >
> > #define TYPENUM_INPUT_4 11
> >
> > #define ITEMSIZE_INPUT_4 4
> >
> > #define DTYPE_INPUT_5 npy_float32
> >
> > #define TYPENUM_INPUT_5 11
> >
> > #define ITEMSIZE_INPUT_5 4
> >
> > #define DTYPE_OUTPUT_0 npy_float32
> >
> > #define TYPENUM_OUTPUT_0 11
> >
> > #define ITEMSIZE_OUTPUT_0 4
> >
> > #define APPLY_SPECIFIC(str)
> str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
> >
> > #define CONV_INPLACE 1
> >
> > #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
> >
> > #define FAIL { \
> >
> > if (!PyErr_Occurred()) { \
> >
> > PyErr_SetString(PyExc_RuntimeError, \
> >
> > "Unexpected error in an Op's C code. " \
> >
> > "No Python exception was set."); \
> >
> > } \
> >
> > return 17; \
> >
> > }
> >
> > #define PARAMS V15
> >
> >
> >
> > {
> > // We need to keep a reference here to have it available in the
> destructor.
> > ctx = PARAMS;
> > Py_INCREF(ctx);
> >
> > cuda_enter(PARAMS->ctx);
> > cudnnStatus_t err;
> > APPLY_SPECIFIC(_handle) = NULL;
> > if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) !=
> CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle:
> %s",
> > cudnnGetErrorString(err));
> > cuda_exit(PARAMS->ctx);
> > FAIL;
> > }
> > if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
> > cuda_get_stream(PARAMS->ctx))) !=
> CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
> > cudnnGetErrorString(err));
> > cuda_exit(PARAMS->ctx);
> > FAIL;
> > }
> > cuda_exit(PARAMS->ctx);
> > }
> >
> >
> >
> > cudnnStatus_t APPLY_SPECIFIC(err);
> > APPLY_SPECIFIC(input) = NULL;
> > APPLY_SPECIFIC(output) = NULL;
> > APPLY_SPECIFIC(kerns) = NULL;
> > if ((APPLY_SPECIFIC(err) =
> cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) !=
> CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor
> "
> > "(inp): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
> > FAIL;
> > }
> > if ((APPLY_SPECIFIC(err) =
> cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) !=
> CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor
> "
> > "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
> > FAIL;
> > }
> > if ((APPLY_SPECIFIC(err) =
> cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns))) !=
> CUDNN_STATUS_SUCCESS) {
> > PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor:
> %s",
> > cudnnGetErrorString(APPLY_SPECIFIC(err)));
> > FAIL;
> > }
> >
> >
> >
> > #ifdef CHOOSE_ALGO
> > reuse_algo = 0;
> > prev_algo = CONV_ALGO;
> > #ifndef CHOOSE_ONCE
> > memset(prev_img_dims, 0, sizeof(prev_img_dims));
> > memset(prev_kern_dims, 0, sizeof(prev_kern_dims));
> > #endif
> > #endif
> >
> >
> >
> > #undef FAIL
> >
> > #undef PARAMS
> >
> > #undef DTYPE_INPUT_0
> >
> > #undef TYPENUM_INPUT_0
> >
> > #undef ITEMSIZE_INPUT_0
> >
> > #undef DTYPE_INPUT_1
> >
> > #undef TYPENUM_INPUT_1
> >
> > #undef ITEMSIZE_INPUT_1
> >
> > #undef DTYPE_INPUT_2
> >
> > #undef TYPENUM_INPUT_2
> >
> > #undef ITEMSIZE_INPUT_2
> >
> > #undef DTYPE_INPUT_4
> >
> > #undef TYPENUM_INPUT_4
> >
> > #undef ITEMSIZE_INPUT_4
> >
> > #undef DTYPE_INPUT_5
> >
> > #undef TYPENUM_INPUT_5
> >
> > #undef ITEMSIZE_INPUT_5
> >
> > #undef DTYPE_OUTPUT_0
> >
> > #undef TYPENUM_OUTPUT_0
> >
> > #undef ITEMSIZE_OUTPUT_0
> >
> > #undef APPLY_SPECIFIC
> >
> > #undef CONV_INPLACE
> >
> > #undef CONV_ALGO
> > this->__ERROR = __ERROR;
> > return 0;
> > }
> > void cleanup(void) {
> > __label_1:
> >
> > double __DUMMY_1;
> > __label_3:
> >
> > double __DUMMY_3;
> > __label_5:
> >
> > double __DUMMY_5;
> > __label_7:
> >
> > double __DUMMY_7;
> > __label_9:
> >
> > double __DUMMY_9;
> > __label_11:
> >
> > double __DUMMY_11;
> > __label_13:
> >
> > double __DUMMY_13;
> > __label_15:
> > Py_XDECREF(V15); V15 = NULL;
> > {Py_XDECREF(py_V15);}
> >
> > double __DUMMY_15;
> > __label_18:
> >
> >
> > #define DTYPE_INPUT_0 npy_float32
> >
> > #define TYPENUM_INPUT_0 11
> >
> > #define ITEMSIZE_INPUT_0 4
> >
> > #define DTYPE_INPUT_1 npy_float32
> >
> > #define TYPENUM_INPUT_1 11
> >
> > #define ITEMSIZE_INPUT_1 4
> >
> > #define DTYPE_INPUT_2 npy_float32
> >
> > #define TYPENUM_INPUT_2 11
> >
> > #define ITEMSIZE_INPUT_2 4
> >
> > #define DTYPE_INPUT_4 npy_float32
> >
> > #define TYPENUM_INPUT_4 11
> >
> > #define ITEMSIZE_INPUT_4 4
> >
> > #define DTYPE_INPUT_5 npy_float32
> >
> > #define TYPENUM_INPUT_5 11
> >
> > #define ITEMSIZE_INPUT_5 4
> >
> > #define DTYPE_OUTPUT_0 npy_float32
> >
> > #define TYPENUM_OUTPUT_0 11
> >
> > #define ITEMSIZE_OUTPUT_0 4
> >
> > #define APPLY_SPECIFIC(str)
> str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
> >
> > #define CONV_INPLACE 1
> >
> > #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
> >
> >
> >
> > cuda_enter(ctx->ctx);
> > cudnnDestroy(APPLY_SPECIFIC(_handle));
> > cuda_exit(ctx->ctx);
> > Py_DECREF((PyObject *)ctx);
> >
> >
> > if (APPLY_SPECIFIC(input) != NULL)
> > cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input));
> > if (APPLY_SPECIFIC(output) != NULL)
> > cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output));
> > if (APPLY_SPECIFIC(kerns) != NULL)
> > cudnnDestroyFilterDescriptor(APPLY_SPECIFIC(kerns));
> >
> >
> > #undef DTYPE_INPUT_0
> >
> > #undef TYPENUM_INPUT_0
> >
> > #undef ITEMSIZE_INPUT_0
> >
> > #undef DTYPE_INPUT_1
> >
> > #undef TYPENUM_INPUT_1
> >
> > #undef ITEMSIZE_INPUT_1
> >
> > #undef DTYPE_INPUT_2
> >
> > #undef TYPENUM_INPUT_2
> >
> > #undef ITEMSIZE_INPUT_2
> >
> > #undef DTYPE_INPUT_4
> >
> > #undef TYPENUM_INPUT_4
> >
> > #undef ITEMSIZE_INPUT_4
> >
> > #undef DTYPE_INPUT_5
> >
> > #undef TYPENUM_INPUT_5
> >
> > #undef ITEMSIZE_INPUT_5
> >
> > #undef DTYPE_OUTPUT_0
> >
> > #undef TYPENUM_OUTPUT_0
> >
> > #undef ITEMSIZE_OUTPUT_0
> >
> > #undef APPLY_SPECIFIC
> >
> > #undef CONV_INPLACE
> >
> > #undef CONV_ALGO
> > double __DUMMY_18;
> >
> > Py_XDECREF(this->storage_V3);
> > Py_XDECREF(this->storage_V5);
> > Py_XDECREF(this->storage_V7);
> > Py_XDECREF(this->storage_V9);
> > Py_XDECREF(this->storage_V11);
> > Py_XDECREF(this->storage_V13);
> > Py_XDECREF(this->storage_V1);
> > Py_XDECREF(this->storage_V15);
> > }
> > int run(void) {
> > int __failure = 0;
> >
> > PyObject* py_V1;
> >
> > PyGpuArrayObject *V1;
> >
> > PyObject* py_V3;
> >
> > PyGpuArrayObject *V3;
> >
> > PyObject* py_V5;
> >
> > PyGpuArrayObject *V5;
> >
> > PyObject* py_V7;
> >
> > PyGpuArrayObject *V7;
> >
> > PyObject* py_V9;
> >
> > cudnnConvolutionDescriptor_t V9;
> >
> > PyObject* py_V11;
> >
> > typedef npy_float32 V11_dtype; // Deprecated use
> dtype_V11 instead.
> > typedef npy_float32 dtype_V11;
> >
> > npy_float32 V11;
> >
> > PyObject* py_V13;
> >
> > typedef npy_float32 V13_dtype; // Deprecated use
> dtype_V13 instead.
> > typedef npy_float32 dtype_V13;
> >
> > npy_float32 V13;
> >
> > {
> >
> > py_V1 = PyList_GET_ITEM(storage_V1, 0);
> > {Py_XINCREF(py_V1);}
> >
> > if (py_V1 == Py_None)
> > {
> > V1 = NULL;
> > }
> > else
> > {
> >
> > V1 = NULL;
> > if (py_V1 == Py_None) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> > {
> > __failure = 2;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_2;}
> > }
> > /* First check if we are the base type exactly (the most common
> case),
> > then do the full subclass check if needed. */
> > if (py_V1->ob_type != &PyGpuArrayType &&
> > !PyObject_TypeCheck(py_V1, &PyGpuArrayType)) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> > {
> > __failure = 2;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_2;}
> > }
> > V1 = (PyGpuArrayObject *)py_V1;
> > Py_INCREF(V1);
> >
> > }
> >
> > {
> >
> > py_V3 = PyList_GET_ITEM(storage_V3, 0);
> > {Py_XINCREF(py_V3);}
> >
> > V3 = NULL;
> > if (py_V3 == Py_None) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> > {
> > __failure = 4;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_4;}
> > }
> > /* First check if we are the base type exactly (the most common
> case),
> > then do the full subclass check if needed. */
> > if (py_V3->ob_type != &PyGpuArrayType &&
> > !PyObject_TypeCheck(py_V3, &PyGpuArrayType)) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> > {
> > __failure = 4;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_4;}
> > }
> > V3 = (PyGpuArrayObject *)py_V3;
> > Py_INCREF(V3);
> >
> > {
> >
> > py_V5 = PyList_GET_ITEM(storage_V5, 0);
> > {Py_XINCREF(py_V5);}
> >
> > V5 = NULL;
> > if (py_V5 == Py_None) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> > {
> > __failure = 6;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_6;}
> > }
> > /* First check if we are the base type exactly (the most common
> case),
> > then do the full subclass check if needed. */
> > if (py_V5->ob_type != &PyGpuArrayType &&
> > !PyObject_TypeCheck(py_V5, &PyGpuArrayType)) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> > {
> > __failure = 6;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_6;}
> > }
> > V5 = (PyGpuArrayObject *)py_V5;
> > Py_INCREF(V5);
> >
> > {
> >
> > py_V7 = PyList_GET_ITEM(storage_V7, 0);
> > {Py_XINCREF(py_V7);}
> >
> > V7 = NULL;
> > if (py_V7 == Py_None) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> > {
> > __failure = 8;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_8;}
> > }
> > /* First check if we are the base type exactly (the most common
> case),
> > then do the full subclass check if needed. */
> > if (py_V7->ob_type != &PyGpuArrayType &&
> > !PyObject_TypeCheck(py_V7, &PyGpuArrayType)) {
> > PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> > {
> > __failure = 8;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_8;}
> > }
> > V7 = (PyGpuArrayObject *)py_V7;
> > Py_INCREF(V7);
> >
> > {
> >
> > py_V9 = PyList_GET_ITEM(storage_V9, 0);
> > {Py_XINCREF(py_V9);}
> >
> > V9 = (cudnnConvolutionDescriptor_t)PyCapsule_GetPointer(py_V9, NULL);
> > if (V9 == NULL) {
> > __failure = 10;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_10;}
> >
> > {
> >
> > py_V11 = PyList_GET_ITEM(storage_V11, 0);
> > {Py_XINCREF(py_V11);}
> >
> > if (!PyObject_TypeCheck(py_V11, &PyFloat32ArrType_Type))
> > {
> > PyErr_Format(PyExc_ValueError,
> > "Scalar check failed (npy_float32)");
> > {
> > __failure = 12;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_12;}
> > }
> >
> > PyArray_ScalarAsCtype(py_V11, &V11);
> >
> > {
> >
> > py_V13 = PyList_GET_ITEM(storage_V13, 0);
> > {Py_XINCREF(py_V13);}
> >
> > if (!PyObject_TypeCheck(py_V13, &PyFloat32ArrType_Type))
> > {
> > PyErr_Format(PyExc_ValueError,
> > "Scalar check failed (npy_float32)");
> > {
> > __failure = 14;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_14;}
> > }
> >
> > PyArray_ScalarAsCtype(py_V13, &V13);
> >
> > {
> >
> > {
> > // Op class GpuDnnConv
> >
> > #define APPLY_SPECIFIC(str)
> str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
> >
> > #define CONV_INPLACE 1
> >
> > #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
> > {
> > if (APPLY_SPECIFIC(conv_fwd)(V3, V5, V7, V9, V11, V13,
> &V1, V15) != 0) {
> > {
> > __failure = 17;
> > if (!PyErr_Occurred()) {
> > PyErr_SetString(PyExc_RuntimeError,
> > "Unexpected error in an Op's C code. "
> > "No Python exception was set.");
> > }
> > goto __label_17;}
> > }
> > }
> > #undef APPLY_SPECIFIC
> >
> > #undef CONV_INPLACE
> >
> > #undef CONV_ALGO
> > __label_17:
> >
> > double __DUMMY_17;
> >
> > }
> > __label_16:
> >
> > double __DUMMY_16;
> >
> > }
> > __label_14:
> >
> > {Py_XDECREF(py_V13);}
> >
> > double __DUMMY_14;
> >
> > }
> > __label_12:
> >
> > {Py_XDECREF(py_V11);}
> >
> > double __DUMMY_12;
> >
> > }
> > __label_10:
> >
> > {Py_XDECREF(py_V9);}
> >
> > double __DUMMY_10;
> >
> > }
> > __label_8:
> > Py_XDECREF(V7); V7 = NULL;
> > {Py_XDECREF(py_V7);}
> >
> > double __DUMMY_8;
> >
> > }
> > __label_6:
> > Py_XDECREF(V5); V5 = NULL;
> > {Py_XDECREF(py_V5);}
> >
> > double __DUMMY_6;
> >
> > }
> > __label_4:
> > Py_XDECREF(V3); V3 = NULL;
> > {Py_XDECREF(py_V3);}
> >
> > double __DUMMY_4;
> >
> > }
> > __label_2:
> >
> > if (!__failure) {
> >
> > if (!V1) {
> > Py_XDECREF(py_V1);
> > Py_INCREF(Py_None);
> > py_V1 = Py_None;
> > } else if ((void *)py_V1 != (void *)V1) {
> > Py_XDECREF(py_V1);
> > py_V1 = (PyObject *)V1;
> > Py_INCREF(py_V1);
> > }
> >
> > PyObject* old = PyList_GET_ITEM(storage_V1, 0);
> > {Py_XINCREF(py_V1);}
> > PyList_SET_ITEM(storage_V1, 0, py_V1);
> > {Py_XDECREF(old);}
> > }
> > Py_XDECREF(V1); V1 = NULL;
> > {Py_XDECREF(py_V1);}
> >
> > double __DUMMY_2;
> >
> > }
> >
> >
> > if (__failure) {
> > // When there is a failure, this code puts the exception
> > // in __ERROR.
> > PyObject* err_type = NULL;
> > PyObject* err_msg = NULL;
> > PyObject* err_traceback = NULL;
> > PyErr_Fetch(&err_type, &err_msg, &err_traceback);
> > if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
> > if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
> > if (!err_traceback) {err_traceback = Py_None;
> Py_INCREF(Py_None);}
> > PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
> > PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
> > PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
> > PyList_SET_ITEM(__ERROR, 0, err_type);
> > PyList_SET_ITEM(__ERROR, 1, err_msg);
> > PyList_SET_ITEM(__ERROR, 2, err_traceback);
> > {Py_XDECREF(old_err_type);}
> > {Py_XDECREF(old_err_msg);}
> > {Py_XDECREF(old_err_traceback);}
> > }
> > // The failure code is returned to index what code block failed.
> > return __failure;
> >
> > }
> > };
> > }
> >
> >
> > static int
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor(__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577*
>
> self) {
> > return self->run();
> > }
> >
> > static void
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor(void*
> executor, void* self) {
> > delete
> ((__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577*)self);
> > }
> >
> > //////////////////////
> > //// Functions
> > //////////////////////
> > static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
> > assert(PyTuple_Check(argtuple));
> > if (9 != PyTuple_Size(argtuple)){
> > PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected
> 9, got %i", (int)PyTuple_Size(argtuple));
> > return NULL;
> > }
> > __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* struct_ptr =
> new __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577();
> > if (struct_ptr->init( PyTuple_GET_ITEM(argtuple,
> 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple,
> 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple,
> 4),PyTuple_GET_ITEM(argtuple, 5),PyTuple_GET_ITEM(argtuple,
> 6),PyTuple_GET_ITEM(argtuple, 7),PyTuple_GET_ITEM(argtuple, 8) ) != 0) {
> > delete struct_ptr;
> > return NULL;
> > }
> > PyObject* thunk =
> PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor),
>
> struct_ptr,
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor);
> > return thunk; }
> >
> > //////////////////////
> > //// Module init
> > //////////////////////
> > static PyMethodDef MyMethods[] = {
> > {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
> > {NULL, NULL, 0, NULL}
> > };
> > PyMODINIT_FUNC initd7db26d1cc9fb59ee56289f4dda72577(void){
> > import_pygpu__gpuarray();
> > import_array();
> >
> >
> > setup_ext_cuda();
> >
> >
> > (void) Py_InitModule("d7db26d1cc9fb59ee56289f4dda72577", MyMethods);
> > }
>
>
> --
> Pascal
>
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.