Hello, I'm attempting to use conv3D2D in Theano, but when I get to the function compilation stage, it fails with the following errors:
mod.cpp:684:1: error: expected unqualified-id before '{' token
mod.cpp:1345:1: error: expected '}' at end of input
mod.cpp: In destructor '{anonymous}::__struct_compiled_op_d7db26d1cc9f
mod.cpp:573:21: error: 'cleanup' was not declared in this scope
mod.cpp: At global scope:
mod.cpp:678:1: error: expected unqualified-id at end of input
 #undef ITEMSIZE_INPUT_2
 ^
mod.cpp:678:1: error: expected '}' at end of input

I'm attaching the mod.cpp file that was generated by cc.py, op.py, dnn.py, etc. The initialization code from dnn_base.c starts at line 684, but no function name is defined there, so that code ends up outside of any function body. The "#section cleanup_code_struct" code also appears to have been inserted in the wrong place. Can someone tell me how to fix this?

--Dmitry
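For context, the graph is built roughly like this (a minimal sketch with placeholder shapes and names, not my exact script); the failure happens at the theano.function() call, where Theano compiles the C code for the op:

    import numpy as np
    import theano
    import theano.tensor as T
    from theano.tensor.nnet.conv3d2d import conv3d

    # 5D variables: signals are (batch, time, channels, row, column),
    # filters are (out channels, filter time, in channels, row, column)
    signals = T.TensorType('float32', (False,) * 5)('signals')
    filters = T.TensorType('float32', (False,) * 5)('filters')

    out = conv3d(signals, filters, border_mode='valid')

    # The generated mod.cpp is compiled here; this is where the
    # g++ errors above are produced.
    f = theano.function([signals, filters], out)

    x = np.random.rand(2, 8, 1, 16, 16).astype('float32')
    w = np.random.rand(4, 3, 1, 5, 5).astype('float32')
    print(f(x, w).shape)

The generated mod.cpp is inlined below.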
#include <Python.h>
#include <iostream>
#include "theano_mod_helper.h"
#include <gpuarray/array.h>
#include <gpuarray/kernel.h>
#include <gpuarray/error.h>
#include <gpuarray/buffer.h>
#include <gpuarray/buffer_blas.h>
#include <numpy/arrayobject.h>
#include <gpuarray_api.h>
#include <math.h>
#include <numpy/arrayscalars.h>
#include "cudnn.h"
#include "cudnn_helper.h"
#include "gpuarray_helper.h"
#include "gpuarray/types.h"
#include "gpuarray/array.h"
#include "gpuarray/util.h"
#include "gpuarray/ext_cuda.h"
#include "gpuarray_api.h"
#include "numpy_compat.h"

//////////////////////
//// Support Code
//////////////////////

void _capsule_destructor(PyObject *o) {
  void *d = PyCapsule_GetContext(o);
  void *p = PyCapsule_GetPointer(o, NULL);
  void (*f)(void *) = (void (*)(void *))d;
  if (f != NULL)
    f(p);
}

static int
c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
  cudnnDataType_t dt;
  size_t ds;
  switch (var->ga.typecode) {
  case GA_FLOAT:
    dt = CUDNN_DATA_FLOAT;
    break;
  case GA_DOUBLE:
    dt = CUDNN_DATA_DOUBLE;
    break;
  case GA_HALF:
    dt = CUDNN_DATA_HALF;
    break;
  default:
    PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
    return -1;
  }
  ds = gpuarray_get_elsize(var->ga.typecode);

  int strs[8], dims[8], default_stride = 1;
  unsigned int nd = PyGpuArray_NDIM(var);

  if (nd > 8) {
    PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
    return -1;
  }

  for (unsigned int _i = nd; _i > 0; _i--) {
    unsigned int i = _i - 1;
    strs[i] = PyGpuArray_STRIDE(var, i) ?
      PyGpuArray_STRIDE(var, i)/ds : default_stride;
    default_stride *= PyGpuArray_DIM(var, i);
    dims[i] = PyGpuArray_DIM(var, i);
  }

  /* Tensors can't be smaller than 3d for cudnn so we pad the
   * descriptor if they are */
  for (unsigned int i = nd; i < 3; i++) {
    strs[i] = 1;
    dims[i] = 1;
  }

  cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
                                                 dims, strs);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not set tensorNd descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  return 0;
}

static int
c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateTensorDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create tensor descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  if (c_set_tensorNd(var, *desc) != 0) {
    cudnnDestroyTensorDescriptor(*desc);
    return -1;
  }
  return 0;
}

static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
  cudnnDataType_t dt;
  cudnnStatus_t err;

  if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
    PyErr_SetString(PyExc_ValueError,
                    "Only contiguous filters (kernels) are supported.");
    return -1;
  }
  switch (var->ga.typecode) {
  case GA_FLOAT:
    dt = CUDNN_DATA_FLOAT;
    break;
  case GA_DOUBLE:
    dt = CUDNN_DATA_DOUBLE;
    break;
  case GA_HALF:
    dt = CUDNN_DATA_HALF;
    break;
  default:
    PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
    return -1;
  }

  int dims[8];
  unsigned int nd = PyGpuArray_NDIM(var);

  if (nd > 8) {
    PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
    return -1;
  }

  for (unsigned int _i = nd; _i > 0; _i--) {
    unsigned int i = _i - 1;
    dims[i] = PyGpuArray_DIM(var, i);
  }

  /* Filters can't be less than 3d so we pad */
  for (unsigned int i = nd; i < 3; i++)
    dims[i] = 1;

  if (nd < 3)
    nd = 3;

#if CUDNN_VERSION >= 5000
  err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
  err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not set filter descriptor: %s.",
                 cudnnGetErrorString(err));
    return -1;
  }
  return 0;
}

static int
c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
  cudnnStatus_t err;
  err = cudnnCreateFilterDescriptor(desc);
  if (err != CUDNN_STATUS_SUCCESS) {
    PyErr_Format(PyExc_RuntimeError,
                 "Could not create tensor descriptor: %s",
                 cudnnGetErrorString(err));
    return -1;
  }
  if (c_set_filter(var, *desc) != 0) {
    cudnnDestroyFilterDescriptor(*desc);
    return -1;
  }
  return 0;
}

namespace {
struct __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577 {
  PyObject* __ERROR;
  PyObject* storage_V3;
  PyObject* storage_V5;
  PyObject* storage_V7;
  PyObject* storage_V9;
  PyObject* storage_V11;
  PyObject* storage_V13;
  PyObject* storage_V1;
  PyObject* storage_V15;

  PyObject* py_V15;
  PyGpuContextObject *V15;

#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM

  PyGpuContextObject *ctx;
  cudnnHandle_t APPLY_SPECIFIC(_handle);
  cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
  cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
  cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
#ifdef CHOOSE_ALGO
  int reuse_algo;
  cudnnConvolutionFwdAlgo_t prev_algo;
#ifndef CHOOSE_ONCE
  size_t prev_img_dims[5];
  size_t prev_kern_dims[5];
#endif
#endif

  int
  APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
                           PyGpuArrayObject *om,
                           cudnnConvolutionDescriptor_t desc,
                           double alpha, double beta,
                           PyGpuArrayObject **output,
                           PyGpuContextObject *c) {
    cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
    float af = alpha, bf = beta;
    void *alpha_p;
    void *beta_p;

    if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
      PyErr_SetString(PyExc_ValueError,
                      "images and kernel must have the same stack size");
      return 1;
    }

    if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
      return 1;
    if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
      return 1;

    switch (input->ga.typecode) {
    case GA_DOUBLE:
      alpha_p = (void *)&alpha;
      beta_p = (void *)&beta;
      break;
    case GA_FLOAT:
    case GA_HALF:
      alpha_p = (void *)&af;
      beta_p = (void *)&bf;
      break;
    default:
      PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
      return 1;
    }

#ifdef CONV_INPLACE
    Py_XDECREF(*output);
    *output = om;
    Py_INCREF(*output);
#else
    if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
                           om->ga.typecode, GA_C_ORDER, c) != 0)
      return 1;
    if (beta != 0.0 && pygpu_move(*output, om))
      return 1;
#endif

    if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
      return 1;

    cudnnConvolutionFwdAlgo_t algo = CONV_ALGO;

    cuda_enter(c->ctx);
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
    reuse_algo = 1;
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(input, i) == prev_img_dims[i]);
      reuse_algo = (reuse_algo &&
                    PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
    }
#endif

    if (!reuse_algo) {
#ifdef CHOOSE_TIME
      int count;
      cudnnConvolutionFwdAlgoPerf_t choice;
      err = cudnnFindConvolutionForwardAlgorithm(
        APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
        desc, APPLY_SPECIFIC(output), 1, &count, &choice);

      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
      algo = choice.algo;
#else
      size_t free;
      int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
      if (err2 != GA_NO_ERROR) {
        PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
                     "memory information on the GPU");
        cuda_exit(c->ctx);
        return 1;
      }

      err = cudnnGetConvolutionForwardAlgorithm(
        APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
        desc, APPLY_SPECIFIC(output),
        CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error selecting convolution algo: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }
#endif
      prev_algo = algo;
    } else {
      algo = prev_algo;
    }

#ifdef CHOOSE_ONCE
    reuse_algo = 1;
#else
    for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
      prev_img_dims[i] = PyGpuArray_DIM(input, i);
      prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
    }
#endif
#endif

    /* These two algos are not supported for 3d conv */
    if (PyGpuArray_NDIM(input) == 5 &&
        (algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM ||
         algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
      algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;

    // The FFT implementation does not support strides, 1x1 filters or inputs
    // with a spatial dimension larger than 1024. The tiled-FFT implementation
    // does not support strides.
    // If the chosen implementation is FFT or tiled-FFT, validate that it can
    // be used on the current data and default to a safe implementation if it
    // can't.
    // The following code is 2d-specific but it is fine as FFT and tiled-FFT
    // are defined only for 2d filters
    if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
         algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) &&
        PyGpuArray_NDIM(input) == 4) {

      // Extract the properties of the convolution descriptor
      int nd;
      int pad[2];
      int stride[2];
      int upscale[2];
      cudnnConvolutionMode_t mode;
      cudnnDataType_t data_type;
      err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
                                            upscale, &mode, &data_type);
      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError,
                     "error getting convolution properties: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }

      if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT) {
        if (stride[0] != 1 || stride[1] != 1 ||
            PyGpuArray_DIM(input, 2) > 1024 ||
            PyGpuArray_DIM(input, 3) > 1024 ||
            (PyGpuArray_DIM(kerns, 2) == 1 &&
             PyGpuArray_DIM(kerns, 3) == 1)) {
          algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
        }
      } else {
        // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
        if (stride[0] != 1 || stride[1] != 1) {
          algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
        }
      }
    }

    {
      size_t worksize;
      gpudata *workspace;
      err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
                                                    APPLY_SPECIFIC(input),
                                                    APPLY_SPECIFIC(kerns),
                                                    desc,
                                                    APPLY_SPECIFIC(output),
                                                    algo,
                                                    &worksize);

      if (err == CUDNN_STATUS_NOT_SUPPORTED) {
        // Fallback to none algo if not supported
        // TODO: Print a warning
        algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;

        err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
                                                      APPLY_SPECIFIC(input),
                                                      APPLY_SPECIFIC(kerns),
                                                      desc,
                                                      APPLY_SPECIFIC(output),
                                                      algo,
                                                      &worksize);
      }

      if (err != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "error getting worksize: %s",
                     cudnnGetErrorString(err));
        cuda_exit(c->ctx);
        return 1;
      }

      /*
       * This is less than ideal since we need to free it after (which
       * introduces a synchronization point. But we don't have a module
       * to place a nice get_work_mem() function in.
       */
      if (worksize != 0) {
        workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
        if (workspace == NULL) {
          PyErr_SetString(PyExc_RuntimeError,
                          "Could not allocate working memory");
          cuda_exit(c->ctx);
          return 1;
        }
      }

      cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
      cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
      cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);

      err = cudnnConvolutionForward(
        APPLY_SPECIFIC(_handle),
        alpha_p,
        APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
        APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
        desc, algo,
        worksize == 0 ?
        NULL : *(void **)workspace,
        worksize,
        beta_p,
        APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));

      if (worksize != 0)
        gpudata_release(workspace);

      cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
      cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
      cuda_record((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
    }
    cuda_exit(c->ctx);

    if (err != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_RuntimeError, "error doing operation: %s",
                   cudnnGetErrorString(err));
      return 1;
    }
    return 0;
  }

#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO

  __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577() {
    // This is only somewhat safe because we:
    // 1) Are not a virtual class
    // 2) Do not use any virtual classes in the members
    // 3) Deal with mostly POD and pointers
    // If this changes, we would have to revise this, but for
    // now I am tired of chasing segfaults because
    // initialization code had an error and some pointer has
    // a junk value.
    memset(this, 0, sizeof(*this));
  }

  ~__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577(void) {
    cleanup();
  }

  int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5,
           PyObject* storage_V7, PyObject* storage_V9, PyObject* storage_V11,
           PyObject* storage_V13, PyObject* storage_V1,
           PyObject* storage_V15) {
    Py_XINCREF(storage_V3);
    Py_XINCREF(storage_V5);
    Py_XINCREF(storage_V7);
    Py_XINCREF(storage_V9);
    Py_XINCREF(storage_V11);
    Py_XINCREF(storage_V13);
    Py_XINCREF(storage_V1);
    Py_XINCREF(storage_V15);
    this->storage_V3 = storage_V3;
    this->storage_V5 = storage_V5;
    this->storage_V7 = storage_V7;
    this->storage_V9 = storage_V9;
    this->storage_V11 = storage_V11;
    this->storage_V13 = storage_V13;
    this->storage_V1 = storage_V1;
    this->storage_V15 = storage_V15;

    py_V15 = PyList_GET_ITEM(storage_V15, 0);
    {Py_XINCREF(py_V15);}
    if (!PyObject_TypeCheck(py_V15, &PyGpuContextType)) {
      PyErr_SetString(PyExc_TypeError, "expected a GpuContext");
      {
        if (!PyErr_Occurred()) {
          PyErr_SetString(PyExc_RuntimeError,
                          "Unexpected error in an Op's C code. "
                          "No Python exception was set.");
        }
        return 15;
      }
    }
    V15 = (PyGpuContextObject *)py_V15;
    Py_INCREF(V15);

#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
#define FAIL { \
    if (!PyErr_Occurred()) { \
        PyErr_SetString(PyExc_RuntimeError, \
            "Unexpected error in an Op's C code. " \
            "No Python exception was set."); \
    } \
    return 17; \
}
#define PARAMS V15

    {
      // We need to keep a reference here to have it available in the
      // destructor.
      ctx = PARAMS;
      Py_INCREF(ctx);
      cuda_enter(PARAMS->ctx);
      cudnnStatus_t err;
      APPLY_SPECIFIC(_handle) = NULL;
      if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
                     cudnnGetErrorString(err));
        cuda_exit(PARAMS->ctx);
        FAIL;
      }
      if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
                                cuda_get_stream(PARAMS->ctx))) != CUDNN_STATUS_SUCCESS) {
        PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
                     cudnnGetErrorString(err));
        cuda_exit(PARAMS->ctx);
        FAIL;
      }
      cuda_exit(PARAMS->ctx);
    }

    cudnnStatus_t APPLY_SPECIFIC(err);
    APPLY_SPECIFIC(input) = NULL;
    APPLY_SPECIFIC(output) = NULL;
    APPLY_SPECIFIC(kerns) = NULL;
    if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_MemoryError,
                   "could not allocate tensor descriptor (inp): %s",
                   cudnnGetErrorString(APPLY_SPECIFIC(err)));
      FAIL;
    }
    if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_MemoryError,
                   "could not allocate tensor descriptor (out): %s",
                   cudnnGetErrorString(APPLY_SPECIFIC(err)));
      FAIL;
    }
    if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns))) != CUDNN_STATUS_SUCCESS) {
      PyErr_Format(PyExc_MemoryError,
                   "could not allocate filter descriptor: %s",
                   cudnnGetErrorString(APPLY_SPECIFIC(err)));
      FAIL;
    }

#ifdef CHOOSE_ALGO
    reuse_algo = 0;
    prev_algo = CONV_ALGO;
#ifndef CHOOSE_ONCE
    memset(prev_img_dims, 0, sizeof(prev_img_dims));
    memset(prev_kern_dims, 0, sizeof(prev_kern_dims));
#endif
#endif

#undef FAIL
#undef PARAMS
#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO

    this->__ERROR = __ERROR;
    return 0;
  }

  void cleanup(void) {
    __label_1:
    double __DUMMY_1;
    __label_3:
    double __DUMMY_3;
    __label_5:
    double __DUMMY_5;
    __label_7:
    double __DUMMY_7;
    __label_9:
    double __DUMMY_9;
    __label_11:
    double __DUMMY_11;
    __label_13:
    double __DUMMY_13;
    __label_15:
    Py_XDECREF(V15);
    V15 = NULL;
    {Py_XDECREF(py_V15);}
    double __DUMMY_15;
    __label_18:
#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
    cuda_enter(ctx->ctx);
    cudnnDestroy(APPLY_SPECIFIC(_handle));
    cuda_exit(ctx->ctx);
    Py_DECREF((PyObject *)ctx);
    if (APPLY_SPECIFIC(input) != NULL)
      cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input));
    if (APPLY_SPECIFIC(output) != NULL)
      cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output));
    if (APPLY_SPECIFIC(kerns) != NULL)
      cudnnDestroyFilterDescriptor(APPLY_SPECIFIC(kerns));
#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
    double __DUMMY_18;

    Py_XDECREF(this->storage_V3);
    Py_XDECREF(this->storage_V5);
    Py_XDECREF(this->storage_V7);
    Py_XDECREF(this->storage_V9);
    Py_XDECREF(this->storage_V11);
    Py_XDECREF(this->storage_V13);
    Py_XDECREF(this->storage_V1);
    Py_XDECREF(this->storage_V15);
  }

  int run(void) {
    int __failure = 0;

    PyObject* py_V1;
    PyGpuArrayObject *V1;
    PyObject* py_V3;
    PyGpuArrayObject *V3;
    PyObject* py_V5;
    PyGpuArrayObject *V5;
    PyObject* py_V7;
    PyGpuArrayObject *V7;
    PyObject* py_V9;
    cudnnConvolutionDescriptor_t V9;
    PyObject* py_V11;
    typedef npy_float32 V11_dtype; // Deprecated use dtype_V11 instead.
    typedef npy_float32 dtype_V11;
    npy_float32 V11;
    PyObject* py_V13;
    typedef npy_float32 V13_dtype; // Deprecated use dtype_V13 instead.
    typedef npy_float32 dtype_V13;
    npy_float32 V13;

    {
      py_V1 = PyList_GET_ITEM(storage_V1, 0);
      {Py_XINCREF(py_V1);}
      if (py_V1 == Py_None) {
        V1 = NULL;
      } else {
        V1 = NULL;
        if (py_V1 == Py_None) {
          PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
          {
            __failure = 2;
            if (!PyErr_Occurred()) {
              PyErr_SetString(PyExc_RuntimeError,
                              "Unexpected error in an Op's C code. "
                              "No Python exception was set.");
            }
            goto __label_2;
          }
        }
        /* First check if we are the base type exactly (the most common case),
           then do the full subclass check if needed. */
        if (py_V1->ob_type != &PyGpuArrayType &&
            !PyObject_TypeCheck(py_V1, &PyGpuArrayType)) {
          PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
          {
            __failure = 2;
            if (!PyErr_Occurred()) {
              PyErr_SetString(PyExc_RuntimeError,
                              "Unexpected error in an Op's C code. "
                              "No Python exception was set.");
            }
            goto __label_2;
          }
        }
        V1 = (PyGpuArrayObject *)py_V1;
        Py_INCREF(V1);
      }
      {
        py_V3 = PyList_GET_ITEM(storage_V3, 0);
        {Py_XINCREF(py_V3);}
        V3 = NULL;
        if (py_V3 == Py_None) {
          PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
          {
            __failure = 4;
            if (!PyErr_Occurred()) {
              PyErr_SetString(PyExc_RuntimeError,
                              "Unexpected error in an Op's C code. "
                              "No Python exception was set.");
            }
            goto __label_4;
          }
        }
        /* First check if we are the base type exactly (the most common case),
           then do the full subclass check if needed. */
        if (py_V3->ob_type != &PyGpuArrayType &&
            !PyObject_TypeCheck(py_V3, &PyGpuArrayType)) {
          PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
          {
            __failure = 4;
            if (!PyErr_Occurred()) {
              PyErr_SetString(PyExc_RuntimeError,
                              "Unexpected error in an Op's C code. "
                              "No Python exception was set.");
            }
            goto __label_4;
          }
        }
        V3 = (PyGpuArrayObject *)py_V3;
        Py_INCREF(V3);
        {
          py_V5 = PyList_GET_ITEM(storage_V5, 0);
          {Py_XINCREF(py_V5);}
          V5 = NULL;
          if (py_V5 == Py_None) {
            PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
            {
              __failure = 6;
              if (!PyErr_Occurred()) {
                PyErr_SetString(PyExc_RuntimeError,
                                "Unexpected error in an Op's C code. "
                                "No Python exception was set.");
              }
              goto __label_6;
            }
          }
          /* First check if we are the base type exactly (the most common case),
             then do the full subclass check if needed. */
          if (py_V5->ob_type != &PyGpuArrayType &&
              !PyObject_TypeCheck(py_V5, &PyGpuArrayType)) {
            PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
            {
              __failure = 6;
              if (!PyErr_Occurred()) {
                PyErr_SetString(PyExc_RuntimeError,
                                "Unexpected error in an Op's C code. "
                                "No Python exception was set.");
              }
              goto __label_6;
            }
          }
          V5 = (PyGpuArrayObject *)py_V5;
          Py_INCREF(V5);
          {
            py_V7 = PyList_GET_ITEM(storage_V7, 0);
            {Py_XINCREF(py_V7);}
            V7 = NULL;
            if (py_V7 == Py_None) {
              PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
              {
                __failure = 8;
                if (!PyErr_Occurred()) {
                  PyErr_SetString(PyExc_RuntimeError,
                                  "Unexpected error in an Op's C code. "
                                  "No Python exception was set.");
                }
                goto __label_8;
              }
            }
            /* First check if we are the base type exactly (the most common case),
               then do the full subclass check if needed. */
            if (py_V7->ob_type != &PyGpuArrayType &&
                !PyObject_TypeCheck(py_V7, &PyGpuArrayType)) {
              PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
              {
                __failure = 8;
                if (!PyErr_Occurred()) {
                  PyErr_SetString(PyExc_RuntimeError,
                                  "Unexpected error in an Op's C code. "
                                  "No Python exception was set.");
                }
                goto __label_8;
              }
            }
            V7 = (PyGpuArrayObject *)py_V7;
            Py_INCREF(V7);
            {
              py_V9 = PyList_GET_ITEM(storage_V9, 0);
              {Py_XINCREF(py_V9);}
              V9 = (cudnnConvolutionDescriptor_t)PyCapsule_GetPointer(py_V9, NULL);
              if (V9 == NULL) {
                __failure = 10;
                if (!PyErr_Occurred()) {
                  PyErr_SetString(PyExc_RuntimeError,
                                  "Unexpected error in an Op's C code. "
                                  "No Python exception was set.");
                }
                goto __label_10;
              }
              {
                py_V11 = PyList_GET_ITEM(storage_V11, 0);
                {Py_XINCREF(py_V11);}
                if (!PyObject_TypeCheck(py_V11, &PyFloat32ArrType_Type)) {
                  PyErr_Format(PyExc_ValueError, "Scalar check failed (npy_float32)");
                  {
                    __failure = 12;
                    if (!PyErr_Occurred()) {
                      PyErr_SetString(PyExc_RuntimeError,
                                      "Unexpected error in an Op's C code. "
                                      "No Python exception was set.");
                    }
                    goto __label_12;
                  }
                }
                PyArray_ScalarAsCtype(py_V11, &V11);
                {
                  py_V13 = PyList_GET_ITEM(storage_V13, 0);
                  {Py_XINCREF(py_V13);}
                  if (!PyObject_TypeCheck(py_V13, &PyFloat32ArrType_Type)) {
                    PyErr_Format(PyExc_ValueError, "Scalar check failed (npy_float32)");
                    {
                      __failure = 14;
                      if (!PyErr_Occurred()) {
                        PyErr_SetString(PyExc_RuntimeError,
                                        "Unexpected error in an Op's C code. "
                                        "No Python exception was set.");
                      }
                      goto __label_14;
                    }
                  }
                  PyArray_ScalarAsCtype(py_V13, &V13);
                  {
                    {
                      // Op class GpuDnnConv
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
                      {
                        if (APPLY_SPECIFIC(conv_fwd)(V3, V5, V7, V9, V11, V13,
                                                     &V1, V15) != 0) {
                          {
                            __failure = 17;
                            if (!PyErr_Occurred()) {
                              PyErr_SetString(PyExc_RuntimeError,
                                              "Unexpected error in an Op's C code. "
                                              "No Python exception was set.");
                            }
                            goto __label_17;
                          }
                        }
                      }
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
                      __label_17:
                      double __DUMMY_17;
                    }
                    __label_16:
                    double __DUMMY_16;
                  }
                  __label_14:
                  {Py_XDECREF(py_V13);}
                  double __DUMMY_14;
                }
                __label_12:
                {Py_XDECREF(py_V11);}
                double __DUMMY_12;
              }
              __label_10:
              {Py_XDECREF(py_V9);}
              double __DUMMY_10;
            }
            __label_8:
            Py_XDECREF(V7);
            V7 = NULL;
            {Py_XDECREF(py_V7);}
            double __DUMMY_8;
          }
          __label_6:
          Py_XDECREF(V5);
          V5 = NULL;
          {Py_XDECREF(py_V5);}
          double __DUMMY_6;
        }
        __label_4:
        Py_XDECREF(V3);
        V3 = NULL;
        {Py_XDECREF(py_V3);}
        double __DUMMY_4;
      }
      __label_2:
      if (!__failure) {
        if (!V1) {
          Py_XDECREF(py_V1);
          Py_INCREF(Py_None);
          py_V1 = Py_None;
        } else if ((void *)py_V1 != (void *)V1) {
          Py_XDECREF(py_V1);
          py_V1 = (PyObject *)V1;
          Py_INCREF(py_V1);
        }
        PyObject* old = PyList_GET_ITEM(storage_V1, 0);
        {Py_XINCREF(py_V1);}
        PyList_SET_ITEM(storage_V1, 0, py_V1);
        {Py_XDECREF(old);}
      }
      Py_XDECREF(V1);
      V1 = NULL;
      {Py_XDECREF(py_V1);}
      double __DUMMY_2;
    }

    if (__failure) {
      // When there is a failure, this code puts the exception
      // in __ERROR.
      PyObject* err_type = NULL;
      PyObject* err_msg = NULL;
      PyObject* err_traceback = NULL;
      PyErr_Fetch(&err_type, &err_msg, &err_traceback);
      if (!err_type) {err_type = Py_None; Py_INCREF(Py_None);}
      if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
      if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
      PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
      PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
      PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
      PyList_SET_ITEM(__ERROR, 0, err_type);
      PyList_SET_ITEM(__ERROR, 1, err_msg);
      PyList_SET_ITEM(__ERROR, 2, err_traceback);
      {Py_XDECREF(old_err_type);}
      {Py_XDECREF(old_err_msg);}
      {Py_XDECREF(old_err_traceback);}
    }
    // The failure code is returned to index what code block failed.
    return __failure;
  }
};
}

static int __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor(__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* self) {
  return self->run();
}

static void __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor(void* executor, void* self) {
  delete ((__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577*)self);
}

//////////////////////
//// Functions
//////////////////////
static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
  assert(PyTuple_Check(argtuple));
  if (9 != PyTuple_Size(argtuple)) {
    PyErr_Format(PyExc_TypeError,
                 "Wrong number of arguments, expected 9, got %i",
                 (int)PyTuple_Size(argtuple));
    return NULL;
  }
  __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* struct_ptr = new __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577();
  if (struct_ptr->init(PyTuple_GET_ITEM(argtuple, 0),
                       PyTuple_GET_ITEM(argtuple, 1),
                       PyTuple_GET_ITEM(argtuple, 2),
                       PyTuple_GET_ITEM(argtuple, 3),
                       PyTuple_GET_ITEM(argtuple, 4),
                       PyTuple_GET_ITEM(argtuple, 5),
                       PyTuple_GET_ITEM(argtuple, 6),
                       PyTuple_GET_ITEM(argtuple, 7),
                       PyTuple_GET_ITEM(argtuple, 8)) != 0) {
    delete struct_ptr;
    return NULL;
  }
  PyObject* thunk = PyCObject_FromVoidPtrAndDesc(
      (void*)(&__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor),
      struct_ptr,
      __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor);
  return thunk;
}

//////////////////////
//// Module init
//////////////////////
static PyMethodDef MyMethods[] = {
  {"instantiate", instantiate, METH_VARARGS, "undocumented"},
  {NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC initd7db26d1cc9fb59ee56289f4dda72577(void) {
  import_pygpu__gpuarray();
  import_array();
  setup_ext_cuda();
  (void) Py_InitModule("d7db26d1cc9fb59ee56289f4dda72577", MyMethods);
}