Can you let us know which compiler (and which version) you are using?
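
If you are not sure which compiler Theano ends up calling, something like
the following should show it (assuming a default setup where Theano uses
g++; theano.config.cxx may point to a different compiler on your machine):

  $ g++ --version
  $ python -c "import theano; print(theano.__version__); print(theano.config.cxx)"
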
On Thu, Sep 22, 2016, D. Kit wrote:
> Hello,
>
> I'm attempting to use conv3D2D in Theano, but when I get to the function
> compilation stage it fails with the following errors:
>
> mod.cpp:684:1: error: expected unqualified-id before '{' token
>
> mod.cpp:1345:1: error: expected '}' at end of input
> mod.cpp: In destructor '{anonymous}::__struct_compiled_op_d7db26d1cc9f
>
> mod.cpp:573:21: error: 'cleanup' was not declared in this scope
> mod.cpp: At global scope:
> mod.cpp:678:1: error: expected unqualified-id at end of input
> #undef ITEMSIZE_INPUT_2
> ^
> mod.cpp:678:1: error: expected '}' at end of input
>
> I'm attaching the mod.cpp file that was generated by cc.py, op.py, dnn.py,
> etc. The initialization code from dnn_base.c starts at line 684 of that
> file, but it is not wrapped in any function definition, so it sits outside
> any useful code block. The "#section cleanup_code_struct" code also appears
> to have been inserted in the wrong spot.
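>
> For reference, here is a hand-written sketch (placeholder names only, not
> the real generated code) of where I understand the struct-level sections
> are supposed to land: support_code_struct at struct scope,
> init_code_struct inside init(), and cleanup_code_struct inside cleanup():
>
>   namespace {
>   struct __struct_compiled_op_HASH {
>     // #section support_code_struct  -> member fields and helper methods
>     int init(void) {
>       // #section init_code_struct   -> e.g. create the cuDNN handle and
>       //                                descriptors
>       return 0;
>     }
>     void cleanup(void) {
>       // #section cleanup_code_struct -> e.g. destroy the handle and
>       //                                 descriptors
>     }
>     ~__struct_compiled_op_HASH(void) { cleanup(); }
>   };
>   }
>
> In the attached file, the init-section code instead sits at struct scope
> and the cleanup-section code is misplaced, which would explain the parse
> errors above.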
>
> Can someone tell me how to fix this?
>
> --Dmitry
>
> #include <Python.h>
> #include <iostream>
> #include "theano_mod_helper.h"
> #include <gpuarray/array.h>
> #include <gpuarray/kernel.h>
> #include <gpuarray/error.h>
> #include <gpuarray/buffer.h>
> #include <gpuarray/buffer_blas.h>
> #include <numpy/arrayobject.h>
> #include <gpuarray_api.h>
> #include <math.h>
> #include <numpy/arrayscalars.h>
> #include "cudnn.h"
> #include "cudnn_helper.h"
> #include "gpuarray_helper.h"
> #include "gpuarray/types.h"
> #include "gpuarray/array.h"
> #include "gpuarray/util.h"
> #include "gpuarray/ext_cuda.h"
> #include "gpuarray_api.h"
> #include "numpy_compat.h"
> //////////////////////
> //// Support Code
> //////////////////////
>
> void _capsule_destructor(PyObject *o) {
> void *d = PyCapsule_GetContext(o);
> void *p = PyCapsule_GetPointer(o, NULL);
> void (*f)(void *) = (void (*)(void *))d;
> if (f != NULL) f(p);
> }
>
>
>
>
> static int
> c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
> cudnnDataType_t dt;
> size_t ds;
> switch (var->ga.typecode) {
> case GA_FLOAT:
> dt = CUDNN_DATA_FLOAT;
> break;
> case GA_DOUBLE:
> dt = CUDNN_DATA_DOUBLE;
> break;
> case GA_HALF:
> dt = CUDNN_DATA_HALF;
> break;
> default:
> PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
> return -1;
> }
> ds = gpuarray_get_elsize(var->ga.typecode);
>
> int strs[8], dims[8], default_stride = 1;
> unsigned int nd = PyGpuArray_NDIM(var);
>
> if (nd > 8) {
> PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
> return -1;
> }
>
> for (unsigned int _i = nd; _i > 0; _i--) {
> unsigned int i = _i - 1;
> strs[i] = PyGpuArray_STRIDE(var, i) ?
> PyGpuArray_STRIDE(var, i)/ds : default_stride;
> default_stride *= PyGpuArray_DIM(var, i);
> dims[i] = PyGpuArray_DIM(var, i);
> }
>
> /* Tensors can't be smaller than 3d for cudnn so we pad the
> * descriptor if they are */
> for (unsigned int i = nd; i < 3; i++) {
> strs[i] = 1;
> dims[i] = 1;
> }
>
> cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
> dims, strs);
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "Could not set tensorNd descriptor: %s",
> cudnnGetErrorString(err));
> return -1;
> }
> return 0;
> }
>
> static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
> cudnnStatus_t err;
> err = cudnnCreateTensorDescriptor(desc);
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "Could not create tensor descriptor: %s",
> cudnnGetErrorString(err));
> return -1;
> }
> if (c_set_tensorNd(var, *desc) != 0) {
> cudnnDestroyTensorDescriptor(*desc);
> return -1;
> }
> return 0;
> }
>
> static int
> c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
> cudnnDataType_t dt;
> cudnnStatus_t err;
>
> if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
> PyErr_SetString(PyExc_ValueError,
> "Only contiguous filters (kernels) are supported.");
> return -1;
> }
> switch (var->ga.typecode) {
> case GA_FLOAT:
> dt = CUDNN_DATA_FLOAT;
> break;
> case GA_DOUBLE:
> dt = CUDNN_DATA_DOUBLE;
> break;
> case GA_HALF:
> dt = CUDNN_DATA_HALF;
> break;
> default:
> PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
> return -1;
> }
>
> int dims[8];
> unsigned int nd = PyGpuArray_NDIM(var);
>
> if (nd > 8) {
> PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
> return -1;
> }
>
> for (unsigned int _i = nd; _i > 0; _i--) {
> unsigned int i = _i - 1;
> dims[i] = PyGpuArray_DIM(var, i);
> }
>
> /* Filters can't be less than 3d so we pad */
> for (unsigned int i = nd; i < 3; i++)
> dims[i] = 1;
>
> if (nd < 3)
> nd = 3;
>
> #if CUDNN_VERSION >= 5000
> err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
> #else
> err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
> #endif
>
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "Could not set filter descriptor: %s.",
> cudnnGetErrorString(err));
> return -1;
> }
> return 0;
> }
>
> static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
> cudnnStatus_t err;
> err = cudnnCreateFilterDescriptor(desc);
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "Could not create tensor descriptor: %s",
> cudnnGetErrorString(err));
> return -1;
> }
> if (c_set_filter(var, *desc) != 0) {
> cudnnDestroyFilterDescriptor(*desc);
> return -1;
> }
> return 0;
> }
>
>
>
> namespace {
> struct __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577 {
> PyObject* __ERROR;
>
> PyObject* storage_V3;
> PyObject* storage_V5;
> PyObject* storage_V7;
> PyObject* storage_V9;
> PyObject* storage_V11;
> PyObject* storage_V13;
> PyObject* storage_V1;
> PyObject* storage_V15;
>
> PyObject* py_V15;
> PyGpuContextObject *V15;
>
> #define DTYPE_INPUT_0 npy_float32
>
> #define TYPENUM_INPUT_0 11
>
> #define ITEMSIZE_INPUT_0 4
>
> #define DTYPE_INPUT_1 npy_float32
>
> #define TYPENUM_INPUT_1 11
>
> #define ITEMSIZE_INPUT_1 4
>
> #define DTYPE_INPUT_2 npy_float32
>
> #define TYPENUM_INPUT_2 11
>
> #define ITEMSIZE_INPUT_2 4
>
> #define DTYPE_INPUT_4 npy_float32
>
> #define TYPENUM_INPUT_4 11
>
> #define ITEMSIZE_INPUT_4 4
>
> #define DTYPE_INPUT_5 npy_float32
>
> #define TYPENUM_INPUT_5 11
>
> #define ITEMSIZE_INPUT_5 4
>
> #define DTYPE_OUTPUT_0 npy_float32
>
> #define TYPENUM_OUTPUT_0 11
>
> #define ITEMSIZE_OUTPUT_0 4
>
> #define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
>
> #define CONV_INPLACE 1
>
> #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
>
>
>
> PyGpuContextObject *ctx;
> cudnnHandle_t APPLY_SPECIFIC(_handle);
>
>
> cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
> cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
> cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
>
>
>
> #ifdef CHOOSE_ALGO
> int reuse_algo;
> cudnnConvolutionFwdAlgo_t prev_algo;
> #ifndef CHOOSE_ONCE
> size_t prev_img_dims[5];
> size_t prev_kern_dims[5];
> #endif
> #endif
>
> int
> APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
> PyGpuArrayObject *om,
> cudnnConvolutionDescriptor_t desc,
> double alpha, double beta,
> PyGpuArrayObject **output,
> PyGpuContextObject *c) {
> cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
> float af = alpha, bf = beta;
> void *alpha_p;
> void *beta_p;
>
> if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
> PyErr_SetString(PyExc_ValueError,
> "images and kernel must have the same stack size");
> return 1;
> }
>
> if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
> return 1;
> if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
> return 1;
>
> switch (input->ga.typecode) {
> case GA_DOUBLE:
> alpha_p = (void *)&alpha;
> beta_p = (void *)&beta;
> break;
> case GA_FLOAT:
> case GA_HALF:
> alpha_p = (void *)&af;
> beta_p = (void *)&bf;
> break;
> default:
> PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
> return 1;
> }
>
> #ifdef CONV_INPLACE
> Py_XDECREF(*output);
> *output = om;
> Py_INCREF(*output);
> #else
> if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
> om->ga.typecode, GA_C_ORDER, c) != 0)
> return 1;
> if (beta != 0.0 && pygpu_move(*output, om))
> return 1;
> #endif
>
> if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
> return 1;
>
> cudnnConvolutionFwdAlgo_t algo = CONV_ALGO;
>
> cuda_enter(c->ctx);
> #ifdef CHOOSE_ALGO
> #ifndef CHOOSE_ONCE
> reuse_algo = 1;
> for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
> reuse_algo = (reuse_algo &&
> PyGpuArray_DIM(input, i) == prev_img_dims[i]);
> reuse_algo = (reuse_algo &&
> PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
> }
> #endif
>
> if (!reuse_algo) {
> #ifdef CHOOSE_TIME
> int count;
> cudnnConvolutionFwdAlgoPerf_t choice;
> err = cudnnFindConvolutionForwardAlgorithm(
> APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
> desc, APPLY_SPECIFIC(output), 1, &count, &choice);
>
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "error selecting convolution algo: %s",
> cudnnGetErrorString(err));
> cuda_exit(c->ctx);
> return 1;
> }
> algo = choice.algo;
> #else
> size_t free;
> int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
>
> if (err2 != GA_NO_ERROR) {
> PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
> "memory information on the GPU");
> cuda_exit(c->ctx);
> return 1;
> }
>
> err = cudnnGetConvolutionForwardAlgorithm(
> APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
> desc, APPLY_SPECIFIC(output),
> CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "error selecting convolution algo: %s",
> cudnnGetErrorString(err));
> cuda_exit(c->ctx);
> return 1;
> }
> #endif
> prev_algo = algo;
> } else {
> algo = prev_algo;
> }
>
> #ifdef CHOOSE_ONCE
> reuse_algo = 1;
> #else
> for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
> prev_img_dims[i] = PyGpuArray_DIM(input, i);
> prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
> }
> #endif
>
> #endif
>
> /* These two algos are not supported for 3d conv */
> if (PyGpuArray_NDIM(input) == 5 &&
> (algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM ||
> algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
> algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
>
> // The FFT implementation does not support strides, 1x1 filters or inputs
> // with a spatial dimension larger than 1024. The tiled-FFT implementation
> // does not support strides.
> // If the chosen implementation is FFT or tiled-FFT, validate that it can
> // be used on the current data and default to a safe implementation if it
> // can't.
> // The following code is 2d-specific, but that is fine since FFT and
> // tiled-FFT are defined only for 2d filters.
> if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
> algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) &&
> PyGpuArray_NDIM(input) == 4) {
>
> // Extract the properties of the convolution descriptor
> int nd;
> int pad[2];
> int stride[2];
> int upscale[2];
> cudnnConvolutionMode_t mode;
> cudnnDataType_t data_type;
> err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
> upscale, &mode, &data_type);
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "error getting convolution properties: %s",
> cudnnGetErrorString(err));
> cuda_exit(c->ctx);
> return 1;
> }
>
> if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
> {
> if (stride[0] != 1 || stride[1] != 1 ||
> PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
> (PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
> {
> algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> }
> }
> else
> {
> // algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
> if (stride[0] != 1 || stride[1] != 1)
> {
> algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
> }
> }
> }
>
> {
> size_t worksize;
> gpudata *workspace;
> err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
> APPLY_SPECIFIC(input),
> APPLY_SPECIFIC(kerns),
> desc,
> APPLY_SPECIFIC(output),
> algo,
> &worksize);
>
> if (err == CUDNN_STATUS_NOT_SUPPORTED) {
> // Fallback to none algo if not supported
> // TODO: Print a warning
> algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
>
> err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
> APPLY_SPECIFIC(input),
> APPLY_SPECIFIC(kerns),
> desc,
> APPLY_SPECIFIC(output),
> algo,
> &worksize);
> }
>
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError,
> "error getting worksize: %s",
> cudnnGetErrorString(err));
> cuda_exit(c->ctx);
> return 1;
> }
>
> /*
> * This is less than ideal since we need to free it after (which
> * introduces a synchronization point). But we don't have a module
> * to place a nice get_work_mem() function in.
> */
> if (worksize != 0) {
> workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
> if (workspace == NULL) {
> PyErr_SetString(PyExc_RuntimeError,
> "Could not allocate working memory");
> cuda_exit(c->ctx);
> return 1;
> }
> }
>
> cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
> cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
> cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
>
> err = cudnnConvolutionForward(
> APPLY_SPECIFIC(_handle),
> alpha_p,
> APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
> APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
> desc, algo,
> worksize == 0 ? NULL : *(void **)workspace, worksize,
> beta_p,
> APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
>
> if (worksize != 0)
> gpudata_release(workspace);
>
> cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
> cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
> cuda_record((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
> }
> cuda_exit(c->ctx);
>
> if (err != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError, "error doing operation: %s",
> cudnnGetErrorString(err));
> return 1;
> }
> return 0;
> }
>
>
> #undef DTYPE_INPUT_0
>
> #undef TYPENUM_INPUT_0
>
> #undef ITEMSIZE_INPUT_0
>
> #undef DTYPE_INPUT_1
>
> #undef TYPENUM_INPUT_1
>
> #undef ITEMSIZE_INPUT_1
>
> #undef DTYPE_INPUT_2
>
> #undef TYPENUM_INPUT_2
>
> #undef ITEMSIZE_INPUT_2
>
> #undef DTYPE_INPUT_4
>
> #undef TYPENUM_INPUT_4
>
> #undef ITEMSIZE_INPUT_4
>
> #undef DTYPE_INPUT_5
>
> #undef TYPENUM_INPUT_5
>
> #undef ITEMSIZE_INPUT_5
>
> #undef DTYPE_OUTPUT_0
>
> #undef TYPENUM_OUTPUT_0
>
> #undef ITEMSIZE_OUTPUT_0
>
> #undef APPLY_SPECIFIC
>
> #undef CONV_INPLACE
>
> #undef CONV_ALGO
>
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577() {
> // This is only somewhat safe because we:
> // 1) Are not a virtual class
> // 2) Do not use any virtual classes in the members
> // 3) Deal with mostly POD and pointers
>
> // If this changes, we would have to revise this, but for
> // now I am tired of chasing segfaults because
> // initialization code had an error and some pointer has
> // a junk value.
> memset(this, 0, sizeof(*this));
> }
> ~__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577(void) {
> cleanup();
> }
>
> int init(PyObject* __ERROR, PyObject* storage_V3, PyObject*
> storage_V5, PyObject* storage_V7, PyObject* storage_V9, PyObject*
> storage_V11, PyObject* storage_V13, PyObject* storage_V1, PyObject*
> storage_V15) {
> Py_XINCREF(storage_V3);
> Py_XINCREF(storage_V5);
> Py_XINCREF(storage_V7);
> Py_XINCREF(storage_V9);
> Py_XINCREF(storage_V11);
> Py_XINCREF(storage_V13);
> Py_XINCREF(storage_V1);
> Py_XINCREF(storage_V15);
> this->storage_V3 = storage_V3;
> this->storage_V5 = storage_V5;
> this->storage_V7 = storage_V7;
> this->storage_V9 = storage_V9;
> this->storage_V11 = storage_V11;
> this->storage_V13 = storage_V13;
> this->storage_V1 = storage_V1;
> this->storage_V15 = storage_V15;
>
>
>
>
>
>
>
>
>
> py_V15 = PyList_GET_ITEM(storage_V15, 0);
> {Py_XINCREF(py_V15);}
>
> if (!PyObject_TypeCheck(py_V15, &PyGpuContextType)) {
> PyErr_SetString(PyExc_TypeError, "expected a GpuContext");
> {
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> return 15;
> }
> }
>
> V15 = (PyGpuContextObject *)py_V15;
> Py_INCREF(V15);
>
>
>
> #define DTYPE_INPUT_0 npy_float32
>
> #define TYPENUM_INPUT_0 11
>
> #define ITEMSIZE_INPUT_0 4
>
> #define DTYPE_INPUT_1 npy_float32
>
> #define TYPENUM_INPUT_1 11
>
> #define ITEMSIZE_INPUT_1 4
>
> #define DTYPE_INPUT_2 npy_float32
>
> #define TYPENUM_INPUT_2 11
>
> #define ITEMSIZE_INPUT_2 4
>
> #define DTYPE_INPUT_4 npy_float32
>
> #define TYPENUM_INPUT_4 11
>
> #define ITEMSIZE_INPUT_4 4
>
> #define DTYPE_INPUT_5 npy_float32
>
> #define TYPENUM_INPUT_5 11
>
> #define ITEMSIZE_INPUT_5 4
>
> #define DTYPE_OUTPUT_0 npy_float32
>
> #define TYPENUM_OUTPUT_0 11
>
> #define ITEMSIZE_OUTPUT_0 4
>
> #define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
>
> #define CONV_INPLACE 1
>
> #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
>
> #define FAIL { \
>   if (!PyErr_Occurred()) { \
>     PyErr_SetString(PyExc_RuntimeError, \
>                     "Unexpected error in an Op's C code. " \
>                     "No Python exception was set."); \
>   } \
>   return 17; \
> }
>
> #define PARAMS V15
>
>
>
> {
> // We need to keep a reference here to have it available in the destructor.
> ctx = PARAMS;
> Py_INCREF(ctx);
>
> cuda_enter(PARAMS->ctx);
> cudnnStatus_t err;
> APPLY_SPECIFIC(_handle) = NULL;
> if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
> cudnnGetErrorString(err));
> cuda_exit(PARAMS->ctx);
> FAIL;
> }
> if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
> cuda_get_stream(PARAMS->ctx))) !=
> CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
> cudnnGetErrorString(err));
> cuda_exit(PARAMS->ctx);
> FAIL;
> }
> cuda_exit(PARAMS->ctx);
> }
>
>
>
> cudnnStatus_t APPLY_SPECIFIC(err);
> APPLY_SPECIFIC(input) = NULL;
> APPLY_SPECIFIC(output) = NULL;
> APPLY_SPECIFIC(kerns) = NULL;
> if ((APPLY_SPECIFIC(err) =
> cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS)
> {
> PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
> "(inp): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
> FAIL;
> }
> if ((APPLY_SPECIFIC(err) =
> cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) !=
> CUDNN_STATUS_SUCCESS) {
> PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
> "(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
> FAIL;
> }
> if ((APPLY_SPECIFIC(err) =
> cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns))) != CUDNN_STATUS_SUCCESS)
> {
> PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %s",
> cudnnGetErrorString(APPLY_SPECIFIC(err)));
> FAIL;
> }
>
>
>
> #ifdef CHOOSE_ALGO
> reuse_algo = 0;
> prev_algo = CONV_ALGO;
> #ifndef CHOOSE_ONCE
> memset(prev_img_dims, 0, sizeof(prev_img_dims));
> memset(prev_kern_dims, 0, sizeof(prev_kern_dims));
> #endif
> #endif
>
>
>
> #undef FAIL
>
> #undef PARAMS
>
> #undef DTYPE_INPUT_0
>
> #undef TYPENUM_INPUT_0
>
> #undef ITEMSIZE_INPUT_0
>
> #undef DTYPE_INPUT_1
>
> #undef TYPENUM_INPUT_1
>
> #undef ITEMSIZE_INPUT_1
>
> #undef DTYPE_INPUT_2
>
> #undef TYPENUM_INPUT_2
>
> #undef ITEMSIZE_INPUT_2
>
> #undef DTYPE_INPUT_4
>
> #undef TYPENUM_INPUT_4
>
> #undef ITEMSIZE_INPUT_4
>
> #undef DTYPE_INPUT_5
>
> #undef TYPENUM_INPUT_5
>
> #undef ITEMSIZE_INPUT_5
>
> #undef DTYPE_OUTPUT_0
>
> #undef TYPENUM_OUTPUT_0
>
> #undef ITEMSIZE_OUTPUT_0
>
> #undef APPLY_SPECIFIC
>
> #undef CONV_INPLACE
>
> #undef CONV_ALGO
> this->__ERROR = __ERROR;
> return 0;
> }
> void cleanup(void) {
> __label_1:
>
> double __DUMMY_1;
> __label_3:
>
> double __DUMMY_3;
> __label_5:
>
> double __DUMMY_5;
> __label_7:
>
> double __DUMMY_7;
> __label_9:
>
> double __DUMMY_9;
> __label_11:
>
> double __DUMMY_11;
> __label_13:
>
> double __DUMMY_13;
> __label_15:
> Py_XDECREF(V15); V15 = NULL;
> {Py_XDECREF(py_V15);}
>
> double __DUMMY_15;
> __label_18:
>
>
> #define DTYPE_INPUT_0 npy_float32
>
> #define TYPENUM_INPUT_0 11
>
> #define ITEMSIZE_INPUT_0 4
>
> #define DTYPE_INPUT_1 npy_float32
>
> #define TYPENUM_INPUT_1 11
>
> #define ITEMSIZE_INPUT_1 4
>
> #define DTYPE_INPUT_2 npy_float32
>
> #define TYPENUM_INPUT_2 11
>
> #define ITEMSIZE_INPUT_2 4
>
> #define DTYPE_INPUT_4 npy_float32
>
> #define TYPENUM_INPUT_4 11
>
> #define ITEMSIZE_INPUT_4 4
>
> #define DTYPE_INPUT_5 npy_float32
>
> #define TYPENUM_INPUT_5 11
>
> #define ITEMSIZE_INPUT_5 4
>
> #define DTYPE_OUTPUT_0 npy_float32
>
> #define TYPENUM_OUTPUT_0 11
>
> #define ITEMSIZE_OUTPUT_0 4
>
> #define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
>
> #define CONV_INPLACE 1
>
> #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
>
>
>
> cuda_enter(ctx->ctx);
> cudnnDestroy(APPLY_SPECIFIC(_handle));
> cuda_exit(ctx->ctx);
> Py_DECREF((PyObject *)ctx);
>
>
> if (APPLY_SPECIFIC(input) != NULL)
> cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input));
> if (APPLY_SPECIFIC(output) != NULL)
> cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output));
> if (APPLY_SPECIFIC(kerns) != NULL)
> cudnnDestroyFilterDescriptor(APPLY_SPECIFIC(kerns));
>
>
> #undef DTYPE_INPUT_0
>
> #undef TYPENUM_INPUT_0
>
> #undef ITEMSIZE_INPUT_0
>
> #undef DTYPE_INPUT_1
>
> #undef TYPENUM_INPUT_1
>
> #undef ITEMSIZE_INPUT_1
>
> #undef DTYPE_INPUT_2
>
> #undef TYPENUM_INPUT_2
>
> #undef ITEMSIZE_INPUT_2
>
> #undef DTYPE_INPUT_4
>
> #undef TYPENUM_INPUT_4
>
> #undef ITEMSIZE_INPUT_4
>
> #undef DTYPE_INPUT_5
>
> #undef TYPENUM_INPUT_5
>
> #undef ITEMSIZE_INPUT_5
>
> #undef DTYPE_OUTPUT_0
>
> #undef TYPENUM_OUTPUT_0
>
> #undef ITEMSIZE_OUTPUT_0
>
> #undef APPLY_SPECIFIC
>
> #undef CONV_INPLACE
>
> #undef CONV_ALGO
> double __DUMMY_18;
>
> Py_XDECREF(this->storage_V3);
> Py_XDECREF(this->storage_V5);
> Py_XDECREF(this->storage_V7);
> Py_XDECREF(this->storage_V9);
> Py_XDECREF(this->storage_V11);
> Py_XDECREF(this->storage_V13);
> Py_XDECREF(this->storage_V1);
> Py_XDECREF(this->storage_V15);
> }
> int run(void) {
> int __failure = 0;
>
> PyObject* py_V1;
>
> PyGpuArrayObject *V1;
>
> PyObject* py_V3;
>
> PyGpuArrayObject *V3;
>
> PyObject* py_V5;
>
> PyGpuArrayObject *V5;
>
> PyObject* py_V7;
>
> PyGpuArrayObject *V7;
>
> PyObject* py_V9;
>
> cudnnConvolutionDescriptor_t V9;
>
> PyObject* py_V11;
>
> typedef npy_float32 V11_dtype; // Deprecated, use dtype_V11 instead.
> typedef npy_float32 dtype_V11;
>
> npy_float32 V11;
>
> PyObject* py_V13;
>
> typedef npy_float32 V13_dtype; // Deprecated, use dtype_V13 instead.
> typedef npy_float32 dtype_V13;
>
> npy_float32 V13;
>
> {
>
> py_V1 = PyList_GET_ITEM(storage_V1, 0);
> {Py_XINCREF(py_V1);}
>
> if (py_V1 == Py_None)
> {
> V1 = NULL;
> }
> else
> {
>
> V1 = NULL;
> if (py_V1 == Py_None) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> {
> __failure = 2;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_2;}
> }
> /* First check if we are the base type exactly (the most common case),
> then do the full subclass check if needed. */
> if (py_V1->ob_type != &PyGpuArrayType &&
> !PyObject_TypeCheck(py_V1, &PyGpuArrayType)) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> {
> __failure = 2;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_2;}
> }
> V1 = (PyGpuArrayObject *)py_V1;
> Py_INCREF(V1);
>
> }
>
> {
>
> py_V3 = PyList_GET_ITEM(storage_V3, 0);
> {Py_XINCREF(py_V3);}
>
> V3 = NULL;
> if (py_V3 == Py_None) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> {
> __failure = 4;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_4;}
> }
> /* First check if we are the base type exactly (the most common case),
> then do the full subclass check if needed. */
> if (py_V3->ob_type != &PyGpuArrayType &&
> !PyObject_TypeCheck(py_V3, &PyGpuArrayType)) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> {
> __failure = 4;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_4;}
> }
> V3 = (PyGpuArrayObject *)py_V3;
> Py_INCREF(V3);
>
> {
>
> py_V5 = PyList_GET_ITEM(storage_V5, 0);
> {Py_XINCREF(py_V5);}
>
> V5 = NULL;
> if (py_V5 == Py_None) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> {
> __failure = 6;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_6;}
> }
> /* First check if we are the base type exactly (the most common case),
> then do the full subclass check if needed. */
> if (py_V5->ob_type != &PyGpuArrayType &&
> !PyObject_TypeCheck(py_V5, &PyGpuArrayType)) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> {
> __failure = 6;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_6;}
> }
> V5 = (PyGpuArrayObject *)py_V5;
> Py_INCREF(V5);
>
> {
>
> py_V7 = PyList_GET_ITEM(storage_V7, 0);
> {Py_XINCREF(py_V7);}
>
> V7 = NULL;
> if (py_V7 == Py_None) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not
> None");
> {
> __failure = 8;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_8;}
> }
> /* First check if we are the base type exactly (the most common case),
> then do the full subclass check if needed. */
> if (py_V7->ob_type != &PyGpuArrayType &&
> !PyObject_TypeCheck(py_V7, &PyGpuArrayType)) {
> PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
> {
> __failure = 8;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_8;}
> }
> V7 = (PyGpuArrayObject *)py_V7;
> Py_INCREF(V7);
>
> {
>
> py_V9 = PyList_GET_ITEM(storage_V9, 0);
> {Py_XINCREF(py_V9);}
>
> V9 = (cudnnConvolutionDescriptor_t)PyCapsule_GetPointer(py_V9, NULL);
> if (V9 == NULL) {
> __failure = 10;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_10;}
>
> {
>
> py_V11 = PyList_GET_ITEM(storage_V11, 0);
> {Py_XINCREF(py_V11);}
>
> if (!PyObject_TypeCheck(py_V11, &PyFloat32ArrType_Type))
> {
> PyErr_Format(PyExc_ValueError,
> "Scalar check failed (npy_float32)");
> {
> __failure = 12;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_12;}
> }
>
> PyArray_ScalarAsCtype(py_V11, &V11);
>
> {
>
> py_V13 = PyList_GET_ITEM(storage_V13, 0);
> {Py_XINCREF(py_V13);}
>
> if (!PyObject_TypeCheck(py_V13, &PyFloat32ArrType_Type))
> {
> PyErr_Format(PyExc_ValueError,
> "Scalar check failed (npy_float32)");
> {
> __failure = 14;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_14;}
> }
>
> PyArray_ScalarAsCtype(py_V13, &V13);
>
> {
>
> {
> // Op class GpuDnnConv
>
> #define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
>
> #define CONV_INPLACE 1
>
> #define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
> {
> if (APPLY_SPECIFIC(conv_fwd)(V3, V5, V7, V9, V11, V13, &V1, V15) != 0) {
> {
> __failure = 17;
> if (!PyErr_Occurred()) {
> PyErr_SetString(PyExc_RuntimeError,
> "Unexpected error in an Op's C code. "
> "No Python exception was set.");
> }
> goto __label_17;}
> }
> }
> #undef APPLY_SPECIFIC
>
> #undef CONV_INPLACE
>
> #undef CONV_ALGO
> __label_17:
>
> double __DUMMY_17;
>
> }
> __label_16:
>
> double __DUMMY_16;
>
> }
> __label_14:
>
> {Py_XDECREF(py_V13);}
>
> double __DUMMY_14;
>
> }
> __label_12:
>
> {Py_XDECREF(py_V11);}
>
> double __DUMMY_12;
>
> }
> __label_10:
>
> {Py_XDECREF(py_V9);}
>
> double __DUMMY_10;
>
> }
> __label_8:
> Py_XDECREF(V7); V7 = NULL;
> {Py_XDECREF(py_V7);}
>
> double __DUMMY_8;
>
> }
> __label_6:
> Py_XDECREF(V5); V5 = NULL;
> {Py_XDECREF(py_V5);}
>
> double __DUMMY_6;
>
> }
> __label_4:
> Py_XDECREF(V3); V3 = NULL;
> {Py_XDECREF(py_V3);}
>
> double __DUMMY_4;
>
> }
> __label_2:
>
> if (!__failure) {
>
> if (!V1) {
> Py_XDECREF(py_V1);
> Py_INCREF(Py_None);
> py_V1 = Py_None;
> } else if ((void *)py_V1 != (void *)V1) {
> Py_XDECREF(py_V1);
> py_V1 = (PyObject *)V1;
> Py_INCREF(py_V1);
> }
>
> PyObject* old = PyList_GET_ITEM(storage_V1, 0);
> {Py_XINCREF(py_V1);}
> PyList_SET_ITEM(storage_V1, 0, py_V1);
> {Py_XDECREF(old);}
> }
> Py_XDECREF(V1); V1 = NULL;
> {Py_XDECREF(py_V1);}
>
> double __DUMMY_2;
>
> }
>
>
> if (__failure) {
> // When there is a failure, this code puts the exception
> // in __ERROR.
> PyObject* err_type = NULL;
> PyObject* err_msg = NULL;
> PyObject* err_traceback = NULL;
> PyErr_Fetch(&err_type, &err_msg, &err_traceback);
> if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
> if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
> if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
> PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
> PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
> PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
> PyList_SET_ITEM(__ERROR, 0, err_type);
> PyList_SET_ITEM(__ERROR, 1, err_msg);
> PyList_SET_ITEM(__ERROR, 2, err_traceback);
> {Py_XDECREF(old_err_type);}
> {Py_XDECREF(old_err_msg);}
> {Py_XDECREF(old_err_traceback);}
> }
> // The failure code is returned to index what code block failed.
> return __failure;
>
> }
> };
> }
>
>
> static int
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor(__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* self) {
> return self->run();
> }
>
> static void
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor(void* executor, void* self) {
>   delete ((__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577*)self);
> }
>
> //////////////////////
> //// Functions
> //////////////////////
> static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
> assert(PyTuple_Check(argtuple));
> if (9 != PyTuple_Size(argtuple)){
> PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 9,
> got %i", (int)PyTuple_Size(argtuple));
> return NULL;
> }
> __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* struct_ptr =
>     new __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577();
> if (struct_ptr->init(PyTuple_GET_ITEM(argtuple, 0), PyTuple_GET_ITEM(argtuple, 1),
>                      PyTuple_GET_ITEM(argtuple, 2), PyTuple_GET_ITEM(argtuple, 3),
>                      PyTuple_GET_ITEM(argtuple, 4), PyTuple_GET_ITEM(argtuple, 5),
>                      PyTuple_GET_ITEM(argtuple, 6), PyTuple_GET_ITEM(argtuple, 7),
>                      PyTuple_GET_ITEM(argtuple, 8)) != 0) {
> delete struct_ptr;
> return NULL;
> }
> PyObject* thunk = PyCObject_FromVoidPtrAndDesc(
>     (void*)(&__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor),
>     struct_ptr,
>     __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor);
> return thunk; }
>
> //////////////////////
> //// Module init
> //////////////////////
> static PyMethodDef MyMethods[] = {
> {"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
> {NULL, NULL, 0, NULL}
> };
> PyMODINIT_FUNC initd7db26d1cc9fb59ee56289f4dda72577(void){
> import_pygpu__gpuarray();
> import_array();
>
>
> setup_ext_cuda();
>
>
> (void) Py_InitModule("d7db26d1cc9fb59ee56289f4dda72577", MyMethods);
> }
--
Pascal