Hello,
I'm attempting to use conv3D2D in Theano, but when I get to the
function-compilation stage it fails with the following errors:
mod.cpp:684:1: error: expected unqualified-id before '{' token
mod.cpp:1345:1: error: expected '}' at end of input
mod.cpp: In destructor '{anonymous}::__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577::~__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577()':
mod.cpp:573:21: error: 'cleanup' was not declared in this scope
mod.cpp: At global scope:
mod.cpp:678:1: error: expected unqualified-id at end of input
#undef ITEMSIZE_INPUT_2
^
mod.cpp:678:1: error: expected '}' at end of input
I'm attaching the mod.cpp file that was generated by cc.py, op.py, dnn.py,
etc. The initialization code from dnn_base.c starts at line 684, but no
function name is defined there, so the code sits outside any enclosing code
block. The "#section cleanup_code_struct" section also appears to have been
inserted into the wrong spot.
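For context, here is a minimal sketch of where I believe those two sections
are supposed to land (struct name abbreviated; this is just my reading of the
COp "#section" mechanism, not Theano's actual emitter):

#include <Python.h>

namespace {
struct __struct_compiled_op_XXXX {
  PyObject* __ERROR;

  int init(PyObject* error_storage) {
    // The contents of "#section init_code_struct" belong here, inside init().
    __ERROR = error_storage;
    return 0;
  }

  void cleanup(void) {
    // The contents of "#section cleanup_code_struct" belong here, so that
    // the destructor's call to cleanup() has something to resolve to.
  }

  ~__struct_compiled_op_XXXX(void) { cleanup(); }
};
}

In my mod.cpp, by contrast, the init code appears to sit outside any function
or struct body, which would explain both the "expected unqualified-id" errors
and the destructor failing to find cleanup().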
Can someone tell me how to fix this?
--Dmitry
[Attachment: mod.cpp]
#include <Python.h>
#include <iostream>
#include "theano_mod_helper.h"
#include <gpuarray/array.h>
#include <gpuarray/kernel.h>
#include <gpuarray/error.h>
#include <gpuarray/buffer.h>
#include <gpuarray/buffer_blas.h>
#include <numpy/arrayobject.h>
#include <gpuarray_api.h>
#include <math.h>
#include <numpy/arrayscalars.h>
#include "cudnn.h"
#include "cudnn_helper.h"
#include "gpuarray_helper.h"
#include "gpuarray/types.h"
#include "gpuarray/array.h"
#include "gpuarray/util.h"
#include "gpuarray/ext_cuda.h"
#include "gpuarray_api.h"
#include "numpy_compat.h"
//////////////////////
//// Support Code
//////////////////////
void _capsule_destructor(PyObject *o) {
void *d = PyCapsule_GetContext(o);
void *p = PyCapsule_GetPointer(o, NULL);
void (*f)(void *) = (void (*)(void *))d;
if (f != NULL) f(p);
}
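/* Fill a cuDNN tensor descriptor from a PyGpuArrayObject: map the gpuarray
 * typecode to a cudnnDataType_t, convert byte strides to element strides,
 * and pad descriptors below 3d up to the cuDNN minimum of three dims. */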
static int
c_set_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t desc) {
cudnnDataType_t dt;
size_t ds;
switch (var->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_tensorNd");
return -1;
}
ds = gpuarray_get_elsize(var->ga.typecode);
int strs[8], dims[8], default_stride = 1;
unsigned int nd = PyGpuArray_NDIM(var);
if (nd > 8) {
PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
return -1;
}
for (unsigned int _i = nd; _i > 0; _i--) {
unsigned int i = _i - 1;
strs[i] = PyGpuArray_STRIDE(var, i) ?
PyGpuArray_STRIDE(var, i)/ds : default_stride;
default_stride *= PyGpuArray_DIM(var, i);
dims[i] = PyGpuArray_DIM(var, i);
}
/* Tensors can't be smaller than 3d for cudnn so we pad the
* descriptor if they are */
for (unsigned int i = nd; i < 3; i++) {
strs[i] = 1;
dims[i] = 1;
}
cudnnStatus_t err = cudnnSetTensorNdDescriptor(desc, dt, nd < 3 ? 3 : nd,
dims, strs);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set tensorNd descriptor: %s",
cudnnGetErrorString(err));
return -1;
}
return 0;
}
static int c_make_tensorNd(PyGpuArrayObject *var, cudnnTensorDescriptor_t *desc) {
cudnnStatus_t err;
err = cudnnCreateTensorDescriptor(desc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create tensor descriptor: %s",
cudnnGetErrorString(err));
return -1;
}
if (c_set_tensorNd(var, *desc) != 0) {
cudnnDestroyTensorDescriptor(*desc);
return -1;
}
return 0;
}
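/* Same idea for filter (kernel) descriptors. Filters must be C-contiguous,
 * and descriptors of fewer than 3 dimensions are padded to 3d. */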
static int
c_set_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t desc) {
cudnnDataType_t dt;
cudnnStatus_t err;
if (!GpuArray_IS_C_CONTIGUOUS(&var->ga)) {
PyErr_SetString(PyExc_ValueError,
"Only contiguous filters (kernels) are supported.");
return -1;
}
switch (var->ga.typecode) {
case GA_FLOAT:
dt = CUDNN_DATA_FLOAT;
break;
case GA_DOUBLE:
dt = CUDNN_DATA_DOUBLE;
break;
case GA_HALF:
dt = CUDNN_DATA_HALF;
break;
default:
PyErr_SetString(PyExc_TypeError, "Non-float datatype in c_set_filter");
return -1;
}
int dims[8];
unsigned int nd = PyGpuArray_NDIM(var);
if (nd > 8) {
PyErr_SetString(PyExc_TypeError, "Tensor of more than 8d");
return -1;
}
for (unsigned int _i = nd; _i > 0; _i--) {
unsigned int i = _i - 1;
dims[i] = PyGpuArray_DIM(var, i);
}
/* Filters can't be less than 3d so we pad */
for (unsigned int i = nd; i < 3; i++)
dims[i] = 1;
if (nd < 3)
nd = 3;
#if CUDNN_VERSION >= 5000
err = cudnnSetFilterNdDescriptor(desc, dt, CUDNN_TENSOR_NCHW, nd, dims);
#else
err = cudnnSetFilterNdDescriptor(desc, dt, nd, dims);
#endif
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not set filter descriptor: %s.",
cudnnGetErrorString(err));
return -1;
}
return 0;
}
static int c_make_filter(PyGpuArrayObject *var, cudnnFilterDescriptor_t *desc) {
cudnnStatus_t err;
err = cudnnCreateFilterDescriptor(desc);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"Could not create tensor descriptor: %s",
cudnnGetErrorString(err));
return -1;
}
if (c_set_filter(var, *desc) != 0) {
cudnnDestroyFilterDescriptor(*desc);
return -1;
}
return 0;
}
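/* The generated Op struct: init() wires up the storage cells and creates the
 * per-node cuDNN state, run() performs one application of the Op, and
 * cleanup() (invoked from the destructor) releases everything. */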
namespace {
struct __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577 {
PyObject* __ERROR;
PyObject* storage_V3;
PyObject* storage_V5;
PyObject* storage_V7;
PyObject* storage_V9;
PyObject* storage_V11;
PyObject* storage_V13;
PyObject* storage_V1;
PyObject* storage_V15;
PyObject* py_V15;
PyGpuContextObject *V15;
#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
PyGpuContextObject *ctx;
cudnnHandle_t APPLY_SPECIFIC(_handle);
cudnnTensorDescriptor_t APPLY_SPECIFIC(input);
cudnnTensorDescriptor_t APPLY_SPECIFIC(output);
cudnnFilterDescriptor_t APPLY_SPECIFIC(kerns);
#ifdef CHOOSE_ALGO
int reuse_algo;
cudnnConvolutionFwdAlgo_t prev_algo;
#ifndef CHOOSE_ONCE
size_t prev_img_dims[5];
size_t prev_kern_dims[5];
#endif
#endif
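/* Forward convolution: select (or reuse) an algorithm, allocate the
 * workspace, and invoke cudnnConvolutionForward. Returns 0 on success and 1
 * on failure with a Python exception set. */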
int
APPLY_SPECIFIC(conv_fwd)(PyGpuArrayObject *input, PyGpuArrayObject *kerns,
PyGpuArrayObject *om,
cudnnConvolutionDescriptor_t desc,
double alpha, double beta,
PyGpuArrayObject **output,
PyGpuContextObject *c) {
cudnnStatus_t err = CUDNN_STATUS_SUCCESS;
float af = alpha, bf = beta;
void *alpha_p;
void *beta_p;
if (PyGpuArray_DIMS(input)[1] != PyGpuArray_DIMS(kerns)[1]) {
PyErr_SetString(PyExc_ValueError,
"images and kernel must have the same stack size");
return 1;
}
if (c_set_tensorNd(input, APPLY_SPECIFIC(input)) == -1)
return 1;
if (c_set_filter(kerns, APPLY_SPECIFIC(kerns)) == -1)
return 1;
switch (input->ga.typecode) {
case GA_DOUBLE:
alpha_p = (void *)&alpha;
beta_p = (void *)&beta;
break;
case GA_FLOAT:
case GA_HALF:
alpha_p = (void *)&af;
beta_p = (void *)&bf;
break;
default:
PyErr_SetString(PyExc_TypeError, "Unsupported type in convolution");
return 1;
}
#ifdef CONV_INPLACE
Py_XDECREF(*output);
*output = om;
Py_INCREF(*output);
#else
if (theano_prep_output(output, PyGpuArray_NDIM(om), PyGpuArray_DIMS(om),
om->ga.typecode, GA_C_ORDER, c) != 0)
return 1;
if (beta != 0.0 && pygpu_move(*output, om))
return 1;
#endif
if (c_set_tensorNd(*output, APPLY_SPECIFIC(output)) == -1)
return 1;
cudnnConvolutionFwdAlgo_t algo = CONV_ALGO;
cuda_enter(c->ctx);
#ifdef CHOOSE_ALGO
#ifndef CHOOSE_ONCE
reuse_algo = 1;
for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
reuse_algo = (reuse_algo &&
PyGpuArray_DIM(input, i) == prev_img_dims[i]);
reuse_algo = (reuse_algo &&
PyGpuArray_DIM(kerns, i) == prev_kern_dims[i]);
}
#endif
if (!reuse_algo) {
#ifdef CHOOSE_TIME
int count;
cudnnConvolutionFwdAlgoPerf_t choice;
err = cudnnFindConvolutionForwardAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output), 1, &count, &choice);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error selecting convolution algo: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
algo = choice.algo;
#else
size_t free;
int err2 = gpucontext_property(c->ctx, GA_CTX_PROP_FREE_GMEM, &free);
if (err2 != GA_NO_ERROR) {
PyErr_Format(PyExc_RuntimeError, "Error when trying to find the "
"memory information on the GPU");
cuda_exit(c->ctx);
return 1;
}
err = cudnnGetConvolutionForwardAlgorithm(
APPLY_SPECIFIC(_handle), APPLY_SPECIFIC(input), APPLY_SPECIFIC(kerns),
desc, APPLY_SPECIFIC(output),
CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT, free, &algo);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error selecting convolution algo: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
#endif
prev_algo = algo;
} else {
algo = prev_algo;
}
#ifdef CHOOSE_ONCE
reuse_algo = 1;
#else
for (unsigned int i = 0; i < PyGpuArray_NDIM(input); i++) {
prev_img_dims[i] = PyGpuArray_DIM(input, i);
prev_kern_dims[i] = PyGpuArray_DIM(kerns, i);
}
#endif
#endif
/* These two algos are not supported for 3d conv */
if (PyGpuArray_NDIM(input) == 5 &&
(algo == CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_GEMM))
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
// The FFT implementation does not support strides, 1x1 filters or inputs
// with a spatial dimension larger than 1024. The tiled-FFT implementation
// does not support strides.
// If the chosen implementation is FFT or tiled-FFT, validate that it can
// be used on the current data and default to a safe implementation if it
// can't.
// The following code is 2d-specific, but that is fine since FFT and
// tiled-FFT are only defined for 2d filters.
if ((algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT ||
algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING) && PyGpuArray_NDIM(input) == 4) {
// Extract the properties of the convolution descriptor
int nd;
int pad[2];
int stride[2];
int upscale[2];
cudnnConvolutionMode_t mode;
cudnnDataType_t data_type;
err = cudnnGetConvolutionNdDescriptor(desc, 2, &nd, pad, stride,
upscale, &mode, &data_type);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting convolution properties: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
if (algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT)
{
if (stride[0] != 1 || stride[1] != 1 ||
PyGpuArray_DIM(input, 2) > 1024 || PyGpuArray_DIM(input, 3) > 1024 ||
(PyGpuArray_DIM(kerns, 2) == 1 && PyGpuArray_DIM(kerns, 3) == 1))
{
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
}
else
{
// algo == CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING
if (stride[0] != 1 || stride[1] != 1)
{
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
}
}
}
{
size_t worksize;
gpudata *workspace;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
algo,
&worksize);
if (err == CUDNN_STATUS_NOT_SUPPORTED) {
// Fall back to the implicit GEMM algorithm if the requested one
// is not supported.
// TODO: Print a warning
algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
err = cudnnGetConvolutionForwardWorkspaceSize(APPLY_SPECIFIC(_handle),
APPLY_SPECIFIC(input),
APPLY_SPECIFIC(kerns),
desc,
APPLY_SPECIFIC(output),
algo,
&worksize);
}
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError,
"error getting worksize: %s",
cudnnGetErrorString(err));
cuda_exit(c->ctx);
return 1;
}
/*
 * This is less than ideal since we need to free it afterwards (which
 * introduces a synchronization point). But we don't have a module
 * in which to place a nice get_work_mem() function.
 */
if (worksize != 0) {
workspace = gpudata_alloc(c->ctx, worksize, NULL, 0, NULL);
if (workspace == NULL) {
PyErr_SetString(PyExc_RuntimeError,
"Could not allocate working memory");
cuda_exit(c->ctx);
return 1;
}
}
cuda_wait(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_wait((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
err = cudnnConvolutionForward(
APPLY_SPECIFIC(_handle),
alpha_p,
APPLY_SPECIFIC(input), PyGpuArray_DEV_DATA(input),
APPLY_SPECIFIC(kerns), PyGpuArray_DEV_DATA(kerns),
desc, algo,
worksize == 0 ? NULL : *(void **)workspace, worksize,
beta_p,
APPLY_SPECIFIC(output), PyGpuArray_DEV_DATA(*output));
if (worksize != 0)
gpudata_release(workspace);
cuda_record(input->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record(kerns->ga.data, GPUARRAY_CUDA_WAIT_READ);
cuda_record((*output)->ga.data, GPUARRAY_CUDA_WAIT_WRITE);
}
cuda_exit(c->ctx);
if (err != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "error doing operation: %s",
cudnnGetErrorString(err));
return 1;
}
return 0;
}
#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577() {
// This is only somewhat safe because we:
// 1) Are not a virtual class
// 2) Do not use any virtual classes in the members
// 3) Deal with mostly POD and pointers
// If this changes, we would have to revise this, but for
// now I am tired of chasing segfaults because
// initialization code had an error and some pointer has
// a junk value.
memset(this, 0, sizeof(*this));
}
~__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577(void) {
cleanup();
}
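/* Take ownership of the storage cells and set up the per-node cuDNN state:
 * the handle, its stream, and the input/output/filter descriptors. */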
int init(PyObject* __ERROR, PyObject* storage_V3, PyObject* storage_V5, PyObject* storage_V7, PyObject* storage_V9, PyObject* storage_V11, PyObject* storage_V13, PyObject* storage_V1, PyObject* storage_V15) {
Py_XINCREF(storage_V3);
Py_XINCREF(storage_V5);
Py_XINCREF(storage_V7);
Py_XINCREF(storage_V9);
Py_XINCREF(storage_V11);
Py_XINCREF(storage_V13);
Py_XINCREF(storage_V1);
Py_XINCREF(storage_V15);
this->storage_V3 = storage_V3;
this->storage_V5 = storage_V5;
this->storage_V7 = storage_V7;
this->storage_V9 = storage_V9;
this->storage_V11 = storage_V11;
this->storage_V13 = storage_V13;
this->storage_V1 = storage_V1;
this->storage_V15 = storage_V15;
py_V15 = PyList_GET_ITEM(storage_V15, 0);
{Py_XINCREF(py_V15);}
if (!PyObject_TypeCheck(py_V15, &PyGpuContextType)) {
PyErr_SetString(PyExc_TypeError, "expected a GpuContext");
{
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
return 15;
}
}
V15 = (PyGpuContextObject *)py_V15;
Py_INCREF(V15);
#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
#define FAIL { \
if (!PyErr_Occurred()) { \
PyErr_SetString(PyExc_RuntimeError, \
"Unexpected error in an Op's C code. " \
"No Python exception was set."); \
} \
return 17; \
}
#define PARAMS V15
{
// We need to keep a reference here to have it available in the destructor.
ctx = PARAMS;
Py_INCREF(ctx);
cuda_enter(PARAMS->ctx);
cudnnStatus_t err;
APPLY_SPECIFIC(_handle) = NULL;
if ((err = cudnnCreate(&APPLY_SPECIFIC(_handle))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "could not create cuDNN handle: %s",
cudnnGetErrorString(err));
cuda_exit(PARAMS->ctx);
FAIL;
}
if ((err = cudnnSetStream(APPLY_SPECIFIC(_handle),
cuda_get_stream(PARAMS->ctx))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_RuntimeError, "Could not set cudnn stream: %s",
cudnnGetErrorString(err));
cuda_exit(PARAMS->ctx);
FAIL;
}
cuda_exit(PARAMS->ctx);
}
cudnnStatus_t APPLY_SPECIFIC(err);
APPLY_SPECIFIC(input) = NULL;
APPLY_SPECIFIC(output) = NULL;
APPLY_SPECIFIC(kerns) = NULL;
if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(input))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(inp): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
if ((APPLY_SPECIFIC(err) = cudnnCreateTensorDescriptor(&APPLY_SPECIFIC(output))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate tensor descriptor "
"(out): %s", cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
if ((APPLY_SPECIFIC(err) = cudnnCreateFilterDescriptor(&APPLY_SPECIFIC(kerns))) != CUDNN_STATUS_SUCCESS) {
PyErr_Format(PyExc_MemoryError, "could not allocate filter descriptor: %s",
cudnnGetErrorString(APPLY_SPECIFIC(err)));
FAIL;
}
#ifdef CHOOSE_ALGO
reuse_algo = 0;
prev_algo = CONV_ALGO;
#ifndef CHOOSE_ONCE
memset(prev_img_dims, 0, sizeof(prev_img_dims));
memset(prev_kern_dims, 0, sizeof(prev_kern_dims));
#endif
#endif
#undef FAIL
#undef PARAMS
#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
this->__ERROR = __ERROR;
return 0;
}
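/* Destroy the cuDNN handle and descriptors, then drop the references taken
 * in init(). */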
void cleanup(void) {
__label_1:
double __DUMMY_1;
__label_3:
double __DUMMY_3;
__label_5:
double __DUMMY_5;
__label_7:
double __DUMMY_7;
__label_9:
double __DUMMY_9;
__label_11:
double __DUMMY_11;
__label_13:
double __DUMMY_13;
__label_15:
Py_XDECREF(V15); V15 = NULL;
{Py_XDECREF(py_V15);}
double __DUMMY_15;
__label_18:
#define DTYPE_INPUT_0 npy_float32
#define TYPENUM_INPUT_0 11
#define ITEMSIZE_INPUT_0 4
#define DTYPE_INPUT_1 npy_float32
#define TYPENUM_INPUT_1 11
#define ITEMSIZE_INPUT_1 4
#define DTYPE_INPUT_2 npy_float32
#define TYPENUM_INPUT_2 11
#define ITEMSIZE_INPUT_2 4
#define DTYPE_INPUT_4 npy_float32
#define TYPENUM_INPUT_4 11
#define ITEMSIZE_INPUT_4 4
#define DTYPE_INPUT_5 npy_float32
#define TYPENUM_INPUT_5 11
#define ITEMSIZE_INPUT_5 4
#define DTYPE_OUTPUT_0 npy_float32
#define TYPENUM_OUTPUT_0 11
#define ITEMSIZE_OUTPUT_0 4
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
cuda_enter(ctx->ctx);
cudnnDestroy(APPLY_SPECIFIC(_handle));
cuda_exit(ctx->ctx);
Py_DECREF((PyObject *)ctx);
if (APPLY_SPECIFIC(input) != NULL)
cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(input));
if (APPLY_SPECIFIC(output) != NULL)
cudnnDestroyTensorDescriptor(APPLY_SPECIFIC(output));
if (APPLY_SPECIFIC(kerns) != NULL)
cudnnDestroyFilterDescriptor(APPLY_SPECIFIC(kerns));
#undef DTYPE_INPUT_0
#undef TYPENUM_INPUT_0
#undef ITEMSIZE_INPUT_0
#undef DTYPE_INPUT_1
#undef TYPENUM_INPUT_1
#undef ITEMSIZE_INPUT_1
#undef DTYPE_INPUT_2
#undef TYPENUM_INPUT_2
#undef ITEMSIZE_INPUT_2
#undef DTYPE_INPUT_4
#undef TYPENUM_INPUT_4
#undef ITEMSIZE_INPUT_4
#undef DTYPE_INPUT_5
#undef TYPENUM_INPUT_5
#undef ITEMSIZE_INPUT_5
#undef DTYPE_OUTPUT_0
#undef TYPENUM_OUTPUT_0
#undef ITEMSIZE_OUTPUT_0
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
double __DUMMY_18;
Py_XDECREF(this->storage_V3);
Py_XDECREF(this->storage_V5);
Py_XDECREF(this->storage_V7);
Py_XDECREF(this->storage_V9);
Py_XDECREF(this->storage_V11);
Py_XDECREF(this->storage_V13);
Py_XDECREF(this->storage_V1);
Py_XDECREF(this->storage_V15);
}
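/* One application of the Op: unpack the inputs from their storage cells,
 * call conv_fwd, write the output back, and report any failure through
 * __ERROR. */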
int run(void) {
int __failure = 0;
PyObject* py_V1;
PyGpuArrayObject *V1;
PyObject* py_V3;
PyGpuArrayObject *V3;
PyObject* py_V5;
PyGpuArrayObject *V5;
PyObject* py_V7;
PyGpuArrayObject *V7;
PyObject* py_V9;
cudnnConvolutionDescriptor_t V9;
PyObject* py_V11;
typedef npy_float32 V11_dtype; // Deprecated; use dtype_V11 instead.
typedef npy_float32 dtype_V11;
npy_float32 V11;
PyObject* py_V13;
typedef npy_float32 V13_dtype; // Deprecated; use dtype_V13 instead.
typedef npy_float32 dtype_V13;
npy_float32 V13;
{
py_V1 = PyList_GET_ITEM(storage_V1, 0);
{Py_XINCREF(py_V1);}
if (py_V1 == Py_None)
{
V1 = NULL;
}
else
{
V1 = NULL;
if (py_V1 == Py_None) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
{
__failure = 2;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_2;}
}
/* First check if we are the base type exactly (the most common case),
then do the full subclass check if needed. */
if (py_V1->ob_type != &PyGpuArrayType &&
!PyObject_TypeCheck(py_V1, &PyGpuArrayType)) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
{
__failure = 2;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_2;}
}
V1 = (PyGpuArrayObject *)py_V1;
Py_INCREF(V1);
}
{
py_V3 = PyList_GET_ITEM(storage_V3, 0);
{Py_XINCREF(py_V3);}
V3 = NULL;
if (py_V3 == Py_None) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
{
__failure = 4;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_4;}
}
/* First check if we are the base type exactly (the most common case),
then do the full subclass check if needed. */
if (py_V3->ob_type != &PyGpuArrayType &&
!PyObject_TypeCheck(py_V3, &PyGpuArrayType)) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
{
__failure = 4;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_4;}
}
V3 = (PyGpuArrayObject *)py_V3;
Py_INCREF(V3);
{
py_V5 = PyList_GET_ITEM(storage_V5, 0);
{Py_XINCREF(py_V5);}
V5 = NULL;
if (py_V5 == Py_None) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
{
__failure = 6;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_6;}
}
/* First check if we are the base type exactly (the most common case),
then do the full subclass check if needed. */
if (py_V5->ob_type != &PyGpuArrayType &&
!PyObject_TypeCheck(py_V5, &PyGpuArrayType)) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
{
__failure = 6;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_6;}
}
V5 = (PyGpuArrayObject *)py_V5;
Py_INCREF(V5);
{
py_V7 = PyList_GET_ITEM(storage_V7, 0);
{Py_XINCREF(py_V7);}
V7 = NULL;
if (py_V7 == Py_None) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray, not None");
{
__failure = 8;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_8;}
}
/* First check if we are the base type exactly (the most common case),
then do the full subclass check if needed. */
if (py_V7->ob_type != &PyGpuArrayType &&
!PyObject_TypeCheck(py_V7, &PyGpuArrayType)) {
PyErr_SetString(PyExc_ValueError, "expected a GpuArray");
{
__failure = 8;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_8;}
}
V7 = (PyGpuArrayObject *)py_V7;
Py_INCREF(V7);
{
py_V9 = PyList_GET_ITEM(storage_V9, 0);
{Py_XINCREF(py_V9);}
V9 = (cudnnConvolutionDescriptor_t)PyCapsule_GetPointer(py_V9, NULL);
if (V9 == NULL) {
__failure = 10;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_10;}
{
py_V11 = PyList_GET_ITEM(storage_V11, 0);
{Py_XINCREF(py_V11);}
if (!PyObject_TypeCheck(py_V11, &PyFloat32ArrType_Type))
{
PyErr_Format(PyExc_ValueError,
"Scalar check failed (npy_float32)");
{
__failure = 12;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_12;}
}
PyArray_ScalarAsCtype(py_V11, &V11);
{
py_V13 = PyList_GET_ITEM(storage_V13, 0);
{Py_XINCREF(py_V13);}
if (!PyObject_TypeCheck(py_V13, &PyFloat32ArrType_Type))
{
PyErr_Format(PyExc_ValueError,
"Scalar check failed (npy_float32)");
{
__failure = 14;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_14;}
}
PyArray_ScalarAsCtype(py_V13, &V13);
{
{
// Op class GpuDnnConv
#define APPLY_SPECIFIC(str) str##_node_d7db26d1cc9fb59ee56289f4dda72577_0
#define CONV_INPLACE 1
#define CONV_ALGO CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM
{
if (APPLY_SPECIFIC(conv_fwd)(V3, V5, V7, V9, V11, V13, &V1, V15) != 0) {
{
__failure = 17;
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_RuntimeError,
"Unexpected error in an Op's C code. "
"No Python exception was set.");
}
goto __label_17;}
}
}
#undef APPLY_SPECIFIC
#undef CONV_INPLACE
#undef CONV_ALGO
__label_17:
double __DUMMY_17;
}
__label_16:
double __DUMMY_16;
}
__label_14:
{Py_XDECREF(py_V13);}
double __DUMMY_14;
}
__label_12:
{Py_XDECREF(py_V11);}
double __DUMMY_12;
}
__label_10:
{Py_XDECREF(py_V9);}
double __DUMMY_10;
}
__label_8:
Py_XDECREF(V7); V7 = NULL;
{Py_XDECREF(py_V7);}
double __DUMMY_8;
}
__label_6:
Py_XDECREF(V5); V5 = NULL;
{Py_XDECREF(py_V5);}
double __DUMMY_6;
}
__label_4:
Py_XDECREF(V3); V3 = NULL;
{Py_XDECREF(py_V3);}
double __DUMMY_4;
}
__label_2:
if (!__failure) {
if (!V1) {
Py_XDECREF(py_V1);
Py_INCREF(Py_None);
py_V1 = Py_None;
} else if ((void *)py_V1 != (void *)V1) {
Py_XDECREF(py_V1);
py_V1 = (PyObject *)V1;
Py_INCREF(py_V1);
}
PyObject* old = PyList_GET_ITEM(storage_V1, 0);
{Py_XINCREF(py_V1);}
PyList_SET_ITEM(storage_V1, 0, py_V1);
{Py_XDECREF(old);}
}
Py_XDECREF(V1); V1 = NULL;
{Py_XDECREF(py_V1);}
double __DUMMY_2;
}
if (__failure) {
// When there is a failure, this code puts the exception
// in __ERROR.
PyObject* err_type = NULL;
PyObject* err_msg = NULL;
PyObject* err_traceback = NULL;
PyErr_Fetch(&err_type, &err_msg, &err_traceback);
if (!err_type) {err_type = Py_None;Py_INCREF(Py_None);}
if (!err_msg) {err_msg = Py_None; Py_INCREF(Py_None);}
if (!err_traceback) {err_traceback = Py_None; Py_INCREF(Py_None);}
PyObject* old_err_type = PyList_GET_ITEM(__ERROR, 0);
PyObject* old_err_msg = PyList_GET_ITEM(__ERROR, 1);
PyObject* old_err_traceback = PyList_GET_ITEM(__ERROR, 2);
PyList_SET_ITEM(__ERROR, 0, err_type);
PyList_SET_ITEM(__ERROR, 1, err_msg);
PyList_SET_ITEM(__ERROR, 2, err_traceback);
{Py_XDECREF(old_err_type);}
{Py_XDECREF(old_err_msg);}
{Py_XDECREF(old_err_traceback);}
}
// The failure code is returned to indicate which code block failed.
return __failure;
}
};
}
static int __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor(__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* self) {
return self->run();
}
static void __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor(void* executor, void* self) {
delete ((__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577*)self);
}
//////////////////////
//// Functions
//////////////////////
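/* instantiate() allocates the struct, runs init() on the 9 storage
 * arguments, and returns a thunk the Theano runtime calls to execute the
 * Op. */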
static PyObject * instantiate(PyObject * self, PyObject *argtuple) {
assert(PyTuple_Check(argtuple));
if (9 != PyTuple_Size(argtuple)){
PyErr_Format(PyExc_TypeError, "Wrong number of arguments, expected 9, got %i", (int)PyTuple_Size(argtuple));
return NULL;
}
__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577* struct_ptr = new __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577();
if (struct_ptr->init( PyTuple_GET_ITEM(argtuple, 0),PyTuple_GET_ITEM(argtuple, 1),PyTuple_GET_ITEM(argtuple, 2),PyTuple_GET_ITEM(argtuple, 3),PyTuple_GET_ITEM(argtuple, 4),PyTuple_GET_ITEM(argtuple, 5),PyTuple_GET_ITEM(argtuple, 6),PyTuple_GET_ITEM(argtuple, 7),PyTuple_GET_ITEM(argtuple, 8) ) != 0) {
delete struct_ptr;
return NULL;
}
PyObject* thunk = PyCObject_FromVoidPtrAndDesc((void*)(&__struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_executor), struct_ptr, __struct_compiled_op_d7db26d1cc9fb59ee56289f4dda72577_destructor);
return thunk; }
//////////////////////
//// Module init
//////////////////////
static PyMethodDef MyMethods[] = {
{"instantiate", instantiate, METH_VARARGS, "undocumented"} ,
{NULL, NULL, 0, NULL}
};
PyMODINIT_FUNC initd7db26d1cc9fb59ee56289f4dda72577(void){
import_pygpu__gpuarray();
import_array();
setup_ext_cuda();
(void) Py_InitModule("d7db26d1cc9fb59ee56289f4dda72577", MyMethods);
}