>>> import theano
===============================
00001 #include <Python.h>
00002 #include "theano_mod_helper.h"
00003 #include "structmember.h"
00004 #include <sys/time.h>
00005
00006 #if PY_VERSION_HEX >= 0x03000000
00007 #include "numpy/npy_3kcompat.h"
00008 #define PyCObject_AsVoidPtr NpyCapsule_AsVoidPtr
00009 #define PyCObject_GetDesc NpyCapsule_GetDesc
00010 #define PyCObject_Check NpyCapsule_Check
00011 #endif
00012
00013 #ifndef Py_TYPE
00014 #define Py_TYPE(obj) obj->ob_type
00015 #endif
00016
00017 /**
00018
00019 TODO:
00020 - Check max supported depth of recursion
00021 - CLazyLinker should add context information to errors caught
during evaluation. Say what node we were on, add the traceback attached to
the node.
00022 - Clear containers of fully-used intermediate results if allow_gc
is 1
00023 - Add timers for profiling
00024 - Add support for profiling space used.
00025
00026
00027 */
00028 static double pytime(const struct timeval * tv)
00029 {
00030 struct timeval t;
00031 if (!tv)
00032 {
00033 tv = &t;
00034 gettimeofday(&t, NULL);
00035 }
00036 return (double) tv->tv_sec + (double) tv->tv_usec / 1000000.0;
00037 }
00038
00039 /**
00040 Helper routine to convert a PyList of integers to a c array of
integers.
00041 */
00042 static int unpack_list_of_ssize_t(PyObject * pylist, Py_ssize_t
**dst, Py_ssize_t *len,
00043 const char* kwname)
00044 {
00045 Py_ssize_t buflen, *buf;
00046 if (!PyList_Check(pylist))
00047 {
00048 PyErr_Format(PyExc_TypeError, "%s must be list", kwname);
00049 return -1;
00050 }
00051 assert (NULL == *dst);
00052 *len = buflen = PyList_Size(pylist);
00053 *dst = buf = (Py_ssize_t*)calloc(buflen, sizeof(Py_ssize_t));
00054 assert(buf);
00055 for (int ii = 0; ii < buflen; ++ii)
00056 {
00057 PyObject * el_i = PyList_GetItem(pylist, ii);
00058 Py_ssize_t n_i = PyNumber_AsSsize_t(el_i, PyExc_IndexError);
00059 if (PyErr_Occurred())
00060 {
00061 free(buf);
00062 *dst = NULL;
00063 return -1;
00064 }
00065 buf[ii] = n_i;
00066 }
00067 return 0;
00068 }
00069
00070 /**
00071
00072 CLazyLinker
00073
00074
00075 */
00076 typedef struct {
00077 PyObject_HEAD
00078 /* Type-specific fields go here. */
00079 PyObject * nodes; // the python list of nodes
00080 PyObject * thunks; // python list of thunks
00081 PyObject * pre_call_clear; //list of cells to clear on call.
00082 int allow_gc;
00083 Py_ssize_t n_applies;
00084 int n_vars; // number of variables in the graph
00085 int * var_computed; // 1 or 0 for every variable
00086 PyObject ** var_computed_cells;
00087 PyObject ** var_value_cells;
00088 Py_ssize_t **dependencies; // list of vars dependencies for GC
00089 Py_ssize_t *n_dependencies;
00090
00091 Py_ssize_t n_output_vars;
00092 Py_ssize_t * output_vars; // variables that *must* be
evaluated by call
00093
00094 int * is_lazy; // 1 or 0 for every thunk
00095
00096 Py_ssize_t * var_owner; // nodes[var_owner[var_idx]] is
var[var_idx]->owner
00097 int * var_has_owner; // 1 or 0
00098
00099 Py_ssize_t * node_n_inputs;
00100 Py_ssize_t * node_n_outputs;
00101 Py_ssize_t ** node_inputs;
00102 Py_ssize_t ** node_outputs;
00103 Py_ssize_t * node_inputs_outputs_base; // node_inputs and
node_outputs point into this
00104 Py_ssize_t * node_n_prereqs;
00105 Py_ssize_t ** node_prereqs;
00106
00107 Py_ssize_t * update_storage; // input cells to update with the
last outputs in output_vars
00108 Py_ssize_t n_updates;
00109
00110 void ** thunk_cptr_fn;
00111 void ** thunk_cptr_data;
00112 PyObject * call_times;
00113 PyObject * call_counts;
00114 int do_timing;
00115 int need_update_inputs;
00116 int position_of_error; // -1 for no error, otherwise the index into
`thunks` that failed.
00117 } CLazyLinker;
00118
00119
00120 static void
00121 CLazyLinker_dealloc(PyObject* _self)
00122 {
00123 CLazyLinker* self = (CLazyLinker *) _self;
00124 free(self->thunk_cptr_fn);
00125 free(self->thunk_cptr_data);
00126
00127 free(self->is_lazy);
00128
00129 free(self->update_storage);
00130
00131 if (self->node_n_prereqs)
00132 {
00133 for (int i = 0; i < self->n_applies; ++i)
00134 {
00135 free(self->node_prereqs[i]);
00136 }
00137 }
00138 free(self->node_n_prereqs);
00139 free(self->node_prereqs);
00140 free(self->node_inputs_outputs_base);
00141 free(self->node_n_inputs);
00142 free(self->node_n_outputs);
00143 free(self->node_inputs);
00144 free(self->node_outputs);
00145
00146 if (self->dependencies)
00147 {
00148 for (int i = 0; i < self->n_vars; ++i)
00149 {
00150 free(self->dependencies[i]);
00151 }
00152 free(self->dependencies);
00153 free(self->n_dependencies);
00154 }
00155
00156 free(self->var_owner);
00157 free(self->var_has_owner);
00158 free(self->var_computed);
00159 if (self->var_computed_cells)
00160 {
00161 for (int i = 0; i < self->n_vars; ++i)
00162 {
00163 Py_DECREF(self->var_computed_cells[i]);
00164 Py_DECREF(self->var_value_cells[i]);
00165 }
00166 }
00167 free(self->var_computed_cells);
00168 free(self->var_value_cells);
00169 free(self->output_vars);
00170
00171 Py_XDECREF(self->nodes);
00172 Py_XDECREF(self->thunks);
00173 Py_XDECREF(self->call_times);
00174 Py_XDECREF(self->call_counts);
00175 Py_XDECREF(self->pre_call_clear);
00176 Py_TYPE(self)->tp_free((PyObject*)self);
00177 }
00178 static PyObject *
00179 CLazyLinker_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
00180 {
00181 CLazyLinker *self;
00182
00183 self = (CLazyLinker *)type->tp_alloc(type, 0);
00184 if (self != NULL) {
00185 self->nodes = NULL;
00186 self->thunks = NULL;
00187 self->pre_call_clear = NULL;
00188
00189 self->allow_gc = 1;
00190 self->n_applies = 0;
00191 self->n_vars = 0;
00192 self->var_computed = NULL;
00193 self->var_computed_cells = NULL;
00194 self->var_value_cells = NULL;
00195 self->dependencies = NULL;
00196 self->n_dependencies = NULL;
00197
00198 self->n_output_vars = 0;
00199 self->output_vars = NULL;
00200
00201 self->is_lazy = NULL;
00202
00203 self->var_owner = NULL;
00204 self->var_has_owner = NULL;
00205
00206 self->node_n_inputs = NULL;
00207 self->node_n_outputs = NULL;
00208 self->node_inputs = NULL;
00209 self->node_outputs = NULL;
00210 self->node_inputs_outputs_base = NULL;
00211 self->node_prereqs = NULL;
00212 self->node_n_prereqs = NULL;
00213
00214 self->update_storage = NULL;
00215 self->n_updates = 0;
00216
00217 self->thunk_cptr_data = NULL;
00218 self->thunk_cptr_fn = NULL;
00219 self->call_times = NULL;
00220 self->call_counts = NULL;
00221 self->do_timing = 0;
00222
00223 self->need_update_inputs = 0;
00224 self->position_of_error = -1;
00225 }
00226 return (PyObject *)self;
00227 }
00228
00229 static int
00230 CLazyLinker_init(CLazyLinker *self, PyObject *args, PyObject *kwds)
00231 {
00232 static char *kwlist[] = {
00233 (char*)"nodes",
00234 (char*)"thunks",
00235 (char*)"pre_call_clear",
00236 (char*)"allow_gc",
00237 (char*)"call_counts",
00238 (char*)"call_times",
00239 (char*)"compute_map_list",
00240 (char*)"storage_map_list",
00241 (char*)"base_input_output_list",
00242 (char*)"node_n_inputs",
00243 (char*)"node_n_outputs",
00244 (char*)"node_input_offset",
00245 (char*)"node_output_offset",
00246 (char*)"var_owner",
00247 (char*)"is_lazy_list",
00248 (char*)"output_vars",
00249 (char*)"node_prereqs",
00250 (char*)"node_output_size",
00251 (char*)"update_storage",
00252 (char*)"dependencies",
00253 NULL};
00254
00255 PyObject *compute_map_list=NULL,
00256 *storage_map_list=NULL,
00257 *base_input_output_list=NULL,
00258 *node_n_inputs=NULL,
00259 *node_n_outputs=NULL,
00260 *node_input_offset=NULL,
00261 *node_output_offset=NULL,
00262 *var_owner=NULL,
00263 *is_lazy=NULL,
00264 *output_vars=NULL,
00265 *node_prereqs=NULL,
00266 *node_output_size=NULL,
00267 *update_storage=NULL,
00268 *dependencies=NULL;
00269
00270 assert(!self->nodes);
00271 if (! PyArg_ParseTupleAndKeywords(args, kwds,
"OOOiOOOOOOOOOOOOOOOO", kwlist,
00272 &self->nodes,
00273 &self->thunks,
00274 &self->pre_call_clear,
00275 &self->allow_gc,
00276 &self->call_counts,
00277 &self->call_times,
00278 &compute_map_list,
00279 &storage_map_list,
00280 &base_input_output_list,
00281 &node_n_inputs,
00282 &node_n_outputs,
00283 &node_input_offset,
00284 &node_output_offset,
00285 &var_owner,
00286 &is_lazy,
00287 &output_vars,
00288 &node_prereqs,
00289 &node_output_size,
00290 &update_storage,
00291 &dependencies
00292 ))
00293 return -1;
00294 Py_INCREF(self->nodes);
00295 Py_INCREF(self->thunks);
00296 Py_INCREF(self->pre_call_clear);
00297 Py_INCREF(self->call_counts);
00298 Py_INCREF(self->call_times);
00299
00300 Py_ssize_t n_applies = PyList_Size(self->nodes);
00301
00302 self->n_applies = n_applies;
00303 self->n_vars = PyList_Size(var_owner);
00304
00305 if (PyList_Size(self->thunks) != n_applies) return -1;
00306 if (PyList_Size(self->call_counts) != n_applies) return -1;
00307 if (PyList_Size(self->call_times) != n_applies) return -1;
00308
00309 // allocate and initialize thunk_cptr_data and thunk_cptr_fn
00310 if (n_applies)
00311 {
00312 self->thunk_cptr_data = (void**)calloc(n_applies,
sizeof(void*));
00313 self->thunk_cptr_fn = (void**)calloc(n_applies,
sizeof(void*));
00314 self->is_lazy = (int*)calloc(n_applies, sizeof(int));
00315 self->node_prereqs = (Py_ssize_t**)calloc(n_applies,
sizeof(Py_ssize_t*));
00316 self->node_n_prereqs = (Py_ssize_t*)calloc(n_applies,
sizeof(Py_ssize_t));
00317 assert(self->node_prereqs);
00318 assert(self->node_n_prereqs);
00319 assert(self->is_lazy);
00320 assert(self->thunk_cptr_fn);
00321 assert(self->thunk_cptr_data);
00322
00323 for (int i = 0; i < n_applies; ++i)
00324 {
00325 PyObject * thunk = PyList_GetItem(self->thunks, i);
00326 //thunk is borrowed
00327 if (PyObject_HasAttrString(thunk, "cthunk"))
00328 {
00329 PyObject * cthunk = PyObject_GetAttrString(thunk,
"cthunk");
00330 //new reference
00331 assert (cthunk && PyCObject_Check(cthunk));
00332 self->thunk_cptr_fn[i] =
PyCObject_AsVoidPtr(cthunk);
00333 self->thunk_cptr_data[i] =
PyCObject_GetDesc(cthunk);
00334 Py_DECREF(cthunk);
00335 // cthunk is kept alive by membership in
self->thunks
00336 }
00337
00338 PyObject * el_i = PyList_GetItem(is_lazy, i);
00339 self->is_lazy[i] = PyNumber_AsSsize_t(el_i, NULL);
00340
00341 /* now get the prereqs */
00342 el_i = PyList_GetItem(node_prereqs, i);
00343 assert (PyList_Check(el_i));
00344 self->node_n_prereqs[i] = PyList_Size(el_i);
00345 if (self->node_n_prereqs[i])
00346 {
00347 self->node_prereqs[i] = (Py_ssize_t*)malloc(
00348
PyList_Size(el_i)*sizeof(Py_ssize_t));
00349 for (int j = 0; j < PyList_Size(el_i); ++j)
00350 {
00351 PyObject * el_ij = PyList_GetItem(el_i, j);
00352 Py_ssize_t N = PyNumber_AsSsize_t(el_ij,
PyExc_IndexError);
00353 if (PyErr_Occurred())
00354 return -1;
00355 // N < n. variables
00356 assert(N < PyList_Size(var_owner));
00357 self->node_prereqs[i][j] = N;
00358 }
00359 }
00360 }
00361 }
00362 if (PyList_Check(base_input_output_list))
00363 {
00364 Py_ssize_t n_inputs_outputs_base =
PyList_Size(base_input_output_list);
00365 self->node_inputs_outputs_base =
(Py_ssize_t*)calloc(n_inputs_outputs_base,sizeof(Py_ssize_t));
00366 assert(self->node_inputs_outputs_base);
00367 for (int i = 0; i < n_inputs_outputs_base; ++i)
00368 {
00369 PyObject *el_i =
PyList_GetItem(base_input_output_list, i);
00370 Py_ssize_t idx = PyNumber_AsSsize_t(el_i,
PyExc_IndexError);
00371 if (PyErr_Occurred()) return -1;
00372 self->node_inputs_outputs_base[i] = idx;
00373 }
00374 self->node_n_inputs =
(Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
00375 assert(self->node_n_inputs);
00376 self->node_n_outputs =
(Py_ssize_t*)calloc(n_applies,sizeof(Py_ssize_t));
00377 assert(self->node_n_outputs);
00378 self->node_inputs =
(Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
00379 assert(self->node_inputs);
00380 self->node_outputs =
(Py_ssize_t**)calloc(n_applies,sizeof(Py_ssize_t*));
00381 assert(self->node_outputs);
00382 for (int i = 0; i < n_applies; ++i)
00383 {
00384 Py_ssize_t N;
00385 N = PyNumber_AsSsize_t(PyList_GetItem(node_n_inputs,
i),PyExc_IndexError);
00386 if (PyErr_Occurred()) return -1;
00387 assert (N <= n_inputs_outputs_base);
00388 self->node_n_inputs[i] = N;
00389 N = PyNumber_AsSsize_t(PyList_GetItem(node_n_outputs,
i),PyExc_IndexError);
00390 if (PyErr_Occurred()) return -1;
00391 assert (N <= n_inputs_outputs_base);
00392 self->node_n_outputs[i] = N;
00393 N =
PyNumber_AsSsize_t(PyList_GetItem(node_input_offset, i),PyExc_IndexError);
00394 if (PyErr_Occurred()) return -1;
00395 assert (N <= n_inputs_outputs_base);
00396 self->node_inputs[i] =
&self->node_inputs_outputs_base[N];
00397 N =
PyNumber_AsSsize_t(PyList_GetItem(node_output_offset, i),PyExc_IndexError);
00398 if (PyErr_Occurred()) return -1;
00399 assert (N <= n_inputs_outputs_base);
00400 self->node_outputs[i] =
&self->node_inputs_outputs_base[N];
00401 }
00402 }
00403 else
00404 {
00405 PyErr_SetString(PyExc_TypeError, "base_input_output_list
must be list");
00406 return -1;
00407 }
00408
00409 // allocation for var_owner
00410 if (PyList_Check(var_owner))
00411 {
00412 self->var_owner =
(Py_ssize_t*)calloc(self->n_vars,sizeof(Py_ssize_t));
00413 self->var_has_owner =
(int*)calloc(self->n_vars,sizeof(int));
00414 self->var_computed =
(int*)calloc(self->n_vars,sizeof(int));
00415 self->var_computed_cells =
(PyObject**)calloc(self->n_vars,sizeof(PyObject*));
00416 self->var_value_cells =
(PyObject**)calloc(self->n_vars,sizeof(PyObject*));
00417 for (int i = 0; i < self->n_vars; ++i)
00418 {
00419 PyObject * el_i = PyList_GetItem(var_owner, i);
00420 if (el_i == Py_None)
00421 {
00422 self->var_has_owner[i] = 0;
00423 }
00424 else
00425 {
00426 Py_ssize_t N = PyNumber_AsSsize_t(el_i,
PyExc_IndexError);
00427 if (PyErr_Occurred()) return -1;
00428 assert (N <= n_applies);
00429 self->var_owner[i] = N;
00430 self->var_has_owner[i] = 1;
00431 }
00432 self->var_computed_cells[i] =
PyList_GetItem(compute_map_list, i);
00433 Py_INCREF(self->var_computed_cells[i]);
00434 self->var_value_cells[i] =
PyList_GetItem(storage_map_list, i);
00435 Py_INCREF(self->var_value_cells[i]);
00436 }
00437 }
00438 else
00439 {
00440 PyErr_SetString(PyExc_TypeError, "var_owner must be list");
00441 return -1;
00442 }
00443
00444 if (dependencies != Py_None)
00445 {
00446 self->dependencies = (Py_ssize_t**)calloc(self->n_vars,
sizeof(Py_ssize_t *));
00447 self->n_dependencies = (Py_ssize_t*)calloc(self->n_vars,
sizeof(Py_ssize_t));
00448 assert(self->dependencies);
00449 assert(self->n_dependencies);
00450
00451 for (int i = 0; i < self->n_vars; ++i)
00452 {
00453 PyObject *tmp = PyList_GetItem(dependencies, i);
00454 // refcounting - tmp is borrowed
00455 if (unpack_list_of_ssize_t(tmp,
&self->dependencies[i], &self->n_dependencies[i],
00456 "dependencies"))
00457 return -1;
00458 }
00459 }
00460
00461 if (unpack_list_of_ssize_t(output_vars, &self->output_vars,
&self->n_output_vars,
00462 "output_vars"))
00463 return -1;
00464 for (int i = 0; i < self->n_output_vars; ++i)
00465 {
00466 assert(self->output_vars[i] < self->n_vars);
00467 }
00468 if (unpack_list_of_ssize_t(update_storage,
&self->update_storage, &self->n_updates,
00469 "updates_storage"))
00470 return -1;
00471 return 0;
00472 }
00473 static void set_position_of_error(CLazyLinker * self, int
owner_idx)
00474 {
00475 if (self->position_of_error == -1)
00476 {
00477 self->position_of_error = owner_idx;
00478 }
00479 }
00480 static PyObject * pycall(CLazyLinker * self, Py_ssize_t node_idx,
int verbose)
00481 {
00482 // call thunk to see which inputs it wants
00483 PyObject * thunk = PyList_GetItem(self->thunks, node_idx);
00484 // refcounting - thunk is borrowed
00485 PyObject * rval = NULL;
00486 if (self->do_timing)
00487 {
00488 double t0 = pytime(NULL);
00489 if (verbose) fprintf(stderr, "calling via Python (node
%i)\n", (int)node_idx);
00490 rval = PyObject_CallObject(thunk, NULL);
00491 if (rval)
00492 {
00493 double t1 = pytime(NULL);
00494 double ti = PyFloat_AsDouble(
00495 PyList_GetItem(self->call_times,
node_idx));
00496 PyList_SetItem(self->call_times, node_idx,
00497 PyFloat_FromDouble(t1 - t0 + ti));
00498 PyObject * count = PyList_GetItem(self->call_counts,
node_idx);
00499 long icount = PyInt_AsLong(count);
00500 PyList_SetItem(self->call_counts, node_idx,
00501 PyInt_FromLong(icount + 1));
00502 }
00503 }
00504 else
00505 {
00506 if (verbose)
00507 {
00508 fprintf(stderr, "calling via Python (node %i)\n",
(int)node_idx);
00509 }
00510 rval = PyObject_CallObject(thunk, NULL);
00511 }
00512 return rval;
00513 }
00514 static int c_call(CLazyLinker * self, Py_ssize_t node_idx, int
verbose)
00515 {
00516 void * ptr_addr = self->thunk_cptr_fn[node_idx];
00517 int (*fn)(void*) = (int (*)(void*))(ptr_addr);
00518 if (verbose) fprintf(stderr, "calling non-lazy shortcut (node
%i)\n", (int)node_idx);
00519 int err = 0;
00520 if (self->do_timing)
00521 {
00522 double t0 = pytime(NULL);
00523 err = fn(self->thunk_cptr_data[node_idx]);
00524 double t1 = pytime(NULL);
00525 double ti =
PyFloat_AsDouble(PyList_GetItem(self->call_times, node_idx));
00526 PyList_SetItem(self->call_times, node_idx,
PyFloat_FromDouble(t1 - t0 + ti));
00527 PyObject * count = PyList_GetItem(self->call_counts,
node_idx);
00528 long icount = PyInt_AsLong(count);
00529 PyList_SetItem(self->call_counts, node_idx,
PyInt_FromLong(icount+1));
00530 }
00531 else
00532 {
00533 err = fn(self->thunk_cptr_data[node_idx]);
00534 }
00535
00536 if (err)
00537 {
00538 // cast the argument to a PyList (as described near line 226
of cc.py)
00539 PyObject * __ERROR =
((PyObject**)self->thunk_cptr_data[node_idx])[0];
00540 assert (PyList_Check(__ERROR));
00541 assert (PyList_Size(__ERROR) == 3);
00542 PyObject * err_type = PyList_GetItem(__ERROR, 0); //stolen
ref
00543 PyObject * err_msg = PyList_GetItem(__ERROR, 1); //stolen ref
00544 PyObject * err_trace = PyList_GetItem(__ERROR, 2); //stolen
ref
00545 PyList_SET_ITEM(__ERROR, 0, Py_None); Py_INCREF(Py_None);
//clobbers old ref
00546 PyList_SET_ITEM(__ERROR, 1, Py_None); Py_INCREF(Py_None);
//clobbers old ref
00547 PyList_SET_ITEM(__ERROR, 2, Py_None); Py_INCREF(Py_None);
//clobbers old ref
00548
00549 assert(!PyErr_Occurred()); // because CLinker hid the
exception in __ERROR aka data
00550 PyErr_Restore(err_type, err_msg, err_trace); //steals refs
to args
00551 }
00552 if (err) set_position_of_error(self, node_idx);
00553 return err;
00554 }
00555 static
00556 int lazy_rec_eval(CLazyLinker * self, Py_ssize_t var_idx,
PyObject*one, PyObject*zero)
00557 {
00558 PyObject *rval = NULL;
00559 int verbose = 0;
00560 int err = 0;
00561
00562 if (verbose) fprintf(stderr, "lazy_rec computing %i\n",
(int)var_idx);
00563
00564 if (self->var_computed[var_idx] || !self->var_has_owner[var_idx])
00565 return 0;
00566
00567 Py_ssize_t owner_idx = self->var_owner[var_idx];
00568
00569 // STEP 1: compute the prerequisites of the node
00570 // Includes input nodes for non-lazy ops.
00571 for (int i = 0; i < self->node_n_prereqs[owner_idx]; ++i)
00572 {
00573 Py_ssize_t prereq_idx = self->node_prereqs[owner_idx][i];
00574 if (!self->var_computed[prereq_idx])
00575 {
00576 err = lazy_rec_eval(self, prereq_idx, one, zero);
00577 if (err) return err;
00578 }
00579 assert (self->var_computed[prereq_idx]);
00580 }
00581
00582 // STEP 2: compute the node itself
00583 if (self->is_lazy[owner_idx])
00584 {
00585 // update the compute_map cells corresponding to the inputs
of this thunk
00586 for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00587 {
00588 int in_idx = self->node_inputs[owner_idx][i];
00589 if (self->var_computed[in_idx])
00590 {
00591 Py_INCREF(one);
00592 err =
PyList_SetItem(self->var_computed_cells[in_idx], 0, one);
00593 }
00594 else
00595 {
00596 Py_INCREF(zero);
00597 err =
PyList_SetItem(self->var_computed_cells[in_idx], 0, zero);
00598 }
00599 if (err) goto fail;
00600 }
00601
00602 rval = pycall(self, owner_idx, verbose);
00603 // refcounting - rval is new ref
00604 //TODO: to prevent infinite loops
00605 // - consider check that a thunk does not ask for an input
that is already computed
00606 if (rval == NULL)
00607 {
00608 assert (PyErr_Occurred());
00609 err = 1;
00610 goto fail;
00611 }
00612
00613 //update the computed-ness of any output cells
00614 for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
00615 {
00616 int out_idx = self->node_outputs[owner_idx][i];
00617 PyObject * el_i =
PyList_GetItem(self->var_computed_cells[out_idx], 0);
00618 Py_ssize_t N = PyNumber_AsSsize_t(el_i,
PyExc_IndexError);
00619 if (PyErr_Occurred())
00620 {
00621 err = -1;
00622 goto pyfail;
00623 }
00624 assert (N==0 || N==1);
00625 self->var_computed[out_idx] = N;
00626 }
00627 if (!self->var_computed[var_idx])
00628 {
00629 /*
00630 * If self is not computed after the call, this means
that some
00631 * inputs are needed. Compute the ones on the returned
list
00632 * and try to compute the current node again (with
recursive call).
00633 * This allows a node to request more nodes more than
once before
00634 * finally yielding a result.
00635 */
00636 if (!PyList_Check(rval))
00637 {
00638 //TODO: More helpful error to help find *which node*
made this
00639 // bad thunk
00640 PyErr_SetString(PyExc_TypeError,
00641 "lazy thunk should return a list");
00642 err = 1;
00643 goto pyfail;
00644 }
00645
00646 if (!PyList_Size(rval))
00647 {
00648 PyErr_SetString(PyExc_ValueError,
00649 "lazy thunk returned empty list
without computing output");
00650 err = 1;
00651 goto pyfail;
00652 }
00653
00654 for (int i = 0; i < PyList_Size(rval); ++i)
00655 {
00656 PyObject * el_i = PyList_GetItem(rval, i);
00657 Py_ssize_t N = PyNumber_AsSsize_t(el_i,
PyExc_IndexError);
00658 if (PyErr_Occurred())
00659 {
00660 err = 1;
00661 goto pyfail;
00662 }
00663 assert (N <= self->node_n_inputs[owner_idx]);
00664 Py_ssize_t input_idx =
self->node_inputs[owner_idx][N];
00665 err = lazy_rec_eval(self, input_idx, one, zero);
00666 if (err) goto pyfail;
00667 }
00668
00669 Py_DECREF(rval);
00670 /*
00671 * We intentionally skip all the end-of-function
processing
00672 * (mark outputs, GC) as it will be performed by the call
00673 * that actually manages to compute the result.
00674 */
00675 return lazy_rec_eval(self, var_idx, one, zero);
00676 }
00677
00678 Py_DECREF(rval);
00679 }
00680 else //owner is not a lazy op. Ensure all inputs are evaluated.
00681 {
00682 // loop over inputs to owner
00683 // call lazy_rec_eval on each one that is not computed.
00684 // if there's an error, pass it up the stack
00685 for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00686 {
00687 Py_ssize_t input_idx = self->node_inputs[owner_idx][i];
00688 if (!self->var_computed[input_idx])
00689 {
00690 err = lazy_rec_eval(self, input_idx, one, zero);
00691 if (err) return err;
00692 }
00693 assert (self->var_computed[input_idx]);
00694 }
00695
00696 // call the thunk for this owner.
00697 if (self->thunk_cptr_fn[owner_idx])
00698 {
00699 err = c_call(self, owner_idx, verbose);
00700 if (err) goto fail;
00701 }
00702 else
00703 {
00704 rval = pycall(self, owner_idx, verbose);
00705 //rval is new ref
00706 if (rval) //pycall returned normally (no exception)
00707 {
00708 if (rval == Py_None)
00709 {
00710 Py_DECREF(rval); //ignore a return of None
00711 }
00712 else if (PyList_Check(rval))
00713 {
00714 PyErr_SetString(PyExc_TypeError,
00715 "non-lazy thunk should return
None, not list");
00716 err = 1;
00717 goto pyfail;
00718 }
00719 else // don't know what it returned, but it wasn't
right.
00720 {
00721 PyErr_SetObject(PyExc_TypeError, rval);
00722 err = 1;
00723 // We don't release rval since we put it in the
error above
00724 goto fail;
00725 }
00726 }
00727 else // pycall returned NULL (internal error)
00728 {
00729 err = 1;
00730 goto fail;
00731 }
00732 }
00733 }
00734
00735 // loop over all outputs and mark them as computed
00736 for (int i = 0; i < self->node_n_outputs[owner_idx]; ++i)
00737 {
00738 self->var_computed[self->node_outputs[owner_idx][i]] = 1;
00739 }
00740
00741 // Free vars that are not needed anymore
00742 if (self->allow_gc)
00743 {
00744 for (int i = 0; i < self->node_n_inputs[owner_idx]; ++i)
00745 {
00746 int cleanup = 1;
00747 Py_ssize_t i_idx = self->node_inputs[owner_idx][i];
00748 if (!self->var_has_owner[i_idx])
00749 continue;
00750
00751 for (int j = 0; j < self->n_output_vars; ++j)
00752 {
00753 if (i_idx == self->output_vars[j])
00754 {
00755 cleanup = 0;
00756 break;
00757 }
00758 }
00759 if (!cleanup) continue;
00760
00761 for (int j = 0; j < self->n_dependencies[i_idx]; ++j)
00762 {
00763 if
(!self->var_computed[self->dependencies[i_idx][j]])
00764 {
00765 cleanup = 0;
00766 break;
00767 }
00768 }
00769 if (!cleanup) continue;
00770
00771 Py_INCREF(Py_None);
00772 err = PyList_SetItem(self->var_value_cells[i_idx], 0,
Py_None);
00773 //See the Stack gc implementation for why we change it to 2 and
not 0.
00774 self->var_computed[i_idx] = 2;
00775 if (err) goto fail;
00776 }
00777 }
00778
00779 return 0;
00780 pyfail:
00781 Py_DECREF(rval);
00782 fail:
00783 set_position_of_error(self, owner_idx);
00784 return err;
00785 }
00786
00787 static PyObject *
00788 CLazyLinker_call(PyObject *_self, PyObject *args, PyObject *kwds)
00789 {
00790 CLazyLinker * self = (CLazyLinker*)_self;
00791 static char *kwlist[] = {
00792 (char*)"time_thunks",
00793 (char *)"n_calls",
00794 NULL};
00795 int n_calls=1;
00796 if (! PyArg_ParseTupleAndKeywords(args, kwds, "|ii", kwlist,
00797 &self->do_timing,
00798 &n_calls))
00799 return NULL;
00800 int err = 0;
00801 self->position_of_error = -1;
00802 // create constants used to fill the var_compute_cells
00803 PyObject * one = PyInt_FromLong(1);
00804 PyObject * zero = PyInt_FromLong(0);
00805
00806 // pre-allocate our return value
00807 Py_INCREF(Py_None);
00808 PyObject * rval = Py_None;
00809 //clear storage of pre_call_clear elements
00810 for (int call_i = 0; call_i < n_calls && (!err); ++call_i)
00811 {
00812 Py_ssize_t n_pre_call_clear =
PyList_Size(self->pre_call_clear);
00813 assert(PyList_Check(self->pre_call_clear));
00814 for (int i = 0; i < n_pre_call_clear; ++i)
00815 {
00816 PyObject * el_i = PyList_GetItem(self->pre_call_clear,
i);
00817 Py_INCREF(Py_None);
00818 PyList_SetItem(el_i, 0, Py_None);
00819 }
00820 //clear the computed flag out of all non-input vars
00821 for (int i = 0; i < self->n_vars; ++i)
00822 {
00823 self->var_computed[i] = !self->var_has_owner[i];
00824 if (self->var_computed[i])
00825 {
00826 Py_INCREF(one);
00827 PyList_SetItem(self->var_computed_cells[i], 0, one);
00828 }
00829 else
00830 {
00831 Py_INCREF(zero);
00832 PyList_SetItem(self->var_computed_cells[i], 0, zero);
00833 }
00834 }
00835
00836 for (int i = 0; i < self->n_output_vars && (!err); ++i)
00837 {
00838 err = lazy_rec_eval(self, self->output_vars[i], one,
zero);
00839 }
00840
00841 if (!err)
00842 {
00843 // save references to outputs prior to updating storage
containers
00844 assert (self->n_output_vars >= self->n_updates);
00845 Py_DECREF(rval);
00846 rval = PyList_New(self->n_output_vars);
00847 for (int i = 0; i < (self->n_output_vars); ++i)
00848 {
00849 Py_ssize_t src = self->output_vars[i];
00850 PyObject * item =
PyList_GetItem(self->var_value_cells[src], 0);
00851 if (self->var_computed[src] != 1)
00852 {
00853 err = 1;
00854 PyErr_Format(PyExc_AssertionError,
00855 "The compute map of output %d
should contain "
00856 "1 at the end of execution, not
%d.",
00857 i, self->var_computed[src]);
00858 break;
00859 }
00860 Py_INCREF(item);
00861 PyList_SetItem(rval, i, item);
00862 }
00863 }
00864
00865 if (!err)
00866 {
00867 // Update the inputs that have an update rule
00868 for (int i = 0; i < self->n_updates; ++i)
00869 {
00870 PyObject* tmp = PyList_GetItem(rval,
self->n_output_vars - self->n_updates + i);
00871 Py_INCREF(tmp);
00872 Py_ssize_t dst = self->update_storage[i];
00873 PyList_SetItem(self->var_value_cells[dst], 0, tmp);
00874 }
00875 }
00876 }
00877
00878 /*
00879 Clear everything that is left and not an output. This is
needed
00880 for lazy evaluation since the current GC algo is too
conservative
00881 with lazy graphs.
00882 */
00883 if (self->allow_gc && !err)
00884 {
00885 for (Py_ssize_t i = 0; i < self->n_vars; ++i)
00886 {
00887 int do_cleanup = 1;
00888 if (!self->var_has_owner[i] || !self->var_computed[i])
00889 continue;
00890 for (int j = 0; j < self->n_output_vars; ++j)
00891 {
00892 if (i == self->output_vars[j])
00893 {
00894 do_cleanup = 0;
00895 break;
00896 }
00897 }
00898 if (!do_cleanup)
00899 continue;
00900 Py_INCREF(Py_None);
00901 PyList_SetItem(self->var_value_cells[i], 0, Py_None);
00902 }
00903 }
00904 Py_DECREF(one);
00905 Py_DECREF(zero);
00906 if (err)
00907 {
00908 Py_DECREF(rval);
00909 return NULL;
00910 }
00911 return rval;
00912 }
00913
00914 #if 0
00915 static PyMethodDef CLazyLinker_methods[] = {
00916 {
00917 //"name", (PyCFunction)CLazyLinker_accept, METH_VARARGS,
"Return the name, combining the first and last name"
00918 },
00919 {NULL} /* Sentinel */
00920 };
00921 #endif
00922
00923
00924 static PyObject *
00925 CLazyLinker_get_allow_gc(CLazyLinker *self, void *closure)
00926 {
00927 return PyBool_FromLong(self->allow_gc);
00928 }
00929
00930 static int
00931 CLazyLinker_set_allow_gc(CLazyLinker *self, PyObject *value, void
*closure)
00932 {
00933 if(!PyBool_Check(value))
00934 return -1;
00935
00936 if (value == Py_True)
00937 self->allow_gc = true;
00938 else
00939 self->allow_gc = false;
00940 return 0;
00941 }
00942
00943 static PyGetSetDef CLazyLinker_getset[] = {
00944 {(char*)"allow_gc",
00945 (getter)CLazyLinker_get_allow_gc,
00946 (setter)CLazyLinker_set_allow_gc,
00947 (char*)"do this function support allow_gc",
00948 NULL},
00949 {NULL, NULL, NULL, NULL} /* Sentinel */
00950 };
00951 static PyMemberDef CLazyLinker_members[] = {
00952 {(char*)"nodes", T_OBJECT_EX, offsetof(CLazyLinker, nodes), 0,
00953 (char*)"list of nodes"},
00954 {(char*)"thunks", T_OBJECT_EX, offsetof(CLazyLinker, thunks),
0,
00955 (char*)"list of thunks in program"},
00956 {(char*)"call_counts", T_OBJECT_EX, offsetof(CLazyLinker,
call_counts), 0,
00957 (char*)"number of calls of each thunk"},
00958 {(char*)"call_times", T_OBJECT_EX, offsetof(CLazyLinker,
call_times), 0,
00959 (char*)"total runtime in each thunk"},
00960 {(char*)"position_of_error", T_INT, offsetof(CLazyLinker,
position_of_error), 0,
00961 (char*)"position of failed thunk"},
00962 {(char*)"time_thunks", T_INT, offsetof(CLazyLinker,
do_timing), 0,
00963 (char*)"bool: nonzero means call will time thunks"},
00964 {(char*)"need_update_inputs", T_INT, offsetof(CLazyLinker,
need_update_inputs), 0,
00965 (char*)"bool: nonzero means Function.__call__ must implement
update mechanism"},
00966 {NULL} /* Sentinel */
00967 };
00968
00969 static PyTypeObject lazylinker_ext_CLazyLinkerType = {
00970 #if defined(NPY_PY3K)
00971 PyVarObject_HEAD_INIT(NULL, 0)
00972 #else
00973 PyObject_HEAD_INIT(NULL)
00974 0, /*ob_size*/
00975 #endif
00976 "lazylinker_ext.CLazyLinker", /*tp_name*/
00977 sizeof(CLazyLinker), /*tp_basicsize*/
00978 0, /*tp_itemsize*/
00979 CLazyLinker_dealloc, /*tp_dealloc*/
00980 0, /*tp_print*/
00981 0, /*tp_getattr*/
00982 0, /*tp_setattr*/
00983 0, /*tp_compare*/
00984 0, /*tp_repr*/
00985 0, /*tp_as_number*/
00986 0, /*tp_as_sequence*/
00987 0, /*tp_as_mapping*/
00988 0, /*tp_hash */
00989 CLazyLinker_call, /*tp_call*/
00990 0, /*tp_str*/
00991 0, /*tp_getattro*/
00992 0, /*tp_setattro*/
00993 0, /*tp_as_buffer*/
00994 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
00995 "CLazyLinker object", /* tp_doc */
00996 0, /* tp_traverse */
00997 0, /* tp_clear */
00998 0, /* tp_richcompare */
00999 0, /* tp_weaklistoffset */
01000 0, /* tp_iter */
01001 0, /* tp_iternext */
01002 0,//CLazyLinker_methods, /* tp_methods */
01003 CLazyLinker_members, /* tp_members */
01004 CLazyLinker_getset, /* tp_getset */
01005 0, /* tp_base */
01006 0, /* tp_dict */
01007 0, /* tp_descr_get */
01008 0, /* tp_descr_set */
01009 0, /* tp_dictoffset */
01010 (initproc)CLazyLinker_init,/* tp_init */
01011 0, /* tp_alloc */
01012 CLazyLinker_new, /* tp_new */
01013 };
01014
01015 static PyObject * get_version(PyObject *dummy, PyObject *args)
01016 {
01017 PyObject *result = PyFloat_FromDouble(0.21);
01018 return result;
01019 }
01020
01021 static PyMethodDef lazylinker_ext_methods[] = {
01022 {"get_version", get_version, METH_VARARGS, "Get extension
version."},
01023 {NULL, NULL, 0, NULL} /* Sentinel */
01024 };
01025
01026 #if defined(NPY_PY3K)
01027 static struct PyModuleDef moduledef = {
01028 PyModuleDef_HEAD_INIT,
01029 "lazylinker_ext",
01030 NULL,
01031 -1,
01032 lazylinker_ext_methods,
01033 NULL,
01034 NULL,
01035 NULL,
01036 NULL
01037 };
01038 #endif
01039 #if defined(NPY_PY3K)
01040 #define RETVAL m
01041 PyMODINIT_FUNC
01042 PyInit_lazylinker_ext(void) {
01043 #else
01044 #define RETVAL
01045 PyMODINIT_FUNC
01046 initlazylinker_ext(void)
01047 {
01048 #endif
01049 PyObject* m;
01050
01051 lazylinker_ext_CLazyLinkerType.tp_new = PyType_GenericNew;
01052 if (PyType_Ready(&lazylinker_ext_CLazyLinkerType) < 0)
01053 return RETVAL;
01054 #if defined(NPY_PY3K)
01055 m = PyModule_Create(&moduledef);
01056 #else
01057 m = Py_InitModule3("lazylinker_ext", lazylinker_ext_methods,
01058 "Example module that creates an extension
type.");
01059 #endif
01060 Py_INCREF(&lazylinker_ext_CLazyLinkerType);
01061 PyModule_AddObject(m, "CLazyLinker", (PyObject
*)&lazylinker_ext_CLazyLinkerType);
01062
01063 return RETVAL;
01064 }
01065
===============================
Problem occurred during compilation with the command line below:
/usr/bin/g++ -shared -g -march=core2 -mcx16 -msahf -maes -mpclmul -mpopcnt
-mavx --param l1-cache-size=32 --param l1-cache-line-size=64 --param
l2-cache-size=20480 -mtune=generic
-DNPY_NO_DEPRECATED_API=NPY_1_7_API_VERSION -m64 -fPIC
-I/usr/local/lib/python3.5/site-packages/numpy/core/include
-I/usr/local/include/python3.5m
-I/usr/local/lib/python3.5/site-packages/theano/gof -fvisibility=hidden -o
/home/malab/.theano/compiledir_Linux-2.6-el6.x86_64-x86_64-with-centos-6.8-Final-x86_64-3.5.0-64/lazylinker_ext/lazylinker_ext.so
/home/malab/.theano/compiledir_Linux-2.6-el6.x86_64-x86_64-with-centos-6.8-Final-x86_64-3.5.0-64/lazylinker_ext/mod.cpp
-L/usr/local/lib -lpython3.5m
/usr/bin/ld: /usr/local/lib/libpython3.5m.a(abstract.o): relocation
R_X86_64_32S against `_PyObject_NextNotImplemented' can not be used when
making a shared object; recompile with -fPIC
/usr/local/lib/libpython3.5m.a: could not read symbols: Bad value
collect2: ld returned 1 exit status
Traceback (most recent call last):
File "/usr/local/lib/python3.5/site-packages/theano/gof/lazylinker_c.py",
line 74, in <module>
raise ImportError()
ImportError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.5/site-packages/theano/gof/lazylinker_c.py",
line 91, in <module>
raise ImportError()
ImportError
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "/usr/local/lib/python3.5/site-packages/theano/__init__.py", line
63, in <module>
from theano.compile import (
File "/usr/local/lib/python3.5/site-packages/theano/compile/__init__.py",
line 9, in <module>
from theano.compile.function_module import *
File
"/usr/local/lib/python3.5/site-packages/theano/compile/function_module.py",
line 22, in <module>
import theano.compile.mode
File "/usr/local/lib/python3.5/site-packages/theano/compile/mode.py",
line 12, in <module>
import theano.gof.vm
File "/usr/local/lib/python3.5/site-packages/theano/gof/vm.py", line 638,
in <module>
from . import lazylinker_c
File "/usr/local/lib/python3.5/site-packages/theano/gof/lazylinker_c.py",
line 126, in <module>
preargs=args)
File "/usr/local/lib/python3.5/site-packages/theano/gof/cmodule.py", line
2204, in compile_str
(status, compile_stderr.replace('\n', '. ')))
Exception: Compilation failed (return status=1): /usr/bin/ld:
/usr/local/lib/libpython3.5m.a(abstract.o): relocation R_X86_64_32S against
`_PyObject_NextNotImplemented' can not be used when making a shared object;
recompile with -fPIC. /usr/local/lib/libpython3.5m.a: could not read
symbols: Bad value. collect2: ld returned 1 exit status.
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.