Changeset: 9694d2d8e907 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9694d2d8e907
Modified Files:
monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
monetdb5/extras/pyapi/formatinput.c
monetdb5/extras/pyapi/formatinput.h
monetdb5/extras/pyapi/pyapi.c
sql/backends/monet5/Tests/pyapi08.sql
sql/backends/monet5/Tests/pyapi14.sql
sql/backends/monet5/Tests/pyapi14.stable.err
sql/backends/monet5/Tests/pyapi14.stable.out
Branch: pyapi
Log Message:
We don't need to rewrite the Marshal module.
diffs (truncated from 705 to 300 lines):
diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -1,43 +1,8 @@
-def consts_to_string(consts):
- result = ""
- for const in consts:
- if not str(type(const).__name__) == "code":
- result = result + '(' + str(type(const).__name__) + ':' +
str(const) + ')'
- else:
- result = result + '(code:{@' + code_object_to_string(const) + '})'
- return result
-
-def names_to_string(names):
- result = ""
- for name in names:
- result = result + name + ','
- return result
-
-def format_code(code):
- result = "";
- for i in code:
- if ord(i) < 16: result = result + '\\\\x0' + hex(ord(i))[2:]
- else: result = result + '\\\\x' + hex(ord(i))[2:]
- return result
-
def code_object_to_string(codeobject):
- args = codeobject.co_argcount
- nlocals = codeobject.co_nlocals
- stacksize = codeobject.co_stacksize
- flags = codeobject.co_flags
- code = format_code(codeobject.co_code)
- consts = codeobject.co_consts
- names = codeobject.co_names
- varnames = codeobject.co_varnames
- freevars = codeobject.co_freevars
- cellvars = codeobject.co_cellvars
- filename = codeobject.co_filename
- name = codeobject.co_name
- firstlineno = codeobject.co_firstlineno
- lnotab = format_code(codeobject.co_lnotab)
- return str(args) + '@' + str(nlocals) + '@' + str(stacksize) + '@' +
str(flags) + '@' + code + '@' + consts_to_string(consts) + '@' +
names_to_string(names) + '@' + names_to_string(varnames) + '@' +
names_to_string(freevars) + '@' + names_to_string(cellvars) + '@' + filename +
'@' + name + '@' + str(firstlineno) + '@' + lnotab + '@'
+ import marshal, string
+ return '@' + "".join('\\x' + x.encode('hex') for x in
marshal.dumps(codeobject))
def function_to_string(fun):
return code_object_to_string(fun.__code__)
diff --git a/monetdb5/extras/pyapi/formatinput.c b/monetdb5/extras/pyapi/formatinput.c
--- a/monetdb5/extras/pyapi/formatinput.c
+++ b/monetdb5/extras/pyapi/formatinput.c
@@ -13,213 +13,51 @@
const size_t additional_argcount = 3;
const char * additional_args[] = {"_columns", "_column_types", "_conn"};
-//! Parse a PyCodeObject from a string, the string is expected to be in the
format {@<encoded_function>};, where <encoded_function> is all the PyCodeObject
properties in order
+//! Parse a PyCodeObject from a string, the string is expected to be in the
format {@<encoded_function>};, where <encoded_function> is the Marshalled code
object
PyObject *PyCodeObject_ParseString(char *string, char **msg);
-
-char* GetArg(char *string, char *storage, int index);
-char* GetArg(char *string, char *storage, int index)
-{
- int i, j = 0, k = 0;
- int brackets = 0;
- int length = strlen(string);
- for(i = 2; i < length; i++) {
- if (string[i] == '(') brackets++;
- else if (string[i] == ')') brackets--;
- if (brackets == 0 && string[i] == '@') {
- j++;
- if (j > index) {
- storage[k] = '\0';
- break;
- }
- }
- else if (j == index) {
- storage[k++] = string[i];
- }
- }
- return storage;
-}
-
-size_t ModifyCode(char *string, char *storage);
-size_t ModifyCode(char *string, char *storage)
-{
- int i = 0, j = 0, token = 0;
- int length = strlen(string);
- char tokens[3]; tokens[2] = '\0';
- for(i = 0; i < length; i++) {
- if (string[i] == '\\') {
- token = 1;
- } else if (token == 1) {
- token++;
- if (string[i] != 'x') token = 0;
- } else if (token > 1 && token < 4) {
- tokens[token - 2] = string[i];
- token++;
- if (token == 4) {
- storage[j++] = strtol(tokens, NULL, 16);
- }
- } else {
- storage[j++] = string[i];
- }
- }
- storage[j] = '\0';
- return j;
-}
-
-PyObject *GetConstantObject(char *string, char *storage, char **msg);
-PyObject *GetConstantObject(char *string, char *storage, char **msg)
-{
- int numbers = 0, i = 0, j = 0, k = 0;
- int length = strlen(string);
- PyObject *result;
- bool type = false;
- char type_str[100];
- int brackets = 0;
- //first get the amount of constant objects
- for(i = 0; i < length; i++) {
- if (string[i] == '(') brackets++;
- if (string[i] == ')') {
- brackets--;
- if (brackets == 0) numbers++;
- }
- }
- if (brackets != 0) {
- //invalid number of brackets
- *msg = createException(MAL, "pyapi.eval", "Invalid number of brackets
in encoded Python code object string.");
- return NULL;
- }
-
- //then create the python tuple and fill it with the actual python objects
- result = PyTuple_New(numbers);
- for(i = 0; i < length; i++) {
- if (string[i] == '(') brackets++;
- if (string[i] == ')') {
- brackets--;
- if (brackets == 0) {
- PyObject *object = NULL;
- storage[k] = '\0';
- //parse object type
- if (strcmp(type_str, "NoneType") == 0) {
- object = Py_None;
- } else if (strcmp(type_str, "int") == 0) {
- object = PyInt_FromString(storage, NULL, 0);
- } else if (strcmp(type_str, "long") == 0) {
- object = PyLong_FromString(storage, NULL, 0);
- } else if (strcmp(type_str, "float") == 0) {
- dbl d;
- str_to_dbl(storage, strlen(storage), &d);
- object = PyFloat_FromDouble(d);
- } else if (strcmp(type_str, "str") == 0) {
- object = PyString_FromString(storage);
- } else if (strcmp(type_str, "unicode") == 0) {
- object = PyUnicode_FromString(storage);
- } else if (strcmp(type_str, "code") == 0) {
- //recursive call, we've got a function within this
function, so we have to parse another code object
- object = PyCodeObject_ParseString(storage, msg);
- } else {
- *msg = createException(MAL, "pyapi.eval", "Unrecognized
constant type %s in encoded Python code object string.", type_str);
- return NULL;
- }
- PyTuple_SetItem(result, j, object);
-
- type = false;
- j++;
- k = 0;
- continue;
- }
- }
-
- if (string[i] == ':' && !type) {
- type = true;
- type_str[k] = '\0';
- k = 0;
- } else {
- if (type) {
- storage[k++] = string[i];
- } else {
- if (string[i] != '(') {
- type_str[k++] = string[i];
- }
- }
- }
- }
- return result;
-}
-
-PyObject* GetStringTuple(char *string, char *storage);
-PyObject* GetStringTuple(char *string, char *storage)
-{
- int numbers = 0, i = 0, j = 0, k = 0;
- int length = strlen(string);
- PyObject *result;
- //first get the amount of entries in the string by looking at the amount
of commas
- for(i = 0; i < length; i++) {
- if (string[i] == ',') {
- numbers++;
- }
- }
- //then create the pytuple and actually create every PyStringObject
- result = PyTuple_New(numbers);
- for(i = 0; i < length; i++) {
- if (string[i] == ',' || i == length - 1) {
- PyObject *obj;
- storage[k] = '\0';
- obj = PyString_FromString(storage);
- PyTuple_SetItem(result ,j, obj);
- j++;
- k = 0;
- } else {
- storage[k++] = string[i];
- }
- }
- return result;
-}
-
PyObject *PyCodeObject_ParseString(char *string, char **msg)
{
- int argcount, nlocals, stacksize, flags, firstlineno;
- PyObject *code, *name, *filename, *lnotab;
- PyObject *consts, *names, *varnames, *freevars, *cellvars;
- size_t size;
- char *temp_string = GDKmalloc(strlen(string));
- char *temp_string2 = GDKmalloc(strlen(string));
- if (temp_string == NULL || temp_string2 == NULL) {
+ size_t length = strlen(string);
+ PyObject *code_object, *tuple, *mystr;
+ char *code_copy = GDKmalloc(length * sizeof(char));
+ char hex[3];
+ size_t i, j;
+ hex[2] = '\0';
+ if (code_copy == NULL) {
*msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
return NULL;
}
-
- //argcount is a single int
- argcount = atoi(GetArg(string, temp_string, 0));
- //nlocals is a single int
- nlocals = atoi(GetArg(string, temp_string, 1));
- //stacksize is a single int
- stacksize = atoi(GetArg(string, temp_string, 2));
- //flags is a single int
- flags = atoi(GetArg(string, temp_string, 3));
- //now parse the code, the code has tokens like "\\x01", which have to be
changed to a single character literal '\x01', this is done in the ModifyCode
function
- size = ModifyCode(GetArg(string, temp_string, 4), temp_string2);
- code = PyString_FromStringAndSize(temp_string2, size);
- //now parse the constants, constants are a list of python objects in the
form of (type:value) (ex: (int:20)(int:33)(str:hello))
- consts = GetConstantObject(GetArg(string, temp_string, 5), temp_string2,
msg);
- //now parse the names, this is a list of strings delimited by commas (ex:
name,name2,)
- names = GetStringTuple(GetArg(string, temp_string, 6), temp_string2);
- //now parse the varnames, same as above
- varnames = GetStringTuple(GetArg(string, temp_string, 7), temp_string2);
- //now parse the freevars, same as above
- freevars = GetStringTuple(GetArg(string, temp_string, 8), temp_string2);
- //now parse the cellvars, same as above
- cellvars = GetStringTuple(GetArg(string, temp_string, 9), temp_string2);
- //now parse the filename, a single string
- filename = PyString_FromString(GetArg(string, temp_string, 10));
- //now parse the function name, a single string
- name = PyString_FromString(GetArg(string, temp_string, 11));
- //now parse the line number, a single int
- firstlineno = atoi(GetArg(string, temp_string, 12));
- //now parse lnotab, this is a mapping from <addr> -> <lineno> for error
reporting purposes, it also has tokens like "\\x01"
- size = ModifyCode(GetArg(string, temp_string, 13), temp_string2);
- lnotab = PyString_FromStringAndSize(temp_string2, size);
-
- GDKfree(temp_string); GDKfree(temp_string2);
- return (PyObject*)PyCode_New(argcount, nlocals, stacksize, flags, code,
consts, names, varnames, freevars, cellvars, filename, name, firstlineno,
lnotab);
+ // decode hex codes (e.g. \x00) in the string to the actual numeric
representation
+ for(i = 2, j = 0; i < length - 2; i++) {
+ if (string[i] == '\\' && string[i + 1] == '\\') i++;
+ if (string[i] == '\\' && string[i + 1] == 't') {
+ code_copy[j++] = '\t';
+ i++;
+ } else if (string[i] == '\\' && string[i + 1] == 'n') {
+ code_copy[j++] = '\n';
+ i++;
+ } else if (string[i] == '\\' && string[i + 1] == 'x') {
+ hex[0] = string[i + 2]; hex[1] = string[i + 3];
+ code_copy[j++] = (char)strtol(hex, NULL, 16);
+ i += 3;
+ } else {
+ code_copy[j++] = string[i];
+ }
+ }
+ code_copy[j] = '\0';
+ tuple = PyTuple_New(1);
+ mystr = PyString_FromStringAndSize(code_copy, j); // use FromStringAndSize
because the string is not null-terminated
+ PyTuple_SetItem(tuple, 0, mystr);
+ code_object = PyObject_CallObject(marshal_loads, tuple);
+ Py_DECREF(tuple);
+ GDKfree(code_copy);
+ if (code_object == NULL) {
+ PyErr_Print();
+ *msg = createException(MAL, "pyapi.eval", "Failed to marshal.loads()
encoded object");
+ return NULL;
+ }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list