Changeset: 9694d2d8e907 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=9694d2d8e907
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/formatinput.c
        monetdb5/extras/pyapi/formatinput.h
        monetdb5/extras/pyapi/pyapi.c
        sql/backends/monet5/Tests/pyapi08.sql
        sql/backends/monet5/Tests/pyapi14.sql
        sql/backends/monet5/Tests/pyapi14.stable.err
        sql/backends/monet5/Tests/pyapi14.stable.out
Branch: pyapi
Log Message:

We don't need to rewrite the Marshal module.


diffs (truncated from 705 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -1,43 +1,8 @@
 
 
-def consts_to_string(consts):
-    result = ""
-    for const in consts:
-        if not str(type(const).__name__) == "code":
-            result = result + '(' + str(type(const).__name__) + ':' + 
str(const) + ')'
-        else:
-            result = result + '(code:{@' + code_object_to_string(const) + '})'
-    return result
-
-def names_to_string(names):
-    result = ""
-    for name in names:
-        result = result + name + ','
-    return result
-
-def format_code(code):
-    result = "";
-    for i in code:
-        if ord(i) < 16: result = result + '\\\\x0' + hex(ord(i))[2:]
-        else: result = result + '\\\\x' + hex(ord(i))[2:]
-    return result
-
 def code_object_to_string(codeobject):
-    args = codeobject.co_argcount
-    nlocals = codeobject.co_nlocals
-    stacksize = codeobject.co_stacksize
-    flags = codeobject.co_flags
-    code = format_code(codeobject.co_code)
-    consts = codeobject.co_consts
-    names = codeobject.co_names
-    varnames = codeobject.co_varnames
-    freevars = codeobject.co_freevars
-    cellvars = codeobject.co_cellvars
-    filename = codeobject.co_filename
-    name = codeobject.co_name
-    firstlineno = codeobject.co_firstlineno
-    lnotab = format_code(codeobject.co_lnotab)
-    return str(args) + '@' + str(nlocals) + '@' + str(stacksize) + '@' + 
str(flags) + '@' + code + '@' + consts_to_string(consts) + '@' + 
names_to_string(names) + '@' + names_to_string(varnames) + '@' + 
names_to_string(freevars) + '@' + names_to_string(cellvars) + '@' + filename + 
'@' + name + '@' + str(firstlineno) + '@' + lnotab + '@'
+    import marshal, string
+    return '@' + "".join('\\x' + x.encode('hex') for x in 
marshal.dumps(codeobject))
 
 def function_to_string(fun):
     return code_object_to_string(fun.__code__)
diff --git a/monetdb5/extras/pyapi/formatinput.c b/monetdb5/extras/pyapi/formatinput.c
--- a/monetdb5/extras/pyapi/formatinput.c
+++ b/monetdb5/extras/pyapi/formatinput.c
@@ -13,213 +13,51 @@
 const size_t additional_argcount = 3;
 const char * additional_args[] = {"_columns", "_column_types", "_conn"};
 
-//! Parse a PyCodeObject from a string, the string is expected to be in the format {@<encoded_function>};, where <encoded_function> is all the PyCodeObject properties in order
+//! Parse a PyCodeObject from a string, the string is expected to be in the format {@<encoded_function>};, where <encoded_function> is the Marshalled code object
 PyObject *PyCodeObject_ParseString(char *string, char **msg);
-
-char* GetArg(char *string, char *storage, int index);
-char* GetArg(char *string, char *storage, int index)
-{
-    int i, j = 0, k = 0;
-    int brackets = 0;
-    int length = strlen(string);
-    for(i = 2; i < length; i++) {
-        if (string[i] == '(') brackets++;
-        else if (string[i] == ')') brackets--;
-        if (brackets == 0 && string[i] == '@') {
-            j++;
-            if (j > index) {
-                storage[k] = '\0';
-                break;
-            }
-        }
-        else if (j == index) {
-            storage[k++] = string[i];
-        }
-    }
-    return storage;
-}
-
-size_t ModifyCode(char *string, char *storage);
-size_t ModifyCode(char *string, char *storage)
-{
-    int i = 0, j = 0, token = 0;
-    int length = strlen(string);
-    char tokens[3]; tokens[2] = '\0';
-    for(i = 0; i < length; i++) {
-        if (string[i] == '\\') {
-            token = 1;
-        } else if (token == 1) {
-            token++;
-            if (string[i] != 'x') token = 0;
-        } else if (token > 1 && token < 4) {
-            tokens[token - 2] = string[i];
-            token++;
-            if (token == 4) {
-                storage[j++] = strtol(tokens, NULL, 16);
-            }
-        } else {
-            storage[j++] = string[i];
-        }
-    }
-    storage[j] = '\0';
-    return j;
-}
-
-PyObject *GetConstantObject(char *string, char *storage, char **msg);
-PyObject *GetConstantObject(char *string, char *storage, char **msg)
-{
-    int numbers = 0, i = 0, j = 0, k = 0;
-    int length = strlen(string);
-    PyObject *result;
-    bool type = false;
-    char type_str[100];
-    int brackets = 0;
-    //first get the amount of constant objects
-    for(i = 0; i < length; i++) {
-        if (string[i] == '(') brackets++;
-        if (string[i] == ')') {
-            brackets--;
-            if (brackets == 0) numbers++;
-        }
-    }
-    if (brackets != 0) {
-        //invalid number of brackets
-        *msg = createException(MAL, "pyapi.eval", "Invalid number of brackets 
in encoded Python code object string.");
-        return NULL;
-    }
-
-    //then create the python tuple and fill it with the actual python objects
-    result = PyTuple_New(numbers);
-    for(i = 0; i < length; i++) {
-        if (string[i] == '(') brackets++;
-        if (string[i] == ')') {
-            brackets--;
-            if (brackets == 0) {
-                PyObject *object = NULL;
-                storage[k] = '\0';
-                //parse object type
-                if (strcmp(type_str, "NoneType") == 0) {
-                    object = Py_None;
-                } else if (strcmp(type_str, "int") == 0) {
-                    object = PyInt_FromString(storage, NULL, 0);
-                } else if (strcmp(type_str, "long") == 0) {
-                    object = PyLong_FromString(storage, NULL, 0);
-                } else if (strcmp(type_str, "float") == 0) {
-                    dbl d;
-                    str_to_dbl(storage, strlen(storage), &d);
-                    object = PyFloat_FromDouble(d);
-                } else if (strcmp(type_str, "str") == 0) {
-                    object = PyString_FromString(storage);
-                } else if (strcmp(type_str, "unicode") == 0) {
-                    object = PyUnicode_FromString(storage);
-                } else if (strcmp(type_str, "code") == 0) {
-                    //recursive call, we've got a function within this 
function, so we have to parse another code object
-                    object = PyCodeObject_ParseString(storage, msg);
-                } else {
-                    *msg = createException(MAL, "pyapi.eval", "Unrecognized 
constant type %s in encoded Python code object string.", type_str);
-                    return NULL;
-                }
-                PyTuple_SetItem(result, j, object);
-
-                type = false;
-                j++;
-                k = 0;    
-                continue;            
-            }
-        } 
-
-        if (string[i] == ':' && !type) {
-            type = true;
-            type_str[k] = '\0';
-            k = 0;
-        } else {
-            if (type) {
-                storage[k++] = string[i];
-            } else {
-                if (string[i] != '(') {
-                    type_str[k++] = string[i];
-                }
-            }
-        }
-    }
-    return result;
-}
-
-PyObject* GetStringTuple(char *string, char *storage);
-PyObject* GetStringTuple(char *string, char *storage)
-{
-    int numbers = 0, i = 0, j = 0, k = 0;
-    int length = strlen(string);
-    PyObject *result;
-    //first get the amount of entries in the string by looking at the amount 
of commas
-    for(i = 0; i < length; i++) {
-        if (string[i] == ',') {
-            numbers++;
-        }
-    }
-    //then create the pytuple and actually create every PyStringObject
-    result = PyTuple_New(numbers); 
-    for(i = 0; i < length; i++) {
-        if (string[i] == ',' || i == length - 1) {
-            PyObject *obj;
-            storage[k] = '\0';
-            obj = PyString_FromString(storage);
-            PyTuple_SetItem(result ,j, obj);
-            j++;
-            k = 0;
-        } else {
-            storage[k++] = string[i];
-        }
-    }
-    return result;
-}
-
 PyObject *PyCodeObject_ParseString(char *string, char **msg)
 {
-    int argcount, nlocals, stacksize, flags, firstlineno;
-    PyObject *code, *name, *filename, *lnotab;
-    PyObject *consts, *names, *varnames, *freevars, *cellvars;
-    size_t size;
-    char *temp_string = GDKmalloc(strlen(string));
-    char *temp_string2 = GDKmalloc(strlen(string));
-    if (temp_string == NULL || temp_string2 == NULL) {
+    size_t length = strlen(string);
+    PyObject *code_object, *tuple, *mystr;
+    char *code_copy = GDKmalloc(length * sizeof(char));
+    char hex[3]; 
+    size_t i, j;
+    hex[2] = '\0';
+    if (code_copy == NULL) {
         *msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
         return NULL;
     }
-
-    //argcount is a single int
-    argcount = atoi(GetArg(string, temp_string, 0));
-    //nlocals is a single int
-    nlocals = atoi(GetArg(string, temp_string, 1));
-    //stacksize is a single int
-    stacksize = atoi(GetArg(string, temp_string, 2));
-    //flags is a single int
-    flags = atoi(GetArg(string, temp_string, 3));
-    //now parse the code, the code has tokens like "\\x01", which have to be 
changed to a single character literal '\x01', this is done in the ModifyCode 
function
-    size = ModifyCode(GetArg(string, temp_string, 4), temp_string2);
-    code = PyString_FromStringAndSize(temp_string2, size);
-    //now parse the constants, constants are a list of python objects in the 
form of (type:value) (ex: (int:20)(int:33)(str:hello))
-    consts = GetConstantObject(GetArg(string, temp_string, 5), temp_string2, 
msg);
-    //now parse the names, this is a list of strings delimited by commas (ex: 
name,name2,)
-    names = GetStringTuple(GetArg(string, temp_string, 6), temp_string2);
-    //now parse the varnames, same as above
-    varnames = GetStringTuple(GetArg(string, temp_string, 7), temp_string2);
-    //now parse the freevars, same as above
-    freevars = GetStringTuple(GetArg(string, temp_string, 8), temp_string2);
-    //now parse the cellvars, same as above
-    cellvars = GetStringTuple(GetArg(string, temp_string, 9), temp_string2);
-    //now parse the filename, a single string
-    filename = PyString_FromString(GetArg(string, temp_string, 10));
-    //now parse the function name, a single string
-    name = PyString_FromString(GetArg(string, temp_string, 11));
-    //now parse the line number, a single int
-    firstlineno = atoi(GetArg(string, temp_string, 12));
-    //now parse lnotab, this is a mapping from <addr> -> <lineno> for error 
reporting purposes, it also has tokens like "\\x01"
-    size = ModifyCode(GetArg(string, temp_string, 13), temp_string2);
-    lnotab = PyString_FromStringAndSize(temp_string2, size);
-
-    GDKfree(temp_string); GDKfree(temp_string2);
-    return (PyObject*)PyCode_New(argcount, nlocals, stacksize, flags, code, 
consts, names, varnames, freevars, cellvars, filename, name, firstlineno, 
lnotab);
+    // decode hex codes (e.g. \x00) in the string to the actual numeric 
representation
+    for(i = 2, j = 0; i < length - 2; i++) {
+        if (string[i] == '\\' && string[i + 1] == '\\') i++;
+        if (string[i] == '\\' && string[i + 1] == 't') {
+            code_copy[j++] = '\t';
+            i++;
+        } else if (string[i] == '\\' && string[i + 1] == 'n') {
+            code_copy[j++] = '\n';
+            i++;
+        } else if (string[i] == '\\' && string[i + 1] == 'x') {
+            hex[0] = string[i + 2]; hex[1] = string[i + 3];
+            code_copy[j++] = (char)strtol(hex, NULL, 16);
+            i += 3;
+        } else {
+            code_copy[j++] = string[i];
+        }
+    }
+    code_copy[j] = '\0';
+    tuple = PyTuple_New(1);
+    mystr = PyString_FromStringAndSize(code_copy, j); // use FromStringAndSize 
because the string is not null-terminated
+    PyTuple_SetItem(tuple, 0, mystr);
+    code_object = PyObject_CallObject(marshal_loads, tuple);
+    Py_DECREF(tuple);
+    GDKfree(code_copy);
+    if (code_object == NULL) {
+        PyErr_Print();
+        *msg = createException(MAL, "pyapi.eval", "Failed to marshal.loads() 
encoded object");
+        return NULL;
+    }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to