Changeset: 998b958a8f23 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=998b958a8f23
Added Files:
sql/backends/monet5/Tests/pyapi00.sql
sql/backends/monet5/Tests/pyapi01.sql
sql/backends/monet5/Tests/pyapi02.sql
Modified Files:
monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
monetdb5/extras/pyapi/pyapi.c
sql/backends/monet5/Tests/All
sql/backends/monet5/sql_gencode.c
sql/include/sql_catalog.h
sql/server/rel_psm.c
sql/server/sql_parser.y
Branch: pyapi
Log Message:
Added the Pyapi to the SQL front end and added some test cases for it [WIP].
diffs (truncated from 750 to 300 lines):
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
@@ -1,3 +1,17 @@
+
+
+a:= bat.new(:oid,:int);
+bat.append(a,1:int);
+bat.append(a,2:int);
+bat.append(a,3:int);
+bat.append(a,4:int);
+b:= bat.new(:oid,:int);
+bat.append(b,4:int);
+bat.append(b,3:int);
+bat.append(b,2:int);
+bat.append(b,1:int);
+result:bat[:oid,:int] := pyapi.eval(nil:ptr, "return(arg1*sum(arg2)*arg3)", a,
b, 2);
+io.print(result);
# bits with null value
bbit:= bat.new(:oid,:bit);
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -124,6 +124,7 @@ static int pyapiInitialized = FALSE;
BATsetcount(bat, count); }
+//this code is not necessary
#define NP_TO_BAT_MULTI(bat, mtpe, nptpe, npyconversion) {
\
count = PyArray_DIMS((PyArrayObject*)pResult)[1]; \
pCol = (PyArrayObject*)PyArray_ZEROS(1, (npy_intp[1]) { count },
nptpe, false); \
@@ -197,6 +198,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
pyapi_enableflag);
}
+
pycalllen = strlen(exprStr) + sizeof(argnames) + 1000;
expr_ind_len = strlen(exprStr) + 1000;
@@ -209,6 +211,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
// TODO: free args and rcall
}
+
// first argument after the return contains the pointer to the sql_func
structure
if (sqlfun != NULL && sqlfun->ops->cnt > 0) {
int carg = pci->retc + 2;
@@ -240,12 +243,12 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
// for each input column (BAT):
for (i = pci->retc + 2; i < pci->argc; i++) {
PyObject *vararray = NULL;
- // null mask for masked array
-
// turn scalars into one-valued BATs
// TODO: also do this for Python? Or should scalar values be 'simple'
variables?
- if (!isaBatType(getArgType(mb,pci,i))) {
- b = BATnew(TYPE_void, getArgType(mb, pci, i), 0, TRANSIENT);
+ if (!isaBatType(getArgType(mb,pci,i)))
+ {
+ //the argument is a scalar, check which scalar type it is
+ /*b = BATnew(TYPE_void, getArgType(mb, pci, i), 0, TRANSIENT);
if (b == NULL) {
msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
goto wrapup;
@@ -253,7 +256,9 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
if ( getArgType(mb,pci,i) == TYPE_str)
BUNappend(b, *getArgReference_str(stk, pci, i), FALSE);
else
+ {
BUNappend(b, getArgReference(stk, pci, i), FALSE);
+ }
BATsetcount(b, 1);
BATseqbase(b, 0);
BATsettrivprop(b);
@@ -262,195 +267,251 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
if (b == NULL) {
msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
goto wrapup;
+ }*/
+ switch(getArgType(mb,pci,i))
+ {
+ case TYPE_bte:
+ vararray =
PyInt_FromLong((long)(*(bte*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_sht:
+ vararray =
PyInt_FromLong((long)(*(sht*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_int:
+ vararray =
PyInt_FromLong((long)(*(int*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_lng:
+ vararray =
PyLong_FromLong((long)(*(lng*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_flt:
+ vararray =
PyFloat_FromDouble((double)(*(flt*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_dbl:
+ vararray =
PyFloat_FromDouble((double)(*(dbl*)getArgReference(stk, pci, i)));
+ break;
+ case TYPE_hge:
+ {
+ char hex[40];
+ const hge *t = (const hge *) getArgReference(stk, pci,
i);
+ snprintf_huge(hex, 40, *t);
+ //then we create a PyLong from that string by parsing
it
+ vararray = PyLong_FromString(hex, NULL, 16);
+ }
+ break;
+ case TYPE_str:
+ vararray =
PyString_FromString((char*)getArgReference_str(stk, pci, i));
+ break;
+ default:
+ msg = createException(MAL, "pyapi.eval", "Unsupported
scalar type.");
+ goto wrapup;
}
+ if (vararray == NULL)
+ {
+ msg = createException(MAL, "pyapi.eval", "Something went wrong
converting the MonetDB scalar to a Python scalar.");
+ goto wrapup;
+ }
+ PyTuple_SetItem(pArgs, ai++, vararray);
}
- switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) {
- case TYPE_bte:
- vararray = BAT_TO_NP(b, bte, NPY_INT8);
- break;
- case TYPE_sht:
- vararray = BAT_TO_NP(b, sht, NPY_INT16);
- break;
- case TYPE_int:
- vararray = BAT_TO_NP(b, int, NPY_INT32);
- break;
- case TYPE_lng:
- vararray = BAT_TO_NP(b, lng, NPY_INT64);
- break;
- case TYPE_flt:
- vararray = BAT_TO_NP(b, flt, NPY_FLOAT32);
- break;
- case TYPE_dbl:
- vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64);
- break;
- case TYPE_str:
- li = bat_iterator(b);
+ else
+ {
+ b = BATdescriptor(*getArgReference_bat(stk, pci, i));
+ if (b == NULL) {
+ msg = createException(MAL, "pyapi.eval", MAL_MALLOC_FAIL);
+ goto wrapup;
+ }
+ //the argument is a BAT, convert it to a numpy array
+ switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) {
+ case TYPE_bte:
+ vararray = BAT_TO_NP(b, bte, NPY_INT8);
+ break;
+ case TYPE_sht:
+ vararray = BAT_TO_NP(b, sht, NPY_INT16);
+ break;
+ case TYPE_int:
+ vararray = BAT_TO_NP(b, int, NPY_INT32);
+ break;
+ case TYPE_lng:
+ vararray = BAT_TO_NP(b, lng, NPY_INT64);
+ break;
+ case TYPE_flt:
+ vararray = BAT_TO_NP(b, flt, NPY_FLOAT32);
+ break;
+ case TYPE_dbl:
+ vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64);
+ break;
+ case TYPE_str:
+ li = bat_iterator(b);
- //we first loop over all the strings in the BAT to find the
maximum length of a single string
- //this is because NUMPY only supports strings with a fixed maximum
length
- maxsize = 0;
- count = BATcount(b);
- BATloop(b, p, q)
- {
- const char *t = (const char *) BUNtail(li, p);
- const size_t length = (const size_t) strlen(t);
+ //we first loop over all the strings in the BAT to find the
maximum length of a single string
+ //this is because NUMPY only supports strings with a fixed
maximum length
+ maxsize = 0;
+ count = BATcount(b);
+ BATloop(b, p, q)
+ {
+ const char *t = (const char *) BUNtail(li, p);
+ const size_t length = (const size_t) strlen(t);
- if (strlen(t) > maxsize)
- maxsize = length;
+ if (strlen(t) > maxsize)
+ maxsize = length;
+ }
+
+ //create a NPY_UNICODE array object
+ vararray = PyArray_New(
+ &PyArray_Type,
+ 1,
+ (npy_intp[1]) {count},
+ NPY_UNICODE,
+ NULL,
+ NULL,
+ maxsize * 4, //we have to do maxsize*4 because
NPY_UNICODE is stored as UNICODE-32 (i.e. 4 bytes per character)
+ 0,
+ NULL);
+
+ //fill the NPY_UNICODE array object using the PyArray_SETITEM
function
+ j = 0;
+ BATloop(b, p, q)
+ {
+ const char *t = (const char *) BUNtail(li, p);
+ PyObject *obj;
+ if (strcmp(t, str_nil) == 0)
+ {
+ //str_nil isn't a valid UTF-8 character (it's 0x80),
so we need to decode it as Latin1
+ obj = PyUnicode_DecodeLatin1(t, strlen(t), "strict");
+ }
+ else
+ {
+ obj = PyUnicode_DecodeUTF8(t, strlen(t), "strict");
+ }
+ if (obj == NULL)
+ {
+ PyErr_Print();
+ msg = createException(MAL, "pyapi.eval", "Failed to
decode string as UTF-8.");
+ goto wrapup;
+ }
+ PyArray_SETITEM((PyArrayObject*)vararray,
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+ j++;
+ }
+ break;
+ case TYPE_hge:
+ li = bat_iterator(b);
+ count = BATcount(b);
+
+ //create a NPY_OBJECT array to hold the huge type
+ vararray = PyArray_New(
+ &PyArray_Type,
+ 1,
+ (npy_intp[1]) {count},
+ NPY_OBJECT,
+ NULL,
+ NULL,
+ 128, //128 bits per value
+ 0,
+ NULL);
+
+ j = 0;
+ printf("!WARNING: Type \"hge\" (128 bit) is unsupported by
Numpy. The numbers are instead converted to python objects of type \"long\".
This is likely very slow.\n");
+ BATloop(b, p, q)
+ {
+ //we first convert the huge to a string in hex format
+ char hex[40];
+ PyObject *obj;
+ const hge *t = (const hge *) BUNtail(li, p);
+ snprintf_huge(hex, 40, *t);
+ //then we create a PyLong from that string by parsing it
+ obj = PyLong_FromString(hex, NULL, 16);
+ if (obj == NULL)
+ {
+ PyErr_Print();
+ msg = createException(MAL, "pyapi.eval", "Failed to
convert huge array.");
+ goto wrapup;
+ }
+ PyArray_SETITEM((PyArrayObject*)vararray,
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+ j++;
+ }
+ break;
+
+ /*
+ //Convert huge to double, this might be preferrable so I'll
leave this code here.//
+ li = bat_iterator(b);
+ count = BATcount(b);
+ vararray = PyArray_New(
+ &PyArray_Type,
+ 1,
+ (npy_intp[1]) {count},
+ NPY_DOUBLE,
+ NULL,
+ NULL,
+ 0,
+ 0,
+ NULL);
+ j = 0;
+ BATloop(b, p, q)
+ {
+
+ const hge *t = (const hge *) BUNtail(li, p);
+ PyObject *obj = PyFloat_FromDouble((double) *t);
+ if (obj == NULL)
+ {
+ PyErr_Print();
+ msg = createException(MAL, "pyapi.eval", "Failed to
convert huge array.");
+ goto wrapup;
+ }
+ PyArray_SETITEM((PyArrayObject*)vararray,
PyArray_GETPTR1((PyArrayObject*)vararray, j), obj);
+ j++;
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list