Changeset: aefe683f0b5e for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=aefe683f0b5e
Modified Files:
monetdb5/extras/pyapi/Tests/pyapi02.malC
monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:
Python API: mask array (working once, unclear why)
diffs (207 lines):
diff --git a/monetdb5/extras/pyapi/Tests/pyapi02.malC b/monetdb5/extras/pyapi/Tests/pyapi02.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi02.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi02.malC
@@ -8,16 +8,16 @@ bat.append(bbit,0:bit);
bat.append(bbit,1:bit);
bat.append(bbit,0:bit);
bat.append(bbit,nil:bit);
-rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbit);
+rbit:bat[:oid,:dbl] :=
pyapi.eval(nil:ptr,"print(arg1)\nreturn([numpy.add(arg1.filled(0),42)])",bbit);
io.print(rbit);
bbte:= bat.new(:oid,:bte);
bat.append(bbte,42:bte);
bat.append(bbte,84:bte);
-bat.append(bbte,254:bte);
+bat.append(bbte,111:bte);
bat.append(bbte,0:bte);
bat.append(bbte,nil:bte);
-rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bbte);
+rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([[42,43]])",bbte);
io.print(rbte);
bsht:= bat.new(:oid,:sht);
@@ -26,7 +26,7 @@ bat.append(bsht,82:sht);
bat.append(bsht,0:sht);
bat.append(bsht,3276:sht);
bat.append(bsht,nil:sht);
-rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bsht);
+rsht:bat[:oid,:dbl] :=
pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bsht);
io.print(rsht);
bint:= bat.new(:oid,:int);
@@ -35,7 +35,7 @@ bat.append(bint,846930886:int);
bat.append(bint,1681692777:int);
bat.append(bint,1714636915:int);
bat.append(bint,nil:int);
-rint:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bint);
+rint:bat[:oid,:dbl] :=
pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bint);
io.print(rint);
bwrd:= bat.new(:oid,:wrd);
@@ -43,7 +43,7 @@ bat.append(bwrd,1804289383:wrd);
bat.append(bwrd,846930886:wrd);
bat.append(bwrd,1681692777:wrd);
bat.append(bwrd,1714636915:wrd);
-rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bwrd);
+rwrd:bat[:oid,:dbl] :=
pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bwrd);
io.print(rwrd);
blng:= bat.new(:oid,:lng);
@@ -51,7 +51,7 @@ bat.append(blng,1804289383L);
bat.append(blng,846930886L);
bat.append(blng,1681692777L);
bat.append(blng,1714636915L);
-rlng:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",blng);
+rlng:bat[:oid,:dbl] :=
pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",blng);
io.print(rlng);
# not sure what to with hge, numpy only supports 128 bits when sizeof(long)=16
@@ -89,7 +89,7 @@ bat.append(bstr,"asdf":str);
bat.append(bstr,"sd asd asd asd asd a":str);
bat.append(bstr,"":str);
bat.append(bstr,nil:str);
-rstr:bat[:oid,:int] := rapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr);
+rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr);
io.print(rstr);
@@ -103,26 +103,26 @@ bat.append(binto,1681692777:int);
bat.append(binto,1714636915:int);
bat.append(binto,nil:int);
-rintbi:bat[:oid,:int] := rapi.eval(nil:ptr,"arg1",binto);
+rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"arg1",binto);
io.print(rintbi);
-rintbi2:bat[:oid,:int] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
io.print(rintbi2);
-rintbl:bat[:oid,:lng] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
io.print(rintbl);
-rintbh:bat[:oid,:hge] := rapi.eval(nil:ptr,"as.integer(arg1)",binto);
+rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
io.print(rintbh);
-rintbd:bat[:oid,:dbl] := rapi.eval(nil:ptr,"as.numeric(arg1)",binto);
+rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto);
io.print(rintbd);
-rintbs:bat[:oid,:str] := rapi.eval(nil:ptr,"as.character(arg1)",binto);
+rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto);
io.print(rintbs);
# factors should be strings
-rintbf:bat[:oid,:str] := rapi.eval(nil:ptr,"as.factor(arg1)",binto);
+rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto);
io.print(rintbf);
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -58,6 +58,7 @@ static int pyapiInitialized = FALSE;
*(mtpe*) PyArray_GETPTR1(pCol, j); }
\
BATsetcount(bat, cnt); }
+// TODO: also handle the case if someone returns a masked array
#define _PYAPI_DEBUG_
@@ -152,6 +153,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
// for each input column (BAT):
for (i = pci->retc + 2; i < pci->argc; i++) {
PyObject *vararray = NULL;
+ // null mask for masked array
// turn scalars into one-valued BATs
// TODO: also do this for Python? Or should scalar values be
'simple' variables?
@@ -176,6 +178,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
}
}
+
switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) {
case TYPE_bte:
vararray = BAT_TO_NP(b, bte, NPY_INT8);
@@ -195,16 +198,50 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
case TYPE_dbl:
vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64);
break;
- // TODO: handle NULLs!
// TODO: implement other types (strings, boolean)
default:
msg = createException(MAL, "pyapi.eval", "unknown
argument type ");
goto wrapup;
}
+
+ // we use numpy.ma to deal with possible NULL values in the data
+ // once numpy comes with proper NA support, this will change
+ {
+ PyObject *mafunc =
PyObject_GetAttrString(PyImport_Import(
+ PyString_FromString("numpy.ma")),
"masked_array");
+ PyObject *maargs = PyTuple_New(2);
+ PyArrayObject* nullmask = (PyArrayObject*)
PyArray_ZEROS(1,
+ (npy_intp[1])
{BATcount(b)}, NPY_BOOL, 0);
+
+ const void *nil = ATOMnilptr(b->ttype);
+ int (*atomcmp)(const void *, const void *) =
ATOMcompare(b->ttype);
+ BATiter bi = bat_iterator(b);
+
+ if (b->T->nil) {
+ size_t j;
+ for (j = 0; j < BATcount(b); j++) {
+ if ((*atomcmp)(BUNtail(bi, BUNfirst(b)
+ j), nil) == 0) {
+ // Houston we have a NULL
+ PyArray_SETITEM(nullmask,
PyArray_GETPTR1(nullmask, j), Py_True);
+ }
+ }
+ }
+ PyTuple_SetItem(maargs, 0, vararray);
+ PyTuple_SetItem(maargs, 1, (PyObject*) nullmask);
+ vararray = PyObject_CallObject(mafunc, maargs);
+ if (!vararray) {
+ msg = createException(MAL, "pyapi.eval",
"UUUH");
+ goto wrapup;
+ }
+ }
+ PyTuple_SetItem(pArgs, ai++, vararray);
+
+ // TODO: we cannot clean this up just yet, there may be a
shallow copy referenced in python.
+ // TODO: do this later
+
BBPunfix(b->batCacheid);
- PyTuple_SetItem(pArgs, ai++, vararray);
}
// create argument list
@@ -284,11 +321,13 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
msg = createException(MAL, "pyapi.eval", "Command too large");
goto wrapup;
}
-
{
int pyret;
PyObject *pFunc, *pModule;
+ // TODO: does this create overhead?, see if we can share the
import
+ PyRun_SimpleString("import numpy");
+
pModule = PyImport_Import(PyString_FromString("__main__"));
pyret = PyRun_SimpleString(pycall);
pFunc = PyObject_GetAttrString(pModule, "pyfun");
@@ -298,9 +337,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
goto wrapup;
}
- // TODO: does this create overhead?, see if we can share the
import
- PyRun_SimpleString("import numpy");
-
pResult = PyObject_CallObject(pFunc, pArgs);
if (PyErr_Occurred()) {
PyObject *pErrType, *pErrVal, *pErrTb;
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list