Changeset: 6fe592bdf6cc for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=6fe592bdf6cc
Added Files:
        monetdb5/extras/pyapi/Tests/pyapi01.malC
Modified Files:
        monetdb5/extras/pyapi/Tests/All
        monetdb5/extras/pyapi/Tests/pyapi02.malC
        monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:

Fixed strings to work properly. Fixed a bug caused by multiple PyInterpreters. 
It is now possible to return a single array rather than a list containing a 
single array.


diffs (truncated from 470 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Tests/All b/monetdb5/extras/pyapi/Tests/All
--- a/monetdb5/extras/pyapi/Tests/All
+++ b/monetdb5/extras/pyapi/Tests/All
@@ -1,2 +1,3 @@
 HAVE_LIBPY?pyapi00
+HAVE_LIBPY?pyapi01
 HAVE_LIBPY?pyapi02
diff --git a/monetdb5/extras/pyapi/Tests/pyapi01.malC b/monetdb5/extras/pyapi/Tests/pyapi01.malC
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/pyapi/Tests/pyapi01.malC
@@ -0,0 +1,8 @@
+#strings testing
+bstr:= bat.new(:oid,:str);
+bat.append(bstr,"asdf":str);
+bat.append(bstr,"sd asd asd asd asd a":str);
+bat.append(bstr,"":str);
+bat.append(bstr,"test":str);
+rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bstr);
+io.print(rstr);
diff --git a/monetdb5/extras/pyapi/Tests/pyapi02.malC b/monetdb5/extras/pyapi/Tests/pyapi02.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi02.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi02.malC
@@ -2,14 +2,14 @@
 
 # inty types
 
-bbit:= bat.new(:oid,:bit);
-bat.append(bbit,1:bit);
-bat.append(bbit,0:bit);
-bat.append(bbit,1:bit);
-bat.append(bbit,0:bit);
-bat.append(bbit,nil:bit);
-rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"print(arg1)\nreturn([numpy.add(arg1.filled(0),42)])",bbit);
-io.print(rbit);
+#bbit:= bat.new(:oid,:bit);
+#bat.append(bbit,1:bit);
+#bat.append(bbit,0:bit);
+#bat.append(bbit,1:bit);
+#bat.append(bbit,0:bit);
+#bat.append(bbit,nil:bit);
+#rbit:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bbit);
+#io.print(rbit);
 
 bbte:= bat.new(:oid,:bte);
 bat.append(bbte,42:bte);
@@ -17,14 +17,15 @@ bat.append(bbte,84:bte);
 bat.append(bbte,111:bte);
 bat.append(bbte,0:bte);
 bat.append(bbte,nil:bte);
-rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([[42,43]])",bbte);
+io.print(bbte);
+rbte:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return numpy.add(arg1, 99)",bbte);
 io.print(rbte);
 
 bsht:= bat.new(:oid,:sht);
 bat.append(bsht,42:sht);
 bat.append(bsht,82:sht);
 bat.append(bsht,0:sht);
-bat.append(bsht,3276:sht);
+bat.append(bsht,240:sht);
 bat.append(bsht,nil:sht);
 rsht:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bsht);
 io.print(rsht);
@@ -43,6 +44,7 @@ bat.append(bwrd,1804289383:wrd);
 bat.append(bwrd,846930886:wrd);
 bat.append(bwrd,1681692777:wrd);
 bat.append(bwrd,1714636915:wrd);
+bat.append(bwrd,nil:wrd);
 rwrd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bwrd);
 io.print(rwrd);
 
@@ -70,7 +72,8 @@ bat.append(bflt,18042.89383:flt);
 bat.append(bflt,846.930886:flt);
 bat.append(bflt,16.81692777:flt);
 bat.append(bflt,1714636.915:flt);
-rflt:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bflt);
+bat.append(bflt,nil:flt);
+rflt:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bflt);
 io.print(rflt);
 
 bdbl:= bat.new(:oid,:dbl);
@@ -79,18 +82,18 @@ bat.append(bdbl,84.6930886:dbl);
 bat.append(bdbl,168169.2777:dbl);
 bat.append(bdbl,17146369.15:dbl);
 bat.append(bdbl,nil:dbl);
-rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1,1)])",bdbl);
+rdbl:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return([numpy.add(arg1.filled(0),1)])",bdbl);
 io.print(rdbl);
 
 # strings
 
-bstr:= bat.new(:oid,:str);
-bat.append(bstr,"asdf":str);
-bat.append(bstr,"sd asd asd asd asd a":str);
-bat.append(bstr,"":str);
-bat.append(bstr,nil:str);
-rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"unlist(lapply(arg1,nchar))",bstr);
-io.print(rstr);
+#bstr:= bat.new(:oid,:str);
+#bat.append(bstr,"asdf":str);
+#bat.append(bstr,"sd asd asd asd asd a":str);
+#bat.append(bstr,"":str);
+#bat.append(bstr,"test":str);
+#rstr:bat[:oid,:int] := pyapi.eval(nil:ptr,"print(arg1);\nreturn([arg1])",bstr);
+#io.print(rstr);
 
 
 
@@ -103,26 +106,26 @@ bat.append(binto,1681692777:int);
 bat.append(binto,1714636915:int);
 bat.append(binto,nil:int);
 
-rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"arg1",binto);
+rintbi:bat[:oid,:int] := pyapi.eval(nil:ptr,"return [arg1.filled(0)]",binto);
 io.print(rintbi);
 
-rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
-io.print(rintbi2);
+# rintbi2:bat[:oid,:int] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
+# io.print(rintbi2);
 
-rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
-io.print(rintbl);
+# rintbl:bat[:oid,:lng] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
+# io.print(rintbl);
 
-rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
-io.print(rintbh);
+# rintbh:bat[:oid,:hge] := pyapi.eval(nil:ptr,"as.integer(arg1)",binto);
+# io.print(rintbh);
 
-rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto);
-io.print(rintbd);
+# rintbd:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"as.numeric(arg1)",binto);
+# io.print(rintbd);
 
-rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto);
-io.print(rintbs);
+# rintbs:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.character(arg1)",binto);
+# io.print(rintbs);
 
-# factors should be strings
-rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto);
-io.print(rintbf);
+# # factors should be strings
+# rintbf:bat[:oid,:str] := pyapi.eval(nil:ptr,"as.factor(arg1)",binto);
+# io.print(rintbf);
 
 
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -27,12 +27,15 @@
 #include <string.h>
 
 const char* pyapi_enableflag = "embedded_py";
+char *NPYConstToString(int);
+bool IsPyArrayObject(PyObject *);
 
 int PyAPIEnabled(void) {
        return (GDKgetenv_istrue(pyapi_enableflag)
                        || GDKgetenv_isyes(pyapi_enableflag));
 }
 
+
 // TODO: exclude pyapi from mergetable, too
 // TODO: add to SQL layer
 // TODO: can we call the Python interpreter in a multi-thread environment?
@@ -49,7 +52,14 @@ static int pyapiInitialized = FALSE;
                PyArrayObject* pCol = (PyArrayObject*) PyArray_FromAny(pColO, \
                        PyArray_DescrFromType(nptpe), 1, 1, NPY_ARRAY_CARRAY |    \
                        NPY_ARRAY_FORCECAST, NULL);                               \
-               size_t cnt = PyArray_DIMS(pCol)[0], j;                        \
+               size_t cnt = 0;                                               \
+               if (pCol == NULL)                                             \
+               {                                                             \
+                       pCol = (PyArrayObject*) PyArray_FromAny(pColO, NULL, 1, 1,  NPY_ARRAY_CARRAY, NULL);  \
+                       msg = createException(MAL, "pyapi.eval", "Wrong return type in python function. Expected an array of type \"%s\" as return value, but the python function returned an array of type \"%s\".", #mtpe, NPYConstToString(PyArray_DTYPE(pCol)->type_num));        \
+                       goto wrapup;                                          \
+               }                                                             \
+               cnt = PyArray_DIMS(pCol)[0], j;                               \
                bat = BATnew(TYPE_void, TYPE_##mtpe, cnt, TRANSIENT);         \
                BATseqbase(bat, 0); bat->T->nil = 0; bat->T->nonil = 1;       \
                bat->tkey = 0; bat->tsorted = 0; bat->trevsorted = 0;         \
@@ -58,6 +68,7 @@ static int pyapiInitialized = FALSE;
                                        *(mtpe*) PyArray_GETPTR1(pCol, j); }              \
                BATsetcount(bat, cnt); }
 
+//todo: NULL
 // TODO: also handle the case if someone returns a masked array
 
 #define _PYAPI_DEBUG_
@@ -97,8 +108,12 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
        node * argnode;
        int seengrp = FALSE;
        PyObject *pArgs, *pResult; // this is going to be the parameter tuple
-       PyThreadState* tstate;
+       BUN p = 0, q = 0;
+       BATiter li;
 
+       size_t count;
+       size_t maxsize;
+       size_t j;
 
        if (!PyAPIEnabled()) {
                throw(MAL, "pyapi.eval",
@@ -120,7 +135,7 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
 
        // this isolates our interpreter, so it's safe to run pyapi multithreaded
        // TODO: verify this
-       tstate = Py_NewInterpreter();
+       /*tstate = Py_NewInterpreter();*/
 
        // first argument after the return contains the pointer to the sql_func structure
        if (sqlfun != NULL && sqlfun->ops->cnt > 0) {
@@ -178,7 +193,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                        }
                }
 
-
                switch (ATOMstorage(getColumnType(getArgType(mb,pci,i)))) {
                case TYPE_bte:
                        vararray = BAT_TO_NP(b, bte, NPY_INT8);
@@ -198,8 +212,49 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                case TYPE_dbl:
                        vararray = BAT_TO_NP(b, dbl, NPY_FLOAT64);
                        break;
+               case TYPE_str:
+                       li = bat_iterator(b);
 
-               // TODO: implement other types (strings, boolean)
+                       //we first loop over all the strings in the BAT to find the maximum length of a single string
+                       //this is because NUMPY only supports strings with a fixed maximum length
+                       maxsize = 0;
+                       count = BATcount(b);
+                       BATloop(b, p, q)
+                       {
+                               const char *t = (const char *) BUNtail(li, p);
+                               const size_t length = (const size_t) strlen(t);
+
+                               if (strlen(t) > maxsize)
+                                       maxsize = length;
+
+                       }
+
+                       //create a NPY_STRING array object
+                       vararray = PyArray_New(
+                               &PyArray_Type, 
+                               1, 
+                               (npy_intp[1]) {count},  
+                       NPY_STRING, 
+                       NULL, 
+                       NULL, 
+                       maxsize,             
+                               0, 
+                               NULL);
+
+                       //fill the NPY_STRING array object using the PyArray_SETITEM function
+                       j = 0;
+                       BATloop(b, p, q)
+                       {
+                               const char *t = (const char *) BUNtail(li, p);
+                               PyArray_SETITEM((PyArrayObject*)vararray, PyArray_GETPTR1((PyArrayObject*)vararray, j), PyString_FromString(t));
+                               j++;
+                       }
+                       break;
+               case TYPE_hge:
+                       vararray = BAT_TO_NP(b, hge, NPY_LONGLONG);
+                       break;
+
+               // TODO: implement other types (boolean)
                default:
                        msg = createException(MAL, "pyapi.eval", "unknown argument type ");
                        goto wrapup;
@@ -218,7 +273,8 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                        int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
                        BATiter bi = bat_iterator(b);
 
-                       if (b->T->nil) {
+                       if (b->T->nil) 
+                       {
                                size_t j;
                                for (j = 0; j < BATcount(b); j++) {
                                        if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0) {
@@ -227,8 +283,10 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                                        }
                                }
                        }
+
                        PyTuple_SetItem(maargs, 0, vararray);
                        PyTuple_SetItem(maargs, 1, (PyObject*) nullmask);
+                               
                        vararray = PyObject_CallObject(mafunc, maargs);
                        if (!vararray) {
                                msg = createException(MAL, "pyapi.eval", "UUUH");
@@ -241,7 +299,6 @@ str PyAPIeval(MalBlkPtr mb, MalStkPtr st
                // TODO: do this later
 
                BBPunfix(b->batCacheid);
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to