Changeset: 0d867589a540 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=0d867589a540
Added Files:
        monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC
Modified Files:
        monetdb5/extras/pyapi/Tests/All
        monetdb5/extras/pyapi/Tests/pyapi_pandas.malC
        monetdb5/extras/pyapi/pyapi.c
Branch: pyapi
Log Message:

Reworked how Python return types are handled. All valid return types should 
now work properly, and invalid return types should produce an understandable 
error message.


diffs (truncated from 769 to 300 lines):

diff --git a/monetdb5/extras/pyapi/Tests/All b/monetdb5/extras/pyapi/Tests/All
--- a/monetdb5/extras/pyapi/Tests/All
+++ b/monetdb5/extras/pyapi/Tests/All
@@ -2,3 +2,4 @@ HAVE_LIBPY?pyapi_numpy_boolean
 HAVE_LIBPY?pyapi_types_string
 HAVE_LIBPY?pyapi_types_numeric
 HAVE_LIBPY?pyapi_pandas
+HAVE_LIBPY?pyapi_returntypes
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_pandas.malC 
b/monetdb5/extras/pyapi/Tests/pyapi_pandas.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_pandas.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_pandas.malC
@@ -1,3 +1,4 @@
+
 
 
 
@@ -6,3 +7,32 @@
 # CSV location is hardcoded (sorry) 
----->---------->----------->-------->------vvvv
 r:bat[:oid,:int] := pyapi.eval(nil:ptr, "import pandas as pd\ndf = 
pd.read_csv(\"/home/mytherin/Programs/MonetDB/monetdb5/extras/pyapi/Tests/bikedata.csv\")\nreturn([list(df['Berri
 1'])])");
 io.print(r);
+
+# use pandas to perform data aggregation
+g:= bat.new(:oid,:int);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+
+b:= bat.new(:oid,:dbl);
+bat.append(b,1804289383:dbl);
+bat.append(b,846930886:dbl);
+bat.append(b,1681692777:dbl);
+bat.append(b,1714636915:dbl);
+bat.append(b,1957747793:dbl);
+bat.append(b,424238335:dbl);
+bat.append(b,719885386:dbl);
+bat.append(b,1649760492:dbl);
+bat.append(b,596516649:dbl);
+bat.append(b,1189641421:dbl);
+
+(r:bat[:oid,:int], s:bat[:oid,:dbl]) := pyapi.eval(nil:ptr,"import pandas as 
pd\ndf = pd.DataFrame({\'Group\': arg1, \'Values\': arg2})\ngrouped = 
df.groupby('Group')\nreturn([grouped.groups.keys(),grouped.sum().values.flatten()])",
 g, b);
+
+io.print(r,s);
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC 
b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC
new file mode 100644
--- /dev/null
+++ b/monetdb5/extras/pyapi/Tests/pyapi_returntypes.malC
@@ -0,0 +1,184 @@
+#########################
+#########################
+## --CORRECT RETURNS-- ##
+#########################
+#########################
+
+####################
+# --SINGLE VALUE-- #
+####################
+# return a single integer value
+r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return(33)");
+io.print(r);
+
+# return a single float value
+r:bat[:oid,:dbl] := pyapi.eval(nil:ptr,"return(12.31)");
+io.print(r);
+
+# return a single string value
+r:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(\"test\")");
+io.print(r);
+
+#####################
+# --PYTHON ARRAYS-- #
+#####################
+# return an array with a single value
+r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([33])");
+io.print(r);
+
+# return an array containing a single array
+r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([[33]])");
+io.print(r);
+
+# return an array containing two arrays
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return([[33], 
[42]])");
+io.print(r, s);
+
+# return an array containing three arrays
+(r:bat[:oid,:int], s:bat[:oid,:int], t:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return([[33], [42], [66]])");
+io.print(r, s, t);
+
+# return an array containing two arrays containing strings
+(r:bat[:oid,:str], s:bat[:oid,:str]) := 
pyapi.eval(nil:ptr,"return([[\"Hello\", \"Hello Again\"], [\"Hello Again 
Again\",\"That's quite enough.\"]])");
+io.print(r,s);
+
+####################
+# --NUMPY ARRAYS-- #
+####################
+# return a single numpy array
+r:bat[:oid,:int] := 
pyapi.eval(nil:ptr,"return(numpy.array([33,11,332,51,88]))");
+io.print(r);
+
+# return an array containing two numpy arrays
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return([numpy.array([33]), numpy.array([44])])");
+io.print(r, s);
+
+# return a numpy array containing two arrays of unequal size
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[33,24,55], [44,66, 345, 77, 66]]))");
+io.print(s);
+
+# return a numpy array containing two numpy arrays of unequal size
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.array([numpy.array([33]), numpy.array([44, 45, 
77])]))");
+io.print(s);
+
+# return a multi-dimensional numpy array containing integers
+#(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[33, 24, 55], [44, 66,345]]))");
+#io.print(r,s);
+
+# return a multi-dimensional numpy array containing doubles 
+(r:bat[:oid,:dbl], s:bat[:oid,:dbl]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[27.42, 18.62, 33.81], [12.55, 41.44, 
20.88]]))");
+io.print(r,s);
+
+# return a multi-dimensional numpy array containing strings
+(r:bat[:oid,:str], s:bat[:oid,:str]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[\"Hëllo\", \"Hello Again\"], [\"Hello 
Again Again\",\"That's quite enough.\"]]))");
+io.print(r,s);
+
+# return a multi-dimensional numpy array containing unicode strings
+bstr:= bat.new(:oid,:str);
+bat.append(bstr,"Hannes Mühleisen":str);
+bat.append(bstr,"héllo":str);
+bat.append(bstr,"éáú üüäãö":str);
+bat.append(bstr,"幺巾乡阜阝测试一些中国符号":str);
+bat.append(bstr,"いくつかの日本のシンボルをテストします":str);
+(r:bat[:oid,:str], s:bat[:oid,:str]) := 
pyapi.eval(nil:ptr,"return(numpy.array([arg1, arg1]))",bstr);
+io.print(r,s);
+
+# return a multi-dimensional numpy array containing integers, but store them 
in a string BAT
+(r:bat[:oid,:str], s:bat[:oid,:str]) := 
pyapi.eval(nil:ptr,"return(numpy.array([[33, 24, 55], [44, 66,345]]))");
+io.print(r,s);
+
+#####################
+# --MASKED ARRAYS-- #
+#####################
+# return a single masked array
+r:bat[:oid,:int] := pyapi.eval(nil:ptr, 
"return(numpy.ma.masked_array([862,43,24],[1,1,0]))");
+io.print(r);
+
+# return a single masked array in an array
+r:bat[:oid,:int] := pyapi.eval(nil:ptr, 
"return([numpy.ma.masked_array([862,43,24],[1,1,0])])");
+io.print(r);
+
+# return a single masked array in a numpy array (this is kind of weird, it 
removes the entire masked array so it's a single numpy array)
+r:bat[:oid,:int] := pyapi.eval(nil:ptr, 
"return(numpy.array([numpy.ma.masked_array([862,43,24],[1,1,0])]))");
+io.print(r);
+
+# return a single masked array containing strings
+r:bat[:oid,:str] := pyapi.eval(nil:ptr, 
"return(numpy.ma.masked_array([\"Test\",\"Four\",24],[0,0,1]))");
+io.print(r);
+
+# return a multi-dimensional masked array with even dimensions
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.ma.masked_array([[1, 2, 3], [1,2,3]], 
[[0,0,0], [1,0,0]]))");
+io.print(r,s);
+
+# return a multi-dimensional masked array with uneven dimensions 
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.ma.masked_array([[1, 2, 3], [1,2,3,4]], 0))");
+io.print(r,s);
+
+# return a masked array, holding multiple masked arrays of uneven length (if 
they had even length, the test would be identical to the one above because 
numpy would automatically convert the object to one big masked array)
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.ma.masked_array([numpy.ma.masked_array([17,2,28],
 [0,1,0]), numpy.ma.masked_array([1,2,3,4], [1,0,0,0])], 0))");
+io.print(r,s);
+
+##############
+# --PANDAS-- #
+##############
+g:= bat.new(:oid,:int);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+bat.append(g,2);
+bat.append(g,3);
+bat.append(g,1);
+
+b:= bat.new(:oid,:int);
+bat.append(b,4:int);
+bat.append(b,24:int);
+bat.append(b,363:int);
+bat.append(b,63:int);
+bat.append(b,838:int);
+bat.append(b,454:int);
+bat.append(b,22:int);
+bat.append(b,663:int);
+bat.append(b,123:int);
+bat.append(b,442:int);
+
+# return a pandas dataframe
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"import pandas as 
pd\ndf = pd.DataFrame({\'Group\': arg1, \'Values\': arg2})\nreturn(df)", g, b);
+io.print(r,s);
+
+#######################
+#######################
+## --WRONG RETURNS-- ##
+#######################
+#######################
+
+# return a string when an int is expected
+r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return(\"Test\")");
+
+# return an unsupported object
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"class 
NewClass:\n\tx = 5\n\n\treturn(NewClass())");
+
+# return a scalar when multiple returns are expected
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return(12)");
+
+# return a pandas dataframe with the incorrect format
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"import pandas as 
pd\ndf = pd.DataFrame({\'Group\': arg1, \'Values\': arg2, 'Values2': 
arg2})\nreturn(df)", g, b);
+
+# return a single numpy array when two are expected
+(r:bat[:oid,:int], s:bat[:oid,:int]) := 
pyapi.eval(nil:ptr,"return(numpy.array([12]))");
+
+# return a single array when two are expected
+(r:bat[:oid,:int], s:bat[:oid,:int]) := pyapi.eval(nil:ptr,"return([12])");
+
+# return two arrays when a single array is expected
+r:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([[33,24,55], [44,66,345]])");
+
+# return a single UTF-32 encoded string
+a:bat[:oid,:str] := pyapi.eval(nil:ptr,"x = 
unicode(\"hello\")\nreturn(x.encode(\"utf32\"))");
+
+# return a UTF-32 encoded string in a numpy array
+(str1:bat[:oid,:str], str2:bat[:oid,:str]) := pyapi.eval(nil:ptr,"x = 
unicode(\"hello\")\nreturn(numpy.array([[x.encode(\"utf32\")], 
[x.encode(\"utf32\")]]))");
+
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -28,7 +28,10 @@
 
 const char* pyapi_enableflag = "embedded_py";
 char *NPYConstToString(int);
-bool IsPyArrayObject(PyObject *, int);
+bool IsPyScalar(PyObject *object);
+bool IsNPYArray(PyObject *object);
+bool IsNPYMaskedArray(PyObject *object);
+bool IsPandasDataFrame(PyObject *object);
 
 int PyAPIEnabled(void) {
        return (GDKgetenv_istrue(pyapi_enableflag)
@@ -55,8 +58,16 @@ static int pyapiInitialized = FALSE;
                if (pCol == NULL)                                               
                                          \
                {                                                               
                                                          \
                        pCol = (PyArrayObject*) PyArray_FromAny(pColO, NULL, 1, 
1,  NPY_ARRAY_CARRAY, NULL);  \
-                       msg = createException(MAL, "pyapi.eval", "Wrong return 
type in python function. Expected an array of type \"%s\" as return value, but 
the python function returned an array of type \"%s\".", #mtpe, 
NPYConstToString(PyArray_DTYPE(pCol)->type_num));        \
-                       goto wrapup;                                            
                                          \
+                       if (nptpe == NPY_UNICODE && 
PyArray_DTYPE(pCol)->type_num == NPY_STRING) \
+                       {                                                       
                                                                                
\
+                               msg = createException(MAL, "pyapi.eval", "Could 
not convert the string array to UTF-8. We currently only support UTF-8 
formatted strings."); \
+                               goto wrapup;                                    
                                          \
+                       }                                                       
                                                                \
+                       else                                                    
                                                        \
+                       {                                                       
    \
+                               msg = createException(MAL, "pyapi.eval", "Wrong 
return type in python function. Expected an array of type \"%s\" as return 
value, but the python function returned an array of type \"%s\".", #nptpe, 
NPYConstToString(PyArray_DTYPE(pCol)->type_num));       \
+                               goto wrapup;                                    
                                                  \
+                       }                                                       
                                                                \
                }                                                               
                                                          \
                count = PyArray_DIMS(pCol)[0];                                \
                bat = BATnew(TYPE_void, TYPE_##mtpe, count, TRANSIENT);         
\
@@ -87,8 +98,29 @@ static int pyapiInitialized = FALSE;
                } bat->T->nonil = 1 - bat->T->nil;                            \
                BATsetcount(bat, count); }
 
-//todo: NULL
-// TODO: also handle the case if someone returns a masked array
+
+#define NP_TO_BAT_MULTI(bat, mtpe, nptpe, npyconversion) {                     
            \
+               count = PyArray_DIMS((PyArrayObject*)pResult)[1]; \
+               pCol = (PyArrayObject*)PyArray_ZEROS(1, (npy_intp[1]) { count 
}, nptpe, false); \
+               if (pCol == NULL) \
+               { \
+                       msg = createException(MAL, "pyapi.eval", "Failure to 
create an empty array of type \"%s\", this might be because we ran out of 
memory.", #mtpe);        \
+                       goto wrapup;                                          \
+               } \
+               for(j = 0; j < count; j++) \
+               { \
+                       PyObject *obj = 
npyconversion(*(mtpe*)PyArray_GETPTR2((PyArrayObject*)pResult, i, j)); \
+                       PyArray_SETITEM((PyArrayObject*)pCol, 
PyArray_GETPTR1((PyArrayObject*)pCol, j), obj); \
+               } \
+               if (pMask != NULL) \
+               { \
+                       pMaskArray = (PyArrayObject*) PyArray_ZEROS(1, 
(npy_intp[1]) { count }, NPY_BOOL, 0); \
+                       for(j = 0; j < count; j++) \
+                       { \
+                               PyArray_SETITEM(pMaskArray, 
PyArray_GETPTR1(pMaskArray, j), PyArray_GETITEM((PyArrayObject*)pMask, 
PyArray_GETPTR2((PyArrayObject*)pMask, i, j))); \
+                       } \
+               } \
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to