Changeset: 77ad4b6244eb for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=77ad4b6244eb
Modified Files:
configure.ag
monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
monetdb5/extras/pyapi/connection.c
monetdb5/extras/pyapi/formatinput.c
monetdb5/extras/pyapi/pyapi.c
monetdb5/extras/pyapi/pytypes.c
monetdb5/extras/pyapi/type_conversion.c
monetdb5/extras/pyapi/type_conversion.h
monetdb5/extras/pyapi/unicode.h
sql/backends/monet5/Tests/pyapi00.sql
sql/backends/monet5/Tests/pyapi09.sql
sql/backends/monet5/Tests/pyapi14.sql
sql/backends/monet5/Tests/pyapi16.sql
sql/backends/monet5/Tests/pyapi21.sql
sql/backends/monet5/Tests/pyapi24.sql
Branch: pythonudf
Log Message:
Added experimental Python3 support.
Two new configure options allow you to select the Python interpreter and the
python-config script that supplies the compile and link flags.
--with-pyversion=FILE specifies the Python interpreter (e.g. /usr/bin/python3
for Python 3)
--with-pyconfig=FILE specifies the python-config script (e.g.
/usr/bin/python3-config for Python 3)
Note that Python3 support is not complete: loading marshalled code objects is
disabled (Tests: pyapi08, pyapi14) and returning 'bytes' objects triggers an
assertion (Tests: pyapi09, pyapi21).
In the future, we might want to have separate languages for Python2 and Python3
(e.g. LANGUAGE PYTHON, LANGUAGE PYTHON3), just like Postgres.
diffs (truncated from 743 to 300 lines):
diff --git a/configure.ag b/configure.ag
--- a/configure.ag
+++ b/configure.ag
@@ -1235,6 +1235,14 @@ case "$have_python3" in
;;
esac
+AC_ARG_WITH(pyconfig,
+ AS_HELP_STRING([--with-pyconfig=FILE], [python-config is installed as
FILE]),
+ have_pyconfig="$withval")
+
+AC_ARG_WITH(pyversion,
+ AS_HELP_STRING([--with-pyversion=FILE], [python is installed as FILE]),
+ have_pyversion="$withval")
+
dnl Figure out a default for PYTHON2 or PYTHON3
AC_PATH_PROG(PYTHON,python,no,$PATH)
PYTHON_MAJ=unknown
@@ -2305,24 +2313,33 @@ if test "x$enable_pyintegration" != xno;
AC_MSG_ERROR([--enable-pyintegration value must be
yes|no|auto|absolute path of python-config])
;;
esac
- AC_PATH_PROG(PYCMD,python-config,,$XPATH)
- if test "x$PYCMD" = x; then
- if test "x$enable_pyintegration" = xyes; then
- AC_MSG_ERROR([python-config library required for Python
integration support])
- else
- have_libpy="no"
- why_have_libpy="(python-config command not found)"
- enable_pyintegration=no
- disable_pyintegration="(python-config command not
found)"
+ if test "x$have_pyconfig" = x; then
+ AC_PATH_PROG(PYCMD,python-config,,$XPATH)
+ if test "x$PYCMD" = x; then
+ if test "x$enable_pyintegration" = xyes; then
+ AC_MSG_ERROR([python-config library required
for Python integration support])
+ else
+ have_libpy="no"
+ why_have_libpy="(python-config command not
found)"
+ enable_pyintegration=no
+ disable_pyintegration="(python-config command
not found)"
+ fi
fi
+ else
+ PYCMD="$have_pyconfig"
fi
- PYTHON_CMD=`$PYCMD --exec-prefix`/bin/python
- NUMPYVER=`$PYTHON_CMD -c "import numpy; print
numpy.__version__.split('.').__getitem__(1) >= 7"`
+
+ if test "x$have_pyversion" = x; then
+ PYTHON_CMD=$PYTHON2
+ else
+ PYTHON_CMD=$have_pyversion
+ fi
+ NUMPYVER=`$PYTHON_CMD -c "import numpy;
print(int(numpy.__version__.split('.').__getitem__(1)) >= 7)"`
#check numpyconfig.h because autoconf tests includes by compiling a
small C program, and other numpy headers do not compile without Python.h
AC_CHECK_HEADER(
[numpy/numpyconfig.h],
[NUMPYHEADERS=True],
- [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os; print
os.path.isfile(os.path.join(numpy.get_include(), 'numpy/arrayobject.h'))"`]
+ [NUMPYHEADERS=`$PYTHON_CMD -c "import numpy, os;
print(os.path.isfile(os.path.join(numpy.get_include(),
'numpy/arrayobject.h')))"`]
)
if [test "x$NUMPYVER" = x] || [test "x$NUMPYVER" = xFalse]; then
if test "x$enable_pyintegration" = xyes; then
@@ -2342,7 +2359,9 @@ if test "x$enable_pyintegration" != xno;
enable_pyintegration=no
disable_pyintegration="(numpy/arrayobject.h not found)"
fi
- elif libpy_CFLAGS=`\`$PYCMD --exec-prefix\`/bin/python -c "from
distutils.sysconfig import get_python_inc; import numpy; print ' -I' +
get_python_inc() + ' -I' + numpy.get_include();"` && libpy_LIBS=`$PYCMD
--ldflags`; then
+ else
+ libpy_CFLAGS=`$PYTHON_CMD -c "from distutils.sysconfig import
get_python_inc; import numpy; print(' -I' + get_python_inc() + ' -I' +
numpy.get_include());"`
+ libpy_LIBS=`$PYCMD --ldflags`
have_libpy=yes
AC_DEFINE(HAVE_LIBPY, 1, [Define if we can link to python])
AC_SUBST(libpy_CFLAGS, $libpy_CFLAGS)
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_numpy_numeric_nested.malC
@@ -73,7 +73,7 @@ bat.append(fib, 28:int);
bat.append(fib, 29:int);
# define a function within the main function and map it to a BAT
-s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr ==
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0,
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn
b\nreturn(map(fibonacci, arg1))", fib);
+s:bat[:oid,:dbl] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr ==
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0,
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn
b\nreturn(list(map(fibonacci, arg1)))", fib);
io.print(s);
indices:= bat.new(:oid,:int);
@@ -231,5 +231,5 @@ bat.append(indices, 150:int);
# now do it while returning a hge, to test returning very large numbers
-s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr ==
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0,
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn
b\nreturn(map(fibonacci, arg1))", indices);
+s:bat[:oid,:hge] := pyapi.eval(nil:ptr, "def fibonacci(nmbr):\n\tif (nmbr ==
0): return 0\n\tif (nmbr == 1): return 1\n\ta = 0\n\tb = 1\n\tfor i in range(0,
nmbr - 1):\n\t\tc = a + b\n\t\ta = b\n\t\tb = c\n\treturn
b\nreturn(list(map(fibonacci, arg1)))", indices);
io.print(s);
diff --git a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
--- a/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
+++ b/monetdb5/extras/pyapi/Tests/pyapi_types_numeric.malC
@@ -110,6 +110,6 @@ bat.append(bint,846930886:int);
bat.append(bint,67:int);
bat.append(bint,124124124:int);
rstr:bat[:oid,:str] := pyapi.eval(nil:ptr,"return(arg1)",bint);
-rint:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([map(int, arg1)])",rstr);
+rint:bat[:oid,:int] := pyapi.eval(nil:ptr,"return([list(map(int,
arg1))])",rstr);
io.print(rint);
diff --git a/monetdb5/extras/pyapi/connection.c
b/monetdb5/extras/pyapi/connection.c
--- a/monetdb5/extras/pyapi/connection.c
+++ b/monetdb5/extras/pyapi/connection.c
@@ -14,6 +14,12 @@
#endif
#include <numpy/arrayobject.h>
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_CheckExact PyUnicode_CheckExact
+#define PyString_FromString PyUnicode_FromString
+#endif
+
static PyObject *
_connection_execute(Py_ConnectionObject *self, PyObject *args)
{
@@ -28,8 +34,14 @@ static PyObject *
PyObject *result;
res_table* output = NULL;
char *res = NULL;
+ char *query;
+#ifndef IS_PY3K
+ query = ((PyStringObject*)args)->ob_sval;
+#else
+ query = PyUnicode_AsUTF8(args);
+#endif
- res = _connection_query(self->cntxt, ((PyStringObject*)args)->ob_sval,
&output);
+ res = _connection_query(self->cntxt, query, &output);
if (res != MAL_SUCCEED) {
PyErr_Format(PyExc_Exception, "SQL Query Failed: %s", (res ? res :
"<no error>"));
return NULL;
@@ -67,10 +79,16 @@ static PyObject *
else
#ifdef HAVE_FORK
{
+ char *query;
+#ifndef IS_PY3K
+ query = ((PyStringObject*)args)->ob_sval;
+#else
+ query = PyUnicode_AsUTF8(args);
+#endif
// This is a mapped process, we do not want forked processes to touch
the database
// Only the main process may touch the database, so we ship the query
back to the main process
// copy the query into shared memory and tell the main process there
is a query to handle
- strncpy(self->query_ptr->query, ((PyStringObject*)args)->ob_sval,
8192);
+ strncpy(self->query_ptr->query, query, 8192);
self->query_ptr->pending_query = true;
//free the main process so it can work on the query
change_semaphore_value(self->query_sem, 0, 1);
@@ -213,6 +231,9 @@ PyTypeObject Py_ConnectionType = {
0,
0,
0
+#ifdef IS_PY3K
+ ,0
+#endif
};
void _connection_cleanup_result(void* output)
diff --git a/monetdb5/extras/pyapi/formatinput.c
b/monetdb5/extras/pyapi/formatinput.c
--- a/monetdb5/extras/pyapi/formatinput.c
+++ b/monetdb5/extras/pyapi/formatinput.c
@@ -10,9 +10,18 @@
#include "gdk.h"
#include "mal_exception.h"
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_FromStringAndSize PyUnicode_FromStringAndSize
+#endif
+
const size_t additional_argcount = 3;
const char * additional_args[] = {"_columns", "_column_types", "_conn"};
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#endif
+
//! Parse a PyCodeObject from a string, the string is expected to be in the
format {@<encoded_function>};, where <encoded_function> is the Marshalled code
object
PyObject *PyCodeObject_ParseString(char *string, char **msg);
PyObject *PyCodeObject_ParseString(char *string, char **msg)
@@ -94,10 +103,14 @@ char* FormatCode(char* code, char **args
char base_start[] = "def pyfun(";
char base_end[] = "):\n";
*msg = NULL;
+#ifndef IS_PY3K
if (code[1] == '@') {
*code_object = PyCodeObject_ParseString(code, msg);
return NULL;
}
+#else
+ (void) code_object;
+#endif
indentation_levels = (size_t*)GDKzalloc(max_indentation * sizeof(size_t));
statements_per_level = (size_t*)GDKzalloc(max_indentation *
sizeof(size_t));
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -46,6 +46,21 @@
#include <sys/wait.h>
#endif
+#if PY_MAJOR_VERSION >= 3
+#define IS_PY3K
+#define PyString_FromString PyUnicode_FromString
+#define PyString_Check PyUnicode_Check
+#define PyString_CheckExact PyUnicode_CheckExact
+#define PyString_AsString PyUnicode_AsUTF8
+#define PyString_AS_STRING PyUnicode_AsUTF8
+#define PyInt_FromLong PyLong_FromLong
+#define PyInt_Check PyLong_Check
+#define PythonUnicodeType char
+#else
+#define PythonUnicodeType Py_UNICODE
+
+#endif
+
const char* pyapi_enableflag = "embedded_py";
const char* verbose_enableflag = "enable_pyverbose";
const char* warning_enableflag = "enable_pywarnings";
@@ -318,7 +333,7 @@ Array of type %s no copying will be need
case NPY_DOUBLE:
\
case NPY_LONGDOUBLE: NP_COL_BAT_LOOP(bat, mtpe, dbl); break;
\
case NPY_STRING: NP_COL_BAT_LOOP_FUNC(bat, mtpe,
str_to_##mtpe, char); break;
\
- case NPY_UNICODE: NP_COL_BAT_LOOP_FUNC(bat, mtpe,
unicode_to_##mtpe, Py_UNICODE); break;
\
+ case NPY_UNICODE: NP_COL_BAT_LOOP_FUNC(bat, mtpe,
unicode_to_##mtpe, PythonUnicodeType); break;
\
case NPY_OBJECT: NP_COL_BAT_LOOP_FUNC(bat, mtpe,
pyobject_to_##mtpe, PyObject*); break;
\
default:
\
msg = createException(MAL, "pyapi.eval", "Unrecognized
type. Could not convert to %s.\n", BatType_Format(TYPE_##mtpe));
\
@@ -1265,7 +1280,11 @@ aggrwrapup:
msg = createException(MAL, "pyapi.eval", "Expected a
string key in the dictionary, but received an object of type %s",
colname->ob_type->tp_name);
goto wrapup;
}
+#ifndef IS_PY3K
retnames[i] = ((PyStringObject*)colname)->ob_sval;
+#else
+ retnames[i] = PyUnicode_AsUTF8(colname);
+#endif
}
}
pResult = PyDict_CheckForConversion(pResult, retcols, retnames,
&msg);
@@ -1614,7 +1633,11 @@ str
bool PyType_IsPyScalar(PyObject *object)
{
if (object == NULL) return false;
- return (PyArray_CheckScalar(object) || PyInt_Check(object) ||
PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) ||
PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object));
+ return (PyArray_CheckScalar(object) || PyInt_Check(object) ||
PyFloat_Check(object) || PyLong_Check(object) || PyString_Check(object) ||
PyBool_Check(object) || PyUnicode_Check(object) || PyByteArray_Check(object)
+#ifdef IS_PY3K
+ || PyBytes_Check(object)
+#endif
+ );
}
@@ -2417,7 +2440,7 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret
b->T->nil = 1;
BUNappend(b, str_nil, FALSE);
} else {
- if (!string_copy(&data[(index_offset * ret->count
+ iu) * ret->memory_size], utf8_string, ret->memory_size)) {
+ if (!string_copy(&data[(index_offset * ret->count
+ iu) * ret->memory_size], utf8_string, ret->memory_size, true)) {
msg = createException(MAL, "pyapi.eval",
"Invalid string encoding used. Please return a regular ASCII string, or a
Numpy_Unicode object.\n");
goto wrapup;
}
@@ -2462,21 +2485,32 @@ BAT *PyObject_ConvertToBAT(PyReturn *ret
} else {
//we try to handle as many types as possible
PyObject *obj = *((PyObject**) &data[(index_offset
* ret->count + iu) * ret->memory_size]);
+#ifndef IS_PY3K
if (PyString_CheckExact(obj)) {
char *str = ((PyStringObject*)obj)->ob_sval;
- if (!string_copy(str, utf8_string, strlen(str)
+ 1)) {
+ if (!string_copy(str, utf8_string, strlen(str)
+ 1, false)) {
msg = createException(MAL, "pyapi.eval",
"Invalid string encoding used. Please return a regular ASCII string, or a
Numpy_Unicode object.\n");
goto wrapup;
}
- } else if (PyByteArray_CheckExact(obj)) {
+ } else
+#endif
+ if (PyByteArray_CheckExact(obj)) {
char *str =
((PyByteArrayObject*)obj)->ob_bytes;
- if (!string_copy(str, utf8_string, strlen(str)
+ 1)) {
+ if (!string_copy(str, utf8_string, strlen(str)
+ 1, false)) {
msg = createException(MAL, "pyapi.eval",
"Invalid string encoding used. Please return a regular ASCII string, or a
Numpy_Unicode object.\n");
goto wrapup;
}
} else if (PyUnicode_CheckExact(obj)) {
+#ifndef IS_PY3K
Py_UNICODE *str =
(Py_UNICODE*)((PyUnicodeObject*)obj)->str;
utf32_to_utf8(0,
((PyUnicodeObject*)obj)->length, utf8_string, str);
+#else
+ char *str = PyUnicode_AsUTF8(obj);
+ if (!string_copy(str, utf8_string, strlen(str)
+ 1, true)) {
+ msg = createException(MAL, "pyapi.eval",
"Invalid string encoding used. Please return a regular ASCII string, or a
Numpy_Unicode object.\n");
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list