Changeset: 1372ff2b60db for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=1372ff2b60db
Modified Files:
        monetdb5/extras/pyapi/pyapi.c
Branch: pythonudf
Log Message:

Perform duplicate elimination on small string heaps to speed up string 
conversion.


diffs (112 lines):

diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -10,6 +10,7 @@
 #include "mal.h"
 #include "mal_stack.h"
 #include "mal_linker.h"
+#include "gdk_atoms.h"
 #include "gdk_utils.h"
 #include "gdk.h"
 #include "sql_catalog.h"
@@ -1889,32 +1890,80 @@ PyObject *PyArrayObject_FromBAT(PyInput 
                 PyObject **data = 
((PyObject**)PyArray_DATA((PyArrayObject*)vararray));
                 PyObject *obj;
                 j = 0;
-                if (unicode) {
-                    BATloop(b, p, q) {
-                        char *t = (char *) BUNtail(li, p);
-                        if (strcmp(t, str_nil) == 0) {
-                             //str_nil isn't a valid UTF-8 character (it's 
0x80), so we can't decode it as UTF-8 (it will throw an error)
-                            obj = PyUnicode_FromString("-");
-                        } else {
-                            //otherwise we can just decode the string as UTF-8
-                            obj = PyUnicode_FromString(t);
-                        }
-
-                        if (obj == NULL) {
-                            msg = createException(MAL, "pyapi.eval", "Failed 
to create string.");
+                if (unicode) {                    
+                    if (GDK_ELIMDOUBLES(b->T->vheap)) {
+                        PyObject** pyptrs = GDKzalloc(b->T->vheap->free * 
sizeof(PyObject*));
+                        if (!pyptrs) {
+                            msg = createException(MAL, "pyapi.eval", 
MAL_MALLOC_FAIL" PyObject strings.");
                             goto wrapup;
                         }
-                        data[j++] = obj;
+                        BATloop(b, p, q) {
+                            const char *t = (const char *) BUNtail(li, p);
+                            ptrdiff_t offset = t - b->T->vheap->base;
+                            if (!pyptrs[offset]) {
+                                if (strcmp(t, str_nil) == 0) {
+                                     //str_nil isn't a valid UTF-8 character 
(it's 0x80), so we can't decode it as UTF-8 (it will throw an error)
+                                    pyptrs[offset] = PyUnicode_FromString("-");
+                                } else {
+                                    //otherwise we can just decode the string 
as UTF-8
+                                    pyptrs[offset] = PyUnicode_FromString(t);
+                                }
+                                if (!pyptrs[offset]) {
+                                    msg = createException(MAL, "pyapi.eval", 
"Failed to create string.");
+                                    goto wrapup;
+                                }
+
+                            }
+                            data[j++] = pyptrs[offset];
+                        }
+                        GDKfree(pyptrs);
+                    }
+                    else {
+                    BATloop(b, p, q) {
+                            char *t = (char *) BUNtail(li, p);
+                            if (strcmp(t, str_nil) == 0) {
+                                 //str_nil isn't a valid UTF-8 character (it's 
0x80), so we can't decode it as UTF-8 (it will throw an error)
+                                obj = PyUnicode_FromString("-");
+                            } else {
+                                //otherwise we can just decode the string as 
UTF-8
+                                obj = PyUnicode_FromString(t);
+                            }
+
+                            if (obj == NULL) {
+                                msg = createException(MAL, "pyapi.eval", 
"Failed to create string.");
+                                goto wrapup;
+                            }
+                            data[j++] = obj;
+                        }
                     }
                 } else {
-                    BATloop(b, p, q) {
-                        char *t = (char *) BUNtail(li, p);
-                        obj = PyString_FromString(t);
-                        if (obj == NULL) {
-                            msg = createException(MAL, "pyapi.eval", "Failed 
to create string.");
+                    /* special case where we exploit the duplicate-eliminated 
string heap */
+                    if (GDK_ELIMDOUBLES(b->T->vheap)) {
+                        PyObject** pyptrs = GDKzalloc(b->T->vheap->free * 
sizeof(PyObject*));
+                        if (!pyptrs) {
+                            msg = createException(MAL, "pyapi.eval", 
MAL_MALLOC_FAIL" PyObject strings.");
                             goto wrapup;
                         }
-                        data[j++] = obj;
+                        BATloop(b, p, q) {
+                            const char *t = (const char *) BUNtail(li, p);
+                            ptrdiff_t offset = t - b->T->vheap->base;
+                            if (!pyptrs[offset]) {
+                                pyptrs[offset] = PyString_FromString(t);
+                            }
+                            data[j++] = pyptrs[offset];
+                        }
+                        GDKfree(pyptrs);
+                    }
+                    else {
+                        BATloop(b, p, q) {
+                            char *t = (char *) BUNtail(li, p);
+                            obj = PyString_FromString(t);
+                            if (obj == NULL) {
+                                msg = createException(MAL, "pyapi.eval", 
"Failed to create string.");
+                                goto wrapup;
+                            }
+                            data[j++] = obj;
+                        }
                     }
                 }
             }
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to