Changeset: d62348db6043 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=d62348db6043
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
        monetdb5/extras/pyapi/pyapi.c
        monetdb5/extras/pyapi/pyapi.h
Branch: pyapi
Log Message:

Improved speed of generating the NULL mask.


diffs (145 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -191,12 +191,12 @@ function pyapi_test_input_null() {
 }
 
 function pyapi_test_output() {
-    pyapi_run_single_test "Output Testing (Zero Copy)" "" "OUTPUT" output_zerocopy "$OUTPUT_TESTING_NTESTS" "$OUTPUT_TESTING_SIZES"
+    pyapi_run_single_test "Output Testing (Zero Copy)" "--set gdk_mmap_minsize=99999999999999999999999" "OUTPUT" output_zerocopy "$OUTPUT_TESTING_NTESTS" "$OUTPUT_TESTING_SIZES"
     if [ $? -ne 0 ]; then
         return 1
     fi
 
-    pyapi_run_single_test "Output Testing (Copy)" "--set disable_pyzerocopyoutput=true" "OUTPUT" output_copy "$OUTPUT_TESTING_NTESTS" "$OUTPUT_TESTING_SIZES"
+    pyapi_run_single_test "Output Testing (Copy)" "--set disable_pyzerocopyoutput=true --set gdk_mmap_minsize=99999999999999999999999" "OUTPUT" output_copy "$OUTPUT_TESTING_NTESTS" "$OUTPUT_TESTING_SIZES"
     if [ $? -ne 0 ]; then
         return 1
     fi
@@ -254,12 +254,12 @@ function pyapi_test_string_unicode_ascii
 }
 
 function pyapi_test_bytearray_vs_string() {
-    pyapi_run_single_test "String Testing (ByteArray Object)" "" "STRING_SAMELENGTH" string_bytearrayobject "$STRINGSAMELENGTH_TESTING_NTESTS" "$STRINGSAMELENGTH_TESTING_SIZES"
+    pyapi_run_single_test "String Testing (ByteArray Object)" "--set enable_bytearray=true" "STRING_SAMELENGTH" string_bytearrayobject "$STRINGSAMELENGTH_TESTING_NTESTS" "$STRINGSAMELENGTH_TESTING_SIZES"
     if [ $? -ne 0 ]; then
         return 1
     fi
 
-    pyapi_run_single_test "String Testing (String Object)" "--set disable_bytearray=true" "STRING_SAMELENGTH" string_stringobject "$STRINGSAMELENGTH_TESTING_NTESTS" "$STRINGSAMELENGTH_TESTING_SIZES"
+    pyapi_run_single_test "String Testing (String Object)" "" "STRING_SAMELENGTH" string_stringobject "$STRINGSAMELENGTH_TESTING_NTESTS" "$STRINGSAMELENGTH_TESTING_SIZES"
     if [ $? -ne 0 ]; then
         return 1
     fi
diff --git a/monetdb5/extras/pyapi/pyapi.c b/monetdb5/extras/pyapi/pyapi.c
--- a/monetdb5/extras/pyapi/pyapi.c
+++ b/monetdb5/extras/pyapi/pyapi.c
@@ -54,13 +54,15 @@ const char* zerocopyoutput_disableflag =
 const char* numpy_string_array_enableflag = "enable_numpystringarray";
 const char* alwaysunicode_enableflag = "enable_alwaysunicode";
 const char* lazyarray_enableflag = "enable_lazyarray";
-const char* bytearray_disableflag = "disable_bytearray";
+const char* oldnullmask_enableflag = "enable_oldnullmask";
+const char* bytearray_enableflag = "enable_bytearray";
 const char* benchmark_output_flag = "pyapi_benchmark_output";
 static bool option_zerocopyinput;
 static bool option_zerocopyoutput;
 static bool option_numpy_string_array;
 static bool option_bytearray;
 static bool option_lazyarray;
+static bool option_oldnullmask;
 static bool option_alwaysunicode;
 static char *benchmark_output;
 #endif
@@ -1250,7 +1252,8 @@ str
         option_zerocopyinput = !(GDKgetenv_isyes(zerocopyinput_disableflag) || GDKgetenv_istrue(zerocopyinput_disableflag));
         option_zerocopyoutput = !(GDKgetenv_isyes(zerocopyoutput_disableflag) || GDKgetenv_istrue(zerocopyoutput_disableflag));
         option_numpy_string_array = GDKgetenv_isyes(numpy_string_array_enableflag) || GDKgetenv_istrue(numpy_string_array_enableflag);
-        option_bytearray = !(GDKgetenv_isyes(bytearray_disableflag) || GDKgetenv_istrue(bytearray_disableflag));
+        option_bytearray = GDKgetenv_isyes(bytearray_enableflag) || GDKgetenv_istrue(bytearray_enableflag);
+        option_oldnullmask = GDKgetenv_isyes(oldnullmask_enableflag) || GDKgetenv_istrue(oldnullmask_enableflag);
         option_lazyarray = GDKgetenv_isyes(lazyarray_enableflag) || GDKgetenv_istrue(lazyarray_enableflag);
         option_alwaysunicode = (GDKgetenv_isyes(alwaysunicode_enableflag) || GDKgetenv_istrue(alwaysunicode_enableflag));
         benchmark_output = GDKgetenv(benchmark_output_flag);
@@ -1739,22 +1742,64 @@ wrapup:
     return NULL;
 }
 
+#define CreateNullMask(tpe)                                        \
+    for(j = 0; j < count; j++) {                                   \
+        mask_data[j] = *((tpe*)BUNtail(bi, BUNfirst(b) + j)) == tpe##_nil;  \
+        found_nil = found_nil || mask_data[j];                     \
+    }                                                             
+
 PyObject *PyNullMask_FromBAT(BAT *b, size_t t_start, size_t t_end)
 {
     // We will now construct the Masked array, we start by setting everything to False
-    PyArrayObject* nullmask = (PyArrayObject*) PyArray_ZEROS(1, (npy_intp[1]) {(t_end - t_start)}, NPY_BOOL, 0);
+    size_t count = t_end - t_start;
+    PyArrayObject* nullmask = (PyArrayObject*) PyArray_ZEROS(1, (npy_intp[1]) {( count )}, NPY_BOOL, 0);
     const void *nil = ATOMnilptr(b->ttype);
-    int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
     size_t j;
     bool found_nil = false;
     BATiter bi = bat_iterator(b);
+    bool *mask_data = (bool*)PyArray_DATA(nullmask);
 
-    for (j = 0; j < t_end - t_start; j++) {
-        if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + t_start + j), nil) == 0) {
-            ((bool*)PyArray_DATA(nullmask))[j] = true;
-            found_nil = true;
+#ifdef _PYAPI_TESTING_
+    if (!option_oldnullmask) {
+#endif
+    switch(ATOMstorage(getColumnType(b->T->type)))
+    {
+        case TYPE_bit: CreateNullMask(bit); break;
+        case TYPE_bte: CreateNullMask(bte); break;
+        case TYPE_sht: CreateNullMask(sht); break;
+        case TYPE_int: CreateNullMask(int); break;
+        case TYPE_lng: CreateNullMask(lng); break;
+        case TYPE_flt: CreateNullMask(flt); break;
+        case TYPE_dbl: CreateNullMask(dbl); break;
+#ifdef HAVE_HGE
+        case TYPE_hge: CreateNullMask(hge); break;
+#endif
+        case TYPE_str:
+        {
+            int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
+            for (j = 0; j < count; j++) {
+                mask_data[j] = (*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0;
+                found_nil = found_nil || mask_data[j];
+            }
+            break;
+        }
+        default:
+            //todo: do something with the error?
+            return NULL;
+    }
+#ifdef _PYAPI_TESTING_
+    } else {
+        int (*atomcmp)(const void *, const void *) = ATOMcompare(b->ttype);
+        for (j = 0; j < count; j++) {
+            if ((*atomcmp)(BUNtail(bi, BUNfirst(b) + j), nil) == 0) {
+                ((bool*)PyArray_DATA(nullmask))[j] = true;
+                found_nil = true;
+            }
         }
     }
+#endif
+       
+    
     if (!found_nil) {
         Py_DECREF(nullmask);
         Py_RETURN_NONE;
diff --git a/monetdb5/extras/pyapi/pyapi.h b/monetdb5/extras/pyapi/pyapi.h
--- a/monetdb5/extras/pyapi/pyapi.h
+++ b/monetdb5/extras/pyapi/pyapi.h
@@ -26,7 +26,6 @@
 #define _PYAPI_WARNINGS_
 // Enable debug mode, does literally nothing right now, but hey we have this nice #define here anyway
 #define _PYAPI_DEBUG_
- #define _PYAPI_TESTING_
 #endif
 
 #ifdef _PYAPI_VERBOSE_
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to