Changeset: 686bdd7e4220 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=686bdd7e4220
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/graph.py
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
Branch: pyapi
Log Message:

Added a plpython quantile benchmark and added optimizations for the postgres/pytables benchmarks.


diffs (229 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/graph.py b/monetdb5/extras/pyapi/Benchmarks/graph.py
--- a/monetdb5/extras/pyapi/Benchmarks/graph.py
+++ b/monetdb5/extras/pyapi/Benchmarks/graph.py
@@ -21,7 +21,7 @@ y_log = False
 line_plot = False
 fill_time = False
 data_start = 3
-graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 
'white', 'orange', 'darkgreen', 'darkgray', 'gold', 'darkorchid', 'darkred', 
'violet', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin', 
'papayawhip', 'turquoise', 'violet']
+graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black', 
'white', 'orange', 'lightgreen', 'darkgray', 'gold', 'darkorchid', 'darkred', 
'violet', 'powderblue', 'rosybrown', 'midnightblue', 'moccasin', 'papayawhip', 
'turquoise', 'violet']
 for i in range(3, len(arguments)):
     if '-xlog' in arguments[i]: x_log = True
     elif '-ylog' in arguments[i]: y_log = True
@@ -104,7 +104,7 @@ def position_log(x, graphnr, graphs):
         position += width_log(x, i, graphs)
     return position
 def width_normal(x, graphnr, graphs):
-    return x / graphs * 2
+    return x / graphs * 3
 def position_normal(x, graphnr, graphs):
     position = x
     for i in range(0, graphnr):
diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -607,30 +607,50 @@ else:
         def postgres_init():
             if str(args_input_database).lower() == "plpython":
                 if function_name == "identity":
-                    source = "  return a"
+                    source = "return a[0]"
+                    return_value = "integer"
                 elif function_name == "sqroot":
-                    source = "  import math\n  return math.sqrt(abs(a))"
+                    source = "return numpy.sqrt(numpy.abs(a))"
+                    return_value = "float"
+                elif function_name == "quantile":
+                    source = "return numpy.percentile(a, 50)"
+                    return_value = "float"
                 else: raise Exception("Unsupported function %s" % 
function_name)
 
                 createdb_sql = """
+DROP TABLE IF EXISTS integers;
 CREATE TABLE integers(i integer);
-
 COPY integers FROM '%s' DELIMITER ',' CSV;
 
-CREATE FUNCTION %s(a integer)
+DROP TABLE IF EXISTS dummy;
+CREATE TABLE dummy(i integer);
+INSERT INTO dummy VALUES (0);
+
+DROP FUNCTION IF EXISTS %s(integer);
+CREATE FUNCTION %s(inp integer)
   RETURNS %s
 AS $$
+import numpy
+cursor = plpy.cursor('SELECT i FROM integers')
+a = numpy.array([], dtype=numpy.int32)
+while True:
+   rv = cursor.fetch(10000000)
+   if not rv: break
+   python_array = [x['i'] for x in rv]
+   numpy_array = numpy.array(python_array, dtype=numpy.int32)
+   a = numpy.concatenate( (a, numpy_array) )
 %s
-$$ LANGUAGE plpythonu;""" % (input_file, function_name, return_value, source)
+$$ LANGUAGE plpythonu;""" % (input_file, function_name, function_name, 
return_value, source)
 
                 run_sql = """
-                SELECT MIN(%s(i)) FROM integers;
+                SELECT %s(i) FROM dummy;
                 """ % function_name
             elif str(args_input_database).lower() == "postgres":
                 if function_name == "quantile":
                     run_sql = "SELECT percentile_cont(0.5) WITHIN GROUP(ORDER 
BY i) FROM integers;"
                 else: raise Exception("Unsupported function %s" % 
function_name)
                 createdb_sql = """
+                DROP TABLE IF EXISTS integers;
                 CREATE TABLE integers(i integer);
 
                 COPY integers FROM '%s' DELIMITER ',' CSV;""" % input_file
@@ -734,7 +754,11 @@ AS $$
         elif str(args_input_database).lower() == "monetdbmapi":
             def monetdb_execute():
                 c.execute('SELECT * FROM integers')
-                result = c.fetchall()
+                result = numpy.array([], dtype=numpy.int32)
+                while True:
+                    arr = c.fetchmany(10000)
+                    if len(arr) == 0: break
+                    result = numpy.concatenate((result, arr[0]))
                 function(numpy.array(result, dtype=numpy.int32))
         elif str(args_input_database).lower() == "monetdb":
             def monetdb_execute():
@@ -758,7 +782,7 @@ AS $$
         dropdb = os.environ["POSTGRES_DROPDB_COMMAND"]
         os.system(initdb)
         import psycopg2
-        conn = psycopg2.connect("dbname=%s host=/tmp/" % dbname)
+        conn = psycopg2.connect(dbname=dbname, host="/tmp/")
         c = conn.cursor()
         def psycopg2_init():
             return None
@@ -769,23 +793,30 @@ AS $$
 
 
         def psycopg2_execute():
-            c.execute("SELECT * FROM integers;")
-            result = c.fetchall()
+            c2 = conn.cursor("named_cursor")
+            c2.execute("SELECT * FROM integers;")
+            result = numpy.array([], dtype=numpy.int32)
+            while True:
+                arr = c2.fetchmany(10000)
+                if len(arr) == 0: break
+                result = numpy.concatenate((result, arr[0]))
             function(numpy.array(result, dtype=numpy.int32))
+            c2.close()
 
         def psycopg2_clear():
             c.execute("DROP TABLE integers;")
 
         def psycopg2_final():
+            c.close()
+            conn.close()
             os.system(dropdb)
-            conn.close()
             os.remove(input_file)
 
         execute_test(input_type, psycopg2_init, psycopg2_load, 
psycopg2_execute, psycopg2_clear, psycopg2_final)
     elif str(args_input_database).lower() == "pytables":
         import tables, pandas as pd
 
-        table_file = 'testfile.h5'
+        table_file = 'testfile.h5file'
 
         description = dict()
         description['i'] = tables.Int32Col()
@@ -794,24 +825,21 @@ AS $$
             return None
 
         def pytables_load():
-            file = tables.open_file(table_file, mode='w', title='test file')
-            group = file.create_group('/', 'integers', 'integer_data')
-            table = file.create_table(group, 'values', description, "example")
-            values = table.row
-            for x in pd.read_csv(input_file).values:
-                values['i'] = int(x)
-                values.append()
-            table.flush()
-            file.close()
+            h5file = tables.open_file(table_file, mode='w', title='Benchmark 
Integers')
+            root = h5file.root
+            numpy_array = numpy.array(pd.read_csv(input_file).values, 
dtype=numpy.int32)
+            h5file.create_array(root, 'integers', numpy_array)
+            h5file.flush()
+            h5file.close()
 
         def pytables_execute():
-            file = tables.open_file(table_file, mode='r')
-            table = file.root.integers.values
-            result = [x['i'] for x in table.iterrows()]
-            function(numpy.array(result, dtype=numpy.int32))
+            h5file = tables.open_file(table_file, mode='r', 
driver="H5FD_CORE", driver_core_backing_store=0)
+            array = h5file.root.integers.read()
+            function(array)
+            h5file.close()
 
         def pytables_clear():
-            os.remove('testfile.h5')
+            os.remove(table_file)
 
         def pytables_final():
             os.remove(input_file)
diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -209,7 +209,7 @@ function monetdbmapi_run_single_test() {
 
 function postgres_run_single_test() {
     # start server
-    setsid $POSTGRES_SERVER_COMMAND > /dev/null && sleep 5
+    setsid $POSTGRES_SERVER_COMMAND -c autovacuum=off -c random_page_cost=3.5 
-c geqo_threshold=15 -c from_collapse_limit=14 -c join_collapse_limit=14 -c 
default_statistics_target=10000 -c constraint_exclusion=on -c 
checkpoint_completion_target=0.9 -c wal_buffers=16MB -c checkpoint_segments=128 
-c shared_buffers=256GB -c effective_cache_size=768GB -c work_mem=128GB > 
/dev/null && sleep 5
     # call python test script
     python "$PYAPI_TESTFILE" $5 $1 $2 $3 $MSERVER_PORT $4
     # finish testing, kill postgres
@@ -521,11 +521,11 @@ export ntests_sqroot=3
 export sizes_sqroot="10"
 
 export ntests_quantile=3
-export sizes_quantile="10 100 1000"
+export sizes_quantile="1 1000"
 
 export PYTHON_TESTS=("identity" "sqroot" "quantile")
 export PYTHON_MAP_TESTS=("identity" "sqroot")
-export PLPYTHON_TESTS=("identity" "sqroot")
+export PLPYTHON_TESTS=("quantile")
 export POSTGRES_TESTS=("quantile")
 export MONETDB_TESTS=("quantile")
 
@@ -748,7 +748,9 @@ function comparison_graph() {
 
 }
 
-export BUILD_DIR=/export/scratch1/raasveld/build
+export BUILD_DIR=/export/scratch2/raasveld/build
+export CPATH=/export/scratch2/raasveld/build/include
+export LIBRARY_PATH=/export/scratch2/raasveld/build/lib
 function install_cfitsio() {
     wget ftp://heasarc.gsfc.nasa.gov/software/fitsio/c/cfitsio_latest.tar.gz 
&& tar xvzf cfitsio_latest.tar.gz && cd cfitsio && ./configure --enable-sse2 
--prefix=$BUILD_DIR --enable-reentrant && make install
 }
@@ -762,9 +764,15 @@ function install_casacore() {
     wget https://github.com/casacore/casacore/archive/master.zip && unzip 
master.zip && rm master.zip && cd casacore-master && mkdir build && cd build && 
cmake -DCMAKE_INSTALL_PREFIX:PATH=$BUILD_DIR -DBUILD_PYTHON=ON .. && make all 
install
 }
 
+function install_pythoncasacore() {
+    wget https://github.com/casacore/python-casacore/archive/master.zip && 
unzip master.zip && rm master.zip && cd python-casacore-master && python 
setup.py install --user
+}
+
 function install_lofar() {
     wget https://github.com/transientskp/tkp/archive/master.zip && unzip 
master.zip && rm master.zip && cd tkp-master && python setup.py install --user
 }
 
-#export PYAPI_TESTFILE=/local/raasveld/monetdb_testing.py
-#export LD_LIBRARY_PATH=/local/raasveld/build/lib
+
+
+export PYAPI_TESTFILE=/local/raasveld/monetdb_testing.py
+export LD_LIBRARY_PATH=/local/raasveld/build/lib
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to