Changeset: 686bdd7e4220 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=686bdd7e4220
Modified Files:
monetdb5/extras/pyapi/Benchmarks/graph.py
monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
Branch: pyapi
Log Message:
Added plpython quantile benchmark and added optimizations for postgres/pytables benchmarks.
diffs (229 lines):
diff --git a/monetdb5/extras/pyapi/Benchmarks/graph.py b/monetdb5/extras/pyapi/Benchmarks/graph.py
--- a/monetdb5/extras/pyapi/Benchmarks/graph.py
+++ b/monetdb5/extras/pyapi/Benchmarks/graph.py
@@ -21,7 +21,7 @@ y_log = False
line_plot = False
fill_time = False
data_start = 3
-graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black',
'white', 'orange', 'darkgreen', 'darkgray', 'gold', 'darkorchid', 'darkred',
'violet', 'lavenderblush', 'lightsalmon', 'midnightblue', 'moccasin',
'papayawhip', 'turquoise', 'violet']
+graph_colors = ['blue', 'green', 'red', 'cyan', 'magenta', 'yellow', 'black',
'white', 'orange', 'lightgreen', 'darkgray', 'gold', 'darkorchid', 'darkred',
'violet', 'powderblue', 'rosybrown', 'midnightblue', 'moccasin', 'papayawhip',
'turquoise', 'violet']
for i in range(3, len(arguments)):
if '-xlog' in arguments[i]: x_log = True
elif '-ylog' in arguments[i]: y_log = True
@@ -104,7 +104,7 @@ def position_log(x, graphnr, graphs):
position += width_log(x, i, graphs)
return position
def width_normal(x, graphnr, graphs):
- return x / graphs * 2
+ return x / graphs * 3
def position_normal(x, graphnr, graphs):
position = x
for i in range(0, graphnr):
diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -607,30 +607,50 @@ else:
def postgres_init():
if str(args_input_database).lower() == "plpython":
if function_name == "identity":
- source = " return a"
+ source = "return a[0]"
+ return_value = "integer"
elif function_name == "sqroot":
- source = " import math\n return math.sqrt(abs(a))"
+ source = "return numpy.sqrt(numpy.abs(a))"
+ return_value = "float"
+ elif function_name == "quantile":
+ source = "return numpy.percentile(a, 50)"
+ return_value = "float"
else: raise Exception("Unsupported function %s" %
function_name)
createdb_sql = """
+DROP TABLE IF EXISTS integers;
CREATE TABLE integers(i integer);
-
COPY integers FROM '%s' DELIMITER ',' CSV;
-CREATE FUNCTION %s(a integer)
+DROP TABLE IF EXISTS dummy;
+CREATE TABLE dummy(i integer);
+INSERT INTO dummy VALUES (0);
+
+DROP FUNCTION IF EXISTS %s(integer);
+CREATE FUNCTION %s(inp integer)
RETURNS %s
AS $$
+import numpy
+cursor = plpy.cursor('SELECT i FROM integers')
+a = numpy.array([], dtype=numpy.int32)
+while True:
+ rv = cursor.fetch(10000000)
+ if not rv: break
+ python_array = [x['i'] for x in rv]
+ numpy_array = numpy.array(python_array, dtype=numpy.int32)
+ a = numpy.concatenate( (a, numpy_array) )
%s
-$$ LANGUAGE plpythonu;""" % (input_file, function_name, return_value, source)
+$$ LANGUAGE plpythonu;""" % (input_file, function_name, function_name,
return_value, source)
run_sql = """
- SELECT MIN(%s(i)) FROM integers;
+ SELECT %s(i) FROM dummy;
""" % function_name
elif str(args_input_database).lower() == "postgres":
if function_name == "quantile":
run_sql = "SELECT percentile_cont(0.5) WITHIN GROUP(ORDER
BY i) FROM integers;"
else: raise Exception("Unsupported function %s" %
function_name)
createdb_sql = """
+ DROP TABLE IF EXISTS integers;
CREATE TABLE integers(i integer);
COPY integers FROM '%s' DELIMITER ',' CSV;""" % input_file
@@ -734,7 +754,11 @@ AS $$
elif str(args_input_database).lower() == "monetdbmapi":
def monetdb_execute():
c.execute('SELECT * FROM integers')
- result = c.fetchall()
+ result = numpy.array([], dtype=numpy.int32)
+ while True:
+ arr = c.fetchmany(10000)
+ if len(arr) == 0: break
+ result = numpy.concatenate((result, arr[0]))
function(numpy.array(result, dtype=numpy.int32))
elif str(args_input_database).lower() == "monetdb":
def monetdb_execute():
@@ -758,7 +782,7 @@ AS $$
dropdb = os.environ["POSTGRES_DROPDB_COMMAND"]
os.system(initdb)
import psycopg2
- conn = psycopg2.connect("dbname=%s host=/tmp/" % dbname)
+ conn = psycopg2.connect(dbname=dbname, host="/tmp/")
c = conn.cursor()
def psycopg2_init():
return None
@@ -769,23 +793,30 @@ AS $$
def psycopg2_execute():
- c.execute("SELECT * FROM integers;")
- result = c.fetchall()
+ c2 = conn.cursor("named_cursor")
+ c2.execute("SELECT * FROM integers;")
+ result = numpy.array([], dtype=numpy.int32)
+ while True:
+ arr = c2.fetchmany(10000)
+ if len(arr) == 0: break
+ result = numpy.concatenate((result, arr[0]))
function(numpy.array(result, dtype=numpy.int32))
+ c2.close()
def psycopg2_clear():
c.execute("DROP TABLE integers;")
def psycopg2_final():
+ c.close()
+ conn.close()
os.system(dropdb)
- conn.close()
os.remove(input_file)
execute_test(input_type, psycopg2_init, psycopg2_load,
psycopg2_execute, psycopg2_clear, psycopg2_final)
elif str(args_input_database).lower() == "pytables":
import tables, pandas as pd
- table_file = 'testfile.h5'
+ table_file = 'testfile.h5file'
description = dict()
description['i'] = tables.Int32Col()
@@ -794,24 +825,21 @@ AS $$
return None
def pytables_load():
- file = tables.open_file(table_file, mode='w', title='test file')
- group = file.create_group('/', 'integers', 'integer_data')
- table = file.create_table(group, 'values', description, "example")
- values = table.row
- for x in pd.read_csv(input_file).values:
- values['i'] = int(x)
- values.append()
- table.flush()
- file.close()
+ h5file = tables.open_file(table_file, mode='w', title='Benchmark
Integers')
+ root = h5file.root
+ numpy_array = numpy.array(pd.read_csv(input_file).values,
dtype=numpy.int32)
+ h5file.create_array(root, 'integers', numpy_array)
+ h5file.flush()
+ h5file.close()
def pytables_execute():
- file = tables.open_file(table_file, mode='r')
- table = file.root.integers.values
- result = [x['i'] for x in table.iterrows()]
- function(numpy.array(result, dtype=numpy.int32))
+ h5file = tables.open_file(table_file, mode='r',
driver="H5FD_CORE", driver_core_backing_store=0)
+ array = h5file.root.integers.read()
+ function(array)
+ h5file.close()
def pytables_clear():
- os.remove('testfile.h5')
+ os.remove(table_file)
def pytables_final():
os.remove(input_file)
diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -209,7 +209,7 @@ function monetdbmapi_run_single_test() {
function postgres_run_single_test() {
# start server
- setsid $POSTGRES_SERVER_COMMAND > /dev/null && sleep 5
+ setsid $POSTGRES_SERVER_COMMAND -c autovacuum=off -c random_page_cost=3.5
-c geqo_threshold=15 -c from_collapse_limit=14 -c join_collapse_limit=14 -c
default_statistics_target=10000 -c constraint_exclusion=on -c
checkpoint_completion_target=0.9 -c wal_buffers=16MB -c checkpoint_segments=128
-c shared_buffers=256GB -c effective_cache_size=768GB -c work_mem=128GB >
/dev/null && sleep 5
# call python test script
python "$PYAPI_TESTFILE" $5 $1 $2 $3 $MSERVER_PORT $4
# finish testing, kill postgres
@@ -521,11 +521,11 @@ export ntests_sqroot=3
export sizes_sqroot="10"
export ntests_quantile=3
-export sizes_quantile="10 100 1000"
+export sizes_quantile="1 1000"
export PYTHON_TESTS=("identity" "sqroot" "quantile")
export PYTHON_MAP_TESTS=("identity" "sqroot")
-export PLPYTHON_TESTS=("identity" "sqroot")
+export PLPYTHON_TESTS=("quantile")
export POSTGRES_TESTS=("quantile")
export MONETDB_TESTS=("quantile")
@@ -748,7 +748,9 @@ function comparison_graph() {
}
-export BUILD_DIR=/export/scratch1/raasveld/build
+export BUILD_DIR=/export/scratch2/raasveld/build
+export CPATH=/export/scratch2/raasveld/build/include
+export LIBRARY_PATH=/export/scratch2/raasveld/build/lib
function install_cfitsio() {
wget ftp://heasarc.gsfc.nasa.gov/software/fitsio/c/cfitsio_latest.tar.gz
&& tar xvzf cfitsio_latest.tar.gz && cd cfitsio && ./configure --enable-sse2
--prefix=$BUILD_DIR --enable-reentrant && make install
}
@@ -762,9 +764,15 @@ function install_casacore() {
wget https://github.com/casacore/casacore/archive/master.zip && unzip
master.zip && rm master.zip && cd casacore-master && mkdir build && cd build &&
cmake -DCMAKE_INSTALL_PREFIX:PATH=$BUILD_DIR -DBUILD_PYTHON=ON .. && make all
install
}
+function install_pythoncasacore() {
+ wget https://github.com/casacore/python-casacore/archive/master.zip &&
unzip master.zip && rm master.zip && cd python-casacore-master && python
setup.py install --user
+}
+
function install_lofar() {
wget https://github.com/transientskp/tkp/archive/master.zip && unzip
master.zip && rm master.zip && cd tkp-master && python setup.py install --user
}
-#export PYAPI_TESTFILE=/local/raasveld/monetdb_testing.py
-#export LD_LIBRARY_PATH=/local/raasveld/build/lib
+
+
+export PYAPI_TESTFILE=/local/raasveld/monetdb_testing.py
+export LD_LIBRARY_PATH=/local/raasveld/build/lib
_______________________________________________
checkin-list mailing list
checkin-list@monetdb.org
https://www.monetdb.org/mailman/listinfo/checkin-list