Changeset: dc67d325758f for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=dc67d325758f
Modified Files:
monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
Branch: pyapi
Log Message:
Use a C program to generate random strings in the benchmarks.
diffs (165 lines):
diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -281,67 +281,27 @@ elif str(arguments[1]).lower() == "outpu
cursor.execute('rollback')
elif str(arguments[1]).lower() == "string_samelength" or
str(arguments[1]).lower() == "string_extremeunicode":
- #todo: this
- #benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
- #os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+ benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
+ os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+ result_path = os.path.join(os.getcwd(), 'result.txt')
- #def generate_strings_samelength():
- # file = open("result.txt", 'r')
- # content = file.read()
- # strings = content.split(' ')
- # result = numpy.array(strings)
- # return result
if str(arguments[1]).lower() == "string_samelength":
- def generate_strings_samelength(mb, length):
- def random_string(length):
- import random
- import string
- result = ""
- for i in range(0, length):
- result += random.choice(['0', '1', '2', '3', '4', '5',
'6', '7', '8', '9'])
- return result
- import random
- import math
- byte_size = mb * 1000 * 1000
- string_size_byte = length
- string_count = int(byte_size / string_size_byte)
- if length < 15:
- min_int = math.pow(10, length - 1)
- max_int = math.pow(10, length) - 1
- strings = numpy.random.random_integers(min_int, max_int,
string_count).astype('S' + str(length))
- return strings
- else:
- strings = numpy.zeros(string_count, dtype='S' + str(length))
- for i in range(0, string_count):
- strings[i] = random_string(length)
- return strings
- cursor.execute(export_function(generate_strings_samelength, ['float',
'integer'], ['i string'], table=True, test=False))
+ def generate_strings_samelength(f, length):
+ file = open(f, 'r')
+ content = file.read()
+ strings = content.split(' ')
+ result = numpy.array(strings)
+ return result
+ cursor.execute(export_function(generate_strings_samelength, ['string',
'integer'], ['i string'], table=True, test=False))
else:
- def generate_strings_samelength(mb, length):
- def random_string(length):
- import random
- import string
- result = ""
- for i in range(0, length):
- result += random.choice(['0', '1', '2', '3', '4', '5',
'6', '7', '8', '9'])
- return result
- import random
- import math
- byte_size = mb * 1000 * 1000
- string_size_byte = length
- string_count = int(byte_size / string_size_byte)
- strings = None
- if length < 15:
- min_int = math.pow(10, length - 1)
- max_int = math.pow(10, length) - 1
- strings = numpy.random.random_integers(min_int, max_int,
string_count).astype('U' + str(length))
- else:
- strings = numpy.zeros(string_count, dtype='U' + str(length))
- for i in range(0, string_count):
- strings[i] = random_string(length)
- strings[string_count - 1] = unichr(0x100) * length
- return strings
- cursor.execute(export_function(generate_strings_samelength, ['float',
'integer'], ['i string'], table=True, test=False))
+ def generate_strings_samelength(f, length):
+ file = open(f, 'r')
+ content = file.read()
+ strings = content.split(' ')
+ result = numpy.array(strings).astype("U%d" % length)
+ result[len(result) - 1] = unichr(0x100) * length
+ return result
+ cursor.execute(export_function(generate_strings_samelength, ['string',
'integer'], ['i string'], table=True, test=False))
mb = []
lens = []
@@ -360,8 +320,8 @@ elif str(arguments[1]).lower() == "strin
for j in range(0,len(mb)):
size = mb[j]
length = lens[j]
- #os.system("./randomstrings %s %s result.txt" % (str(size),
str(length)))
- cursor.execute('create table strings as SELECT * FROM
generate_strings_samelength(' + str(size) + ',' + str(length) + ') with data;')
+ os.system("%s %s %s %s" % ("./randomstrings", str(size), str(length),
result_path))
+ cursor.execute('create table strings as SELECT * FROM
generate_strings_samelength(\'' + result_path + '\',' + str(length) + ') with
data;')
results = []
result_file = open(temp_file, 'w+')
result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
@@ -385,7 +345,11 @@ elif str(arguments[1]).lower() == "strin
#cursor.execute('drop function import_test');
cursor.execute('rollback')
elif str(arguments[1]).lower() == "string_extremelength":
- def generate_strings_extreme(extreme_length, string_count):
+ benchmark_dir = os.environ["PYAPI_BENCHMARKS_DIR"]
+ os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
+ result_path = os.path.join(os.getcwd(), 'result.txt')
+
+ def generate_strings_extreme(f, extreme_length):
def random_string(length):
import random
import string
@@ -393,15 +357,13 @@ elif str(arguments[1]).lower() == "strin
for i in range(0, length):
result += random.choice(string.printable)
return result
- import random
- import math
- result = numpy.array([], dtype=object)
- result = numpy.append(result, random_string(extreme_length))
- for i in range(0, string_count - 1):
- result = numpy.append(result, random_string(1))
+ file = open(f, 'r')
+ content = file.read()
+ strings = content.split(' ')
+ result = numpy.array(strings).astype('object')
+ result[0] = random_string(extreme_length)
return result
-
- cursor.execute(export_function(generate_strings_extreme, ['integer',
'integer'], ['i string'], table=True, test=False))
+ cursor.execute(export_function(generate_strings_extreme, ['string',
'integer'], ['i string'], table=True, test=False))
extreme_lengths = []
string_counts = []
@@ -420,7 +382,11 @@ elif str(arguments[1]).lower() == "strin
for j in range(0,len(extreme_lengths)):
str_len = extreme_lengths[j]
str_count = string_counts[j]
- cursor.execute('create table strings as SELECT * FROM
generate_strings_extreme(' + str(str_len) + ',' + str(str_count) + ') with
data;')
+ string_mb = float(str_count) / (1000 ** 2)
+ print("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1),
result_path))
+ os.system("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1),
result_path))
+ cursor.execute('create table strings as SELECT * FROM
generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with
data;')
+ print('create table strings as SELECT * FROM
generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with
data;')
results = []
result_file = open(temp_file, 'w+')
result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
@@ -565,3 +531,4 @@ else:
print("Unrecognized test type \"" + arguments[1] + "\", exiting...")
sys.exit(1)
+
diff --git a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
--- a/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
+++ b/monetdb5/extras/pyapi/Benchmarks/pyapi_test.sh
@@ -1,7 +1,7 @@
# The base directory of testing, a new folder is created in this base
directory [$PYAPI_TEST_DIR], and everything is done in that new folder
-export PYAPI_BASE_DIR=/home/mytherin/
+export PYAPI_BASE_DIR=/export/scratch1/raasveld
# The terminal to start mserver with, examples are gnome-terminal, xterm,
konsole
export TERMINAL=x-terminal-emulator
# Port used by the MSERVER
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list