Changeset: 33277cf73ed7 for MonetDB
URL: http://dev.monetdb.org/hg/MonetDB?cmd=changeset;node=33277cf73ed7
Modified Files:
        monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
        monetdb5/extras/pyapi/Benchmarks/randomstrings.c
Branch: pyapi
Log Message:

Load test data from CSV file.


diffs (128 lines):

diff --git a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
--- a/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
+++ b/monetdb5/extras/pyapi/Benchmarks/monetdb_testing.py
@@ -286,38 +286,13 @@ elif str(arguments[1]).lower() == "strin
     result_path = os.path.join(os.getcwd(), 'result.txt')
 
     if str(arguments[1]).lower() == "string_samelength":
-        def generate_strings_samelength(f, length):
-            file = open(f, 'r')
-            content = file.read()
-            strings = content.split(' ')
-            max_numpy_size = 1000000000
-            result = numpy.zeros(0, dtype="S%d" % length)
-            tempstr = len(strings)
-            current = 0
-            while tempstr > max_numpy_size:
-                result = numpy.append(result, numpy.array(strings[current:current + max_numpy_size]))
-                tempstr -= max_numpy_size
-                current += max_numpy_size
-            result = numpy.append(result, numpy.array(strings[current:len(strings) - 1]))
-            return result
-        cursor.execute(export_function(generate_strings_samelength, ['string', 'integer'], ['i string'], table=True, test=False))
+        def generate_strings_samelength(length):
+            return 'A' * length
+        cursor.execute(export_function(generate_strings_samelength, ['integer'], ['i string'], table=True, test=False))
     else:
-        def generate_strings_samelength(f, length):
-            file = open(f, 'r')
-            content = file.read()
-            strings = content.split(' ')
-            max_numpy_size = 1000000000
-            result = numpy.zeros(0, dtype="U%d" % length)
-            tempstr = len(strings)
-            current = 0
-            while tempstr > max_numpy_size:
-                result = numpy.append(result, numpy.array(strings[current:current + max_numpy_size]))
-                tempstr -= max_numpy_size
-                current += max_numpy_size
-            result = numpy.append(result, numpy.array(strings[current:len(strings) - 1]))
-            result[len(result) - 1] = unichr(0x100) * length
-            return result
-        cursor.execute(export_function(generate_strings_samelength, ['string', 'integer'], ['i string'], table=True, test=False))
+        def generate_strings_samelength(length):
+            return unichr(0x100) * length
+        cursor.execute(export_function(generate_strings_samelength, ['integer'], ['i string'], table=True, test=False))
 
     mb = []
     lens = []
@@ -337,7 +312,10 @@ elif str(arguments[1]).lower() == "strin
         size = mb[j]
         length = lens[j]
         os.system("%s %s %s %s" % ("./randomstrings", str(size), str(length), result_path))
-        cursor.execute('create table strings as SELECT * FROM generate_strings_samelength(\'' + result_path + '\',' + str(length) + ') with data;')
+        cursor.execute('CREATE TABLE strings(i string);')
+        cursor.execute("COPY INTO strings FROM '%s';" % result_path)
+        cursor.execute('INSERT INTO strings SELECT * FROM generate_strings_samelength(' + str(length) + ');')
+        #cursor.execute('create table strings as SELECT * FROM generate_strings_samelength(\'' + result_path + '\',' + str(length) + ') with data;')
         results = []
         result_file = open(temp_file, 'w+')
         result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
@@ -365,7 +343,7 @@ elif str(arguments[1]).lower() == "strin
     os.system("gcc " + benchmark_dir + "/randomstrings.c -o randomstrings")
     result_path = os.path.join(os.getcwd(), 'result.txt')
 
-    def generate_strings_extreme(f, extreme_length):
+    def generate_strings_extreme(extreme_length):
         def random_string(length):
             import random
             import string
@@ -373,13 +351,8 @@ elif str(arguments[1]).lower() == "strin
             for i in range(0, length):
                 result += random.choice(string.printable)
             return result
-        file = open(f, 'r')
-        content = file.read()
-        strings = content.split(' ')
-        result = numpy.array(strings).astype('object')
-        result[0] = random_string(extreme_length)
-        return result
-    cursor.execute(export_function(generate_strings_extreme, ['string', 'integer'], ['i string'], table=True, test=False))
+        return random_string(extreme_length)
+    cursor.execute(export_function(generate_strings_extreme, ['integer'], ['i string'], table=True, test=False))
 
     extreme_lengths = []
     string_counts = []
@@ -399,10 +372,11 @@ elif str(arguments[1]).lower() == "strin
         str_len = extreme_lengths[j]
         str_count = string_counts[j]
         string_mb = float(str_count) / (1000 ** 2)
-        print("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1), result_path))
         os.system("%s %s %s %s" % ("./randomstrings", str(string_mb), str(1), result_path))
-        cursor.execute('create table strings as SELECT * FROM generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with data;')
-        print('create table strings as SELECT * FROM generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with data;')
+        cursor.execute('CREATE TABLE strings(i string);')
+        cursor.execute("COPY INTO strings FROM '%s';" % result_path)
+        cursor.execute('INSERT INTO strings SELECT * FROM generate_strings_extreme(' + str(str_len) + ');')
+        #print('create table strings as SELECT * FROM generate_strings_extreme(\'' + result_path + '\',' + str(str_len) + ') with data;')
         results = []
         result_file = open(temp_file, 'w+')
         result_file.write("Peak Memory Usage (Bytes)\tExecution Time (s)\n")
diff --git a/monetdb5/extras/pyapi/Benchmarks/randomstrings.c b/monetdb5/extras/pyapi/Benchmarks/randomstrings.c
--- a/monetdb5/extras/pyapi/Benchmarks/randomstrings.c
+++ b/monetdb5/extras/pyapi/Benchmarks/randomstrings.c
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
        size_t result_size = string_length * string_count + string_count - 1;
        char *result = malloc(sizeof(char) * result_size + 1);
        result[result_size] = '\0';
-       char sep = ' ';
+       char sep = '\n';
        size_t i, j;
 
        for(i = 0; i < result_size; i += string_length + 1) {
@@ -49,6 +49,7 @@ int main(int argc, char *argv[])
        size_t MAX_BUFFER_SIZE = 100000;
        {
                FILE *f = fopen(argv[3], "w");
+               fprintf(f, "s\n");
                for(i = 0; i < result_size / MAX_BUFFER_SIZE; i++) {
                        size_t tempindex = (i + 1) * MAX_BUFFER_SIZE;
                        char tmp = result[tempindex];
@@ -57,6 +58,7 @@ int main(int argc, char *argv[])
                        result[tempindex] = tmp;
                }
                fprintf(f, "%s", result + i * MAX_BUFFER_SIZE);
+               fprintf(f, "\n");
                fclose(f);
        }
 
_______________________________________________
checkin-list mailing list
[email protected]
https://www.monetdb.org/mailman/listinfo/checkin-list

Reply via email to