Benchmark: additional utility scripts and a new conf folder. - Added more utility scripts for the benchmark (find_averages_in_logs.py and merge_xml_files.py). - Moved the example configuration files out of the scripts folder into a separate conf folder.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/b8fc3612 Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/b8fc3612 Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/b8fc3612 Branch: refs/heads/site Commit: b8fc36122eab4320c710c9922a617670260e5c58 Parents: 6c90193 Author: Preston Carman <[email protected]> Authored: Mon Jun 23 14:28:28 2014 -0700 Committer: Preston Carman <[email protected]> Committed: Mon Jun 23 14:28:28 2014 -0700 ---------------------------------------------------------------------- .../noaa-ghcn-daily/conf/weather_example.xml | 35 +++++++ .../conf/weather_example_cluster.xml | 58 ++++++++++++ .../noaa-ghcn-daily/scripts/weather_example.xml | 35 ------- .../scripts/weather_example_cluster.xml | 58 ------------ .../resources/util/find_averages_in_logs.py | 97 ++++++++++++++++++++ .../src/main/resources/util/merge_xml_files.py | 88 ++++++++++++++++++ 6 files changed, 278 insertions(+), 93 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml new file mode 100644 index 0000000..2c15a33 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example.xml @@ -0,0 +1,35 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<data xmlns="data"> + <name>Local Example</name> + <save_path>/data</save_path> + <package>ghcnd_all</package> + <node> + <id>localhost</id> + <cluster_ip>127.0.0.1</cluster_ip> + </node> + <dataset> + <name>tiny-example</name> + <test>local_speed_up</test> + <save_path>/data</save_path> + <partition_type>small_files</partition_type> + <partitions_per_path>1</partitions_per_path> + <partitions_per_path>2</partitions_per_path> + <partitions_per_path>4</partitions_per_path> + <partitions_per_path>8</partitions_per_path> + </dataset> +</data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml new file mode 100644 index 0000000..7d05ac0 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/conf/weather_example_cluster.xml @@ -0,0 +1,58 @@ +<!-- + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> +<data xmlns="data"> + <name>Cluster Example</name> + <save_path>/data</save_path> + <package>ghcnd_all</package> + <node> + <id>machine1</id> + <cluster_ip>127.0.0.1</cluster_ip> + </node> + <node> + <id>machine2</id> + <cluster_ip>127.0.0.2</cluster_ip> + </node> + <node> + <id>machine3</id> + <cluster_ip>127.0.0.3</cluster_ip> + </node> + <node> + <id>machine4</id> + <cluster_ip>127.0.0.4</cluster_ip> + </node> + <node> + <id>machine5</id> + <cluster_ip>127.0.0.5</cluster_ip> + </node> + <dataset> + <name>tiny-1drive</name> + <test>speed_up</test> + <test>batch_scale_out</test> + <save_path>/data</save_path> + <partition_type>small_files</partition_type> + <partitions_per_path>1</partitions_per_path> + </dataset> + <dataset> + <name>small-2drives</name> + <test>speed_up</test> + <test>batch_scale_out</test> + <save_path>/data</save_path> + <save_path>/data2</save_path> + <partition_type>large_files</partition_type> + <partitions_per_path>1</partitions_per_path> + </dataset> +</data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml deleted 
file mode 100644 index 2c15a33..0000000 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example.xml +++ /dev/null @@ -1,35 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<data xmlns="data"> - <name>Local Example</name> - <save_path>/data</save_path> - <package>ghcnd_all</package> - <node> - <id>localhost</id> - <cluster_ip>127.0.0.1</cluster_ip> - </node> - <dataset> - <name>tiny-example</name> - <test>local_speed_up</test> - <save_path>/data</save_path> - <partition_type>small_files</partition_type> - <partitions_per_path>1</partitions_per_path> - <partitions_per_path>2</partitions_per_path> - <partitions_per_path>4</partitions_per_path> - <partitions_per_path>8</partitions_per_path> - </dataset> -</data> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml deleted file mode 100644 index 7d05ac0..0000000 --- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_example_cluster.xml +++ /dev/null @@ -1,58 +0,0 @@ -<!-- - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. ---> -<data xmlns="data"> - <name>Cluster Example</name> - <save_path>/data</save_path> - <package>ghcnd_all</package> - <node> - <id>machine1</id> - <cluster_ip>127.0.0.1</cluster_ip> - </node> - <node> - <id>machine2</id> - <cluster_ip>127.0.0.2</cluster_ip> - </node> - <node> - <id>machine3</id> - <cluster_ip>127.0.0.3</cluster_ip> - </node> - <node> - <id>machine4</id> - <cluster_ip>127.0.0.4</cluster_ip> - </node> - <node> - <id>machine5</id> - <cluster_ip>127.0.0.5</cluster_ip> - </node> - <dataset> - <name>tiny-1drive</name> - <test>speed_up</test> - <test>batch_scale_out</test> - <save_path>/data</save_path> - <partition_type>small_files</partition_type> - <partitions_per_path>1</partitions_per_path> - </dataset> - <dataset> - <name>small-2drives</name> - <test>speed_up</test> - <test>batch_scale_out</test> - <save_path>/data</save_path> - <save_path>/data2</save_path> - <partition_type>large_files</partition_type> - <partitions_per_path>1</partitions_per_path> - </dataset> -</data> \ No newline at end of file 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py new file mode 100644 index 0000000..1cd7939 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/util/find_averages_in_logs.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import fnmatch +import getopt +import glob +import os +import sys +import csv + +SEARCH_STRING = 'Average execution time:' + +def find_files(directory, pattern): + for root, dirs, files in os.walk(directory): + for basename in files: + if fnmatch.fnmatch(basename, pattern): + yield (root, basename) + + +def main(argv): + ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv + ''' + log_folder = "" + save_file = "" + data_type = "" + + # Get the base folder + try: + opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="]) + except getopt.GetoptError: + print 'The file options for list_xml_files.py were not correctly specified.' + print 'To see a full list of options try:' + print ' $ python list_xml_files.py -h' + sys.exit(2) + for opt, arg in opts: + if opt == '-h': + print 'Options:' + print ' -f The base folder to build XML file list.' + print ' -s The save file.' + sys.exit() + elif opt in ('-f', "--folder"): + # check if file exists. + if os.path.exists(arg): + log_folder = arg + else: + print 'Error: Argument must be a folder name for --folder (-f).' + sys.exit() + elif opt in ('-s', "--save_file"): + save_file = arg + elif opt in ('-t', "--data_type"): + data_type = arg + + # Required fields to run the script. + if log_folder == "" or not os.path.exists(log_folder): + print 'Error: The folder path option must be supplied: --folder (-f).' + sys.exit() + if save_file == "": + print 'Error: The folder path option must be supplied: --save_file (-s).' + sys.exit() + + list_xml_csv = '' + with open(save_file, 'w') as outfile: + csvfile = csv.writer(outfile) + for path, filename in find_files(log_folder, '*.log'): + # Only write out a specific type of data xml documents found in a specific path. + with open(path + "/" + filename) as infile: + folders = path.replace(log_folder, "") + for line in infile: + # Skip the root tags. 
+ if line.startswith(SEARCH_STRING): + time_split = line.split(" ") + name_split = filename.split(".") + folder_split = folders.split("/") + + # Build data row + row = folder_split + row.append(name_split[0]) + row.append(time_split[3]) + row.append(name_split[2]) + csvfile.writerow(row) + + +if __name__ == "__main__": + main(sys.argv[1:]) http://git-wip-us.apache.org/repos/asf/vxquery/blob/b8fc3612/vxquery-benchmark/src/main/resources/util/merge_xml_files.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/util/merge_xml_files.py b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py new file mode 100644 index 0000000..8a6952b --- /dev/null +++ b/vxquery-benchmark/src/main/resources/util/merge_xml_files.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import fnmatch +import getopt +import glob +import os +import sys + +XML_PREFIX = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?><root>' + "\n" +XML_SUFFIX = '</root>' + "\n" + +def find_files(directory, pattern): + for root, dirs, files in os.walk(directory): + for basename in files: + if fnmatch.fnmatch(basename, pattern): + yield (root, basename) + + +def main(argv): + ''' Same as bash: find $FOLDER -type f -name "*.xml" -exec basename {} \; > list_xml.csv + ''' + xml_folder = "" + save_file = "" + data_type = "" + + # Get the base folder + try: + opts, args = getopt.getopt(argv, "f:hs:t:", ["folder=", "save_file=", "data_type="]) + except getopt.GetoptError: + print 'The file options for list_xml_files.py were not correctly specified.' + print 'To see a full list of options try:' + print ' $ python list_xml_files.py -h' + sys.exit(2) + for opt, arg in opts: + if opt == '-h': + print 'Options:' + print ' -f The base folder to build XML file list.' + print ' -s The save file.' + sys.exit() + elif opt in ('-f', "--folder"): + # check if file exists. + if os.path.exists(arg): + xml_folder = arg + else: + print 'Error: Argument must be a folder name for --folder (-f).' + sys.exit() + elif opt in ('-s', "--save_file"): + save_file = arg + elif opt in ('-t', "--data_type"): + data_type = arg + + # Required fields to run the script. + if xml_folder == "" or not os.path.exists(xml_folder): + print 'Error: The folder path option must be supplied: --folder (-f).' + sys.exit() + if save_file == "": + print 'Error: The folder path option must be supplied: --save_file (-s).' + sys.exit() + + list_xml_csv = '' + with open(save_file, 'w') as outfile: + outfile.write(XML_PREFIX) + for path, filename in find_files(xml_folder, '*.xml'): + # Only write out a specific type of data xml documents found in a specific path. + if data_type in path: + with open(path + "/" + filename) as infile: + for line in infile: + # Skip the root tags. 
+ if line != XML_PREFIX and line != XML_SUFFIX: + outfile.write(line) + outfile.write(XML_SUFFIX) + +if __name__ == "__main__": + main(sys.argv[1:])
