Repository: asterixdb
Updated Branches:
  refs/heads/master 52528555e -> 8e36d657b
[NO ISSUE][TEST] Add import order to perf test - user model changes: no - storage format changes: no - interface changes: no Details: 1. Add data file removal after import to save space. 2. Add import order to import small datasets first. Change-Id: I6ce4e55d4fbd5e082e9f01c526f258891574cb68 Reviewed-on: https://asterix-gerrit.ics.uci.edu/1965 Reviewed-by: Till Westmann <ti...@apache.org> Tested-by: Till Westmann <ti...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/8e36d657 Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/8e36d657 Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/8e36d657 Branch: refs/heads/master Commit: 8e36d657b27e671086b078f6fcd68e9bf13dc85b Parents: 5252855 Author: Xikui Wang <xkk...@gmail.com> Authored: Wed Aug 23 19:15:18 2017 -0700 Committer: Xikui Wang <xkk...@gmail.com> Committed: Wed Aug 23 19:21:31 2017 -0700 ---------------------------------------------------------------------- .../main/resources/benchmarks/tpch/load/load.yml | 8 ++++---- .../resources/benchmarks/tpch/load/settings.yml | 2 ++ .../resources/others/SparkSQL/ansible/load_tpch.yml | 2 +- .../main/resources/others/SparkSQL/run_sparksql.sh | 16 ++++++++-------- 4 files changed, 15 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/asterixdb/blob/8e36d657/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/load.yml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/load.yml b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/load.yml index ff22a7d..9835be9 100644 --- a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/load.yml +++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/load.yml @@ -28,8 +28,8 @@ 
include: loadgen.yml - name: Load all datasets - shell: "ansible-playbook -i {{ inventory }} {{ runquery }} --extra-vars=\"query_file={{ dml }} report=false\"" - with_fileglob: - - "{{ dml_dir }}/*" + shell: "ansible-playbook -i {{ inventory }} {{ runquery }} --extra-vars=\"query_file={{ dml_dir }}/load{{ dataset }}.sqlpp report=false\"; + ansible ncs -i {{ inventory }} -m shell -a \"rm -f {{ data_dir }}/{{ dataset | lower}}.tbl*\"" + with_items: "{{ dataset_import_order }}" loop_control: - loop_var: dml + loop_var: dataset http://git-wip-us.apache.org/repos/asf/asterixdb/blob/8e36d657/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/settings.yml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/settings.yml b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/settings.yml index 17a3638..4f3030e 100644 --- a/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/settings.yml +++ b/asterixdb/asterix-benchmark/src/main/resources/benchmarks/tpch/load/settings.yml @@ -23,6 +23,8 @@ partitioned_datasets: ["LineItem", "Orders", "Customer", "Part", "Partsupp", "Su # Datasets with a single file. centralized_datasets: ["Region", "Nation"] +dataset_import_order: ["Region", "Nation", "Supplier", "Part", "Customer", "Partsupp", "Orders", "LineItem"] + # Temp directory. 
temp_dir: "/tmp/asterixdb" http://git-wip-us.apache.org/repos/asf/asterixdb/blob/8e36d657/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml index ea635f4..5b07eed 100644 --- a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml @@ -30,4 +30,4 @@ - include_vars: "{{ playbook_dir }}/../conf/private_ip.yml" - include_vars: ../../../benchmarks/tpch/gen/settings.yml - name: Put data on to HDFS - shell: for i in `ls -S {{ data_dir }}/`; do {{ hadoop_base }}/bin/hdfs dfs -put -f {{ data_dir }}/$i hdfs://{{ cc_ip }}:9000/{{ data_dir }}; rm {{ data_dir }}/$i; done \ No newline at end of file + shell: for i in `ls -rS {{ data_dir }}/`; do {{ hadoop_base }}/bin/hdfs dfs -put -f {{ data_dir }}/$i hdfs://{{ cc_ip }}:9000/{{ data_dir }}; rm {{ data_dir }}/$i; done \ No newline at end of file http://git-wip-us.apache.org/repos/asf/asterixdb/blob/8e36d657/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh ---------------------------------------------------------------------- diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh index 9b14f67..18d4962 100755 --- a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh @@ -45,14 +45,14 @@ then exit 1 fi # Load data -ansible-playbook -i $INVENTORY $SCRIPT_PATH/../../benchmarks/tpch/gen/gen.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/../../benchmarks/tpch/gen/gen.yml # 
Configure HDFS -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/install_hdfs.yml -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/start_hdfs.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/install_hdfs.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/start_hdfs.yml # Configure Spark -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/install_spark.yml -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/start_spark.yml -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/load_tpch.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/install_spark.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/start_spark.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/load_tpch.yml # Execute queries -ansible-playbook -i $INVENTORY --extra-vars="metric='${SYSTEM_NAME}'" $SCRIPT_PATH/ansible/prepare_queries.yml -ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/execute_queries.yml \ No newline at end of file +ansible-playbook -i $INVENTORY --extra-vars="metric='${SYSTEM_NAME}'" $SCRIPT_PATH/ansible/prepare_queries.yml +ansible-playbook -i $INVENTORY $SCRIPT_PATH/ansible/execute_queries.yml \ No newline at end of file