Xikui Wang has uploaded a new change for review. https://asterix-gerrit.ics.uci.edu/1879
Change subject: [WIP] Add performance test for AsterixDB ...................................................................... [WIP] Add performance test for AsterixDB Change-Id: Ic9ff8efa0be71bde67190ba6f9fbd647c7799084 --- A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/execute_queries.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/gen_tpch.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_hdfs.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_sparks.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/prepare_queries.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/spark_sql_settings.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_hdfs.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_sparks.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/stop_hdfs.yml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/core-site-template.xml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/execute-query.tmpl A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/hdfs-site-template.xml A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/ssh_config A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q01.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q02.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q03.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q04.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q05.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q06.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07_variant.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q08.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q09.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q10.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q11.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q12.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q13.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q14.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q15.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q16.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q17.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q18.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q19.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q20.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q21.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q22.txt A asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh 38 files changed, 1,699 insertions(+), 0 deletions(-) git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb refs/changes/79/1879/1 diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/execute_queries.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/execute_queries.yml new file mode 100644 index 0000000..4ff15bb --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/execute_queries.yml @@ -0,0 +1,27 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - include_vars: ../../../conf/benchmark_setting.yml + - name: Execute query with Spark + shell: bash {{ spark_base }}/bin/spark-shell --executor-memory {{ spark_memory }}m --master spark://{{ groups['cc'][0] }}:7077 -i main.scala + - name: Report result to REST + shell: while read LINE; do curl -XPOST -d '$LINE' {{ result_url }}; done < ~/{{ result_file }} \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/gen_tpch.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/gen_tpch.yml new file mode 100644 index 0000000..0683bfb --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/gen_tpch.yml @@ -0,0 +1,25 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: cc + tasks: + - name: Configure binary on master + include: ../../../benchmarks/tpch/gen/compile.yml + - name: Populate dataset on master + include: ../../../benchmarks/tpch/gen/gendata.yml diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_hdfs.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_hdfs.yml new file mode 100644 index 0000000..2e69cb7 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_hdfs.yml @@ -0,0 +1,53 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- name: Install HDFS + hosts: all + tasks: + - include_vars: spark_sql_settings.xml + - unarchive: + src: http://apache.mirrors.hoobly.com/hadoop/common/hadoop-2.8.0/hadoop-2.8.0.tar.gz + dest: /home/{{ user }} + remote_src: True + +- name: Add slaves to name node + hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - file: + path: '{{ hadoop_base }}/etc/hadoop/slaves' + state: absent + - shell: 'echo {{ item }} >> {{ hadoop_base }}/etc/hadoop/slaves' + with_items: + "{{ groups['ncs'] }}" + +- name: Populate configuration file + hosts: all + tasks: + - include_vars: spark_sql_settings.yml + - name: Populate core-site-template.xml + template: + src: conf/core-site-template.xml + dest: "{{ hadoop_base }}/etc/hadoop/core-site.xml" + - name: Populate hdfs-site-template.xml + template: + src: conf/hdfs-site-template.xml + dest: "{{ hadoop_base }}/etc/hadoop/hdfs-site.xml" + - name: Add memory constraint on all nodes + shell: echo "export HADOOP_HEAPSIZE={{ hdfs_memory }}" >> {{ hadoop_base }}/etc/hadoop/hadoop-env.sh \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_sparks.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_sparks.yml new file mode 100644 index 0000000..35ee517 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/install_sparks.yml @@ -0,0 +1,36 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- name: Install Sparks + hosts: all + tasks: + - include_vars: spark_sql_settings.yml + - name: Clean old installation if exsits + file: + path: "{{ spark_base }}" + state: absent + - name: Unarchive Sparks from remote + unarchive: + src: https://d3kbcqa49mib13.cloudfront.net/spark-2.1.1-bin-hadoop2.7.tgz + dest: /home/{{ user }} + remote_src: True + - name: Make configuration file + shell: "cp {{ spark_base }}/conf/spark-env.sh.template {{ spark_base }}/conf/spark-env.sh" + - name: Add memory option + shell: echo "SPARK_WORKER_MEMORY={{ spark_memory }}m" >> {{ spark_base }}/conf/spark-env.sh \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml new file mode 100644 index 0000000..b1d0cab --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/load_tpch.yml @@ -0,0 +1,26 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - name: Create TPCH data dir on HDFS + shell: '{{ hadoop_base }}/bin/hdfs dfs -mkdir -p {{ data_dir }}' + - name: Put data onto HDFS + shell: ' {{hadoop_base }}/bin/hdfs dfs -put {{ data_dir }}/* {{ data_dir }}' \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/prepare_queries.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/prepare_queries.yml new file mode 100644 index 0000000..b523d47 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/prepare_queries.yml @@ -0,0 +1,30 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - name: Sync queries directory + synchronize: + src: ../{{ query_files_root }} + dest: /home/{{ user }}/ + - name: Ship script template + template: + src: ../conf/execute-query.tmpl + dest: /home/{{ user }}/main.scala \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/spark_sql_settings.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/spark_sql_settings.yml new file mode 100644 index 0000000..e946248 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/spark_sql_settings.yml @@ -0,0 +1,40 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +user: ec2-user + +hadoop_base: /home/{{ user }}/hadoop-2.8.0 + +spark_base: /home/{{ user }}/spark-2.1.1-bin-hadoop2.7 + +home_dir: "/home/{{ user }}" + +data_dir: "{{ home_dir }}/tpch" + +hdfs_memory: 256 + +spark_memory: 512 + +test_round: 1 + +query_files_root: "queries" + +dfs_replication: 1 + +result_file: "result.txt" diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_hdfs.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_hdfs.yml new file mode 100644 index 0000000..adde1f6 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_hdfs.yml @@ -0,0 +1,64 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- name: Prepare password-less on master + hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - name: Generate key on master + user: + name: "{{ user }}" + generate_ssh_key: yes + ssh_key_bits: 2048 + ssh_key_file: .ssh/id_rsa + - name: Make passwordless to current host + shell: "cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys" + - name: Download key from master + fetch: + src: ~/.ssh/id_rsa.pub + dest: ../conf/master.key + flat: yes + - name: Bypass host check on master + copy: + src: ../conf/ssh_config + dest: ~/.ssh/config + mode: 0600 + +- name: Prepare password-less on slaves + hosts: ncs + tasks: + - name: Ship master key to slaves + copy: + src: conf/master.key + dest: "~/master.key" + mode: 0600 + - name: Add master key to authorized_keys list + shell: "cat ~/master.key >> ~/.ssh/authorized_keys" + +- name: Start HDFS + hosts: cc + tasks: + - name: Empty tmp + shell: "sudo rm -rf *" + args: + chdir: /tmp/ + - name: Format HDFS + shell: 'bash {{ hadoop_base }}/bin/hadoop namenode -format' + - name: Start name node + shell: 'bash {{ hadoop_base }}/sbin/start-dfs.sh' \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_sparks.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_sparks.yml new file mode 100644 index 0000000..be6b96f --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/start_sparks.yml @@ -0,0 +1,27 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: cc + tasks: + - include_vars: spark_sql_settings.yml + - shell: 'echo {{ item }} >> {{ spark_base }}/conf/slaves' + with_items: + "{{ groups['ncs'] }}" + - name: Start Sparks + shell: "bash {{ spark_base }}/sbin/start-all.sh" \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/stop_hdfs.yml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/stop_hdfs.yml new file mode 100644 index 0000000..f312727 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/ansible/stop_hdfs.yml @@ -0,0 +1,24 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +- hosts: all + tasks: + - include_vars: instance_settings.yml + - name: Stop hdfs + shell: 'bash {{ hadoop_base }}/sbin/stop-dfs.sh' \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/core-site-template.xml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/core-site-template.xml new file mode 100644 index 0000000..2e89fbe --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/core-site-template.xml @@ -0,0 +1,42 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<!-- Put site-specific property overrides in this file. --> +<configuration> + <property> + <name>fs.defaultFS</name> + <value>hdfs://{{ groups['cc'][0] }}:9000</value> + </property> +</configuration> diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/execute-query.tmpl b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/execute-query.tmpl new file mode 100644 index 0000000..d850e81 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/execute-query.tmpl @@ -0,0 +1,92 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +import org.apache.spark.sql.Encoders +import scala.io.Source +import java.io._ +import org.apache.http.impl.client.DefaultHttpClient + +val hdfs_host = "{{ groups['master'][0] }}:9000" +val hdfs_data_root = "hdfs://" + hdfs_host + "/home/{{ user }}/tpch/" +val queries_root = "{{ query_files_root }}/" +val round = {{ test_round }} +val metric = "SparkSQL" + +// Table Initialization +case class NATION(N_NATIONKEY: Int, N_NAME: String, N_REGIONKEY: Int, N_COMMENT: String) +case class REGION(R_REGIONKEY: Int, R_NAME: String, R_COMMENT: String) +case class PART(P_PARTKEY: Int, P_NAME: String, P_MFGR: String, P_BRAND: String, P_TYPE: String, P_SIZE: Int, P_CONTAINER: String, P_RETAILPRICE: Float, P_COMMENT: String) +case class SUPPLIER (S_SUPPKEY: Int, S_NAME: String, S_ADDRESS: String, S_NATIONKEY: Int, S_PHONE: String, S_ACCTBAL: Float, S_COMMENT: String) +case class PARTSUPP ( S_PARTKEY: Int, PS_SUPPKEY: Int, PS_AVAILQTY: Int, PS_SUPPLYCOST: Float, PS_COMMENT: String) +case class CUSTOMER (C_CUSTKEY: Int, C_NAME : String, C_ADDRESS : String, C_NATIONKEY: Int, C_PHONE : String, C_ACCTBAL: Float , C_MKTSEGMENT : String , C_COMMENT : String) +case class ORDERS (O_ORDERKEY: Int, O_CUSTKEY: Int, O_ORDERSTATUS : String, O_TOTALPRICE: Float, O_ORDERDATE: java.sql.Timestamp, + O_ORDERPRIORITY: String, O_CLERK : String, O_SHIPPRIORITY: Int, O_COMMENT: String) +case class LINEITEM (L_ORDERKEY: Int, L_PARTKEY: Int, L_SUPPKEY: Int, L_LINENUMBER: Int, L_QUANTITY: Float, + L_EXTENDEDPRICE: Float, L_DISCOUNT: Float, L_TAX: Float, L_RETURNFLAG: String, L_LINESTATUS : String, + L_SHIPDATE: java.sql.Timestamp, L_COMMITDATE: java.sql.Timestamp, L_RECEIPTDATE: java.sql.Timestamp, L_SHIPINSTRUCT: String, + L_SHIPMODE: String, L_COMMENT: String) + + +val nation = spark.read.option("delimiter","|").schema(Encoders.product[NATION].schema).csv(hdfs_data_root + "nation.tbl") +val region = spark.read.option("delimiter","|").schema(Encoders.product[REGION].schema).csv(hdfs_data_root + "region.tbl") +val part = spark.read.option("delimiter","|").schema(Encoders.product[PART].schema).csv(hdfs_data_root + "part.tbl") +val supp = spark.read.option("delimiter","|").schema(Encoders.product[SUPPLIER].schema).csv(hdfs_data_root + "supplier.tbl") +val part_supp = spark.read.option("delimiter","|").schema(Encoders.product[PARTSUPP].schema).csv(hdfs_data_root + "partsupp.tbl") +val customer = spark.read.option("delimiter","|").schema(Encoders.product[CUSTOMER].schema).csv(hdfs_data_root + "customer.tbl") +val orders = spark.read.option("delimiter","|").schema(Encoders.product[ORDERS].schema).csv(hdfs_data_root + "orders.tbl") +val lineitem = spark.read.option("delimiter","|").schema(Encoders.product[LINEITEM].schema).csv(hdfs_data_root + "lineitem.tbl") + + +nation.createOrReplaceTempView("NATION") +region.createOrReplaceTempView("REGION") +part.createOrReplaceTempView("PART") +supp.createOrReplaceTempView("SUPPLIER") +part_supp.createOrReplaceTempView("PARTSUPP") +customer.createOrReplaceTempView("CUSTOMER") +orders.createOrReplaceTempView("ORDERS") +lineitem.createOrReplaceTempView("LINEITEM") + +// Execute Query +val queries_dir = new File(queries_root) +val etime = collection.mutable.Map[String, Float]() +for (i <- 1 to round) { + for (query_file <- queries_dir.listFiles()) { + print("Processing Query "+ query_file) + val file_name = query_file.getName() + val queries = Source.fromFile(query_file) + val t0 = System.nanoTime() + var query = "" + queries.getLines.foreach { line => query += (line + "\n")} + spark.sql(query).count() + val t1 = System.nanoTime() + val elapsed = (t1 - t0) / 1000000000.0f + if (i > 0) { + if (!etime.contains(file_name)) { + etime(file_name) = 0 + } + etime(file_name) += elapsed + } + } +} + +// Write result +val writer = new PrintWriter(new File("{{ result_file }}")) +for ((k, v) <- etime) writer.print("{\"group\": \""+ k.split('.')(0) + ".sqlpp\", \"metric\": \"" + metric + "\", \"value\": "+ v +"}\n") +writer.close() +System.exit(0) \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/hdfs-site-template.xml b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/hdfs-site-template.xml new file mode 100644 index 0000000..1a4ebfe --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/hdfs-site-template.xml @@ -0,0 +1,43 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +<?xml version="1.0" encoding="UTF-8"?> +<?xml-stylesheet type="text/xsl" href="configuration.xsl"?> +<!-- + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. See accompanying LICENSE file. +--> + +<!-- Put site-specific property overrides in this file. --> + +<configuration> + <property> + <name>dfs.replication</name> + <value>{{ dfs_replication }}</value> + </property> +</configuration> diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/ssh_config b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/ssh_config new file mode 100644 index 0000000..51781cf --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/conf/ssh_config @@ -0,0 +1,21 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +Host *.amazonaws.com 0.0.0.0 + StrictHostKeyChecking no diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q01.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q01.txt new file mode 100644 index 0000000..0b0c8ec --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q01.txt @@ -0,0 +1,35 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT l.L_RETURNFLAG, + l.L_LINESTATUS, + sum(l.L_QUANTITY) AS sum_qty, + sum(l.L_EXTENDEDPRICE) AS sum_base_price, + sum(l.L_EXTENDEDPRICE * (1 - l.L_DISCOUNT)) AS sum_disc_price, + sum(l.L_EXTENDEDPRICE * (1 - l.L_DISCOUNT) * (1 + l.L_TAX)) AS sum_charge, + avg(l.l_quantity) AS ave_qty, + avg(l.L_EXTENDEDPRICE) AS ave_price, + avg(l.L_DISCOUNT) AS ave_disc, + count(*) AS count_order +FROM LINEITEM AS l +WHERE l.L_SHIPDATE <= "1998-09-02" +GROUP BY l.L_RETURNFLAG, + l.L_LINESTATUS +ORDER BY l.L_RETURNFLAG, + l.L_LINESTATUS \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q02.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q02.txt new file mode 100644 index 0000000..1089ce8 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q02.txt @@ -0,0 +1,57 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH q2_minimum_cost_supplier_tmp1 AS + (SELECT s.S_ACCTBAL, + s.S_NAME, + n.N_NAME, + p.P_PARTKEY, + ps.PS_SUPPLYCOST, + p.P_MFGR, + s.S_ADDRESS, + s.S_PHONE, + s.S_COMMENT + FROM NATION n + JOIN REGION r ON n.N_REGIONKEY = r.R_REGIONKEY + AND r.R_NAME = "EUROPE" + JOIN SUPPLIER s ON s.S_NATIONKEY = n.N_NATIONKEY + JOIN PARTSUPP ps ON s.S_SUPPKEY = ps.PS_SUPPKEY + JOIN PART p ON p.P_PARTKEY = ps.S_PARTKEY + AND p.P_TYPE LIKE "%BRASS" + AND p.P_SIZE = 15), + q2_minimum_cost_supplier_tmp2 AS + (SELECT p.P_PARTKEY, + min(p.PS_SUPPLYCOST) AS PS_MIN_SUPPLYCOST + FROM q2_minimum_cost_supplier_tmp1 p + GROUP BY p.P_PARTKEY) +SELECT t1.S_ACCTBAL, + t1.S_NAME, + t1.N_NAME, + t1.P_PARTKEY, + t1.P_MFGR AS P_MFGR, + t1.S_ADDRESS, + t1.S_PHONE, + t1.S_COMMENT +FROM q2_minimum_cost_supplier_tmp1 t1 +JOIN q2_minimum_cost_supplier_tmp2 t2 ON t1.P_PARTKEY = t2.P_PARTKEY +AND t1.PS_SUPPLYCOST=t2.PS_MIN_SUPPLYCOST +ORDER BY t1.S_ACCTBAL DESC, + t1.N_NAME, + t1.S_NAME, + t1.P_PARTKEY LIMIT 100 \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q03.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q03.txt new file mode 100644 index 0000000..0b0c8ec --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q03.txt @@ -0,0 +1,35 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT l.L_RETURNFLAG, + l.L_LINESTATUS, + sum(l.L_QUANTITY) AS sum_qty, + sum(l.L_EXTENDEDPRICE) AS sum_base_price, + sum(l.L_EXTENDEDPRICE * (1 - l.L_DISCOUNT)) AS sum_disc_price, + sum(l.L_EXTENDEDPRICE * (1 - l.L_DISCOUNT) * (1 + l.L_TAX)) AS sum_charge, + avg(l.l_quantity) AS ave_qty, + avg(l.L_EXTENDEDPRICE) AS ave_price, + avg(l.L_DISCOUNT) AS ave_disc, + count(*) AS count_order +FROM LINEITEM AS l +WHERE l.L_SHIPDATE <= "1998-09-02" +GROUP BY l.L_RETURNFLAG, + l.L_LINESTATUS +ORDER BY l.L_RETURNFLAG, + l.L_LINESTATUS \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q04.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q04.txt new file mode 100644 index 0000000..2aebce0 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q04.txt @@ -0,0 +1,31 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH tmp AS + (SELECT l.L_ORDERKEY AS O_ORDERKEY + FROM LINEITEM AS l + WHERE l.L_COMMITDATE < l.L_RECEIPTDATE) +SELECT o.O_ORDERPRIORITY, + count(*) AS COUNT +FROM ORDERS AS o +JOIN tmp AS t ON o.O_ORDERKEY = t.O_ORDERKEY +WHERE o.O_ORDERDATE >= "1993-07-01" + AND o.O_ORDERDATE < "1993-10-01" +GROUP BY o.O_ORDERPRIORITY +ORDER BY O_ORDERPRIORITY \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q05.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q05.txt new file mode 100644 index 0000000..cb3c26a --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q05.txt @@ -0,0 +1,52 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT O1.N_NAME, + SUM(O1.L_EXTENDEDPRICE * (1 - O1.L_DISCOUNT)) AS REVENUE +FROM CUSTOMER C +JOIN + (SELECT L1.N_NAME, + L1.L_EXTENDEDPRICE, + L1.L_DISCOUNT, + L1.S_NATIONKEY, + O.O_CUSTKEY + FROM ORDERS O + JOIN + (SELECT S1.N_NAME, + L.L_EXTENDEDPRICE, + L.L_DISCOUNT, + L.L_ORDERKEY, + S1.S_NATIONKEY + FROM LINEITEM L + JOIN + (SELECT N1.N_NAME, + S.S_SUPPKEY, + S.S_NATIONKEY + FROM SUPPLIER S + JOIN + (SELECT N.N_NAME, + N.N_NATIONKEY + FROM NATION N + JOIN REGION R ON N.N_REGIONKEY = R.R_REGIONKEY + AND R.R_NAME = "ASIA") N1 ON S.S_NATIONKEY = N1.N_NATIONKEY) S1 ON L.L_SUPPKEY = S1.S_SUPPKEY) L1 ON L1.L_ORDERKEY = O.O_ORDERKEY + AND O.O_ORDERDATE >= "1994-01-01" + AND O.O_ORDERDATE < "1995-01-01") O1 ON C.C_NATIONKEY = O1.S_NATIONKEY +AND C.C_CUSTKEY = O1.O_CUSTKEY +GROUP BY O1.N_NAME +ORDER BY REVENUE DESC \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q06.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q06.txt new file mode 100644 index 0000000..cfe6fd8 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q06.txt @@ -0,0 +1,26 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT SUM(L.L_EXTENDEDPRICE * L.L_DISCOUNT) +FROM LINEITEM AS L +WHERE L.L_SHIPDATE >= "1994-01-01" + AND L.L_SHIPDATE < "1995-01-01" + AND L.L_DISCOUNT >= 0.05 + AND L.L_DISCOUNT <= 0.07 + AND L.L_QUANTITY < 24 \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07.txt new file mode 100644 index 0000000..56b198b --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07.txt @@ -0,0 +1,71 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH q7_volume_shipping_tmp AS + (SELECT N1.N_NAME AS SUPP_NATION, + N2.N_NAME AS CUST_NATION, + N1.N_NATIONKEY AS S_NATIONKEY, + N2.N_NATIONKEY AS C_NATIONKEY + FROM NATION AS N1, + NATION AS N2 + WHERE (N1.N_NAME="FRANCE" + AND N2.N_NAME="GERMANY") + OR (N1.N_NAME="GERMANY" + AND N2.N_NAME="FRANCE")) +SELECT SUPP_NATION, + CUST_NATION, + L_YEAR, + SUM(VOLUME) AS REVENUE +FROM + (SELECT T.SUPP_NATION, + T.CUST_NATION, + YEAR(L3.L_SHIPDATE) AS L_YEAR, + L3.L_EXTENDEDPRICE * (1 - L3.L_DISCOUNT) AS VOLUME + FROM + (SELECT L2.L_SHIPDATE, + L2.L_EXTENDEDPRICE, + L2.L_DISCOUNT, + L2.C_NATIONKEY, + S.S_NATIONKEY + FROM SUPPLIER S + JOIN + (SELECT L1.L_SHIPDATE, + L1.L_EXTENDEDPRICE, + L1.L_DISCOUNT, + L1.L_SUPPKEY, + C.C_NATIONKEY + FROM CUSTOMER C + JOIN + (SELECT L.L_SHIPDATE, + L.L_EXTENDEDPRICE, + L.L_DISCOUNT, + L.L_SUPPKEY, + O.O_CUSTKEY + FROM ORDERS O + JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY + AND L.L_SHIPDATE >= "1995-01-01" + AND L.L_SHIPDATE <= "1996-12-31") L1 ON C.C_CUSTKEY = L1.O_CUSTKEY) L2 ON S.S_SUPPKEY = L2.L_SUPPKEY) L3 + JOIN q7_volume_shipping_tmp T ON T.C_NATIONKEY = L3.C_NATIONKEY + AND T.S_NATIONKEY = L3.S_NATIONKEY) SHIPPING +GROUP BY SUPP_NATION, + CUST_NATION, + L_YEAR +ORDER BY SUPP_NATION, + CUST_NATION, + L_YEAR \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07_variant.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07_variant.txt new file mode 100644 index 0000000..7fc8c89 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q07_variant.txt @@ -0,0 +1,71 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH q7_volume_shipping_tmp AS + (SELECT N1.N_NAME AS SUPP_NATION, + N2.N_NAME AS CUST_NATION, + N1.N_NATIONKEY AS S_NATIONKEY, + N2.N_NATIONKEY AS C_NATIONKEY + FROM NATION AS N1, + NATION AS N2 + WHERE (N1.N_NAME="FRANCE" + AND N2.N_NAME="GERMANY") + OR (N1.N_NAME="GERMANY" + AND N2.N_NAME="FRANCE")) +SELECT SUPP_NATION, + CUST_NATION, + L_YEAR, + SUM(VOLUME) AS REVENUE +FROM + (SELECT T.SUPP_NATION, + T.CUST_NATION, + YEAR(L3.L_SHIPDATE) AS L_YEAR, + L3.L_EXTENDEDPRICE * (1 - L3.L_DISCOUNT) AS VOLUME + FROM q7_volume_shipping_tmp T + JOIN + (SELECT L2.L_SHIPDATE, + L2.L_EXTENDEDPRICE, + L2.L_DISCOUNT, + L2.C_NATIONKEY, + S.S_NATIONKEY + FROM SUPPLIER S + JOIN + (SELECT L1.L_SHIPDATE, + L1.L_EXTENDEDPRICE, + L1.L_DISCOUNT, + L1.L_SUPPKEY, + C.C_NATIONKEY + FROM CUSTOMER C + JOIN + (SELECT L.L_SHIPDATE, + L.L_EXTENDEDPRICE, + L.L_DISCOUNT, + L.L_SUPPKEY, + O.O_CUSTKEY + FROM ORDERS O + JOIN LINEITEM L ON O.O_ORDERKEY = L.L_ORDERKEY + AND L.L_SHIPDATE >= "1995-01-01" + AND L.L_SHIPDATE <= "1996-12-31") L1 ON C.C_CUSTKEY = L1.O_CUSTKEY) L2 ON S.S_SUPPKEY = L2.L_SUPPKEY) L3 ON T.C_NATIONKEY = L3.C_NATIONKEY + AND T.S_NATIONKEY = L3.S_NATIONKEY) SHIPPING +GROUP BY SUPP_NATION, + CUST_NATION, + L_YEAR +ORDER BY SUPP_NATION, + CUST_NATION, + L_YEAR \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q08.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q08.txt new file mode 100644 index 0000000..2808ae5 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q08.txt @@ -0,0 +1,48 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT O_YEAR, + SUM(CASE WHEN T.S_NAME = "BRAZIL" THEN T.REVENUE ELSE 0.0 END) / SUM(T.REVENUE) AS MKT_SHARE FROM + (SELECT YEAR(SLNRCOP.O_ORDERDATE) AS O_YEAR, SLNRCOP.L_EXTENDEDPRICE * (1 - SLNRCOP.L_DISCOUNT) AS REVENUE, N2.N_NAME AS S_NAME + FROM + (SELECT LNRCOP.O_ORDERDATE, LNRCOP.L_DISCOUNT, LNRCOP.L_EXTENDEDPRICE, LNRCOP.L_SUPPKEY, S.S_NATIONKEY + FROM SUPPLIER S, + (SELECT LNRCO.O_ORDERDATE, LNRCO.L_DISCOUNT, LNRCO.L_EXTENDEDPRICE, LNRCO.L_SUPPKEY + FROM + (SELECT NRCO.O_ORDERDATE, L.L_PARTKEY, L.L_DISCOUNT, L.L_EXTENDEDPRICE, L.L_SUPPKEY + FROM LINEITEM L, + (SELECT O.O_ORDERDATE, O.O_ORDERKEY + FROM ORDERS O, + (SELECT C.C_CUSTKEY + FROM CUSTOMER C, + (SELECT N.N_NATIONKEY + FROM NATION N, REGION R + WHERE N.N_REGIONKEY = R.R_REGIONKEY + AND R.R_NAME = "AMERICA") AS NR + WHERE C.C_NATIONKEY = NR.N_NATIONKEY) AS NRC + WHERE NRC.C_CUSTKEY = O.O_CUSTKEY) AS NRCO + WHERE L.L_ORDERKEY = NRCO.O_ORDERKEY + AND NRCO.O_ORDERDATE >= "1995-01-01" + AND NRCO.O_ORDERDATE < "1996-12-31") AS LNRCO, PART P + WHERE P.P_PARTKEY = LNRCO.L_PARTKEY + AND P.P_TYPE = "ECONOMY ANODIZED STEEL") AS LNRCOP + WHERE S.S_SUPPKEY = LNRCOP.L_SUPPKEY) AS SLNRCOP, NATION N2 + WHERE SLNRCOP.S_NATIONKEY = N2.N_NATIONKEY) AS T +GROUP BY O_YEAR +ORDER BY O_YEAR \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q09.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q09.txt new file mode 100644 index 0000000..0009573 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q09.txt @@ -0,0 +1,64 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT NATION, + O_YEAR, + SUM(AMOUNT) AS SUM_PROFIT +FROM + (SELECT L3.N_NAME AS NATION, + YEAR(O.O_ORDERDATE) AS O_YEAR, + L3.L_EXTENDEDPRICE * (1 - L3.L_DISCOUNT) - L3.PS_SUPPLYCOST * L3.L_QUANTITY AS AMOUNT + FROM ORDERS O + JOIN + (SELECT L2.L_EXTENDEDPRICE, + L2.L_DISCOUNT, + L2.L_QUANTITY, + L2.L_ORDERKEY, + L2.N_NAME, + L2. PS_SUPPLYCOST + FROM PART P + JOIN + (SELECT L1.L_EXTENDEDPRICE, + L1.L_DISCOUNT, + L1.L_QUANTITY, + L1.L_PARTKEY, + L1.L_ORDERKEY, + L1.N_NAME, + PS.PS_SUPPLYCOST + FROM PARTSUPP PS + JOIN + (SELECT L.L_SUPPKEY, + L.L_EXTENDEDPRICE, + L.L_DISCOUNT, + L.L_QUANTITY, + L.L_PARTKEY, + L.L_ORDERKEY, + S1.N_NAME + FROM + (SELECT S.S_SUPPKEY, + N.N_NAME + FROM NATION N + JOIN SUPPLIER S ON N.N_NATIONKEY = S.S_NATIONKEY) S1 + JOIN LINEITEM L ON S1.S_SUPPKEY = L.L_SUPPKEY) L1 ON PS.PS_SUPPKEY = L1.L_SUPPKEY + AND PS.S_PARTKEY = L1.L_PARTKEY) L2 ON P.P_NAME LIKE "%GREEN%" + AND P.P_PARTKEY = L2.L_PARTKEY) L3 ON O.O_ORDERKEY = L3.L_ORDERKEY) PROFIT +GROUP BY NATION, + O_YEAR +ORDER BY NATION, + O_YEAR DESC \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q10.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q10.txt new file mode 100644 index 0000000..56cbbb1 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q10.txt @@ -0,0 +1,64 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT C_CUSTKEY, + C_NAME, + SUM(LOCN.L_EXTENDEDPRICE * (1 - LOCN.L_DISCOUNT)) AS REVENUE, + C_ACCTBAL, + N_NAME, + C_ADDRESS, + C_PHONE, + C_COMMENT +FROM + (SELECT OCN.C_CUSTKEY, + OCN.C_NAME, + OCN.C_ACCTBAL, + OCN.N_NAME, + OCN.C_ADDRESS, + OCN.C_PHONE, + OCN.C_COMMENT, + L.L_EXTENDEDPRICE, + L.L_DISCOUNT + FROM LINEITEM AS L, + + (SELECT C.C_CUSTKEY, + C.C_NAME, + C.C_ACCTBAL, + N.N_NAME, + C.C_ADDRESS, + C.C_PHONE, + C.C_COMMENT, + O.O_ORDERKEY + FROM ORDERS AS O, + CUSTOMER AS C, + NATION AS N + WHERE C.C_CUSTKEY = O.O_CUSTKEY + AND O.O_ORDERDATE >= "1993-10-01" + AND O.O_ORDERDATE < "1994-01-01" + AND C.C_NATIONKEY = N.N_NATIONKEY) AS OCN + WHERE L.L_ORDERKEY = OCN.O_ORDERKEY + AND L.L_RETURNFLAG = "R") AS LOCN +GROUP BY C_CUSTKEY, + C_NAME, + C_ACCTBAL, + C_PHONE, + N_NAME, + C_ADDRESS, + C_COMMENT +ORDER BY REVENUE DESC LIMIT 20 \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q11.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q11.txt new file mode 100644 index 0000000..548d4aa --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q11.txt @@ -0,0 +1,42 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH s1 AS + (SELECT SUM(PS.PS_SUPPLYCOST * PS.PS_AVAILQTY) + FROM PARTSUPP AS PS, + + (SELECT S.S_SUPPKEY + FROM SUPPLIER AS S, + NATION AS N + WHERE S.S_NATIONKEY = N.N_NATIONKEY + AND N.N_NAME = "GERMANY") AS SN + WHERE PS.PS_SUPPKEY = SN.S_SUPPKEY) +SELECT S_PARTKEY, + SUM(PS.PS_SUPPLYCOST * PS.PS_AVAILQTY) AS PART_VALUE +FROM PARTSUPP PS, + (SELECT S.S_SUPPKEY + FROM SUPPLIER AS S, + NATION AS N + WHERE S.S_NATIONKEY = N.N_NATIONKEY + AND N.N_NAME = "GERMANY") SN +WHERE PS.PS_SUPPKEY = SN.S_SUPPKEY +GROUP BY PS.S_PARTKEY HAVING PART_VALUE > + (SELECT * + FROM s1) * 0.0001000 +ORDER BY PART_VALUE DESC \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q12.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q12.txt new file mode 100644 index 0000000..229b568 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q12.txt @@ -0,0 +1,35 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT L.L_SHIPMODE, + SUM(CASE WHEN O.O_ORDERPRIORITY = "1-URGENT" + OR O.O_ORDERPRIORITY = "2-HIGH" THEN 1 ELSE 0 END) HIGH_LINE_COUNT, + SUM(CASE WHEN O.O_ORDERPRIORITY = "1-URGENT" + OR O.O_ORDERPRIORITY = "2-HIGH" THEN 0 ELSE 1 END) LOW_LINE_COUNT +FROM LINEITEM L, + ORDERS O +WHERE O.O_ORDERKEY = L.L_ORDERKEY + AND L.L_COMMITDATE < L.L_RECEIPTDATE + AND L.L_SHIPDATE < L.L_COMMITDATE + AND L.L_RECEIPTDATE >= "1994-01-01" + AND L.L_RECEIPTDATE < "1995-01-01" + AND (L.L_SHIPMODE = "MAIL" + OR L.L_SHIPMODE = "SHIP") +GROUP BY L.L_SHIPMODE +ORDER BY L.L_SHIPMODE \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q13.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q13.txt new file mode 100644 index 0000000..56583b8 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q13.txt @@ -0,0 +1,32 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT C_COUNT, COUNT(*) AS CUSTDIST +FROM ( + SELECT C_CUSTKEY, SUM(O_ORDERKEY_COUNT) AS C_COUNT + FROM ( + SELECT C.C_CUSTKEY, COUNT(O.O_ORDERKEY) AS O_ORDERKEY_COUNT + FROM (CUSTOMER C LEFT OUTER JOIN ORDERS O) + WHERE C.C_CUSTKEY = O.O_CUSTKEY AND O.O_COMMENT NOT LIKE "%SPECIAL%REQUESTS%" + GROUP BY C.C_CUSTKEY + ) CO + GROUP BY C_CUSTKEY +) GCO +GROUP BY C_COUNT +ORDER BY CUSTDIST DESC,C_COUNT DESC \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q14.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q14.txt new file mode 100644 index 0000000..140fee4 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q14.txt @@ -0,0 +1,25 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +SELECT 100.0 * SUM(CASE WHEN P.P_TYPE LIKE "PROMO%" THEN L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT) ELSE 0.0 END) / SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) +FROM LINEITEM L, + PART P +WHERE L.L_PARTKEY = P.P_PARTKEY + AND L.L_SHIPDATE >= "1995-09-01" + AND L.L_SHIPDATE < "1995-10-01" \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q15.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q15.txt new file mode 100644 index 0000000..765da69 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q15.txt @@ -0,0 +1,43 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH REVENUE AS + (SELECT L.L_SUPPKEY AS SUPPLIER_NO, + SUM(L.L_EXTENDEDPRICE * (1 - L.L_DISCOUNT)) AS TOTAL_REVENUE + FROM LINEITEM L + WHERE L.L_SHIPDATE >= "1996-01-01" + AND L.L_SHIPDATE < "1996-04-01" + GROUP BY L.L_SUPPKEY), + m AS + (SELECT MAX(R2.TOTAL_REVENUE) + FROM REVENUE R2) +SELECT S.S_SUPPKEY, + S.S_NAME, + S.S_ADDRESS, + S.S_PHONE, + R.TOTAL_REVENUE +FROM SUPPLIER S, + REVENUE R +WHERE S.S_SUPPKEY = R.SUPPLIER_NO + AND R.TOTAL_REVENUE < + (SELECT * + FROM m) + 0.000000001 + AND R.TOTAL_REVENUE > + (SELECT * + FROM m) - 0.000000001 \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q16.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q16.txt new file mode 100644 index 0000000..7bc9b08 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q16.txt @@ -0,0 +1,66 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH tmp AS + (SELECT PSP.P_BRAND, + PSP.P_TYPE, + PSP.P_SIZE, + PSP.PS_SUPPKEY + FROM + (SELECT P.P_BRAND, + P.P_TYPE, + P.P_SIZE, + PS.PS_SUPPKEY + FROM PARTSUPP PS, + PART P + WHERE P.P_PARTKEY = PS.S_PARTKEY + AND P.P_BRAND != "BRAND#45" + AND P.P_TYPE NOT LIKE "MEDIUM POLISHED%") AS PSP, + SUPPLIER S + WHERE PSP.PS_SUPPKEY = S.S_SUPPKEY + AND S.S_COMMENT NOT LIKE "%CUSTOMER%COMPLAINTS%") +SELECT P_BRAND, + P_TYPE, + P_SIZE, + COUNT(PS_SUPPKEY) SUPPLIER_CNT +FROM + (SELECT P_BRAND, + P_TYPE, + P_SIZE, + PS_SUPPKEY + FROM tmp + WHERE P_SIZE = 49 + OR P_SIZE = 14 + OR P_SIZE = 23 + OR P_SIZE = 45 + OR P_SIZE = 19 + OR P_SIZE = 3 + OR P_SIZE = 36 + OR P_SIZE = 9 + GROUP BY P_BRAND, + P_TYPE, + P_SIZE, + PS_SUPPKEY) AS T2 +GROUP BY P_BRAND, + P_TYPE, + P_SIZE +ORDER BY SUPPLIER_CNT DESC, + P_BRAND, + P_TYPE, + P_SIZE \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q17.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q17.txt new file mode 100644 index 0000000..8ad8ad4 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q17.txt @@ -0,0 +1,33 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH tmp AS + (SELECT L_PARTKEY T_PARTKEY, + 0.2 * AVG(L_QUANTITY) T_AVG_QUANTITY + FROM LINEITEM + GROUP BY L_PARTKEY) +SELECT * +FROM tmp T, + LINEITEM L, + PART P +WHERE P.P_PARTKEY = L.L_PARTKEY + AND P.P_CONTAINER = "MED BOX" + AND P.P_BRAND = "BRAND#23" + AND L.L_PARTKEY = T.T_PARTKEY + AND L.L_QUANTITY < T.T_AVG_QUANTITY \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q18.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q18.txt new file mode 100644 index 0000000..99044da --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q18.txt @@ -0,0 +1,42 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH tmp AS + (SELECT L_ORDERKEY, + SUM(L_QUANTITY) T_SUM_QUANTITY + FROM LINEITEM + GROUP BY L_ORDERKEY) +SELECT C.C_NAME, + C.C_CUSTKEY, + O.O_ORDERKEY, + O.O_ORDERDATE, + O.O_TOTALPRICE, + SUM(L.L_QUANTITY) SUM_QUANTITY +FROM CUSTOMER C +JOIN ORDERS O ON C.C_CUSTKEY = O.O_CUSTKEY +JOIN TMP T ON O.O_ORDERKEY = T.L_ORDERKEY +JOIN LINEITEM L ON T.L_ORDERKEY = L.L_ORDERKEY +WHERE T.T_SUM_QUANTITY > 30 +GROUP BY C.C_NAME, + C.C_CUSTKEY, + O.O_ORDERKEY, + O.O_ORDERDATE, + O.O_TOTALPRICE +ORDER BY O.O_TOTALPRICE DESC, + O.O_ORDERDATE LIMIT 100 \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q19.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q19.txt new file mode 100644 index 0000000..a40d6a1 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q19.txt @@ -0,0 +1,49 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH tmp AS + (SELECT L_PARTKEY AS LPKEY, + L_QUANTITY AS QUANTITY, + L_EXTENDEDPRICE AS EXTNDPRICE, + L_DISCOUNT AS DISCOUNT + FROM LINEITEM + WHERE (L_SHIPMODE = "AIR" + OR L_SHIPMODE = "AIR REG") + AND L_SHIPINSTRUCT = "DELIVER IN PERSON") +SELECT SUM(L.EXTNDPRICE * (1 - L.DISCOUNT)) +FROM tmp L +JOIN PART P ON P.P_PARTKEY = L.LPKEY +WHERE (P.P_BRAND = "BRAND#12" + AND P.P_CONTAINER REGEXP "SM CASE|SM BOX|SM PACK|SM PKG" + AND L.QUANTITY >= 1 + AND L.QUANTITY <= 11 + AND P.P_SIZE >= 1 + AND P.P_SIZE <= 5) + OR (P.P_BRAND = "BRAND#23" + AND P.P_CONTAINER REGEXP "MED BAG|MED BOX|MED PKG|MED PACK" + AND L.QUANTITY >= 10 + AND L.QUANTITY <= 20 + AND P.P_SIZE >= 1 + AND P.P_SIZE <= 10) + OR (P.P_BRAND = "BRAND#34" + AND P.P_CONTAINER REGEXP "LG CASE|LG BOX|LG PACK|LG PKG" + AND L.QUANTITY >= 20 + AND L.QUANTITY <= 30 + AND P.P_SIZE >= 1 + AND P.P_SIZE <= 15) \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q20.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q20.txt new file mode 100644 index 0000000..b91eff9 --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q20.txt @@ -0,0 +1,52 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH Q20_TMP1 AS + (SELECT DISTINCT P_PARTKEY + FROM PART + WHERE P_NAME LIKE "FOREST%"), + Q20_TMP2 AS + (SELECT L_PARTKEY, + L_SUPPKEY, + 0.5 * SUM(L_QUANTITY) AS SUM_QUANTITY + FROM LINEITEM + WHERE L_SHIPDATE >= "1994-01-01" + AND L_SHIPDATE < "1995-01-01" + GROUP BY L_PARTKEY, + L_SUPPKEY), + Q20_TMP3 AS + (SELECT PS_SUPPKEY, + PS_AVAILQTY, + T2.SUM_QUANTITY + FROM PARTSUPP + JOIN Q20_TMP1 T1 ON S_PARTKEY = T1.P_PARTKEY + JOIN Q20_TMP2 T2 ON S_PARTKEY = T2.L_PARTKEY + AND PS_SUPPKEY = T2.L_SUPPKEY), + Q20_TMP4 AS + (SELECT PS_SUPPKEY + FROM Q20_TMP3 + WHERE PS_AVAILQTY > SUM_QUANTITY + GROUP BY PS_SUPPKEY) +SELECT S.S_NAME, + S.S_ADDRESS +FROM SUPPLIER S +JOIN NATION N ON S.S_NATIONKEY = N.N_NATIONKEY +JOIN Q20_TMP4 T4 ON S.S_SUPPKEY = T4.PS_SUPPKEY +WHERE N.N_NAME = "CANADA" +ORDER BY S.S_NAME \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q21.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q21.txt new file mode 100644 index 0000000..2e784bb --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q21.txt @@ -0,0 +1,74 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH TMP1 AS + (SELECT L_ORDERKEY, + COUNT(L_SUPPKEY) AS COUNT_SUPPKEY, + MAX(L_SUPPKEY) AS MAX_SUPPKEY + FROM + (SELECT L_ORDERKEY, + L_SUPPKEY + FROM LINEITEM L + GROUP BY L_ORDERKEY, + L_SUPPKEY) AS L2 + GROUP BY L_ORDERKEY), + TMP2 AS + (SELECT L2.L_ORDERKEY, + COUNT(L_SUPPKEY) AS COUNT_SUPPKEY, + MAX(L_SUPPKEY) AS MAX_SUPPKEY + FROM + (SELECT L_ORDERKEY, + L_SUPPKEY + FROM LINEITEM L + WHERE L_RECEIPTDATE > L_COMMITDATE + GROUP BY L_ORDERKEY, + L_SUPPKEY) AS L2 + GROUP BY L_ORDERKEY) +SELECT T4.S_NAME, + COUNT(*) AS NUMWAIT +FROM + (SELECT T3.S_NAME, + T3.L_SUPPKEY, + T2.L_ORDERKEY, + COUNT_SUPPKEY, + MAX_SUPPKEY + FROM + (SELECT NS.S_NAME, + T1.L_ORDERKEY, + L.L_SUPPKEY + FROM LINEITEM L, + + (SELECT S.S_NAME, + S.S_SUPPKEY + FROM NATION N, + SUPPLIER S + WHERE S.S_NATIONKEY = N.N_NATIONKEY + AND N.N_NAME="SAUDI ARABIA") AS NS, + ORDERS O, + TMP1 AS T1 + WHERE NS.S_SUPPKEY = L.L_SUPPKEY + AND L.L_RECEIPTDATE > L.L_COMMITDATE + AND O.O_ORDERKEY = T1.L_ORDERKEY + AND L.L_ORDERKEY = T1.L_ORDERKEY + AND O.O_ORDERSTATUS = "F") AS T3 + JOIN TMP2 AS T2 ON COUNT_SUPPKEY >= 0 + AND T3.L_ORDERKEY = T2.L_ORDERKEY) AS T4 +GROUP BY T4.S_NAME +ORDER BY NUMWAIT DESC, + T4.S_NAME \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q22.txt b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q22.txt new file mode 100644 index 0000000..786067d --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/queries/q22.txt @@ -0,0 +1,55 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +WITH q22_customer_tmp AS + (SELECT C_ACCTBAL, + C_CUSTKEY, + SUBSTRING(C_PHONE,1,2) AS CNTRYCODE + FROM CUSTOMER + WHERE SUBSTRING(C_PHONE,1,2) = "13" + OR SUBSTRING(C_PHONE,1,2) = "31" + OR SUBSTRING(C_PHONE,1,2) = "23" + OR SUBSTRING(C_PHONE,1,2) = "29" + OR SUBSTRING(C_PHONE,1,2) = "30" + OR SUBSTRING(C_PHONE,1,2) = "18" + OR SUBSTRING(C_PHONE,1,2) = "17"), + AVG AS + (SELECT AVG(C_ACCTBAL) + FROM CUSTOMER + WHERE C_ACCTBAL > 0.0 + AND (SUBSTRING(C_PHONE,1,2) = "13" + OR SUBSTRING(C_PHONE,1,2) = "31" + OR SUBSTRING(C_PHONE,1,2) = "23" + OR SUBSTRING(C_PHONE,1,2) = "29" + OR SUBSTRING(C_PHONE,1,2) = "30" + OR SUBSTRING(C_PHONE,1,2) = "18" + OR SUBSTRING(C_PHONE,1,2) = "17")) +SELECT CNTRYCODE, + COUNT(*) AS NUMCUST, + SUM(C_ACCTBAL) AS TOTACCTBAL +FROM Q22_CUSTOMER_TMP AS CT +WHERE CT.C_ACCTBAL > + (SELECT * + FROM AVG) + AND EXISTS + (SELECT * + FROM ORDERS AS O + WHERE CT.C_CUSTKEY = O.O_CUSTKEY) +GROUP BY CNTRYCODE +ORDER BY CNTRYCODE \ No newline at end of file diff --git a/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh new file mode 100755 index 0000000..e17282f --- /dev/null +++ b/asterixdb/asterix-benchmark/src/main/resources/others/SparkSQL/run_sparksql.sh @@ -0,0 +1,47 @@ +# ------------------------------------------------------------ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# ------------------------------------------------------------ + +#!/bin/bash + +pushd `dirname $0` > /dev/null +SCRIPT_PATH=`pwd -P` +popd > /dev/null +export ANSIBLE_HOST_KEY_CHECKING=false + +INVENTORY=$1 + +# Checks the existence of the inventory file. +if [ ! -f "$INVENTORY" ]; +then + echo "The inventory file \"$INVENTORY\" does not exist." + exit 1 +fi + +# Configure HDFS +ansible-playbook -i $INVENTORY install_hdfs.yml +ansible-playbook -i $INVENTORY start_hdfs.yml +# Configure Sparks +ansible-playbook -i $INVENTORY install_sparks.yml +ansible-playbook -i $INVENTORY start_sparks.yml +# Generate data +ansible-playbook -i $INVENTORY gen_tpch.yml +ansible-playbook -i $INVENTORY load_tpch.yml +# Execute queries +ansible-playbook -i $INVENTORY prepare_queries.yml +ansible-playbook -i $INVENTORY execute_queries.yml \ No newline at end of file -- To view, visit https://asterix-gerrit.ics.uci.edu/1879 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Ic9ff8efa0be71bde67190ba6f9fbd647c7799084 Gerrit-PatchSet: 1 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Xikui Wang <[email protected]>
