spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries
Repository: spark Updated Branches: refs/heads/branch-2.2 8b2d8385c -> ac9a0f692 [SPARK-22161][SQL] Add Impala-modified TPC-DS queries ## What changes were proposed in this pull request? Added Impala-modified TPC-DS queries to the TPC-DS query suites. - Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries ## How was this patch tested? N/A Author: gatorsmile Closes #19386 from gatorsmile/addImpalaQueries. (cherry picked from commit 9ed7394a68315126b2dd00e53a444cc65b5a62ea) Signed-off-by: gatorsmile Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac9a0f69 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac9a0f69 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac9a0f69 Branch: refs/heads/branch-2.2 Commit: ac9a0f6923a72ec8f92fe88760cf50a67497b666 Parents: 8b2d838 Author: gatorsmile Authored: Fri Sep 29 08:59:42 2017 -0700 Committer: gatorsmile Committed: Fri Sep 29 09:00:15 2017 -0700 -- .../resources/tpcds-modifiedQueries/q10.sql | 70 ++ .../resources/tpcds-modifiedQueries/q19.sql | 38 .../resources/tpcds-modifiedQueries/q27.sql | 43 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++ .../resources/tpcds-modifiedQueries/q34.sql | 45 .../resources/tpcds-modifiedQueries/q42.sql | 28 +++ .../resources/tpcds-modifiedQueries/q43.sql | 36 +++ .../resources/tpcds-modifiedQueries/q46.sql | 80 +++ .../resources/tpcds-modifiedQueries/q52.sql | 27 +++ .../resources/tpcds-modifiedQueries/q53.sql | 37 +++ .../resources/tpcds-modifiedQueries/q55.sql | 24 ++ .../resources/tpcds-modifiedQueries/q59.sql | 83 +++ .../resources/tpcds-modifiedQueries/q63.sql | 29 +++ .../resources/tpcds-modifiedQueries/q65.sql | 58 + .../resources/tpcds-modifiedQueries/q68.sql | 62 + .../test/resources/tpcds-modifiedQueries/q7.sql | 31 +++ .../resources/tpcds-modifiedQueries/q73.sql | 49 .../resources/tpcds-modifiedQueries/q79.sql | 59 + .../resources/tpcds-modifiedQueries/q89.sql | 43 
.../resources/tpcds-modifiedQueries/q98.sql | 32 +++ .../resources/tpcds-modifiedQueries/ss_max.sql | 14 ++ .../org/apache/spark/sql/TPCDSQuerySuite.scala | 26 ++- 22 files changed, 1141 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql -- diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql new file mode 100755 index 000..79dd3d5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql @@ -0,0 +1,70 @@ +-- start query 10 in stream 0 using template query10.tpl +with +v1 as ( + select + ws_bill_customer_sk as customer_sk + from web_sales, + date_dim + where ws_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 + union all + select +cs_ship_customer_sk as customer_sk + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 +), +v2 as ( + select +ss_customer_sk as customer_sk + from store_sales, + date_dim + where ss_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 +) +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 +from customer c +join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk) +join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) +left semi join v1 on (v1.customer_sk = c.c_customer_sk) +left semi join v2 on (v2.customer_sk = c.c_customer_sk) +where + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') +group by + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + 
cd_dep_college_count +order by + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +-- end query 10 in stream 0 using template query10.tpl http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql -- diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.
spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries
Repository: spark Updated Branches: refs/heads/master ecbe416ab -> 9ed7394a6 [SPARK-22161][SQL] Add Impala-modified TPC-DS queries ## What changes were proposed in this pull request? Added Impala-modified TPC-DS queries to the TPC-DS query suites. - Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries ## How was this patch tested? N/A Author: gatorsmile Closes #19386 from gatorsmile/addImpalaQueries. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ed7394a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ed7394a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ed7394a Branch: refs/heads/master Commit: 9ed7394a68315126b2dd00e53a444cc65b5a62ea Parents: ecbe416 Author: gatorsmile Authored: Fri Sep 29 08:59:42 2017 -0700 Committer: gatorsmile Committed: Fri Sep 29 08:59:42 2017 -0700 -- .../resources/tpcds-modifiedQueries/q10.sql | 70 ++ .../resources/tpcds-modifiedQueries/q19.sql | 38 .../resources/tpcds-modifiedQueries/q27.sql | 43 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++ .../resources/tpcds-modifiedQueries/q34.sql | 45 .../resources/tpcds-modifiedQueries/q42.sql | 28 +++ .../resources/tpcds-modifiedQueries/q43.sql | 36 +++ .../resources/tpcds-modifiedQueries/q46.sql | 80 +++ .../resources/tpcds-modifiedQueries/q52.sql | 27 +++ .../resources/tpcds-modifiedQueries/q53.sql | 37 +++ .../resources/tpcds-modifiedQueries/q55.sql | 24 ++ .../resources/tpcds-modifiedQueries/q59.sql | 83 +++ .../resources/tpcds-modifiedQueries/q63.sql | 29 +++ .../resources/tpcds-modifiedQueries/q65.sql | 58 + .../resources/tpcds-modifiedQueries/q68.sql | 62 + .../test/resources/tpcds-modifiedQueries/q7.sql | 31 +++ .../resources/tpcds-modifiedQueries/q73.sql | 49 .../resources/tpcds-modifiedQueries/q79.sql | 59 + .../resources/tpcds-modifiedQueries/q89.sql | 43 .../resources/tpcds-modifiedQueries/q98.sql | 32 +++ .../resources/tpcds-modifiedQueries/ss_max.sql | 14 ++ 
.../org/apache/spark/sql/TPCDSQuerySuite.scala | 26 ++- 22 files changed, 1141 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/9ed7394a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql -- diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql new file mode 100755 index 000..79dd3d5 --- /dev/null +++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql @@ -0,0 +1,70 @@ +-- start query 10 in stream 0 using template query10.tpl +with +v1 as ( + select + ws_bill_customer_sk as customer_sk + from web_sales, + date_dim + where ws_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 + union all + select +cs_ship_customer_sk as customer_sk + from catalog_sales, + date_dim + where cs_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 +), +v2 as ( + select +ss_customer_sk as customer_sk + from store_sales, + date_dim + where ss_sold_date_sk = d_date_sk + and d_year = 2002 + and d_moy between 4 and 4+3 +) +select + cd_gender, + cd_marital_status, + cd_education_status, + count(*) cnt1, + cd_purchase_estimate, + count(*) cnt2, + cd_credit_rating, + count(*) cnt3, + cd_dep_count, + count(*) cnt4, + cd_dep_employed_count, + count(*) cnt5, + cd_dep_college_count, + count(*) cnt6 +from customer c +join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk) +join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) +left semi join v1 on (v1.customer_sk = c.c_customer_sk) +left semi join v2 on (v2.customer_sk = c.c_customer_sk) +where + ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County') +group by + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +order by + cd_gender, + cd_marital_status, + cd_education_status, + cd_purchase_estimate, + 
cd_credit_rating, + cd_dep_count, + cd_dep_employed_count, + cd_dep_college_count +limit 100 +-- end query 10 in stream 0 using template query10.tpl http://git-wip-us.apache.org/repos/asf/spark/blob/9ed7394a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql -- diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql new file mode 100755 index 000..1799