spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 8b2d8385c -> ac9a0f692


[SPARK-22161][SQL] Add Impala-modified TPC-DS queries

## What changes were proposed in this pull request?

Added IMPALA-modified TPCDS queries to TPC-DS query suites.

- Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries

## How was this patch tested?
N/A

Author: gatorsmile 

Closes #19386 from gatorsmile/addImpalaQueries.

(cherry picked from commit 9ed7394a68315126b2dd00e53a444cc65b5a62ea)
Signed-off-by: gatorsmile 


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ac9a0f69
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ac9a0f69
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ac9a0f69

Branch: refs/heads/branch-2.2
Commit: ac9a0f6923a72ec8f92fe88760cf50a67497b666
Parents: 8b2d838
Author: gatorsmile 
Authored: Fri Sep 29 08:59:42 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 09:00:15 2017 -0700

--
 .../resources/tpcds-modifiedQueries/q10.sql |  70 ++
 .../resources/tpcds-modifiedQueries/q19.sql |  38 
 .../resources/tpcds-modifiedQueries/q27.sql |  43 
 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++
 .../resources/tpcds-modifiedQueries/q34.sql |  45 
 .../resources/tpcds-modifiedQueries/q42.sql |  28 +++
 .../resources/tpcds-modifiedQueries/q43.sql |  36 +++
 .../resources/tpcds-modifiedQueries/q46.sql |  80 +++
 .../resources/tpcds-modifiedQueries/q52.sql |  27 +++
 .../resources/tpcds-modifiedQueries/q53.sql |  37 +++
 .../resources/tpcds-modifiedQueries/q55.sql |  24 ++
 .../resources/tpcds-modifiedQueries/q59.sql |  83 +++
 .../resources/tpcds-modifiedQueries/q63.sql |  29 +++
 .../resources/tpcds-modifiedQueries/q65.sql |  58 +
 .../resources/tpcds-modifiedQueries/q68.sql |  62 +
 .../test/resources/tpcds-modifiedQueries/q7.sql |  31 +++
 .../resources/tpcds-modifiedQueries/q73.sql |  49 
 .../resources/tpcds-modifiedQueries/q79.sql |  59 +
 .../resources/tpcds-modifiedQueries/q89.sql |  43 
 .../resources/tpcds-modifiedQueries/q98.sql |  32 +++
 .../resources/tpcds-modifiedQueries/ss_max.sql  |  14 ++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  26 ++-
 22 files changed, 1141 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
--
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
new file mode 100755
index 0000000..79dd3d5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
@@ -0,0 +1,70 @@
+-- start query 10 in stream 0 using template query10.tpl
+with 
+v1 as (
+  select 
+ ws_bill_customer_sk as customer_sk
+  from web_sales,
+   date_dim
+  where ws_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+  union all
+  select 
+cs_ship_customer_sk as customer_sk
+  from catalog_sales,
+   date_dim 
+  where cs_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+),
+v2 as (
+  select 
+ss_customer_sk as customer_sk
+  from store_sales,
+   date_dim
+  where ss_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3 
+)
+select
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+from customer c
+join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
+join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) 
+left semi join v1 on (v1.customer_sk = c.c_customer_sk) 
+left semi join v2 on (v2.customer_sk = c.c_customer_sk)
+where 
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County')
+group by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+order by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+limit 100
+-- end query 10 in stream 0 using template query10.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/ac9a0f69/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
--
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.

spark git commit: [SPARK-22161][SQL] Add Impala-modified TPC-DS queries

2017-09-29 Thread lixiao
Repository: spark
Updated Branches:
  refs/heads/master ecbe416ab -> 9ed7394a6


[SPARK-22161][SQL] Add Impala-modified TPC-DS queries

## What changes were proposed in this pull request?

Added IMPALA-modified TPCDS queries to TPC-DS query suites.

- Ref: https://github.com/cloudera/impala-tpcds-kit/tree/master/queries

## How was this patch tested?
N/A

Author: gatorsmile 

Closes #19386 from gatorsmile/addImpalaQueries.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ed7394a
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ed7394a
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ed7394a

Branch: refs/heads/master
Commit: 9ed7394a68315126b2dd00e53a444cc65b5a62ea
Parents: ecbe416
Author: gatorsmile 
Authored: Fri Sep 29 08:59:42 2017 -0700
Committer: gatorsmile 
Committed: Fri Sep 29 08:59:42 2017 -0700

--
 .../resources/tpcds-modifiedQueries/q10.sql |  70 ++
 .../resources/tpcds-modifiedQueries/q19.sql |  38 
 .../resources/tpcds-modifiedQueries/q27.sql |  43 
 .../test/resources/tpcds-modifiedQueries/q3.sql | 228 +++
 .../resources/tpcds-modifiedQueries/q34.sql |  45 
 .../resources/tpcds-modifiedQueries/q42.sql |  28 +++
 .../resources/tpcds-modifiedQueries/q43.sql |  36 +++
 .../resources/tpcds-modifiedQueries/q46.sql |  80 +++
 .../resources/tpcds-modifiedQueries/q52.sql |  27 +++
 .../resources/tpcds-modifiedQueries/q53.sql |  37 +++
 .../resources/tpcds-modifiedQueries/q55.sql |  24 ++
 .../resources/tpcds-modifiedQueries/q59.sql |  83 +++
 .../resources/tpcds-modifiedQueries/q63.sql |  29 +++
 .../resources/tpcds-modifiedQueries/q65.sql |  58 +
 .../resources/tpcds-modifiedQueries/q68.sql |  62 +
 .../test/resources/tpcds-modifiedQueries/q7.sql |  31 +++
 .../resources/tpcds-modifiedQueries/q73.sql |  49 
 .../resources/tpcds-modifiedQueries/q79.sql |  59 +
 .../resources/tpcds-modifiedQueries/q89.sql |  43 
 .../resources/tpcds-modifiedQueries/q98.sql |  32 +++
 .../resources/tpcds-modifiedQueries/ss_max.sql  |  14 ++
 .../org/apache/spark/sql/TPCDSQuerySuite.scala  |  26 ++-
 22 files changed, 1141 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/9ed7394a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
--
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
new file mode 100755
index 0000000..79dd3d5
--- /dev/null
+++ b/sql/core/src/test/resources/tpcds-modifiedQueries/q10.sql
@@ -0,0 +1,70 @@
+-- start query 10 in stream 0 using template query10.tpl
+with 
+v1 as (
+  select 
+ ws_bill_customer_sk as customer_sk
+  from web_sales,
+   date_dim
+  where ws_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+  union all
+  select 
+cs_ship_customer_sk as customer_sk
+  from catalog_sales,
+   date_dim 
+  where cs_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3
+),
+v2 as (
+  select 
+ss_customer_sk as customer_sk
+  from store_sales,
+   date_dim
+  where ss_sold_date_sk = d_date_sk
+  and d_year = 2002
+  and d_moy between 4 and 4+3 
+)
+select
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  count(*) cnt1,
+  cd_purchase_estimate,
+  count(*) cnt2,
+  cd_credit_rating,
+  count(*) cnt3,
+  cd_dep_count,
+  count(*) cnt4,
+  cd_dep_employed_count,
+  count(*) cnt5,
+  cd_dep_college_count,
+  count(*) cnt6
+from customer c
+join customer_address ca on (c.c_current_addr_sk = ca.ca_address_sk)
+join customer_demographics on (cd_demo_sk = c.c_current_cdemo_sk) 
+left semi join v1 on (v1.customer_sk = c.c_customer_sk) 
+left semi join v2 on (v2.customer_sk = c.c_customer_sk)
+where 
+  ca_county in ('Walker County','Richland County','Gaines County','Douglas County','Dona Ana County')
+group by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+order by 
+  cd_gender,
+  cd_marital_status,
+  cd_education_status,
+  cd_purchase_estimate,
+  cd_credit_rating,
+  cd_dep_count,
+  cd_dep_employed_count,
+  cd_dep_college_count
+limit 100
+-- end query 10 in stream 0 using template query10.tpl

http://git-wip-us.apache.org/repos/asf/spark/blob/9ed7394a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
--
diff --git a/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql b/sql/core/src/test/resources/tpcds-modifiedQueries/q19.sql
new file mode 100755
index 000..1799