http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
new file mode 100644
index 0000000..be99b2f
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q8.test
@@ -0,0 +1,71 @@
+====
+---- QUERY: TPCDS-Q8
+select
+  s_store_name,
+  round(sum(ss_net_profit), 2)
+from
+  store_sales
+  join store on (store_sales.ss_store_sk = store.s_store_sk)
+  join
+  (select
+    a.ca_zip
+  from
+    (select
+      substr(ca_zip, 1, 5) ca_zip,
+      count( *) cnt
+    from
+      customer_address
+      join customer on (customer_address.ca_address_sk = customer.c_current_addr_sk)
+    where
+      c_preferred_cust_flag = 'Y'
+    group by
+      ca_zip
+    having
+      count(*) > 10
+    ) a
+    left semi join
+    (select
+      substr(ca_zip, 1, 5) ca_zip
+    from
+      customer_address
+    where
+      substr(ca_zip, 1, 5) in ('89436', '30868', '65085', '22977', '83927', '77557', '58429', '40697', '80614', '10502', '32779',
+      '91137', '61265', '98294', '17921', '18427', '21203', '59362', '87291', '84093', '21505', '17184', '10866', '67898', '25797',
+      '28055', '18377', '80332', '74535', '21757', '29742', '90885', '29898', '17819', '40811', '25990', '47513', '89531', '91068',
+      '10391', '18846', '99223', '82637', '41368', '83658', '86199', '81625', '26696', '89338', '88425', '32200', '81427', '19053',
+      '77471', '36610', '99823', '43276', '41249', '48584', '83550', '82276', '18842', '78890', '14090', '38123', '40936', '34425',
+      '19850', '43286', '80072', '79188', '54191', '11395', '50497', '84861', '90733', '21068', '57666', '37119', '25004', '57835',
+      '70067', '62878', '95806', '19303', '18840', '19124', '29785', '16737', '16022', '49613', '89977', '68310', '60069', '98360',
+      '48649', '39050', '41793', '25002', '27413', '39736', '47208', '16515', '94808', '57648', '15009', '80015', '42961', '63982',
+      '21744', '71853', '81087', '67468', '34175', '64008', '20261', '11201', '51799', '48043', '45645', '61163', '48375', '36447',
+      '57042', '21218', '41100', '89951', '22745', '35851', '83326', '61125', '78298', '80752', '49858', '52940', '96976', '63792',
+      '11376', '53582', '18717', '90226', '50530', '94203', '99447', '27670', '96577', '57856', '56372', '16165', '23427', '54561',
+      '28806', '44439', '22926', '30123', '61451', '92397', '56979', '92309', '70873', '13355', '21801', '46346', '37562', '56458',
+      '28286', '47306', '99555', '69399', '26234', '47546', '49661', '88601', '35943', '39936', '25632', '24611', '44166', '56648',
+      '30379', '59785', '11110', '14329', '93815', '52226', '71381', '13842', '25612', '63294', '14664', '21077', '82626', '18799',
+      '60915', '81020', '56447', '76619', '11433', '13414', '42548', '92713', '70467', '30884', '47484', '16072', '38936', '13036',
+      '88376', '45539', '35901', '19506', '65690', '73957', '71850', '49231', '14276', '20005', '18384', '76615', '11635', '38177',
+      '55607', '41369', '95447', '58581', '58149', '91946', '33790', '76232', '75692', '95464', '22246', '51061', '56692', '53121',
+      '77209', '15482', '10688', '14868', '45907', '73520', '72666', '25734', '17959', '24677', '66446', '94627', '53535', '15560',
+      '41967', '69297', '11929', '59403', '33283', '52232', '57350', '43933', '40921', '36635', '10827', '71286', '19736', '80619',
+      '25251', '95042', '15526', '36496', '55854', '49124', '81980', '35375', '49157', '63512', '28944', '14946', '36503', '54010',
+      '18767', '23969', '43905', '66979', '33113', '21286', '58471', '59080', '13395', '79144', '70373', '67031', '38360', '26705',
+      '50906', '52406', '26066', '73146', '15884', '31897', '30045', '61068', '45550', '92454', '13376', '14354', '19770', '22928',
+      '97790', '50723', '46081', '30202', '14410', '20223', '88500', '67298', '13261', '14172', '81410', '93578', '83583', '46047',
+      '94167', '82564', '21156', '15799', '86709', '37931', '74703', '83103', '23054', '70470', '72008', '49247', '91911', '69998',
+      '20961', '70070', '63197', '54853', '88191', '91830', '49521', '19454', '81450', '89091', '62378', '25683', '61869', '51744',
+      '36580', '85778', '36871', '48121', '28810', '83712', '45486', '67393', '26935', '42393', '20132', '55349', '86057', '21309',
+      '80218', '10094', '11357', '48819', '39734', '40758', '30432', '21204', '29467', '30214', '61024', '55307', '74621', '11622',
+      '68908', '33032', '52868', '99194', '99900', '84936', '69036', '99149', '45013', '32895', '59004', '32322', '14933', '32936',
+      '33562', '72550', '27385', '58049', '58200', '16808', '21360', '32961', '18586', '79307', '15492')
+    ) b
+    on (a.ca_zip = b.ca_zip)
+  ) v1 on (substr(store.s_zip, 1, 2) = substr(v1.ca_zip, 1, 2))
+where
+  ss_sold_date_sk between 2452276 and 2452366
+group by
+  s_store_name
+order by
+  s_store_name
+limit 100;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
new file mode 100644
index 0000000..bb2beb8
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q88.test
@@ -0,0 +1,93 @@
+====
+---- QUERY: TPCDS-Q88
+select *
+from
+  (select count(*) h8_30_to_9
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 8
+    and time_dim.t_minute >= 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s1,
+  (select count(*) h9_to_9_30
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 9
+    and time_dim.t_minute < 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s2,
+  (select count(*) h9_30_to_10
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 9
+    and time_dim.t_minute >= 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s3,
+  (select count(*) h10_to_10_30
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 10
+    and time_dim.t_minute < 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s4,
+  (select count(*) h10_30_to_11
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 10
+    and time_dim.t_minute >= 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s5,
+  (select count(*) h11_to_11_30
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 11
+    and time_dim.t_minute < 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s6,
+  (select count(*) h11_30_to_12
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 11
+    and time_dim.t_minute >= 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s7,
+  (select count(*) h12_to_12_30
+  from store_sales, household_demographics , time_dim, store
+  where ss_sold_time_sk = time_dim.t_time_sk
+    and ss_hdemo_sk = household_demographics.hd_demo_sk
+    and ss_store_sk = s_store_sk
+    and time_dim.t_hour = 12
+    and time_dim.t_minute < 30
+    and ((household_demographics.hd_dep_count = 1 and household_demographics.hd_vehicle_count<=1+2) or
+      (household_demographics.hd_dep_count = 0 and household_demographics.hd_vehicle_count<=0+2) or
+      (household_demographics.hd_dep_count = 2 and household_demographics.hd_vehicle_count<=2+2))
+    and store.s_store_name = 'ese') s8;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
new file mode 100644
index 0000000..6677b07
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q89.test
@@ -0,0 +1,33 @@
+====
+---- QUERY: TPCDS-Q89
+/* Modifications: Added Partition Key filter because Impala does not do dynamic partition
+   pruning.*/
+select * from (select i_category, i_class, i_brand, s_store_name, s_company_name,
+       d_moy, round(sum_sales, 2) sum_sales,
+       round(avg_monthly_sales, 2) avg_monthly_sales
+from (
+select i_category, i_class, i_brand,
+       s_store_name, s_company_name,
+       d_moy,
+       sum(ss_sales_price) sum_sales,
+       avg(sum(ss_sales_price)) over
+         (partition by i_category, i_brand, s_store_name, s_company_name)
+         avg_monthly_sales
+from item, store_sales, date_dim, store
+where ss_item_sk = i_item_sk and
+      ss_sold_date_sk = d_date_sk and
+      ss_store_sk = s_store_sk and
+      d_year in (2000) and
+        ((i_category in ('Children','Music','Home') and
+          i_class in ('toddlers','pop','lighting')
+         )
+      or (i_category in ('Jewelry','Books','Sports') and
+          i_class in ('costume','travel','football')
+        ))
+      and ss_sold_date_sk between 2451545 and 2451910 -- partition key filter
+group by i_category, i_class, i_brand,
+         s_store_name, s_company_name, d_moy) tmp1
+where case when (avg_monthly_sales <> 0) then (abs(sum_sales - avg_monthly_sales) / avg_monthly_sales) else null end > 0.1
+order by sum_sales - avg_monthly_sales, s_store_name
+limit 100) tmp2;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
new file mode 100644
index 0000000..1687bc4
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q96.test
@@ -0,0 +1,17 @@
+====
+---- QUERY: TPCDS-Q96
+SELECT
+  COUNT(*) AS total
+FROM store_sales ss
+JOIN time_dim td
+  ON (ss.ss_sold_time_sk = td.t_time_sk)
+JOIN household_demographics hd
+  ON (ss.ss_hdemo_sk = hd.hd_demo_sk)
+JOIN store s
+  ON (ss.ss_store_sk = s.s_store_sk)
+WHERE
+  td.t_hour = 8
+  AND td.t_minute >= 30
+  AND hd.hd_dep_count = 5
+  AND s.s_store_name = 'ese';
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
----------------------------------------------------------------------
diff --git a/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
new file mode 100644
index 0000000..fcd2081
--- /dev/null
+++ b/testdata/workloads/tpcds/queries/tpcds-kudu-q98.test
@@ -0,0 +1,34 @@
+====
+---- QUERY: TPCDS-Q98
+select
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price,
+  round(sum(ss_ext_sales_price), 2) as itemrevenue,
+  round(sum(ss_ext_sales_price)*100/ sum(sum(ss_ext_sales_price))
+  over (partition by i_class), 2) as revenueratio
+from
+  store_sales,
+  item,
+  date_dim
+where
+  ss_item_sk = i_item_sk
+  and i_category in ('Jewelry', 'Sports', 'Books')
+  and ss_sold_date_sk = d_date_sk
+  and ss_sold_date_sk between 2451911 and 2451941
+  and d_date between '2001-01-01' and '2001-01-31' -- original uses interval arithmetic for the date range; literal bounds are used here
+group by
+  i_item_id,
+  i_item_desc,
+  i_category,
+  i_class,
+  i_current_price
+order by
+  i_category,
+  i_class,
+  i_item_id,
+  i_item_desc,
+  revenueratio
+limit 1000;
+====
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/comparison/db_connection.py
----------------------------------------------------------------------
diff --git a/tests/comparison/db_connection.py b/tests/comparison/db_connection.py
index 13125c2..9076da8 100644
--- a/tests/comparison/db_connection.py
+++ b/tests/comparison/db_connection.py
@@ -225,6 +225,12 @@ class DbCursor(object):
     db_name = db_name.lower()
     self.execute('CREATE DATABASE ' + db_name)
 
+  def create_db_if_not_exists(self, db_name):
+    '''Creates the database 'db_name' (lower-cased) if it does not already exist.'''
+    LOG.info("Creating database %s", db_name)
+    db_name = db_name.lower()
+    self.execute('CREATE DATABASE IF NOT EXISTS ' + db_name)
+
   def drop_db_if_exists(self, db_name):
     '''This should not be called from a conn to the database being dropped.'''
     db_name = db_name.lower()
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/8a49ceaa/tests/stress/concurrent_select.py
----------------------------------------------------------------------
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index b76d713..fb24f65 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -897,13 +897,16 @@ class QueryRunner(object):
     return hash_thread.result
 
 
-def load_tpc_queries(workload):
-  """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'."""
+def load_tpc_queries(workload, load_in_kudu=False):
+  """Returns a list of TPC queries. 'workload' should either be 'tpch' or 'tpcds'.
+  If 'load_in_kudu' is True, it loads only queries specified for the Kudu storage
+  engine."""
   LOG.info("Loading %s queries", workload)
   queries = list()
   query_dir = os.path.join(os.path.dirname(__file__), "..", "..",
       "testdata", "workloads", workload, "queries")
-  file_name_pattern = re.compile(r"-(q\d+).test$")
+  engine = 'kudu-' if load_in_kudu else ''
+  file_name_pattern = re.compile(r"%s-%s(q\d+)\.test$" % (workload, engine))
   for query_file in os.listdir(query_dir):
     match = file_name_pattern.search(query_file)
     if not match:
@@ -1339,6 +1342,10 @@ def main():
   parser.add_argument("--tpch-db", help="If provided, TPC-H queries will be used.")
   parser.add_argument("--tpch-nested-db",
       help="If provided, nested TPC-H queries will be used.")
+  parser.add_argument("--tpch-kudu-db",
+      help="If provided, TPC-H queries for Kudu will be used.")
+  parser.add_argument("--tpcds-kudu-db",
+      help="If provided, TPC-DS queries for Kudu will be used.")
   parser.add_argument("--random-db",
       help="If provided, random queries will be used.")
   parser.add_argument("--random-query-count", type=int, default=50,
@@ -1375,9 +1382,10 @@ def main():
   LOG.debug("CLI args: %s" % (args, ))
 
   if not args.tpcds_db and not args.tpch_db and not args.random_db \
-      and not args.tpch_nested_db and not args.query_file_path:
-    raise Exception("At least one of --tpcds-db, --tpch-db,"
-        "--tpch-nested-db, --random-db, --query-file-path is required")
+      and not args.tpch_nested_db and not args.tpch_kudu_db \
+      and not args.tpcds_kudu_db and not args.query_file_path:
+    raise Exception("At least one of --tpcds-db, --tpch-db, --tpch-kudu-db, "
+        "--tpcds-kudu-db, --tpch-nested-db, --random-db, --query-file-path is required")
 
   # The stress test sets these, so callers cannot override them.
   IGNORE_QUERY_OPTIONS = frozenset([
@@ -1451,6 +1459,16 @@ def main():
     for query in tpch_nested_queries:
       query.db_name = args.tpch_nested_db
     queries.extend(tpch_nested_queries)
+  if args.tpch_kudu_db:
+    tpch_kudu_queries = load_tpc_queries("tpch", load_in_kudu=True)
+    for query in tpch_kudu_queries:
+      query.db_name = args.tpch_kudu_db
+    queries.extend(tpch_kudu_queries)
+  if args.tpcds_kudu_db:
+    tpcds_kudu_queries = load_tpc_queries("tpcds", load_in_kudu=True)
+    for query in tpcds_kudu_queries:
+      query.db_name = args.tpcds_kudu_db
+    queries.extend(tpcds_kudu_queries)
   for idx in xrange(len(queries) - 1, -1, -1):
     query = queries[idx]
     if query.sql in queries_with_runtime_info_by_db_and_sql[query.db_name]:
