ngsg commented on code in PR #5707: URL: https://github.com/apache/hive/pull/5707#discussion_r2006799035
########## ql/src/test/queries/clientpositive/bucketmapjoin_auto_reduce_parallel.q: ########## @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS source_table2; +DROP TABLE IF EXISTS target_table2; + +create table source_table2(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets; +insert into table source_table2 values +('2022-08-30', 'pipeline', '50000000000000000005905545593'), ('2022-08-16', 'pipeline', '50000000000000000005905545593'), ('2022-09-01', 'pipeline', '50000000000000000006008686831'), ('2022-08-30', 'pipeline', '50000000000000000005992620837'), ('2022-09-01', 'pipeline', '50000000000000000005992620837'), ('2022-09-01', 'pipeline', '50000000000000000005992621067'), +('2022-08-30', 'pipeline', '50000000000000000005992621067'); + +create table target_table2(date_col date, string_col string, decimal_col decimal(38,0)) clustered by (decimal_col) into 7 buckets; +insert into table target_table2 values +('2017-05-17', 'pipeline', '50000000000000000000441610525'), ('2018-12-20', 'pipeline', '50000000000000000001048981030'), ('2020-06-30', 'pipeline', '50000000000000000002332575516'), ('2021-08-16', 'pipeline', '50000000000000000003897973989'), ('2017-06-06', 'pipeline', '50000000000000000000449148729'), ('2017-09-08', 'pipeline', '50000000000000000000525378314'), +('2022-08-30', 'pipeline', '50000000000000000005905545593'), ('2022-08-16', 'pipeline', '50000000000000000005905545593'), ('2018-05-03', 'pipeline', '50000000000000000000750826355'), ('2020-01-10', 'pipeline', '50000000000000000001816579677'), ('2021-11-01', 'pipeline', '50000000000000000004269423714'), ('2017-11-07', 'pipeline', '50000000000000000000585901787'), +('2019-10-15', 'pipeline', '50000000000000000001598843430'), ('2020-04-01', 'pipeline', '50000000000000000002035795461'), ('2020-02-24', 'pipeline', '50000000000000000001932600185'), ('2020-04-27', 'pipeline', '50000000000000000002108160849'), ('2016-07-05', 'pipeline', 
'50000000000000000000054405114'), ('2020-06-02', 'pipeline', '50000000000000000002234387967'), +('2020-08-21', 'pipeline', '50000000000000000002529168758'), ('2021-02-17', 'pipeline', '50000000000000000003158511687'); + +set hive.auto.convert.join=true; +set hive.optimize.dynamic.partition.hashjoin=false; +set hive.convert.join.bucket.mapjoin.tez=true; +set hive.vectorized.execution.enabled=false; + +set hive.optimize.bucketmapjoin=true; +set hive.tez.dynamic.partition.pruning=true; -- Required for dynamic parallelism Review Comment: NIT: Could we remove this line, since `hive.tez.dynamic.partition.pruning` is set to true by default? (Also, as far as I know, dynamic partition pruning (DPP) only makes sense when we join a table with a partitioned table, and neither table in this test is partitioned.) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org