[ https://issues.apache.org/jira/browse/TRAFODION-2655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
David Wayne Birdsall resolved TRAFODION-2655. --------------------------------------------- Resolution: Fixed Fix Version/s: 2.2-incubating > MDAM plans on prefixes sometimes not chosen when they should be > --------------------------------------------------------------- > > Key: TRAFODION-2655 > URL: https://issues.apache.org/jira/browse/TRAFODION-2655 > Project: Apache Trafodion > Issue Type: Bug > Components: sql-cmp > Affects Versions: 2.2-incubating > Environment: Large clusters > Reporter: David Wayne Birdsall > Assignee: David Wayne Birdsall > Fix For: 2.2-incubating > > > Below are two scripts that reproduce the problems. > In the first problem, a query using parameters does not get an MDAM plan, > even though it would be beneficial to do so. If the parameters on column C2 > are replaced with literals that are close enough together, we do get the > desired MDAM plan. > In the second problem, using a larger version of the same table, a query with > equality predicates on the first and third key columns ("first" and "third" > ignoring the SALT column) does not get an MDAM plan on the first key column > but chooses a full table scan instead. > These problems were noticed on a cluster; it's not practical to create tables > of the necessary size on a workstation. The scripts below reproduce the > problem on a workstation by faking the Optimizer into thinking it is dealing > with much larger tables. > First script: > ?section setup > drop table if exists MDAM_Q1_TEST; > create table MDAM_Q1_TEST > ( c1 integer not null, -- will have uec 2 > c2 integer not null, -- will have uec 1,000,000 > c3 integer not null, -- will have uec 766 > c4 integer not null, -- will have uec 46 > c5 integer not null, -- will have uec don't care (but 1,000,000) > primary key ( c1, c2, c3, c4 ) ) > salt using 4 partitions; > upsert using load into MDAM_Q1_TEST > select mod(c0+c1+c2+c3+c4,2), > c0 + 10*c1 + 100*c2 + 1000*c3 + 10000*c4 + 100000*c5, > c3 + 10*c1 + 74*c5, > c4 + 4*c5, > c5 + 10*c4 + 100*c3 + 1000*c2 + 10000*c1 + 100000*c0 > from (values(1)) t > transpose 0,1,2,3,4,5,6,7,8,9 as c0 > transpose 0,1,2,3,4,5,6,7,8,9 as c1 > transpose 0,1,2,3,4,5,6,7,8,9 as c2 > transpose 0,1,2,3,4,5,6,7,8,9 as c3 > transpose 0,1,2,3,4,5,6,7,8,9 as c4 > transpose 0,1,2,3,4,5,6,7,8,9 as c5; > update statistics for table MDAM_Q1_TEST on every column; > -- next fake out the statistics so the optimizer will think there > -- are 265 million rows in the table > update sb_histograms set rowcount = 265 * rowcount > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q1_TEST'); > update sb_histogram_intervals set interval_rowcount = 265 * interval_rowcount > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q1_TEST'); > showstats for table MDAM_Q1_TEST on existing columns; > ?section query1 > prepare xx from > select * from MDAM_Q1_TEST > where c1 = ? and c2 > ? and c2 <= ? and c3 = ? and c4 = ?; > explain xx; -- the plan gets a full scan instead of MDAM > Second script > ?section setup > drop table if exists MDAM_Q2_TEST; > create table MDAM_Q2_TEST > ( c1 integer not null, -- will have uec 100 > c2 integer not null, -- will have uec 2,000,000 > c3 integer not null, -- will have uec 9,100 > c4 integer not null, -- will have uec 11,800 > c5 integer not null, -- will have uec don't care > primary key ( c1, c2, c3, c4 ) ) > salt using 4 partitions; > upsert using load into MDAM_Q2_TEST > select c3 + 10*c4, > c0 + 10*c1 + 100*c2 + 1000*c3 + 10000*c4 + 100000*c5 + 1000000*c6, > c3 + 10*c1 + 100*c5 + 900*c4, > c4 + 10*c5 + 100*c2 + 600*c1 + 600*c3, > c5 + 10*c4 + 100*c3 + 1000*c2 + 10000*c1 + 100000*c0 + 7*c6 > from (values(1)) t > transpose 0,1,2,3,4,5,6,7,8,9 as c0 > transpose 0,1,2,3,4,5,6,7,8,9 as c1 > transpose 0,1,2,3,4,5,6,7,8,9 as c2 > transpose 0,1,2,3,4,5,6,7,8,9 as c3 > transpose 0,1,2,3,4,5,6,7,8,9 as c4 > transpose 0,1,2,3,4,5,6,7,8,9 as c5 > transpose 0,1 as c6; > ?section setupstats > update statistics for table MDAM_Q2_TEST on every column; > -- next fake out the statistics so the optimizer will think there > -- are 11 billion rows in the table > update sb_histograms set rowcount = 5500 * rowcount > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q2_TEST'); > update sb_histogram_intervals set interval_rowcount = 5500 * interval_rowcount > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q2_TEST'); > -- finally fake out the UEC on C2 so optimizer will think > -- the UEC is 5 billion instead of 2 million > update sb_histograms set total_uec = 2500 * total_uec > where table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q2_TEST') > and column_number = 1 and colcount = 1; > update sb_histogram_intervals i set interval_uec = 2500 * interval_uec > where i.table_uid = (select object_uid from "_MD_".objects where object_name > = 'MDAM_Q2_TEST') > and i.histogram_id = (select histogram_id from sb_histograms h where > h.table_uid = i.table_uid and h.column_number = 1 and h.colcount = 1); > -- update the UECs in the multi-column histograms that include C2 > update sb_histograms set total_uec = 5000000000 where total_uec = 2000000 > and table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q2_TEST'); > update sb_histogram_intervals set interval_uec = 5000000000 where > interval_uec = 2000000 > and table_uid = (select object_uid from "_MD_".objects where object_name = > 'MDAM_Q2_TEST'); > showstats for table MDAM_Q2_TEST on existing columns; > ?section query2 > prepare xx from > select * from MDAM_Q2_TEST > where c1 = 23 and c3 = 497; > explain xx; -- the plan does not get MDAM; but MDAM on C1 would be a good > plan > ?section query2x > prepare yy from > select * from MDAM_Q2_TEST > where c1 = 23; > explain yy; -- here we do get an MDAM plan on C1 > > -- This message was sent by Atlassian JIRA (v6.4.14#64029)