[
https://issues.apache.org/jira/browse/TRAFODION-2655?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
David Wayne Birdsall updated TRAFODION-2655:
--------------------------------------------
Description:
Below are two scripts that reproduce the problems.
In the first problem, a query using parameters does not get an MDAM plan, even
though such a plan would be beneficial. If the parameters on column C2 are
replaced with literals that are close enough together, we do get the desired
MDAM plan (an illustrative literal variant is shown after the first script below).
In the second problem, using a larger version of the same table, a query with
equality predicates on the first and third key columns ("first" and "third"
ignoring the SALT column) does not get an MDAM plan on the first key column but
chooses a full table scan instead.
These problems were noticed on a cluster; it's not practical to create tables
of the necessary size on a workstation. The scripts below reproduce the problems
on a workstation by faking out the statistics so that the optimizer believes it
is dealing with much larger tables.
First script:
?section setup
drop table if exists MDAM_Q1_TEST;
create table MDAM_Q1_TEST
( c1 integer not null, -- will have uec 2
c2 integer not null, -- will have uec 1,000,000
c3 integer not null, -- will have uec 766
c4 integer not null, -- will have uec 46
c5 integer not null, -- will have uec don't care (but 1,000,000)
primary key ( c1, c2, c3, c4 ) )
salt using 4 partitions;
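-- note: SALT USING adds a computed "_SALT_" column ahead of the declared key
-- columns; that is the SALT column the description above says to ignore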
upsert using load into MDAM_Q1_TEST
select mod(c0+c1+c2+c3+c4,2),
c0 + 10*c1 + 100*c2 + 1000*c3 + 10000*c4 + 100000*c5,
c3 + 10*c1 + 74*c5,
c4 + 4*c5,
c5 + 10*c4 + 100*c3 + 1000*c2 + 10000*c1 + 100000*c0
from (values(1)) t
transpose 0,1,2,3,4,5,6,7,8,9 as c0
transpose 0,1,2,3,4,5,6,7,8,9 as c1
transpose 0,1,2,3,4,5,6,7,8,9 as c2
transpose 0,1,2,3,4,5,6,7,8,9 as c3
transpose 0,1,2,3,4,5,6,7,8,9 as c4
transpose 0,1,2,3,4,5,6,7,8,9 as c5;
update statistics for table MDAM_Q1_TEST on every column;
-- next fake out the statistics so the optimizer will think there
-- are 265 million rows in the table
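-- (the upsert above generates 10^6 = 1,000,000 rows, one per combination of
-- the six 10-value transpose lists, so scaling the counts by 265 simulates
-- roughly 265,000,000 rows)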
update sb_histograms set rowcount = 265 * rowcount
where table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q1_TEST');
update sb_histogram_intervals set interval_rowcount = 265 * interval_rowcount
where table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q1_TEST');
showstats for table MDAM_Q1_TEST on existing columns;
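-- (the showstats output can be used to verify that the scaled counts took effect)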
?section query1
prepare xx from
select * from MDAM_Q1_TEST
where c1 = ? and c2 > ? and c2 <= ? and c3 = ? and c4 = ?;
explain xx; -- the plan gets a full scan instead of MDAM
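For comparison, the following sketch (not part of the original script; the
section name, statement name, and the specific literal values are illustrative)
shows the literal form of the same query that, per the description above, does
get the desired MDAM plan when the C2 bounds are close together:
?section query1literals
prepare xy from
select * from MDAM_Q1_TEST
where c1 = 1 and c2 > 100000 and c2 <= 100010 and c3 = 25 and c4 = 12;
explain xy; -- expected: an MDAM plan, since the C2 range is narrow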
Second script:
?section setup
drop table if exists MDAM_Q2_TEST;
create table MDAM_Q2_TEST
( c1 integer not null, -- will have uec 100
c2 integer not null, -- will have uec 2,000,000
c3 integer not null, -- will have uec 9,100
c4 integer not null, -- will have uec 11,800
c5 integer not null, -- will have uec don't care
primary key ( c1, c2, c3, c4 ) )
salt using 4 partitions;
upsert using load into MDAM_Q2_TEST
select c3 + 10*c4,
c0 + 10*c1 + 100*c2 + 1000*c3 + 10000*c4 + 100000*c5 + 1000000*c6,
c3 + 10*c1 + 100*c5 + 900*c4,
c4 + 10*c5 + 100*c2 + 600*c1 + 600*c3,
c5 + 10*c4 + 100*c3 + 1000*c2 + 10000*c1 + 100000*c0 + 7*c6
from (values(1)) t
transpose 0,1,2,3,4,5,6,7,8,9 as c0
transpose 0,1,2,3,4,5,6,7,8,9 as c1
transpose 0,1,2,3,4,5,6,7,8,9 as c2
transpose 0,1,2,3,4,5,6,7,8,9 as c3
transpose 0,1,2,3,4,5,6,7,8,9 as c4
transpose 0,1,2,3,4,5,6,7,8,9 as c5
transpose 0,1 as c6;
?section setupstats
update statistics for table MDAM_Q2_TEST on every column;
-- next fake out the statistics so the optimizer will think there
-- are 11 billion rows in the table
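-- (the upsert above generates 2 * 10^6 = 2,000,000 rows, since the extra
-- two-value transpose on c6 doubles the 10^6 combinations, so scaling by
-- 5500 simulates 11,000,000,000 rows)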
update sb_histograms set rowcount = 5500 * rowcount
where table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST');
update sb_histogram_intervals set interval_rowcount = 5500 * interval_rowcount
where table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST');
-- finally fake out the UEC on C2 so optimizer will think
-- the UEC is 5 billion instead of 2 million
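-- (2500 * 2,000,000 = 5,000,000,000; column_number = 1 with colcount = 1 is
-- intended to select the single-column histogram on C2)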
update sb_histograms set total_uec = 2500 * total_uec
where table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST')
and column_number = 1 and colcount = 1;
update sb_histogram_intervals i set interval_uec = 2500 * interval_uec
where i.table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST')
and i.histogram_id = (select histogram_id from sb_histograms h
  where h.table_uid = i.table_uid and h.column_number = 1 and h.colcount = 1);
-- update the UECs in the multi-column histograms that include C2
update sb_histograms set total_uec = 5000000000
where total_uec = 2000000
and table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST');
update sb_histogram_intervals set interval_uec = 5000000000
where interval_uec = 2000000
and table_uid = (select object_uid from "_MD_".objects
  where object_name = 'MDAM_Q2_TEST');
showstats for table MDAM_Q2_TEST on existing columns;
?section query2
prepare xx from
select * from MDAM_Q2_TEST
where c1 = 23 and c3 = 497;
explain xx; -- the plan does not get MDAM; but MDAM on C1 would be a good plan
?section query2x
prepare yy from
select * from MDAM_Q2_TEST
where c1 = 23;
explain yy; -- here we do get an MDAM plan on C1
> MDAM plans on prefixes sometimes not chosen when they should be
> ---------------------------------------------------------------
>
> Key: TRAFODION-2655
> URL: https://issues.apache.org/jira/browse/TRAFODION-2655
> Project: Apache Trafodion
> Issue Type: Bug
> Components: sql-cmp
> Affects Versions: 2.2-incubating
> Environment: Large clusters
> Reporter: David Wayne Birdsall
> Assignee: David Wayne Birdsall
>
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)