Zoltan Haindrich created HIVE-18454:
---------------------------------------

             Summary: Incorrect rownum estimation in joins
                 Key: HIVE-18454
                 URL: https://issues.apache.org/jira/browse/HIVE-18454
             Project: Hive
          Issue Type: Sub-task
          Components: Statistics
            Reporter: Zoltan Haindrich


I think I've seen this before: the row counts seem to be off the
charts. 120000 rows are estimated when the table has only 10 rows.

{code}
-- Reproduction script for the join row-count misestimate.
-- s is the seed table; it holds the integers 1..10 (10 rows).
create table s (x int);

insert into s values
(1),(2),(3),(4),(5),
(6),(7),(8),(9),(10);

-- tu is the central table: it joins to tv on id_uv and to tw on id_uw.
create table tu(id_uv int,id_uw int,u int);
create table tv(id_uv int,v int);
create table tw(id_uw int,w int);

-- Multi-table insert: the single "from s" feeds all three inserts.
-- Every column is filled with x, so key and value columns are equal per row.
--   tu: x in {1..6, 10}  -> 7 rows
--   tv: x in {1..3, 10}  -> 4 rows
--   tw: no filter        -> 10 rows
from s
insert overwrite table tu
        select x,x,x 
        where x<=6 or x=10
insert overwrite table tv
        select x,x              
        where x<=3 or x=10
insert overwrite table tw
        select x,x              
;

set hive.explain.user=true;

-- Three-way join with one filter per table. From the data loaded above,
-- only the row with key 10 satisfies w>9, u>1 and v>3 together, so the join
-- yields a single row; compare that with the estimates in the plan output.
explain analyze
select sum(u*v*w) from tu
        join tv on (tu.id_uv=tv.id_uv)
        join tw on (tu.id_uw=tw.id_uw)
        where w>9 and u>1 and v>3;

-- NOTE(review): presumably included to show the table-level statistics
-- recorded for tv -- confirm against the issue discussion.
desc formatted tv;
{code}





--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to