[ 
https://issues.apache.org/jira/browse/HIVE-20434?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Sergey Shelukhin updated HIVE-20434:
------------------------------------
    Description: 
{noformat}
set hive.stats.dbclass=fs;
set hive.stats.fetch.column.stats=true;
set datanucleus.cache.collections=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.mapred.mode=nonstrict;

set hive.stats.autogather=true;
set hive.stats.column.autogather=true;
set hive.compute.query.using.stats=true;
set hive.explain.user=false;

set hive.fetch.task.conversion=none;
set hive.query.results.cache.enabled=false;

create table stats_part(key int,value string) partitioned by (p int) 
tblproperties ("transactional"="false");
insert into table stats_part partition(p=101) values (1, "foo");
explain select count(key) from stats_part; -- <== stats are accurate

ALTER TABLE stats_part CHANGE COLUMN key key2 int;
explain select count(key2) from stats_part; -- <== stats are now inaccurate

analyze table stats_part partition(p) compute statistics for columns;
explain select count(key2) from stats_part; -- <== stats are now accurate again

alter table stats_part add partition(p=105);
explain select count(key2) from stats_part; -- <== stats are now inaccurate

analyze table stats_part partition(p) compute statistics for columns;
explain select count(key2) from stats_part; -- <== stats are still inaccurate!


drop table stats_part;
{noformat}

It seems impossible to get accurate stats on a table that has an empty 
partition: analyze neither produces stats for that partition nor marks them 
as accurate.






  was:
{noformat}
set hive.stats.dbclass=fs;
set hive.stats.fetch.column.stats=true;
set datanucleus.cache.collections=false;
set hive.merge.mapfiles=false;
set hive.merge.mapredfiles=false;
set hive.mapred.mode=nonstrict;

set hive.stats.autogather=true;
set hive.stats.column.autogather=true;
set hive.compute.query.using.stats=true;
set hive.explain.user=false;

set hive.fetch.task.conversion=none;
set hive.query.results.cache.enabled=false;

create table stats_part(key int,value string) partitioned by (p int) 
tblproperties ("transactional"="false");
insert into table stats_part partition(p=101) values (1, "foo");
explain select count(key) from stats_part; -- <== stats are correct

ALTER TABLE stats_part CHANGE COLUMN key key2 int;
explain select count(key2) from stats_part; -- <== stats are now incorrect

analyze table stats_part partition(p) compute statistics for columns;
explain select count(key2) from stats_part; -- <== stats are now correct again

alter table stats_part add partition(p=105);
explain select count(key2) from stats_part; -- <== stats are now incorrect

analyze table stats_part partition(p) compute statistics for columns;
explain select count(key2) from stats_part; -- <== stats are still incorrect!


drop table stats_part;
{noformat}

Seems like it's impossible to have correct stats on a table with an empty 
partition.







> analyze on an empty partition doesn't mark stats as accurate/produce stats
> --------------------------------------------------------------------------
>
>                 Key: HIVE-20434
>                 URL: https://issues.apache.org/jira/browse/HIVE-20434
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Sergey Shelukhin
>            Assignee: Ashutosh Chauhan
>            Priority: Major
>
> {noformat}
> set hive.stats.dbclass=fs;
> set hive.stats.fetch.column.stats=true;
> set datanucleus.cache.collections=false;
> set hive.merge.mapfiles=false;
> set hive.merge.mapredfiles=false;
> set hive.mapred.mode=nonstrict;
> set hive.stats.autogather=true;
> set hive.stats.column.autogather=true;
> set hive.compute.query.using.stats=true;
> set hive.explain.user=false;
> set hive.fetch.task.conversion=none;
> set hive.query.results.cache.enabled=false;
> create table stats_part(key int,value string) partitioned by (p int) 
> tblproperties ("transactional"="false");
> insert into table stats_part partition(p=101) values (1, "foo");
> explain select count(key) from stats_part; -- <== stats are accurate
> ALTER TABLE stats_part CHANGE COLUMN key key2 int;
> explain select count(key2) from stats_part; -- <== stats are now inaccurate
> analyze table stats_part partition(p) compute statistics for columns;
> explain select count(key2) from stats_part; -- <== stats are now accurate again
> alter table stats_part add partition(p=105);
> explain select count(key2) from stats_part; -- <== stats are now inaccurate
> analyze table stats_part partition(p) compute statistics for columns;
> explain select count(key2) from stats_part; -- <== stats are still inaccurate!
> drop table stats_part;
> {noformat}
> It seems impossible to get accurate stats on a table that has an empty 
> partition: analyze neither produces stats for that partition nor marks them 
> as accurate.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to