Pengcheng Xiong created HIVE-12661:
--------------------------------------
Summary: StatsSetupConst.COLUMN_STATS_ACCURATE is not used
correctly
Key: HIVE-12661
URL: https://issues.apache.org/jira/browse/HIVE-12661
Project: Hive
Issue Type: Bug
Reporter: Pengcheng Xiong
Assignee: Pengcheng Xiong
PROBLEM:
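Hive stats are autogathered properly until an 'analyze table [tablename] compute
statistics for columns' is run. After that, the stats are not auto-updated on
subsequent inserts until the command is run again, so queries answered from stats
(e.g. the max(year) queries below, with hive.compute.query.using.stats=true)
return stale results. Repro: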
{code}
set hive.stats.autogather=true;
set hive.stats.atomic=false ;
set hive.stats.collect.rawdatasize=true ;
set hive.stats.collect.scancols=false ;
set hive.stats.collect.tablekeys=false ;
set hive.stats.fetch.column.stats=true;
set hive.stats.fetch.partition.stats=true ;
set hive.stats.reliable=false ;
set hive.compute.query.using.stats=true;
CREATE TABLE `default`.`calendar` (`year` int) ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.orc.OrcSerde' STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat' TBLPROPERTIES (
'orc.compress'='NONE') ;
insert into calendar values (2010), (2011), (2012);
select * from calendar;
+----------------+--+
| calendar.year |
+----------------+--+
| 2010 |
| 2011 |
| 2012 |
+----------------+--+
select max(year) from calendar;
| 2012 |
insert into calendar values (2013);
select * from calendar;
+----------------+--+
| calendar.year |
+----------------+--+
| 2010 |
| 2011 |
| 2012 |
| 2013 |
+----------------+--+
select max(year) from calendar;
| 2013 |
insert into calendar values (2014);
select max(year) from calendar;
| 2014 |
analyze table calendar compute statistics for columns;
insert into calendar values (2015);
select max(year) from calendar;
| 2014 |
insert into calendar values (2016), (2017), (2018);
select max(year) from calendar;
| 2014 |
analyze table calendar compute statistics for columns;
select max(year) from calendar;
| 2018 |
{code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)