David Wayne Birdsall created TRAFODION-2662:
-----------------------------------------------
Summary: Incremental UPDATE STATS fails on very large sample tables
Key: TRAFODION-2662
URL: https://issues.apache.org/jira/browse/TRAFODION-2662
Project: Apache Trafodion
Issue Type: Bug
Components: sql-cmp
Affects Versions: 2.2-incubating
Environment: Large clusters
Reporter: David Wayne Birdsall
Assignee: David Wayne Birdsall
A user attempted to use the incremental UPDATE STATISTICS feature on a 90
billion row table. The persistent sample table was created using a sampling
rate of 5 per cent, and so it had about 4.5 billion rows. The incremental
UPDATE STATISTICS command failed with an error like the following:
>>Update statistics for table traf1 on existing columns incremental where C2 >=
>>'2017-06-10 00:00:00';
*** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly
due to an invalid WHERE clause.
*** ERROR[15001] A syntax error occurred at or before:
SELECT FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE C2 >= '2
^ (12 characters from start of SQL statement)
*** ERROR[8822] The statement was not prepared.
--- SQL operation failed with errors.
The script below reproduces the problem on a workstation. The script creates a
table, puts a few rows in it, creates a sample table, then doctors the sample
table metadata to make UPDATE STATISTICS think it has 4.5 billion rows. The
failure then occurs.
?section setup
drop table if exists traf1;
CREATE TABLE traf1
(
C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
DROPPABLE NOT SERIALIZED
, C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
, C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
, C6 SMALLINT DEFAULT 1 NOT SERIALIZED
, PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
)
SALT USING 4 PARTITIONS
ON (C1)
ATTRIBUTES ALIGNED FORMAT
HBASE_OPTIONS
(
DATA_BLOCK_ENCODING = 'FAST_DIFF',
BLOCKSIZE = '262144',
MEMSTORE_FLUSH_SIZE = '1073741824'
)
;
insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('012345',current_timestamp - interval '20' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('01234567',current_timestamp - interval '20' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',31);
insert into traf1 values ('def01234',current_timestamp - interval '20' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('021357',current_timestamp - interval '20' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);
insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
insert into traf1 values ('01234345',current_timestamp - interval '10' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('034567',current_timestamp - interval '10' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',31);
insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
'0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
'abc012','www.blahblah.com','a bit of it is fun',2);
insert into traf1 values ('02111357',current_timestamp - interval '10' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);
insert into traf1 values
('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of
stuff',14);
insert into traf1 values
('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit
of more stuff',12);
insert into traf1 values
('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is
fun',21);
insert into traf1 values
('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of it
is fun',39);
insert into traf1 values
('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of
stuff',14);
insert into traf1 values
('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit of
more stuff',8);
insert into traf1 values
('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is
fun',21);
insert into traf1 values
('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit of
it is fun',39);
?section repro
UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT
PERSISTENT;
-- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
-- from sb_persistent_samples to make UPDATE STATS think the sample table
-- has 4.5 billion rows
update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000,
ACTUAL_SAMPLE_ROWS = 4500000000
where table_uid = (select object_uid from "_MD_".objects where object_name =
'TRAF1');
cqd ustat_log 'reproULOG.txt';
update statistics log on;
Update statistics for table traf1 on existing columns incremental where C2 >=
'2017-06-10 00:00:00';
exit;
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)