David Wayne Birdsall created TRAFODION-2662:
-----------------------------------------------

             Summary: Incremental UPDATE STATS fails on very large sample tables
                 Key: TRAFODION-2662
                 URL: https://issues.apache.org/jira/browse/TRAFODION-2662
             Project: Apache Trafodion
          Issue Type: Bug
          Components: sql-cmp
    Affects Versions: 2.2-incubating
         Environment: Large clusters
            Reporter: David Wayne Birdsall
            Assignee: David Wayne Birdsall


A user attempted to use the incremental UPDATE STATISTICS feature on a 90 
billion row table. The persistent sample table was created using a sampling 
rate of 5 per cent, and so it had about 4.5 billion rows. The incremental 
UPDATE STATISTICS command failed with an error like the following:

>>Update statistics for table traf1 on existing columns incremental where C2 >= 
>>'2017-06-10 00:00:00';

*** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly 
due to an invalid WHERE clause.

*** ERROR[15001] A syntax error occurred at or before: 
SELECT  FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE  C2 >= '2
           ^ (12 characters from start of SQL statement)

*** ERROR[8822] The statement was not prepared.

--- SQL operation failed with errors.

The script below reproduces the problem on a workstation. The script creates a 
table, puts a few rows in it, creates a sample table, then doctors the sample 
table metadata to make UPDATE STATISTICS think it has 4.5 billion rows. The 
failure then occurs.

?section setup

drop table if exists traf1;

CREATE TABLE traf1
  ( 
    C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
      DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
  , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
      DROPPABLE NOT SERIALIZED
  , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
      DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
  , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
      COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
  , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
      COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
  , C6 SMALLINT DEFAULT 1 NOT SERIALIZED
  , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
  )
  SALT USING 4 PARTITIONS
       ON (C1)
 ATTRIBUTES ALIGNED FORMAT 
  HBASE_OPTIONS 
  ( 
    DATA_BLOCK_ENCODING = 'FAST_DIFF',
    BLOCKSIZE = '262144',
    MEMSTORE_FLUSH_SIZE = '1073741824' 
  ) 
;

insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
  '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);

insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
  '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);

insert into traf1 values ('012345',current_timestamp - interval '20' day,
  'abc012','www.blahblah.com','a bit of it is fun',2);

insert into traf1 values ('01234567',current_timestamp - interval '20' day,
  'abc01234','www.blahblahblah.com','a bit of it is fun',31);

insert into traf1 values ('def01234',current_timestamp - interval '20' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);

insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
'0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);

insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
'abc012','www.blahblah.com','a bit of it is fun',2);

insert into traf1 values ('021357',current_timestamp - interval '20' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);

insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
  '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);

insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
  '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);

insert into traf1 values ('01234345',current_timestamp - interval '10' day,
  'abc012','www.blahblah.com','a bit of it is fun',2);

insert into traf1 values ('034567',current_timestamp - interval '10' day,
  'abc01234','www.blahblahblah.com','a bit of it is fun',31);

insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
'0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);

insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
'0123456789abcdef01','www.C4edin.com','a bit of try twice',2);

insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
'abc012','www.blahblah.com','a bit of it is fun',2);

insert into traf1 values ('02111357',current_timestamp - interval '10' day,
'abc01234','www.blahblahblah.com','a bit of it is fun',30);

insert into traf1 values 
('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of 
stuff',14);

insert into traf1 values 
('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit 
of more stuff',12);

insert into traf1 values 
('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is 
fun',21);

insert into traf1 values 
('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of it 
is fun',39);

insert into traf1 values 
('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of 
stuff',14);

insert into traf1 values 
('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit of 
more stuff',8);

insert into traf1 values 
('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is 
fun',21);

insert into traf1 values 
('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit of 
it is fun',39);

?section repro

UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT 
PERSISTENT;

-- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
-- from sb_persistent_samples to make UPDATE STATS think the sample table
-- has 4.5 billion rows

update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, 
ACTUAL_SAMPLE_ROWS = 4500000000
 where table_uid = (select object_uid from "_MD_".objects where object_name = 
'TRAF1');


cqd ustat_log 'reproULOG.txt';

update statistics log on;

Update statistics for table traf1 on existing columns incremental where C2 >= 
'2017-06-10 00:00:00'; 

exit;







--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to