[
https://issues.apache.org/jira/browse/TRAFODION-2662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16066730#comment-16066730
]
ASF GitHub Bot commented on TRAFODION-2662:
-------------------------------------------
Github user asfgit closed the pull request at:
https://github.com/apache/incubator-trafodion/pull/1149
> Incremental UPDATE STATS fails on very large sample tables
> ----------------------------------------------------------
>
> Key: TRAFODION-2662
> URL: https://issues.apache.org/jira/browse/TRAFODION-2662
> Project: Apache Trafodion
> Issue Type: Bug
> Components: sql-cmp
> Affects Versions: 2.2-incubating
> Environment: Large clusters
> Reporter: David Wayne Birdsall
> Assignee: David Wayne Birdsall
> Fix For: 2.2-incubating
>
>
> A user attempted to use the incremental UPDATE STATISTICS feature on a 90
> billion row table. The persistent sample table was created using a sampling
> rate of 5 per cent, and so it had about 4.5 billion rows. The incremental
> UPDATE STATISTICS command failed with an error like the following:
> >>Update statistics for table traf1 on existing columns incremental where C2
> >>>= '2017-06-10 00:00:00';
> *** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly
> due to an invalid WHERE clause.
> *** ERROR[15001] A syntax error occurred at or before:
> SELECT FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE C2 >=
> '2
> ^ (12 characters from start of SQL statement)
> *** ERROR[8822] The statement was not prepared.
> --- SQL operation failed with errors.
> The script below reproduces the problem on a workstation. The script creates
> a table, puts a few rows in it, creates a sample table, then doctors the
> sample table metadata to make UPDATE STATISTICS think it has 4.5 billion
> rows. The failure then occurs.
> ?section setup
> drop table if exists traf1;
> CREATE TABLE traf1
> (
> C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
> DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
> DROPPABLE NOT SERIALIZED
> , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
> DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
> COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
> , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
> COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
> , C6 SMALLINT DEFAULT 1 NOT SERIALIZED
> , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
> )
> SALT USING 4 PARTITIONS
> ON (C1)
> ATTRIBUTES ALIGNED FORMAT
> HBASE_OPTIONS
> (
> DATA_BLOCK_ENCODING = 'FAST_DIFF',
> BLOCKSIZE = '262144',
> MEMSTORE_FLUSH_SIZE = '1073741824'
> )
> ;
> insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('012345',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('01234567',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def01234',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('021357',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('01234345',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('034567',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
> insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
> insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('02111357',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values
> ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of
> stuff',14);
> insert into traf1 values
> ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit
> of more stuff',12);
> insert into traf1 values
> ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is
> fun',21);
> insert into traf1 values
> ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of
> it is fun',39);
> insert into traf1 values
> ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of
> stuff',14);
> insert into traf1 values
> ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit
> of more stuff',8);
> insert into traf1 values
> ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is
> fun',21);
> insert into traf1 values
> ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit
> of it is fun',39);
> ?section repro
> UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT
> PERSISTENT;
> -- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
> -- from sb_persistent_samples to make UPDATE STATS think the sample table
> -- has 4.5 billion rows
> update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000,
> ACTUAL_SAMPLE_ROWS = 4500000000
> where table_uid = (select object_uid from "_MD_".objects where object_name =
> 'TRAF1');
> cqd ustat_log 'reproULOG.txt';
> update statistics log on;
> Update statistics for table traf1 on existing columns incremental where C2 >=
> '2017-06-10 00:00:00';
> exit;
--
This message was sent by Atlassian JIRA
(v6.4.14#64029)