[ 
https://issues.apache.org/jira/browse/TRAFODION-2662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16072974#comment-16072974
 ] 

ASF GitHub Bot commented on TRAFODION-2662:
-------------------------------------------

GitHub user DaveBirdsall opened a pull request:

    https://github.com/apache/incubator-trafodion/pull/1162

    [TRAFODION-2662] Remove some non-determinism from compGeneral/TEST023

    This is yet another attempt to fix compGeneral/TEST023. The sample table 
names generated for persistent sample tables could vary in length, as they were 
functions of object UIDs and timestamps. The changes here pad these numbers 
with leading zeroes out to a fixed length, which should eliminate variability 
in line wrap in the test results. While I was at it, I found a bug: there was a 
buffer overrun in this code (my bad); the buffer is now the proper length.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/DaveBirdsall/incubator-trafodion 
Trafodion2662FixTest

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/incubator-trafodion/pull/1162.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #1162
    
----
commit 8dbae2c6ea835cb6bcb1bc753c95aa1757301a2e
Author: Dave Birdsall <dbirds...@apache.org>
Date:   2017-07-03T23:31:39Z

    [TRAFODION-2662] Remove some non-determinism from compGeneral/TEST023

----


> Incremental UPDATE STATS fails on very large sample tables
> ----------------------------------------------------------
>
>                 Key: TRAFODION-2662
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-2662
>             Project: Apache Trafodion
>          Issue Type: Bug
>          Components: sql-cmp
>    Affects Versions: 2.2-incubating
>         Environment: Large clusters
>            Reporter: David Wayne Birdsall
>            Assignee: David Wayne Birdsall
>             Fix For: 2.2-incubating
>
>
> A user attempted to use the incremental UPDATE STATISTICS feature on a 90 
> billion row table. The persistent sample table was created using a sampling 
> rate of 5 per cent, and so it had about 4.5 billion rows. The incremental 
> UPDATE STATISTICS command failed with an error like the following:
> >>Update statistics for table traf1 on existing columns incremental where C2 
> >>>= '2017-06-10 00:00:00';
> *** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly 
> due to an invalid WHERE clause.
> *** ERROR[15001] A syntax error occurred at or before: 
> SELECT  FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE  C2 >= 
> '2
>            ^ (12 characters from start of SQL statement)
> *** ERROR[8822] The statement was not prepared.
> --- SQL operation failed with errors.
> The script below reproduces the problem on a workstation. The script creates 
> a table, puts a few rows in it, creates a sample table, then doctors the 
> sample table metadata to make UPDATE STATISTICS think it has 4.5 billion 
> rows. The failure then occurs.
> ?section setup
> drop table if exists traf1;
> CREATE TABLE traf1
>   ( 
>     C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
>       DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
>       DROPPABLE NOT SERIALIZED
>   , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
>       DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
>       COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
>       COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
>   , C6 SMALLINT DEFAULT 1 NOT SERIALIZED
>   , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
>   )
>   SALT USING 4 PARTITIONS
>        ON (C1)
>  ATTRIBUTES ALIGNED FORMAT 
>   HBASE_OPTIONS 
>   ( 
>     DATA_BLOCK_ENCODING = 'FAST_DIFF',
>     BLOCKSIZE = '262144',
>     MEMSTORE_FLUSH_SIZE = '1073741824' 
>   ) 
> ;
> insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
>   '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
>   '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('012345',current_timestamp - interval '20' day,
>   'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('01234567',current_timestamp - interval '20' day,
>   'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def01234',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('021357',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
>   '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
>   '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('01234345',current_timestamp - interval '10' day,
>   'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('034567',current_timestamp - interval '10' day,
>   'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
> insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
> insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('02111357',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values 
> ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of 
> stuff',14);
> insert into traf1 values 
> ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit 
> of more stuff',12);
> insert into traf1 values 
> ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is 
> fun',21);
> insert into traf1 values 
> ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of 
> it is fun',39);
> insert into traf1 values 
> ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of 
> stuff',14);
> insert into traf1 values 
> ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit 
> of more stuff',8);
> insert into traf1 values 
> ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is 
> fun',21);
> insert into traf1 values 
> ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit 
> of it is fun',39);
> ?section repro
> UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT 
> PERSISTENT;
> -- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
> -- from sb_persistent_samples to make UPDATE STATS think the sample table
> -- has 4.5 billion rows
> update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, 
> ACTUAL_SAMPLE_ROWS = 4500000000
>  where table_uid = (select object_uid from "_MD_".objects where object_name = 
> 'TRAF1');
> cqd ustat_log 'reproULOG.txt';
> update statistics log on;
> Update statistics for table traf1 on existing columns incremental where C2 >= 
> '2017-06-10 00:00:00'; 
> exit;



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to