[ 
https://issues.apache.org/jira/browse/TRAFODION-2662?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16063319#comment-16063319
 ] 

ASF GitHub Bot commented on TRAFODION-2662:
-------------------------------------------

GitHub user DaveBirdsall opened a pull request:

    https://github.com/apache/incubator-trafodion/pull/1145

    [TRAFODION-2662] Fix compGeneral/TEST023 expected results

    The expected results file that I submitted in 
https://github.com/apache/incubator-trafodion/pull/1142 was not quite correct. 
I produced it on a clean instance without running test core/TEST000 first. The 
latter test changes the default schema, and that turns out to be required.

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/DaveBirdsall/incubator-trafodion 
FixCompGeneralTest023

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/incubator-trafodion/pull/1145.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #1145
    
----
commit 8f3826c4b53b1e742ec951b8297c863fcee93b60
Author: Dave Birdsall <dbirds...@apache.org>
Date:   2017-06-26T16:04:37Z

    [TRAFODION-266] Fix compGeneral/TEST023 expected results

----


> Incremental UPDATE STATS fails on very large sample tables
> ----------------------------------------------------------
>
>                 Key: TRAFODION-2662
>                 URL: https://issues.apache.org/jira/browse/TRAFODION-2662
>             Project: Apache Trafodion
>          Issue Type: Bug
>          Components: sql-cmp
>    Affects Versions: 2.2-incubating
>         Environment: Large clusters
>            Reporter: David Wayne Birdsall
>            Assignee: David Wayne Birdsall
>
> A user attempted to use the incremental UPDATE STATISTICS feature on a 90 
> billion row table. The persistent sample table was created using a sampling 
> rate of 5 per cent, and so it had about 4.5 billion rows. The incremental 
> UPDATE STATISTICS command failed with an error like the following:
> >>Update statistics for table traf1 on existing columns incremental where C2 
> >>>= '2017-06-10 00:00:00';
> *** ERROR[9219] Incremental UPDATE STATISTICS: An operation failed, possibly 
> due to an invalid WHERE clause.
> *** ERROR[15001] A syntax error occurred at or before: 
> SELECT  FROM TRAFODION.SEABASE.TRAF_SAMPLE_32_1498066565_999632 WHERE  C2 >= 
> '2
>            ^ (12 characters from start of SQL statement)
> *** ERROR[8822] The statement was not prepared.
> --- SQL operation failed with errors.
> The script below reproduces the problem on a workstation. The script creates 
> a table, puts a few rows in it, creates a sample table, then doctors the 
> sample table metadata to make UPDATE STATISTICS think it has 4.5 billion 
> rows. The failure then occurs.
> ?section setup
> drop table if exists traf1;
> CREATE TABLE traf1
>   ( 
>     C1 CHAR(32) CHARACTER SET ISO88591 COLLATE
>       DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C2 TIMESTAMP(6) NO DEFAULT NOT NULL NOT
>       DROPPABLE NOT SERIALIZED
>   , C3 CHAR(32) CHARACTER SET ISO88591 COLLATE
>       DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C4 VARCHAR(4096 BYTES) CHARACTER SET UTF8
>       COLLATE DEFAULT NO DEFAULT NOT NULL NOT DROPPABLE NOT SERIALIZED
>   , C5 VARCHAR(1024 BYTES) CHARACTER SET UTF8
>       COLLATE DEFAULT DEFAULT NULL NOT SERIALIZED
>   , C6 SMALLINT DEFAULT 1 NOT SERIALIZED
>   , PRIMARY KEY (C1 ASC, C2 DESC, C3 ASC)
>   )
>   SALT USING 4 PARTITIONS
>        ON (C1)
>  ATTRIBUTES ALIGNED FORMAT 
>   HBASE_OPTIONS 
>   ( 
>     DATA_BLOCK_ENCODING = 'FAST_DIFF',
>     BLOCKSIZE = '262144',
>     MEMSTORE_FLUSH_SIZE = '1073741824' 
>   ) 
> ;
> insert into traf1 values ('abcdef',current_timestamp - interval '20' day,
>   '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcdef01',current_timestamp - interval '20' day,
>   '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('012345',current_timestamp - interval '20' day,
>   'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('01234567',current_timestamp - interval '20' day,
>   'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def01234',current_timestamp - interval '20' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('adbf01',current_timestamp - interval '20' day,
> '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('a0b0c0',current_timestamp - interval '20' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('021357',current_timestamp - interval '20' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values ('abcdef',current_timestamp - interval '10' day,
>   '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',14);
> insert into traf1 values ('abcddf01',current_timestamp - interval '10' day,
>   '0123456789abcdef01','www.xyz.morestuff.com','a bit of more stuff',12);
> insert into traf1 values ('01234345',current_timestamp - interval '10' day,
>   'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('034567',current_timestamp - interval '10' day,
>   'abc01234','www.blahblahblah.com','a bit of it is fun',31);
> insert into traf1 values ('def8201234',current_timestamp - interval '10' day,
> '0123456789abcdef','www.xyz.stuff.com','a bit of stuff',18);
> insert into traf1 values ('adbf9a01',current_timestamp - interval '10' day,
> '0123456789abcdef01','www.C4edin.com','a bit of try twice',2);
> insert into traf1 values ('a0bbc0c0',current_timestamp - interval '10' day,
> 'abc012','www.blahblah.com','a bit of it is fun',2);
> insert into traf1 values ('02111357',current_timestamp - interval '10' day,
> 'abc01234','www.blahblahblah.com','a bit of it is fun',30);
> insert into traf1 values 
> ('def01234',current_timestamp,'01234789abcdef','www.xyz.stuff.com','a bit of 
> stuff',14);
> insert into traf1 values 
> ('adbf01',current_timestamp,'0123456789abef01','www.xyz.morestuff.com','a bit 
> of more stuff',12);
> insert into traf1 values 
> ('a0b0c0',current_timestamp,'abc03452','www.blahblah.com','a bit of it is 
> fun',21);
> insert into traf1 values 
> ('02431357',current_timestamp,'abcd001234','www.blahblahblah.com','a bit of 
> it is fun',39);
> insert into traf1 values 
> ('defab01234',current_timestamp,'01a0234789abcdef','www.cnn.com','a bit of 
> stuff',14);
> insert into traf1 values 
> ('adb9ef01',current_timestamp,'012333456789abef01','www.youtube.com','a bit 
> of more stuff',8);
> insert into traf1 values 
> ('a2b0b0c0',current_timestamp,'ab3452','www.blahblah.com','a bit of it is 
> fun',21);
> insert into traf1 values 
> ('0243731357',current_timestamp,'abcd00122a34','www.blahblahblah.com','a bit 
> of it is fun',39);
> ?section repro
> UPDATE STATISTICS FOR TABLE traf1 ON EVERY COLUMN SAMPLE RANDOM 50 PERCENT 
> PERSISTENT;
> -- doctor REQUESTED_SAMPLE_ROWS, ACTUAL_SAMPLE_ROWS, SAMPLING_RATIO
> -- from sb_persistent_samples to make UPDATE STATS think the sample table
> -- has 4.5 billion rows
> update sb_persistent_samples set REQUESTED_SAMPLE_ROWS = 4500000000, 
> ACTUAL_SAMPLE_ROWS = 4500000000
>  where table_uid = (select object_uid from "_MD_".objects where object_name = 
> 'TRAF1');
> cqd ustat_log 'reproULOG.txt';
> update statistics log on;
> Update statistics for table traf1 on existing columns incremental where C2 >= 
> '2017-06-10 00:00:00'; 
> exit;



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

Reply via email to