On Nov 23, 2010, at 1:37 PM, Yeb Havinga wrote:
>>>> j = 0;
>>>> for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i)) {
>>>>   FPrint* v = deserialize_fprint(entv[i].key);
>>> 
>>> Isn't this off by one?  Offset numbers are 1-based, so the maxoff
>>> computation is wrong.
> The first for loop of all others compare with i <= maxoff instead of i < 
> maxoff.

You are right: I am missing the last one, there.  (During a memory-debugging 
phase entv[entryvec-n - 1] was always invalid, probably as a memory overwrite 
error but I fixed that later and never changed it back.)

On the other hand, there are two problems:

1. the maximum size on a GiST page is 4240 bytes, so I cannot add a full-size 
Datum using this kind of hash-key setup (the base Datum size is 4230 bytes on a 
64-bit machine).  The example test cases I used were smaller in order to get 
around that issue: they are 2326 bytes base size.  

2. Even after fixing the Picksplit() loop, the dropped-leaf problem still 
manifests itself:

postgres=# set enable_seqscan=false;
SET
postgres=# set enable_indexscan=true;
SET
postgres=# create table fps2 (id serial, soid character(24) not null, 
fingerprint fprint not null);
NOTICE:  CREATE TABLE will create implicit sequence "fps2_id_seq" for serial 
column "fps2.id"
CREATE TABLE
postgres=# create index fps2_fingerprint_ix on fps2 using gist (fingerprint 
fprint_gist_ops);
CREATE INDEX
postgres=# \i xaa
psql:xaa:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# \i xab
psql:xab:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# \i xac
psql:xac:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# \i xad
psql:xad:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# select gist_stat('fps2_fingerprint_ix');
               gist_stat               
---------------------------------------
 Number of levels:          1         +
 Number of pages:           1         +
 Number of leaf pages:      1         +
 Number of tuples:          4         +
 Number of invalid tuples:  0         +
 Number of leaf tuples:     4         +
 Total size of tuples:      5628 bytes+
 Total size of leaf tuples: 5628 bytes+
 Total size of index:       8192 bytes+

postgres=# \i xae
psql:xae:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# select gist_stat('fps2_fingerprint_ix');
               gist_stat               
---------------------------------------
 Number of levels:          1         +
 Number of pages:           1         +
 Number of leaf pages:      1         +
 Number of tuples:          5         +
 Number of invalid tuples:  0         +
 Number of leaf tuples:     5         +
 Total size of tuples:      7032 bytes+
 Total size of leaf tuples: 7032 bytes+
 Total size of index:       8192 bytes+

postgres=# \i xaf
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_picksplit:659] entered picksplit
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_picksplit:838] split: 3 left, 2 right
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaf:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
INSERT 0 1
postgres=# select gist_stat('fps2_fingerprint_ix');
               gist_stat                
----------------------------------------
 Number of levels:          2          +
 Number of pages:           3          +
 Number of leaf pages:      2          +
 Number of tuples:          7          +
 Number of invalid tuples:  0          +
 Number of leaf tuples:     5          +
 Total size of tuples:      9864 bytes +
 Total size of leaf tuples: 7044 bytes +
 Total size of index:       24576 bytes+

postgres=# select id, soid from fps2;
 id |           soid           
----+--------------------------
  1 | 4c65a39d4d9bca2c33000082
  2 | 4c65a39d4d9bca2c3300008a
  3 | 4c65a39d4d9bca2c33000090
  4 | 4c65a39d4d9bca2c33000099
  5 | 4c65a39d4d9bca2c330000a5
  6 | 4c65a39d4d9bca2c330000a8

postgres=# select f1.id, f2.id, fprint_cmp(f1.fingerprint, f2.fingerprint) from 
fps2 f1 join fps2 f2 on f1.fingerprint=f2.fingerprint;
 id | id |    fprint_cmp    
----+----+------------------
  1 |  1 | 1.00031467691569
  2 |  2 | 1.00031467691569
  4 |  4 | 1.00031467691569
  5 |  5 | 1.00031467691569
  6 |  6 | 1.00031467691569
 
So GiST does not include a tuple for row 3; one of the old tuples.  

After inserting a few more rows to trigger another Picksplit():

postgres=# \i xag
psql:xag:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xag:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
703.4312133789062500
psql:xag:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xag:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
832.1127319335937500
psql:xag:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_union:453] entered union
psql:xag:1: NOTICE:  [pgfprint.c:fprint_same:951] entered same
psql:xag:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xag:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
INSERT 0 1
postgres=# \i xah
psql:xah:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xah:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
880.7246093750000000
psql:xah:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xah:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
903.4860839843750000
psql:xah:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_union:453] entered union
psql:xah:1: NOTICE:  [pgfprint.c:fprint_same:951] entered same
psql:xah:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xah:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
INSERT 0 1
postgres=# \i xai
psql:xai:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xai:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
904.7127075195312500
psql:xai:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xai:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
907.4243164062500000
psql:xai:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_union:453] entered union
psql:xai:1: NOTICE:  [pgfprint.c:fprint_same:951] entered same
psql:xai:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xai:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
INSERT 0 1
postgres=# \i xaj
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
910.3089599609375000
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_penalty:913] entered penalty
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_penalty:935] penalty: 
906.1793212890625000
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_picksplit:659] entered picksplit
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_picksplit:838] split: 2 left, 3 right
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_decompress:419] entered decompress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_union:453] entered union
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:379] entered compress
psql:xaj:1: NOTICE:  [pgfprint.c:fprint_compress:384] returning non-leafkey 
entry raw
INSERT 0 1
postgres=# select id, soid from fps2;
 id |           soid           
----+--------------------------
  1 | 4c65a39d4d9bca2c33000082
  2 | 4c65a39d4d9bca2c3300008a
  3 | 4c65a39d4d9bca2c33000090
  4 | 4c65a39d4d9bca2c33000099
  5 | 4c65a39d4d9bca2c330000a5
  6 | 4c65a39d4d9bca2c330000a8
  7 | 4c65a39d4d9bca2c330000b0
  8 | 4c65a39d4d9bca2c330000be
  9 | 4c65a39d4d9bca2c330000c8
 10 | 4c65a39d4d9bca2c330000d3
(10 rows)

postgres=# select f1.id, f2.id, fprint_cmp(f1.fingerprint, f2.fingerprint) from 
fps2 f1 join fps2 f2 on f1.fingerprint=f2.fingerprint;
 id | id |    fprint_cmp    
----+----+------------------
  1 |  1 | 1.00031467691569
  4 |  4 | 1.00031467691569
  5 |  5 | 1.00031467691569
  6 |  6 | 1.00031467691569
  7 |  7 | 1.00031467691569
  8 |  8 | 1.00031467691569
  9 |  9 | 1.00031467691569
 10 | 10 | 1.00031467691569
(8 rows)

Index tuples for rows 3 and 2 have been dropped.



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to