Hi all, Please find attached a simple patch adding fillfactor as a storage parameter for GIN indexes. The default value is 100, which matches the current behavior: pages are completely packed when a GIN index is created.
Note that to have this feature correctly working, the fix I sent yesterday to set up isBuild for the entry insertion is needed (patch attached as well here to facilitate the review): http://www.postgresql.org/message-id/cab7npqsc4vq9mhkqm_yvafcteho-iuy8skbxydnmgnai1xn...@mail.gmail.com Here are the results of some tests with a simple pg_trgm index on the English translation of "Les Miserables": CREATE EXTENSION pg_trgm; CREATE TABLE les_miserables (num serial, line text); COPY les_miserables (line) FROM '/to/path/pg135.txt'; CREATE INDEX les_miserables_100 ON les_miserables USING gin (line gin_trgm_ops); CREATE INDEX les_miserables_40 ON les_miserables USING gin (line gin_trgm_ops) with (fillfactor = 40); CREATE INDEX les_miserables_20 ON les_miserables USING gin (line gin_trgm_ops) with (fillfactor = 20); CREATE INDEX les_miserables_80 ON les_miserables USING gin (line gin_trgm_ops) with (fillfactor = 80); CREATE INDEX les_miserables_10 ON les_miserables USING gin (line gin_trgm_ops) with (fillfactor = 10); SELECT relname, pg_size_pretty(pg_relation_size(oid)), reloptions FROM pg_class where relname like 'les_miserables_%'; relname | pg_size_pretty | reloptions ------------------------+----------------+----------------- les_miserables_100 | 8256 kB | null les_miserables_20 | 14 MB | {fillfactor=20} les_miserables_40 | 11 MB | {fillfactor=40} les_miserables_80 | 8712 kB | {fillfactor=80} les_miserables_num_seq | 8192 bytes | null (5 rows) I am adding that to the commit fest of December. Regards, -- Michael
From eda0730d991f8b4dfbacc4d7a953ec5bff8b2ffe Mon Sep 17 00:00:00 2001 From: Michael Paquier <michael@otacoo.com> Date: Fri, 21 Nov 2014 13:40:11 +0900 Subject: [PATCH 1/2] Fix flag marking GIN index as being built for new entries This was somewhat missing in the current implementation, and led to problems for code that needed special handling with fresh indexes being built. Note that this does not impact current code as there are no such operations being done yet but it may be a problem if in the future a bug fix needs to make this distinction. --- src/backend/access/gin/gininsert.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/access/gin/gininsert.c b/src/backend/access/gin/gininsert.c index c1ad0fd..c6d8b40 100644 --- a/src/backend/access/gin/gininsert.c +++ b/src/backend/access/gin/gininsert.c @@ -191,6 +191,7 @@ ginEntryInsert(GinState *ginstate, buildStats->nEntries++; ginPrepareEntryScan(&btree, attnum, key, category, ginstate); + btree.isBuild = (buildStats != NULL); stack = ginFindLeafPage(&btree, false); page = BufferGetPage(stack->buffer); -- 2.1.3
From eb48305ac0295fa4a46ffec5f8db447cd4c5f6b2 Mon Sep 17 00:00:00 2001 From: Michael Paquier <michael@otacoo.com> Date: Fri, 21 Nov 2014 14:08:54 +0900 Subject: [PATCH 2/2] Support fillfactor for GIN indexes Users can call this new storage parameter to fill in the entry and leaf pages of a newly-built index as wanted. Fillfactor range varies between 20 and 100. --- doc/src/sgml/ref/create_index.sgml | 4 ++-- src/backend/access/common/reloptions.c | 9 +++++++++ src/backend/access/gin/gindatapage.c | 22 ++++++++++++++++++---- src/backend/access/gin/ginentrypage.c | 20 +++++++++++++++++++- src/backend/access/gin/ginutil.c | 3 ++- src/include/access/gin_private.h | 3 +++ 6 files changed, 53 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml index 6b2ee28..c0ba24a 100644 --- a/doc/src/sgml/ref/create_index.sgml +++ b/doc/src/sgml/ref/create_index.sgml @@ -294,8 +294,8 @@ CREATE [ UNIQUE ] INDEX [ CONCURRENTLY ] [ [ IF NOT EXISTS ] <replaceable class= <para> The optional <literal>WITH</> clause specifies <firstterm>storage parameters</> for the index. Each index method has its own set of allowed - storage parameters. The B-tree, hash, GiST and SP-GiST index methods all - accept this parameter: + storage parameters. 
The B-tree, hash, GIN, GiST and SP-GiST index methods + all accept this parameter: </para> <variablelist> diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c index c16b38e..7137ba9 100644 --- a/src/backend/access/common/reloptions.c +++ b/src/backend/access/common/reloptions.c @@ -15,6 +15,7 @@ #include "postgres.h" +#include "access/gin_private.h" #include "access/gist_private.h" #include "access/hash.h" #include "access/htup_details.h" @@ -133,6 +134,14 @@ static relopt_int intRelOpts[] = }, { { + "fillfactor", + "Packs gin index pages only to this percentage", + RELOPT_KIND_GIN + }, + GIN_DEFAULT_FILLFACTOR, GIN_MIN_FILLFACTOR, 100 + }, + { + { "autovacuum_vacuum_threshold", "Minimum number of tuple updates or deletes prior to vacuum", RELOPT_KIND_HEAP | RELOPT_KIND_TOAST diff --git a/src/backend/access/gin/gindatapage.c b/src/backend/access/gin/gindatapage.c index 012225e..f322004 100644 --- a/src/backend/access/gin/gindatapage.c +++ b/src/backend/access/gin/gindatapage.c @@ -446,11 +446,21 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, leafSegmentInfo *lastleftinfo; ItemPointerData maxOldItem; ItemPointerData remaining; + int fillfactor; Assert(GinPageIsData(page)); rbound = *GinDataPageGetRightBound(page); + /* Grab option values */ + if (btree->index->rd_options) + { + GinOptions *options = (GinOptions *) btree->index->rd_options; + fillfactor = options->fillfactor; + } + else + fillfactor = GIN_DEFAULT_FILLFACTOR; + /* * Count how many of the new items belong to this page. */ @@ -511,15 +521,19 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack, /* * If we're appending to the end of the page, we will append as many items - * as we can fit (after splitting), and stop when the pages becomes full. 
- * Otherwise we have to limit the number of new items to insert, because - * once we start packing we can't just stop when we run out of space, - * because we must make sure that all the old items still fit. + * as we can fit up to the given fillfactor at build (after splitting), + * and stop when the pages becomes full at this rate. Otherwise we have to + * limit the number of new items to insert, because once we start packing + * we can't just stop when we run out of space, because we must make sure + * that all the old items still fit. */ if (GinPageIsCompressed(page)) freespace = GinDataLeafPageGetFreeSpace(page); + else if (btree->isBuild) + freespace = BLCKSZ * (100 - fillfactor) / 100; else freespace = 0; + if (append) { /* diff --git a/src/backend/access/gin/ginentrypage.c b/src/backend/access/gin/ginentrypage.c index 2dae7b9..36eccd7 100644 --- a/src/backend/access/gin/ginentrypage.c +++ b/src/backend/access/gin/ginentrypage.c @@ -462,6 +462,16 @@ entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, Size releasedsz = 0; Size addedsz; Page page = BufferGetPage(buf); + int fillfactor; + int freespace; + + if (btree->index->rd_options) + { + GinOptions *options = (GinOptions *) btree->index->rd_options; + fillfactor = options->fillfactor; + } + else + fillfactor = GIN_DEFAULT_FILLFACTOR; Assert(insertData->entry); Assert(!GinPageIsData(page)); @@ -475,7 +485,15 @@ entryIsEnoughSpace(GinBtree btree, Buffer buf, OffsetNumber off, addedsz = MAXALIGN(IndexTupleSize(insertData->entry)) + sizeof(ItemIdData); - if (PageGetFreeSpace(page) + releasedsz >= addedsz) + /* + * Calculate freespace available. When building the index take into + * account the fillfactor. 
+ */ + if (btree->isBuild) + freespace = PageGetFreeSpace(page) - BLCKSZ * (100 - fillfactor) / 100; + else + freespace = PageGetFreeSpace(page); + if (freespace + releasedsz >= addedsz) return true; return false; diff --git a/src/backend/access/gin/ginutil.c b/src/backend/access/gin/ginutil.c index f593a72..bfdfd0c 100644 --- a/src/backend/access/gin/ginutil.c +++ b/src/backend/access/gin/ginutil.c @@ -527,7 +527,8 @@ ginoptions(PG_FUNCTION_ARGS) static const relopt_parse_elt tab[] = { {"fastupdate", RELOPT_TYPE_BOOL, offsetof(GinOptions, useFastUpdate)}, {"gin_pending_list_limit", RELOPT_TYPE_INT, offsetof(GinOptions, - pendingListCleanupSize)} + pendingListCleanupSize)}, + {"fillfactor", RELOPT_TYPE_INT, offsetof(GinOptions, fillfactor)} }; options = parseRelOptions(reloptions, validate, RELOPT_KIND_GIN, diff --git a/src/include/access/gin_private.h b/src/include/access/gin_private.h index 3d46f20..855091b 100644 --- a/src/include/access/gin_private.h +++ b/src/include/access/gin_private.h @@ -315,6 +315,7 @@ typedef struct GinOptions int32 vl_len_; /* varlena header (do not touch directly!) */ bool useFastUpdate; /* use fast updates? */ int pendingListCleanupSize; /* maximum size of pending list */ + int fillfactor; /* page fillfactor in percent (0..100) */ } GinOptions; #define GIN_DEFAULT_USE_FASTUPDATE true @@ -327,6 +328,8 @@ typedef struct GinOptions ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize : \ gin_pending_list_limit) +#define GIN_MIN_FILLFACTOR 20 +#define GIN_DEFAULT_FILLFACTOR 100 /* Macros for buffer lock/unlock operations */ #define GIN_UNLOCK BUFFER_LOCK_UNLOCK -- 2.1.3
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers