Hello again Tom,

We have our upgrade to 8.2.1 scheduled for this weekend, and we noticed your message regarding the vacuum patch being applied to 8.2 and back-patched. I expect I know the answer to this next question :) but I was wondering if the patch referenced below has also been bundled into the normal source download of 8.2.1 or if we would still need to manually apply it?

- Fix a performance problem in databases with large numbers of tables
 (or other types of pg_class entry): the function
 pgstat_vacuum_tabstat, invoked during VACUUM startup, had runtime
 proportional to the number of stats table entries times the number
 of pg_class rows; in other words O(N^2) if the stats collector's
 information is reasonably complete.  Replace list searching with a
 hash table to bring it back to O(N) behavior.  Per report from kim
 at myemma.com.  Back-patch as far as 8.1; 8.0 and before use
 different coding here.

Thanks,
Kim


Tom Lane wrote:

I wrote:
What I think we need to do about this is
(1) fix pgstat_vacuum_tabstats to have non-O(N^2) behavior; I'm thinking
of using a hash table for the OIDs instead of a linear list.  Should be
a pretty small change; I'll work on it today.

I've applied the attached patch to 8.2 to do the above.  Please give it
a try and see how much it helps for you.  Some limited testing here
confirms a noticeable improvement in VACUUM startup time at 10000
tables, and of course it should be 100X worse with 100000 tables.

I am still confused why you didn't see the problem in 8.1, though.
This code is just about exactly the same in 8.1.  Maybe you changed
your stats collector settings when moving to 8.2?

                        regards, tom lane

------------------------------------------------------------------------

Index: pgstat.c
===================================================================
RCS file: /cvsroot/pgsql/src/backend/postmaster/pgstat.c,v
retrieving revision 1.140
diff -c -r1.140 pgstat.c
*** pgstat.c    21 Nov 2006 20:59:52 -0000      1.140
--- pgstat.c    11 Jan 2007 22:32:30 -0000
***************
*** 159,164 ****
--- 159,165 ----
 static void pgstat_read_statsfile(HTAB **dbhash, Oid onlydb);
 static void backend_read_statsfile(void);
 static void pgstat_read_current_status(void);
+ static HTAB *pgstat_collect_oids(Oid catalogid);
static void pgstat_setheader(PgStat_MsgHdr *hdr, StatMsgType mtype);
 static void pgstat_send(void *msg, int len);
***************
*** 657,666 ****
 void
 pgstat_vacuum_tabstat(void)
 {
!       List       *oidlist;
!       Relation        rel;
!       HeapScanDesc scan;
!       HeapTuple       tup;
        PgStat_MsgTabpurge msg;
        HASH_SEQ_STATUS hstat;
        PgStat_StatDBEntry *dbentry;
--- 658,664 ----
 void
 pgstat_vacuum_tabstat(void)
 {
!       HTAB       *htab;
        PgStat_MsgTabpurge msg;
        HASH_SEQ_STATUS hstat;
        PgStat_StatDBEntry *dbentry;
***************
*** 679,693 ****
        /*
         * Read pg_database and make a list of OIDs of all existing databases
         */
!       oidlist = NIL;
!       rel = heap_open(DatabaseRelationId, AccessShareLock);
!       scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
!       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
!       {
!               oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
!       }
!       heap_endscan(scan);
!       heap_close(rel, AccessShareLock);
/*
         * Search the database hash table for dead databases and tell the
--- 677,683 ----
        /*
         * Read pg_database and make a list of OIDs of all existing databases
         */
!       htab = pgstat_collect_oids(DatabaseRelationId);
/*
         * Search the database hash table for dead databases and tell the
***************
*** 698,709 ****
        {
                Oid                     dbid = dbentry->databaseid;
! if (!list_member_oid(oidlist, dbid))
                        pgstat_drop_database(dbid);
        }
/* Clean up */
!       list_free(oidlist);
/*
         * Lookup our own database entry; if not found, nothing more to do.
--- 688,701 ----
        {
                Oid                     dbid = dbentry->databaseid;
! CHECK_FOR_INTERRUPTS(); ! ! if (hash_search(htab, (void *) &dbid, HASH_FIND, NULL) == NULL)
                        pgstat_drop_database(dbid);
        }
/* Clean up */
!       hash_destroy(htab);
/*
         * Lookup our own database entry; if not found, nothing more to do.
***************
*** 717,731 ****
        /*
         * Similarly to above, make a list of all known relations in this DB.
         */
!       oidlist = NIL;
!       rel = heap_open(RelationRelationId, AccessShareLock);
!       scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
!       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
!       {
!               oidlist = lappend_oid(oidlist, HeapTupleGetOid(tup));
!       }
!       heap_endscan(scan);
!       heap_close(rel, AccessShareLock);
/*
         * Initialize our messages table counter to zero
--- 709,715 ----
        /*
         * Similarly to above, make a list of all known relations in this DB.
         */
!       htab = pgstat_collect_oids(RelationRelationId);
/*
         * Initialize our messages table counter to zero
***************
*** 738,750 ****
        hash_seq_init(&hstat, dbentry->tables);
        while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != 
NULL)
        {
!               if (list_member_oid(oidlist, tabentry->tableid))
                        continue;
/*
                 * Not there, so add this table's Oid to the message
                 */
!               msg.m_tableid[msg.m_nentries++] = tabentry->tableid;
/*
                 * If the message is full, send it out and reinitialize to empty
--- 722,738 ----
        hash_seq_init(&hstat, dbentry->tables);
        while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&hstat)) != 
NULL)
        {
!               Oid                     tabid = tabentry->tableid;
! ! CHECK_FOR_INTERRUPTS(); ! ! if (hash_search(htab, (void *) &tabid, HASH_FIND, NULL) != NULL)
                        continue;
/*
                 * Not there, so add this table's Oid to the message
                 */
!               msg.m_tableid[msg.m_nentries++] = tabid;
/*
                 * If the message is full, send it out and reinitialize to empty
***************
*** 776,782 ****
        }
/* Clean up */
!       list_free(oidlist);
 }
--- 764,813 ----
        }
/* Clean up */
!       hash_destroy(htab);
! }
! ! ! /* ----------
!  * pgstat_collect_oids() -
!  *
!  *    Collect the OIDs of either all databases or all tables, according to
!  *    the parameter, into a temporary hash table.  Caller should hash_destroy
!  *    the result when done with it.
!  * ----------
!  */
! static HTAB *
! pgstat_collect_oids(Oid catalogid)
! {
!       HTAB       *htab;
!       HASHCTL         hash_ctl;
!       Relation        rel;
!       HeapScanDesc scan;
!       HeapTuple       tup;
! ! memset(&hash_ctl, 0, sizeof(hash_ctl));
!       hash_ctl.keysize = sizeof(Oid);
!       hash_ctl.entrysize = sizeof(Oid);
!       hash_ctl.hash = oid_hash;
!       htab = hash_create("Temporary table of OIDs",
!                                          PGSTAT_TAB_HASH_SIZE,
!                                          &hash_ctl,
!                                          HASH_ELEM | HASH_FUNCTION);
! ! rel = heap_open(catalogid, AccessShareLock);
!       scan = heap_beginscan(rel, SnapshotNow, 0, NULL);
!       while ((tup = heap_getnext(scan, ForwardScanDirection)) != NULL)
!       {
!               Oid             thisoid = HeapTupleGetOid(tup);
! ! CHECK_FOR_INTERRUPTS(); ! ! (void) hash_search(htab, (void *) &thisoid, HASH_ENTER, NULL);
!       }
!       heap_endscan(scan);
!       heap_close(rel, AccessShareLock);
! ! return htab;
 }

Reply via email to