I'm planning to apply the attached patch to hide the ARC-related data
structures in freelist.c.  There are no data structure or algorithm
changes, just removal of ARC details from the shared header
buf_internals.h, plus minor refactoring to hide the knowledge that ARC
wants a twice-normal-size buffer lookup hashtable.  This should allow us
to make the ARC-or-not changes within freelist.c only.

I did leave the "bufNext" fields of BufferDesc as-is.  Strictly speaking
this list is an internal data structure of freelist.c, but moving these
fields out of BufferDesc would require a larger change than seems
worthwhile, especially seeing that an LRU algorithm could make use of
the same list.

Any objections?

                        regards, tom lane

*** src/backend/storage/buffer/buf_init.c.orig  Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_init.c       Thu Feb  3 14:51:18 2005
***************
*** 73,79 ****
   *            aborts, it should only unpin the buffers exactly the number of 
times it
   *            has pinned them, so that it will not blow away buffers of 
another
   *            backend.
-  *
   */
  
  
--- 73,78 ----
***************
*** 120,133 ****
                block = BufferBlocks;
  
                /*
!                * link the buffers into a single linked list. This will become
!                * the LIFO list of unused buffers returned by
!                * StrategyGetBuffer().
                 */
                for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
                {
                        Assert(ShmemIsValid((unsigned long) block));
  
                        buf->bufNext = i + 1;
  
                        CLEAR_BUFFERTAG(buf->tag);
--- 119,135 ----
                block = BufferBlocks;
  
                /*
!                * Initialize all the buffer headers.
                 */
                for (i = 0; i < NBuffers; block += BLCKSZ, buf++, i++)
                {
                        Assert(ShmemIsValid((unsigned long) block));
  
+                       /*
+                        * The bufNext fields link together all totally-unused 
buffers.
+                        * Subsequent management of this list is done by
+                        * StrategyGetBuffer().
+                        */
                        buf->bufNext = i + 1;
  
                        CLEAR_BUFFERTAG(buf->tag);
***************
*** 142,148 ****
                        buf->wait_backend_id = 0;
                }
  
!               /* Correct last entry */
                BufferDescriptors[NBuffers - 1].bufNext = -1;
  
                LWLockRelease(BufMgrLock);
--- 144,150 ----
                        buf->wait_backend_id = 0;
                }
  
!               /* Correct last entry of linked list */
                BufferDescriptors[NBuffers - 1].bufNext = -1;
  
                LWLockRelease(BufMgrLock);
***************
*** 178,184 ****
  
        /*
         * Convert shmem offsets into addresses as seen by this process. This
!        * is just to speed up the BufferGetBlock() macro.
         */
        for (i = 0; i < NBuffers; i++)
                BufferBlockPointers[i] = (Block) 
MAKE_PTR(BufferDescriptors[i].data);
--- 180,187 ----
  
        /*
         * Convert shmem offsets into addresses as seen by this process. This
!        * is just to speed up the BufferGetBlock() macro.  It is OK to do this
!        * without any lock since the data pointers never change.
         */
        for (i = 0; i < NBuffers; i++)
                BufferBlockPointers[i] = (Block) 
MAKE_PTR(BufferDescriptors[i].data);
***************
*** 201,214 ****
        /* size of data pages */
        size += NBuffers * MAXALIGN(BLCKSZ);
  
!       /* size of buffer hash table */
!       size += hash_estimate_size(NBuffers * 2, sizeof(BufferLookupEnt));
! 
!       /* size of the shared replacement strategy control block */
!       size += MAXALIGN(sizeof(BufferStrategyControl));
! 
!       /* size of the ARC directory blocks */
!       size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));
  
        return size;
  }
--- 204,211 ----
        /* size of data pages */
        size += NBuffers * MAXALIGN(BLCKSZ);
  
!       /* size of stuff controlled by freelist.c */
!       size += StrategyShmemSize();
  
        return size;
  }
*** src/backend/storage/buffer/buf_table.c.orig Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/buf_table.c      Thu Feb  3 14:40:17 2005
***************
*** 1,11 ****
  /*-------------------------------------------------------------------------
   *
   * buf_table.c
!  *      routines for finding buffers in the buffer pool.
   *
!  * NOTE: these days, what this table actually provides is a mapping from
!  * BufferTags to CDB indexes, not directly to buffers.        The function 
names
!  * are thus slight misnomers.
   *
   * Note: all routines in this file assume that the BufMgrLock is held
   * by the caller, so no synchronization is needed.
--- 1,11 ----
  /*-------------------------------------------------------------------------
   *
   * buf_table.c
!  *      routines for mapping BufferTags to buffer indexes.
   *
!  * NOTE: this module is called only by freelist.c, and the "buffer IDs"
!  * it deals with are whatever freelist.c needs them to be; they may not be
!  * directly equivalent to Buffer numbers.
   *
   * Note: all routines in this file assume that the BufMgrLock is held
   * by the caller, so no synchronization is needed.
***************
*** 26,37 ****
  #include "storage/bufmgr.h"
  
  
  static HTAB *SharedBufHash;
  
  
  /*
   * Initialize shmem hash table for mapping buffers
!  *            size is the desired hash table size (2*NBuffers for ARC 
algorithm)
   */
  void
  InitBufTable(int size)
--- 26,54 ----
  #include "storage/bufmgr.h"
  
  
+ /* entry for buffer lookup hashtable */
+ typedef struct
+ {
+       BufferTag       key;                    /* Tag of a disk page */
+       int                     id;                             /* Associated 
buffer ID */
+ } BufferLookupEnt;
+ 
  static HTAB *SharedBufHash;
  
  
  /*
+  * Estimate space needed for mapping hashtable
+  *            size is the desired hash table size (possibly more than 
NBuffers)
+  */
+ int
+ BufTableShmemSize(int size)
+ {
+       return hash_estimate_size(size, sizeof(BufferLookupEnt));
+ }
+ 
+ /*
   * Initialize shmem hash table for mapping buffers
!  *            size is the desired hash table size (possibly more than 
NBuffers)
   */
  void
  InitBufTable(int size)
***************
*** 56,62 ****
  
  /*
   * BufTableLookup
!  *            Lookup the given BufferTag; return CDB index, or -1 if not found
   */
  int
  BufTableLookup(BufferTag *tagPtr)
--- 73,79 ----
  
  /*
   * BufTableLookup
!  *            Lookup the given BufferTag; return buffer ID, or -1 if not found
   */
  int
  BufTableLookup(BufferTag *tagPtr)
***************
*** 76,85 ****
  
  /*
   * BufTableInsert
!  *            Insert a hashtable entry for given tag and CDB index
   */
  void
! BufTableInsert(BufferTag *tagPtr, int cdb_id)
  {
        BufferLookupEnt *result;
        bool            found;
--- 93,102 ----
  
  /*
   * BufTableInsert
!  *            Insert a hashtable entry for given tag and buffer ID
   */
  void
! BufTableInsert(BufferTag *tagPtr, int buf_id)
  {
        BufferLookupEnt *result;
        bool            found;
***************
*** 92,106 ****
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of shared memory")));
  
!       if (found)                                      /* found something else 
in the table? */
                elog(ERROR, "shared buffer hash table corrupted");
  
!       result->id = cdb_id;
  }
  
  /*
   * BufTableDelete
!  *            Delete the hashtable entry for given tag
   */
  void
  BufTableDelete(BufferTag *tagPtr)
--- 109,123 ----
                                (errcode(ERRCODE_OUT_OF_MEMORY),
                                 errmsg("out of shared memory")));
  
!       if (found)                                      /* found something 
already in the table? */
                elog(ERROR, "shared buffer hash table corrupted");
  
!       result->id = buf_id;
  }
  
  /*
   * BufTableDelete
!  *            Delete the hashtable entry for given tag (which must exist)
   */
  void
  BufTableDelete(BufferTag *tagPtr)
*** src/backend/storage/buffer/freelist.c.orig  Fri Dec 31 17:46:08 2004
--- src/backend/storage/buffer/freelist.c       Thu Feb  3 14:45:50 2005
***************
*** 25,30 ****
--- 25,75 ----
  #include "storage/bufmgr.h"
  
  
+ /*
+  * Definitions for the buffer replacement strategy
+  */
+ #define STRAT_LIST_UNUSED     (-1)
+ #define STRAT_LIST_B1         0
+ #define STRAT_LIST_T1         1
+ #define STRAT_LIST_T2         2
+ #define STRAT_LIST_B2         3
+ #define STRAT_NUM_LISTS               4
+ 
+ /*
+  * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
+  */
+ typedef struct
+ {
+       int                     prev;                   /* list links */
+       int                     next;
+       short           list;                   /* ID of list it is currently 
in */
+       bool            t1_vacuum;              /* t => present only because of 
VACUUM */
+       TransactionId t1_xid;           /* the xid this entry went onto T1 */
+       BufferTag       buf_tag;                /* page identifier */
+       int                     buf_id;                 /* currently assigned 
data buffer, or -1 */
+ } BufferStrategyCDB;
+ 
+ /*
+  * The shared ARC control information.
+  */
+ typedef struct
+ {
+       int                     target_T1_size; /* What T1 size are we aiming 
for */
+       int                     listUnusedCDB;  /* All unused StrategyCDB */
+       int                     listHead[STRAT_NUM_LISTS];              /* ARC 
lists B1, T1, T2
+                                                                               
                 * and B2 */
+       int                     listTail[STRAT_NUM_LISTS];
+       int                     listSize[STRAT_NUM_LISTS];
+       Buffer          listFreeBuffers;        /* List of unused buffers */
+ 
+       long            num_lookup;             /* Some hit statistics */
+       long            num_hit[STRAT_NUM_LISTS];
+       time_t          stat_report;
+ 
+       /* Array of CDB's starts here */
+       BufferStrategyCDB cdb[1];       /* VARIABLE SIZE ARRAY */
+ } BufferStrategyControl;
+ 
  /* GUC variable: time in seconds between statistics reports */
  int                   DebugSharedBuffers = 0;
  
***************
*** 811,816 ****
--- 856,883 ----
        return num_buffer_dirty;
  }
  
+ 
+ /*
+  * StrategyShmemSize
+  *
+  * estimate the size of shared memory used by the freelist-related structures.
+  */
+ int
+ StrategyShmemSize(void)
+ {
+       int                     size = 0;
+ 
+       /* size of CDB lookup hash table */
+       size += BufTableShmemSize(NBuffers * 2);
+ 
+       /* size of the shared replacement strategy control block */
+       size += MAXALIGN(sizeof(BufferStrategyControl));
+ 
+       /* size of the ARC directory blocks */
+       size += MAXALIGN(NBuffers * 2 * sizeof(BufferStrategyCDB));
+ 
+       return size;
+ }
  
  /*
   * StrategyInitialize -- initialize the buffer cache replacement
*** src/include/storage/buf_internals.h.orig    Fri Dec 31 17:47:02 2004
--- src/include/storage/buf_internals.h Thu Feb  3 14:51:11 2005
***************
*** 17,24 ****
  
  #include "storage/backendid.h"
  #include "storage/buf.h"
- #include "storage/lmgr.h"
  #include "storage/lwlock.h"
  
  
  /*
--- 17,25 ----
  
  #include "storage/backendid.h"
  #include "storage/buf.h"
  #include "storage/lwlock.h"
+ #include "storage/shmem.h"
+ #include "utils/rel.h"
  
  
  /*
***************
*** 40,49 ****
   * Buffer tag identifies which disk block the buffer contains.
   *
   * Note: the BufferTag data must be sufficient to determine where to write the
!  * block, even during a "blind write" with no relcache entry.  It's possible
!  * that the backend flushing the buffer doesn't even believe the relation is
!  * visible yet (its xact may have started before the xact that created the
!  * rel).  The storage manager must be able to cope anyway.
   *
   * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
   * to be fixed to zero them, since this struct is used as a hash key.
--- 41,50 ----
   * Buffer tag identifies which disk block the buffer contains.
   *
   * Note: the BufferTag data must be sufficient to determine where to write the
!  * block, without reference to pg_class or pg_tablespace entries.  It's
!  * possible that the backend flushing the buffer doesn't even believe the
!  * relation is visible yet (its xact may have started before the xact that
!  * created the rel).  The storage manager must be able to cope anyway.
   *
   * Note: if there's any pad bytes in the struct, INIT_BUFFERTAG will have
   * to be fixed to zero them, since this struct is used as a hash key.
***************
*** 107,164 ****
  
  #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
  
- /* entry for buffer lookup hashtable */
- typedef struct
- {
-       BufferTag       key;                    /* Tag of a disk page */
-       int                     id;                             /* CDB id of 
associated CDB */
- } BufferLookupEnt;
  
! /*
!  * Definitions for the buffer replacement strategy
!  */
! #define STRAT_LIST_UNUSED     (-1)
! #define STRAT_LIST_B1         0
! #define STRAT_LIST_T1         1
! #define STRAT_LIST_T2         2
! #define STRAT_LIST_B2         3
! #define STRAT_NUM_LISTS               4
! 
! /*
!  * The Cache Directory Block (CDB) of the Adaptive Replacement Cache (ARC)
!  */
! typedef struct
! {
!       int                     prev;                   /* list links */
!       int                     next;
!       short           list;                   /* ID of list it is currently 
in */
!       bool            t1_vacuum;              /* t => present only because of 
VACUUM */
!       TransactionId t1_xid;           /* the xid this entry went onto T1 */
!       BufferTag       buf_tag;                /* page identifier */
!       int                     buf_id;                 /* currently assigned 
data buffer, or -1 */
! } BufferStrategyCDB;
! 
! /*
!  * The shared ARC control information.
!  */
! typedef struct
! {
!       int                     target_T1_size; /* What T1 size are we aiming 
for */
!       int                     listUnusedCDB;  /* All unused StrategyCDB */
!       int                     listHead[STRAT_NUM_LISTS];              /* ARC 
lists B1, T1, T2
!                                                                               
                 * and B2 */
!       int                     listTail[STRAT_NUM_LISTS];
!       int                     listSize[STRAT_NUM_LISTS];
!       Buffer          listFreeBuffers;        /* List of unused buffers */
! 
!       long            num_lookup;             /* Some hit statistics */
!       long            num_hit[STRAT_NUM_LISTS];
!       time_t          stat_report;
! 
!       /* Array of CDB's starts here */
!       BufferStrategyCDB cdb[1];       /* VARIABLE SIZE ARRAY */
! } BufferStrategyControl;
  
  
  /* counters in buf_init.c */
  extern long int ReadBufferCount;
--- 108,119 ----
  
  #define BufferDescriptorGetBuffer(bdesc) ((bdesc)->buf_id + 1)
  
  
! /* in bufmgr.c */
! extern BufferDesc *BufferDescriptors;
  
+ /* in localbuf.c */
+ extern BufferDesc *LocalBufferDescriptors;
  
  /* counters in buf_init.c */
  extern long int ReadBufferCount;
***************
*** 170,180 ****
  
  
  /*
!  * Bufmgr Interface:
   */
  
- /* Internal routines: only called by bufmgr */
- 
  /* freelist.c */
  extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
                                         int *cdb_found_index);
--- 125,133 ----
  
  
  /*
!  * Internal routines: only called by bufmgr
   */
  
  /* freelist.c */
  extern BufferDesc *StrategyBufferLookup(BufferTag *tagPtr, bool recheck,
                                         int *cdb_found_index);
***************
*** 185,204 ****
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
                                                int max_buffers);
  extern void StrategyInitialize(bool init);
  
  /* buf_table.c */
  extern void InitBufTable(int size);
  extern int    BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int cdb_id);
  extern void BufTableDelete(BufferTag *tagPtr);
  
- /* bufmgr.c */
- extern BufferDesc *BufferDescriptors;
- 
  /* localbuf.c */
- extern BufferDesc *LocalBufferDescriptors;
- 
  extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
                                 bool *foundPtr);
  extern void WriteLocalBuffer(Buffer buffer, bool release);
--- 138,154 ----
  extern void StrategyHintVacuum(bool vacuum_active);
  extern int StrategyDirtyBufferList(BufferDesc **buffers, BufferTag *buftags,
                                                int max_buffers);
+ extern int    StrategyShmemSize(void);
  extern void StrategyInitialize(bool init);
  
  /* buf_table.c */
+ extern int    BufTableShmemSize(int size);
  extern void InitBufTable(int size);
  extern int    BufTableLookup(BufferTag *tagPtr);
! extern void BufTableInsert(BufferTag *tagPtr, int buf_id);
  extern void BufTableDelete(BufferTag *tagPtr);
  
  /* localbuf.c */
  extern BufferDesc *LocalBufferAlloc(Relation reln, BlockNumber blockNum,
                                 bool *foundPtr);
  extern void WriteLocalBuffer(Buffer buffer, bool release);

---------------------------(end of broadcast)---------------------------
TIP 8: explain analyze is your friend

Reply via email to