Should this be marked as TODO?

---------------------------------------------------------------------------

Mitsuru IWASAKI wrote:
> Hi,
> 
> > On 05/07/2011 03:32 AM, Mitsuru IWASAKI wrote:
> > > For 1, I've just finished my work.  The latest patch is available at:
> > > http://people.freebsd.org/~iwasaki/postgres/buffer-cache-hibernation-postgresql-20110507.patch
> > >    
> > 
> > Reminder here--we can't accept code based on it being published to a web 
> > page.  You'll need to e-mail it to the pgsql-hackers mailing list to be 
> > considered for the next PostgreSQL CommitFest, which is starting in a 
> > few weeks.  Code submitted to the mailing list is considered a release 
> > of it to the project under the PostgreSQL license, which we can't just 
> > assume for things when given only a URL to them.
> 
> Sorry about that, but I had enough time to revise my patches this weekend.
> I have attached the patches to this mail, and will update the CommitFest page soon.
> 
> > Also, you suggested you were out of time to work on this.  If that's the 
> > case, we'd like to know that so we don't keep cc'ing you about things in 
> > expectation of an answer.  Someone else may pick this up as a project to 
> > continue working on.  But it's going to need a fair amount of revision 
> > before it matches what people want here, and I'm not sure how much of 
> > what you've written is going to end up in any commit that may happen 
> > from this idea.
> 
> It seems that I don't have enough time to complete this work.
> You don't need to keep cc'ing me, and I would be very happy if Postgres became
> the first DBMS to support a buffer cache hibernation feature.
> 
> Thanks!
> 
> 
> diff --git src/backend/access/transam/xlog.c src/backend/access/transam/xlog.c
> index b0e4c41..7a3a207 100644
> --- src/backend/access/transam/xlog.c
> +++ src/backend/access/transam/xlog.c
> @@ -4834,6 +4834,19 @@ ReadControlFile(void)
>  #endif
>  }
>  
> +bool
> +GetControlFile(ControlFileData *controlFile)
> +{
> +     if (ControlFile == NULL)
> +     {
> +             return false;
> +     }
> +
> +     memcpy(controlFile, ControlFile, sizeof(ControlFileData));
> +
> +     return true;
> +}
> +
>  void
>  UpdateControlFile(void)
>  {
> diff --git src/backend/bootstrap/bootstrap.c src/backend/bootstrap/bootstrap.c
> index fc093cc..7ecf6bb 100644
> --- src/backend/bootstrap/bootstrap.c
> +++ src/backend/bootstrap/bootstrap.c
> @@ -360,6 +360,15 @@ AuxiliaryProcessMain(int argc, char *argv[])
>       BaseInit();
>  
>       /*
> +      * Only StartupProcess can call ResumeBufferCacheHibernation() after
> +      * InitFileAccess() and smgrinit().
> +      */
> +     if (auxType == StartupProcess && BufferCacheHibernationLevel > 0)
> +     {
> +             ResumeBufferCacheHibernation();
> +     }
> +
> +     /*
>        * When we are an auxiliary process, we aren't going to do the full
>        * InitPostgres pushups, but there are a couple of things that need to 
> get
>        * lit up even in an auxiliary process.
> diff --git src/backend/storage/buffer/buf_init.c 
> src/backend/storage/buffer/buf_init.c
> index dadb49d..52eb51a 100644
> --- src/backend/storage/buffer/buf_init.c
> +++ src/backend/storage/buffer/buf_init.c
> @@ -127,6 +127,14 @@ InitBufferPool(void)
>  
>       /* Init other shared buffer-management stuff */
>       StrategyInitialize(!foundDescs);
> +
> +     if (BufferCacheHibernationLevel > 0)
> +     {
> +             
> ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> +                     (char *)BufferDescriptors, sizeof(BufferDesc), 
> NBuffers);
> +             
> ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS,
> +                     (char *)BufferBlocks, BLCKSZ, NBuffers);
> +     }
>  }
>  
>  /*
> diff --git src/backend/storage/buffer/bufmgr.c 
> src/backend/storage/buffer/bufmgr.c
> index f96685d..dba8ebf 100644
> --- src/backend/storage/buffer/bufmgr.c
> +++ src/backend/storage/buffer/bufmgr.c
> @@ -31,6 +31,7 @@
>  #include "postgres.h"
>  
>  #include <sys/file.h>
> +#include <sys/stat.h>
>  #include <unistd.h>
>  
>  #include "catalog/catalog.h"
> @@ -61,6 +62,13 @@
>  #define BUF_WRITTEN                          0x01
>  #define BUF_REUSABLE                 0x02
>  
> +/*
> + * Buffer Cache Hibernation stuff.
> + */
> +/* enable this to debug buffer cache hibernation. */
> +#if 0
> +#define DEBUG_BUFFER_CACHE_HIBERNATION
> +#endif
>  
>  /* GUC variables */
>  bool         zero_damaged_pages = false;
> @@ -765,6 +773,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, 
> ForkNumber forkNum,
>                               }
>                       }
>  
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +                     elog(DEBUG5,
> +                             "alloc  
> [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +                                     buf->buf_id, buf->flags, 
> buf->usage_count, buf->refcount,
> +                                     buf->wait_backend_pid, buf->freeNext,
> +                                     newHash, newTag.rnode.spcNode,
> +                                     newTag.rnode.dbNode, 
> newTag.rnode.relNode,
> +                                     newTag.forkNum, newTag.blockNum);
> +#endif
> +
>                       return buf;
>               }
>  
> @@ -800,6 +818,16 @@ BufferAlloc(SMgrRelation smgr, char relpersistence, 
> ForkNumber forkNum,
>        * the old content is no longer relevant.  (The usage_count starts out 
> at
>        * 1 so that the buffer can survive one clock-sweep pass.)
>        */
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +     elog(DEBUG5,
> +             "rename [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +                     buf->buf_id, buf->flags, buf->usage_count, 
> buf->refcount,
> +                     buf->wait_backend_pid, buf->freeNext,
> +                     oldHash, oldTag.rnode.spcNode,
> +                     oldTag.rnode.dbNode, oldTag.rnode.relNode,
> +                     oldTag.forkNum, oldTag.blockNum);
> +#endif
> +
>       buf->tag = newTag;
>       buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | 
> BM_CHECKPOINT_NEEDED | BM_IO_ERROR | BM_PERMANENT);
>       if (relpersistence == RELPERSISTENCE_PERMANENT)
> @@ -2772,3 +2800,716 @@ local_buffer_write_error_callback(void *arg)
>               pfree(path);
>       }
>  }
> +
> +/* ----------------------------------------------------------------
> + *           Buffer Cache Hibernation support stuff
> + *
> + * Suspend/resume buffer cache data structure using hibernation files
> + * at shutdown/startup.
> + * ----------------------------------------------------------------
> + */
> +
> +int  BufferCacheHibernationLevel = 0;
> +
> +#define      BUFFER_CACHE_HIBERNATION_FILE_STRATEGY          
> "global/pg_buffer_cache_hibernation_strategy"
> +#define      BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS       
> "global/pg_buffer_cache_hibernation_descriptors"
> +#define      BUFFER_CACHE_HIBERNATION_FILE_BLOCKS            
> "global/pg_buffer_cache_hibernation_blocks"
> +#define      BUFFER_CACHE_HIBERNATION_FILE_CRC32                     
> "global/pg_buffer_cache_hibernation_crc32"
> +
> +static struct
> +{
> +     char            *hibernation_file;
> +     char            *data_ptr;
> +     Size            record_length;  
> +     Size            num_records;    
> +     pg_crc32        crc;
> +} BufferCacheHibernationData[] =
> +{
> +     /* BufferStrategyControl */
> +     {
> +             BUFFER_CACHE_HIBERNATION_FILE_STRATEGY,
> +             NULL, 0, 0, 0
> +     },
> +
> +     /* BufferDescriptors */
> +     {
> +             BUFFER_CACHE_HIBERNATION_FILE_DESCRIPTORS,
> +             NULL, 0, 0, 0
> +     },
> +
> +     /* BufferBlocks */
> +     {
> +             BUFFER_CACHE_HIBERNATION_FILE_BLOCKS,
> +             NULL, 0, 0, 0
> +     },
> +
> +     /* End-of-list marker */
> +     {
> +             NULL,
> +             NULL, 0, 0, 0
> +     },
> +};
> +
> +static ControlFileData       controlFile;
> +static bool                          controlFileInitialized = false;
> +
> +/*
> + * AtProcExit_BufferCacheHibernation:
> + *           store the buffer cache into hibernation files at shutdown.
> + */
> +static void
> +AtProcExit_BufferCacheHibernation(int code, Datum arg)
> +{
> +     BufferHibernationFileType       id;
> +     int                                                     i;
> +     int                                                     fd;
> +
> +     if (BufferCacheHibernationLevel == 0)
> +     {
> +             return;
> +     }
> +
> +     /*
> +      * get the control file to check the system state validation.
> +      */
> +     if (GetControlFile(&controlFile) == false)
> +     {
> +             elog(WARNING,
> +                     "could not get control file, "
> +                     "aborting buffer cache hibernation");
> +             return;
> +     }
> +
> +     if (controlFile.state != DB_SHUTDOWNED)
> +     {
> +             elog(WARNING,
> +                     "database system was not shut down normally, "
> +                     "aborting buffer cache hibernation");
> +             return;
> +     }
> +
> +     /*
> +      * suspend buffer cache data structure into hibernation files.
> +      */
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             Size            record_length;
> +             Size            num_records;
> +             char            *ptr;
> +             pg_crc32        crc;
> +
> +             if (BufferCacheHibernationLevel < 2 &&
> +                     id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     continue;
> +             }
> +
> +             if (BufferCacheHibernationData[id].data_ptr == NULL ||
> +                     BufferCacheHibernationData[id].record_length == 0 ||
> +                     BufferCacheHibernationData[id].num_records == 0)
> +             {
> +                     elog(WARNING,
> +                             "ResisterBufferCacheHibernation() was not 
> called for %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     goto cleanup;
> +             }
> +
> +             fd = 
> BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                             O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, 
> S_IRUSR | S_IWUSR);
> +             if (fd < 0)
> +             {
> +                     elog(WARNING,
> +                             "could not open %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     goto cleanup;
> +             }
> +
> +             record_length = BufferCacheHibernationData[id].record_length;
> +             num_records = BufferCacheHibernationData[id].num_records;
> +
> +             elog(NOTICE,
> +                     "buffer cache hibernate into %s",
> +                     BufferCacheHibernationData[id].hibernation_file);
> +
> +             INIT_CRC32(crc);
> +             for (i = 0; i < num_records; i++)
> +             {
> +                     ptr = BufferCacheHibernationData[id].data_ptr + (i * 
> record_length);
> +                     if (write(fd, (void *)ptr, record_length) != 
> record_length)
> +                     {
> +                             elog(WARNING,
> +                                     "could not write %s",
> +                                     
> BufferCacheHibernationData[id].hibernation_file);
> +                             goto cleanup;
> +                     }
> +
> +                     COMP_CRC32(crc, ptr, record_length);
> +             }
> +
> +             FIN_CRC32(crc);
> +             close(fd);
> +
> +             BufferCacheHibernationData[id].crc = crc;
> +     }
> +
> +     /*
> +      * save the computed crc values for the validations at resuming.
> +      */
> +     fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                     O_CREAT | O_WRONLY | O_TRUNC | PG_BINARY, S_IRUSR | 
> S_IWUSR);
> +     if (fd < 0)
> +     {
> +             elog(WARNING,
> +                     "could not open %s",
> +                     BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> +             goto cleanup;
> +     }
> +
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             pg_crc32        crc;
> +
> +             if (BufferCacheHibernationLevel < 2 &&
> +                     id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     continue;
> +             }
> +
> +             crc = BufferCacheHibernationData[id].crc;
> +             if (write(fd, (void *)&crc, sizeof(pg_crc32)) != 
> sizeof(pg_crc32))
> +             {
> +                     elog(WARNING,
> +                             "could not write %s for %s",
> +                             BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     goto cleanup;
> +             }
> +     }
> +     close(fd);
> +
> +     elog(NOTICE,
> +             "buffer cache suspended successfully");
> +
> +     return;
> +
> +cleanup:
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             unlink(BufferCacheHibernationData[id].hibernation_file);
> +     }
> +
> +     return;
> +}
> +
> +/*
> + * ResisterBufferCacheHibernation:
> + *           register the buffer cache data structure info.
> + */
> +void
> +ResisterBufferCacheHibernation(BufferHibernationFileType id, char *ptr, Size 
> record_length, Size num_records)
> +{
> +     static bool                                     first_time = true;
> +
> +     if (BufferCacheHibernationLevel == 0)
> +     {
> +             return;
> +     }
> +
> +     if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY &&
> +             id != BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS &&
> +             id != BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +     {
> +             return;
> +     }
> +
> +     if (first_time)
> +     {
> +             /*
> +              * AtProcExit_BufferCacheHibernation to be called at shutdown.
> +              */
> +             on_shmem_exit(AtProcExit_BufferCacheHibernation, 0);
> +             first_time = false;
> +     }
> +
> +     /*
> +      * get the control file to check the system state and
> +      * hibernation file validations.
> +      */
> +     if (controlFileInitialized == false)
> +     {
> +             if (GetControlFile(&controlFile) == true)
> +             {
> +                     controlFileInitialized = true;
> +             }
> +     }
> +
> +     BufferCacheHibernationData[id].data_ptr = ptr;
> +     BufferCacheHibernationData[id].record_length = record_length;
> +     BufferCacheHibernationData[id].num_records = num_records;
> +}
> +
> +/*
> + * ResumeBufferCacheHibernation:
> + *           resume the buffer cache from hibernation file at startup.
> + */
> +void
> +ResumeBufferCacheHibernation(void)
> +{
> +     BufferHibernationFileType       id;
> +     int                                                     i;
> +     int                                                     fd;
> +     Size                                            num_records;
> +     Size                                            record_length;
> +     char                                            *buf_common;
> +     int                                                     oldNBuffers;
> +     bool                                            buffer_block_processed;
> +
> +     if (BufferCacheHibernationLevel == 0)
> +     {
> +             return;
> +     }
> +
> +     buf_common = NULL;
> +     buffer_block_processed = false;
> +
> +     /*
> +      * lock all buffer descriptors to prevent other processes from
> +      * updating buffers.
> +      */
> +     for (i = 0; i < NBuffers; i++)
> +     {
> +             BufferDesc      *buf;
> +
> +             buf = &BufferDescriptors[i];
> +             LockBufHdr(buf);
> +     }
> +
> +     /*
> +      * get the control file to check the system state and
> +      * hibernation file validations.
> +      */
> +     if (controlFileInitialized == false)
> +     {
> +             elog(WARNING,
> +                     "could not get control file, "
> +                     "aborting buffer cache hibernation");
> +             goto cleanup;
> +     }
> +
> +     if (controlFile.state != DB_SHUTDOWNED)
> +     {
> +             elog(WARNING,
> +                     "database system was not shut down normally, "
> +                     "aborting buffer cache hibernation");
> +             goto cleanup;
> +     }
> +
> +     /*
> +      * read the crc values which was computed when the hibernation
> +      * files were created.
> +      */
> +     fd = BasicOpenFile(BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                     O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +     if (fd < 0)
> +     {
> +             elog(WARNING,
> +                     "could not open %s",
> +                     BUFFER_CACHE_HIBERNATION_FILE_CRC32);
> +             goto cleanup;
> +     }
> +
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             pg_crc32        crc;
> +
> +             if (BufferCacheHibernationLevel < 2 &&
> +                     id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     continue;
> +             }
> +
> +             if (read(fd, (void *)&crc, sizeof(pg_crc32)) != 
> sizeof(pg_crc32))
> +             {
> +                     if (BufferCacheHibernationLevel == 2 &&
> +                             id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +                     {
> +                             /*
> +                              * if buffer_cache_hibernation_level changes 1 
> to 2,
> +                              * the crc value of buffer block hibernation 
> file may not exist.
> +                              * just ignore it here.
> +                              */
> +                             continue;
> +                     }
> +
> +                     elog(WARNING,
> +                             "could not read %s for %s",
> +                             BUFFER_CACHE_HIBERNATION_FILE_CRC32,
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     close(fd);
> +                     goto cleanup;
> +             }
> +             BufferCacheHibernationData[id].crc = crc;
> +     }
> +
> +     close(fd);
> +
> +     /*
> +      * allocate a buffer to read the contents of the hibernation files
> +      * for validations.
> +      */
> +     record_length = 0;
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             if (record_length < 
> BufferCacheHibernationData[id].record_length)
> +             {
> +                     record_length = 
> BufferCacheHibernationData[id].record_length;
> +             }
> +     }
> +
> +     buf_common = malloc(record_length);
> +     Assert(buf_common != NULL);
> +
> +     /* assume that the number of buffers have not changed. */
> +     oldNBuffers = NBuffers;
> +
> +     /*
> +      * check if all hibernation files are valid.
> +      */
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             struct stat     sb;
> +             pg_crc32        crc;
> +
> +             if (BufferCacheHibernationLevel < 2 &&
> +                     id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     continue;
> +             }
> +
> +             if (BufferCacheHibernationData[id].data_ptr == NULL ||
> +                     BufferCacheHibernationData[id].record_length == 0 ||
> +                     BufferCacheHibernationData[id].num_records == 0)
> +             {
> +                     elog(WARNING,
> +                             "ResisterBufferCacheHibernation() was not 
> called for %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     goto cleanup;
> +             }
> +
> +             fd = 
> BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                             O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +             if (fd < 0)
> +             {
> +                     if (BufferCacheHibernationLevel == 2 &&
> +                             id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +                     {
> +                             /*
> +                              * if buffer_cache_hibernation_level changes 1 
> to 2,
> +                              * the buffer block hibernation file may not 
> exist.
> +                              * just ignore it here.
> +                              */
> +                             continue;
> +                     }
> +
> +                     goto cleanup;
> +             }
> +
> +             if (fstat(fd, &sb) < 0)
> +             {
> +                     elog(WARNING,
> +                             "could not get stats of the buffer cache 
> hibernation file: %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     close(fd);
> +                     goto cleanup;
> +             }
> +
> +             record_length = BufferCacheHibernationData[id].record_length;
> +             num_records = BufferCacheHibernationData[id].num_records;
> +
> +             if (sb.st_size != (record_length * num_records))
> +             {
> +                     /* The size of StrategyControl should be the same 
> always. */
> +                     if (id == BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY ||
> +                             (sb.st_size % record_length) > 0)
> +                     {
> +                             elog(WARNING,
> +                                     "size mismatch on the buffer cache 
> hibernation file: %s",
> +                                     
> BufferCacheHibernationData[id].hibernation_file);
> +                             close(fd);
> +                             goto cleanup;
> +                     }
> +
> +                     /*
> +                      * The number of records of buffer descriptors and 
> blocks
> +                      * should be the same.
> +                      */
> +                     if (oldNBuffers != NBuffers &&
> +                             oldNBuffers != (sb.st_size / record_length))
> +                     {
> +                             elog(WARNING,
> +                                     "size mismatch on the buffer cache 
> hibernation file: %s",
> +                                     
> BufferCacheHibernationData[id].hibernation_file);
> +                             close(fd);
> +                             goto cleanup;
> +                     }
> +                     
> +                     oldNBuffers = sb.st_size / record_length;
> +
> +                     elog(NOTICE,
> +                             "shared_buffers have changed from %d to %d: %s",
> +                             oldNBuffers, NBuffers,
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +
> +                     /* use the original size to compute CRC of the 
> hibernation file. */
> +                     num_records = oldNBuffers;
> +             }
> +
> +             if ((pg_time_t)sb.st_mtime < controlFile.time)
> +             {
> +                     elog(WARNING,
> +                             "the hibernation file is older than control 
> file: %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     close(fd);
> +                     goto cleanup;
> +             }
> +
> +             INIT_CRC32(crc);
> +             for (i = 0; i < num_records; i++)
> +             {
> +                     if (read(fd, (void *)buf_common, record_length) != 
> record_length)
> +                     {
> +                             elog(WARNING,
> +                                     "could not read the buffer cache 
> hibernation file: %s",
> +                                     
> BufferCacheHibernationData[id].hibernation_file);
> +                             close(fd);
> +                             goto cleanup;
> +                     }
> +
> +                     COMP_CRC32(crc, buf_common, record_length);
> +
> +                     /*
> +                      * buffer descriptors validations.
> +                      */
> +                     if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> +                     {
> +                             BufferDesc      *buf;
> +                             BufFlags        abnormal_flags;
> +
> +                             if (i >= NBuffers)
> +                             {
> +                                     continue;
> +                             }
> +
> +                             abnormal_flags = (BM_DIRTY | BM_IO_IN_PROGRESS 
> | BM_IO_ERROR |
> +                                                               
> BM_JUST_DIRTIED | BM_PIN_COUNT_WAITER);
> +
> +                             buf = (BufferDesc *)buf_common;
> +
> +                             if (buf->flags & abnormal_flags)
> +                             {
> +                                     elog(WARNING,
> +                                             "abnormal flags in buffer 
> descriptors: %d",
> +                                             buf->flags);
> +                                     close(fd);
> +                                     goto cleanup;
> +                             }
> +
> +                             if (buf->usage_count > BM_MAX_USAGE_COUNT)
> +                             {
> +                                     elog(WARNING,
> +                                             "invalid usage count in buffer 
> descriptors: %d",
> +                                             buf->usage_count);
> +                                     close(fd);
> +                                     goto cleanup;
> +                             }
> +
> +                             if (buf->buf_id < 0 || buf->buf_id >= 
> num_records)
> +                             {
> +                                     elog(WARNING,
> +                                             "invalid buffer id in buffer 
> descriptors: %d",
> +                                             buf->buf_id);
> +                                     close(fd);
> +                                     goto cleanup;
> +                             }
> +                     }
> +             }
> +
> +             FIN_CRC32(crc);
> +             close(fd);
> +
> +             if (!EQ_CRC32(BufferCacheHibernationData[id].crc, crc))
> +             {
> +                     elog(WARNING,
> +                             "crc mismatch on the buffer cache hibernation 
> file: %s",
> +                             
> BufferCacheHibernationData[id].hibernation_file);
> +                     close(fd);
> +                     goto cleanup;
> +             }
> +     }
> +
> +     /*
> +      * resume the buffer cache data structure from the hibernation files.
> +      */
> +     for (id = 0; BufferCacheHibernationData[id].hibernation_file != NULL; 
> id++)
> +     {
> +             int                     fd;
> +             char            *ptr;
> +
> +             if (BufferCacheHibernationLevel < 2 &&
> +                     id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     continue;
> +             }
> +
> +             record_length = BufferCacheHibernationData[id].record_length;
> +             num_records = BufferCacheHibernationData[id].num_records;
> +
> +             if (id != BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY)
> +             {
> +                     /* use the smaller number of buffers. */
> +                     num_records = (oldNBuffers < NBuffers)? oldNBuffers : 
> NBuffers;
> +             }
> +
> +             fd = 
> BasicOpenFile(BufferCacheHibernationData[id].hibernation_file,
> +                             O_RDONLY | PG_BINARY, S_IRUSR | S_IWUSR);
> +             if (fd < 0)
> +             {
> +                     if (BufferCacheHibernationLevel == 2 &&
> +                             id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +                     {
> +                             /*
> +                              * if buffer_cache_hibernation_level changes 1 
> to 2,
> +                              * the buffer block hibernation file may not 
> exist.
> +                              * just ignore it here.
> +                              */
> +                             continue;
> +                     }
> +
> +                     goto cleanup;
> +             }
> +
> +             elog(NOTICE,
> +                     "buffer cache resume from %s(%d bytes * %d records)",
> +                     BufferCacheHibernationData[id].hibernation_file,
> +                     record_length, num_records);
> +
> +             for (i = 0; i < num_records; i++)
> +             {
> +                     ptr = BufferCacheHibernationData[id].data_ptr + (i * 
> record_length);
> +                     read(fd, (void *)ptr, record_length);
> +
> +                     /* Re-lock the buffer descriptor if necessary. */
> +                     if (id == BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS)
> +                     {
> +                             BufferDesc      *buf;
> +
> +                             buf = (BufferDesc *)ptr;
> +                             if (IsUnlockBufHdr(buf))
> +                             {
> +                                     LockBufHdr(buf);
> +                             }
> +                     }
> +             }
> +
> +             close(fd);
> +
> +             if (id == BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS)
> +             {
> +                     buffer_block_processed = true;
> +             }
> +     }
> +
> +     if (buffer_block_processed == false)
> +     {
> +             /* we didn't use the buffer block hibernation file, so delete 
> it now. */
> +             id = BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS;
> +             unlink(BufferCacheHibernationData[id].hibernation_file);
> +     }
> +
> +     /*
> +      * set the rest data structures (eg. lookup hashtable) up
> +      * based on the buffer descriptors.
> +      */
> +     num_records = (oldNBuffers < NBuffers)? oldNBuffers : NBuffers;
> +     for (i = 0; i < num_records; i++)
> +     {
> +             BufferDesc              *buf;
> +             BufferTag               newTag;
> +             uint32                  newHash;
> +             int                             buf_id;
> +
> +             buf = &BufferDescriptors[i];
> +             if (buf->tag.rnode.spcNode      == InvalidOid &&
> +                     buf->tag.rnode.dbNode   == InvalidOid &&
> +                     buf->tag.rnode.relNode  == InvalidOid)
> +             {
> +                     continue;
> +             }
> +
> +             INIT_BUFFERTAG(newTag, buf->tag.rnode, buf->tag.forkNum, 
> buf->tag.blockNum);
> +             newHash = BufTableHashCode(&newTag);
> +
> +             if (buffer_block_processed == false)
> +             {
> +                     Block                   bufBlock;
> +                     SMgrRelation    smgr;
> +
> +                     /*
> +                      * re-read buffer block.
> +                      */
> +                     bufBlock = BufHdrGetBlock(buf);
> +                     smgr = smgropen(buf->tag.rnode, InvalidBackendId);
> +                     smgrread(smgr, newTag.forkNum, newTag.blockNum, (char 
> *) bufBlock);
> +             }
> +
> +             buf_id = BufTableInsert(&newTag, newHash, buf->buf_id);
> +             if (buf_id != -1)
> +             {
> +                     /* the entry exists already, return it to the freelist. */
> +                     buf->refcount = 0;
> +                     buf->flags = 0;
> +                     InvalidateBuffer(buf);
> +                     continue;
> +             }
> +
> +             /* clear wait_backend_pid because the process was terminated already. */
> +             buf->wait_backend_pid = 0;
> +
> +#ifdef DEBUG_BUFFER_CACHE_HIBERNATION
> +             elog(DEBUG5,
> +                     "resume [%d]\t%03x,%d,%d,%d,%d\t%08x,%d,%d,%d,%d,%d",
> +                             buf->buf_id, buf->flags, buf->usage_count, buf->refcount,
> +                             buf->wait_backend_pid, buf->freeNext,
> +                             newHash, newTag.rnode.spcNode,
> +                             newTag.rnode.dbNode, newTag.rnode.relNode,
> +                             newTag.forkNum, newTag.blockNum);
> +#endif
> +     }
> +
> +     /*
> +      * adjust StrategyControl based on the change of shared_buffers.
> +      */
> +     if (oldNBuffers != NBuffers)
> +     {
> +             AdjustStrategyControl(oldNBuffers);
> +     }
> +
> +     elog(NOTICE,
> +             "buffer cache resumed successfully");
> +
> +cleanup:
> +     for (i = 0; i < NBuffers; i++)
> +     {
> +             BufferDesc      *buf;
> +
> +             buf = &BufferDescriptors[i];
> +             UnlockBufHdr(buf);
> +     }
> +
> +     if (buf_common != NULL)
> +     {
> +             free(buf_common);
> +     }
> +
> +     return;
> +}
> diff --git src/backend/storage/buffer/freelist.c src/backend/storage/buffer/freelist.c
> index bf9903b..ffc101d 100644
> --- src/backend/storage/buffer/freelist.c
> +++ src/backend/storage/buffer/freelist.c
> @@ -347,6 +347,12 @@ StrategyInitialize(bool init)
>       }
>       else
>               Assert(!init);
> +
> +     if (BufferCacheHibernationLevel > 0)
> +     {
> +             ResisterBufferCacheHibernation(BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> +                     (char *)StrategyControl, sizeof(BufferStrategyControl), 1);
> +     }
>  }
>  
>  
> @@ -521,3 +527,47 @@ StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
>  
>       return true;
>  }
> +
> +/*
> + * AdjustStrategyControl -- adjust the member variables of StrategyControl
> + *
> + * If the shared_buffers setting had changed, restored StrategyControl
> + * needs to be adjusted for in both cases of shrinking and enlarging.
> + * This is called only from bufmgr.c:ResumeBufferCacheHibernation().
> + */
> +void
> +AdjustStrategyControl(int oldNBuffers)
> +{
> +     if (oldNBuffers == NBuffers)
> +     {
> +             return;
> +     }
> +
> +     /* enlarge or shrink the free buffer based on current NBuffers. */
> +     StrategyControl->lastFreeBuffer = NBuffers - 1;
> +
> +     /* shared_buffers shrunk. */
> +     if (oldNBuffers > NBuffers)
> +     {
> +             if (StrategyControl->nextVictimBuffer >= NBuffers)
> +             {
> +                     /* set the tail of buffers. */
> +                     StrategyControl->nextVictimBuffer = NBuffers - 1;
> +             }
> +
> +             if (StrategyControl->firstFreeBuffer >= NBuffers)
> +             {
> +                     /* set FREENEXT_END_OF_LIST(-1). */
> +                     StrategyControl->firstFreeBuffer = FREENEXT_END_OF_LIST;
> +             }
> +     }
> +     else
> +     /* shared_buffers enlarged. */
> +     {
> +             if (StrategyControl->firstFreeBuffer < 0)
> +             {
> +                     /* set the next entry of the tail of old buffers. */
> +                     StrategyControl->firstFreeBuffer = oldNBuffers;
> +             }
> +     }
> +}
> diff --git src/backend/utils/misc/guc.c src/backend/utils/misc/guc.c
> index 738e215..5affc6e 100644
> --- src/backend/utils/misc/guc.c
> +++ src/backend/utils/misc/guc.c
> @@ -2361,6 +2361,18 @@ static struct config_int ConfigureNamesInt[] =
>               NULL, NULL, NULL
>       },
>  
> +     {
> +             {"buffer_cache_hibernation_level", PGC_POSTMASTER, UNGROUPED,
> +                     gettext_noop("Sets buffer cache hibernation level."),
> +                     gettext_noop("0 to disable(default), "
> +                                              "1 for saving buffer descriptors only(recommended), "
> +                                              "2 for saving buffer descriptors and buffer blocks(slower at shutdown).")
> +             },
> +             &BufferCacheHibernationLevel,
> +             0, 0, 2,
> +             NULL, NULL, NULL
> +     },
> +
>       /* End-of-list marker */
>       {
>               {NULL, 0, 0, NULL, NULL}, NULL, 0, 0, 0, NULL, NULL, NULL
> diff --git src/backend/utils/misc/postgresql.conf.sample src/backend/utils/misc/postgresql.conf.sample
> index b8a1582..44b6ff3 100644
> --- src/backend/utils/misc/postgresql.conf.sample
> +++ src/backend/utils/misc/postgresql.conf.sample
> @@ -119,6 +119,17 @@
>  #maintenance_work_mem = 16MB         # min 1MB
>  #max_stack_depth = 2MB                       # min 100kB
>  
> +
> +# Buffer Cache Hibernation:
> +#  Suspend/resume buffer cache data structure using hibernation files
> +#  at shutdown/startup.
> +#buffer_cache_hibernation_level = 0  # Sets buffer cache hibernation level.
> +                                     # 0 to disable(default),
> +                                     # 1 for saving buffer descriptors only
> +                                     #   (recommended),
> +                                     # 2 for saving buffer descriptors and
> +                                     #   buffer blocks(slower at shutdown).
> +
>  # - Kernel Resource Usage -
>  
>  #max_files_per_process = 1000                # min 25
> diff --git src/include/access/xlog.h src/include/access/xlog.h
> index 7056fd6..7a9fb99 100644
> --- src/include/access/xlog.h
> +++ src/include/access/xlog.h
> @@ -13,6 +13,7 @@
>  
>  #include "access/rmgr.h"
>  #include "access/xlogdefs.h"
> +#include "catalog/pg_control.h"
>  #include "lib/stringinfo.h"
>  #include "storage/buf.h"
>  #include "utils/pg_crc.h"
> @@ -294,6 +295,7 @@ extern bool XLogInsertAllowed(void);
>  extern void GetXLogReceiptTime(TimestampTz *rtime, bool *fromStream);
>  extern XLogRecPtr GetXLogReplayRecPtr(void);
>  
> +extern bool GetControlFile(ControlFileData *controlFile);
>  extern void UpdateControlFile(void);
>  extern uint64 GetSystemIdentifier(void);
>  extern Size XLOGShmemSize(void);
> diff --git src/include/storage/buf_internals.h src/include/storage/buf_internals.h
> index b7d4ea5..d537ef1 100644
> --- src/include/storage/buf_internals.h
> +++ src/include/storage/buf_internals.h
> @@ -167,6 +167,7 @@ typedef struct sbufdesc
>   */
>  #define LockBufHdr(bufHdr)           SpinLockAcquire(&(bufHdr)->buf_hdr_lock)
>  #define UnlockBufHdr(bufHdr) SpinLockRelease(&(bufHdr)->buf_hdr_lock)
> +#define IsUnlockBufHdr(bufHdr)       SpinLockFree(&(bufHdr)->buf_hdr_lock)
>  
>  
>  /* in buf_init.c */
> @@ -190,6 +191,7 @@ extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
>  extern int   StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc);
>  extern Size StrategyShmemSize(void);
>  extern void StrategyInitialize(bool init);
> +extern void AdjustStrategyControl(int oldNBuffers);
>  
>  /* buf_table.c */
>  extern Size BufTableShmemSize(int size);
> diff --git src/include/storage/bufmgr.h src/include/storage/bufmgr.h
> index b8fc87e..ddfeb9d 100644
> --- src/include/storage/bufmgr.h
> +++ src/include/storage/bufmgr.h
> @@ -211,6 +211,20 @@ extern void BgBufferSync(void);
>  
>  extern void AtProcExit_LocalBuffers(void);
>  
> +/* buffer cache hibernation support stuff */
> +extern int   BufferCacheHibernationLevel;
> +
> +typedef enum BufferHibernationFileType
> +{   
> +    BUFFER_CACHE_HIBERNATION_TYPE_STRATEGY,
> +    BUFFER_CACHE_HIBERNATION_TYPE_DESCRIPTORS,
> +    BUFFER_CACHE_HIBERNATION_TYPE_BLOCKS
> +} BufferHibernationFileType;
> +
> +extern void ResisterBufferCacheHibernation(BufferHibernationFileType id,
> +                             char *ptr, Size record_length, Size num_records);
> +extern void ResumeBufferCacheHibernation(void);
> +
>  /* in freelist.c */
>  extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
>  extern void FreeAccessStrategy(BufferAccessStrategy strategy);
> 
> -- 
> Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
> To make changes to your subscription:
> http://www.postgresql.org/mailpref/pgsql-hackers

-- 
  Bruce Momjian  <br...@momjian.us>        http://momjian.us
  EnterpriseDB                             http://enterprisedb.com

  + It's impossible for everything to be true. +

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to