On 3.1.2013 20:33, Magnus Hagander wrote:
> On Thu, Jan 3, 2013 at 8:31 PM, Tomas Vondra <t...@fuzzy.cz> wrote:
>> On 3.1.2013 18:47, Heikki Linnakangas wrote:
>>> How about creating the new directory as a direct subdir of $PGDATA,
>>> rather than buried in global? "global" is supposed to contain data
>>> related to shared catalog relations (plus pg_control), so it doesn't
>>> seem like the right location for per-database stat files. Also, if we're
>>> going to have admins manually zapping the directory (hopefully when the
>>> system is offline), that's less scary if the directory is not buried as
>>> deep.
>>
>> That's clearly possible and it's a trivial change. I was thinking about
>> that actually, but then I placed the directory into "global" because
>> that's where the "pgstat.stat" originally was.
> 
> Yeah, +1 for a separate directory not in global.

OK, I moved the files from "global/stat" to "stat".

Tomas
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index be3adf1..4ec485e 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -64,10 +64,14 @@
 
 /* ----------
  * Paths for the statistics files (relative to installation's $PGDATA).
+ * Permanent and temporary, global and per-database files.
  * ----------
  */
-#define PGSTAT_STAT_PERMANENT_FILENAME         "global/pgstat.stat"
-#define PGSTAT_STAT_PERMANENT_TMPFILE          "global/pgstat.tmp"
+#define PGSTAT_STAT_PERMANENT_DIRECTORY                "stat"
+#define PGSTAT_STAT_PERMANENT_FILENAME         "stat/global.stat"
+#define PGSTAT_STAT_PERMANENT_TMPFILE          "stat/global.tmp"
+#define PGSTAT_STAT_PERMANENT_DB_FILENAME      "stat/%d.stat"
+#define PGSTAT_STAT_PERMANENT_DB_TMPFILE       "stat/%d.tmp"
 
 /* ----------
  * Timer definitions.
@@ -115,8 +119,11 @@ int                        
pgstat_track_activity_query_size = 1024;
  * Built from GUC parameter
  * ----------
  */
+char      *pgstat_stat_directory = NULL;
 char      *pgstat_stat_filename = NULL;
 char      *pgstat_stat_tmpname = NULL;
+char      *pgstat_stat_db_filename = NULL;
+char      *pgstat_stat_db_tmpname = NULL;
 
 /*
  * BgWriter global statistics counters (unused in other processes).
@@ -219,11 +226,16 @@ static int        localNumBackends = 0;
  */
 static PgStat_GlobalStats globalStats;
 
-/* Last time the collector successfully wrote the stats file */
-static TimestampTz last_statwrite;
+/* Write request info for each database */
+typedef struct DBWriteRequest
+{
+       Oid                     databaseid;             /* OID of the database 
to write */
+       TimestampTz request_time;       /* timestamp of the last write request 
*/
+} DBWriteRequest;
 
-/* Latest statistics request time from backends */
-static TimestampTz last_statrequest;
+/* Latest statistics request time from backends for each DB */
+static DBWriteRequest * last_statrequests = NULL;
+static int num_statrequests = 0;
 
 static volatile bool need_exit = false;
 static volatile bool got_SIGHUP = false;
@@ -252,11 +264,17 @@ static void pgstat_sighup_handler(SIGNAL_ARGS);
 static PgStat_StatDBEntry *pgstat_get_db_entry(Oid databaseid, bool create);
 static PgStat_StatTabEntry *pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry,
                                         Oid tableoid, bool create);
-static void pgstat_write_statsfile(bool permanent);
-static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent);
+static void pgstat_write_statsfile(bool permanent, bool force);
+static void pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool 
permanent);
+static void pgstat_write_db_dummyfile(Oid databaseid);
+static HTAB *pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs);
+static void pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB 
*funchash, bool permanent);
 static void backend_read_statsfile(void);
 static void pgstat_read_current_status(void);
 
+static bool pgstat_write_statsfile_needed();
+static bool pgstat_db_requested(Oid databaseid);
+
 static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
 static void pgstat_send_funcstats(void);
 static HTAB *pgstat_collect_oids(Oid catalogid);
@@ -285,7 +303,6 @@ static void 
pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int le
 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
-
 /* ------------------------------------------------------------
  * Public functions called from postmaster follow
  * ------------------------------------------------------------
@@ -549,8 +566,34 @@ startup_failed:
 void
 pgstat_reset_all(void)
 {
-       unlink(pgstat_stat_filename);
-       unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+       DIR * dir;
+       struct dirent * entry;
+
+       dir = AllocateDir(pgstat_stat_directory);
+       while ((entry = ReadDir(dir, pgstat_stat_directory)) != NULL)
+       {
+               /* temporary stats files; +2 = '/' separator plus terminating NUL */
+               char fname[strlen(pgstat_stat_directory) + strlen(entry->d_name) + 2];
+
+               if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+                       continue;
+               snprintf(fname, sizeof(fname), "%s/%s", pgstat_stat_directory, entry->d_name);
+               unlink(fname);
+       }
+       FreeDir(dir);
+
+       dir = AllocateDir(PGSTAT_STAT_PERMANENT_DIRECTORY);
+       while ((entry = ReadDir(dir, PGSTAT_STAT_PERMANENT_DIRECTORY)) != NULL)
+       {
+               /* permanent stats files; path is built from the directory name */
+               char fname[strlen(PGSTAT_STAT_PERMANENT_DIRECTORY) + strlen(entry->d_name) + 2];
+
+               if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+                       continue;
+               snprintf(fname, sizeof(fname), "%s/%s", PGSTAT_STAT_PERMANENT_DIRECTORY, entry->d_name);
+               unlink(fname);
+       }
+       FreeDir(dir);
 }
 
 #ifdef EXEC_BACKEND
@@ -1408,13 +1451,14 @@ pgstat_ping(void)
  * ----------
  */
 static void
-pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time)
+pgstat_send_inquiry(TimestampTz clock_time, TimestampTz cutoff_time, Oid 
databaseid)
 {
        PgStat_MsgInquiry msg;
 
        pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_INQUIRY);
        msg.clock_time = clock_time;
        msg.cutoff_time = cutoff_time;
+       msg.databaseid = databaseid;
        pgstat_send(&msg, sizeof(msg));
 }
 
@@ -3004,6 +3048,7 @@ PgstatCollectorMain(int argc, char *argv[])
        int                     len;
        PgStat_Msg      msg;
        int                     wr;
+       bool            first_write = true;
 
        IsUnderPostmaster = true;       /* we are a postmaster subprocess now */
 
@@ -3053,17 +3098,11 @@ PgstatCollectorMain(int argc, char *argv[])
        init_ps_display("stats collector process", "", "", "");
 
        /*
-        * Arrange to write the initial status file right away
-        */
-       last_statrequest = GetCurrentTimestamp();
-       last_statwrite = last_statrequest - 1;
-
-       /*
         * Read in an existing statistics stats file or initialize the stats to
-        * zero.
+        * zero (read data for all databases, including table/func stats).
         */
        pgStatRunningInCollector = true;
-       pgStatDBHash = pgstat_read_statsfile(InvalidOid, true);
+       pgStatDBHash = pgstat_read_statsfile(InvalidOid, true, false);
 
        /*
         * Loop to process messages until we get SIGQUIT or detect ungraceful
@@ -3107,10 +3146,14 @@ PgstatCollectorMain(int argc, char *argv[])
 
                        /*
                         * Write the stats file if a new request has arrived 
that is not
-                        * satisfied by existing file.
+                        * satisfied by existing file (force writing all files 
if it's
+                        * the first write after startup).
                         */
-                       if (last_statwrite < last_statrequest)
-                               pgstat_write_statsfile(false);
+                       if (first_write || pgstat_write_statsfile_needed())
+                       {
+                               pgstat_write_statsfile(false, first_write);
+                               first_write = false;
+                       }
 
                        /*
                         * Try to receive and process a message.  This will not 
block,
@@ -3269,7 +3312,7 @@ PgstatCollectorMain(int argc, char *argv[])
        /*
         * Save the final stats to reuse at next startup.
         */
-       pgstat_write_statsfile(true);
+       pgstat_write_statsfile(true, true);
 
        exit(0);
 }
@@ -3429,23 +3472,25 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid 
tableoid, bool create)
  *     shutting down only), remove the temporary file so that backends
  *     starting up under a new postmaster can't read the old data before
  *     the new collector is ready.
+ * 
+ *     When the 'force' is false, only the requested databases (listed in
+ *     last_statrequests) will be written. If 'force' is true, all databases
+ *     will be written (this is used e.g. at shutdown).
  * ----------
  */
 static void
-pgstat_write_statsfile(bool permanent)
+pgstat_write_statsfile(bool permanent, bool force)
 {
        HASH_SEQ_STATUS hstat;
-       HASH_SEQ_STATUS tstat;
-       HASH_SEQ_STATUS fstat;
        PgStat_StatDBEntry *dbentry;
-       PgStat_StatTabEntry *tabentry;
-       PgStat_StatFuncEntry *funcentry;
        FILE       *fpout;
        int32           format_id;
        const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_TMPFILE : 
pgstat_stat_tmpname;
        const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : 
pgstat_stat_filename;
        int                     rc;
 
+       elog(DEBUG1, "writing statsfile '%s'", statfile);
+       
        /*
         * Open the statistics temp file to write out the current values.
         */
@@ -3484,6 +3529,20 @@ pgstat_write_statsfile(bool permanent)
        while ((dbentry = (PgStat_StatDBEntry *) hash_seq_search(&hstat)) != 
NULL)
        {
                /*
+                * Write out the tables and functions into a separate file, but 
only
+                * if the database is in the requests or if it's a forced write 
(then
+                * all the DBs need to be written - e.g. at the shutdown).
+                * 
+                * We need to do this before the dbentry write to write the 
proper
+                * timestamp to the global file.
+                */
+               if (force || pgstat_db_requested(dbentry->databaseid)) {
+                       elog(DEBUG1, "writing statsfile for DB %d", 
dbentry->databaseid);
+                       dbentry->stats_timestamp = globalStats.stats_timestamp;
+                       pgstat_write_db_statsfile(dbentry, permanent);
+               }
+
+               /*
                 * Write out the DB entry including the number of live 
backends. We
                 * don't write the tables or functions pointers, since they're 
of no
                 * use to any other process.
@@ -3493,29 +3552,10 @@ pgstat_write_statsfile(bool permanent)
                (void) rc;                              /* we'll check for 
error with ferror */
 
                /*
-                * Walk through the database's access stats per table.
-                */
-               hash_seq_init(&tstat, dbentry->tables);
-               while ((tabentry = (PgStat_StatTabEntry *) 
hash_seq_search(&tstat)) != NULL)
-               {
-                       fputc('T', fpout);
-                       rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, 
fpout);
-                       (void) rc;                      /* we'll check for 
error with ferror */
-               }
-
-               /*
-                * Walk through the database's function stats table.
-                */
-               hash_seq_init(&fstat, dbentry->functions);
-               while ((funcentry = (PgStat_StatFuncEntry *) 
hash_seq_search(&fstat)) != NULL)
-               {
-                       fputc('F', fpout);
-                       rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, 
fpout);
-                       (void) rc;                      /* we'll check for 
error with ferror */
-               }
-
-               /*
                 * Mark the end of this DB
+                * 
+                * TODO Does using these chars still make sense, when the 
tables/func
+                * stats are moved to a separate file?
                 */
                fputc('d', fpout);
        }
@@ -3527,6 +3567,28 @@ pgstat_write_statsfile(bool permanent)
         */
        fputc('E', fpout);
 
+       /* In any case, we can just throw away all the db requests, but we need 
to
+        * write dummy files for databases without a stat entry (it would cause
+        * issues in pgstat_read_db_statsfile_timestamp and pgstat wait 
timeouts).
+        * This may happen, e.g., for the shared DB (oid = 0) right after initdb.
+        */
+       if (last_statrequests != NULL)
+       {
+               int i = 0;
+               for (i = 0; i < num_statrequests; i++)
+               {
+                       /* Create dummy files for requested databases without a 
proper
+                        * dbentry. It's much easier this way than dealing with 
multiple
+                        * timestamps, possibly existing but not yet written 
DBs etc. */
+                       if (! 
pgstat_get_db_entry(last_statrequests[i].databaseid, false))
+                               
pgstat_write_db_dummyfile(last_statrequests[i].databaseid);
+               }
+
+               pfree(last_statrequests);
+               last_statrequests = NULL;
+               num_statrequests = 0;
+       }
+
        if (ferror(fpout))
        {
                ereport(LOG,
@@ -3552,57 +3614,247 @@ pgstat_write_statsfile(bool permanent)
                                                tmpfile, statfile)));
                unlink(tmpfile);
        }
-       else
+
+       if (permanent)
+               unlink(pgstat_stat_filename);
+}
+
+
+/* ----------
+ * pgstat_write_db_statsfile() -
+ *
+ *     Tell the news. This writes stats file for a single database.
+ *
+ *     If writing to the permanent file (happens when the collector is
+ *     shutting down only), remove the temporary file so that backends
+ *     starting up under a new postmaster can't read the old data before
+ *     the new collector is ready.
+ * ----------
+ */
+static void
+pgstat_write_db_statsfile(PgStat_StatDBEntry * dbentry, bool permanent)
+{
+       HASH_SEQ_STATUS tstat;
+       HASH_SEQ_STATUS fstat;
+       PgStat_StatTabEntry *tabentry;
+       PgStat_StatFuncEntry *funcentry;
+       FILE       *fpout;
+       int32           format_id;
+       const char *tmpfile = permanent ? PGSTAT_STAT_PERMANENT_DB_TMPFILE : 
pgstat_stat_db_tmpname;
+       const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : 
pgstat_stat_db_filename;
+       int                     rc;
+
+       /*
+        * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 
byte
+        */
+       char db_tmpfile[strlen(tmpfile) + 11];
+       char db_statfile[strlen(statfile) + 11];
+
+       /*
+        * Append database OID at the end of the basic filename (both for tmp 
and target file).
+        */
+       snprintf(db_tmpfile, strlen(tmpfile) + 11, tmpfile, 
dbentry->databaseid);
+       snprintf(db_statfile, strlen(statfile) + 11, statfile, 
dbentry->databaseid);
+
+       elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+       /*
+        * Open the statistics temp file to write out the current values.
+        */
+       fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+       if (fpout == NULL)
        {
-               /*
-                * Successful write, so update last_statwrite.
-                */
-               last_statwrite = globalStats.stats_timestamp;
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not open temporary statistics 
file \"%s\": %m",
+                                               db_tmpfile)));
+               return;
+       }
 
-               /*
-                * If there is clock skew between backends and the collector, 
we could
-                * receive a stats request time that's in the future.  If so, 
complain
-                * and reset last_statrequest.  Resetting ensures that no 
inquiry
-                * message can cause more than one stats file write to occur.
-                */
-               if (last_statrequest > last_statwrite)
-               {
-                       char       *reqtime;
-                       char       *mytime;
-
-                       /* Copy because timestamptz_to_str returns a static 
buffer */
-                       reqtime = pstrdup(timestamptz_to_str(last_statrequest));
-                       mytime = pstrdup(timestamptz_to_str(last_statwrite));
-                       elog(LOG, "last_statrequest %s is later than 
collector's time %s",
-                                reqtime, mytime);
-                       pfree(reqtime);
-                       pfree(mytime);
-
-                       last_statrequest = last_statwrite;
-               }
+       /*
+        * Write the file header --- currently just a format ID.
+        */
+       format_id = PGSTAT_FILE_FORMAT_ID;
+       rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+       (void) rc;                                      /* we'll check for 
error with ferror */
+
+       /*
+        * Write the timestamp.
+        */
+       rc = fwrite(&(globalStats.stats_timestamp), 
sizeof(globalStats.stats_timestamp), 1, fpout);
+       (void) rc;                                      /* we'll check for 
error with ferror */
+
+       /*
+        * Walk through the database's access stats per table.
+        */
+       hash_seq_init(&tstat, dbentry->tables);
+       while ((tabentry = (PgStat_StatTabEntry *) hash_seq_search(&tstat)) != 
NULL)
+       {
+               fputc('T', fpout);
+               rc = fwrite(tabentry, sizeof(PgStat_StatTabEntry), 1, fpout);
+               (void) rc;                      /* we'll check for error with 
ferror */
        }
 
+       /*
+        * Walk through the database's function stats table.
+        */
+       hash_seq_init(&fstat, dbentry->functions);
+       while ((funcentry = (PgStat_StatFuncEntry *) hash_seq_search(&fstat)) 
!= NULL)
+       {
+               fputc('F', fpout);
+               rc = fwrite(funcentry, sizeof(PgStat_StatFuncEntry), 1, fpout);
+               (void) rc;                      /* we'll check for error with 
ferror */
+       }
+
+       /*
+        * No more output to be done. Close the temp file and replace the old
+        * per-database stats file with it.  The ferror() check replaces testing for error
+        * after each individual fputc or fwrite above.
+        */
+       fputc('E', fpout);
+
+       if (ferror(fpout))
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                          errmsg("could not write temporary statistics file 
\"%s\": %m",
+                                         db_tmpfile)));
+               FreeFile(fpout);
+               unlink(db_tmpfile);
+       }
+       else if (FreeFile(fpout) < 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                          errmsg("could not close temporary statistics file 
\"%s\": %m",
+                                         db_tmpfile)));
+               unlink(db_tmpfile);
+       }
+       else if (rename(db_tmpfile, db_statfile) < 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not rename temporary statistics 
file \"%s\" to \"%s\": %m",
+                                               db_tmpfile, db_statfile)));
+               unlink(db_tmpfile);
+       }
+       
        if (permanent)
-               unlink(pgstat_stat_filename);
+       {
+               char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+               snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+                                pgstat_stat_db_filename, dbentry->databaseid);
+               elog(DEBUG1, "removing temporary stat file '%s'", db_statfile);
+               unlink(db_statfile);
+       }
 }
 
 
 /* ----------
+ * pgstat_write_db_dummyfile() -
+ *
+ *     All this does is writing a dummy stat file for databases without dbentry
+ *     yet. It basically writes just a file header - format ID and a timestamp.
+ * ----------
+ */
+static void
+pgstat_write_db_dummyfile(Oid databaseid)
+{
+       FILE       *fpout;
+       int32           format_id;
+       int                     rc;
+
+       /*
+        * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 
byte
+        */
+       char db_tmpfile[strlen(pgstat_stat_db_tmpname) + 11];
+       char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+
+       /*
+        * Append database OID at the end of the basic filename (both for tmp 
and target file).
+        */
+       snprintf(db_tmpfile, strlen(pgstat_stat_db_tmpname) + 11, 
pgstat_stat_db_tmpname, databaseid);
+       snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11, 
pgstat_stat_db_filename, databaseid);
+
+       elog(DEBUG1, "writing statsfile '%s'", db_statfile);
+
+       /*
+        * Open the statistics temp file to write out the current values.
+        */
+       fpout = AllocateFile(db_tmpfile, PG_BINARY_W);
+       if (fpout == NULL)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not open temporary statistics 
file \"%s\": %m",
+                                               db_tmpfile)));
+               return;
+       }
+
+       /*
+        * Write the file header --- currently just a format ID.
+        */
+       format_id = PGSTAT_FILE_FORMAT_ID;
+       rc = fwrite(&format_id, sizeof(format_id), 1, fpout);
+       (void) rc;                                      /* we'll check for 
error with ferror */
+
+       /*
+        * Write the timestamp.
+        */
+       rc = fwrite(&(globalStats.stats_timestamp), 
sizeof(globalStats.stats_timestamp), 1, fpout);
+       (void) rc;                                      /* we'll check for 
error with ferror */
+
+       /*
+        * No more output to be done. Close the temp file and replace the old
+        * per-database stats file with it.  The ferror() check replaces testing for error
+        * after each individual fputc or fwrite above.
+        */
+       fputc('E', fpout);
+
+       if (ferror(fpout))
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                          errmsg("could not write temporary dummy statistics 
file \"%s\": %m",
+                                         db_tmpfile)));
+               FreeFile(fpout);
+               unlink(db_tmpfile);
+       }
+       else if (FreeFile(fpout) < 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                          errmsg("could not close temporary dummy statistics 
file \"%s\": %m",
+                                         db_tmpfile)));
+               unlink(db_tmpfile);
+       }
+       else if (rename(db_tmpfile, db_statfile) < 0)
+       {
+               ereport(LOG,
+                               (errcode_for_file_access(),
+                                errmsg("could not rename temporary dummy 
statistics file \"%s\" to \"%s\": %m",
+                                               db_tmpfile, db_statfile)));
+               unlink(db_tmpfile);
+       }
+
+}
+
+/* ----------
  * pgstat_read_statsfile() -
  *
  *     Reads in an existing statistics collector file and initializes the
  *     databases' hash table (whose entries point to the tables' hash tables).
+ * 
+ *     Allows reading only the global stats (at database level), which is just
+ *     enough for many purposes (e.g. autovacuum launcher etc.). If this is
+ *     sufficient for you, use onlydbs=true.
  * ----------
  */
 static HTAB *
-pgstat_read_statsfile(Oid onlydb, bool permanent)
+pgstat_read_statsfile(Oid onlydb, bool permanent, bool onlydbs)
 {
        PgStat_StatDBEntry *dbentry;
        PgStat_StatDBEntry dbbuf;
-       PgStat_StatTabEntry *tabentry;
-       PgStat_StatTabEntry tabbuf;
-       PgStat_StatFuncEntry funcbuf;
-       PgStat_StatFuncEntry *funcentry;
        HASHCTL         hash_ctl;
        HTAB       *dbhash;
        HTAB       *tabhash = NULL;
@@ -3613,6 +3865,11 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
        const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : 
pgstat_stat_filename;
 
        /*
+        * If we want db-level stats only, we must not ask for a particular db.
+        */
+       Assert(!((onlydb != InvalidOid) && onlydbs));
+
+       /*
         * The tables will live in pgStatLocalContext.
         */
        pgstat_setup_memcxt();
@@ -3758,6 +4015,16 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                 */
                                tabhash = dbentry->tables;
                                funchash = dbentry->functions;
+
+                               /*
+                                * Read the data from the file for this 
database. If there was
+                                * onlydb specified (!= InvalidOid), we would 
not get here because
+                                * of a break above. So we don't need to 
recheck.
+                                */
+                               if (! onlydbs)
+                                       
pgstat_read_db_statsfile(dbentry->databaseid, tabhash, funchash,
+                                                                               
        permanent);
+
                                break;
 
                                /*
@@ -3768,6 +4035,105 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                funchash = NULL;
                                break;
 
+                       case 'E':
+                               goto done;
+
+                       default:
+                               ereport(pgStatRunningInCollector ? LOG : 
WARNING,
+                                               (errmsg("corrupted statistics 
file \"%s\"",
+                                                               statfile)));
+                               goto done;
+               }
+       }
+
+done:
+       FreeFile(fpin);
+
+       if (permanent)
+               unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+
+       return dbhash;
+}
+
+
+/* ----------
+ * pgstat_read_db_statsfile() -
+ *
+ *     Reads in an existing statistics collector db file and initializes the
+ *     tables and functions hash tables (for the database identified by Oid).
+ * ----------
+ */
+static void
+pgstat_read_db_statsfile(Oid databaseid, HTAB *tabhash, HTAB *funchash, bool 
permanent)
+{
+       PgStat_StatTabEntry *tabentry;
+       PgStat_StatTabEntry tabbuf;
+       PgStat_StatFuncEntry funcbuf;
+       PgStat_StatFuncEntry *funcentry;
+       FILE       *fpin;
+       int32           format_id;
+       TimestampTz timestamp;
+       bool            found;
+       const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : 
pgstat_stat_db_filename;
+
+       /*
+        * OIDs are 32-bit values, so 10 chars should be safe, +1 for the \0 
byte
+        */
+       char db_statfile[strlen(statfile) + 11];
+
+       /*
+        * Append database OID at the end of the basic filename (both for tmp 
and target file).
+        */
+       snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
+
+       /*
+        * Try to open the status file. If it doesn't exist, the backends simply
+        * return zero for anything and the collector simply starts from scratch
+        * with empty counters.
+        *
+        * ENOENT is a possibility if the stats collector is not running or has
+        * not yet written the stats file the first time.  Any other failure
+        * condition is suspicious.
+        */
+       if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
+       {
+               if (errno != ENOENT)
+                       ereport(pgStatRunningInCollector ? LOG : WARNING,
+                                       (errcode_for_file_access(),
+                                        errmsg("could not open statistics file 
\"%s\": %m",
+                                                       db_statfile)));
+               return;
+       }
+
+       /*
+        * Verify it's of the expected format.
+        */
+       if (fread(&format_id, 1, sizeof(format_id), fpin) != sizeof(format_id)
+               || format_id != PGSTAT_FILE_FORMAT_ID)
+       {
+               ereport(pgStatRunningInCollector ? LOG : WARNING,
+                               (errmsg("corrupted statistics file \"%s\"", 
db_statfile)));
+               goto done;
+       }
+
+       /*
+        * Read the timestamp stored right after the format ID.
+        */
+       if (fread(&timestamp, 1, sizeof(timestamp), fpin) != sizeof(timestamp))
+       {
+               ereport(pgStatRunningInCollector ? LOG : WARNING,
+                               (errmsg("corrupted statistics file \"%s\"", 
db_statfile)));
+               goto done;
+       }
+
+       /*
+        * We found an existing collector stats file. Read it and put all the
+        * hashtable entries into place.
+        */
+       for (;;)
+       {
+               switch (fgetc(fpin))
+               {
                                /*
                                 * 'T'  A PgStat_StatTabEntry follows.
                                 */
@@ -3777,7 +4143,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                {
                                        ereport(pgStatRunningInCollector ? LOG 
: WARNING,
                                                        (errmsg("corrupted 
statistics file \"%s\"",
-                                                                       
statfile)));
+                                                                       
db_statfile)));
                                        goto done;
                                }
 
@@ -3795,7 +4161,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                {
                                        ereport(pgStatRunningInCollector ? LOG 
: WARNING,
                                                        (errmsg("corrupted 
statistics file \"%s\"",
-                                                                       
statfile)));
+                                                                       
db_statfile)));
                                        goto done;
                                }
 
@@ -3811,7 +4177,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                {
                                        ereport(pgStatRunningInCollector ? LOG 
: WARNING,
                                                        (errmsg("corrupted 
statistics file \"%s\"",
-                                                                       
statfile)));
+                                                                       
db_statfile)));
                                        goto done;
                                }
 
@@ -3829,7 +4195,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                                {
                                        ereport(pgStatRunningInCollector ? LOG 
: WARNING,
                                                        (errmsg("corrupted 
statistics file \"%s\"",
-                                                                       
statfile)));
+                                                                       
db_statfile)));
                                        goto done;
                                }
 
@@ -3845,7 +4211,7 @@ pgstat_read_statsfile(Oid onlydb, bool permanent)
                        default:
                                ereport(pgStatRunningInCollector ? LOG : 
WARNING,
                                                (errmsg("corrupted statistics 
file \"%s\"",
-                                                               statfile)));
+                                                               db_statfile)));
                                goto done;
                }
        }
@@ -3854,37 +4220,47 @@ done:
        FreeFile(fpin);
 
        if (permanent)
-               unlink(PGSTAT_STAT_PERMANENT_FILENAME);
+       {
+               char db_statfile[strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) + 
11];
+               snprintf(db_statfile, strlen(PGSTAT_STAT_PERMANENT_DB_FILENAME) 
+ 11,
+                                PGSTAT_STAT_PERMANENT_DB_FILENAME, databaseid);
+               elog(DEBUG1, "removing permanent stats file '%s'", db_statfile);
+               unlink(db_statfile);
+       }
 
-       return dbhash;
+       return;
 }
 
 /* ----------
- * pgstat_read_statsfile_timestamp() -
+ * pgstat_read_db_statsfile_timestamp() -
  *
- *     Attempt to fetch the timestamp of an existing stats file.
+ *     Attempt to fetch the timestamp of an existing stats file (for a DB).
  *     Returns TRUE if successful (timestamp is stored at *ts).
  * ----------
  */
 static bool
-pgstat_read_statsfile_timestamp(bool permanent, TimestampTz *ts)
+pgstat_read_db_statsfile_timestamp(Oid databaseid, bool permanent, TimestampTz 
*ts)
 {
-       PgStat_GlobalStats myGlobalStats;
+       TimestampTz timestamp;
        FILE       *fpin;
        int32           format_id;
-       const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_FILENAME : 
pgstat_stat_filename;
+       const char *statfile = permanent ? PGSTAT_STAT_PERMANENT_DB_FILENAME : 
pgstat_stat_db_filename;
+       char db_statfile[strlen(statfile) + 11];
+
+       /* format the db statfile filename */
+       snprintf(db_statfile, strlen(statfile) + 11, statfile, databaseid);
 
        /*
         * Try to open the status file.  As above, anything but ENOENT is worthy
         * of complaining about.
         */
-       if ((fpin = AllocateFile(statfile, PG_BINARY_R)) == NULL)
+       if ((fpin = AllocateFile(db_statfile, PG_BINARY_R)) == NULL)
        {
                if (errno != ENOENT)
                        ereport(pgStatRunningInCollector ? LOG : WARNING,
                                        (errcode_for_file_access(),
                                         errmsg("could not open statistics file 
\"%s\": %m",
-                                                       statfile)));
+                                                       db_statfile)));
                return false;
        }
 
@@ -3895,7 +4271,7 @@ pgstat_read_statsfile_timestamp(bool permanent, 
TimestampTz *ts)
                || format_id != PGSTAT_FILE_FORMAT_ID)
        {
                ereport(pgStatRunningInCollector ? LOG : WARNING,
-                               (errmsg("corrupted statistics file \"%s\"", 
statfile)));
+                               (errmsg("corrupted statistics file \"%s\"", 
db_statfile)));
                FreeFile(fpin);
                return false;
        }
@@ -3903,15 +4279,15 @@ pgstat_read_statsfile_timestamp(bool permanent, 
TimestampTz *ts)
        /*
         * Read global stats struct
         */
-       if (fread(&myGlobalStats, 1, sizeof(myGlobalStats), fpin) != 
sizeof(myGlobalStats))
+       if (fread(&timestamp, 1, sizeof(TimestampTz), fpin) != 
sizeof(TimestampTz))
        {
                ereport(pgStatRunningInCollector ? LOG : WARNING,
-                               (errmsg("corrupted statistics file \"%s\"", 
statfile)));
+                               (errmsg("corrupted statistics file \"%s\"", 
db_statfile)));
                FreeFile(fpin);
                return false;
        }
 
-       *ts = myGlobalStats.stats_timestamp;
+       *ts = timestamp;
 
        FreeFile(fpin);
        return true;
@@ -3947,7 +4323,7 @@ backend_read_statsfile(void)
 
                CHECK_FOR_INTERRUPTS();
 
-               ok = pgstat_read_statsfile_timestamp(false, &file_ts);
+               ok = pgstat_read_db_statsfile_timestamp(MyDatabaseId, false, 
&file_ts);
 
                cur_ts = GetCurrentTimestamp();
                /* Calculate min acceptable timestamp, if we didn't already */
@@ -4006,7 +4382,7 @@ backend_read_statsfile(void)
                                pfree(mytime);
                        }
 
-                       pgstat_send_inquiry(cur_ts, min_ts);
+                       pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
                        break;
                }
 
@@ -4016,7 +4392,7 @@ backend_read_statsfile(void)
 
                /* Not there or too old, so kick the collector and wait a bit */
                if ((count % PGSTAT_INQ_LOOP_COUNT) == 0)
-                       pgstat_send_inquiry(cur_ts, min_ts);
+                       pgstat_send_inquiry(cur_ts, min_ts, MyDatabaseId);
 
                pg_usleep(PGSTAT_RETRY_DELAY * 1000L);
        }
@@ -4026,9 +4402,16 @@ backend_read_statsfile(void)
 
        /* Autovacuum launcher wants stats about all databases */
        if (IsAutoVacuumLauncherProcess())
-               pgStatDBHash = pgstat_read_statsfile(InvalidOid, false);
+               /* 
+                * FIXME Does it really need info including tables/functions? 
Or is it enough to read
+                * database-level stats? It seems to me the launcher needs 
PgStat_StatDBEntry only
+                * (at least that's how I understand the 
rebuild_database_list() in autovacuum.c),
+                * because pgstat_stattabentries are used in do_autovacuum() 
only, and that's what's
+                * executed in workers ... So maybe we'd be just fine by 
reading in the dbentries?
+                */
+               pgStatDBHash = pgstat_read_statsfile(InvalidOid, false, true);
        else
-               pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false);
+               pgStatDBHash = pgstat_read_statsfile(MyDatabaseId, false, 
false);
 }
 
 
@@ -4084,44 +4467,84 @@ pgstat_clear_snapshot(void)
 static void
 pgstat_recv_inquiry(PgStat_MsgInquiry *msg, int len)
 {
-       /*
-        * Advance last_statrequest if this requestor has a newer cutoff time
-        * than any previous request.
-        */
-       if (msg->cutoff_time > last_statrequest)
-               last_statrequest = msg->cutoff_time;
+       int i = 0;
+       bool found = false;
+       PgStat_StatDBEntry *dbentry;
+
+       elog(DEBUG1, "received inquiry for %d", msg->databaseid);
 
        /*
-        * If the requestor's local clock time is older than last_statwrite, we
-        * should suspect a clock glitch, ie system time going backwards; though
-        * the more likely explanation is just delayed message receipt.  It is
-        * worth expending a GetCurrentTimestamp call to be sure, since a large
-        * retreat in the system clock reading could otherwise cause us to 
neglect
-        * to update the stats file for a long time.
+        * Find the last write request for this DB (found=true in that case). 
Plain
+        * linear search, not really worth doing any magic here (probably).
         */
-       if (msg->clock_time < last_statwrite)
+       for (i = 0; i < num_statrequests; i++)
+       {
+               if (last_statrequests[i].databaseid == msg->databaseid)
+               {
+                       found = true;
+                       break;
+               }
+       }
+       
+       if (found)
+       {
+               /*
+                * There already is a request for this DB, so let's advance the
+                * request time if this requestor has a newer cutoff time
+                * than any previous request.
+                */
+               if (msg->cutoff_time > last_statrequests[i].request_time)
+                       last_statrequests[i].request_time = msg->cutoff_time;
+       }
+       else
        {
-               TimestampTz cur_ts = GetCurrentTimestamp();
+               /*
+                * There's no request for this DB yet, so let's create it 
(allocate
+                * space for it, set the values).
+                */
+               if (last_statrequests == NULL)
+                       last_statrequests = palloc(sizeof(DBWriteRequest));
+               else
+                       last_statrequests = repalloc(last_statrequests,
+                                                               
(num_statrequests + 1)*sizeof(DBWriteRequest));
+               
+               last_statrequests[num_statrequests].databaseid = 
msg->databaseid;
+               last_statrequests[num_statrequests].request_time = 
msg->clock_time;
+               num_statrequests += 1;
 
-               if (cur_ts < last_statwrite)
+               /*
+               * If the requestor's local clock time is older than the 
database's stats_timestamp, we
+               * should suspect a clock glitch, ie system time going 
backwards; though
+               * the more likely explanation is just delayed message receipt.  
It is
+               * worth expending a GetCurrentTimestamp call to be sure, since 
a large
+               * retreat in the system clock reading could otherwise cause us 
to neglect
+               * to update the stats file for a long time.
+               */
+               dbentry = pgstat_get_db_entry(msg->databaseid, false);
+               if ((dbentry != NULL) && (msg->clock_time < 
dbentry->stats_timestamp))
                {
-                       /*
-                        * Sure enough, time went backwards.  Force a new stats 
file write
-                        * to get back in sync; but first, log a complaint.
-                        */
-                       char       *writetime;
-                       char       *mytime;
-
-                       /* Copy because timestamptz_to_str returns a static 
buffer */
-                       writetime = pstrdup(timestamptz_to_str(last_statwrite));
-                       mytime = pstrdup(timestamptz_to_str(cur_ts));
-                       elog(LOG, "last_statwrite %s is later than collector's 
time %s",
-                                writetime, mytime);
-                       pfree(writetime);
-                       pfree(mytime);
-
-                       last_statrequest = cur_ts;
-                       last_statwrite = last_statrequest - 1;
+                       TimestampTz cur_ts = GetCurrentTimestamp();
+
+                       if (cur_ts < dbentry->stats_timestamp)
+                       {
+                               /*
+                               * Sure enough, time went backwards.  Force a 
new stats file write
+                               * to get back in sync; but first, log a 
complaint.
+                               */
+                               char       *writetime;
+                               char       *mytime;
+
+                               /* Copy because timestamptz_to_str returns a 
static buffer */
+                               writetime = 
pstrdup(timestamptz_to_str(dbentry->stats_timestamp));
+                               mytime = pstrdup(timestamptz_to_str(cur_ts));
+                               elog(LOG, "last_statwrite %s is later than 
collector's time %s for "
+                                       "db %d", writetime, mytime, 
dbentry->databaseid);
+                               pfree(writetime);
+                               pfree(mytime);
+
+                               
last_statrequests[num_statrequests].request_time = cur_ts;
+                               dbentry->stats_timestamp = cur_ts - 1;
+                       }
                }
        }
 }
@@ -4278,10 +4701,17 @@ pgstat_recv_dropdb(PgStat_MsgDropdb *msg, int len)
        dbentry = pgstat_get_db_entry(msg->m_databaseid, false);
 
        /*
-        * If found, remove it.
+        * If found, remove it (along with the db statfile).
         */
        if (dbentry)
        {
+               char db_statfile[strlen(pgstat_stat_db_filename) + 11];
+               snprintf(db_statfile, strlen(pgstat_stat_db_filename) + 11,
+                                pgstat_stat_filename, dbentry->databaseid);
+               
+               elog(DEBUG1, "removing %s", db_statfile);
+               unlink(db_statfile);
+               
                if (dbentry->tables != NULL)
                        hash_destroy(dbentry->tables);
                if (dbentry->functions != NULL)
@@ -4687,3 +5117,58 @@ pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len)
                                                   HASH_REMOVE, NULL);
        }
 }
+
+/* ----------
+ * pgstat_write_statsfile_needed() -
+ *
+ *     Checks whether there's a db stats request, requiring a file write.
+ * 
+ *     TODO Seems that thanks to the way we handle last_statrequests (erase after
+ *     a write), this is unnecessary. Just check that there's at least one
+ *     request and you're done. Although there might be delayed requests ...
+ * ----------
+ */
+
+static bool pgstat_write_statsfile_needed()
+{
+       int i = 0;
+       PgStat_StatDBEntry *dbentry;
+       
+       /* Check the databases if they need to refresh the stats. */
+       for (i = 0; i < num_statrequests; i++)
+       {
+               dbentry = pgstat_get_db_entry(last_statrequests[i].databaseid, 
false);
+               
+               /* No dbentry yet or too old. */
+               if ((! dbentry) ||
+                       (dbentry->stats_timestamp < 
last_statrequests[i].request_time)) {
+                       return true;
+               }
+               
+       }
+       
+       /* Well, everything was written recently ... */
+       return false;
+}
+
+/* ----------
+ * pgstat_db_requested() -
+ *
+ *     Checks whether stats for a particular DB need to be written to a file.
+ * ----------
+ */
+
+static bool
+pgstat_db_requested(Oid databaseid)
+{
+       int i = 0;
+       
+       /* Look for a pending write request for this database. */
+       for (i = 0; i < num_statrequests; i++)
+       {
+               if (last_statrequests[i].databaseid == databaseid)
+                       return true;
+       }
+       
+       return false;
+}
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 2cf34ce..e3e432b 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -8730,20 +8730,43 @@ static void
 assign_pgstat_temp_directory(const char *newval, void *extra)
 {
        /* check_canonical_path already canonicalized newval for us */
+       char       *dname;
        char       *tname;
        char       *fname;
-
-       tname = guc_malloc(ERROR, strlen(newval) + 12);         /* /pgstat.tmp 
*/
-       sprintf(tname, "%s/pgstat.tmp", newval);
-       fname = guc_malloc(ERROR, strlen(newval) + 13);         /* /pgstat.stat 
*/
-       sprintf(fname, "%s/pgstat.stat", newval);
-
+       char       *tname_db;
+       char       *fname_db;
+
+       /* directory */
+       dname = guc_malloc(ERROR, strlen(newval) + 1);          /* runtime dir 
*/
+       sprintf(dname, "%s", newval);
+
+       /* global stats */
+       tname = guc_malloc(ERROR, strlen(newval) + 12);         /* /global.tmp 
*/
+       sprintf(tname, "%s/global.tmp", newval);
+       fname = guc_malloc(ERROR, strlen(newval) + 13);         /* /global.stat 
*/
+       sprintf(fname, "%s/global.stat", newval);
+
+       /* per-db stats */
+       tname_db = guc_malloc(ERROR, strlen(newval) + 8);               /* 
/%d.tmp */
+       sprintf(tname_db, "%s/%%d.tmp", newval);
+       fname_db = guc_malloc(ERROR, strlen(newval) + 9);               /* 
/%d.stat */
+       sprintf(fname_db, "%s/%%d.stat", newval);
+
+       if (pgstat_stat_directory)
+               free(pgstat_stat_directory);
+       pgstat_stat_directory = dname;
        if (pgstat_stat_tmpname)
                free(pgstat_stat_tmpname);
        pgstat_stat_tmpname = tname;
        if (pgstat_stat_filename)
                free(pgstat_stat_filename);
        pgstat_stat_filename = fname;
+       if (pgstat_stat_db_tmpname)
+               free(pgstat_stat_db_tmpname);
+       pgstat_stat_db_tmpname = tname_db;
+       if (pgstat_stat_db_filename)
+               free(pgstat_stat_db_filename);
+       pgstat_stat_db_filename = fname_db;
 }
 
 static bool
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 3e05ac3..a8a2639 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -179,6 +179,7 @@ char           *restrict_env;
 #endif
 const char *subdirs[] = {
        "global",
+       "stat",
        "pg_xlog",
        "pg_xlog/archive_status",
        "pg_clog",
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 613c1c2..b3467d2 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -205,6 +205,7 @@ typedef struct PgStat_MsgInquiry
        PgStat_MsgHdr m_hdr;
        TimestampTz clock_time;         /* observed local clock time */
        TimestampTz cutoff_time;        /* minimum acceptable file timestamp */
+       Oid                     databaseid;             /* requested DB 
(InvalidOid => all DBs) */
 } PgStat_MsgInquiry;
 
 
@@ -514,7 +515,7 @@ typedef union PgStat_Msg
  * ------------------------------------------------------------
  */
 
-#define PGSTAT_FILE_FORMAT_ID  0x01A5BC9A
+#define PGSTAT_FILE_FORMAT_ID  0xA240CA47
 
 /* ----------
  * PgStat_StatDBEntry                  The collector's data per database
@@ -545,6 +546,7 @@ typedef struct PgStat_StatDBEntry
        PgStat_Counter n_block_write_time;
 
        TimestampTz stat_reset_timestamp;
+       TimestampTz stats_timestamp;            /* time of db stats file update 
*/
 
        /*
         * tables and functions must be last in the struct, because we don't 
write
@@ -722,8 +724,11 @@ extern bool pgstat_track_activities;
 extern bool pgstat_track_counts;
 extern int     pgstat_track_functions;
 extern PGDLLIMPORT int pgstat_track_activity_query_size;
+extern char *pgstat_stat_directory;
 extern char *pgstat_stat_tmpname;
 extern char *pgstat_stat_filename;
+extern char *pgstat_stat_db_tmpname;
+extern char *pgstat_stat_db_filename;
 
 /*
  * BgWriter statistics counters are updated directly by bgwriter and bufmgr
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to