I have some time now for working on the mystery of why there are no block write statistics in the database. I hacked out the statistics collection and reporting side already, rough patch attached if you want to see the boring parts.

I guessed that there had to be a gotcha behind why this wasn't done before now, and I've found one so far. All of the read statistics are collected with a macro that expects to know a Relation number. Callers to ReadBuffer pass one. On the write side, the two places that increment the existing write counters (pg_stat_statements, vacuum) are MarkBufferDirty and MarkBufferDirtyHint. Neither of those is given a Relation though. Inspecting the Buffer passed can only find the buffer tag's RelFileNode.

I've thought of two paths to get a block write count out of that so far:

-Provide a function to find the Relation from the RelFileNode. There is a warning about the perils of assuming you can map that way from a buftag value in buf_internals.h though:

"Note: the BufferTag data must be sufficient to determine where to write the block, without reference to pg_class or pg_tablespace entries. It's possible that the backend flushing the buffer doesn't even believe the relation is visible yet (its xact may have started before the xact that created the rel). The storage manager must be able to cope anyway."

-Modify every caller of MarkDirty* to include a relation when that information is available. There are about 200 callers of those functions around, so that won't be fun. I noted that many of them are in the XLog replay functions, which won't have the relation--but those aren't important to count anyway.

Neither of these options feels very good to me, so selecting between the two feels like picking the lesser of two evils. I'm fine with chugging through all of the callers to modify MarkDirty*, but I didn't want to do that only to have the whole approach rejected as wrong. That's why I stopped here to get some feedback.

The way that MarkDirty requires this specific underlying storage manager behavior to work properly strikes me as as a bit of a layering violation too. I'd like the read and write paths to have a similar API, but here they don't even operate on the same type of inputs. Addressing that is probably harder than just throwing a hack on the existing code though.

--
Greg Smith   2ndQuadrant US    g...@2ndquadrant.com   Baltimore, MD
PostgreSQL Training, Services, and 24x7 Support www.2ndQuadrant.com
diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql
index a03bfa6..1773d59 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -467,15 +467,19 @@ CREATE VIEW pg_statio_all_tables AS
             pg_stat_get_blocks_fetched(C.oid) -
                     pg_stat_get_blocks_hit(C.oid) AS heap_blks_read,
             pg_stat_get_blocks_hit(C.oid) AS heap_blks_hit,
+            pg_stat_get_blocks_dirtied(C.oid) AS heap_blks_dirtied,
             sum(pg_stat_get_blocks_fetched(I.indexrelid) -
                     pg_stat_get_blocks_hit(I.indexrelid))::bigint AS 
idx_blks_read,
             sum(pg_stat_get_blocks_hit(I.indexrelid))::bigint AS idx_blks_hit,
+            sum(pg_stat_get_blocks_dirtied(I.indexrelid))::bigint AS 
idx_blks_dirtied,
             pg_stat_get_blocks_fetched(T.oid) -
                     pg_stat_get_blocks_hit(T.oid) AS toast_blks_read,
             pg_stat_get_blocks_hit(T.oid) AS toast_blks_hit,
+            pg_stat_get_blocks_dirtied(T.oid) AS toast_blks_dirtied,
             pg_stat_get_blocks_fetched(X.oid) -
                     pg_stat_get_blocks_hit(X.oid) AS tidx_blks_read,
             pg_stat_get_blocks_hit(X.oid) AS tidx_blks_hit
+            pg_stat_get_blocks_dirted(X.oid) AS tidx_blks_dirtied
     FROM pg_class C LEFT JOIN
             pg_index I ON C.oid = I.indrelid LEFT JOIN
             pg_class T ON C.reltoastrelid = T.oid LEFT JOIN
@@ -530,6 +534,7 @@ CREATE VIEW pg_statio_all_indexes AS
             pg_stat_get_blocks_fetched(I.oid) -
                     pg_stat_get_blocks_hit(I.oid) AS idx_blks_read,
             pg_stat_get_blocks_hit(I.oid) AS idx_blks_hit
+            pg_stat_get_blocks_dirtied(I.oid) AS idx_blks_dirtied
     FROM pg_class C JOIN
             pg_index X ON C.oid = X.indrelid JOIN
             pg_class I ON I.oid = X.indexrelid
@@ -554,6 +559,7 @@ CREATE VIEW pg_statio_all_sequences AS
             pg_stat_get_blocks_fetched(C.oid) -
                     pg_stat_get_blocks_hit(C.oid) AS blks_read,
             pg_stat_get_blocks_hit(C.oid) AS blks_hit
+            pg_stat_get_blocks_dirtied(C.oid) AS blks_dirtied
     FROM pg_class C
             LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
     WHERE C.relkind = 'S';
@@ -622,6 +628,7 @@ CREATE VIEW pg_stat_database AS
             pg_stat_get_db_blocks_fetched(D.oid) -
                     pg_stat_get_db_blocks_hit(D.oid) AS blks_read,
             pg_stat_get_db_blocks_hit(D.oid) AS blks_hit,
+            pg_stat_get_db_blocks_dirtied(D.oid) AS blks_dirtied,
             pg_stat_get_db_tuples_returned(D.oid) AS tup_returned,
             pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched,
             pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 29d986a..2a720f4 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -3350,6 +3350,7 @@ reset_dbentry_counters(PgStat_StatDBEntry *dbentry)
        dbentry->n_xact_rollback = 0;
        dbentry->n_blocks_fetched = 0;
        dbentry->n_blocks_hit = 0;
+       dbentry->n_blocks_dirtied = 0;
        dbentry->n_tuples_returned = 0;
        dbentry->n_tuples_fetched = 0;
        dbentry->n_tuples_inserted = 0;
@@ -3454,6 +3455,7 @@ pgstat_get_tab_entry(PgStat_StatDBEntry *dbentry, Oid 
tableoid, bool create)
                result->changes_since_analyze = 0;
                result->blocks_fetched = 0;
                result->blocks_hit = 0;
+               result->blocks_dirtied = 0;
                result->vacuum_timestamp = 0;
                result->vacuum_count = 0;
                result->autovac_vacuum_timestamp = 0;
@@ -4517,6 +4519,7 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
                        tabentry->changes_since_analyze = 
tabmsg->t_counts.t_changed_tuples;
                        tabentry->blocks_fetched = 
tabmsg->t_counts.t_blocks_fetched;
                        tabentry->blocks_hit = tabmsg->t_counts.t_blocks_hit;
+                       tabentry->blocks_dirtied = 
tabmsg->t_counts.t_blocks_dirtied;
 
                        tabentry->vacuum_timestamp = 0;
                        tabentry->vacuum_count = 0;
@@ -4544,6 +4547,7 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
                        tabentry->changes_since_analyze += 
tabmsg->t_counts.t_changed_tuples;
                        tabentry->blocks_fetched += 
tabmsg->t_counts.t_blocks_fetched;
                        tabentry->blocks_hit += tabmsg->t_counts.t_blocks_hit;
+                       tabentry->blocks_dirtied += 
tabmsg->t_counts.t_blocks_dirtied;
                }
 
                /* Clamp n_live_tuples in case of negative delta_live_tuples */
@@ -4561,6 +4565,7 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
                dbentry->n_tuples_deleted += tabmsg->t_counts.t_tuples_deleted;
                dbentry->n_blocks_fetched += tabmsg->t_counts.t_blocks_fetched;
                dbentry->n_blocks_hit += tabmsg->t_counts.t_blocks_hit;
+               dbentry->n_blocks_dirtied += tabmsg->t_counts.t_blocks_dirtied;
        }
 }
 
diff --git a/src/backend/storage/buffer/bufmgr.c 
b/src/backend/storage/buffer/bufmgr.c
index 1c41428..9009d9e 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -1009,6 +1009,15 @@ MarkBufferDirty(Buffer buffer)
         */
        if (!(bufHdr->flags & BM_DIRTY))
        {
+               /*
+                * TODO This is not a legitimate lookup of the relation that
+                * pgstat_count_buffer_dirtied is normally looking for.
+                * This is the relation relfilnode.  Comments in buf_internals.h
+                * point out that the buffer manager might be writing data for
+                * relations that aren't visible yet.
+                */
+               pgstat_count_buffer_dirtied(bufHdr->tag->rnode);
+
                VacuumPageDirty++;
                pgBufferUsage.shared_blks_dirtied++;
                if (VacuumCostActive)
@@ -2700,6 +2709,14 @@ MarkBufferDirtyHint(Buffer buffer)
 
                if (dirtied)
                {
+                       /*
+                        * TODO This is not a legitimate lookup of the relation 
that
+                        * pgstat_count_buffer_dirtied is normally looking for.
+                        * This is the relation relfilnode.  Comments in 
buf_internals.h
+                        * point out that the buffer manager might be writing 
data for
+                        * relations that aren't visible yet.
+                        */
+                       pgstat_count_buffer_dirtied(bufHdr->tag->rnode);
                        VacuumPageDirty++;
                        if (VacuumCostActive)
                                VacuumCostBalance += VacuumCostPageDirty;
diff --git a/src/backend/utils/adt/pgstatfuncs.c 
b/src/backend/utils/adt/pgstatfuncs.c
index 8c1a767..c9e28a3 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -36,6 +36,7 @@ extern Datum pg_stat_get_live_tuples(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_dead_tuples(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_blocks_fetched(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_blocks_hit(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_blocks_dirtied(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_last_vacuum_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_last_autovacuum_time(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_last_analyze_time(PG_FUNCTION_ARGS);
@@ -68,6 +69,7 @@ extern Datum pg_stat_get_db_xact_commit(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_xact_rollback(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_blocks_fetched(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_blocks_hit(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_db_blocks_dirtied(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_tuples_returned(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_tuples_fetched(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_db_tuples_inserted(PG_FUNCTION_ARGS);
@@ -107,6 +109,7 @@ extern Datum 
pg_stat_get_xact_tuples_deleted(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_tuples_hot_updated(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_blocks_fetched(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_blocks_hit(PG_FUNCTION_ARGS);
+extern Datum pg_stat_get_xact_blocks_dirtied(PG_FUNCTION_ARGS);
 
 extern Datum pg_stat_get_xact_function_calls(PG_FUNCTION_ARGS);
 extern Datum pg_stat_get_xact_function_total_time(PG_FUNCTION_ARGS);
@@ -297,6 +300,21 @@ pg_stat_get_blocks_hit(PG_FUNCTION_ARGS)
 }
 
 Datum
+pg_stat_get_blocks_dirtied(PG_FUNCTION_ARGS)
+{
+       Oid                     relid = PG_GETARG_OID(0);
+       int64           result;
+       PgStat_StatTabEntry *tabentry;
+
+       if ((tabentry = pgstat_fetch_stat_tabentry(relid)) == NULL)
+               result = 0;
+       else
+               result = (int64) (tabentry->blocks_dirtied);
+
+       PG_RETURN_INT64(result);
+}
+
+Datum
 pg_stat_get_last_vacuum_time(PG_FUNCTION_ARGS)
 {
        Oid                     relid = PG_GETARG_OID(0);
@@ -1120,6 +1138,22 @@ pg_stat_get_db_blocks_hit(PG_FUNCTION_ARGS)
 
 
 Datum
+pg_stat_get_db_blocks_dirtied(PG_FUNCTION_ARGS)
+{
+       Oid                     dbid = PG_GETARG_OID(0);
+       int64           result;
+       PgStat_StatDBEntry *dbentry;
+
+       if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+               result = 0;
+       else
+               result = (int64) (dbentry->n_blocks_dirtied);
+
+       PG_RETURN_INT64(result);
+}
+
+
+Datum
 pg_stat_get_db_tuples_returned(PG_FUNCTION_ARGS)
 {
        Oid                     dbid = PG_GETARG_OID(0);
@@ -1611,6 +1645,21 @@ pg_stat_get_xact_blocks_hit(PG_FUNCTION_ARGS)
 }
 
 Datum
+pg_stat_get_xact_blocks_dirtied(PG_FUNCTION_ARGS)
+{
+       Oid                     relid = PG_GETARG_OID(0);
+       int64           result;
+       PgStat_TableStatus *tabentry;
+
+       if ((tabentry = find_tabstat_entry(relid)) == NULL)
+               result = 0;
+       else
+               result = (int64) (tabentry->t_counts.t_blocks_dirtied);
+
+       PG_RETURN_INT64(result);
+}
+
+Datum
 pg_stat_get_xact_function_calls(PG_FUNCTION_ARGS)
 {
        Oid                     funcid = PG_GETARG_OID(0);
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index feecbf9..8a7f073 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -2597,6 +2597,8 @@ DATA(insert OID = 1934 (  pg_stat_get_blocks_fetched      
PGNSP PGUID 12 1 0 0 0 f f
 DESCR("statistics: number of blocks fetched");
 DATA(insert OID = 1935 (  pg_stat_get_blocks_hit               PGNSP PGUID 12 
1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_blocks_hit _null_ _null_ _null_ ));
 DESCR("statistics: number of blocks found in cache");
+DATA(insert OID = 3177 (  pg_stat_get_blocks_dirtied           PGNSP PGUID 12 
1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_blocks_hit _null_ _null_ _null_ ));
+DESCR("statistics: number of blocks dirtied");
 DATA(insert OID = 2781 (  pg_stat_get_last_vacuum_time PGNSP PGUID 12 1 0 0 0 
f f f f t f s 1 0 1184 "26" _null_ _null_ _null_ _null_  
pg_stat_get_last_vacuum_time _null_ _null_ _null_ ));
 DESCR("statistics: last manual vacuum time for a table");
 DATA(insert OID = 2782 (  pg_stat_get_last_autovacuum_time PGNSP PGUID 12 1 0 
0 0 f f f f t f s 1 0 1184 "26" _null_ _null_ _null_ _null_      
pg_stat_get_last_autovacuum_time _null_ _null_ _null_ ));
@@ -2651,6 +2653,8 @@ DATA(insert OID = 1944 (  pg_stat_get_db_blocks_fetched 
PGNSP PGUID 12 1 0 0 0 f
 DESCR("statistics: blocks fetched for database");
 DATA(insert OID = 1945 (  pg_stat_get_db_blocks_hit            PGNSP PGUID 12 
1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_db_blocks_hit _null_ _null_ _null_ ));
 DESCR("statistics: blocks found in cache for database");
+DATA(insert OID = 3178 (  pg_stat_get_db_blocks_dirtied                PGNSP 
PGUID 12 1 0 0 0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_db_blocks_hit _null_ _null_ _null_ ));
+DESCR("statistics: blocks dirtied in cache for database");
 DATA(insert OID = 2758 (  pg_stat_get_db_tuples_returned PGNSP PGUID 12 1 0 0 
0 f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_db_tuples_returned _null_ _null_ _null_ ));
 DESCR("statistics: tuples returned for database");
 DATA(insert OID = 2759 (  pg_stat_get_db_tuples_fetched PGNSP PGUID 12 1 0 0 0 
f f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_db_tuples_fetched _null_ _null_ _null_ ));
@@ -2733,6 +2737,8 @@ DATA(insert OID = 3044 (  pg_stat_get_xact_blocks_fetched 
        PGNSP PGUID 12 1 0 0
 DESCR("statistics: number of blocks fetched in current transaction");
 DATA(insert OID = 3045 (  pg_stat_get_xact_blocks_hit                  PGNSP 
PGUID 12 1 0 0 0 f f f f t f v 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_xact_blocks_hit _null_ _null_ _null_ ));
 DESCR("statistics: number of blocks found in cache in current transaction");
+DATA(insert OID = 3179 (  pg_stat_get_xact_blocks_dirtied              PGNSP 
PGUID 12 1 0 0 0 f f f f t f v 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_xact_blocks_hit _null_ _null_ _null_ ));
+DESCR("statistics: number of blocks dirtied in current transaction");
 DATA(insert OID = 3046 (  pg_stat_get_xact_function_calls              PGNSP 
PGUID 12 1 0 0 0 f f f f t f v 1 0 20 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_xact_function_calls _null_ _null_ _null_ ));
 DESCR("statistics: number of function calls in current transaction");
 DATA(insert OID = 3047 (  pg_stat_get_xact_function_total_time PGNSP PGUID 12 
1 0 0 0 f f f f t f v 1 0 701 "26" _null_ _null_ _null_ _null_ 
pg_stat_get_xact_function_total_time _null_ _null_ _null_ ));
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index fb242e4..d79577e 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -97,6 +97,7 @@ typedef struct PgStat_TableCounts
 
        PgStat_Counter t_blocks_fetched;
        PgStat_Counter t_blocks_hit;
+       PgStat_Counter t_blocks_dirtied;
 } PgStat_TableCounts;
 
 /* Possible targets for resetting cluster-wide shared values */
@@ -528,6 +529,7 @@ typedef struct PgStat_StatDBEntry
        PgStat_Counter n_xact_rollback;
        PgStat_Counter n_blocks_fetched;
        PgStat_Counter n_blocks_hit;
+       PgStat_Counter n_blocks_dirtied;
        PgStat_Counter n_tuples_returned;
        PgStat_Counter n_tuples_fetched;
        PgStat_Counter n_tuples_inserted;
@@ -581,6 +583,7 @@ typedef struct PgStat_StatTabEntry
 
        PgStat_Counter blocks_fetched;
        PgStat_Counter blocks_hit;
+       PgStat_Counter blocks_dirtied;
 
        TimestampTz vacuum_timestamp;           /* user initiated vacuum */
        PgStat_Counter vacuum_count;
@@ -834,6 +837,11 @@ extern void pgstat_initstats(Relation rel);
                if ((rel)->pgstat_info != NULL)                                 
                        \
                        (rel)->pgstat_info->t_counts.t_blocks_hit++;            
        \
        } while (0)
+#define pgstat_count_buffer_dirty(rel)                                         
                \
+       do {                                                                    
                                                \
+               if ((rel)->pgstat_info != NULL)                                 
                        \
+                       (rel)->pgstat_info->t_counts.t_blocks_dirty++;          
        \
+       } while (0)
 #define pgstat_count_buffer_read_time(n)                                       
                \
        (pgStatBlockReadTime += (n))
 #define pgstat_count_buffer_write_time(n)                                      
                \
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to