Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-17 Thread Fujii Masao
On Wed, Sep 11, 2013 at 12:43 PM, Satoshi Nagayasu sn...@uptime.jp wrote:
 (2013/09/10 22:48), Peter Eisentraut wrote:
 On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:
 Thanks for checking. Revised one attached.

 Please fix compiler warning:

 walwriter.c: In function ‘WalWriterMain’:
 walwriter.c:293:3: warning: implicit declaration of function
 ‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]

 Thanks. Fixed.

The patch looks good to me. I have some comments:

The description of pg_stat_reset_shared() should mention
pg_stat_walwriter in the document.

We should implement something like pg_stat_reset_shared('all') so that
we can easily reset
all cluster-wide statistics counters to zero?

Some background workers may write WAL because the WAL buffer is full. So it seems
you need to change those processes so that they can also increase the
xlog_dirty_writes
counter.

Regards,

-- 
Fujii Masao


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-10 Thread Satoshi Nagayasu
Thanks for checking. Revised one attached.

(2013/09/10 6:43), Peter Eisentraut wrote:
 On 9/6/13 11:32 PM, Satoshi Nagayasu wrote:
 The revised patch for wal buffer statistics is attached.
 A test script is also attached. Please take a look.
 
 You have duplicate OIDs.  Run the script duplicate_oids to find them.
 

-- 
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
 results in most cases.
/para
 
+   para
+When you see pg_stat_walwriter.dirty_write, which means number
+of buffer flushing at buffer full, is continuously increasing
+in your running server, you may need to enlarge this buffer
+size.
+   /para
+
   /listitem
  /varlistentry
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
  /row
 
  row
+  
entrystructnamepg_stat_walwriter/indextermprimarypg_stat_walwriter/primary/indexterm/entry
+  entryOne row only, showing statistics about the wal writer
+   process's activity. See xref linkend=pg-stat-walwriter-view
+   for details.
+ /entry
+ /row
+
+ row
   
entrystructnamepg_stat_database/indextermprimarypg_stat_database/primary/indexterm/entry
   entryOne row per database, showing database-wide statistics. See
xref linkend=pg-stat-database-view for details.
@@ -735,6 +743,39 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
single row, containing global data for the cluster.
   /para
 
+  table id=pg-stat-walwriter-view xreflabel=pg_stat_walwriter
+   titlestructnamepg_stat_walwriter/structname View/title
+
+   tgroup cols=3
+thead
+row
+  entryColumn/entry
+  entryType/entry
+  entryDescription/entry
+ /row
+/thead
+
+tbody
+ row
+  entrystructfielddirty_writes//entry
+  entrytypebigint/type/entry
+  entryNumber of dirty writes, which means flushing wal buffers
+   because of its full./entry
+ /row
+ row
+  entrystructfieldstats_reset//entry
+  entrytypetimestamp with time zone/type/entry
+  entryTime at which these statistics were last reset/entry
+ /row
+/tbody
+/tgroup
+  /table
+
+  para
+   The structnamepg_stat_walwriter/structname view will always have a
+   single row, containing global data for the cluster.
+  /para
+
   table id=pg-stat-database-view xreflabel=pg_stat_database
titlestructnamepg_stat_database/structname View/title
tgroup cols=3
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index dc47c47..d0e85c9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2467,6 +2467,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false);
+   WalWriterStats.m_xlog_dirty_writes++;
LWLockRelease(WALWriteLock);

TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql
index 575a40f..12a2ed0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -686,6 +686,11 @@ CREATE VIEW pg_stat_bgwriter AS
 pg_stat_get_buf_alloc() AS buffers_alloc,
 pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+SELECT
+pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
 SELECT
 U.oid   AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b5ce2f6..8c56af5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -129,6 +129,14 @@ char  *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter statistics counter.
+ * This counter is incremented by each XLogWrite call,
+ * both in the wal writer process and each backend.
+ * And then, sent to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* --
  * Local data
  * --
@@ -293,6 +301,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart 
*msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void 

Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-10 Thread Peter Eisentraut
On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:
 Thanks for checking. Revised one attached.

Please fix compiler warning:

walwriter.c: In function ‘WalWriterMain’:
walwriter.c:293:3: warning: implicit declaration of function
‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-10 Thread Satoshi Nagayasu
(2013/09/10 22:48), Peter Eisentraut wrote:
 On 9/10/13 3:37 AM, Satoshi Nagayasu wrote:
 Thanks for checking. Revised one attached.
 
 Please fix compiler warning:
 
 walwriter.c: In function ‘WalWriterMain’:
 walwriter.c:293:3: warning: implicit declaration of function
 ‘pgstat_send_walwriter’ [-Wimplicit-function-declaration]

Thanks. Fixed.

-- 
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
 results in most cases.
/para
 
+   para
+When you see pg_stat_walwriter.dirty_write, which means number
+of buffer flushing at buffer full, is continuously increasing
+in your running server, you may need to enlarge this buffer
+size.
+   /para
+
   /listitem
  /varlistentry
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
  /row
 
  row
+  
entrystructnamepg_stat_walwriter/indextermprimarypg_stat_walwriter/primary/indexterm/entry
+  entryOne row only, showing statistics about the wal writer
+   process's activity. See xref linkend=pg-stat-walwriter-view
+   for details.
+ /entry
+ /row
+
+ row
   
entrystructnamepg_stat_database/indextermprimarypg_stat_database/primary/indexterm/entry
   entryOne row per database, showing database-wide statistics. See
xref linkend=pg-stat-database-view for details.
@@ -735,6 +743,39 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
single row, containing global data for the cluster.
   /para
 
+  table id=pg-stat-walwriter-view xreflabel=pg_stat_walwriter
+   titlestructnamepg_stat_walwriter/structname View/title
+
+   tgroup cols=3
+thead
+row
+  entryColumn/entry
+  entryType/entry
+  entryDescription/entry
+ /row
+/thead
+
+tbody
+ row
+  entrystructfielddirty_writes//entry
+  entrytypebigint/type/entry
+  entryNumber of dirty writes, which means flushing wal buffers
+   because of its full./entry
+ /row
+ row
+  entrystructfieldstats_reset//entry
+  entrytypetimestamp with time zone/type/entry
+  entryTime at which these statistics were last reset/entry
+ /row
+/tbody
+/tgroup
+  /table
+
+  para
+   The structnamepg_stat_walwriter/structname view will always have a
+   single row, containing global data for the cluster.
+  /para
+
   table id=pg-stat-database-view xreflabel=pg_stat_database
titlestructnamepg_stat_database/structname View/title
tgroup cols=3
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index dc47c47..d0e85c9 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -2467,6 +2467,7 @@ AdvanceXLInsertBuffer(XLogRecPtr upto, bool opportunistic)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false);
+   WalWriterStats.m_xlog_dirty_writes++;
LWLockRelease(WALWriteLock);

TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql
index 575a40f..12a2ed0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -686,6 +686,11 @@ CREATE VIEW pg_stat_bgwriter AS
 pg_stat_get_buf_alloc() AS buffers_alloc,
 pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+SELECT
+pg_stat_get_xlog_dirty_writes() AS dirty_writes,
+pg_stat_get_wal_stat_reset_time() AS stats_reset;
+
 CREATE VIEW pg_user_mappings AS
 SELECT
 U.oid   AS umid,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index b5ce2f6..8c56af5 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -129,6 +129,14 @@ char  *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter statistics counter.
+ * This counter is incremented by each XLogWrite call,
+ * both in the wal writer process and each backend.
+ * And then, sent to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* --
  * Local data
  * --
@@ -293,6 +301,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart 
*msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, 

Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-09 Thread Peter Eisentraut
On 9/6/13 11:32 PM, Satoshi Nagayasu wrote:
 The revised patch for wal buffer statistics is attached.
 A test script is also attached. Please take a look.

You have duplicate OIDs.  Run the script duplicate_oids to find them.



-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-09 Thread Peter Geoghegan
On Mon, Sep 9, 2013 at 2:43 PM, Peter Eisentraut pete...@gmx.net wrote:
 You have duplicate OIDs.  Run the script duplicate_oids to find them.

Are you considering picking up the script that Andrew wrote to
automate that as part of the build? I wonder why that didn't end up
going anywhere.


-- 
Peter Geoghegan


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-09 Thread Peter Eisentraut
On Mon, 2013-09-09 at 14:51 -0700, Peter Geoghegan wrote:
 On Mon, Sep 9, 2013 at 2:43 PM, Peter Eisentraut pete...@gmx.net wrote:
  You have duplicate OIDs.  Run the script duplicate_oids to find them.
 
 Are you considering picking up the script that Andrew wrote to
 automate that as part of the build? I wonder why that didn't end up
 going anywhere.

It is automated.  Andrew's rewrite is still worth considering, and I had
planned to do that, but it doesn't provide any functionality we don't
already have.




-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-09 Thread Peter Geoghegan
On Mon, Sep 9, 2013 at 6:05 PM, Peter Eisentraut pete...@gmx.net wrote:
 It is automated.

Oh, yeah. I see that the maintainer-check target does that. I should
probably get into the habit of using targets other than
check/installcheck, as you recently demonstrated.

-- 
Peter Geoghegan


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-09-06 Thread Satoshi Nagayasu

Hi,

The revised patch for wal buffer statistics is attached.
A test script is also attached. Please take a look.

Regards,

(2013/07/19 7:49), Satoshi Nagayasu wrote:

Will revise and re-resubmit for the next CF.

Regards,

2013/07/19 1:06, Alvaro Herrera wrote:


What happened to this patch?  We were waiting on an updated version from
you.


Satoshi Nagayasu wrote:

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:


With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved by using
pg_stat_get_xlog_dirty_writes() function, and could be reset
with calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in
globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this?  Should we split this value?
And should we have new system view for walwriter?


I think the answer to the two last questions is yes.  It doesn't
seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF.  I'm marking this one
returned with feedback.  Thanks.



I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.


I've done a quick review of the v4 patch:


Thanks for the review, and sorry for my delayed response.


1) applies fine on HEAD, compiles fine

2) make installcheck fails because of a difference in the 'rules'
test suite (there's a new view pg_stat_walwriter - see the
attached patch for a fixed version or expected/rules.out)


Ah, I forgot about the regression test. I will fix it. Thanks.


3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For
example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).


Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
hold those two structs in the stat collector.


4) Are there any other fields that might be interesting? Right now
there's just dirty_writes but I guess there are other values.
E.g.
how much data was actually written etc.?


AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,








--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 23ebc11..cdced7f 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1878,6 +1878,13 @@ include 'filename'
 results in most cases.
/para
 
+   para
+When you see pg_stat_walwriter.dirty_write, which means number
+of buffer flushing at buffer full, is continuously increasing
+in your running server, you may need to enlarge this buffer
+size.
+   /para
+
   /listitem
  /varlistentry
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4ec6981..15d9202 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
  /row
 
  row
+  
entrystructnamepg_stat_walwriter/indextermprimarypg_stat_walwriter/primary/indexterm/entry
+  entryOne row only, showing statistics about the wal writer
+   process's activity. See xref linkend=pg-stat-walwriter-view
+   for details.
+ /entry
+ /row
+
+ row
   
entrystructnamepg_stat_database/indextermprimarypg_stat_database/primary/indexterm/entry
   entryOne row per database, showing database-wide statistics. See
xref linkend=pg-stat-database-view for details.
@@ -735,6 +743,39 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
single row, containing global data for the cluster.
   /para
 
+  table id=pg-stat-walwriter-view xreflabel=pg_stat_walwriter
+   titlestructnamepg_stat_walwriter/structname View/title
+
+   tgroup cols=3
+thead
+row
+  entryColumn/entry
+  entryType/entry
+  entryDescription/entry
+ /row
+/thead
+
+tbody

Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-07-18 Thread Alvaro Herrera

What happened to this patch?  We were waiting on an updated version from
you.


Satoshi Nagayasu wrote:
 (2012/12/10 3:06), Tomas Vondra wrote:
 On 29.10.2012 04:58, Satoshi Nagayasu wrote:
 2012/10/24 1:12, Alvaro Herrera wrote:
 Satoshi Nagayasu escribió:
 
 With this patch, walwriter process and each backend process
 would sum up dirty writes, and send it to the stat collector.
 So, the value could be saved in the stat file, and could be
 kept on restarting.
 
 The statistics could be retrieved by using
 pg_stat_get_xlog_dirty_writes() function, and could be reset
 with calling pg_stat_reset_shared('walwriter').
 
 Now, I have one concern.
 
 The reset time could be captured in globalStats.stat_reset_timestamp,
 but this value is the same with the bgwriter one.
 
 So, once pg_stat_reset_shared('walwriter') is called,
 stats_reset column in pg_stat_bgwriter does represent
 the reset time for walwriter, not for bgwriter.
 
 How should we handle this?  Should we split this value?
 And should we have new system view for walwriter?
 
 I think the answer to the two last questions is yes.  It doesn't seem to
 make sense, to me, to have a single reset timings for what are
 effectively two separate things.
 
 Please submit an updated patch to next CF.  I'm marking this one
 returned with feedback.  Thanks.
 
 
 I attached the latest one, which splits the reset_time
 for bgwriter and walwriter, and provides new system view,
 called pg_stat_walwriter, to show the dirty write counter
 and the reset time.
 
 I've done a quick review of the v4 patch:
 
 Thanks for the review, and sorry for my delayed response.
 
 1) applies fine on HEAD, compiles fine
 
 2) make installcheck fails because of a difference in the 'rules'
 test suite (there's a new view pg_stat_walwriter - see the
 attached patch for a fixed version or expected/rules.out)
 
 Ah, I forgot about the regression test. I will fix it. Thanks.
 
 3) I do agree with Alvaro that using the same struct for two separate
 components (bgwriter and walwriter) seems a bit awkward. For example
 you need to have two separate stat_reset fields, the reset code
 becomes much more verbose (because you need to list individual
 fields) etc.
 
 So I'd vote to either split this into two structures or keeping it
 as a single structure (although with two views on top of it).
 
 Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
 PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
 hold those two structs in the stat collector.
 
 4) Are there any other fields that might be interesting? Right now
 there's just dirty_writes but I guess there are other values. E.g.
 how much data was actually written etc.?
 
 AFAIK, I think those numbers can be obtained by calling
 pg_current_xlog_insert_location() or pg_current_xlog_location(),
 but if we need it, I will add it.
 
 Regards,


-- 
Álvaro Herrerahttp://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training  Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-07-18 Thread Satoshi Nagayasu

Will revise and re-resubmit for the next CF.

Regards,

2013/07/19 1:06, Alvaro Herrera wrote:


What happened to this patch?  We were waiting on an updated version from
you.


Satoshi Nagayasu wrote:

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:


With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved by using
pg_stat_get_xlog_dirty_writes() function, and could be reset
with calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this?  Should we split this value?
And should we have new system view for walwriter?


I think the answer to the two last questions is yes.  It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF.  I'm marking this one
returned with feedback.  Thanks.



I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.


I've done a quick review of the v4 patch:


Thanks for the review, and sorry for my delayed response.


1) applies fine on HEAD, compiles fine

2) make installcheck fails because of a difference in the 'rules'
test suite (there's a new view pg_stat_walwriter - see the
attached patch for a fixed version or expected/rules.out)


Ah, I forgot about the regression test. I will fix it. Thanks.


3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).


Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to
hold those two structs in the stat collector.


4) Are there any other fields that might be interesting? Right now
there's just dirty_writes but I guess there are other values. E.g.
how much data was actually written etc.?


AFAIK, I think those numbers can be obtained by calling
pg_current_xlog_insert_location() or pg_current_xlog_location(),
but if we need it, I will add it.

Regards,






--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-01-20 Thread Satoshi Nagayasu

(2012/11/27 7:42), Alvaro Herrera wrote:

Satoshi Nagayasu escribió:


I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.


Thanks.  I gave this a look and I have a couple of comments:

1. The counter is called dirty_write.  I imagine that this comes
directly from the name of the nearby DTrace probe,
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START.  That probe comes from
email 494c1565.3060...@sun.com committed in 4ee79fd20d9a.  But there
wasn't much discussion about the name back then.  Maybe that was fine at
the time because it was pretty much an internal matter, being so deep in
the code.  But we're now going to expose it to regular users, so we'd
better choose a very good name because we're going to be stuck with it
for a very long time.  And the name dirty doesn't ring with me too
well; what matters here is not that we're writing a buffer that is
dirty, but the fact that we're writing while holding the WalInsertLock,
so the name should convey the fact that this is a locked write or
something like that.  Or at least that's how I see the issue.  Note the
documentation wording:

+  entrystructfielddirty_writes//entry
+  entrytypebigint/type/entry
+  entryNumber of dirty writes, which means flushing wal buffers
+   because of its full./entry


Yes, dirty_writes came from the probe name, and if it needs to be 
changed, buffers_flush would make sense for me in this situation, 
because this counter is intended to show WAL writes due to wal buffer 
full.



2. Should we put bgwriter and walwriter data in separate structs?  I
don't think this is necessary, but maybe someone opines differently?


I tried to minimize an impact of this patch, but if I can change this 
struct, yes, I'd like to split into two structs.



3.
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;

Maybe we should use a different name for the struct, to avoid having to
excuse ourselves for the name being wrong ...


Ok. How about WalBufferStats? I think this name could be accepted in 
both the wal writer and each backend process.


--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2013-01-20 Thread Satoshi Nagayasu

(2012/12/10 3:06), Tomas Vondra wrote:

On 29.10.2012 04:58, Satoshi Nagayasu wrote:

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:


With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics could be retrieved by using
pg_stat_get_xlog_dirty_writes() function, and could be reset
with calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this?  Should we split this value?
And should we have new system view for walwriter?


I think the answer to the two last questions is yes.  It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF.  I'm marking this one
returned with feedback.  Thanks.



I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.


I've done a quick review of the v4 patch:


Thanks for the review, and sorry for my delayed response.


1) applies fine on HEAD, compiles fine

2) make installcheck fails because of a difference in the 'rules'
test suite (there's a new view pg_stat_walwriter - see the
attached patch for a fixed version or expected/rules.out)


Ah, I forgot about the regression test. I will fix it. Thanks.


3) I do agree with Alvaro that using the same struct for two separate
components (bgwriter and walwriter) seems a bit awkward. For example
you need to have two separate stat_reset fields, the reset code
becomes much more verbose (because you need to list individual
fields) etc.

So I'd vote to either split this into two structures or keeping it
as a single structure (although with two views on top of it).


Ok, I will split it into two structs, PgStat_BgWriterGlobalStats and 
PgStat_WalWriterGlobalStats, and will modify PgStat_GlobalStats to hold 
those two structs in the stat collector.



4) Are there any other fields that might be interesting? Right now
there's just dirty_writes but I guess there are other values. E.g.
how much data was actually written etc.?


AFAIK, I think those numbers can be obtained by calling 
pg_current_xlog_insert_location() or pg_current_xlog_location(),

but if we need it, I will add it.

Regards,



Tomas







--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp


--
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-12-09 Thread Tomas Vondra
On 29.10.2012 04:58, Satoshi Nagayasu wrote:
 2012/10/24 1:12, Alvaro Herrera wrote:
 Satoshi Nagayasu escribió:

 With this patch, walwriter process and each backend process
 would sum up dirty writes, and send it to the stat collector.
 So, the value could be saved in the stat file, and could be
 kept on restarting.

 The statistics could be retrieved by using
 pg_stat_get_xlog_dirty_writes() function, and could be reset
 with calling pg_stat_reset_shared('walwriter').

 Now, I have one concern.

 The reset time could be captured in globalStats.stat_reset_timestamp,
 but this value is the same with the bgwriter one.

 So, once pg_stat_reset_shared('walwriter') is called,
 stats_reset column in pg_stat_bgwriter does represent
 the reset time for walwriter, not for bgwriter.

 How should we handle this?  Should we split this value?
 And should we have new system view for walwriter?

 I think the answer to the two last questions is yes.  It doesn't seem to
 make sense, to me, to have a single reset timings for what are
 effectively two separate things.

 Please submit an updated patch to next CF.  I'm marking this one
 returned with feedback.  Thanks.

 
 I attached the latest one, which splits the reset_time
 for bgwriter and walwriter, and provides new system view,
 called pg_stat_walwriter, to show the dirty write counter
 and the reset time.

I've done a quick review of the v4 patch:

1) applies fine on HEAD, compiles fine

2) make installcheck fails because of a difference in the 'rules'
   test suite (there's a new view pg_stat_walwriter - see the
   attached patch for a fixed version or expected/rules.out)

3) I do agree with Alvaro that using the same struct for two separate
   components (bgwriter and walwriter) seems a bit awkward. For example
   you need to have two separate stat_reset fields, the reset code
   becomes much more verbose (because you need to list individual
   fields) etc.

   So I'd vote to either split this into two structures or keeping it
   as a single structure (although with two views on top of it).

4) Are there any other fields that might be interesting? Right now
   there's just dirty_writes but I guess there are other values. E.g.
   how much data was actually written etc.?

Tomas

diff --git a/contrib/pgbench/pgbench.c b/contrib/pgbench/pgbench.c
index e376452..334ce4c 100644
--- a/contrib/pgbench/pgbench.c
+++ b/contrib/pgbench/pgbench.c
@@ -135,6 +135,11 @@ intunlogged_tables = 0;
 double sample_rate = 0.0;
 
 /*
+ * logging steps (seconds between log messages)
+ */
+intlog_step_seconds = 5;
+
+/*
  * tablespace selection
  */
 char  *tablespace = NULL;
@@ -1362,6 +1367,11 @@ init(bool is_no_vacuum)
charsql[256];
int i;
 
+   /* used to track elapsed time and estimate of the remaining time */
+   instr_time  start, diff;
+   double  elapsed_sec, remaining_sec;
+   int log_interval = 1;
+
if ((con = doConnect()) == NULL)
exit(1);
 
@@ -1430,6 +1440,8 @@ init(bool is_no_vacuum)
}
PQclear(res);
 
+   INSTR_TIME_SET_CURRENT(start);
+
for (i = 0; i  naccounts * scale; i++)
{
int j = i + 1;
@@ -1441,10 +1453,27 @@ init(bool is_no_vacuum)
exit(1);
}
 
-   if (j % 10 == 0)
-   fprintf(stderr, %d of %d tuples (%d%%) done.\n,
-   j, naccounts * scale,
-   (int) (((int64) j * 100) / (naccounts * 
scale)));
+   /* let's not call the timing for each row, but only each 100 
rows */
+   if (j % 100 == 0 || j == scale * naccounts)
+   {
+   INSTR_TIME_SET_CURRENT(diff);
+   INSTR_TIME_SUBTRACT(diff, start);
+
+   elapsed_sec = INSTR_TIME_GET_DOUBLE(diff);
+   remaining_sec = (scale * naccounts - j) * elapsed_sec / 
j;
+
+   /* have we reached the next interval? */
+   if (elapsed_sec = log_interval * log_step_seconds) {
+
+   fprintf(stderr, %d of %d tuples (%d%%) done 
(elapsed %.2f s, remaining %.2f s).\n,
+   j, naccounts * scale,
+   (int) (((int64) j * 100) / 
(naccounts * scale)), elapsed_sec, remaining_sec);
+
+   /* skip to the next interval */
+   log_interval = 
(int)ceil(elapsed_sec/log_step_seconds);
+   }
+   }
+
}
if (PQputline(con, \\.\n))
{
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b7df8ce..bf9acc5 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ 

Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-11-26 Thread Alvaro Herrera
Satoshi Nagayasu escribió:

 I attached the latest one, which splits the reset_time
 for bgwriter and walwriter, and provides new system view,
 called pg_stat_walwriter, to show the dirty write counter
 and the reset time.

Thanks.  I gave this a look and I have a couple of comments:

1. The counter is called dirty_write.  I imagine that this comes
directly from the name of the nearby DTrace probe,
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_START.  That probe comes from
email 494c1565.3060...@sun.com committed in 4ee79fd20d9a.  But there
wasn't much discussion about the name back then.  Maybe that was fine at
the time because it was pretty much an internal matter, being so deep in
the code.  But we're now going to expose it to regular users, so we'd
better choose a very good name because we're going to be stuck with it
for a very long time.  And the name dirty doesn't ring with me too
well; what matters here is not that we're writing a buffer that is
dirty, but the fact that we're writing while holding the WalInsertLock,
so the name should convey the fact that this is a locked write or
something like that.  Or at least that's how I see the issue.  Note the
documentation wording:

+  entrystructfielddirty_writes//entry
+  entrytypebigint/type/entry
+  entryNumber of dirty writes, which means flushing wal buffers
+   because of its full./entry


2. Should we put bgwriter and walwriter data in separate structs?  I
don't think this is necessary, but maybe someone opines differently?


3. 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;

Maybe we should use a different name for the struct, to avoid having to
excuse ourselves for the name being wrong ...

-- 
Álvaro Herrerahttp://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-10-28 Thread Satoshi Nagayasu

2012/10/24 1:12, Alvaro Herrera wrote:

Satoshi Nagayasu escribió:


With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics can be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and can be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this?  Should we split this value?
And should we have new system view for walwriter?


I think the answer to the two last questions is yes.  It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF.  I'm marking this one
returned with feedback.  Thanks.



I attached the latest one, which splits the reset_time
for bgwriter and walwriter, and provides new system view,
called pg_stat_walwriter, to show the dirty write counter
and the reset time.

Regards,
--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index b4fcbaf..0ae885b 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -1907,6 +1907,13 @@ include 'filename'
 results in most cases.
/para
 
+   para
+When you see pg_stat_walwriter.dirty_write, which means number
+of buffer flushing at buffer full, is continuously increasing
+in your running server, you may need to enlarge this buffer
+size.
+   /para
+
   /listitem
  /varlistentry
 
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 39ccfbb..3117f91 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -278,6 +278,14 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
  /row
 
  row
+  
entrystructnamepg_stat_walwriter/indextermprimarypg_stat_walwriter/primary/indexterm/entry
+  entryOne row only, showing statistics about the wal writer
+   process's activity. See xref linkend=pg-stat-walwriter-view
+   for details.
+ /entry
+ /row
+
+ row
   
entrystructnamepg_stat_database/indextermprimarypg_stat_database/primary/indexterm/entry
   entryOne row per database, showing database-wide statistics. See
xref linkend=pg-stat-database-view for details.
@@ -735,6 +743,39 @@ postgres: replaceableuser/ replaceabledatabase/ 
replaceablehost/ re
single row, containing global data for the cluster.
   /para
 
+  table id=pg-stat-walwriter-view xreflabel=pg_stat_walwriter
+   titlestructnamepg_stat_walwriter/structname View/title
+
+   tgroup cols=3
+thead
+row
+  entryColumn/entry
+  entryType/entry
+  entryDescription/entry
+ /row
+/thead
+
+tbody
+ row
+  entrystructfielddirty_writes//entry
+  entrytypebigint/type/entry
+  entryNumber of dirty writes, i.e., WAL buffer flushes performed
+   because the buffers were full./entry
+ /row
+ row
+  entrystructfieldstats_reset//entry
+  entrytypetimestamp with time zone/type/entry
+  entryTime at which these statistics were last reset/entry
+ /row
+/tbody
+/tgroup
+  /table
+
+  para
+   The structnamepg_stat_walwriter/structname view will always have a
+   single row, containing global data for the cluster.
+  /para
+
   table id=pg-stat-database-view xreflabel=pg_stat_database
titlestructnamepg_stat_database/structname View/title
tgroup cols=3
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index d251d08..631a0af 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1347,6 +1347,7 @@ AdvanceXLInsertBuffer(bool new_segment)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false, false);
+   WalWriterStats.m_xlog_dirty_writes++;
LWLockRelease(WALWriteLock);
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
diff --git a/src/backend/catalog/system_views.sql 
b/src/backend/catalog/system_views.sql
index 607a72f..40f0c34 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -671,6 +671,11 @@ CREATE VIEW pg_stat_bgwriter AS
 pg_stat_get_buf_alloc() AS buffers_alloc,
 pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_walwriter AS
+SELECT
+

Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-10-23 Thread Alvaro Herrera
Satoshi Nagayasu escribió:

 With this patch, walwriter process and each backend process
 would sum up dirty writes, and send it to the stat collector.
 So, the value could be saved in the stat file, and could be
 kept on restarting.
 
 The statistics can be retrieved using the
 pg_stat_get_xlog_dirty_writes() function, and can be reset
 by calling pg_stat_reset_shared('walwriter').
 
 Now, I have one concern.
 
 The reset time could be captured in globalStats.stat_reset_timestamp,
 but this value is the same with the bgwriter one.
 
 So, once pg_stat_reset_shared('walwriter') is called,
 stats_reset column in pg_stat_bgwriter does represent
 the reset time for walwriter, not for bgwriter.
 
 How should we handle this?  Should we split this value?
 And should we have new system view for walwriter?

I think the answer to the two last questions is yes.  It doesn't seem to
make sense, to me, to have a single reset timings for what are
effectively two separate things.

Please submit an updated patch to next CF.  I'm marking this one
returned with feedback.  Thanks.

-- 
Álvaro Herrerahttp://www.2ndQuadrant.com/
PostgreSQL Development, 24x7 Support, Training & Services


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-09-25 Thread Satoshi Nagayasu

Hi,

2012/08/12 7:11, Jeff Janes wrote:

On Sat, Jul 28, 2012 at 3:33 PM, Jeff Janes jeff.ja...@gmail.com wrote:

On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu sn...@uptime.jp wrote:

Hi,

Jeff Janes has pointed out that my previous patch could hold
a number of the dirty writes only in single local backend, and
it could not hold all over the cluster, because the counter
was allocated in the local process memory.

That's true, and I have fixed it with moving the counter into
the shared memory, as a member of XLogCtlWrite, to keep total
dirty writes in the cluster.




...


The comment XLogCtrlWrite must be protected with WALWriteLock
mis-spells XLogCtlWrite.

The final patch will need to add a sections to the documentation.


Thanks to Robert and Tom for addressing my concerns about the pointer
volatility.

I think there is enough consensus that this is useful without adding
more things to it, like histograms or high water marks.

However, I do think we will want to add a way to query for the time of
the last reset, as other monitoring features are going that way.

Is it OK that the count is reset upon a server restart?
pg_stat_bgwriter, for example, does not do that.  Unfortunately I
think fixing this in an acceptable way will be harder than the entire
rest of the patch was.


The coding looks OK to me, it applies and builds, and passes make
check, and does what it says.  I didn't do performance testing, as it
is hard to believe it would have a meaningful effect.

I'll marked it as waiting on author, for the documentation and reset
time.  I'd ask a more senior hacker to comment on the durability over
restarts.


I have rewritten the patch to deal with dirty write statistics
through pgstat collector as bgwriter does.
Yeah, it's a bit bigger rewrite.

With this patch, walwriter process and each backend process
would sum up dirty writes, and send it to the stat collector.
So, the value could be saved in the stat file, and could be
kept on restarting.

The statistics can be retrieved using the
pg_stat_get_xlog_dirty_writes() function, and can be reset
by calling pg_stat_reset_shared('walwriter').

Now, I have one concern.

The reset time could be captured in globalStats.stat_reset_timestamp,
but this value is the same with the bgwriter one.

So, once pg_stat_reset_shared('walwriter') is called,
stats_reset column in pg_stat_bgwriter does represent
the reset time for walwriter, not for bgwriter.

How should we handle this?  Should we split this value?
And should we have new system view for walwriter?

Of course, I will work on documentation next.

Regards,



Cheers,

Jeff


--
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index ff56c26..234d568 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1518,6 +1518,7 @@ AdvanceXLInsertBuffer(bool new_segment)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false, false);
+   WalWriterStats.m_xlog_dirty_writes++;
LWLockRelease(WALWriteLock);
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 8389d5c..f031be1 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -125,6 +125,15 @@ char  *pgstat_stat_tmpname = NULL;
  */
 PgStat_MsgBgWriter BgWriterStats;
 
+/*
+ * WalWriter global statistics counter.
+ * Despite its name, this counter is actually used not only in walwriter,
+ * but also in each backend process to sum up xlog dirty writes.
+ * Those processes would increment this counter in each XLogWrite call,
+ * then send it to the stat collector process.
+ */
+PgStat_MsgWalWriter WalWriterStats;
+
 /* --
  * Local data
  * --
@@ -279,6 +288,7 @@ static void pgstat_recv_autovac(PgStat_MsgAutovacStart 
*msg, int len);
 static void pgstat_recv_vacuum(PgStat_MsgVacuum *msg, int len);
 static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
+static void pgstat_recv_walwriter(PgStat_MsgWalWriter *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
 static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int 
len);
@@ -1188,6 +1198,8 @@ pgstat_reset_shared_counters(const char *target)
 
if (strcmp(target, bgwriter) == 0)
msg.m_resettarget = RESET_BGWRITER;
+   else if (strcmp(target, walwriter) == 0)
+   msg.m_resettarget = RESET_WALWRITER;
else
ereport(ERROR,
 

Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-08-21 Thread Robert Haas
On Sat, Aug 11, 2012 at 6:11 PM, Jeff Janes jeff.ja...@gmail.com wrote:
 However, I do think we will want to add a way to query for the time of
 the last reset, as other monitoring features are going that way.

That should be easy to add.

 Is it OK that the count is reset upon a server restart?

I think it's OK.  The reason why many of our stats are kept in the
stats file is because we have a limited amount of shared memory and
therefore can't guarantee (for example) that there's enough to keep
stats about EVERY table, since the number of tables is unlimited.
However, in cases where the data to be stored is fixed-size, and
especially when it's fixed-size and small, there's a lot of sense to
keeping the data in shared memory rather than sending stats collector
messages.  It's a lot less overhead, for one thing.  Maybe at some
point someone will want to devise a way to hibernate such stats to
disk at shutdown (or periodically) and reload them on startup, but it
doesn't seem like a must-have to me.

Other opinions may vary, of course.

 I'll marked it as waiting on author, for the documentation and reset
 time.

Yeah, we definitely need some documentation.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company


-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-08-11 Thread Jeff Janes
On Sat, Jul 28, 2012 at 3:33 PM, Jeff Janes jeff.ja...@gmail.com wrote:
 On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu sn...@uptime.jp wrote:
 Hi,

 Jeff Janes has pointed out that my previous patch could hold
 a number of the dirty writes only in single local backend, and
 it could not hold all over the cluster, because the counter
 was allocated in the local process memory.

 That's true, and I have fixed it with moving the counter into
 the shared memory, as a member of XLogCtlWrite, to keep total
 dirty writes in the cluster.


...

 The comment XLogCtrlWrite must be protected with WALWriteLock
 mis-spells XLogCtlWrite.

 The final patch will need to add a sections to the documentation.

Thanks to Robert and Tom for addressing my concerns about the pointer
volatility.

I think there is enough consensus that this is useful without adding
more things to it, like histograms or high water marks.

However, I do think we will want to add a way to query for the time of
the last reset, as other monitoring features are going that way.

Is it OK that the count is reset upon a server restart?
pg_stat_bgwriter, for example, does not do that.  Unfortunately I
think fixing this in an acceptable way will be harder than the entire
rest of the patch was.


The coding looks OK to me, it applies and builds, and passes make
check, and does what it says.  I didn't do performance testing, as it
is hard to believe it would have a meaningful effect.

I'll marked it as waiting on author, for the documentation and reset
time.  I'd ask a more senior hacker to comment on the durability over
restarts.

Cheers,

Jeff

-
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-08-01 Thread Robert Haas
On Tue, Jul 31, 2012 at 4:06 PM, Tom Lane t...@sss.pgh.pa.us wrote:
 Robert Haas robertmh...@gmail.com writes:
 IMHO, the way we have it now is kind of a mess.  SpinLockAcquire and
 SpinLockRelease are required to be CPU barriers, but they are not
 required to be compiler barriers.  If we changed that so that they
 were required to act as barriers of both flavors,

 Since they are macros, how do you propose to do that exactly?

Why does it matter that they are macros?

 I agree that volatile-izing everything in the vicinity is a sucky
 solution, but the last time we looked at this there did not seem to
 be a better one.

Well, Linux has a barrier() primitive which is defined as a
compiler-barrier, so I don't see why we shouldn't be able to manage
the same thing.  In fact, we've already got it, though it's presently
unused; see storage/barrier.h.

Looking over s_lock.h, it looks like TAS is typically defined using
__asm__ __volatile__, and the __asm__ is marked as clobbering memory.
As the fine comments say this prevents gcc from thinking it can cache
the values of shared-memory fields across the asm code, which is
another way of saying that it's a compiler barrier.  However, there's
no similar guard in S_UNLOCK, which is simply declared as a volatile
store, and therefore compiler ordering is guaranteed only with respect
to other volatile pointer references.  If we added something of the
form __asm__ __volatile__("" : : : "memory") in there, it should
serve as a full compiler barrier.  That might have to go in a static
inline function as we do with TAS, but I think it should work.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-08-01 Thread Tom Lane
Robert Haas robertmh...@gmail.com writes:
 On Tue, Jul 31, 2012 at 4:06 PM, Tom Lane t...@sss.pgh.pa.us wrote:
 I agree that volatile-izing everything in the vicinity is a sucky
 solution, but the last time we looked at this there did not seem to
 be a better one.

 Well, Linux has a barrier() primitive which is defined as a
 compiler-barrier, so I don't see why we shouldn't be able to manage
 the same thing.  In fact, we've already got it, though it's presently
 unused; see storage/barrier.h.

Solving the problem for linux only, or gcc only, isn't going to get us
to a place where we can stop volatile-izing call sites.  We need to be
sure it works for every single case supported by s_lock.h.

I think you may be right that using __asm__ __volatile__ in gcc
S_UNLOCK cases would be a big step forward, but it needs more research
to see if that's the only fix needed.

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-08-01 Thread Robert Haas
On Wed, Aug 1, 2012 at 10:12 AM, Tom Lane t...@sss.pgh.pa.us wrote:
 Solving the problem for linux only, or gcc only, isn't going to get us
 to a place where we can stop volatile-izing call sites.  We need to be
 sure it works for every single case supported by s_lock.h.

Yep, that's the problem all right.

 I think you may be right that using __asm__ __volatile__ in gcc
 S_UNLOCK cases would be a big step forward, but it needs more research
 to see if that's the only fix needed.

I agree, but I will note that I have done a fair bit of research on
this already, and there are definitions in storage/barrier.h for
pg_compiler_barrier() that cover gcc, icc, HP's aCC, MSVC, and Borland
C.  There are probably other wacky compilers out there, though:
looking at the build farm, I see Sun Studio and sco cc as cases that
would likely need some attention.  Are there any compilers not
represented in the build-farm that we'd mind breaking?

If we can get working pg_compiler_barrier() definitions for all the
compilers we care about, the rest is probably mostly a question of
going through s_lock.h and inserting compiler barriers anywhere that
they aren't already implied by the existing code.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-31 Thread Robert Haas
On Sat, Jul 28, 2012 at 6:33 PM, Jeff Janes jeff.ja...@gmail.com wrote:
 A concern I have is whether the XLogCtlWrite *Write pointer needs to
 be declared volatile, to prevent the compiler from pushing operations
 on them outside of the locks (and so memory barriers) that formally
 protect them.  However I see that existing code with Insert also does
 not use volatile, so maybe my concern is baseless.  Perhaps the
 compiler guarantees to not move operations on pointers over the
 boundaries of function calls?  The pattern elsewhere in the code seems
 to be to use volatiles for things protected by spin-locks (implemented
 by macros) but not for things protected by LWLocks.

Yes, our code is only correct if we assume that the compiler performs
no global optimizations - i.e. no movement of code between functions.

IMHO, the way we have it now is kind of a mess.  SpinLockAcquire and
SpinLockRelease are required to be CPU barriers, but they are not
required to be compiler barriers.  If we changed that so that they
were required to act as barriers of both flavors, then (1) we wouldn't
need volatile in as many places, (2) we would be less prone to bugs
caused by the omission of not-obviously-necessary volatile markings,
and (3) we would remove one possible source of breakage that might be
induced by a globally optimizing compiler.  As things stand today,
making a previously-global function static could result in working
code breaking, because the static function might be inlined where the
global function wasn't.  Ouch.

Anyway, unless and until we make a definitional change of the sort
described above, any pointers used within a spinlock critical section
must be volatile; and pray that the compiler doesn't inline anything
you weren't expecting.

-- 
Robert Haas
EnterpriseDB: http://www.enterprisedb.com
The Enterprise PostgreSQL Company

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-31 Thread Tom Lane
Robert Haas robertmh...@gmail.com writes:
 IMHO, the way we have it now is kind of a mess.  SpinLockAcquire and
 SpinLockRelease are required to be CPU barriers, but they are not
 required to be compiler barriers.  If we changed that so that they
 were required to act as barriers of both flavors,

Since they are macros, how do you propose to do that exactly?

I agree that volatile-izing everything in the vicinity is a sucky
solution, but the last time we looked at this there did not seem to
be a better one.

regards, tom lane

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-29 Thread Simon Riggs
On 7 July 2012 18:06, Robert Haas robertmh...@gmail.com wrote:

 Sure, but I doubt that would be as informative as this.  It's no big deal if 
 you hit 100% every once in a while; what you really want to know is whether 
 it's happening once per second or once per week.

Agreed.

I can't see an easy way of recording the high water mark % and I'm not
sure how we'd use it if we had it.

Let's just track how often we run out of space because that is when
bad things happen, not before.

-- 
 Simon Riggs   http://www.2ndQuadrant.com/
 PostgreSQL Development, 24x7 Support, Training & Services

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-28 Thread Jeff Janes
On Sat, Jul 7, 2012 at 9:17 PM, Satoshi Nagayasu sn...@uptime.jp wrote:
 Hi,

 Jeff Janes has pointed out that my previous patch could hold
 a number of the dirty writes only in single local backend, and
 it could not hold all over the cluster, because the counter
 was allocated in the local process memory.

 That's true, and I have fixed it with moving the counter into
 the shared memory, as a member of XLogCtlWrite, to keep total
 dirty writes in the cluster.

A concern I have is whether the XLogCtlWrite *Write pointer needs to
be declared volatile, to prevent the compiler from pushing operations
on them outside of the locks (and so memory barriers) that formally
protect them.  However I see that existing code with Insert also does
not use volatile, so maybe my concern is baseless.  Perhaps the
compiler guarantees to not move operations on pointers over the
boundaries of function calls?  The pattern elsewhere in the code seems
to be to use volatiles for things protected by spin-locks (implemented
by macros) but not for things protected by LWLocks.

The comment XLogCtrlWrite must be protected with WALWriteLock
mis-spells XLogCtlWrite.

The final patch will need to add a sections to the documentation.

Cheers,

Jeff

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Euler Taveira
On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.
 
This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
give you the wal_buffers value. It only says hey, I needed more buffers so I
write those dirty ones. It doesn't say how many. I would like to have
something that says hey, you have 1000 buffers available and  you are using
100 buffers (10%). This new statistic is only useful for decreasing the
WALWriteLock contention.


-- 
   Euler Taveira de Oliveira - Timbira   http://www.timbira.com.br/
   PostgreSQL: Consultoria, Desenvolvimento, Suporte 24x7 e Treinamento

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Satoshi Nagayasu
2012/07/07 22:07, Euler Taveira wrote:
 On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.

 This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
 give you the wal_buffers value. It only says hey, I needed more buffers so I
 write those dirty ones. It doesn't say how many. I would like to have
 something that says hey, you have 1000 buffers available and  you are using
 100 buffers (10%). This new statistic is only useful for decreasing the
 WALWriteLock contention.

I agree that it would not tell the exact number for wal_buffers,
but it would help DBAs understand what's actually happening around
WAL buffers.

Also, decreasing the WALWriteLock contention is obviously important
for DBA in terms of improving database performance.

Actually, that's the reason why I'm working on another statistics. :)
http://archives.postgresql.org/pgsql-hackers/2012-06/msg01489.php

Regards,
-- 
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Robert Haas
On Jul 7, 2012, at 9:07 AM, Euler Taveira eu...@timbira.com wrote:
 On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.
 
 This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
 give you the wal_buffers value. It only says hey, I needed more buffers so I
 write those dirty ones. It doesn't say how many. I would like to have
 something that says hey, you have 1000 buffers available and  you are using
 100 buffers (10%). This new statistic is only useful for decreasing the
 WALWriteLock contention.

The number of WAL buffers that you are using is going to change so quickly as 
to be utterly meaningless.  I don't really see that there's any statistic we 
could gather that would tell us how many WAL buffers are needed.  This patch 
seems like it's on the right track, at least telling you how often you're 
running out.

I'm interested to run some benchmarks with this; I think it could be quite 
informative.

...Robert
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Magnus Hagander
On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas robertmh...@gmail.com wrote:
 On Jul 7, 2012, at 9:07 AM, Euler Taveira eu...@timbira.com wrote:
 On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.

 This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
 give you the wal_buffers value. It only says hey, I needed more buffers so I
 write those dirty ones. It doesn't say how many. I would like to have
 something that says hey, you have 1000 buffers available and  you are using
 100 buffers (10%). This new statistic is only useful for decreasing the
 WALWriteLock contention.

 The number of WAL buffers that you are using is going to change so quickly as 
 to be utterly meaningless.  I don't really see that there's any statistic we 
 could gather that would tell us how many WAL buffers are needed.  This patch 
 seems like it's on the right track, at least telling you how often you're 
 running out.

We could keep a high watermark of what's the largest percentage we've
used, perhaps?

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Robert Haas
On Jul 7, 2012, at 8:54 AM, Magnus Hagander mag...@hagander.net wrote:
 On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas robertmh...@gmail.com wrote:
 On Jul 7, 2012, at 9:07 AM, Euler Taveira eu...@timbira.com wrote:
 On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.
 
 This new statistic doesn't solve your problem (tune wal_buffers). It doesn't
 give you the wal_buffers value. It only says hey, I needed more buffers so 
 I
 write those dirty ones. It doesn't say how many. I would like to have
 something that says hey, you have 1000 buffers available and  you are using
 100 buffers (10%). This new statistic is only useful for decreasing the
 WALWriteLock contention.
 
 The number of WAL buffers that you are using is going to change so quickly 
 as to be utterly meaningless.  I don't really see that there's any statistic 
 we could gather that would tell us how many WAL buffers are needed.  This 
 patch seems like it's on the right track, at least telling you how often 
 you're running out.
 
 We could keep a high watermark of what's the largest percentage we've
 used, perhaps?

Sure, but I doubt that would be as informative as this.  It's no big deal if 
you hit 100% every once in a while; what you really want to know is whether 
it's happening once per second or once per week.

...Robert
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Magnus Hagander
On Sat, Jul 7, 2012 at 7:06 PM, Robert Haas robertmh...@gmail.com wrote:
 On Jul 7, 2012, at 8:54 AM, Magnus Hagander mag...@hagander.net wrote:
 On Sat, Jul 7, 2012 at 3:52 PM, Robert Haas robertmh...@gmail.com wrote:
 On Jul 7, 2012, at 9:07 AM, Euler Taveira eu...@timbira.com wrote:
 On 07-07-2012 09:00, Satoshi Nagayasu wrote:
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.

 This new statistic doesn't solve your problem (tune wal_buffers). It 
 doesn't
 give you the wal_buffers value. It only says hey, I needed more buffers 
 so I
 write those dirty ones. It doesn't say how many. I would like to have
 something that says hey, you have 1000 buffers available and  you are 
 using
 100 buffers (10%). This new statistic is only useful for decreasing the
 WALWriteLock contention.

 The number of WAL buffers that you are using is going to change so quickly 
 as to be utterly meaningless.  I don't really see that there's any 
 statistic we could gather that would tell us how many WAL buffers are 
 needed.  This patch seems like it's on the right track, at least telling 
 you how often you're running out.

 We could keep a high watermark of what's the largest percentage we've
 used, perhaps?

 Sure, but I doubt that would be as informative as this.  It's no big deal if 
 you hit 100% every once in a while; what you really want to know is whether 
 it's happening once per second or once per week.

I'm not suggesting one or the other, I'm suggesting that both values
might be interesting. Though in reality, you'd want that high
watermark to only count if it was the state for more than n, which
is a lot more difficult to get. so yeah, maybe that's overkill to even
try.


-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers


Re: [HACKERS] New statistics for WAL buffer dirty writes

2012-07-07 Thread Satoshi Nagayasu
Hi,

Jeff Janes has pointed out that my previous patch could hold
the number of dirty writes only within a single local backend,
and could not track them across the whole cluster, because the
counter was allocated in local process memory.

That's true, and I have fixed it by moving the counter into
shared memory, as a member of XLogCtlWrite, to keep the total
number of dirty writes for the cluster.

Regards,

2012/07/07 21:00, Satoshi Nagayasu wrote:
 Hi all,
 
 I've created new patch to get/reset statistics of WAL buffer
 writes (flushes) caused by WAL buffer full.
 
 This patch provides two new functions, pg_stat_get_xlog_dirty_write()
 and pg_stat_reset_xlog_dirty_write(), which have been designed to
 determine an appropriate value for WAL buffer size.
 
 If this counter is increasing in the production environment,
 it would mean that the WAL buffer size is too small to hold
 xlog records generated the transactions. So, you can increase
 your WAL buffer size to keep xlog records and to reduce WAL writes.
 
 I think this patch would not affect to WAL write performance,
 but still paying attention to it.
 
 Any comments or suggestions?
 
 Regards,
 
 ---
 [snaga@devvm03 src]$ psql -p 15432 postgres
 psql (9.3devel)
 Type help for help.
 
 postgres=# SELECT pg_stat_get_xlog_dirty_write();
   pg_stat_get_xlog_dirty_write
 --
  0
 (1 row)
 
 postgres=# \q
 [snaga@devvm03 src]$ pgbench -p 15432 -s 10 -c 32 -t 1000 postgres
 Scale option ignored, using pgbench_branches table count = 10
 starting vacuum...end.
 transaction type: TPC-B (sort of)
 scaling factor: 10
 query mode: simple
 number of clients: 32
 number of threads: 1
 number of transactions per client: 1000
 number of transactions actually processed: 32000/32000
 tps = 141.937738 (including connections establishing)
 tps = 142.123457 (excluding connections establishing)
 [snaga@devvm03 src]$ psql -p 15432 postgres
 psql (9.3devel)
 Type help for help.
 
 postgres=# SELECT pg_stat_get_xlog_dirty_write();
   pg_stat_get_xlog_dirty_write
 --
  0
 (1 row)
 
 postgres=# begin;
 BEGIN
 postgres=# DELETE FROM pgbench_accounts;
 DELETE 100
 postgres=# commit;
 COMMIT
 postgres=# SELECT pg_stat_get_xlog_dirty_write();
   pg_stat_get_xlog_dirty_write
 --
  19229
 (1 row)
 
 postgres=# SELECT pg_stat_reset_xlog_dirty_write();
   pg_stat_reset_xlog_dirty_write
 
 
 (1 row)
 
 postgres=# SELECT pg_stat_get_xlog_dirty_write();
   pg_stat_get_xlog_dirty_write
 --
  0
 (1 row)
 
 postgres=# \q
 [snaga@devvm03 src]$
 ---
 
 


-- 
Satoshi Nagayasu sn...@uptime.jp
Uptime Technologies, LLC. http://www.uptime.jp
diff --git a/src/backend/access/transam/xlog.c 
b/src/backend/access/transam/xlog.c
index 642c129..893acf8 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -370,6 +370,11 @@ typedef struct XLogCtlWrite
 {
int curridx;/* cache index of next 
block to write */
pg_time_t   lastSegSwitchTime;  /* time of last xlog 
segment switch */
+
+   /*
+* Counter for WAL dirty buffer writes.
+*/
+   uint64  WalBufferWriteDirtyCount;
 } XLogCtlWrite;
 
 /*
@@ -1504,6 +1509,8 @@ AdvanceXLInsertBuffer(bool new_segment)
}
else
{
+   XLogCtlWrite *Write = XLogCtl->Write;
+
/*
 * Have to write buffers while holding insert 
lock. This is
 * not good, so only write as much as we 
absolutely must.
@@ -1512,6 +1519,10 @@ AdvanceXLInsertBuffer(bool new_segment)
WriteRqst.Write = OldPageRqstPtr;
WriteRqst.Flush = 0;
XLogWrite(WriteRqst, false, false);
+   /*
+* XLogCtlWrite must be protected with 
WALWriteLock.
+*/
+   Write->WalBufferWriteDirtyCount++;
LWLockRelease(WALWriteLock);
TRACE_POSTGRESQL_WAL_BUFFER_WRITE_DIRTY_DONE();
}
@@ -10492,3 +10503,26 @@ SetWalWriterSleeping(bool sleeping)
	xlogctl->WalWriterSleeping = sleeping;
	SpinLockRelease(&xlogctl->info_lck);
 }
+
+uint64
+xlog_dirty_write_counter_get()
+{
+   XLogCtlWrite *Write = XLogCtl->Write;
+   uint64 count;
+
+   LWLockAcquire(WALWriteLock, LW_SHARED);
+   count = Write->WalBufferWriteDirtyCount;
+