Hello,

At Wed, 20 Apr 2016 15:14:16 +0200, Petr Jelinek <p...@2ndquadrant.com> wrote 
in <571780a8.4070...@2ndquadrant.com>
> I noticed sporadic segfaults when selecting from pg_stat_activity on
> current HEAD.
> 
> The culprit is the 53be0b1add7064ca5db3cd884302dfc3268d884e commit
> which added more wait info into the pg_stat_get_activity(). More
> specifically, the following code is broken:
> 
> +                       proc = BackendPidGetProc(beentry->st_procpid);
> + wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
> 
> This needs to check if proc is NULL. When reading the code I noticed
> that the new functions pg_stat_get_backend_wait_event_type() and
> pg_stat_get_backend_wait_event() suffer from the same problem.

Good catch.

> Here is PoC patch which fixes the problem. I am wondering if we should
> raise warning in the pg_stat_get_backend_wait_event_type() and
> pg_stat_get_backend_wait_event() like the pg_signal_backend() does
> when proc is NULL instead of just returning NULL which is what this
> patch does though.

It still makes the two relevant columns in pg_stat_activity
inconsistent each other since it reads the procarray entry twice
without a lock on procarray.

The attached patch adds pgstat_get_wait_event_info to read
wait_event_info in more appropriate way. Then change
pg_stat_get_wait_event(_type) to take the wait_event_info.

Does this work for you?

We still may have an inconsistency between weit_event and query,
or beentry itself but preventing it would need to have local
copies of wait_event_info of all corresponding entries in
procarray, which will be overdone.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 41f4374..999b7e7 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -55,6 +55,7 @@
 #include "storage/latch.h"
 #include "storage/lmgr.h"
 #include "storage/pg_shmem.h"
+#include "storage/procarray.h"
 #include "storage/procsignal.h"
 #include "storage/sinvaladt.h"
 #include "utils/ascii.h"
@@ -3118,6 +3119,27 @@ pgstat_read_current_status(void)
 }
 
 /* ----------
+ * pgstat_get_wait_event_info() -
+ *
+ *	Return the wait_event_info for a procid. 0 if the proc is no longer
+ *	living or has no waiting event.
+ */
+uint32
+pgstat_get_wait_event_info(int procpid)
+{
+	uint32		ret = 0;
+	PGPROC	   *proc;
+
+	LWLockAcquire(ProcArrayLock, LW_SHARED);
+	proc = BackendPidGetProcWithLock(procpid);
+	if (proc)
+		ret = proc->wait_event_info;
+	LWLockRelease(ProcArrayLock);
+
+	return ret;
+}
+
+/* ----------
  * pgstat_get_wait_event_type() -
  *
  *	Return a string representing the current wait event type, backend is
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index 64c4cc4..72776ab 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -679,7 +679,7 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 		bool		nulls[PG_STAT_GET_ACTIVITY_COLS];
 		LocalPgBackendStatus *local_beentry;
 		PgBackendStatus *beentry;
-		PGPROC	   *proc;
+		uint32		wait_event_info;
 		const char *wait_event_type;
 		const char *wait_event;
 
@@ -716,6 +716,14 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 			continue;
 		}
 
+		/*
+		 * Read wait event. This may be inconsistent with the beentry when the
+		 * corresponding procarray entry has been removed or modified after
+		 * the beentry was copied, but we don't need such strict consistency
+		 * for this information.
+		 */
+		wait_event_info = pgstat_get_wait_event_info(beentry->st_procpid);
+
 		/* Values available to all callers */
 		values[0] = ObjectIdGetDatum(beentry->st_databaseid);
 		values[1] = Int32GetDatum(beentry->st_procpid);
@@ -782,14 +790,13 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 
 			values[5] = CStringGetTextDatum(beentry->st_activity);
 
-			proc = BackendPidGetProc(beentry->st_procpid);
-			wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
+			wait_event_type = pgstat_get_wait_event_type(wait_event_info);
 			if (wait_event_type)
 				values[6] = CStringGetTextDatum(wait_event_type);
 			else
 				nulls[6] = true;
 
-			wait_event = pgstat_get_wait_event(proc->wait_event_info);
+			wait_event = pgstat_get_wait_event(wait_event_info);
 			if (wait_event)
 				values[7] = CStringGetTextDatum(wait_event);
 			else
@@ -983,7 +990,6 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS)
 {
 	int32		beid = PG_GETARG_INT32(0);
 	PgBackendStatus *beentry;
-	PGPROC	   *proc;
 	const char *wait_event_type;
 
 	if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
@@ -992,8 +998,8 @@ pg_stat_get_backend_wait_event_type(PG_FUNCTION_ARGS)
 		wait_event_type = "<insufficient privilege>";
 	else
 	{
-		proc = BackendPidGetProc(beentry->st_procpid);
-		wait_event_type = pgstat_get_wait_event_type(proc->wait_event_info);
+		wait_event_type = pgstat_get_wait_event_type(
+			pgstat_get_wait_event_info(beentry->st_procpid));
 	}
 
 	if (!wait_event_type)
@@ -1007,7 +1013,6 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS)
 {
 	int32		beid = PG_GETARG_INT32(0);
 	PgBackendStatus *beentry;
-	PGPROC	   *proc;
 	const char *wait_event;
 
 	if ((beentry = pgstat_fetch_stat_beentry(beid)) == NULL)
@@ -1016,8 +1021,8 @@ pg_stat_get_backend_wait_event(PG_FUNCTION_ARGS)
 		wait_event = "<insufficient privilege>";
 	else
 	{
-		proc = BackendPidGetProc(beentry->st_procpid);
-		wait_event = pgstat_get_wait_event(proc->wait_event_info);
+		wait_event = pgstat_get_wait_event(
+			pgstat_get_wait_event_info(beentry->st_procpid));
 	}
 
 	if (!wait_event)
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 4be09fe..228e865 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -969,6 +969,7 @@ extern void pgstat_report_activity(BackendState state, const char *cmd_str);
 extern void pgstat_report_tempfile(size_t filesize);
 extern void pgstat_report_appname(const char *appname);
 extern void pgstat_report_xact_timestamp(TimestampTz tstamp);
+extern uint32 pgstat_get_wait_event_info(int procpid);
 extern const char *pgstat_get_wait_event(uint32 wait_event_info);
 extern const char *pgstat_get_wait_event_type(uint32 wait_event_info);
 extern const char *pgstat_get_backend_current_activity(int pid, bool checkUser);
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to