From c4f47ddf81acb95e465c488057aa8701f5614747 Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm@gmail.com>
Date: Fri, 12 Feb 2021 14:06:44 +0100
Subject: [PATCH v11 1/3] Add progress-reported components for COPY progress
 reporting

The command, io target and excluded tuple count (by the
COPY ... FROM ... WHERE -clause) are now reported in the pg_stat_progress_copy
view. Additionally, the column lines_processed is renamed to tuples_processed
to disambiguate the meaning of this column in cases of CSV and BINARY copies
and to stay consistent with regards to names in the pg_stat_progress_*-views.

Of special interest is the reporting of io_type, with which we can
distinguish logical replications' initial table synchronization workers'
progress without having to join the pg_stat_activity view.
---
 doc/src/sgml/monitoring.sgml         | 42 +++++++++++++++++++++++++---
 src/backend/catalog/system_views.sql | 11 +++++++-
 src/backend/commands/copyfrom.c      | 27 ++++++++++++++++--
 src/backend/commands/copyto.c        | 22 ++++++++++++---
 src/include/commands/progress.h      | 15 +++++++++-
 src/test/regress/expected/rules.out  | 15 +++++++++-
 6 files changed, 118 insertions(+), 14 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 3513e127b7..c10de3406d 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6541,8 +6541,32 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
        <structfield>relid</structfield> <type>oid</type>
       </para>
       <para>
-       OID of the table on which the <command>COPY</command> command is executed.
-       It is set to 0 if copying from a <command>SELECT</command> query.
+       OID of the table on which the <command>COPY</command> command is being
+       executed. It is set to 0 if copying from a <command>SELECT</command>
+       query.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>command</structfield> <type>text</type>
+      </para>
+      <para>
+       The command that is running: <literal>COPY FROM</literal>, or
+       <literal>COPY TO</literal>.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>io_type</structfield> <type>text</type>
+      </para>
+      <para>
+       The io type that the data is read from or written to:
+       <literal>FILE</literal>, <literal>PROGRAM</literal>,
+       <literal>STDIO</literal> (for <command>COPY FROM STDIN</command> and
+       <command>COPY TO STDOUT</command>), or <literal>CALLBACK</literal>
+       (used in the table synchronization background worker).
       </para></entry>
      </row>
 
@@ -6567,10 +6591,20 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
 
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
-       <structfield>lines_processed</structfield> <type>bigint</type>
+       <structfield>tuples_processed</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of tuples already processed by <command>COPY</command> command.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>tuples_excluded</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of lines already processed by <command>COPY</command> command.
+       Number of tuples not processed because they were excluded by the
+       <command>WHERE</command> clause of the <command>COPY</command> command.
       </para></entry>
      </row>
     </tbody>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index fc94a73a54..d94714adb0 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1130,9 +1130,18 @@ CREATE VIEW pg_stat_progress_copy AS
     SELECT
         S.pid AS pid, S.datid AS datid, D.datname AS datname,
         S.relid AS relid,
+        CASE S.param5 WHEN 1 THEN 'COPY FROM'
+                      WHEN 2 THEN 'COPY TO'
+                      END AS command,
+        CASE S.param6 WHEN 1 THEN 'FILE'
+                      WHEN 2 THEN 'PROGRAM'
+                      WHEN 3 THEN 'STDIO'
+                      WHEN 4 THEN 'CALLBACK'
+                      END AS io_type,
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
-        S.param3 AS lines_processed
+        S.param3 AS tuples_processed,
+        S.param4 AS tuples_excluded
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index f05e2d2347..72b71f5058 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -539,7 +539,8 @@ CopyFrom(CopyFromState cstate)
 	BulkInsertState bistate = NULL;
 	CopyInsertMethod insertMethod;
 	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
-	uint64		processed = 0;
+	int64		processed = 0;
+	int64		excluded = 0;
 	bool		has_before_insert_row_trig;
 	bool		has_instead_insert_row_trig;
 	bool		leafpart_use_multi_insert = false;
@@ -869,7 +870,11 @@ CopyFrom(CopyFromState cstate)
 			econtext->ecxt_scantuple = myslot;
 			/* Skip items that don't match COPY's WHERE clause */
 			if (!ExecQual(cstate->qualexpr, econtext))
+			{
+				/* Report that this tuple was filtered out by the WHERE clause */
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_EXCLUDED, ++excluded);
 				continue;
+			}
 		}
 
 		/* Determine the partition to insert the tuple into */
@@ -1107,7 +1112,7 @@ CopyFrom(CopyFromState cstate)
 			 * for counting tuples inserted by an INSERT command. Update
 			 * progress of the COPY command as well.
 			 */
-			pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed);
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed);
 		}
 	}
 
@@ -1193,6 +1198,16 @@ BeginCopyFrom(ParseState *pstate,
 	ExprState **defexprs;
 	MemoryContext oldcontext;
 	bool		volatile_defexprs;
+	const int	progress_cols[] = {
+		PROGRESS_COPY_COMMAND,
+		PROGRESS_COPY_IO_TYPE,
+		PROGRESS_COPY_BYTES_TOTAL
+	};
+	int64		progress_vals[] = {
+		PROGRESS_COPY_COMMAND_FROM,
+		0,
+		0
+	};
 
 	/* Allocate workspace and zero all fields */
 	cstate = (CopyFromStateData *) palloc0(sizeof(CopyFromStateData));
@@ -1430,11 +1445,13 @@ BeginCopyFrom(ParseState *pstate,
 
 	if (data_source_cb)
 	{
+		progress_vals[1] = PROGRESS_COPY_IO_TYPE_CALLBACK;
 		cstate->copy_src = COPY_CALLBACK;
 		cstate->data_source_cb = data_source_cb;
 	}
 	else if (pipe)
 	{
+		progress_vals[1] = PROGRESS_COPY_IO_TYPE_STDIO;
 		Assert(!is_program);	/* the grammar does not allow this */
 		if (whereToSendOutput == DestRemote)
 			ReceiveCopyBegin(cstate);
@@ -1447,6 +1464,7 @@ BeginCopyFrom(ParseState *pstate,
 
 		if (cstate->is_program)
 		{
+			progress_vals[1] = PROGRESS_COPY_IO_TYPE_PROGRAM;
 			cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_R);
 			if (cstate->copy_file == NULL)
 				ereport(ERROR,
@@ -1458,6 +1476,7 @@ BeginCopyFrom(ParseState *pstate,
 		{
 			struct stat st;
 
+			progress_vals[1] = PROGRESS_COPY_IO_TYPE_FILE;
 			cstate->copy_file = AllocateFile(cstate->filename, PG_BINARY_R);
 			if (cstate->copy_file == NULL)
 			{
@@ -1484,10 +1503,12 @@ BeginCopyFrom(ParseState *pstate,
 						(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 						 errmsg("\"%s\" is a directory", cstate->filename)));
 
-			pgstat_progress_update_param(PROGRESS_COPY_BYTES_TOTAL, st.st_size);
+			progress_vals[2] = st.st_size;
 		}
 	}
 
+	pgstat_progress_update_multi_param(3, progress_cols, progress_vals);
+
 	if (cstate->opts.binary)
 	{
 		/* Read and verify binary header */
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index 46155015cf..1d4bdabb3a 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -353,6 +353,14 @@ BeginCopyTo(ParseState *pstate,
 	TupleDesc	tupDesc;
 	int			num_phys_attrs;
 	MemoryContext oldcontext;
+	const int	progress_cols[] = {
+		PROGRESS_COPY_COMMAND,
+		PROGRESS_COPY_IO_TYPE
+	};
+	int64		progress_vals[] = {
+		PROGRESS_COPY_COMMAND_TO,
+		0
+	};
 
 	if (rel != NULL && rel->rd_rel->relkind != RELKIND_RELATION)
 	{
@@ -659,6 +667,8 @@ BeginCopyTo(ParseState *pstate,
 
 	if (pipe)
 	{
+		progress_vals[1] = PROGRESS_COPY_IO_TYPE_STDIO;
+
 		Assert(!is_program);	/* the grammar does not allow this */
 		if (whereToSendOutput != DestRemote)
 			cstate->copy_file = stdout;
@@ -670,6 +680,7 @@ BeginCopyTo(ParseState *pstate,
 
 		if (is_program)
 		{
+			progress_vals[1] = PROGRESS_COPY_IO_TYPE_PROGRAM;
 			cstate->copy_file = OpenPipeStream(cstate->filename, PG_BINARY_W);
 			if (cstate->copy_file == NULL)
 				ereport(ERROR,
@@ -682,6 +693,7 @@ BeginCopyTo(ParseState *pstate,
 			mode_t		oumask; /* Pre-existing umask value */
 			struct stat st;
 
+			progress_vals[1] = PROGRESS_COPY_IO_TYPE_FILE;
 			/*
 			 * Prevent write to relative path ... too easy to shoot oneself in
 			 * the foot by overwriting a database file ...
@@ -731,6 +743,8 @@ BeginCopyTo(ParseState *pstate,
 	/* initialize progress */
 	pgstat_progress_start_command(PROGRESS_COMMAND_COPY,
 								  cstate->rel ? RelationGetRelid(cstate->rel) : InvalidOid);
+	pgstat_progress_update_multi_param(2, progress_cols, progress_vals);
+
 	cstate->bytes_processed = 0;
 
 	MemoryContextSwitchTo(oldcontext);
@@ -881,8 +895,8 @@ DoCopyTo(CopyToState cstate)
 			/* Format and send the data */
 			CopyOneRowTo(cstate, slot);
 
-			/* Increment amount of processed tuples and update the progress */
-			pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed);
+			/* Increment the number of processed tuples, and report the progress */
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed);
 		}
 
 		ExecDropSingleTupleTableSlot(slot);
@@ -1251,8 +1265,8 @@ copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
 	/* Send the data */
 	CopyOneRowTo(cstate, slot);
 
-	/* Increment amount of processed tuples and update the progress */
-	pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++myState->processed);
+	/* Increment the number of processed tuples, and report the progress */
+	pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++myState->processed);
 
 	return true;
 }
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index 95ec5d02e9..508b562d92 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -136,6 +136,19 @@
 /* Commands of PROGRESS_COPY */
 #define PROGRESS_COPY_BYTES_PROCESSED 0
 #define PROGRESS_COPY_BYTES_TOTAL 1
-#define PROGRESS_COPY_LINES_PROCESSED 2
+#define PROGRESS_COPY_TUPLES_PROCESSED 2
+#define PROGRESS_COPY_TUPLES_EXCLUDED 3
+#define PROGRESS_COPY_COMMAND 4
+#define PROGRESS_COPY_IO_TYPE 5
+
+/* Commands of PROGRESS_COPY_COMMAND */
+#define PROGRESS_COPY_COMMAND_FROM 1
+#define PROGRESS_COPY_COMMAND_TO 2
+
+/* Types of PROGRESS_COPY_INOUT_TYPE */
+#define PROGRESS_COPY_IO_TYPE_FILE 1
+#define PROGRESS_COPY_IO_TYPE_PROGRAM 2
+#define PROGRESS_COPY_IO_TYPE_STDIO 3
+#define PROGRESS_COPY_IO_TYPE_CALLBACK 4
 
 #endif
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index b1c9b7bdfe..ace98af739 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1950,9 +1950,22 @@ pg_stat_progress_copy| SELECT s.pid,
     s.datid,
     d.datname,
     s.relid,
+        CASE s.param5
+            WHEN 1 THEN 'COPY FROM'::text
+            WHEN 2 THEN 'COPY TO'::text
+            ELSE NULL::text
+        END AS command,
+        CASE s.param6
+            WHEN 1 THEN 'FILE'::text
+            WHEN 2 THEN 'PROGRAM'::text
+            WHEN 3 THEN 'STDIO'::text
+            WHEN 4 THEN 'CALLBACK'::text
+            ELSE NULL::text
+        END AS io_type,
     s.param1 AS bytes_processed,
     s.param2 AS bytes_total,
-    s.param3 AS lines_processed
+    s.param3 AS tuples_processed,
+    s.param4 AS tuples_excluded
    FROM (pg_stat_get_progress_info('COPY'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
 pg_stat_progress_create_index| SELECT s.pid,
-- 
2.20.1

