On Wed, 6 Jan 2021 at 22:45, Tomas Vondra <tomas.von...@enterprisedb.com> wrote:
>
> On 1/5/21 11:02 AM, Josef Šimánek wrote:
> > I'm attaching the whole patch since commitfest failed to ingest the
> > last incremental on CI.
> >
>
> Yeah, the whole patch needs to be attached for the commitfest tester to
> work correctly - it can't apply pieces from multiple messages, etc.
>
> Anyway, I pushed this last version of patch, after a couple more tweaks,
> mainly to the docs - one place used pg_stat_copy_progress, the section
> was not indexed properly, and so on.

Thank you all, I'd love to use this in the future to keep track of
(e.g.) my backup/restore progress.

For my previously mentioned extension to keep track of filtered tuples
that are excluded by the WHERE-clause, PFA patch 0001 that keeps track
of that, in line with the current column name style of lines.

If so desired, I'll split this off into a different thread & CF entry.

> I see Matthias proposed to change "lines" to "tuples" - I only saw the
> message after pushing, but I probably wouldn't make that change anyway.
> The CSV docs seem to talk about lines, newlines etc. so it seems fine.
> If not, we can change that.

The CSV docs, sure. But copy doesn't only process CSVs; it also has
text (which does have a # lines = # tuples / rows guarantee) and
binary (in which the 'line' vocabulary doesn't make sense, and in
which the 'tuples' vocabulary is used). Additionally, most mentions of
postgres' logical rows/tuples in the COPY documentation use the 'rows'
terminology ('tuples' for FORMAT BINARY), and use 'line' only for
external format's textual representation's strings delimited by
newlines (which I believe is not exactly what we're counting).

One common user of COPY is the pg_dump tool, and that uses binary
format by default (iirc).

Additionally, all comments surrounding the *LINES_PROCESSED updates
only mention 'tuples', so I'd like to strongly suggest (a variant of)
attached patch 0002 to keep the vocabulary consistent by using
'tuples' instead of 'lines'.


With regards,

Matthias van de Meent
From d988c86d60d2b49b52e547b42c644493a377d44a Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postg...@gmail.com>
Date: Thu, 7 Jan 2021 16:39:57 +0100
Subject: [PATCH v1 1/2] Add progress reporting for filtered rows.

COPY ... FROM ... WHERE (condition) may drop rows, which we now register as
explicitly being dropped.
---
 doc/src/sgml/monitoring.sgml         | 10 ++++++++++
 src/backend/catalog/system_views.sql |  3 ++-
 src/backend/commands/copyfrom.c      |  5 +++++
 src/include/commands/progress.h      |  1 +
 4 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 43fe8ae383..bf18ea23bd 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6498,6 +6498,16 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
        Number of lines already processed by <command>COPY</command> command.
       </para></entry>
      </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>lines_filtered</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of lines not processed due to being filtered by the
+       <command>WHERE</command> clause of the <command>COPY</command> command.
+      </para></entry>
+     </row>
     </tbody>
    </tgroup>
   </table>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 5d89e77dbe..83d62c5286 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1124,7 +1124,8 @@ CREATE VIEW pg_stat_progress_copy AS
         S.relid AS relid,
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
-        S.param3 AS lines_processed
+        S.param3 AS lines_processed,
+        S.param4 AS lines_filtered
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 08b6f782c7..212f4c67d6 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -540,6 +540,7 @@ CopyFrom(CopyFromState cstate)
 	CopyInsertMethod insertMethod;
 	CopyMultiInsertInfo multiInsertInfo = {0};	/* pacify compiler */
 	uint64		processed = 0;
+	uint64		filtered = 0;
 	bool		has_before_insert_row_trig;
 	bool		has_instead_insert_row_trig;
 	bool		leafpart_use_multi_insert = false;
@@ -868,7 +869,11 @@ CopyFrom(CopyFromState cstate)
 			econtext->ecxt_scantuple = myslot;
 			/* Skip items that don't match COPY's WHERE clause */
 			if (!ExecQual(cstate->qualexpr, econtext))
+			{
+				/* Report that this tuple was filtered out by the WHERE clause */
+				pgstat_progress_update_param(PROGRESS_COPY_LINES_FILTERED, ++filtered);
 				continue;
+			}
 		}
 
 		/* Determine the partition to insert the tuple into */
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index 95ec5d02e9..eb0f4e70d6 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -137,5 +137,6 @@
 #define PROGRESS_COPY_BYTES_PROCESSED 0
 #define PROGRESS_COPY_BYTES_TOTAL 1
 #define PROGRESS_COPY_LINES_PROCESSED 2
+#define PROGRESS_COPY_LINES_FILTERED 3
 
 #endif
-- 
2.20.1

From 8c071aa1757d3471af2868a6c300106241e7f6ed Mon Sep 17 00:00:00 2001
From: Matthias van de Meent <boekewurm+postg...@gmail.com>
Date: Thu, 7 Jan 2021 16:51:30 +0100
Subject: [PATCH v1 2/2] Rename lines* to tuples* in COPY progress reporting
 view

This pulls the names of the columns in line with the other progress
reporting views and the comments surrounding the code that updates the columns.

It also makes it clear to the user that for the foreign data consumed or
produced in the COPY command, the line count need not be related to these
columns.
---
 doc/src/sgml/monitoring.sgml         | 8 ++++----
 src/backend/catalog/system_views.sql | 4 ++--
 src/backend/commands/copyfrom.c      | 4 ++--
 src/backend/commands/copyto.c        | 4 ++--
 src/include/commands/progress.h      | 4 ++--
 5 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index bf18ea23bd..f9b474ae0b 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -6492,19 +6492,19 @@ SELECT pg_stat_get_backend_pid(s.backendid) AS pid,
 
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
-       <structfield>lines_processed</structfield> <type>bigint</type>
+       <structfield>tuples_processed</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of lines already processed by <command>COPY</command> command.
+       Number of tuples already processed by <command>COPY</command> command.
       </para></entry>
      </row>
 
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
-       <structfield>lines_filtered</structfield> <type>bigint</type>
+       <structfield>tuples_filtered</structfield> <type>bigint</type>
       </para>
       <para>
-       Number of lines not processed due to being filtered by the
+       Number of tuples not processed due to being filtered by the
        <command>WHERE</command> clause of the <command>COPY</command> command.
       </para></entry>
      </row>
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 83d62c5286..3e73b2435a 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1124,8 +1124,8 @@ CREATE VIEW pg_stat_progress_copy AS
         S.relid AS relid,
         S.param1 AS bytes_processed,
         S.param2 AS bytes_total,
-        S.param3 AS lines_processed,
-        S.param4 AS lines_filtered
+        S.param3 AS tuples_processed,
+        S.param4 AS tuples_filtered
     FROM pg_stat_get_progress_info('COPY') AS S
         LEFT JOIN pg_database D ON S.datid = D.oid;
 
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 212f4c67d6..d3eedc4194 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -871,7 +871,7 @@ CopyFrom(CopyFromState cstate)
 			if (!ExecQual(cstate->qualexpr, econtext))
 			{
 				/* Report that this tuple was filtered out by the WHERE clause */
-				pgstat_progress_update_param(PROGRESS_COPY_LINES_FILTERED, ++filtered);
+				pgstat_progress_update_param(PROGRESS_COPY_TUPLES_FILTERED, ++filtered);
 				continue;
 			}
 		}
@@ -1110,7 +1110,7 @@ CopyFrom(CopyFromState cstate)
 			 * for counting tuples inserted by an INSERT command. Update
 			 * progress of the COPY command as well.
 			 */
-			pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed);
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed);
 		}
 	}
 
diff --git a/src/backend/commands/copyto.c b/src/backend/commands/copyto.c
index e04ec1e331..9ffe7a6ee3 100644
--- a/src/backend/commands/copyto.c
+++ b/src/backend/commands/copyto.c
@@ -954,7 +954,7 @@ CopyTo(CopyToState cstate)
 			CopyOneRowTo(cstate, slot);
 
 			/* Increment amount of processed tuples and update the progress */
-			pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++processed);
+			pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++processed);
 		}
 
 		ExecDropSingleTupleTableSlot(slot);
@@ -1321,7 +1321,7 @@ copy_dest_receive(TupleTableSlot *slot, DestReceiver *self)
 	CopyOneRowTo(cstate, slot);
 
 	/* Increment amount of processed tuples and update the progress */
-	pgstat_progress_update_param(PROGRESS_COPY_LINES_PROCESSED, ++myState->processed);
+	pgstat_progress_update_param(PROGRESS_COPY_TUPLES_PROCESSED, ++myState->processed);
 
 	return true;
 }
diff --git a/src/include/commands/progress.h b/src/include/commands/progress.h
index eb0f4e70d6..31dfcc0094 100644
--- a/src/include/commands/progress.h
+++ b/src/include/commands/progress.h
@@ -136,7 +136,7 @@
 /* Commands of PROGRESS_COPY */
 #define PROGRESS_COPY_BYTES_PROCESSED 0
 #define PROGRESS_COPY_BYTES_TOTAL 1
-#define PROGRESS_COPY_LINES_PROCESSED 2
-#define PROGRESS_COPY_LINES_FILTERED 3
+#define PROGRESS_COPY_TUPLES_PROCESSED 2
+#define PROGRESS_COPY_TUPLES_FILTERED 3
 
 #endif
-- 
2.20.1

Reply via email to