Although v7 patch doesn't have commit messages on the patch, I think leave commit message is good for reviewers.

Sure, didn't notice it. Added the commit message to the updated patch.


  * Note: DON'T convert this error to "soft" style (errsave/ereturn). We
  * want this data type to stay permanently in the hard-error world so that
  * it can be used for testing that such cases still work reasonably.

From this point of view, I think this is a supposed way of using widget.

I agree, it's a good approach for checking datatype errors, because that's what was intended.


OTOH widget is declared in create_type.sql and I'm not sure it's ok to use it in another test copy2.sql.

I think that other regress tests with 'widget' type that will be created in the future can be not only in the create_type.sql. So it's not a problem that some type or function is taken from another regress test. For example, the table 'onek' is used in many regress tests.


Regards,

Damir Belyalov

Postgres Professional
From 0e1193e00bb5ee810a015a2baaf7c79e395a54c7 Mon Sep 17 00:00:00 2001
From: Damir Belyalov <dam.be...@gmail.com>
Date: Fri, 15 Sep 2023 11:14:57 +0300
Subject: [PATCH v7] Add new COPY option IGNORE_DATATYPE_ERRORS

Currently entire COPY fails even when there is one unexpected data
regarding data type or range.
IGNORE_DATATYPE_ERRORS ignores these errors and skips them and COPY
data which don't contain problem.

This patch uses the soft error handling infrastructure, which is
introduced by d9f7f5d32f20.

Author: Damir Belyalov, Atsushi Torikoshi

---
 doc/src/sgml/ref/copy.sgml               | 13 +++++++++
 src/backend/commands/copy.c              | 13 +++++++++
 src/backend/commands/copyfrom.c          | 37 ++++++++++++++++++++++++
 src/backend/commands/copyfromparse.c     | 20 ++++++++++---
 src/bin/psql/tab-complete.c              |  3 +-
 src/include/commands/copy.h              |  1 +
 src/include/commands/copyfrom_internal.h |  3 ++
 src/test/regress/expected/copy2.out      | 28 ++++++++++++++++++
 src/test/regress/sql/copy2.sql           | 26 +++++++++++++++++
 9 files changed, 139 insertions(+), 5 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 4d614a0225..d5cdbb4025 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -43,6 +43,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     FORCE_QUOTE { ( <replaceable class="parameter">column_name</replaceable> [, ...] ) | * }
     FORCE_NOT_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
     FORCE_NULL ( <replaceable class="parameter">column_name</replaceable> [, ...] )
+    IGNORE_DATATYPE_ERRORS [ <replaceable class="parameter">boolean</replaceable> ]
     ENCODING '<replaceable class="parameter">encoding_name</replaceable>'
 </synopsis>
  </refsynopsisdiv>
@@ -370,6 +371,18 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
     </listitem>
    </varlistentry>
 
+   <varlistentry>
+    <term><literal>IGNORE_DATATYPE_ERRORS</literal></term>
+    <listitem>
+     <para>
+      Drops rows that contain malformed data while copying. These are rows
+      with columns where the data type's input-function raises an error.
+      This option is not allowed when using binary format.  Note that this
+      is only supported in current <command>COPY</command> syntax.
+     </para>
+    </listitem>
+   </varlistentry>
+
    <varlistentry>
     <term><literal>ENCODING</literal></term>
     <listitem>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index f14fae3308..beb73f5357 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -419,6 +419,7 @@ ProcessCopyOptions(ParseState *pstate,
 	bool		format_specified = false;
 	bool		freeze_specified = false;
 	bool		header_specified = false;
+	bool		ignore_datatype_errors_specified = false;
 	ListCell   *option;
 
 	/* Support external use for option sanity checking */
@@ -458,6 +459,13 @@ ProcessCopyOptions(ParseState *pstate,
 			freeze_specified = true;
 			opts_out->freeze = defGetBoolean(defel);
 		}
+		else if (strcmp(defel->defname, "ignore_datatype_errors") == 0)
+		{
+			if (ignore_datatype_errors_specified)
+				errorConflictingDefElem(defel, pstate);
+			ignore_datatype_errors_specified = true;
+			opts_out->ignore_datatype_errors = defGetBoolean(defel);
+		}
 		else if (strcmp(defel->defname, "delimiter") == 0)
 		{
 			if (opts_out->delim)
@@ -594,6 +602,11 @@ ProcessCopyOptions(ParseState *pstate,
 				(errcode(ERRCODE_SYNTAX_ERROR),
 				 errmsg("cannot specify DEFAULT in BINARY mode")));
 
+	if (opts_out->binary && opts_out->ignore_datatype_errors)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode")));
+
 	/* Set defaults for omitted options */
 	if (!opts_out->delim)
 		opts_out->delim = opts_out->csv_mode ? "," : "\t";
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 70871ed819..b18aea6376 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -752,6 +752,14 @@ CopyFrom(CopyFromState cstate)
 		ti_options |= TABLE_INSERT_FROZEN;
 	}
 
+	/* Set up soft error handler for IGNORE_DATATYPE_ERRORS */
+	if (cstate->opts.ignore_datatype_errors)
+	{
+		ErrorSaveContext escontext = {T_ErrorSaveContext};
+		escontext.details_wanted = true;
+		cstate->escontext = escontext;
+	}
+
 	/*
 	 * We need a ResultRelInfo so we can use the regular executor's
 	 * index-entry-making machinery.  (There used to be a huge amount of code
@@ -987,7 +995,36 @@ CopyFrom(CopyFromState cstate)
 
 		/* Directly store the values/nulls array in the slot */
 		if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
+		{
+			if (cstate->opts.ignore_datatype_errors &&
+				cstate->ignored_errors_count > 0)
+				ereport(WARNING,
+						errmsg("%zd rows were skipped due to data type incompatibility",
+							   cstate->ignored_errors_count));
 			break;
+		}
+
+		/* Soft error occured, skip this tuple and log the reason */
+		if (cstate->escontext.error_occurred)
+		{
+			ErrorSaveContext new_escontext = {T_ErrorSaveContext};
+
+			/* Adjust elevel so we don't jump out */
+			cstate->escontext.error_data->elevel = WARNING;
+
+			/*
+			 * Despite the name, this won't raise an error since elevel is
+			 * WARNING now.
+			 */
+			ThrowErrorData(cstate->escontext.error_data);
+
+			ExecClearTuple(myslot);
+
+			new_escontext.details_wanted = true;
+			cstate->escontext = new_escontext;
+
+			continue;
+		}
 
 		ExecStoreVirtualTuple(myslot);
 
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index f553734582..cf4dad1106 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -70,6 +70,7 @@
 #include "libpq/pqformat.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
+#include "nodes/miscnodes.h"
 #include "pgstat.h"
 #include "port/pg_bswap.h"
 #include "utils/builtins.h"
@@ -956,10 +957,21 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
 				values[m] = ExecEvalExpr(defexprs[m], econtext, &nulls[m]);
 			}
 			else
-				values[m] = InputFunctionCall(&in_functions[m],
-											  string,
-											  typioparams[m],
-											  att->atttypmod);
+				/*
+				 * If IGNORE_DATATYPE_ERRORS is enabled, skip rows with
+				 * datatype errors.
+				 */
+				if (!InputFunctionCallSafe(&in_functions[m],
+										   string,
+										   typioparams[m],
+										   att->atttypmod,
+										   (Node *) &cstate->escontext,
+										   &values[m]))
+				{
+					cstate->ignored_errors_count++;
+
+					return true;
+				}
 
 			cstate->cur_attname = NULL;
 			cstate->cur_attval = NULL;
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index 779fdc90cb..2fba51f648 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -2869,7 +2869,8 @@ psql_completion(const char *text, int start, int end)
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "("))
 		COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
 					  "HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
-					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT");
+					  "FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
+					  "IGNORE_DATATYPE_ERRORS");
 
 	/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
 	else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 33175868f6..c2e55ac21f 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -42,6 +42,7 @@ typedef struct CopyFormatOptions
 								 * -1 if not specified */
 	bool		binary;			/* binary format? */
 	bool		freeze;			/* freeze rows on loading? */
+	bool		ignore_datatype_errors;  /* ignore rows with datatype errors */
 	bool		csv_mode;		/* Comma Separated Value format? */
 	CopyHeaderChoice header_line;	/* header line? */
 	char	   *null_print;		/* NULL marker string (server encoding!) */
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index ac2c16f8b8..e5bdae2d25 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -16,6 +16,7 @@
 
 #include "commands/copy.h"
 #include "commands/trigger.h"
+#include "nodes/miscnodes.h"
 
 /*
  * Represents the different source cases we need to worry about at
@@ -94,6 +95,8 @@ typedef struct CopyFromStateData
 								 * default value */
 	FmgrInfo   *in_functions;	/* array of input functions for each attrs */
 	Oid		   *typioparams;	/* array of element types for in_functions */
+	ErrorSaveContext escontext; /* soft error trapper during in_functions execution */
+	int64		ignored_errors_count; /* total number of ignored errors */
 	int		   *defmap;			/* array of default att numbers related to
 								 * missing att */
 	ExprState **defexprs;		/* array of default att expressions for all
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index faf1a4d1b0..ac9c99f083 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -82,6 +82,8 @@ COPY x to stdin (format BINARY, delimiter ',');
 ERROR:  cannot specify DELIMITER in BINARY mode
 COPY x to stdin (format BINARY, null 'x');
 ERROR:  cannot specify NULL in BINARY mode
+COPY x to stdin (format BINARY, ignore_datatype_errors);
+ERROR:  cannot specify IGNORE_DATATYPE_ERRORS in BINARY mode
 COPY x to stdin (format TEXT, force_quote(a));
 ERROR:  COPY force quote available only in CSV mode
 COPY x from stdin (format CSV, force_quote(a));
@@ -666,6 +668,30 @@ SELECT * FROM instead_of_insert_tbl;
 (2 rows)
 
 COMMIT;
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  value "3333333333" is out of range for type integer
+WARNING:  invalid input syntax for type integer: "a"
+WARNING:  invalid input syntax for type integer: ""
+WARNING:  4 rows were skipped due to data type incompatibility
+SELECT * FROM check_ign_err;
+ n |  m  | k 
+---+-----+---
+ 1 | {1} | 1
+ 5 | {5} | 5
+(2 rows)
+
+-- test datatype error that can't be handled as soft: should fail
+CREATE TABLE hard_err(foo widget);
+COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+ERROR:  invalid input syntax for type widget: "1"
+CONTEXT:  COPY hard_err, line 1, column foo: "1"
+-- test missing data: should fail
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+ERROR:  missing data for column "k"
+CONTEXT:  COPY check_ign_err, line 1: "1	{1}"
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -680,6 +706,8 @@ DROP TABLE instead_of_insert_tbl;
 DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
+DROP TABLE check_ign_err;
+DROP TABLE hard_err;
 --
 -- COPY FROM ... DEFAULT
 --
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index d759635068..e8c2c1aca3 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -70,6 +70,7 @@ COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
 -- incorrect options
 COPY x to stdin (format BINARY, delimiter ',');
 COPY x to stdin (format BINARY, null 'x');
+COPY x to stdin (format BINARY, ignore_datatype_errors);
 COPY x to stdin (format TEXT, force_quote(a));
 COPY x from stdin (format CSV, force_quote(a));
 COPY x to stdout (format TEXT, force_not_null(a));
@@ -464,6 +465,29 @@ test1
 SELECT * FROM instead_of_insert_tbl;
 COMMIT;
 
+-- tests for IGNORE_DATATYPE_ERRORS option
+CREATE TABLE check_ign_err (n int, m int[], k int);
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1	{1}	1
+a	{2}	2
+3	{3}	3333333333
+4	{a, 4}	4
+
+5	{5}	5
+\.
+SELECT * FROM check_ign_err;
+
+-- test datatype error that can't be handled as soft: should fail
+CREATE TABLE hard_err(foo widget);
+COPY hard_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1
+\.
+
+-- test missing data: should fail
+COPY check_ign_err FROM STDIN WITH (IGNORE_DATATYPE_ERRORS);
+1	{1}
+\.
+
 -- clean up
 DROP TABLE forcetest;
 DROP TABLE vistest;
@@ -478,6 +502,8 @@ DROP TABLE instead_of_insert_tbl;
 DROP VIEW instead_of_insert_tbl_view;
 DROP VIEW instead_of_insert_tbl_view_2;
 DROP FUNCTION fun_instead_of_insert_tbl();
+DROP TABLE check_ign_err;
+DROP TABLE hard_err;
 
 --
 -- COPY FROM ... DEFAULT
-- 
2.34.1

Reply via email to