+ /*
+ * Here we end processing of current COPY row.
+ * Update copy state counter for number of erroneous rows.
+ */
+ cstate->num_errors++;
+ cstate->escontext->error_occurred = true;
+
+ /* Only print this NOTICE message, if it will not be followed by ERROR */
+ if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE &&
+ (
+ (cstate->opts.on_error == COPY_ON_ERROR_NULL &&
cstate->opts.reject_limit > 0 && cstate->num_errors <=
cstate->opts.reject_limit) ||
+ (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
(cstate->opts.reject_limit == 0 || cstate->num_errors <=
cstate->opts.reject_limit))
+ ))
{
This condition is kind of hard to comprehend,
so attached is a simpler version of the patch, based on v8.
For COPY (on_error set_to_null):
1. Do not allow specifying the reject_limit option.
2. ereport(ERROR) on a not-null constraint violation for a domain type.
for example:
CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
CREATE TABLE t1 (a d_int_not_null);
COPY t1 FROM STDIN WITH (on_error set_to_null);
Each of these 3 input values: \N a -1
will error out; the error message will be:
ERROR: domain d_int_not_null does not allow null values
From 47bdeda8911596950463e70e33253e773ef13192 Mon Sep 17 00:00:00 2001
From: jian he <[email protected]>
Date: Fri, 13 Dec 2024 11:03:16 +0800
Subject: [PATCH v9 1/1] new COPY on_error option: set_to_null
Extend the "on_error" action by introducing a new option: on_error set_to_null.
Due to the current grammar, we cannot use "on_error null", so I chose on_error set_to_null.
Any data type conversion error during COPY FROM will set that column's value to NULL.
This only works for COPY FROM with a non-binary format.
However, this still respects not-null constraints: if an erroneous value is
converted to NULL but the column has a not-null constraint, a not-null
constraint violation ERROR will be reported.
The regression tests cover the corner case of a domain with a not-null constraint.
discussion: https://postgr.es/m/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=bp3d1_asfe...@mail.gmail.com
---
doc/src/sgml/ref/copy.sgml | 26 ++++++++----
src/backend/commands/copy.c | 6 ++-
src/backend/commands/copyfrom.c | 31 +++++++++------
src/backend/commands/copyfromparse.c | 47 +++++++++++++++++++++-
src/bin/psql/tab-complete.in.c | 2 +-
src/include/commands/copy.h | 1 +
src/include/commands/copyfrom_internal.h | 4 +-
src/test/regress/expected/copy2.out | 50 ++++++++++++++++++++++++
src/test/regress/sql/copy2.sql | 45 +++++++++++++++++++++
9 files changed, 188 insertions(+), 24 deletions(-)
diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 8394402f09..4b847c25f3 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -394,21 +394,33 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable
Specifies how to behave when encountering an error converting a column's
input value into its data type.
An <replaceable class="parameter">error_action</replaceable> value of
- <literal>stop</literal> means fail the command, while
- <literal>ignore</literal> means discard the input row and continue with the next one.
+ <literal>stop</literal> means fail the command,
+ <literal>ignore</literal> means discard the input row and continue with the next one, and
+ <literal>set_to_null</literal> means replace columns containing erroneous input values with <literal>null</literal> and move to the next field.
The default is <literal>stop</literal>.
</para>
<para>
- The <literal>ignore</literal> option is applicable only for <command>COPY FROM</command>
+ The <literal>ignore</literal> and <literal>set_to_null</literal> options are applicable only for <command>COPY FROM</command>
when the <literal>FORMAT</literal> is <literal>text</literal> or <literal>csv</literal>.
</para>
<para>
- A <literal>NOTICE</literal> message containing the ignored row count is
+ For <literal>ignore</literal> option,
+ a <literal>NOTICE</literal> message containing the ignored row count is
emitted at the end of the <command>COPY FROM</command> if at least one
- row was discarded. When <literal>LOG_VERBOSITY</literal> option is set to
- <literal>verbose</literal>, a <literal>NOTICE</literal> message
+ row was discarded.
+ For <literal>set_to_null</literal> option,
+ a <literal>NOTICE</literal> message containing the number of rows in which erroneous input values were replaced with null is
+ emitted at the end of the <command>COPY FROM</command> if at least one row had a value replaced.
+ </para>
+ <para>
+ When <literal>LOG_VERBOSITY</literal> option is set to
+ <literal>verbose</literal>, for <literal>ignore</literal> option, a <literal>NOTICE</literal> message
containing the line of the input file and the column name whose input
- conversion has failed is emitted for each discarded row.
+ conversion has failed is emitted for each discarded row;
+ for <literal>set_to_null</literal> option,
+ a <literal>NOTICE</literal> message
+ containing the line of the input file and the column name whose input value has been replaced by null
+ is emitted for each row where input conversion has failed.
When it is set to <literal>silent</literal>, no message is emitted
regarding ignored rows.
</para>
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 2d98ecf3f4..f494d2d64c 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -403,12 +403,14 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
parser_errposition(pstate, def->location)));
/*
- * Allow "stop", or "ignore" values.
+ * Allow "stop", "ignore", "set_to_null" values.
*/
if (pg_strcasecmp(sval, "stop") == 0)
return COPY_ON_ERROR_STOP;
if (pg_strcasecmp(sval, "ignore") == 0)
return COPY_ON_ERROR_IGNORE;
+ if (pg_strcasecmp(sval, "set_to_null") == 0)
+ return COPY_ON_ERROR_NULL;
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -918,7 +920,7 @@ ProcessCopyOptions(ParseState *pstate,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("only ON_ERROR STOP is allowed in BINARY mode")));
- if (opts_out->reject_limit && !opts_out->on_error)
+ if (opts_out->reject_limit && opts_out->on_error != COPY_ON_ERROR_IGNORE)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/*- translator: first and second %s are the names of COPY option, e.g.
diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c
index 4d52c93c30..a9e61f5e26 100644
--- a/src/backend/commands/copyfrom.c
+++ b/src/backend/commands/copyfrom.c
@@ -1003,7 +1003,7 @@ CopyFrom(CopyFromState cstate)
if (!NextCopyFrom(cstate, econtext, myslot->tts_values, myslot->tts_isnull))
break;
- if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE &&
+ if ((cstate->opts.on_error == COPY_ON_ERROR_IGNORE || cstate->opts.on_error == COPY_ON_ERROR_NULL) &&
cstate->escontext->error_occurred)
{
/*
@@ -1321,14 +1321,22 @@ CopyFrom(CopyFromState cstate)
/* Done, clean up */
error_context_stack = errcallback.previous;
- if (cstate->opts.on_error != COPY_ON_ERROR_STOP &&
- cstate->num_errors > 0 &&
+ if (cstate->num_errors > 0 &&
cstate->opts.log_verbosity >= COPY_LOG_VERBOSITY_DEFAULT)
- ereport(NOTICE,
- errmsg_plural("%llu row was skipped due to data type incompatibility",
- "%llu rows were skipped due to data type incompatibility",
- (unsigned long long) cstate->num_errors,
- (unsigned long long) cstate->num_errors));
+ {
+ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+ ereport(NOTICE,
+ errmsg_plural("%llu row was skipped due to data type incompatibility",
+ "%llu rows were skipped due to data type incompatibility",
+ (unsigned long long) cstate->num_errors,
+ (unsigned long long) cstate->num_errors));
+ else if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+ ereport(NOTICE,
+ errmsg_plural("Erroneous values in %llu row was replaced with NULL",
+ "Erroneous values in %llu rows were replaced with NULL",
+ (unsigned long long) cstate->num_errors,
+ (unsigned long long) cstate->num_errors));
+ }
if (bistate != NULL)
FreeBulkInsertState(bistate);
@@ -1474,10 +1482,11 @@ BeginCopyFrom(ParseState *pstate,
cstate->escontext->error_occurred = false;
/*
- * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other
- * options later
+ * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL.
+ * We'll add other options later
*/
- if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE)
+ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE ||
+ cstate->opts.on_error == COPY_ON_ERROR_NULL)
cstate->escontext->details_wanted = false;
}
else
diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c
index d1d43b53d8..7f98f66ebe 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -871,6 +871,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
int fldct;
int fieldno;
char *string;
+ bool current_row_erroneous = false;
/* read raw fields in the next line */
if (!NextCopyFromRawFields(cstate, &field_strings, &fldct))
@@ -949,7 +950,8 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
/*
* If ON_ERROR is specified with IGNORE, skip rows with soft
- * errors
+ * errors. If ON_ERROR is specified with SET_TO_NULL, try
+ * to replace attribute value with NULL.
*/
else if (!InputFunctionCallSafe(&in_functions[m],
string,
@@ -960,9 +962,48 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
{
Assert(cstate->opts.on_error != COPY_ON_ERROR_STOP);
+ /*
+ * we use it to count the number of rows (not fields)
+ * successfully doing on_error set_to_null.
+ */
+ current_row_erroneous = true;
+
+ if (cstate->opts.on_error == COPY_ON_ERROR_NULL)
+ {
+ /*
+ * we need another InputFunctionCallSafe for domain with a
+ * not-null constraint. if domain don't have not-null
+ * then continue to the next field, otherwise error out.
+ */
+ cstate->escontext->error_occurred = false;
+ if (InputFunctionCallSafe(&in_functions[m],
+ NULL,
+ typioparams[m],
+ att->atttypmod,
+ (Node *) cstate->escontext,
+ &values[m]))
+ {
+ nulls[m] = true;
+
+ if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+ ereport(NOTICE,
+ errmsg("replaced row attribute \"%s\" with NULL due to data type incompatibility at line %llu",
+ cstate->cur_attname,
+ (unsigned long long) cstate->cur_lineno));
+ continue;
+ }
+ else
+ ereport(ERROR,
+ errcode(ERRCODE_NOT_NULL_VIOLATION),
+ errmsg("domain %s does not allow null values", format_type_be(typioparams[m])),
+ errdatatype(typioparams[m]));
+ }
+
cstate->num_errors++;
+ cstate->escontext->error_occurred = true;
- if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE)
+ if (cstate->opts.log_verbosity == COPY_LOG_VERBOSITY_VERBOSE &&
+ cstate->opts.on_error != COPY_ON_ERROR_NULL)
{
/*
* Since we emit line number and column info in the below
@@ -1001,6 +1042,8 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext,
cstate->cur_attval = NULL;
}
+ if (current_row_erroneous)
+ cstate->num_errors++;
Assert(fieldno == attr_count);
}
else
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index bbd08770c3..fc2260b541 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -3235,7 +3235,7 @@ match_previous_words(int pattern_id,
COMPLETE_WITH("FORMAT", "FREEZE", "DELIMITER", "NULL",
"HEADER", "QUOTE", "ESCAPE", "FORCE_QUOTE",
"FORCE_NOT_NULL", "FORCE_NULL", "ENCODING", "DEFAULT",
- "ON_ERROR", "LOG_VERBOSITY");
+ "ON_ERROR", "SET_TO_NULL", "LOG_VERBOSITY");
/* Complete COPY <sth> FROM|TO filename WITH (FORMAT */
else if (Matches("COPY|\\copy", MatchAny, "FROM|TO", MatchAny, "WITH", "(", "FORMAT"))
diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h
index 4002a7f538..051ca12d10 100644
--- a/src/include/commands/copy.h
+++ b/src/include/commands/copy.h
@@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice
{
COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */
COPY_ON_ERROR_IGNORE, /* ignore errors */
+ COPY_ON_ERROR_NULL, /* set error field to null */
} CopyOnErrorChoice;
/*
diff --git a/src/include/commands/copyfrom_internal.h b/src/include/commands/copyfrom_internal.h
index cad52fcc78..5eaacd8c6b 100644
--- a/src/include/commands/copyfrom_internal.h
+++ b/src/include/commands/copyfrom_internal.h
@@ -98,7 +98,9 @@ typedef struct CopyFromStateData
ErrorSaveContext *escontext; /* soft error trapper during in_functions
* execution */
uint64 num_errors; /* total number of rows which contained soft
- * errors */
+ * errors, for ON_ERROR set_to_null, it's the
+ * number of rows successfully converted to null
+ */
int *defmap; /* array of default att numbers related to
* missing att */
ExprState **defexprs; /* array of default att expressions for all
diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out
index 64ea33aeae..f03dd24d54 100644
--- a/src/test/regress/expected/copy2.out
+++ b/src/test/regress/expected/copy2.out
@@ -81,6 +81,12 @@ COPY x from stdin (on_error ignore, on_error ignore);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (on_error ignore, on_error ignore);
^
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+ERROR: conflicting or redundant options
+LINE 1: COPY x from stdin (on_error set_to_null, on_error set_to_nul...
+ ^
+COPY x FROM stdin WITH (on_error set_to_null, reject_limit 2);
+ERROR: COPY REJECT_LIMIT requires ON_ERROR to be set to IGNORE
COPY x from stdin (log_verbosity default, log_verbosity verbose);
ERROR: conflicting or redundant options
LINE 1: COPY x from stdin (log_verbosity default, log_verbosity verb...
@@ -92,6 +98,8 @@ COPY x from stdin (format BINARY, null 'x');
ERROR: cannot specify NULL in BINARY mode
COPY x from stdin (format BINARY, on_error ignore);
ERROR: only ON_ERROR STOP is allowed in BINARY mode
+COPY x from stdin (format BINARY, on_error set_to_null);
+ERROR: only ON_ERROR STOP is allowed in BINARY mode
COPY x from stdin (on_error unsupported);
ERROR: COPY ON_ERROR "unsupported" not recognized
LINE 1: COPY x from stdin (on_error unsupported);
@@ -124,6 +132,10 @@ COPY x to stdout (format BINARY, on_error unsupported);
ERROR: COPY ON_ERROR cannot be used with COPY TO
LINE 1: COPY x to stdout (format BINARY, on_error unsupported);
^
+COPY x to stdin (on_error set_to_null);
+ERROR: COPY ON_ERROR cannot be used with COPY TO
+LINE 1: COPY x to stdin (on_error set_to_null);
+ ^
COPY x from stdin (log_verbosity unsupported);
ERROR: COPY LOG_VERBOSITY "unsupported" not recognized
LINE 1: COPY x from stdin (log_verbosity unsupported);
@@ -769,6 +781,42 @@ CONTEXT: COPY check_ign_err
NOTICE: skipping row due to data type incompatibility at line 8 for column "k": "a"
CONTEXT: COPY check_ign_err
NOTICE: 6 rows were skipped due to data type incompatibility
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+\pset null NULL
+--fail, colum a cannot set to NULL value
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR: domain d_int_not_null does not allow null values
+CONTEXT: COPY t_on_error_null, line 1, column a: null input
+--fail, colum a is domain with not-null constraint
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR: domain d_int_not_null does not allow null values
+CONTEXT: COPY t_on_error_null, line 1, column a: "a"
+--fail, colum a cannot set to NULL value
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+ERROR: domain d_int_not_null does not allow null values
+CONTEXT: COPY t_on_error_null, line 1, column a: "-1"
+--fail. less data
+COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR: missing data for column "c"
+CONTEXT: COPY t_on_error_null, line 1: "1,1"
+--fail. extra data
+COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_to_null);
+ERROR: extra data after last expected column
+CONTEXT: COPY t_on_error_null, line 1: "1,2,3,4"
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+NOTICE: Erroneous values in 3 rows were replaced with NULL
+-- check inserted content
+select * from t_on_error_null;
+ a | b | c
+----+------+------
+ 10 | NULL | NULL
+ 11 | NULL | 12
+ 13 | 14 | NULL
+(3 rows)
+
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -828,6 +876,8 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;
diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql
index 45273557ce..2bbb1ecce1 100644
--- a/src/test/regress/sql/copy2.sql
+++ b/src/test/regress/sql/copy2.sql
@@ -67,12 +67,15 @@ COPY x from stdin (force_null (a), force_null (b));
COPY x from stdin (convert_selectively (a), convert_selectively (b));
COPY x from stdin (encoding 'sql_ascii', encoding 'sql_ascii');
COPY x from stdin (on_error ignore, on_error ignore);
+COPY x from stdin (on_error set_to_null, on_error set_to_null);
+COPY x FROM stdin WITH (on_error set_to_null, reject_limit 2);
COPY x from stdin (log_verbosity default, log_verbosity verbose);
-- incorrect options
COPY x from stdin (format BINARY, delimiter ',');
COPY x from stdin (format BINARY, null 'x');
COPY x from stdin (format BINARY, on_error ignore);
+COPY x from stdin (format BINARY, on_error set_to_null);
COPY x from stdin (on_error unsupported);
COPY x from stdin (format TEXT, force_quote(a));
COPY x from stdin (format TEXT, force_quote *);
@@ -87,6 +90,7 @@ COPY x from stdin (format TEXT, force_null *);
COPY x to stdout (format CSV, force_null(a));
COPY x to stdout (format CSV, force_null *);
COPY x to stdout (format BINARY, on_error unsupported);
+COPY x to stdin (on_error set_to_null);
COPY x from stdin (log_verbosity unsupported);
COPY x from stdin with (reject_limit 1);
COPY x from stdin with (on_error ignore, reject_limit 0);
@@ -534,6 +538,45 @@ a {2} 2
8 {8} 8
\.
+CREATE DOMAIN d_int_not_null AS INT NOT NULL CHECK(value > 0);
+CREATE DOMAIN d_int_positive_maybe_null AS INT CHECK(value > 0);
+CREATE TABLE t_on_error_null (a d_int_not_null, b d_int_positive_maybe_null, c INT);
+
+\pset null NULL
+--fail, colum a cannot set to NULL value
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+\N 11 13
+\.
+
+--fail, colum a is domain with not-null constraint
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+a 11 14
+\.
+
+--fail, colum a cannot set to NULL value
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+-1 11 13
+\.
+
+--fail. less data
+COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,1
+\.
+--fail. extra data
+COPY t_on_error_null FROM STDIN WITH (delimiter ',', on_error set_to_null);
+1,2,3,4
+\.
+
+--ok
+COPY t_on_error_null FROM STDIN WITH (on_error set_to_null);
+10 a d
+11 b 12
+13 14 e
+\.
+
+-- check inserted content
+select * from t_on_error_null;
+
-- tests for on_error option with log_verbosity and null constraint via domain
CREATE DOMAIN dcheck_ign_err2 varchar(15) NOT NULL;
CREATE TABLE check_ign_err2 (n int, m int[], k int, l dcheck_ign_err2);
@@ -603,6 +646,8 @@ DROP VIEW instead_of_insert_tbl_view;
DROP VIEW instead_of_insert_tbl_view_2;
DROP FUNCTION fun_instead_of_insert_tbl();
DROP TABLE check_ign_err;
+DROP TABLE t_on_error_null;
+DROP DOMAIN d_int_not_null;
DROP TABLE check_ign_err2;
DROP DOMAIN dcheck_ign_err2;
DROP TABLE hard_err;
--
2.34.1