attached v2. syntax: `on_error set_to_null` based on upthread discussion, now if you specified `on_error set_to_null` and your column has `not null` constraint, we convert the error field to null, so it may error while bulk inserting for violating NOT NULL constraint.
From c95bb7b7c072f510b9a60695714be21345f21591 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Sat, 10 Feb 2024 15:08:41 +0800 Subject: [PATCH v2 1/1] on_error set_to_null
any data type conversion errors while COPY FROM will set that column value to be NULL. discussion: https://www.postgresql.org/message-id/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=bp3d1_asfe...@mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 1 + src/backend/commands/copy.c | 2 ++ src/backend/commands/copyfrom.c | 30 ++++++++++++++++++++++------ src/backend/commands/copyfromparse.c | 28 ++++++++++++++++++++++++-- src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 22 ++++++++++++++++++++ src/test/regress/sql/copy2.sql | 23 +++++++++++++++++++++ 7 files changed, 99 insertions(+), 8 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 55764fc1..d8b609b6 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -390,6 +390,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable An <replaceable class="parameter">error_action</replaceable> value of <literal>stop</literal> means fail the command, while <literal>ignore</literal> means discard the input row and continue with the next one. + <literal>set_to_null</literal> means the input value will set to <literal>null</literal> and continue with the next field. The default is <literal>stop</literal>. </para> <para> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index cc0786c6..9c7d6ebd 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -422,6 +422,8 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) return COPY_ON_ERROR_STOP; if (pg_strcasecmp(sval, "ignore") == 0) return COPY_ON_ERROR_IGNORE; + if (pg_strcasecmp(sval, "set_to_null") == 0) + return COPY_ON_ERROR_NULL; ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 41f6bc43..2a87bcf3 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1005,6 +1005,7 @@ CopyFrom(CopyFromState cstate) * information according to ON_ERROR. */ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + { /* * Just make ErrorSaveContext ready for the next NextCopyFrom. @@ -1013,11 +1014,18 @@ CopyFrom(CopyFromState cstate) */ cstate->escontext->error_occurred = false; - /* Report that this tuple was skipped by the ON_ERROR clause */ - pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, - ++skipped); + /* Report that this tuple was skipped by the ON_ERROR clause */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, + ++skipped); - continue; + continue; + } + /* + * Just make ErrorSaveContext ready for the next NextCopyFrom. + * + */ + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + cstate->escontext->error_occurred = false; } ExecStoreVirtualTuple(myslot); @@ -1312,7 +1320,7 @@ CopyFrom(CopyFromState cstate) /* Done, clean up */ error_context_stack = errcallback.previous; - if (cstate->opts.on_error != COPY_ON_ERROR_STOP && + if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE && cstate->num_errors > 0) ereport(NOTICE, errmsg_plural("%llu row was skipped due to data type incompatibility", @@ -1320,6 +1328,14 @@ CopyFrom(CopyFromState cstate) (unsigned long long) cstate->num_errors, (unsigned long long) cstate->num_errors)); + if (cstate->opts.on_error == COPY_ON_ERROR_NULL && + cstate->num_errors > 0) + ereport(NOTICE, + errmsg_plural("some columns of %llu rows, value was converted to NULL due to data type incompatibility", + "some columns of %llu rows, value were converted to NULL due to data type incompatibility", + (unsigned long long) cstate->num_errors, + (unsigned long long) cstate->num_errors)); + if (bistate != NULL) FreeBulkInsertState(bistate); @@ -1463,11 +1479,13 @@ BeginCopyFrom(ParseState *pstate, cstate->escontext->error_occurred = false; /* - * Currently we only support COPY_ON_ERROR_IGNORE. We'll add other + * Currently we only support COPY_ON_ERROR_IGNORE, COPY_ON_ERROR_NULL. We'll add other * options later */ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) cstate->escontext->details_wanted = false; + else if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + cstate->escontext->details_wanted = false; } else cstate->escontext = NULL; diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 90675636..9d77c3d1 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -873,6 +873,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, int fldct; int fieldno; char *string; + bool error_happened = false; /* read raw fields in the next line */ if (!NextCopyFromRawFields(cstate, &field_strings, &fldct)) @@ -960,14 +961,37 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (Node *) cstate->escontext, &values[m])) { - cstate->num_errors++; - return true; + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + { + + values[m] = (Datum) 0; + nulls[m] = true; + /* here, we need set error_occurred to false, so COPY will be continue */ + cstate->escontext->error_occurred = false; + + /* does any conversion error ever happened in all the fields */ + if (!error_happened) + error_happened = true; + } + else + { + cstate->num_errors++; + return true; + } + } cstate->cur_attname = NULL; cstate->cur_attval = NULL; } + /* update num_errors. one row with multiple errors field only count 1*/ + if (error_happened) + { + cstate->num_errors++; + cstate->escontext->error_occurred = true; + } + Assert(fieldno == attr_count); } else diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b3da3cb0..931fe09b 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice { COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */ COPY_ON_ERROR_IGNORE, /* ignore errors */ + COPY_ON_ERROR_NULL, /* set error field to null */ } CopyOnErrorChoice; /* diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 25c401ce..879da283 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -751,6 +751,28 @@ CONTEXT: COPY check_ign_err, line 1: "1 {1}" COPY check_ign_err FROM STDIN WITH (on_error ignore); ERROR: extra data after last expected column CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc" +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +NOTICE: some columns of 3 rows, value were converted to NULL due to data type incompatibility +\pset null NULL +SELECT * FROM check_ign_err; + n | m | k +------+-----+------ + 1 | {1} | NULL + 2 | {2} | 1 + 3 | {3} | 2 + 4 | {4} | NULL + NULL | {5} | NULL +(5 rows) + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: missing data for column "k" +CONTEXT: COPY check_ign_err, line 1: "1," +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +ERROR: extra data after last expected column +CONTEXT: COPY check_ign_err, line 1: "1,{1},1,1" -- clean up DROP TABLE forcetest; DROP TABLE vistest; diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index b5e549e8..67bf45a7 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -534,6 +534,29 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore); 1 {1} 3 abc \. + +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error set_to_null); +1 {1} a +2 {2} 1 +3 {3} 2 +4 {4} b +a {5} c +\. + +\pset null NULL +SELECT * FROM check_ign_err; + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1, +\. + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error set_to_null); +1,{1},1,1 +\. + -- clean up DROP TABLE forcetest; DROP TABLE vistest; -- 2.34.1