The idea of on_error is to tolerate errors, I think. if a column has a not null constraint, let it cannot be used with (on_error 'null')
Based on this, I've made a patch. based on COPY Synopsis: ON_ERROR 'error_action' on_error 'null', the keyword NULL should be single quoted. demo: COPY check_ign_err FROM STDIN WITH (on_error 'null'); 1 {1} a 2 {2} 1 3 {3} 2 4 {4} b a {5} c \. \pset null NULL SELECT * FROM check_ign_err; n | m | k ------+-----+------ 1 | {1} | NULL 2 | {2} | 1 3 | {3} | 2 4 | {4} | NULL NULL | {5} | NULL
From 19afa942af22fd3d2ed2436c6bc7ce02f00bb570 Mon Sep 17 00:00:00 2001 From: jian he <jian.universal...@gmail.com> Date: Sat, 3 Feb 2024 14:04:08 +0800 Subject: [PATCH v1 1/1] introduce copy on_error 'null' option on_error 'null', null needs single quoted. any data type conversion error will treat that column value to be NULL. it will not work with column have not null constraint, we check this at BeginCopyFrom. discussion: https://www.postgresql.org/message-id/CAKFQuwawy1e6YR4S=j+y7pXqg_Dw1WBVrgvf=bp3d1_asfe...@mail.gmail.com --- doc/src/sgml/ref/copy.sgml | 1 + src/backend/commands/copy.c | 2 ++ src/backend/commands/copyfrom.c | 28 ++++++++++++++++++++++++---- src/backend/commands/copyfromparse.c | 27 +++++++++++++++++++++++++-- src/include/commands/copy.h | 1 + src/test/regress/expected/copy2.out | 26 ++++++++++++++++++++++++++ src/test/regress/sql/copy2.sql | 28 ++++++++++++++++++++++++++++ 7 files changed, 107 insertions(+), 6 deletions(-) diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml index 55764fc1..d3c4ebdc 100644 --- a/doc/src/sgml/ref/copy.sgml +++ b/doc/src/sgml/ref/copy.sgml @@ -390,6 +390,7 @@ COPY { <replaceable class="parameter">table_name</replaceable> [ ( <replaceable An <replaceable class="parameter">error_action</replaceable> value of <literal>stop</literal> means fail the command, while <literal>ignore</literal> means discard the input row and continue with the next one. + <literal>null</literal> means the input value will be <literal>null</literal> and continue with the next one. The default is <literal>stop</literal>. </para> <para> diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c index cc0786c6..01ce47b0 100644 --- a/src/backend/commands/copy.c +++ b/src/backend/commands/copy.c @@ -422,6 +422,8 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from) return COPY_ON_ERROR_STOP; if (pg_strcasecmp(sval, "ignore") == 0) return COPY_ON_ERROR_IGNORE; + if (pg_strcasecmp(sval, "null") == 0) + return COPY_ON_ERROR_NULL; ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), diff --git a/src/backend/commands/copyfrom.c b/src/backend/commands/copyfrom.c index 1fe70b91..f4c5704e 100644 --- a/src/backend/commands/copyfrom.c +++ b/src/backend/commands/copyfrom.c @@ -1005,6 +1005,7 @@ CopyFrom(CopyFromState cstate) * information according to ON_ERROR. */ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) + { /* * Just make ErrorSaveContext ready for the next NextCopyFrom. @@ -1013,11 +1014,18 @@ CopyFrom(CopyFromState cstate) */ cstate->escontext->error_occurred = false; - /* Report that this tuple was skipped by the ON_ERROR clause */ - pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, - ++skipped); + /* Report that this tuple was skipped by the ON_ERROR clause */ + pgstat_progress_update_param(PROGRESS_COPY_TUPLES_SKIPPED, + ++skipped); - continue; + continue; + } + /* + * Just make ErrorSaveContext ready for the next NextCopyFrom. + * + */ + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + cstate->escontext->error_occurred = false; } ExecStoreVirtualTuple(myslot); @@ -1313,6 +1321,7 @@ CopyFrom(CopyFromState cstate) error_context_stack = errcallback.previous; if (cstate->opts.on_error != COPY_ON_ERROR_STOP && + cstate->opts.on_error != COPY_ON_ERROR_NULL && cstate->num_errors > 0) ereport(NOTICE, errmsg_plural("%llu row was skipped due to data type incompatibility", @@ -1468,6 +1477,8 @@ BeginCopyFrom(ParseState *pstate, */ if (cstate->opts.on_error == COPY_ON_ERROR_IGNORE) cstate->escontext->details_wanted = false; + else if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + cstate->escontext->details_wanted = false; } else cstate->escontext = NULL; @@ -1621,6 +1632,15 @@ BeginCopyFrom(ParseState *pstate, if (att->attisdropped) continue; + /* + * we can specify on_error 'null', but it can only apply to columns + * don't have not null constraint. + */ + if (att->attnotnull && cstate->opts.on_error == COPY_ON_ERROR_NULL) + ereport(ERROR, + (errcode(ERRCODE_BAD_COPY_FILE_FORMAT), + errmsg("copy on_error 'null' cannot be used with not null constraint column"))); + /* Fetch the input function and typioparam info */ if (cstate->opts.binary) getTypeBinaryInputInfo(att->atttypid, diff --git a/src/backend/commands/copyfromparse.c b/src/backend/commands/copyfromparse.c index 7cacd0b7..9475a9dc 100644 --- a/src/backend/commands/copyfromparse.c +++ b/src/backend/commands/copyfromparse.c @@ -881,6 +881,7 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, int fldct; int fieldno; char *string; + bool error_happened = false; /* read raw fields in the next line */ if (!NextCopyFromRawFields(cstate, &field_strings, &fldct)) @@ -968,14 +969,36 @@ NextCopyFrom(CopyFromState cstate, ExprContext *econtext, (Node *) cstate->escontext, &values[m])) { - cstate->num_errors++; - return true; + if (cstate->opts.on_error == COPY_ON_ERROR_NULL) + { + + values[m] = (Datum) 0; + nulls[m] = true; + /* here, we need set error_occurred to false, so COPY will be continue */ + cstate->escontext->error_occurred = false; + + if (!error_happened) + error_happened = true; + } + else + { + cstate->num_errors++; + return true; + } + } cstate->cur_attname = NULL; cstate->cur_attval = NULL; } + /* accumate num_errors. one row multiple errors only count 1*/ + if (error_happened) + { + cstate->num_errors++; + cstate->escontext->error_occurred = true; + } + Assert(fieldno == attr_count); } else diff --git a/src/include/commands/copy.h b/src/include/commands/copy.h index b3da3cb0..6c3733fa 100644 --- a/src/include/commands/copy.h +++ b/src/include/commands/copy.h @@ -38,6 +38,7 @@ typedef enum CopyOnErrorChoice { COPY_ON_ERROR_STOP = 0, /* immediately throw errors, default */ COPY_ON_ERROR_IGNORE, /* ignore errors */ + COPY_ON_ERROR_NULL, /* transform error field to null */ } CopyOnErrorChoice; /* diff --git a/src/test/regress/expected/copy2.out b/src/test/regress/expected/copy2.out index 25c401ce..0038d6c8 100644 --- a/src/test/regress/expected/copy2.out +++ b/src/test/regress/expected/copy2.out @@ -751,6 +751,31 @@ CONTEXT: COPY check_ign_err, line 1: "1 {1}" COPY check_ign_err FROM STDIN WITH (on_error ignore); ERROR: extra data after last expected column CONTEXT: COPY check_ign_err, line 1: "1 {1} 3 abc" +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error 'null'); +\pset null NULL +SELECT * FROM check_ign_err; + n | m | k +------+-----+------ + 1 | {1} | NULL + 2 | {2} | 1 + 3 | {3} | 2 + 4 | {4} | NULL + NULL | {5} | NULL +(5 rows) + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null'); +ERROR: missing data for column "k" +CONTEXT: COPY check_ign_err, line 1: "1," +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null'); +ERROR: extra data after last expected column +CONTEXT: COPY check_ign_err, line 1: "1,{1},1,1" +CREATE temp TABLE check_on_err_null (n int, m int not null, k int); +--should fail. since not null constraint conflict with on_error null +COPY check_on_err_null FROM STDIN WITH (on_error 'null'); +ERROR: copy on_error 'null' cannot be used with not null constraint column -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -767,6 +792,7 @@ DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; DROP TABLE hard_err; +DROP TABLE check_on_err_null; -- -- COPY FROM ... DEFAULT -- diff --git a/src/test/regress/sql/copy2.sql b/src/test/regress/sql/copy2.sql index b5e549e8..dacde209 100644 --- a/src/test/regress/sql/copy2.sql +++ b/src/test/regress/sql/copy2.sql @@ -534,6 +534,33 @@ COPY check_ign_err FROM STDIN WITH (on_error ignore); 1 {1} 3 abc \. + +truncate check_ign_err; +COPY check_ign_err FROM STDIN WITH (on_error 'null'); +1 {1} a +2 {2} 1 +3 {3} 2 +4 {4} b +a {5} c +\. + +\pset null NULL +SELECT * FROM check_ign_err; + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null'); +1, +\. + +--should fail. +COPY check_ign_err FROM STDIN WITH (delimiter ',', on_error 'null'); +1,{1},1,1 +\. + +CREATE temp TABLE check_on_err_null (n int, m int not null, k int); +--should fail. since not null constraint conflict with on_error null +COPY check_on_err_null FROM STDIN WITH (on_error 'null'); + -- clean up DROP TABLE forcetest; DROP TABLE vistest; @@ -550,6 +577,7 @@ DROP VIEW instead_of_insert_tbl_view_2; DROP FUNCTION fun_instead_of_insert_tbl(); DROP TABLE check_ign_err; DROP TABLE hard_err; +DROP TABLE check_on_err_null; -- -- COPY FROM ... DEFAULT -- 2.34.1