On 2024-11-12 14:51, Yugo Nagata wrote:

Thanks for your review!

On Tue, 12 Nov 2024 10:16:50 +0900
torikoshia <torikos...@oss.nttdata.com> wrote:

On 2024-11-12 01:49, Fujii Masao wrote:
> On 2024/11/11 21:45, torikoshia wrote:
>>> Thanks for adding the comment. It clearly states that REJECT_LIMIT
>>> can be
>>> a single-quoted string. However, it might also be helpful to mention
>>> that
>>> it can be provided as an int64 in the COPY command option. How about
>>> updating it like this?
>>>
>>> ------------------------------------
>>> REJECT_LIMIT can be specified in two ways: as an int64 for the COPY
>>> command
>>> option or as a single-quoted string for the foreign table option
>>> using
>>> file_fdw. Therefore this function needs to handle both formats.
>>> ------------------------------------
>>
>> Thanks! it seems better.
>>
>>
>> Attached v3 patch.
>
> Thanks for updating the patch! It looks like you forgot to attach it,
> though.

Oops, thanks for pointing it out.
Here it is.

I have one minor comment.

+                               ereport(ERROR,
+                                               
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+                                                errmsg("skipped more than 
REJECT_LIMIT (%lld) rows due to data
type incompatibility",
+                                                               (long long) 
cstate->opts.reject_limit)));

Do we have to keep this message consistent with ones of COPY command?
With foreign data wrappers, it seems common that the option name is passed in lowercase, so is it better to use "skipped more than reject_limit ..." rather
than using uppercase?

Agreed.
Attached v4 patch.

--
Regards,

--
Atsushi Torikoshi
Seconded from NTT DATA GROUP CORPORATION to SRA OSS K.K.
From 114d8dd2c57bf8b9cf712b8fa401c7b8eaa8a958 Mon Sep 17 00:00:00 2001
From: Atsushi Torikoshi <torikos...@oss.nttdata.com>
Date: Wed, 13 Nov 2024 21:32:25 +0900
Subject: [PATCH v4] file_fdw: Add reject_limit option to file_fdw

Commit 4ac2a9bece introduced REJECT_LIMIT option for COPY. This patch
extends support for this option to file_fdw.

As well as REJECT_LIMIT option for COPY, this option limits the
maximum number of erroneous rows that can be skipped.
If more rows encounter data type conversion errors than allowed by
reject_limit, the access to the file_fdw foreign table will fail
with an error, even when on_error = 'ignore'.

Based on the synopsis of the CREATE/ALTER FOREIGN TABLE commands, the
value for the foreign table's option must be single-quoted.
To accommodate this, the patch modifies defGetCopyRejectLimitOption
to accept not only int64 but also strings.
---
 contrib/file_fdw/data/agg.bad          |  1 +
 contrib/file_fdw/expected/file_fdw.out | 16 ++++++++++++++--
 contrib/file_fdw/file_fdw.c            |  8 ++++++++
 contrib/file_fdw/sql/file_fdw.sql      |  6 +++++-
 doc/src/sgml/file-fdw.sgml             | 12 ++++++++++++
 src/backend/commands/copy.c            | 16 +++++++++++++++-
 6 files changed, 55 insertions(+), 4 deletions(-)

diff --git a/contrib/file_fdw/data/agg.bad b/contrib/file_fdw/data/agg.bad
index 3415b15007..04279ce55b 100644
--- a/contrib/file_fdw/data/agg.bad
+++ b/contrib/file_fdw/data/agg.bad
@@ -2,3 +2,4 @@
 100;@99.097@
 0;@aaa@
 42;@324.78@
+1;@bbb@
diff --git a/contrib/file_fdw/expected/file_fdw.out b/contrib/file_fdw/expected/file_fdw.out
index 593fdc782e..81efe63eef 100644
--- a/contrib/file_fdw/expected/file_fdw.out
+++ b/contrib/file_fdw/expected/file_fdw.out
@@ -206,10 +206,10 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 SELECT * FROM agg_bad;               -- ERROR
 ERROR:  invalid input syntax for type real: "aaa"
 CONTEXT:  COPY agg_bad, line 3, column b: "aaa"
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
 SELECT * FROM agg_bad;
-NOTICE:  1 row was skipped due to data type incompatibility
+NOTICE:  2 rows were skipped due to data type incompatibility
   a  |   b    
 -----+--------
  100 | 99.097
@@ -224,6 +224,18 @@ SELECT * FROM agg_bad;
   42 | 324.78
 (2 rows)
 
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
+SELECT * FROM agg_bad;
+ERROR:  skipped more than reject_limit (1) rows due to data type incompatibility
+CONTEXT:  COPY agg_bad, line 5, column b: "bbb"
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
+  a  |   b    
+-----+--------
+ 100 | 99.097
+  42 | 324.78
+(2 rows)
+
 ANALYZE agg_bad;
 -- misc query tests
 \t on
diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 043204c3e7..6c64f81098 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -77,6 +77,7 @@ static const struct FileFdwOption valid_options[] = {
 	{"encoding", ForeignTableRelationId},
 	{"on_error", ForeignTableRelationId},
 	{"log_verbosity", ForeignTableRelationId},
+	{"reject_limit", ForeignTableRelationId},
 	{"force_not_null", AttributeRelationId},
 	{"force_null", AttributeRelationId},
 
@@ -788,6 +789,13 @@ retry:
 			 */
 			ResetPerTupleExprContext(estate);
 
+			if (cstate->opts.reject_limit > 0 &&
+				cstate->num_errors > cstate->opts.reject_limit)
+				ereport(ERROR,
+						(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+						 errmsg("skipped more than reject_limit (%lld) rows due to data type incompatibility",
+								(long long) cstate->opts.reject_limit)));
+
 			/* Repeat NextCopyFrom() until no soft error occurs */
 			goto retry;
 		}
diff --git a/contrib/file_fdw/sql/file_fdw.sql b/contrib/file_fdw/sql/file_fdw.sql
index edd77c5cd2..0d6920e1f6 100644
--- a/contrib/file_fdw/sql/file_fdw.sql
+++ b/contrib/file_fdw/sql/file_fdw.sql
@@ -150,11 +150,15 @@ SELECT * FROM agg_csv c JOIN agg_text t ON (t.a = c.a) ORDER BY c.a;
 -- error context report tests
 SELECT * FROM agg_bad;               -- ERROR
 
--- on_error and log_verbosity tests
+-- on_error, log_verbosity and reject_limit tests
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD on_error 'ignore');
 SELECT * FROM agg_bad;
 ALTER FOREIGN TABLE agg_bad OPTIONS (ADD log_verbosity 'silent');
 SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (ADD reject_limit '1'); -- ERROR
+SELECT * FROM agg_bad;
+ALTER FOREIGN TABLE agg_bad OPTIONS (SET reject_limit '2');
+SELECT * FROM agg_bad;
 ANALYZE agg_bad;
 
 -- misc query tests
diff --git a/doc/src/sgml/file-fdw.sgml b/doc/src/sgml/file-fdw.sgml
index bb3579b077..882d9a76d2 100644
--- a/doc/src/sgml/file-fdw.sgml
+++ b/doc/src/sgml/file-fdw.sgml
@@ -138,6 +138,18 @@
    </listitem>
   </varlistentry>
 
+  <varlistentry>
+   <term><literal>reject_limit</literal></term>
+
+   <listitem>
+    <para>
+     Specifies the maximum number of errors tolerated while converting a column's
+     input value to its data type, the same as <command>COPY</command>'s
+    <literal>REJECT_LIMIT</literal> option.
+    </para>
+   </listitem>
+  </varlistentry>
+
   <varlistentry>
    <term><literal>log_verbosity</literal></term>
 
diff --git a/src/backend/commands/copy.c b/src/backend/commands/copy.c
index 3485ba8663..2d98ecf3f4 100644
--- a/src/backend/commands/copy.c
+++ b/src/backend/commands/copy.c
@@ -420,11 +420,25 @@ defGetCopyOnErrorChoice(DefElem *def, ParseState *pstate, bool is_from)
 
 /*
  * Extract REJECT_LIMIT value from a DefElem.
+ *
+ * REJECT_LIMIT can be specified in two ways: as an int64 for the COPY command
+ * option or as a single-quoted string for the foreign table option using
+ * file_fdw. Therefore this function needs to handle both formats.
  */
 static int64
 defGetCopyRejectLimitOption(DefElem *def)
 {
-	int64		reject_limit = defGetInt64(def);
+	int64		reject_limit;
+
+	if (def->arg == NULL)
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("%s requires a numeric value",
+						def->defname)));
+	else if (nodeTag(def->arg) == T_String)
+		reject_limit = pg_strtoint64(strVal(def->arg));
+	else
+		reject_limit = defGetInt64(def);
 
 	if (reject_limit <= 0)
 		ereport(ERROR,
-- 
2.39.2

Reply via email to