Hi,

PFA a patch that attempts to fix the bug that \. on a line
by itself is handled incorrectly by COPY FROM ... CSV.
This issue has been discussed several times previously,
for instance in [1] and [2], and mentioned in the
doc for \copy in commit 42d3125.

There's one case that works today: when
the line is part of a multi-line quoted section,
and the data is read from a file, not from the client.
In other situations, an error is raised or the data is cut at
the point of \. without an error.

The patch addresses that issue in the server and in psql,
except for the case of inlined data, where \. cannot be 
both valid data and an EOF marker at the same time, so
it keeps treating it as an EOF marker. 


[1]
https://www.postgresql.org/message-id/10e3eff6-eb04-4b3f-aeb4-b920192b9...@manitou-mail.org
[2]
https://www.postgresql.org/message-id/8aeab305-5e94-4fa5-82bf-6da6baee6e05%40app.fastmail.com


Best regards,
-- 
Daniel Vérité
https://postgresql.verite.pro/
Twitter: @DanielVerite
From 868b1e065cf714f315bab65129fd05a1d60fc81b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20V=C3=A9rit=C3=A9?= <dan...@manitou-mail.org>
Date: Mon, 18 Dec 2023 20:47:02 +0100
Subject: [PATCH v1] Support backslash-dot on a line by itself as valid data in
 COPY FROM...CSV

---
 doc/src/sgml/ref/psql-ref.sgml       |  4 ----
 src/backend/commands/copyfromparse.c | 13 ++---------
 src/bin/psql/copy.c                  | 32 ++++++++++++++++++++--------
 src/test/regress/expected/copy.out   | 15 +++++++++++++
 src/test/regress/sql/copy.sql        | 11 ++++++++++
 5 files changed, 51 insertions(+), 24 deletions(-)

diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index cc7d797159..d94e3cacfc 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1119,10 +1119,6 @@ INSERT INTO tbl1 VALUES ($1, $2) \bind 'first value' 
'second value' \g
         destination, because all data must pass through the client/server
         connection.  For large amounts of data the <acronym>SQL</acronym>
         command might be preferable.
-        Also, because of this pass-through method, <literal>\copy
-        ... from</literal> in <acronym>CSV</acronym> mode will erroneously
-        treat a <literal>\.</literal> data value alone on a line as an
-        end-of-input marker.
         </para>
         </tip>
 
diff --git a/src/backend/commands/copyfromparse.c 
b/src/backend/commands/copyfromparse.c
index f553734582..b4c291b25b 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -1137,7 +1137,6 @@ CopyReadLineText(CopyFromState cstate)
        bool            result = false;
 
        /* CSV variables */
-       bool            first_char_in_line = true;
        bool            in_quote = false,
                                last_was_esc = false;
        char            quotec = '\0';
@@ -1335,10 +1334,9 @@ CopyReadLineText(CopyFromState cstate)
                }
 
                /*
-                * In CSV mode, we only recognize \. alone on a line.  This is 
because
-                * \. is a valid CSV data value.
+                * In CSV mode, \. is a valid CSV data value anywhere in the 
line.
                 */
-               if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+               if (c == '\\' && !cstate->opts.csv_mode)
                {
                        char            c2;
 
@@ -1442,14 +1440,7 @@ CopyReadLineText(CopyFromState cstate)
                        }
                }
 
-               /*
-                * This label is for CSV cases where \. appears at the start of 
a
-                * line, but there is more text after it, meaning it was a data 
value.
-                * We are more strict for \. in CSV mode because \. could be a 
data
-                * value, while in non-CSV mode, \. cannot be a data value.
-                */
 not_end_of_copy:
-               first_char_in_line = false;
        }                                                       /* end of outer 
loop */
 
        /*
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index dbbbdb8898..f31a7acbb6 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -620,20 +620,34 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool 
isbinary, PGresult **res)
                                /* current line is done? */
                                if (buf[buflen - 1] == '\n')
                                {
-                                       /* check for EOF marker, but not on a 
partial line */
-                                       if (at_line_begin)
+                                       /*
+                                        * When at the beginning of the line, 
check for EOF marker.
+                                        * If the marker is found and the data 
is inlined,
+                                        * we must stop at this point.  If not, 
the \. line can be
+                                        * sent to the server, and we let it 
decide whether
+                                        * it's an EOF or not depending on the 
format: in
+                                        * basic TEXT, \. is going to be 
interpreted as an EOF, in
+                                        * CSV, it will not.
+                                        */
+                                       if (at_line_begin && copystream == 
pset.cur_cmd_source)
                                        {
-                                               /*
-                                                * This code erroneously 
assumes '\.' on a line alone
-                                                * inside a quoted CSV string 
terminates the \copy.
-                                                * 
https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org
-                                                *
-                                                * 
https://www.postgresql.org/message-id/bfcd57e4-8f23-4c3e-a5db-2571d0920...@beta.fastmail.com
-                                                */
                                                if ((linelen == 3 && 
memcmp(fgresult, "\\.\n", 3) == 0) ||
                                                        (linelen == 4 && 
memcmp(fgresult, "\\.\r\n", 4) == 0))
                                                {
                                                        copydone = true;
+                                                       /*
+                                                        * Remove the EOF 
marker from the data sent, unless
+                                                        * using the obsolete 
v2 protocol that needs it.
+                                                        * In the case of CSV, 
the EOF marker must be
+                                                        * removed, otherwise 
it would be interpreted by
+                                                        * the server as valid 
data.
+                                                        */
+                                                       if (copystream == 
pset.cur_cmd_source &&
+                                                               
PQprotocolVersion(conn) >= 3)
+                                                       {
+                                                               *fgresult='\0';
+                                                               buflen -= 
linelen;
+                                                       }
                                                }
                                        }
 
diff --git a/src/test/regress/expected/copy.out 
b/src/test/regress/expected/copy.out
index b48365ec98..fc1e6cfd7b 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -32,6 +32,21 @@ select * from copytest except select * from copytest2;
 -------+------+--------
 (0 rows)
 
+--- test unquoted .\ as data inside CSV
+truncate copytest2;
+insert into copytest2(test) values('line1'), ('\.'), ('line2');
+copy (select test from copytest2 order by test collate "C") to :'filename' csv;
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+ test  
+-------
+ \.
+ line1
+ line2
+(3 rows)
+
 -- test header line feature
 create temp table copytest3 (
        c1 int,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 43d2e906dd..24ecb0485a 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -38,6 +38,17 @@ copy copytest2 from :'filename' csv quote '''' escape E'\\';
 
 select * from copytest except select * from copytest2;
 
+--- test unquoted .\ as data inside CSV
+
+truncate copytest2;
+
+insert into copytest2(test) values('line1'), ('\.'), ('line2');
+copy (select test from copytest2 order by test collate "C") to :'filename' csv;
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+
 
 -- test header line feature
 
-- 
2.34.1

Reply via email to