Hi,

The CI patch tester fails on this patch, because it has a label
at the end of a C block, which I'm learning is a C23 feature
that happens to be supported by gcc 11 [1], but is not portable.

PFA an update fixing this, plus removing an obsolete chunk
in the COPY documentation that v2 left out.


[1] https://gcc.gnu.org/onlinedocs/gcc/Mixed-Labels-and-Declarations.html


Best regards,
-- 
Daniel Vérité
https://postgresql.verite.pro/
Twitter: @DanielVerite
From 98f38aff440ad683aa1bd7a30fd960f1d0101191 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20V=C3=A9rit=C3=A9?= <dan...@manitou-mail.org>
Date: Sun, 31 Dec 2023 14:47:05 +0100
Subject: [PATCH v4] Support backslash-dot on a line by itself as valid data in
 COPY FROM...CSV

---
 doc/src/sgml/ref/copy.sgml           |  6 +--
 doc/src/sgml/ref/psql-ref.sgml       |  4 --
 src/backend/commands/copyfromparse.c | 57 +++++++---------------------
 src/bin/psql/copy.c                  | 32 +++++++++++-----
 src/test/regress/expected/copy.out   | 18 +++++++++
 src/test/regress/sql/copy.sql        | 12 ++++++
 6 files changed, 67 insertions(+), 62 deletions(-)

diff --git a/doc/src/sgml/ref/copy.sgml b/doc/src/sgml/ref/copy.sgml
index 18ecc69c33..e719053e3d 100644
--- a/doc/src/sgml/ref/copy.sgml
+++ b/doc/src/sgml/ref/copy.sgml
@@ -761,11 +761,7 @@ COPY <replaceable class="parameter">count</replaceable>
     format, <literal>\.</literal>, the end-of-data marker, could also appear
     as a data value.  To avoid any misinterpretation, a <literal>\.</literal>
     data value appearing as a lone entry on a line is automatically
-    quoted on output, and on input, if quoted, is not interpreted as the
-    end-of-data marker.  If you are loading a file created by another
-    application that has a single unquoted column and might have a
-    value of <literal>\.</literal>, you might need to quote that value in the
-    input file.
+    quoted on output.
    </para>
 
    <note>
diff --git a/doc/src/sgml/ref/psql-ref.sgml b/doc/src/sgml/ref/psql-ref.sgml
index cc7d797159..d94e3cacfc 100644
--- a/doc/src/sgml/ref/psql-ref.sgml
+++ b/doc/src/sgml/ref/psql-ref.sgml
@@ -1119,10 +1119,6 @@ INSERT INTO tbl1 VALUES ($1, $2) \bind 'first value' 
'second value' \g
         destination, because all data must pass through the client/server
         connection.  For large amounts of data the <acronym>SQL</acronym>
         command might be preferable.
-        Also, because of this pass-through method, <literal>\copy
-        ... from</literal> in <acronym>CSV</acronym> mode will erroneously
-        treat a <literal>\.</literal> data value alone on a line as an
-        end-of-input marker.
         </para>
         </tip>
 
diff --git a/src/backend/commands/copyfromparse.c 
b/src/backend/commands/copyfromparse.c
index f553734582..67e046d450 100644
--- a/src/backend/commands/copyfromparse.c
+++ b/src/backend/commands/copyfromparse.c
@@ -136,14 +136,6 @@ if (1) \
        } \
 } else ((void) 0)
 
-/* Undo any read-ahead and jump out of the block. */
-#define NO_END_OF_COPY_GOTO \
-if (1) \
-{ \
-       input_buf_ptr = prev_raw_ptr + 1; \
-       goto not_end_of_copy; \
-} else ((void) 0)
-
 /* NOTE: there's a copy of this in copyto.c */
 static const char BinarySignature[11] = "PGCOPY\n\377\r\n\0";
 
@@ -1137,7 +1129,6 @@ CopyReadLineText(CopyFromState cstate)
        bool            result = false;
 
        /* CSV variables */
-       bool            first_char_in_line = true;
        bool            in_quote = false,
                                last_was_esc = false;
        char            quotec = '\0';
@@ -1335,10 +1326,9 @@ CopyReadLineText(CopyFromState cstate)
                }
 
                /*
-                * In CSV mode, we only recognize \. alone on a line.  This is 
because
-                * \. is a valid CSV data value.
+                * In CSV mode, backslash is a normal character.
                 */
-               if (c == '\\' && (!cstate->opts.csv_mode || first_char_in_line))
+               if (c == '\\' && !cstate->opts.csv_mode)
                {
                        char            c2;
 
@@ -1371,21 +1361,15 @@ CopyReadLineText(CopyFromState cstate)
 
                                        if (c2 == '\n')
                                        {
-                                               if (!cstate->opts.csv_mode)
-                                                       ereport(ERROR,
-                                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                        
errmsg("end-of-copy marker does not match previous newline style")));
-                                               else
-                                                       NO_END_OF_COPY_GOTO;
+                                               ereport(ERROR,
+                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                                
errmsg("end-of-copy marker does not match previous newline style")));
                                        }
                                        else if (c2 != '\r')
                                        {
-                                               if (!cstate->opts.csv_mode)
-                                                       ereport(ERROR,
-                                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                        
errmsg("end-of-copy marker corrupt")));
-                                               else
-                                                       NO_END_OF_COPY_GOTO;
+                                               ereport(ERROR,
+                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                                
errmsg("end-of-copy marker corrupt")));
                                        }
                                }
 
@@ -1396,12 +1380,9 @@ CopyReadLineText(CopyFromState cstate)
 
                                if (c2 != '\r' && c2 != '\n')
                                {
-                                       if (!cstate->opts.csv_mode)
-                                               ereport(ERROR,
-                                                               
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
-                                                                
errmsg("end-of-copy marker corrupt")));
-                                       else
-                                               NO_END_OF_COPY_GOTO;
+                                       ereport(ERROR,
+                                                       
(errcode(ERRCODE_BAD_COPY_FILE_FORMAT),
+                                                        errmsg("end-of-copy 
marker corrupt")));
                                }
 
                                if ((cstate->eol_type == EOL_NL && c2 != '\n') 
||
@@ -1425,7 +1406,7 @@ CopyReadLineText(CopyFromState cstate)
                                result = true;  /* report EOF */
                                break;
                        }
-                       else if (!cstate->opts.csv_mode)
+                       else
                        {
                                /*
                                 * If we are here, it means we found a 
backslash followed by
@@ -1433,23 +1414,11 @@ CopyReadLineText(CopyFromState cstate)
                                 * after a backslash is special, so we skip 
over that second
                                 * character too.  If we didn't do that \\. 
would be
                                 * considered an eof-of copy, while in non-CSV 
mode it is a
-                                * literal backslash followed by a period.  In 
CSV mode,
-                                * backslashes are not special, so we want to 
process the
-                                * character after the backslash just like a 
normal character,
-                                * so we don't increment in those cases.
+                                * literal backslash followed by a period.
                                 */
                                input_buf_ptr++;
                        }
                }
-
-               /*
-                * This label is for CSV cases where \. appears at the start of 
a
-                * line, but there is more text after it, meaning it was a data 
value.
-                * We are more strict for \. in CSV mode because \. could be a 
data
-                * value, while in non-CSV mode, \. cannot be a data value.
-                */
-not_end_of_copy:
-               first_char_in_line = false;
        }                                                       /* end of outer 
loop */
 
        /*
diff --git a/src/bin/psql/copy.c b/src/bin/psql/copy.c
index dbbbdb8898..f31a7acbb6 100644
--- a/src/bin/psql/copy.c
+++ b/src/bin/psql/copy.c
@@ -620,20 +620,34 @@ handleCopyIn(PGconn *conn, FILE *copystream, bool 
isbinary, PGresult **res)
                                /* current line is done? */
                                if (buf[buflen - 1] == '\n')
                                {
-                                       /* check for EOF marker, but not on a 
partial line */
-                                       if (at_line_begin)
+                                       /*
+                                        * When at the beginning of the line, 
check for EOF marker.
+                                        * If the marker is found and the data 
is inlined,
+                                        * we must stop at this point.  If not, 
the \. line can be
+                                        * sent to the server, and we let it 
decide whether
+                                        * it's an EOF or not depending on the 
format: in
+                                        * basic TEXT, \. is going to be 
interpreted as an EOF, in
+                                        * CSV, it will not.
+                                        */
+                                       if (at_line_begin && copystream == 
pset.cur_cmd_source)
                                        {
-                                               /*
-                                                * This code erroneously 
assumes '\.' on a line alone
-                                                * inside a quoted CSV string 
terminates the \copy.
-                                                * 
https://www.postgresql.org/message-id/e1tdnvq-0001ju...@wrigleys.postgresql.org
-                                                *
-                                                * 
https://www.postgresql.org/message-id/bfcd57e4-8f23-4c3e-a5db-2571d0920...@beta.fastmail.com
-                                                */
                                                if ((linelen == 3 && 
memcmp(fgresult, "\\.\n", 3) == 0) ||
                                                        (linelen == 4 && 
memcmp(fgresult, "\\.\r\n", 4) == 0))
                                                {
                                                        copydone = true;
+                                                       /*
+                                                        * Remove the EOF 
marker from the data sent, unless
+                                                        * using the obsolete 
v2 protocol that needs it.
+                                                        * In the case of CSV, 
the EOF marker must be
+                                                        * removed, otherwise 
it would be interpreted by
+                                                        * the server as valid 
data.
+                                                        */
+                                                       if (copystream == 
pset.cur_cmd_source &&
+                                                               
PQprotocolVersion(conn) >= 3)
+                                                       {
+                                                               *fgresult='\0';
+                                                               buflen -= 
linelen;
+                                                       }
                                                }
                                        }
 
diff --git a/src/test/regress/expected/copy.out 
b/src/test/regress/expected/copy.out
index b48365ec98..645e849bbd 100644
--- a/src/test/regress/expected/copy.out
+++ b/src/test/regress/expected/copy.out
@@ -32,6 +32,24 @@ select * from copytest except select * from copytest2;
 -------+------+--------
 (0 rows)
 
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+ test  
+-------
+ \.
+ line1
+ line2
+(3 rows)
+
 -- test header line feature
 create temp table copytest3 (
        c1 int,
diff --git a/src/test/regress/sql/copy.sql b/src/test/regress/sql/copy.sql
index 43d2e906dd..68a58740cb 100644
--- a/src/test/regress/sql/copy.sql
+++ b/src/test/regress/sql/copy.sql
@@ -38,6 +38,18 @@ copy copytest2 from :'filename' csv quote '''' escape E'\\';
 
 select * from copytest except select * from copytest2;
 
+--- test unquoted \. as data inside CSV
+-- do not use copy out to export the data, as it would quote \.
+\o :filename
+\qecho line1
+\qecho '\\.'
+\qecho line2
+\o
+-- get the data back in with copy
+truncate copytest2;
+copy copytest2(test) from :'filename' csv;
+select test from copytest2 order by test collate "C";
+
 
 -- test header line feature
 
-- 
2.34.1

Reply via email to