On 10.04.21 03:38, Laurenz Albe wrote:
On Fri, 2021-04-09 at 19:44 +0200, I wrote:
SQL-standard function body

psql needs some new intelligence to keep track of function body
boundaries so that it doesn't send off statements when it sees
semicolons that are inside a function body.

This causes psql to fail to recognize the semicolon in the following
statement as the end of a statement:

   IMPORT FOREIGN SCHEMA x FROM SERVER y INTO z OPTIONS (case 'lower');

The cause is the "case", which is recognized as the start of a function
body.

Would it be an option to recognize BEGIN and CASE as starting a
function body only if they are *not* inside parentheses, like in
the attached?

Here is an improved patch, which treats END in the same fashion
(not properly indented for readability).

Thanks, I took another look at this and augmented your change with a change that tracks whether the statement starts with CREATE [OR REPLACE] {FUNCTION|PROCEDURE}. That should make it pretty safe. What do you think?
From 26251be9be89583278573ed28540e5f16f2616fb Mon Sep 17 00:00:00 2001
From: Peter Eisentraut <[email protected]>
Date: Mon, 12 Apr 2021 17:19:59 +0200
Subject: [PATCH] psql: Refine lexing of BEGIN...END blocks in CREATE FUNCTION
 statements

Only track BEGIN...END blocks if they are in a CREATE [OR REPLACE]
{FUNCTION|PROCEDURE} statement.  Ignore if in parentheses.

Reported-by: Laurenz Albe <[email protected]>
---
 src/fe_utils/psqlscan.l             | 53 ++++++++++++++++++++++++-----
 src/include/fe_utils/psqlscan_int.h |  8 ++++-
 2 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 4ec57e96a9..991b7de0b5 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -870,18 +870,55 @@ other                     .
 
 
 {identifier}   {
-                                       cur_state->identifier_count++;
-                                       if (pg_strcasecmp(yytext, "begin") == 0
-                                               || pg_strcasecmp(yytext, 
"case") == 0)
+                                       /*
+                                        * We need to track if we are inside a 
BEGIN .. END block
+                                        * in a function definition, so that 
semicolons contained
+                                        * therein don't terminate the whole 
statement.  Short of
+                                        * writing a full parser here, the 
following heuristic
+                                        * should work.  First, we track 
whether the beginning of
+                                        * the statement matches CREATE [OR 
REPLACE]
+                                        * {FUNCTION|PROCEDURE}
+                                        */
+
+                                       if (cur_state->identifier_count == 0)
+                                               memset(cur_state->identifiers, 
0, sizeof(cur_state->identifiers));
+
+                                       if (pg_strcasecmp(yytext, "create") == 
0 ||
+                                               pg_strcasecmp(yytext, 
"function") == 0 ||
+                                               pg_strcasecmp(yytext, 
"procedure") == 0 ||
+                                               pg_strcasecmp(yytext, "or") == 
0 ||
+                                               pg_strcasecmp(yytext, 
"replace") == 0)
                                        {
-                                               if (cur_state->identifier_count 
> 1)
-                                                       
cur_state->begin_depth++;
+                                               if (cur_state->identifier_count 
< sizeof(cur_state->identifiers))
+                                                       
cur_state->identifiers[cur_state->identifier_count] = pg_tolower((unsigned 
char) yytext[0]);
                                        }
-                                       else if (pg_strcasecmp(yytext, "end") 
== 0)
+
+                                       cur_state->identifier_count++;
+
+                                       if (cur_state->identifiers[0] == 'c' &&
+                                               (cur_state->identifiers[1] == 
'f' || cur_state->identifiers[1] == 'p' ||
+                                                (cur_state->identifiers[1] == 
'o' && cur_state->identifiers[2] == 'r' &&
+                                                 (cur_state->identifiers[3] == 
'f' || cur_state->identifiers[3] == 'p'))) &&
+                                               cur_state->paren_depth == 0)
                                        {
-                                               if (cur_state->begin_depth > 0)
-                                                       
cur_state->begin_depth--;
+                                               if (pg_strcasecmp(yytext, 
"begin") == 0)
+                                                       
cur_state->begin_depth++;
+                                               else if (pg_strcasecmp(yytext, 
"case") == 0)
+                                               {
+                                                       /*
+                                                        * CASE also ends with 
END.  We only need to track
+                                                        * this if we are 
already inside a BEGIN.
+                                                        */
+                                                       if 
(cur_state->begin_depth >= 1)
+                                                               
cur_state->begin_depth++;
+                                               }
+                                               else if (pg_strcasecmp(yytext, 
"end") == 0)
+                                               {
+                                                       if 
(cur_state->begin_depth > 0)
+                                                               
cur_state->begin_depth--;
+                                               }
                                        }
+
                                        ECHO;
                                }
 
diff --git a/src/include/fe_utils/psqlscan_int.h 
b/src/include/fe_utils/psqlscan_int.h
index 91d7d4d5c6..434e554961 100644
--- a/src/include/fe_utils/psqlscan_int.h
+++ b/src/include/fe_utils/psqlscan_int.h
@@ -114,8 +114,14 @@ typedef struct PsqlScanStateData
        int                     paren_depth;    /* depth of nesting in 
parentheses */
        int                     xcdepth;                /* depth of nesting in 
slash-star comments */
        char       *dolqstart;          /* current $foo$ quote start string */
+
+       /*
+        * State to track boundaries of BEGIN ... END blocks in function
+        * definitions, so that semicolons do not send query too early.
+        */
        int                     identifier_count;       /* identifiers since 
start of statement */
-       int                     begin_depth;    /* depth of begin/end routine 
body blocks */
+       char            identifiers[4]; /* records the first few identifiers */
+       int                     begin_depth;    /* depth of begin/end pairs */
 
        /*
         * Callback functions provided by the program making use of the lexer,
-- 
2.31.1

Reply via email to