From 8295efb9994e28c8b0c9b0e4992c1ed3cf891791 Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Fri, 5 Jul 2019 14:26:00 +0700
Subject: [PATCH v4 3/3] Use separate start conditions for both UESCAPE and the
 following character.

This shrinks the transition array to 23696 elements and simplifies the
uescape/uescapefail rules.
---
 src/backend/parser/scan.l | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index c0aa6cd22e..1837636273 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -175,6 +175,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
  *  <xus> quoted string with Unicode escapes
  *  <xuend> end of a quoted string or identifier with Unicode escapes,
  *    UESCAPE can follow
+ *  <xuchar> quoted escape character expected after UESCAPE
  *  <xeu> Unicode surrogate pair in extended quoted string
  *
  * Remember to add an <<EOF>> case whenever you add a new exclusive state!
@@ -192,6 +193,7 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 %x xui
 %x xus
 %x xuend
+%x xuchar
 %x xeu
 
 /*
@@ -295,10 +297,14 @@ xdstop			{dquote}
 xddouble		{dquote}{dquote}
 xdinside		[^"]+
 
-/* Unicode escapes */
-uescape			[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']{quote}
+/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
+uescape			[uU][eE][sS][cC][aA][pP][eE]
 /* error rule to avoid backup */
-uescapefail		[uU][eE][sS][cC][aA][pP][eE]{whitespace}*"-"|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}[^']|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*{quote}|[uU][eE][sS][cC][aA][pP][eE]{whitespace}*|[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
+uescapefail		[uU][eE][sS][cC][aA][pP]|[uU][eE][sS][cC][aA]|[uU][eE][sS][cC]|[uU][eE][sS]|[uU][eE]|[uU]
+
+/* The escape character after UESCAPE: one non-quote char in quotes */
+uescchar		{quote}[^']{quote}
+uesccharfail	{quote}[^']|{other}
 
 /* Quoted identifier with Unicode escapes */
 xuistart		[uU]&{dquote}
@@ -306,9 +312,8 @@ xuistart		[uU]&{dquote}
 /* Quoted string with Unicode escapes */
 xusstart		[uU]&{quote}
 
-/* Optional UESCAPE after a quoted string or identifier with Unicode escapes. */
-xustop1		{uescapefail}?
-xustop2		{uescape}
+/* No UESCAPE follows; matches empty or a truncated UESCAPE keyword */
+xustop			{uescapefail}?
 
 /* error rule to avoid backup */
 xufailed		[uU]&
@@ -755,12 +760,12 @@ other			.
 					BEGIN(xuend);
 				}
 
-<xuend>{whitespace} {
-					/* stay in xuend state over whitespace */
+<xuend,xuchar>{whitespace} {
+					/* stay in xuend/xuchar state over whitespace */
 				}
 <xuend><<EOF>> |
 <xuend>{other} |
-<xuend>{xustop1} {
+<xuend>{xustop} {
 					/* no UESCAPE after the quote, throw back everything */
 					yyless(0);
 
@@ -788,8 +793,11 @@ other			.
 					else
 						yyerror("unhandled previous state in xuend");
 				}
-<xuend>{xustop2} {
+<xuend>{uescape} {
 					/* found UESCAPE after the end quote */
+					BEGIN(xuchar);
+				}
+<xuchar>{uescchar} {
 					if (!check_uescapechar(yytext[yyleng - 2]))
 					{
 						SET_YYLLOC();
@@ -820,8 +828,14 @@ other			.
 						return IDENT;
 					}
 					else
-						yyerror("unhandled previous state in xuend");
+						yyerror("unhandled previous state in xuchar");
+				}
+<xuchar><<EOF>> |
+<xuchar>{uesccharfail} {
+					SET_YYLLOC();
+					yyerror("missing or invalid Unicode escape character");
 				}
+
 <xd,xui>{xddouble}	{
 					addlitchar('"', yyscanner);
 				}
-- 
2.17.2 (Apple Git-113)

