From 9a5dfd7172aaf588612fe820f26e3134270a6eec Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@2ndquadrant.com>
Date: Fri, 5 Jul 2019 14:22:42 +0700
Subject: [PATCH v4 2/3] Unify xuiend and xusend into a single start condition

Whether scanning a string or an identifier with Unicode escapes, we now
enter a single state, xuend, to look for a possible UESCAPE clause. This
shrinks the scanner's transition array to 26074 elements.
---
 src/backend/parser/scan.l | 127 +++++++++++++++++++-------------------
 1 file changed, 63 insertions(+), 64 deletions(-)

diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index cbf3f6deca..c0aa6cd22e 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -172,9 +172,9 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
  *  <xe> extended quoted strings (support backslash escape sequences)
  *  <xdolq> $foo$ quoted strings
  *  <xui> quoted identifier with Unicode escapes
- *  <xuiend> end of a quoted identifier with Unicode escapes, UESCAPE can follow
  *  <xus> quoted string with Unicode escapes
- *  <xusend> end of a quoted string with Unicode escapes, UESCAPE can follow
+ *  <xuend> end of a quoted string or identifier with Unicode escapes,
+ *    UESCAPE can follow
  *  <xeu> Unicode surrogate pair in extended quoted string
  *
  * Remember to add an <<EOF>> case whenever you add a new exclusive state!
@@ -190,9 +190,8 @@ extern void core_yyset_column(int column_no, yyscan_t yyscanner);
 %x xe
 %x xdolq
 %x xui
-%x xuiend
 %x xus
-%x xusend
+%x xuend
 %x xeu
 
 /*
@@ -591,39 +590,14 @@ other			.
 							yylval->str = litbufdup(yyscanner);
 							return SCONST;
 						case xus:
-							/* xusend state looks for possible UESCAPE */
-							BEGIN(xusend);
+							/* xuend state looks for possible UESCAPE */
+							BEGIN(xuend);
 							break;
 						default:
 							yyerror("unhandled previous state after endquote");
 					}
 				}
 
-<xusend>{whitespace} {
-					/* stay in xusend state over whitespace */
-				}
-<xusend><<EOF>> |
-<xusend>{other} |
-<xusend>{xustop1} {
-					/* no UESCAPE after the quote, throw back everything */
-					yyless(0);
-					BEGIN(INITIAL);
-					yylval->str = litbuf_udeescape('\\', yyscanner);
-					return SCONST;
-				}
-<xusend>{xustop2} {
-					/* found UESCAPE after the end quote */
-					BEGIN(INITIAL);
-					if (!check_uescapechar(yytext[yyleng - 2]))
-					{
-						SET_YYLLOC();
-						ADVANCE_YYLLOC(yyleng - 2);
-						yyerror("invalid Unicode escape character");
-					}
-					yylval->str = litbuf_udeescape(yytext[yyleng - 2],
-												   yyscanner);
-					return SCONST;
-				}
 <xq,xe,xus>{xqdouble} {
 					addlitchar('\'', yyscanner);
 				}
@@ -776,52 +750,77 @@ other			.
 					return IDENT;
 				}
 <xui>{dquote} {
-					yyless(1);
-					/* xuiend state looks for possible UESCAPE */
-					BEGIN(xuiend);
+					/* remember xui; xuend state looks for possible UESCAPE */
+					yyextra->state_before_quote_stop = YYSTATE;
+					BEGIN(xuend);
 				}
-<xuiend>{whitespace} {
-					/* stay in xuiend state over whitespace */
+
+<xuend>{whitespace} {
+					/* stay in xuend state over whitespace */
 				}
-<xuiend><<EOF>> |
-<xuiend>{other} |
-<xuiend>{xustop1} {
+<xuend><<EOF>> |
+<xuend>{other} |
+<xuend>{xustop1} {
 					/* no UESCAPE after the quote, throw back everything */
-					char	   *ident;
-					int			identlen;
-
 					yyless(0);
 
-					BEGIN(INITIAL);
-					if (yyextra->literallen == 0)
-						yyerror("zero-length delimited identifier");
-					ident = litbuf_udeescape('\\', yyscanner);
-					identlen = strlen(ident);
-					if (identlen >= NAMEDATALEN)
-						truncate_identifier(ident, identlen, true);
-					yylval->str = ident;
-					return IDENT;
+					if (yyextra->state_before_quote_stop == xus)
+					{
+						BEGIN(INITIAL);
+						yylval->str = litbuf_udeescape('\\', yyscanner);
+						return SCONST;
+					}
+					else if (yyextra->state_before_quote_stop == xui)
+					{
+						char	   *ident;
+						int			identlen;
+
+						BEGIN(INITIAL);
+						if (yyextra->literallen == 0)
+							yyerror("zero-length delimited identifier");
+						ident = litbuf_udeescape('\\', yyscanner);
+						identlen = strlen(ident);
+						if (identlen >= NAMEDATALEN)
+							truncate_identifier(ident, identlen, true);
+						yylval->str = ident;
+						return IDENT;
+					}
+					else
+						yyerror("unhandled previous state in xuend");
 				}
-<xuiend>{xustop2}	{
+<xuend>{xustop2} {
 					/* found UESCAPE after the end quote */
-					char	   *ident;
-					int			identlen;
-
-					BEGIN(INITIAL);
-					if (yyextra->literallen == 0)
-						yyerror("zero-length delimited identifier");
 					if (!check_uescapechar(yytext[yyleng - 2]))
 					{
 						SET_YYLLOC();
 						ADVANCE_YYLLOC(yyleng - 2);
 						yyerror("invalid Unicode escape character");
 					}
-					ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
-					identlen = strlen(ident);
-					if (identlen >= NAMEDATALEN)
-						truncate_identifier(ident, identlen, true);
-					yylval->str = ident;
-					return IDENT;
+
+					if (yyextra->state_before_quote_stop == xus)
+					{
+						BEGIN(INITIAL);
+						yylval->str = litbuf_udeescape(yytext[yyleng - 2],
+													   yyscanner);
+						return SCONST;
+					}
+					else if (yyextra->state_before_quote_stop == xui)
+					{
+						char	   *ident;
+						int			identlen;
+
+						BEGIN(INITIAL);
+						if (yyextra->literallen == 0)
+							yyerror("zero-length delimited identifier");
+						ident = litbuf_udeescape(yytext[yyleng - 2], yyscanner);
+						identlen = strlen(ident);
+						if (identlen >= NAMEDATALEN)
+							truncate_identifier(ident, identlen, true);
+						yylval->str = ident;
+						return IDENT;
+					}
+					else
+						yyerror("unhandled previous state in xuend");
 				}
 <xd,xui>{xddouble}	{
 					addlitchar('"', yyscanner);
-- 
2.17.2 (Apple Git-113)

