Patches 0001 - 0006 are the same as in the previous set.
0007 fixes all the problems you have reported so far, as well as the one I
found. The commit message describes the fixes in detail.

Hi,

I found a potential bug in the parsing of the left and right arrows. They can be split by whitespace into - > and < - respectively, and are still accepted as arrows. Does the SQL/PGQ standard really allow this?

I found this while working on a patch of our own, when I was trying to figure out how you had solved the very same problem that we ran into, and whether you had done so in a better way. The fundamental problem is that parsing the left arrow as one token is a bit tricky due to how PostgreSQL treats operators ending with minus or plus: for SQL compatibility, a multi-character operator normally cannot end in + or -, so the scanner does not return <- as a single operator token.

I have attached our very ugly solution for it (broken out from our patch) in case it helps you. Feel free to use it or ignore it. We do not plan to work on this right now since you are already working on the same problem.

I especially dislike the static variable in our patch. As far as I understand it, you can avoid the static by changing the lexer to use the push parser, so that it can emit multiple terminal tokens from one scanned token, but I have not looked into push parsers and have no idea how this would affect performance.

https://www.gnu.org/software/bison/manual/html_node/Push-Decl.html

Examples:

# SELECT count(*) FROM GRAPH_TABLE (g1 MATCH ()-[]->() COLUMNS (1 as one));
 count
-------
    32
(1 row)

# SELECT count(*) FROM GRAPH_TABLE (g1 MATCH ()-[]- >() COLUMNS (1 as one));
 count
-------
    32
(1 row)

# SELECT * FROM GRAPH_TABLE (myshop MATCH (o IS orders)<-[IS customer_orders]-(c IS customers) COLUMNS (c.name, o.ordered_when));
   name    | ordered_when
-----------+--------------
 customer1 | 2024-01-01
 customer2 | 2024-01-02
(2 rows)

# SELECT * FROM GRAPH_TABLE (myshop MATCH (o IS orders)< -[IS customer_orders]-(c IS customers) COLUMNS (c.name, o.ordered_when));
   name    | ordered_when
-----------+--------------
 customer1 | 2024-01-01
 customer2 | 2024-01-02
(2 rows)

Andreas
From 07c3e1908de413db9fa383165eac78df0a80ab50 Mon Sep 17 00:00:00 2001
From: Andreas Karlsson <andr...@proxel.se>
Date: Tue, 29 Oct 2024 20:23:24 +0100
Subject: [PATCH] Broken out tokenization of arrows

---
 src/backend/parser/gram.y         | 20 ++++++++++++++---
 src/backend/parser/scan.l         | 37 +++++++++++++++++++++++++++++++
 src/fe_utils/psqlscan.l           |  5 +++++
 src/interfaces/ecpg/preproc/pgc.l | 34 ++++++++++++++++++++++++++++
 src/pl/plpgsql/src/pl_gram.y      |  1 +
 5 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y
index dd458182f02..8f07a1c8c0c 100644
--- a/src/backend/parser/gram.y
+++ b/src/backend/parser/gram.y
@@ -677,6 +677,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %token <ival>	ICONST PARAM
 %token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 %token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token			LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
 
 /*
  * If you want to make any keyword changes, update the keyword table in
@@ -817,7 +818,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %left		AND
 %right		NOT
 %nonassoc	IS ISNULL NOTNULL	/* IS sets precedence for IS NULL, etc */
-%nonassoc	'<' '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%nonassoc	'<' LEFT_ARROW_LESS '>' '=' LESS_EQUALS GREATER_EQUALS NOT_EQUALS
 %nonassoc	BETWEEN IN_P LIKE ILIKE SIMILAR NOT_LA
 %nonassoc	ESCAPE			/* ESCAPE must be just above LIKE/ILIKE/SIMILAR */
 
@@ -870,8 +871,8 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
 %nonassoc	UNBOUNDED NESTED /* ideally would have same precedence as IDENT */
 %nonassoc	IDENT PARTITION RANGE ROWS GROUPS PRECEDING FOLLOWING CUBE ROLLUP
 			SET KEYS OBJECT_P SCALAR VALUE_P WITH WITHOUT PATH
-%left		Op OPERATOR		/* multi-character ops and user-defined operators */
-%left		'+' '-'
+%left		Op OPERATOR RIGHT_ARROW	/* multi-character ops and user-defined operators */
+%left		'+' '-' LEFT_ARROW_MINUS
 %left		'*' '/' '%'
 %left		'^'
 /* Unary Operators */
@@ -14842,6 +14843,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", NULL, $2, @1); }
 			| '-' a_expr					%prec UMINUS
 				{ $$ = doNegate($2, @1); }
+			| LEFT_ARROW_MINUS a_expr		%prec UMINUS
+				{ $$ = doNegate($2, @1); }
 			| a_expr '+' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "+", $1, $3, @2); }
 			| a_expr '-' a_expr
@@ -14856,6 +14859,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
 			| a_expr '<' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+			| a_expr LEFT_ARROW_LESS a_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
 			| a_expr '>' a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
 			| a_expr '=' a_expr
@@ -14866,6 +14871,8 @@ a_expr:		c_expr									{ $$ = $1; }
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
 			| a_expr NOT_EQUALS a_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+			| a_expr RIGHT_ARROW a_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
 
 			| a_expr qual_Op a_expr				%prec Op
 				{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
@@ -15335,6 +15342,8 @@ b_expr:		c_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "^", $1, $3, @2); }
 			| b_expr '<' b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
+			| b_expr LEFT_ARROW_LESS b_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<", $1, $3, @2); }
 			| b_expr '>' b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">", $1, $3, @2); }
 			| b_expr '=' b_expr
@@ -15345,6 +15354,8 @@ b_expr:		c_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, ">=", $1, $3, @2); }
 			| b_expr NOT_EQUALS b_expr
 				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "<>", $1, $3, @2); }
+			| b_expr RIGHT_ARROW b_expr
+				{ $$ = (Node *) makeSimpleA_Expr(AEXPR_OP, "->", $1, $3, @2); }
 			| b_expr qual_Op b_expr				%prec Op
 				{ $$ = (Node *) makeA_Expr(AEXPR_OP, $2, $1, $3, @2); }
 			| qual_Op b_expr					%prec Op
@@ -16503,16 +16514,19 @@ all_Op:		Op										{ $$ = $1; }
 
 MathOp:		 '+'									{ $$ = "+"; }
 			| '-'									{ $$ = "-"; }
+			| LEFT_ARROW_MINUS						{ $$ = "-"; }
 			| '*'									{ $$ = "*"; }
 			| '/'									{ $$ = "/"; }
 			| '%'									{ $$ = "%"; }
 			| '^'									{ $$ = "^"; }
 			| '<'									{ $$ = "<"; }
+			| LEFT_ARROW_LESS						{ $$ = "<"; }
 			| '>'									{ $$ = ">"; }
 			| '='									{ $$ = "="; }
 			| LESS_EQUALS							{ $$ = "<="; }
 			| GREATER_EQUALS						{ $$ = ">="; }
 			| NOT_EQUALS							{ $$ = "<>"; }
+			| RIGHT_ARROW							{ $$ = "->"; }
 		;
 
 qual_Op:	Op
diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l
index 994ed9995ac..d7ed04011b0 100644
--- a/src/backend/parser/scan.l
+++ b/src/backend/parser/scan.l
@@ -366,6 +366,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow		"->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -454,6 +455,10 @@ other			.
 
 %%
 
+%{
+		static bool inleftarrow = false;
+%}
+
 {whitespace}	{
 					/* ignore */
 				}
@@ -892,8 +897,18 @@ other			.
 					return NOT_EQUALS;
 				}
 
+{right_arrow}	{
+					SET_YYLLOC();
+					return RIGHT_ARROW;
+				}
+
 {self}			{
 					SET_YYLLOC();
+					if (yytext[0] == '-' && inleftarrow)
+					{
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
 					return yytext[0];
 				}
 
@@ -919,6 +934,26 @@ other			.
 					if (slashstar)
 						nchars = slashstar - yytext;
 
+					if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+					{
+						/* Strip the unwanted chars from the token */
+						yyless(1);
+
+						inleftarrow = true;
+
+						return LEFT_ARROW_LESS;
+					}
+
+					if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+					{
+						/* Strip the unwanted chars from the token */
+						if (nchars < yyleng)
+							yyless(nchars);
+
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
+
 					/*
 					 * For SQL compatibility, '+' and '-' cannot be the
 					 * last char of a multi-char operator unless the operator
@@ -989,6 +1024,8 @@ other			.
 								return NOT_EQUALS;
 							if (yytext[0] == '!' && yytext[1] == '=')
 								return NOT_EQUALS;
+							if (yytext[0] == '-' && yytext[1] == '>')
+								return RIGHT_ARROW;
 						}
 					}
 
diff --git a/src/fe_utils/psqlscan.l b/src/fe_utils/psqlscan.l
index 8e8b049e15f..6f8fd7cd258 100644
--- a/src/fe_utils/psqlscan.l
+++ b/src/fe_utils/psqlscan.l
@@ -302,6 +302,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow     "->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -661,6 +662,10 @@ other			.
 					ECHO;
 				}
 
+{right_arrow}	{
+					ECHO;
+				}
+
 	/*
 	 * These rules are specific to psql --- they implement parenthesis
 	 * counting and detection of command-ending semicolon.  These must
diff --git a/src/interfaces/ecpg/preproc/pgc.l b/src/interfaces/ecpg/preproc/pgc.l
index f3c03482aec..d1fd12fe5af 100644
--- a/src/interfaces/ecpg/preproc/pgc.l
+++ b/src/interfaces/ecpg/preproc/pgc.l
@@ -335,6 +335,7 @@ less_equals		"<="
 greater_equals	">="
 less_greater	"<>"
 not_equals		"!="
+right_arrow		"->"
 
 /*
  * "self" is the set of chars that should be returned as single-character
@@ -463,6 +464,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 %%
 
 %{
+		static bool inleftarrow = false;
+
 		/* code to execute during start of each call of yylex() */
 		char *newdefsymbol = NULL;
 
@@ -854,6 +857,10 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					return NOT_EQUALS;
 				}
 
+{right_arrow}	{
+					return RIGHT_ARROW;
+				}
+
 {informix_special} {
 					/* are we simulating Informix? */
 					if (INFORMIX_MODE)
@@ -871,6 +878,11 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					 */
 					if (yytext[0] == ';' && struct_level == 0)
 						BEGIN(C);
+					if (yytext[0] == '-' && inleftarrow)
+					{
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
 					return yytext[0];
 				}
 
@@ -896,6 +908,26 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 					if (slashstar)
 						nchars = slashstar - yytext;
 
+					if (nchars == 2 && yytext[0] == '<' && yytext[1] == '-')
+					{
+						/* Strip the unwanted chars from the token */
+						yyless(1);
+
+						inleftarrow = true;
+
+						return LEFT_ARROW_LESS;
+					}
+
+					if (nchars == 1 && yytext[0] == '-' && inleftarrow)
+					{
+						/* Strip the unwanted chars from the token */
+						if (nchars < yyleng)
+							yyless(nchars);
+
+						inleftarrow = false;
+						return LEFT_ARROW_MINUS;
+					}
+
 					/*
 					 * For SQL compatibility, '+' and '-' cannot be the
 					 * last char of a multi-char operator unless the operator
@@ -968,6 +1000,8 @@ cppline			{space}*#([^i][A-Za-z]*|{if}|{ifdef}|{ifndef}|{import})((\/\*[^*/]*\*+
 								return NOT_EQUALS;
 							if (yytext[0] == '!' && yytext[1] == '=')
 								return NOT_EQUALS;
+							if (yytext[0] == '-' && yytext[1] == '>')
+								return RIGHT_ARROW;
 						}
 					}
 
diff --git a/src/pl/plpgsql/src/pl_gram.y b/src/pl/plpgsql/src/pl_gram.y
index 8182ce28aa1..c5cea379554 100644
--- a/src/pl/plpgsql/src/pl_gram.y
+++ b/src/pl/plpgsql/src/pl_gram.y
@@ -237,6 +237,7 @@ static	void			check_raise_parameters(PLpgSQL_stmt_raise *stmt);
 %token <ival>	ICONST PARAM
 %token			TYPECAST DOT_DOT COLON_EQUALS EQUALS_GREATER
 %token			LESS_EQUALS GREATER_EQUALS NOT_EQUALS
+%token			LEFT_ARROW_LESS LEFT_ARROW_MINUS RIGHT_ARROW
 
 /*
  * Other tokens recognized by plpgsql's lexer interface layer (pl_scanner.c).
-- 
2.45.2

Reply via email to