Re: Define jsonpath functions as stable

Tom Lane Thu, 19 Sep 2019 09:26:18 -0700

I wrote:
> I found a spot that seemed like a reasonable place, and added some
> coverage of the point.  Updated patch attached.


Doc patch pushed.

> It seems to me that there are some discrepancies between what the spec
> says and what jsonpath_scan.l actually does, so maybe we should take a
> hard look at that code too.  The biggest issue is that jsonpath_scan.l
> seems to allow single- and double-quoted strings interchangeably, which is
> OK per ECMAScript, but then the SQL/JSON spec seems to be saying that only
> double-quoted strings are allowed.  I'd rather be conservative about this
> than get out in front of the spec and use syntax space that they might do
> something else with someday.

The attached proposed patch makes these changes:

1. Remove support for single-quoted literals in jsonpath.

2. Treat an unrecognized escape (e.g., "\z") as meaning the escaped
   character, rather than throwing an error.

3. A few cosmetic adjustments to make the jsonpath_scan code shorter and
   clearer (IMHO).

As for #1, although the SQL/JSON tech report does reference ECMAScript
which allows both single- and double-quoted strings, it seems to me
that their intent is to allow only the double-quoted variant.  They
specifically reference JSON string literals at one point, and of course
JSON only allows double-quoted.  Also, all of their discussion and
examples use double-quoted.  Plus you'd have to be pretty nuts to want
to use single-quoted when writing a jsonpath string literal inside a SQL
literal (and the tech report seems to contemplate that jsonpaths MUST be
string literals, though of course our implementation does not require
that).

As for #2, the existing code throws an error, but this is contrary
to clear statements in every single one of the relevant standards.

                        regards, tom lane

diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l
index 2165ffc..245255f 100644
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@@ -59,24 +59,23 @@ fprintf_to_ereport(const char *fmt, const char *msg)
 %option noyyfree
 
 /*
- * We use exclusive states for quoted, signle-quoted and non-quoted strings,
- * quoted variable names and C-tyle comments.
+ * We use exclusive states for quoted and non-quoted strings,
+ * quoted variable names and C-style comments.
  * Exclusive states:
  *  <xq> - quoted strings
  *  <xnq> - non-quoted strings
  *  <xvq> - quoted variable names
- *  <xsq> - single-quoted strings
  *  <xc> - C-style comment
  */
 
 %x xq
 %x xnq
 %x xvq
-%x xsq
 %x xc
 
-special		 [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
-any			[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
+/* "other" means anything that's not special, blank, or '\' or '"' */
+special		[\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
+other		[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\" \t\n\r\f]
 blank		[ \t\n\r\f]
 
 digit		[0-9]
@@ -95,7 +94,7 @@ hex_fail	\\x{hex_dig}{0,1}
 
 %%
 
-<xnq>{any}+						{
+<xnq>{other}+					{
 									addstring(false, yytext, yyleng);
 								}
 
@@ -105,13 +104,12 @@ hex_fail	\\x{hex_dig}{0,1}
 									return checkKeyword();
 								}
 
-
 <xnq>\/\*						{
 									yylval->str = scanstring;
 									BEGIN xc;
 								}
 
-<xnq>({special}|\"|\')			{
+<xnq>({special}|\")				{
 									yylval->str = scanstring;
 									yyless(0);
 									BEGIN INITIAL;
@@ -124,39 +122,37 @@ hex_fail	\\x{hex_dig}{0,1}
 									return checkKeyword();
 								}
 
-<xnq,xq,xvq,xsq>\\[\"\'\\]		{ addchar(false, yytext[1]); }
-
-<xnq,xq,xvq,xsq>\\b				{ addchar(false, '\b'); }
+<xnq,xq,xvq>\\b				{ addchar(false, '\b'); }
 
-<xnq,xq,xvq,xsq>\\f				{ addchar(false, '\f'); }
+<xnq,xq,xvq>\\f				{ addchar(false, '\f'); }
 
-<xnq,xq,xvq,xsq>\\n				{ addchar(false, '\n'); }
+<xnq,xq,xvq>\\n				{ addchar(false, '\n'); }
 
-<xnq,xq,xvq,xsq>\\r				{ addchar(false, '\r'); }
+<xnq,xq,xvq>\\r				{ addchar(false, '\r'); }
 
-<xnq,xq,xvq,xsq>\\t				{ addchar(false, '\t'); }
+<xnq,xq,xvq>\\t				{ addchar(false, '\t'); }
 
-<xnq,xq,xvq,xsq>\\v				{ addchar(false, '\v'); }
+<xnq,xq,xvq>\\v				{ addchar(false, '\v'); }
 
-<xnq,xq,xvq,xsq>{unicode}+		{ parseUnicode(yytext, yyleng); }
+<xnq,xq,xvq>{unicode}+		{ parseUnicode(yytext, yyleng); }
 
-<xnq,xq,xvq,xsq>{hex_char}		{ parseHexChar(yytext); }
+<xnq,xq,xvq>{hex_char}		{ parseHexChar(yytext); }
 
-<xnq,xq,xvq,xsq>{unicode}*{unicodefail}	{ yyerror(NULL, "invalid unicode sequence"); }
+<xnq,xq,xvq>{unicode}*{unicodefail}	{ yyerror(NULL, "invalid unicode sequence"); }
 
-<xnq,xq,xvq,xsq>{hex_fail}		{ yyerror(NULL, "invalid hex character sequence"); }
+<xnq,xq,xvq>{hex_fail}		{ yyerror(NULL, "invalid hex character sequence"); }
 
-<xnq,xq,xvq,xsq>{unicode}+\\	{
-									/* throw back the \\, and treat as unicode */
-									yyless(yyleng - 1);
-									parseUnicode(yytext, yyleng);
-								}
+<xnq,xq,xvq>{unicode}+\\	{
+								/* throw back the \\, and treat as unicode */
+								yyless(yyleng - 1);
+								parseUnicode(yytext, yyleng);
+							}
 
-<xnq,xq,xvq,xsq>\\.				{ yyerror(NULL, "escape sequence is invalid"); }
+<xnq,xq,xvq>\\.				{ addchar(false, yytext[1]); }
 
-<xnq,xq,xvq,xsq>\\				{ yyerror(NULL, "unexpected end after backslash"); }
+<xnq,xq,xvq>\\				{ yyerror(NULL, "unexpected end after backslash"); }
 
-<xq,xvq,xsq><<EOF>>				{ yyerror(NULL, "unexpected end of quoted string"); }
+<xq,xvq><<EOF>>				{ yyerror(NULL, "unexpected end of quoted string"); }
 
 <xq>\"							{
 									yylval->str = scanstring;
@@ -170,16 +166,8 @@ hex_fail	\\x{hex_dig}{0,1}
 									return VARIABLE_P;
 								}
 
-<xsq>\'							{
-									yylval->str = scanstring;
-									BEGIN INITIAL;
-									return STRING_P;
-								}
-
 <xq,xvq>[^\\\"]+				{ addstring(false, yytext, yyleng); }
 
-<xsq>[^\\\']+					{ addstring(false, yytext, yyleng); }
-
 <xc>\*\/						{ BEGIN INITIAL; }
 
 <xc>[^\*]+						{ }
@@ -210,7 +198,7 @@ hex_fail	\\x{hex_dig}{0,1}
 
 \>								{ return GREATER_P; }
 
-\${any}+						{
+\${other}+						{
 									addstring(true, yytext + 1, yyleng - 1);
 									addchar(false, '\0');
 									yylval->str = scanstring;
@@ -263,27 +251,22 @@ hex_fail	\\x{hex_dig}{0,1}
 
 ({realfail1}|{realfail2})		{ yyerror(NULL, "invalid floating point number"); }
 
-{any}+							{
-									addstring(true, yytext, yyleng);
-									BEGIN xnq;
-								}
-
 \"								{
 									addchar(true, '\0');
 									BEGIN xq;
 								}
 
-\'								{
-									addchar(true, '\0');
-									BEGIN xsq;
-								}
-
 \\								{
 									yyless(0);
 									addchar(true, '\0');
 									BEGIN xnq;
 								}
 
+{other}+						{
+									addstring(true, yytext, yyleng);
+									BEGIN xnq;
+								}
+
 <<EOF>>							{ yyterminate(); }
 
 %%
diff --git a/src/test/regress/expected/jsonpath.out b/src/test/regress/expected/jsonpath.out
index ea42ae3..fc971dc 100644
--- a/src/test/regress/expected/jsonpath.out
+++ b/src/test/regress/expected/jsonpath.out
@@ -171,30 +171,24 @@ select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
  "\b\f\r\n\t\u000b\"'\\"
 (1 row)
 
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
-        jsonpath         
--------------------------
- "\b\f\r\n\t\u000b\"'\\"
-(1 row)
-
 select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
  jsonpath 
 ----------
  "PgSQL"
 (1 row)
 
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
- jsonpath 
-----------
- "PgSQL"
-(1 row)
-
 select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
       jsonpath       
 ---------------------
  $."fooPgSQL\t\"bar"
 (1 row)
 
+select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
+ jsonpath 
+----------
+ "z"
+(1 row)
+
 select '$.g ? ($.a == 1)'::jsonpath;
       jsonpath      
 --------------------
diff --git a/src/test/regress/expected/jsonpath_encoding.out b/src/test/regress/expected/jsonpath_encoding.out
index 8db6e47..ecffe09 100644
--- a/src/test/regress/expected/jsonpath_encoding.out
+++ b/src/test/regress/expected/jsonpath_encoding.out
@@ -81,84 +81,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
  "null \\u0000 escape"
 (1 row)
 
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;		-- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath;	-- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath;	-- ERROR, g is not a hex digit
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath;	-- OK, legal escape
-ERROR:  invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
-               ^
-SELECT E'\'\uaBcD\''::jsonpath;	-- OK, uppercase and lower case both OK
- jsonpath 
-----------
- "ꯍ"
-(1 row)
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
- correct_in_utf8 
------------------
- "😄🐶"
-(1 row)
-
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
-               ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
-               ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
-               ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
-               ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-    correct_in_utf8     
-------------------------
- "the Copyright © sign"
-(1 row)
-
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-  correct_everywhere  
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-    not_an_escape     
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR:  invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-               ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR:  unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
-               ^
-DETAIL:  \u0000 cannot be converted to text.
 -- checks for quoted key names
 -- basic unicode input
 SELECT '$."\u"'::jsonpath;		-- ERROR, incomplete escape
diff --git a/src/test/regress/expected/jsonpath_encoding_1.out b/src/test/regress/expected/jsonpath_encoding_1.out
index e6dff25..c8cc217 100644
--- a/src/test/regress/expected/jsonpath_encoding_1.out
+++ b/src/test/regress/expected/jsonpath_encoding_1.out
@@ -78,78 +78,6 @@ select '"null \\u0000 escape"'::jsonpath as not_an_escape;
  "null \\u0000 escape"
 (1 row)
 
--- checks for single-quoted values
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;		-- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u00\''::jsonpath;	-- ERROR, incomplete escape
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u00\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u000g\''::jsonpath;	-- ERROR, g is not a hex digit
-ERROR:  invalid Unicode escape
-LINE 1: SELECT E'\'\u000g\''::jsonpath;
-               ^
-HINT:  Unicode escapes must be \uXXXX or \UXXXXXXXX.
-SELECT E'\'\u0000\''::jsonpath;	-- OK, legal escape
-ERROR:  invalid Unicode escape value at or near "E'\'\u0000"
-LINE 1: SELECT E'\'\u0000\''::jsonpath;
-               ^
-SELECT E'\'\uaBcD\''::jsonpath;	-- OK, uppercase and lower case both OK
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\uaBcD"
-LINE 1: SELECT E'\'\uaBcD\''::jsonpath;
-               ^
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'\ud83d\ude04"
-LINE 1: select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_...
-               ^
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83d\ud83d"
-LINE 1: select E'\'\ud83d\ud83d\''::jsonpath;
-               ^
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04\ud83d\''::jsonpath;
-               ^
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ud83dX"
-LINE 1: select E'\'\ud83dX\''::jsonpath;
-               ^
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-ERROR:  invalid Unicode surrogate pair at or near "E'\'\ude04"
-LINE 1: select E'\'\ude04X\''::jsonpath;
-               ^
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-ERROR:  Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8 at or near "E'\'the Copyright \u00a9"
-LINE 1: select E'\'the Copyright \u00a9 sign\''::jsonpath as correct...
-               ^
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-  correct_everywhere  
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-    not_an_escape     
-----------------------
- "dollar $ character"
-(1 row)
-
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-ERROR:  invalid Unicode escape value at or near "E'\'null \u0000"
-LINE 1: select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-               ^
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-ERROR:  unsupported Unicode escape sequence
-LINE 1: select E'\'null \\u0000 escape\''::jsonpath as not_an_escape...
-               ^
-DETAIL:  \u0000 cannot be converted to text.
 -- checks for quoted key names
 -- basic unicode input
 SELECT '$."\u"'::jsonpath;		-- ERROR, incomplete escape
diff --git a/src/test/regress/sql/jsonpath.sql b/src/test/regress/sql/jsonpath.sql
index 29ea77a..7afe252 100644
--- a/src/test/regress/sql/jsonpath.sql
+++ b/src/test/regress/sql/jsonpath.sql
@@ -30,10 +30,9 @@ select '$.a/+-1'::jsonpath;
 select '1 * 2 + 4 % -3 != false'::jsonpath;
 
 select '"\b\f\r\n\t\v\"\''\\"'::jsonpath;
-select '''\b\f\r\n\t\v\"\''\\'''::jsonpath;
 select '"\x50\u0067\u{53}\u{051}\u{00004C}"'::jsonpath;
-select '''\x50\u0067\u{53}\u{051}\u{00004C}'''::jsonpath;
 select '$.foo\x50\u0067\u{53}\u{051}\u{00004C}\t\"bar'::jsonpath;
+select '"\z"'::jsonpath;  -- unrecognized escape is just the literal char
 
 select '$.g ? ($.a == 1)'::jsonpath;
 select '$.g ? (@ == 1)'::jsonpath;
diff --git a/src/test/regress/sql/jsonpath_encoding.sql b/src/test/regress/sql/jsonpath_encoding.sql
index a3b5bc3..3a23b72 100644
--- a/src/test/regress/sql/jsonpath_encoding.sql
+++ b/src/test/regress/sql/jsonpath_encoding.sql
@@ -24,29 +24,6 @@ select '"dollar \\u0024 character"'::jsonpath as not_an_escape;
 select '"null \u0000 escape"'::jsonpath as not_unescaped;
 select '"null \\u0000 escape"'::jsonpath as not_an_escape;
 
--- checks for single-quoted values
-
--- basic unicode input
-SELECT E'\'\u\''::jsonpath;		-- ERROR, incomplete escape
-SELECT E'\'\u00\''::jsonpath;	-- ERROR, incomplete escape
-SELECT E'\'\u000g\''::jsonpath;	-- ERROR, g is not a hex digit
-SELECT E'\'\u0000\''::jsonpath;	-- OK, legal escape
-SELECT E'\'\uaBcD\''::jsonpath;	-- OK, uppercase and lower case both OK
-
--- handling of unicode surrogate pairs
-select E'\'\ud83d\ude04\ud83d\udc36\''::jsonpath as correct_in_utf8;
-select E'\'\ud83d\ud83d\''::jsonpath; -- 2 high surrogates in a row
-select E'\'\ude04\ud83d\''::jsonpath; -- surrogates in wrong order
-select E'\'\ud83dX\''::jsonpath; -- orphan high surrogate
-select E'\'\ude04X\''::jsonpath; -- orphan low surrogate
-
---handling of simple unicode escapes
-select E'\'the Copyright \u00a9 sign\''::jsonpath as correct_in_utf8;
-select E'\'dollar \u0024 character\''::jsonpath as correct_everywhere;
-select E'\'dollar \\u0024 character\''::jsonpath as not_an_escape;
-select E'\'null \u0000 escape\''::jsonpath as not_unescaped;
-select E'\'null \\u0000 escape\''::jsonpath as not_an_escape;
-
 -- checks for quoted key names
 
 -- basic unicode input

Re: Define jsonpath functions as stable

Reply via email to