On Mon, Mar 18, 2024 at 3:32 AM Andrew Dunstan <and...@dunslane.net> wrote:
> Not very easily. But I think and hope I've fixed the issue you've identified 
> above about returning before lex->token_start is properly set.
>
>  Attached is a new set of patches that does that and is updated for the 
> json_errdetaiil() changes.

Thanks!

>    ++           * Normally token_start would be ptok->data, but it could be 
> later,
>    ++           * see json_lex_string's handling of invalid escapes.
>     +           */
>    -+          lex->token_start = ptok->data;
>    ++          lex->token_start = dummy_lex.token_start;
>     +          lex->token_terminator = ptok->data + ptok->len;

By the same token (ha), the lex->token_terminator needs to be updated
from dummy_lex for some error paths. (IIUC, on success, the
token_terminator should always point to the end of the buffer. If it's
not possible to combine the two code paths, maybe it'd be good to
check that and assert/error out if we've incorrectly pulled additional
data into the partial token.)

With the incremental parser, I think prev_token_terminator is not
likely to be safe to use except in very specific circumstances, since
it could be pointing into a stale chunk. Some documentation around how
to use that safely in a semantic action would be good.

It looks like some of the newly added error handling paths cannot be
hit, because the production stack makes it logically impossible to get
there. (For example, if it takes a successfully lexed comma to
transition into JSON_PROD_MORE_ARRAY_ELEMENTS to begin with, then when
we pull that production's JSON_TOKEN_COMMA off the stack, we can't
somehow fail to match that same comma.) Assuming I haven't missed a
different way to get into that situation, could the "impossible" cases
have assert calls added?

I've attached two diffs. One is the group of tests I've been using
locally (called 002_inline.pl; I replaced the existing inline tests
with it), and the other is a set of potential fixes to get those tests
green.

Thanks,
--Jacob
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index 8273b35b01..f7608f84b9 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -928,11 +928,12 @@ pg_parse_json_incremental(JsonLexContext *lex,
                                case JSON_TOKEN_END:
                                        ctx = JSON_PARSE_END;
                                        break;
-
-                                       /*
-                                        * mostly we don't need to worry about 
non-terminals here,
-                                        * but there are a few cases where we 
do.
-                                        */
+                               case JSON_NT_MORE_ARRAY_ELEMENTS:
+                                       ctx = JSON_PARSE_ARRAY_NEXT;
+                                       break;
+                               case JSON_NT_ARRAY_ELEMENTS:
+                                       ctx = JSON_PARSE_ARRAY_START;
+                                       break;
                                case JSON_NT_MORE_KEY_PAIRS:
                                        ctx = JSON_PARSE_OBJECT_NEXT;
                                        break;
@@ -1412,7 +1413,7 @@ json_lex(JsonLexContext *lex)
                 * see json_lex_string's handling of invalid escapes.
                 */
                lex->token_start = dummy_lex.token_start;
-               lex->token_terminator = ptok->data + ptok->len;
+               lex->token_terminator = dummy_lex.token_terminator;
                if (partial_result == JSON_SUCCESS)
                        lex->inc_state->partial_completed = true;
                return partial_result;
commit 3d615593d6d79178a8b8a208172414806d40cc03
Author: Jacob Champion <jacob.champ...@enterprisedb.com>
Date:   Mon Mar 11 06:13:47 2024 -0700

    WIP: test many different inline cases

diff --git a/src/test/modules/test_json_parser/meson.build 
b/src/test/modules/test_json_parser/meson.build
index a5bedce94e..2563075c34 100644
--- a/src/test/modules/test_json_parser/meson.build
+++ b/src/test/modules/test_json_parser/meson.build
@@ -45,6 +45,7 @@ tests += {
   'tap': {
     'tests': [
       't/001_test_json_parser_incremental.pl',
+      't/002_inline.pl',
     ],
   },
 }
diff --git 
a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl 
b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl
index 477198b843..24f665c021 100644
--- a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl
+++ b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl
@@ -12,8 +12,6 @@ my $test_file = "$FindBin::RealBin/../tiny.json";
 
 my $exe = "test_json_parser_incremental";
 
-my @inlinetests = ("false", "12345", '"a"', "[1,2]");
-
 for (my $size = 64; $size > 0; $size--)
 {
        my ($stdout, $stderr) = run_command( [$exe, "-c", $size, $test_file] );
@@ -22,21 +20,4 @@ for (my $size = 64; $size > 0; $size--)
        is($stderr, "", "chunk size $size: no error output");
 }
 
-foreach my $test_string (@inlinetests)
-{
-       my ($fh, $fname) = tempfile();
-       print $fh "$test_string";
-       close($fh);
-
-       foreach my $size (1..6, 10, 20, 30)
-       {
-               next if $size > length($test_string);
-
-               my ($stdout, $stderr) = run_command( [$exe, "-c", $size, 
$fname] );
-
-               like($stdout, qr/SUCCESS/, "chunk size $size, input 
'$test_string': test succeeds");
-               is($stderr, "", "chunk size $size, input '$test_string': no 
error output");
-       }
-}
-
 done_testing();
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl 
b/src/test/modules/test_json_parser/t/002_inline.pl
new file mode 100644
index 0000000000..4a2e7134a3
--- /dev/null
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -0,0 +1,78 @@
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+use File::Temp qw(tempfile);
+
+sub test
+{
+       local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+       my ($name, $json, %params) = @_;
+       my $exe = "test_json_parser_incremental";
+       my $chunk = length($json);
+
+       if ($chunk > 64)
+       {
+               $chunk = 64;
+       }
+
+       my ($fh, $fname) = tempfile(UNLINK => 1);
+       print $fh "$json";
+       close($fh);
+
+       for (my $size = $chunk; $size > 0; $size--)
+       {
+               my ($stdout, $stderr) = run_command( [$exe, "-c", $size, 
$fname] );
+
+               if (defined($params{error}))
+               {
+                       unlike($stdout, qr/SUCCESS/, "$name, chunk size $size: 
test fails");
+                       like($stderr, $params{error}, "$name, chunk size $size: 
correct error output");
+               }
+               else
+               {
+                       like($stdout, qr/SUCCESS/, "$name, chunk size $size: 
test succeeds");
+                       is($stderr, "", "$name, chunk size $size: no error 
output");
+               }
+       }
+}
+
+test("number", "12345");
+test("string", '"hello"');
+test("boolean", "false");
+test("null", "null");
+test("empty object", "{}");
+test("empty array", "[]");
+test("array with number", "[12345]");
+test("array with numbers", "[12345,67890]");
+test("array with null", "[null]");
+test("array with string", '["hello"]');
+test("array with boolean", '[false]');
+test("single pair", '{"key": "value"}');
+test("heavily nested array", "[" x 3200 . "]" x 3200);
+
+test("unclosed empty object", "{", error => qr/input string ended 
unexpectedly/);
+test("bad key", "{{", error => qr/Expected string or "}", but found "\{"/);
+test("bad key", "{{}", error => qr/Expected string or "}", but found "\{"/);
+test("numeric key", "{1234: 2}", error => qr/Expected string or "}", but found 
"1234"/);
+test("second numeric key", '{"a": "a", 1234: 2}', error => qr/Expected string, 
but found "1234"/);
+test("unclosed object with pair", '{"key": "value"', error => qr/input string 
ended unexpectedly/);
+test("missing key value", '{"key": }', error => qr/Expected JSON value, but 
found "}"/);
+test("missing colon", '{"key" 12345}', error => qr/Expected ":", but found 
"12345"/);
+test("missing comma", '{"key": 12345 12345}', error => qr/Expected "," or "}", 
but found "12345"/);
+test("overnested array", "[" x 6401, error => qr/maximum number of levels for 
json is 6400/);
+test("overclosed array", "[]]", error => qr/Expected end of input, but found 
"]"/);
+test("unexpected token in array", "[ }}} ]", error => qr/Expected array 
element or "]", but found "}"/);
+test("junk punctuation", "[ ||| ]", error => qr/Token "|" is invalid/);
+test("missing comma in array", "[123 123]", error => qr/Expected "," or "]", 
but found "123"/);
+test("misspelled boolean", "tru", error => qr/Token "tru" is invalid/);
+test("misspelled boolean in array", "[tru]", error => qr/Token "tru" is 
invalid/);
+test("smashed top-level scalar", "12zz", error => qr/Token "12zz" is invalid/);
+test("smashed scalar in array", "[12zz]", error => qr/Token "12zz" is 
invalid/);
+test("unknown escape sequence", '"hello\vworld"', error => qr/Escape sequence 
"\\v" is invalid/);
+test("unescaped control", "\"hello\tworld\"", error => qr/Character with value 
0x09 must be escaped/);
+
+done_testing();
diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c 
b/src/test/modules/test_json_parser/test_json_parser_incremental.c
index 2865dbc619..2cd31be7af 100644
--- a/src/test/modules/test_json_parser/test_json_parser_incremental.c
+++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c
@@ -68,9 +68,7 @@ main(int argc, char **argv)
                                                                                
           false);
                        if (result != JSON_INCOMPLETE)
                        {
-                               fprintf(stderr,
-                                               "unexpected result %d 
(expecting %d) on token %s\n",
-                                               result, JSON_INCOMPLETE, 
lex.token_start);
+                               fprintf(stderr, "%s\n", json_errdetail(result, 
&lex));
                                exit(1);
                        }
                        resetStringInfo(&json);
@@ -82,9 +80,7 @@ main(int argc, char **argv)
                                                                                
           true);
                        if (result != JSON_SUCCESS)
                        {
-                               fprintf(stderr,
-                                               "unexpected result %d 
(expecting %d) on final chunk\n",
-                                               result, JSON_SUCCESS);
+                               fprintf(stderr, "%s\n", json_errdetail(result, 
&lex));
                                exit(1);
                        }
                        printf("SUCCESS!\n");

Reply via email to