On Mon, Mar 18, 2024 at 3:32 AM Andrew Dunstan <and...@dunslane.net> wrote: > Not very easily. But I think and hope I've fixed the issue you've identified > above about returning before lex->token_start is properly set. > > Attached is a new set of patches that does that and is updated for the > json_errdetaiil() changes.
Thanks! > ++ * Normally token_start would be ptok->data, but it could be > later, > ++ * see json_lex_string's handling of invalid escapes. > + */ > -+ lex->token_start = ptok->data; > ++ lex->token_start = dummy_lex.token_start; > + lex->token_terminator = ptok->data + ptok->len; By the same token (ha), the lex->token_terminator needs to be updated from dummy_lex for some error paths. (IIUC, on success, the token_terminator should always point to the end of the buffer. If it's not possible to combine the two code paths, maybe it'd be good to check that and assert/error out if we've incorrectly pulled additional data into the partial token.) With the incremental parser, I think prev_token_terminator is not likely to be safe to use except in very specific circumstances, since it could be pointing into a stale chunk. Some documentation around how to use that safely in a semantic action would be good. It looks like some of the newly added error handling paths cannot be hit, because the production stack makes it logically impossible to get there. (For example, if it takes a successfully lexed comma to transition into JSON_PROD_MORE_ARRAY_ELEMENTS to begin with, then when we pull that production's JSON_TOKEN_COMMA off the stack, we can't somehow fail to match that same comma.) Assuming I haven't missed a different way to get into that situation, could the "impossible" cases have assert calls added? I've attached two diffs. One is the group of tests I've been using locally (called 002_inline.pl; I replaced the existing inline tests with it), and the other is a set of potential fixes to get those tests green. Thanks, --Jacob
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c index 8273b35b01..f7608f84b9 100644 --- a/src/common/jsonapi.c +++ b/src/common/jsonapi.c @@ -928,11 +928,12 @@ pg_parse_json_incremental(JsonLexContext *lex, case JSON_TOKEN_END: ctx = JSON_PARSE_END; break; - - /* - * mostly we don't need to worry about non-terminals here, - * but there are a few cases where we do. - */ + case JSON_NT_MORE_ARRAY_ELEMENTS: + ctx = JSON_PARSE_ARRAY_NEXT; + break; + case JSON_NT_ARRAY_ELEMENTS: + ctx = JSON_PARSE_ARRAY_START; + break; case JSON_NT_MORE_KEY_PAIRS: ctx = JSON_PARSE_OBJECT_NEXT; break; @@ -1412,7 +1413,7 @@ json_lex(JsonLexContext *lex) * see json_lex_string's handling of invalid escapes. */ lex->token_start = dummy_lex.token_start; - lex->token_terminator = ptok->data + ptok->len; + lex->token_terminator = dummy_lex.token_terminator; if (partial_result == JSON_SUCCESS) lex->inc_state->partial_completed = true; return partial_result;
commit 3d615593d6d79178a8b8a208172414806d40cc03 Author: Jacob Champion <jacob.champ...@enterprisedb.com> Date: Mon Mar 11 06:13:47 2024 -0700 WIP: test many different inline cases diff --git a/src/test/modules/test_json_parser/meson.build b/src/test/modules/test_json_parser/meson.build index a5bedce94e..2563075c34 100644 --- a/src/test/modules/test_json_parser/meson.build +++ b/src/test/modules/test_json_parser/meson.build @@ -45,6 +45,7 @@ tests += { 'tap': { 'tests': [ 't/001_test_json_parser_incremental.pl', + 't/002_inline.pl', ], }, } diff --git a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl index 477198b843..24f665c021 100644 --- a/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl +++ b/src/test/modules/test_json_parser/t/001_test_json_parser_incremental.pl @@ -12,8 +12,6 @@ my $test_file = "$FindBin::RealBin/../tiny.json"; my $exe = "test_json_parser_incremental"; -my @inlinetests = ("false", "12345", '"a"', "[1,2]"); - for (my $size = 64; $size > 0; $size--) { my ($stdout, $stderr) = run_command( [$exe, "-c", $size, $test_file] ); @@ -22,21 +20,4 @@ for (my $size = 64; $size > 0; $size--) is($stderr, "", "chunk size $size: no error output"); } -foreach my $test_string (@inlinetests) -{ - my ($fh, $fname) = tempfile(); - print $fh "$test_string"; - close($fh); - - foreach my $size (1..6, 10, 20, 30) - { - next if $size > length($test_string); - - my ($stdout, $stderr) = run_command( [$exe, "-c", $size, $fname] ); - - like($stdout, qr/SUCCESS/, "chunk size $size, input '$test_string': test succeeds"); - is($stderr, "", "chunk size $size, input '$test_string': no error output"); - } -} - done_testing(); diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl new file mode 100644 index 0000000000..4a2e7134a3 --- /dev/null +++ b/src/test/modules/test_json_parser/t/002_inline.pl @@ -0,0 +1,78 @@ +use strict; +use warnings; + +use PostgreSQL::Test::Utils; +use Test::More; + +use File::Temp qw(tempfile); + +sub test +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($name, $json, %params) = @_; + my $exe = "test_json_parser_incremental"; + my $chunk = length($json); + + if ($chunk > 64) + { + $chunk = 64; + } + + my ($fh, $fname) = tempfile(UNLINK => 1); + print $fh "$json"; + close($fh); + + for (my $size = $chunk; $size > 0; $size--) + { + my ($stdout, $stderr) = run_command( [$exe, "-c", $size, $fname] ); + + if (defined($params{error})) + { + unlike($stdout, qr/SUCCESS/, "$name, chunk size $size: test fails"); + like($stderr, $params{error}, "$name, chunk size $size: correct error output"); + } + else + { + like($stdout, qr/SUCCESS/, "$name, chunk size $size: test succeeds"); + is($stderr, "", "$name, chunk size $size: no error output"); + } + } +} + +test("number", "12345"); +test("string", '"hello"'); +test("boolean", "false"); +test("null", "null"); +test("empty object", "{}"); +test("empty array", "[]"); +test("array with number", "[12345]"); +test("array with numbers", "[12345,67890]"); +test("array with null", "[null]"); +test("array with string", '["hello"]'); +test("array with boolean", '[false]'); +test("single pair", '{"key": "value"}'); +test("heavily nested array", "[" x 3200 . "]" x 3200); + +test("unclosed empty object", "{", error => qr/input string ended unexpectedly/); +test("bad key", "{{", error => qr/Expected string or "}", but found "\{"/); +test("bad key", "{{}", error => qr/Expected string or "}", but found "\{"/); +test("numeric key", "{1234: 2}", error => qr/Expected string or "}", but found "1234"/); +test("second numeric key", '{"a": "a", 1234: 2}', error => qr/Expected string, but found "1234"/); +test("unclosed object with pair", '{"key": "value"', error => qr/input string ended unexpectedly/); +test("missing key value", '{"key": }', error => qr/Expected JSON value, but found "}"/); +test("missing colon", '{"key" 12345}', error => qr/Expected ":", but found "12345"/); +test("missing comma", '{"key": 12345 12345}', error => qr/Expected "," or "}", but found "12345"/); +test("overnested array", "[" x 6401, error => qr/maximum number of levels for json is 6400/); +test("overclosed array", "[]]", error => qr/Expected end of input, but found "]"/); +test("unexpected token in array", "[ }}} ]", error => qr/Expected array element or "]", but found "}"/); +test("junk punctuation", "[ ||| ]", error => qr/Token "|" is invalid/); +test("missing comma in array", "[123 123]", error => qr/Expected "," or "]", but found "123"/); +test("misspelled boolean", "tru", error => qr/Token "tru" is invalid/); +test("misspelled boolean in array", "[tru]", error => qr/Token "tru" is invalid/); +test("smashed top-level scalar", "12zz", error => qr/Token "12zz" is invalid/); +test("smashed scalar in array", "[12zz]", error => qr/Token "12zz" is invalid/); +test("unknown escape sequence", '"hello\vworld"', error => qr/Escape sequence "\\v" is invalid/); +test("unescaped control", "\"hello\tworld\"", error => qr/Character with value 0x09 must be escaped/); + +done_testing(); diff --git a/src/test/modules/test_json_parser/test_json_parser_incremental.c b/src/test/modules/test_json_parser/test_json_parser_incremental.c index 2865dbc619..2cd31be7af 100644 --- a/src/test/modules/test_json_parser/test_json_parser_incremental.c +++ b/src/test/modules/test_json_parser/test_json_parser_incremental.c @@ -68,9 +68,7 @@ main(int argc, char **argv) false); if (result != JSON_INCOMPLETE) { - fprintf(stderr, - "unexpected result %d (expecting %d) on token %s\n", - result, JSON_INCOMPLETE, lex.token_start); + fprintf(stderr, "%s\n", json_errdetail(result, &lex)); exit(1); } resetStringInfo(&json); @@ -82,9 +80,7 @@ main(int argc, char **argv) true); if (result != JSON_SUCCESS) { - fprintf(stderr, - "unexpected result %d (expecting %d) on final chunk\n", - result, JSON_SUCCESS); + fprintf(stderr, "%s\n", json_errdetail(result, &lex)); exit(1); } printf("SUCCESS!\n");