From bba7744e59941d8bb2f039e631d090d0e3956d6c Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Mon, 8 Apr 2024 15:31:17 -0700
Subject: [PATCH] json_lex_string: don't overread on bad UTF8

Inputs to pg_parse_json[_incremental] are not guaranteed to be
NUL-terminated, so pg_encoding_mblen_bounded (which uses strnlen) can
walk off the end of the buffer. Use pg_encoding_mblen and clamp the
resulting token terminator to the end pointer instead.

TODO:
- pg_encoding_mblen_bounded() now has no callers; should we remove it?
- Do we really want to print incomplete UTF-8 sequences as-is once we
  know they're bad?
---
 src/common/jsonapi.c                              | 5 +++--
 src/test/modules/test_json_parser/t/002_inline.pl | 4 ++++
 2 files changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/common/jsonapi.c b/src/common/jsonapi.c
index fc0cb36974..6633503490 100644
--- a/src/common/jsonapi.c
+++ b/src/common/jsonapi.c
@@ -1689,8 +1689,9 @@ json_lex_string(JsonLexContext *lex)
 	} while (0)
 #define FAIL_AT_CHAR_END(code) \
 	do { \
-		lex->token_terminator = \
-			s + pg_encoding_mblen_bounded(lex->input_encoding, s); \
+		lex->token_terminator = s + pg_encoding_mblen(lex->input_encoding, s); \
+		if (lex->token_terminator >= end) \
+			lex->token_terminator = end; \
 		return code; \
 	} while (0)
 
diff --git a/src/test/modules/test_json_parser/t/002_inline.pl b/src/test/modules/test_json_parser/t/002_inline.pl
index f83cec03f8..0335a26f47 100644
--- a/src/test/modules/test_json_parser/t/002_inline.pl
+++ b/src/test/modules/test_json_parser/t/002_inline.pl
@@ -128,5 +128,9 @@ test(
 	"incorrect escape count",
 	'"\\\\\\\\\\\\\\"',
 	error => qr/Token ""\\\\\\\\\\\\\\"" is invalid/);
+test(
+	"incomplete UTF-8 sequence",
+	"\"\\\x{F5}",  # three bytes: double-quote, backslash, <f5>
+	error => qr/(Token|Escape sequence) ""?\\\x{F5}" is invalid/);
 
 done_testing();
-- 
2.34.1