This is an automated email from the ASF dual-hosted git repository. coheigea pushed a commit to branch coheigea/json-unicode in repository https://gitbox.apache.org/repos/asf/cxf.git
commit c36fabd84d7c74bbdf91af824b08a45a40834368 Author: Colm O hEigeartaigh <[email protected]> AuthorDate: Mon May 25 17:08:29 2026 +0100 Support unicode characters in the json parser --- .../json/basic/JsonMapObjectReaderWriter.java | 91 ++++++++++++++++------ .../json/basic/JsonMapObjectReaderWriterTest.java | 46 +++++++++++ 2 files changed, 113 insertions(+), 24 deletions(-) diff --git a/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java b/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java index bbd6852302f..474e0832745 100644 --- a/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java +++ b/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java @@ -268,18 +268,7 @@ public class JsonMapObjectReaderWriter { } if (value instanceof String) { - if (((String) value).contains("\\/")) { - // Escape an encoded forward slash - value = ((String) value).replace("\\/", "/"); - } - if (((String) value).contains("\\\"")) { - // Escape an encoded quotation mark - value = ((String) value).replace("\\\"", "\""); - } - if (((String) value).contains("\\\\")) { - // Escape an encoded backslash - value = ((String) value).replace("\\\\", "\\"); - } + value = decodeEscapeSequences((String) value); } return value; } @@ -399,6 +388,69 @@ public class JsonMapObjectReaderWriter { } + /** + * Decodes all RFC 8259 section 7 JSON string escape sequences in a single + * left-to-right pass, producing the logical string value. + * + * <p>Recognised sequences: {@code \"}, {@code \\}, {@code \/}, {@code \b}, + * {@code \f}, {@code \n}, {@code \r}, {@code \t}, and four-digit hex Unicode + * escapes (backslash + {@code u} + four hex digits). + * + * <p>A single pass is used deliberately: sequential {@code String.replace} calls + * applied in separate passes can interact incorrectly (e.g. 
a raw {@code \\"} + * sequence would have its {@code \"} consumed by a "decode quotes" pass before + * the {@code \\} is consumed by a "decode backslashes" pass, yielding the wrong + * result). + */ + private static String decodeEscapeSequences(String s) { + int backslashIdx = s.indexOf(ESCAPE); + if (backslashIdx == -1) { + return s; // fast path: nothing to decode + } + StringBuilder sb = new StringBuilder(s.length()); + sb.append(s, 0, backslashIdx); + int i = backslashIdx; + while (i < s.length()) { + char c = s.charAt(i); + if (c != ESCAPE || i + 1 >= s.length()) { + sb.append(c); + i++; + continue; + } + char next = s.charAt(i + 1); + switch (next) { + case '"': sb.append('"'); i += 2; break; + case '\\': sb.append('\\'); i += 2; break; + case '/': sb.append('/'); i += 2; break; + case 'b': sb.append('\b'); i += 2; break; + case 'f': sb.append('\f'); i += 2; break; + case 'n': sb.append('\n'); i += 2; break; + case 'r': sb.append('\r'); i += 2; break; + case 't': sb.append('\t'); i += 2; break; + case 'u': + if (i + 5 < s.length()) { + String hex = s.substring(i + 2, i + 6); + try { + sb.append((char) Integer.parseInt(hex, 16)); + i += 6; + break; + } catch (NumberFormatException ignored) { + // not a valid four-digit hex sequence — fall through and keep '\' + } + } + sb.append(c); + i++; + break; + default: + // unrecognised escape — keep the backslash as-is + sb.append(c); + i++; + break; + } + } + return sb.toString(); + } + /** * Returns the index of the closing {@code "} that matches the opening quote at * {@code openQuoteIndex}, correctly skipping over escaped quotes ({@code \"}) and @@ -424,20 +476,11 @@ public class JsonMapObjectReaderWriter { } /** - * Decodes the JSON escape sequences that may appear in a key name: - * {@code \/} → {@code /}, {@code \"} → {@code "}, {@code \\} → {@code \}. + * Decodes the JSON escape sequences that may appear in a key name by delegating + * to the same single-pass decoder used for string values. 
*/ private static String unescapeKeyName(String name) { - if (name.contains("\\/")) { - name = name.replace("\\/", "/"); - } - if (name.contains("\\\"")) { - name = name.replace("\\\"", "\""); - } - if (name.contains("\\\\")) { - name = name.replace("\\\\", "\\"); - } - return name; + return decodeEscapeSequences(name); } private String escapeJson(String value) { diff --git a/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java b/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java index a680ecba04d..c133de675db 100644 --- a/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java +++ b/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java @@ -281,6 +281,52 @@ public class JsonMapObjectReaderWriterTest { assertEquals("a\\", entry.getValue()); } + /** + * Regression test for "[MEDIUM] Unicode Escapes Not Decoded — Potential Bypass". + * + * <p>RFC 8259 section 7 requires that four-digit hex Unicode escape sequences + * (backslash + u + four hex digits) be decoded to the corresponding character. + * {@code readPrimitiveValue} only handles {@code \/}, {@code \"}, and {@code \\}; + * four-digit hex escapes and single-character escapes ({@code \n}, {@code \r}, + * {@code \t}, etc.) are returned as the raw literal escape text rather than the + * decoded character. + * + * <p>Security impact: a JWT whose {@code alg} header is written using four-digit hex + * escapes that spell {@code none} passes CXF's own algorithm check (the literal + * un-decoded sequence is not equal to {@code "none"}), while a downstream + * RFC-compliant consumer decodes the escapes and may skip signature verification + * entirely — a parser-differential bypass. 
+ */ + @Test + public void testUnicodeEscapeInValueDecodedCorrectly() throws Exception { + // JSON: {"alg":"<none-as-4-digit-hex-escapes>"} — each character of "none" is written + // as its four-digit hex Unicode escape. A correct parser decodes them to "none". + // Bug: readPrimitiveValue does not decode four-digit hex escapes; the value is + // returned as the 24-character literal sequence rather than "none". + String json = "{\"alg\":\"\\u006e\\u006f\\u006e\\u0065\"}"; + Map<String, Object> map = new JsonMapObjectReaderWriter().fromJson(json); + assertEquals(1, map.size()); + assertEquals("none", map.get("alg")); + } + + /** + * Simpler companion to {@link #testUnicodeEscapeInValueDecodedCorrectly}: verifies + * that a four-digit hex Unicode escape embedded in the middle of a value string is + * decoded to the target character rather than kept as the raw escape text. + * + * <p>The letter {@code l} is U+006C; JSON {@code "hel<U+006C>o"} should therefore + * produce the five-character string {@code hello}. + */ + @Test + public void testUnicodeEscapeEmbeddedInString() throws Exception { + // JSON: {"a":"hel<U+006C>o"} — U+006C is 'l', so the decoded value is "hello". + // Bug: the six-character literal sequence is returned instead of the decoded char. + String json = "{\"a\":\"hel\\u006co\"}"; + Map<String, Object> map = new JsonMapObjectReaderWriter().fromJson(json); + assertEquals(1, map.size()); + assertEquals("hello", map.get("a")); + } + @Test public void testRejectInfinityNumericValue() { assertInvalidNumericLiteral("Infinity");
