(cxf) 01/01: Support unicode characters in the json parser

coheigea Mon, 25 May 2026 09:10:23 -0700

This is an automated email from the ASF dual-hosted git repository.

coheigea pushed a commit to branch coheigea/json-unicode
in repository https://gitbox.apache.org/repos/asf/cxf.git


commit c36fabd84d7c74bbdf91af824b08a45a40834368
Author: Colm O hEigeartaigh <[email protected]>
AuthorDate: Mon May 25 17:08:29 2026 +0100

    Support unicode characters in the json parser
---
 .../json/basic/JsonMapObjectReaderWriter.java      | 91 ++++++++++++++++------
 .../json/basic/JsonMapObjectReaderWriterTest.java  | 46 +++++++++++
 2 files changed, 113 insertions(+), 24 deletions(-)

diff --git 
a/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java
 
b/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java
index bbd6852302f..474e0832745 100644
--- 
a/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java
+++ 
b/rt/rs/extensions/json-basic/src/main/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriter.java
@@ -268,18 +268,7 @@ public class JsonMapObjectReaderWriter {
         }
 
         if (value instanceof String) {
-            if (((String) value).contains("\\/")) {
-                // Escape an encoded forward slash
-                value = ((String) value).replace("\\/", "/");
-            }
-            if (((String) value).contains("\\\"")) {
-                // Escape an encoded quotation mark
-                value = ((String) value).replace("\\\"", "\"");
-            }
-            if (((String) value).contains("\\\\")) {
-                // Escape an encoded backslash
-                value = ((String) value).replace("\\\\", "\\");
-            }
+            value = decodeEscapeSequences((String) value);
         }
         return value;
     }
@@ -399,6 +388,69 @@ public class JsonMapObjectReaderWriter {
 
     }
 
+    /**
+     * Decodes all RFC 8259 section 7 JSON string escape sequences in a single
+     * left-to-right pass, producing the logical string value.
+     *
+     * <p>Recognised sequences: {@code \"}, {@code \\}, {@code \/}, {@code \b},
+     * {@code \f}, {@code \n}, {@code \r}, {@code \t}, and four-digit hex Unicode
+     * escapes (backslash + {@code u} + four hex digits). Each Unicode escape yields
+     * exactly one UTF-16 code unit, so a surrogate pair written as two consecutive
+     * escapes ends up as two adjacent code units in the result.
+     *
+     * <p>Anything that is not a well-formed escape (an unrecognised escape letter, a
+     * Unicode escape that is not followed by four ASCII hex digits, or a trailing lone
+     * backslash) is left untouched: the backslash is copied to the output and scanning
+     * resumes at the following character.
+     *
+     * <p>A single pass is used deliberately: sequential {@code String.replace} calls
+     * applied in separate passes can interact incorrectly (e.g. a raw {@code \\"}
+     * sequence would have its {@code \"} consumed by a "decode quotes" pass before
+     * the {@code \\} is consumed by a "decode backslashes" pass, yielding the wrong
+     * result).
+     *
+     * @param s the raw string, possibly containing escape sequences; must not be {@code null}
+     * @return the decoded string, or {@code s} itself when it contains no backslash
+     */
+    private static String decodeEscapeSequences(String s) {
+        int firstEscape = s.indexOf(ESCAPE);
+        if (firstEscape == -1) {
+            return s; // fast path: nothing to decode
+        }
+        final int length = s.length();
+        StringBuilder sb = new StringBuilder(length);
+        // Everything before the first backslash is copied verbatim in one go.
+        sb.append(s, 0, firstEscape);
+        int i = firstEscape;
+        while (i < length) {
+            char c = s.charAt(i);
+            if (c != ESCAPE || i + 1 >= length) {
+                // Ordinary character, or a lone backslash at the very end of the input.
+                sb.append(c);
+                i++;
+                continue;
+            }
+            char next = s.charAt(i + 1);
+            // Number of input characters consumed by this step; 2 for the simple escapes.
+            int consumed = 2;
+            switch (next) {
+            case '"':
+            case '\\':
+            case '/':
+                // These three escapes decode to the escaped character itself.
+                sb.append(next);
+                break;
+            case 'b':
+                sb.append('\b');
+                break;
+            case 'f':
+                sb.append('\f');
+                break;
+            case 'n':
+                sb.append('\n');
+                break;
+            case 'r':
+                sb.append('\r');
+                break;
+            case 't':
+                sb.append('\t');
+                break;
+            case 'u':
+                int codeUnit = parseFourHexDigits(s, i + 2);
+                if (codeUnit >= 0) {
+                    sb.append((char) codeUnit);
+                    consumed = 6;
+                } else {
+                    // not a valid four-digit hex sequence: keep the backslash as-is
+                    sb.append(c);
+                    consumed = 1;
+                }
+                break;
+            default:
+                // unrecognised escape: keep the backslash as-is
+                sb.append(c);
+                consumed = 1;
+                break;
+            }
+            i += consumed;
+        }
+        return sb.toString();
+    }
+
+    /**
+     * Parses exactly four ASCII hexadecimal digits of {@code s} starting at
+     * {@code start}.
+     *
+     * <p>The digits are validated by hand rather than with
+     * {@code Integer.parseInt(String, int)} because that method also accepts a
+     * leading sign and non-ASCII digit characters, neither of which is permitted in
+     * a JSON Unicode escape.
+     *
+     * @param s     the string being decoded
+     * @param start index of the first expected hex digit
+     * @return the value in the range 0 to 0xFFFF, or {@code -1} when fewer than four
+     *         characters remain or any of them is not one of 0-9, a-f or A-F
+     */
+    private static int parseFourHexDigits(String s, int start) {
+        if (start + 4 > s.length()) {
+            return -1;
+        }
+        int value = 0;
+        for (int k = start; k < start + 4; k++) {
+            char d = s.charAt(k);
+            int digit;
+            if (d >= '0' && d <= '9') {
+                digit = d - '0';
+            } else if (d >= 'a' && d <= 'f') {
+                digit = d - 'a' + 10;
+            } else if (d >= 'A' && d <= 'F') {
+                digit = d - 'A' + 10;
+            } else {
+                return -1;
+            }
+            value = (value << 4) | digit;
+        }
+        return value;
+    }
+
     /**
      * Returns the index of the closing {@code "} that matches the opening 
quote at
      * {@code openQuoteIndex}, correctly skipping over escaped quotes ({@code 
\"}) and
@@ -424,20 +476,11 @@ public class JsonMapObjectReaderWriter {
     }
 
     /**
-     * Decodes the JSON escape sequences that may appear in a key name:
-     * {@code \/} → {@code /}, {@code \"} → {@code "}, {@code \\} → {@code \}.
+     * Decodes the JSON escape sequences that may appear in a key name by delegating
+     * to the same single-pass decoder used for string values.
+     *
+     * @param name the key name, possibly containing escape sequences
+     * @return the value returned by {@code decodeEscapeSequences} for {@code name}
      */
     private static String unescapeKeyName(String name) {
-        if (name.contains("\\/")) {
-            name = name.replace("\\/", "/");
-        }
-        if (name.contains("\\\"")) {
-            name = name.replace("\\\"", "\"");
-        }
-        if (name.contains("\\\\")) {
-            name = name.replace("\\\\", "\\");
-        }
-        return name;
+        return decodeEscapeSequences(name);
     }
 
     private String escapeJson(String value) {
diff --git 
a/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java
 
b/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java
index a680ecba04d..c133de675db 100644
--- 
a/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java
+++ 
b/rt/rs/extensions/json-basic/src/test/java/org/apache/cxf/jaxrs/json/basic/JsonMapObjectReaderWriterTest.java
@@ -281,6 +281,52 @@ public class JsonMapObjectReaderWriterTest {
         assertEquals("a\\", entry.getValue());
     }
 
+    /**
+     * Regression test for "[MEDIUM] Unicode Escapes Not Decoded — Potential Bypass".
+     *
+     * <p>RFC 8259 section 7 requires that four-digit hex Unicode escape sequences
+     * (backslash + u + four hex digits) be decoded to the corresponding character.
+     * Before the fix, {@code readPrimitiveValue} only handled {@code \/}, {@code \"},
+     * and {@code \\}; four-digit hex escapes and single-character escapes
+     * ({@code \n}, {@code \r}, {@code \t}, etc.) were returned as the raw literal
+     * escape text rather than the decoded character.
+     *
+     * <p>Security impact of the old behaviour: a JWT whose {@code alg} header is
+     * written using four-digit hex escapes that spell {@code none} passes CXF's own
+     * algorithm check (the literal un-decoded sequence is not equal to
+     * {@code "none"}), while a downstream RFC-compliant consumer decodes the escapes
+     * and may skip signature verification entirely — a parser-differential bypass.
+     *
+     * <p>This test parses such a document and asserts that the {@code alg} value is
+     * the decoded string {@code none}.
+     */
+    @Test
+    public void testUnicodeEscapeInValueDecodedCorrectly() throws Exception {
+        // JSON: {"alg":"<none-as-4-digit-hex-escapes>"} — each character of "none" is
+        // written as its four-digit hex Unicode escape. A correct parser decodes them
+        // to "none".
+        // Before the fix, the value came back as the 24-character literal escape
+        // sequence rather than "none".
+        String json = "{\"alg\":\"\\u006e\\u006f\\u006e\\u0065\"}";
+        Map<String, Object> map = new 
JsonMapObjectReaderWriter().fromJson(json);
+        assertEquals(1, map.size());
+        assertEquals("none", map.get("alg"));
+    }
+
+    /**
+     * Simpler companion to {@link #testUnicodeEscapeInValueDecodedCorrectly}: verifies
+     * that a four-digit hex Unicode escape embedded in the middle of a value string is
+     * decoded to the target character rather than kept as the raw escape text.
+     *
+     * <p>The letter {@code l} is U+006C; a JSON string consisting of {@code hel}, the
+     * four-digit hex escape for U+006C, and {@code o} should therefore produce the
+     * five-character string {@code hello}.
+     */
+    @Test
+    public void testUnicodeEscapeEmbeddedInString() throws Exception {
+        // JSON: {"a":"hel<U+006C>o"} — U+006C is 'l', so the decoded value is "hello".
+        // Before the fix, the six-character literal escape sequence was returned in
+        // place of the decoded char.
+        String json = "{\"a\":\"hel\\u006co\"}";
+        Map<String, Object> map = new 
JsonMapObjectReaderWriter().fromJson(json);
+        assertEquals(1, map.size());
+        assertEquals("hello", map.get("a"));
+    }
+
     @Test
     public void testRejectInfinityNumericValue() {
         assertInvalidNumericLiteral("Infinity");

(cxf) 01/01: Support unicode characters in the json parser

Reply via email to