Hi,
Since it seems that the mailing list is scrubbing attachments, patch is
mentioned inline below.
diff a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
--- a/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
+++ b/src/java.base/share/classes/jdk/internal/util/xml/impl/Parser.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, 2020, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -1991,24 +1991,22 @@
case ';':
// Convert the character entity to a
character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1,
mBuffIdx - idx), 10);
- if (i >= 0xffff) {
- panic(FAULT);
+ // Restore the buffer offset
+ mBuffIdx = idx - 1;
+ for(char character : Character.toChars(i))
{
+ if (character == ' ' || mInp.next !=
null) {
+ bappend(character, flag);
+ } else {
+ bappend(character);
+ }
}
- ch = (char) i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
- // Restore the buffer offset
- mBuffIdx = idx - 1;
- if (ch == ' ' || mInp.next != null) {
- bappend(ch, flag);
- } else {
- bappend(ch);
- }
st = -1;
break;
case 'a':
// If the entity buffer is empty and
ch == 'x'
@@ -2032,24 +2030,22 @@
case ';':
// Convert the character entity to a
character
try {
int i = Integer.parseInt(
new String(mBuff, idx + 1,
mBuffIdx - idx), 16);
- if (i >= 0xffff) {
- panic(FAULT);
+ // Restore the buffer offset
+ mBuffIdx = idx - 1;
+ for(char character : Character.toChars(i))
{
+ if (character == ' ' || mInp.next !=
null) {
+ bappend(character, flag);
+ } else {
+ bappend(character);
+ }
}
- ch = (char) i;
} catch (NumberFormatException nfe) {
panic(FAULT);
}
- // Restore the buffer offset
- mBuffIdx = idx - 1;
- if (ch == ' ' || mInp.next != null) {
- bappend(ch, flag);
- } else {
- bappend(ch);
- }
st = -1;
break;
default:
panic(FAULT);
diff
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
---
a/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
+++
b/src/java.base/share/classes/jdk/internal/util/xml/impl/XMLStreamWriterImpl.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, 2018, Oracle and/or its affiliates. All rights
reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation. Oracle designates this
@@ -358,10 +358,19 @@
*/
public void setDoIndent(boolean doIndent) {
_doIndent = doIndent;
}
+ /**
+ * Writes character reference in hex format.
+ */
+ private void writeCharRef(int codePoint) throws XMLStreamException {
+ _writer.write(ENCODING_PREFIX);
+ _writer.write(Integer.toHexString(codePoint));
+ _writer.write(SEMICOLON);
+ }
+
/**
* Writes XML content to underlying writer. Escapes characters unless
* escaping character feature is turned off.
*/
private void writeXMLContent(char[] content, int start, int length,
boolean escapeChars)
@@ -381,14 +390,19 @@
char ch = content[index];
if (!_writer.canEncode(ch)) {
_writer.write(content, startWritePos, index -
startWritePos);
- // Escape this char as underlying encoder cannot handle it
- _writer.write(ENCODING_PREFIX);
- _writer.write(Integer.toHexString(ch));
- _writer.write(SEMICOLON);
+ // Check if current and next characters forms a surrogate
pair
+ // and escape it to avoid generation of invalid xml content
+ if ( index != end - 1 && Character.isSurrogatePair(ch,
content[index+1])) {
+ writeCharRef(Character.toCodePoint(ch,
content[index+1]));
+ index++;
+ } else {
+ writeCharRef(ch);
+ }
+
startWritePos = index + 1;
continue;
}
switch (ch) {
@@ -453,14 +467,19 @@
char ch = content.charAt(index);
if (!_writer.canEncode(ch)) {
_writer.write(content, startWritePos, index -
startWritePos);
- // Escape this char as underlying encoder cannot handle it
- _writer.write(ENCODING_PREFIX);
- _writer.write(Integer.toHexString(ch));
- _writer.write(SEMICOLON);
+ // Check if current and next characters forms a surrogate
pair
+ // and escape it to avoid generation of invalid xml content
+ if ( index != end - 1 && Character.isSurrogatePair(ch,
content.charAt(index+1))) {
+ writeCharRef(Character.toCodePoint(ch,
content.charAt(index+1)));
+ index++;
+ } else {
+ writeCharRef(ch);
+ }
+
startWritePos = index + 1;
continue;
}
switch (ch) {
diff a/test/jdk/java/util/Properties/LoadAndStoreXML.java
b/test/jdk/java/util/Properties/LoadAndStoreXML.java
--- a/test/jdk/java/util/Properties/LoadAndStoreXML.java
+++ b/test/jdk/java/util/Properties/LoadAndStoreXML.java
@@ -1,7 +1,7 @@
/*
- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2021, Oracle and/or its affiliates. All rights
reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
@@ -21,11 +21,11 @@
* questions.
*/
/*
* @test
- * @bug 8000354 8000685 8004371 8043119
+ * @bug 8000354 8000685 8004371 8043119 8276207
* @summary Basic test of storeToXML and loadToXML
* @run main/othervm -Djava.security.manager=allow LoadAndStoreXML
*/
import java.io.ByteArrayInputStream;
@@ -136,10 +136,11 @@
props.put("k1", "foo");
props.put("k2", "bar");
props.put("k3",
"\u0020\u0391\u0392\u0393\u0394\u0395\u0396\u0397");
props.put("k4", "\u7532\u9aa8\u6587");
props.put("k5", "<java.home>/conf/jaxp.properties");
+ props.put("k6", "\uD834\uDD1E");
TestOutputStream out = new TestOutputStream();
props.storeToXML(out, null, encoding);
if (!out.isOpen())
throw new RuntimeException("OutputStream closed by
storeToXML");
@@ -241,10 +242,60 @@
}
}
}
}
+ /**
+ * Test loadFromXML with supplementary characters
+ */
+ static void testLoadWithSupplementaryCharacters() throws IOException {
+ System.out.println("testLoadWithSupplementaryCharacters");
+
+ Properties expected = new Properties();
+ expected.put("\uD834\uDD1E", "\uD834\uDD1E");
+
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
+ "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" +
+ "<properties>" +
+ "<entry key=\"𝄞\">𝄞</entry>" +
+ "</properties>";
+
+ ByteArrayInputStream in = new
ByteArrayInputStream(s.getBytes("UTF-8"));
+ Properties props = new Properties();
+ props.loadFromXML(in);
+
+ if (!props.equals(expected)) {
+ System.err.println("loaded: " + props + ", expected: " +
expected);
+ throw new RuntimeException("Test failed");
+ }
+ }
+
+ /**
+ * Test storeToXML with supplementary characters
+ */
+ static void testStoreWithSupplementaryCharacters() throws IOException {
+ System.out.println("testStoreWithSupplementaryCharacters");
+
+ String s = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" +
System.lineSeparator() +
+ "<!DOCTYPE properties SYSTEM \"
http://java.sun.com/dtd/properties.dtd\">" + System.lineSeparator() +
+ "<properties>" + System.lineSeparator() +
+ "<entry key=\"Musical Symbols\">𝄞</entry>" +
System.lineSeparator() +
+ "</properties>" + System.lineSeparator();
+
+ Properties props = new Properties();
+ props.put("Musical Symbols", "\uD834\uDD1E");
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ props.storeToXML(out, null, "UTF-8");
+
+ String outXml = out.toString("UTF-8");
+
+ if (!outXml.equals(s)) {
+ System.err.println("stored: " + outXml + ", expected: " + s);
+ throw new RuntimeException("Test failed");
+ }
+ }
+
public static void main(String[] args) throws IOException {
testLoadAndStore("UTF-8", false);
testLoadAndStore("UTF-16", false);
testLoadAndStore("UTF-16BE", false);
@@ -252,10 +303,12 @@
testLoadAndStore("UTF-16BE", true);
testLoadAndStore("UTF-16LE", true);
testLoadWithoutEncoding();
testLoadWithBadEncoding();
testStoreWithBadEncoding();
+ testLoadWithSupplementaryCharacters();
+ testStoreWithSupplementaryCharacters();
// malformed documents
String src = System.getProperty("test.src");
String subdir = "invalidxml";
Path dir = (src == null) ? Paths.get(subdir) : Paths.get(src,
subdir);
On Sun, 31 Oct 2021 at 19:38, Anirvan Sarkar <powers.anir...@gmail.com>
wrote:
Hi,
Properties.loadFromXML/storeToXML works incorrectly for supplementary
characters after JDK-8042889[1] was integrated in JDK 9.
Properties.storeToXML now generates incorrect character references for
supplementary characters. This is similar to JDK-8145974[2] which was fixed
in the java.xml module in JDK 9.
Properties.loadFromXML now fails to parse character references for
supplementary characters and throws InvalidPropertiesFormatException.
Sample program which demonstrates these issues is in the JBS[3].
Proposed patch to fix this issue is attached.
JDK Tier 1 tests are all green.
If it looks fine then I will create a pull request based on this.
[1] : https://bugs.openjdk.java.net/browse/JDK-8042889
[2] : https://bugs.openjdk.java.net/browse/JDK-8145974
[3] : https://bugs.openjdk.java.net/browse/JDK-8276207
--
Anirvan