This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-io.git
The following commit(s) were added to refs/heads/master by this push:
new 841b5faf IO-815: XmlStreamReader encoding RE is too strict
841b5faf is described below
commit 841b5fafe13683389d078ec7a7adf736c05e038e
Author: Sebb <[email protected]>
AuthorDate: Tue Oct 3 22:10:45 2023 +0100
IO-815: XmlStreamReader encoding RE is too strict
---
src/changes/changes.xml | 3 +++
src/main/java/org/apache/commons/io/input/XmlStreamReader.java | 8 +++++++-
2 files changed, 10 insertions(+), 1 deletion(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index b42898b0..49264bf1 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -48,6 +48,9 @@ The <action> type attribute can be add,update,fix,remove.
<body>
<release version="2.14.1" date="202Y-MM-DD" description="Java 8 is required.">
+ <action dev="sebb" type="fix" issue="IO-815" due-to="Laurence Gonsalves">
+ XmlStreamReader encoding match RE is too strict
+ </action>
<action dev="ggregory" type="fix" issue="IO-810" due-to="Gregor Dschung, Gary Gregory">
Javadoc in FileUtils does not reflect code for thrown exceptions.
</action>
diff --git a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
index a38ef807..f50fc76e 100644
--- a/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
+++ b/src/main/java/org/apache/commons/io/input/XmlStreamReader.java
@@ -209,8 +209,14 @@ public class XmlStreamReader extends Reader {
// @formatter:off
"^<\\?xml\\s+"
+ "version\\s*=\\s*(?:(?:\"1\\.[0-9]+\")|(?:'1.[0-9]+'))\\s+"
- + "encoding\\s*=\\s*((?:\"[A-Za-z]([A-Za-z0-9\\._]|-)*\")|(?:'[A-Za-z]([A-Za-z0-9\\\\._]|-)*'))",
+ + "encoding\\s*=\\s*"
+ + "((?:\"[A-Za-z0-9][A-Za-z0-9._+:-]*\")" // double-quoted
+ + "|(?:'[A-Za-z0-9][A-Za-z0-9._+:-]*'))", // single-quoted
Pattern.MULTILINE);
+ // N.B. the documented pattern is
+ // EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
+ // However this does not match all the aliases that are supported by Java.
+ // e.g. '437', 'ISO_8859-1:1987' and 'ebcdic-de-273+euro'
// @formatter:on
private static final String RAW_EX_1 = "Illegal encoding, BOM [{0}] XML guess [{1}] XML prolog [{2}] encoding mismatch";