This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new f81df0a1a TIKA-4471 -- add unit tests to confirm defense against xxe
in sax, dom and stax -- add flexibility for DOM
f81df0a1a is described below
commit f81df0a1acbfff40a2ea026af6fd9f1b10f0e6b5
Author: tallison <[email protected]>
AuthorDate: Mon Sep 8 11:10:04 2025 -0400
TIKA-4471 -- add unit tests to confirm defense against xxe in sax, dom and
stax -- add flexibility for DOM
(cherry picked from commit 2c7caa7517442c4f4e9a7a7629d71f24962fb240)
---
.../org/apache/tika/utils/XMLReaderUtilsTest.java | 20 ++++++++++++++++----
1 file changed, 16 insertions(+), 4 deletions(-)
diff --git a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
index 3642fe1f4..9e58b0da9 100644
--- a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
@@ -136,9 +136,7 @@ public class XMLReaderUtilsTest {
XMLReaderUtils.parseSAX(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)),
new ToTextContentHandler(), new ParseContext());
} catch (SAXException e) {
- if (e.getMessage() != null && e
- .getMessage()
- .contains("entity expansions")) {
+ if (e.getMessage() != null && e.getMessage().contains("entity expansions")) {
//do nothing
} else {
throw e;
@@ -150,8 +148,22 @@ public class XMLReaderUtilsTest {
@Test
public void testDOMBillionLaughs() throws Exception {
//confirm that ExpandEntityReferences has been set to false.
+
+ //some implementations ignore the expandEntityReferences=false, and we are still
+ //protected by the "The parser has encountered more than "20" entity expansions" SAXException.
+ //We need to check for either: empty content and no exception, or this SAXException
for (String xml : BILLION_LAUGHS) {
- Document doc = XMLReaderUtils.buildDOM(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)), new ParseContext());
+ Document doc = null;
+ try {
+ doc = XMLReaderUtils.buildDOM(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)), new ParseContext());
+ } catch (SAXException e) {
+ if (e.getMessage() != null && e.getMessage().contains("entity expansions")) {
+ //do nothing
+ continue;
+ } else {
+ throw e;
+ }
+ }
NodeList nodeList = doc.getChildNodes();
StringBuilder sb = new StringBuilder();
dumpChildren(nodeList, sb);