This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch branch_3x in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push: new ba53077ba TIKA-4471 -- fix i18n for new unit tests (#2319) ba53077ba is described below commit ba53077ba3cd49472864ec207aa25c999dac9e84 Author: Tim Allison <talli...@apache.org> AuthorDate: Tue Sep 9 08:51:50 2025 -0400 TIKA-4471 -- fix i18n for new unit tests (#2319) (cherry picked from commit 9db2fb8777be0702bd7fc7e634ba725099b7faf6) --- .../org/apache/tika/utils/XMLReaderUtilsTest.java | 46 ++++++++++++++++------ tika-parent/pom.xml | 4 +- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java index 9e58b0da9..9f14f6636 100644 --- a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java +++ b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java @@ -22,10 +22,12 @@ import static org.junit.jupiter.api.Assertions.fail; import java.io.ByteArrayInputStream; import java.net.ConnectException; import java.nio.charset.StandardCharsets; +import java.util.Locale; import java.util.NoSuchElementException; import javax.xml.stream.XMLEventReader; import javax.xml.stream.XMLStreamException; +import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.Test; import org.w3c.dom.Document; import org.w3c.dom.Node; @@ -37,6 +39,12 @@ import org.apache.tika.sax.ToTextContentHandler; public class XMLReaderUtilsTest { + private static final Locale defaultLocale = Locale.getDefault(); + static { + //tests on content of Exception msgs require specifying locale. + //even this, though is not sufficient for the billion laughs tests ?! + Locale.setDefault(Locale.US); + } private static final String EXTERNAL_DTD_SIMPLE_FILE = "<?xml version=\"1.0\" standalone=\"no\"?><!DOCTYPE foo SYSTEM \"tutorials.dtd\"><foo/>"; private static final String EXTERNAL_DTD_SIMPLE_URL = "<?xml version=\"1.0\" standalone=\"no\"?><!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>"; private static final String EXTERNAL_ENTITY = "<!DOCTYPE foo [" + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">" + @@ -78,6 +86,11 @@ public class XMLReaderUtilsTest { private static final String[] BILLION_LAUGHS = new String[]{ BILLION_LAUGHS_CLASSICAL, BILLION_LAUGHS_VARIANT }; + @AfterAll + public static void tearDown() { + Locale.setDefault(defaultLocale); + } + //make sure that parseSAX actually defends against external entities @Test public void testSAX() throws Exception { @@ -136,11 +149,7 @@ public class XMLReaderUtilsTest { XMLReaderUtils.parseSAX(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)), new ToTextContentHandler(), new ParseContext()); } catch (SAXException e) { - if (e.getMessage() != null && e.getMessage().contains("entity expansions")) { - //do nothing - } else { - throw e; - } + limitCheck(e); } } } @@ -157,12 +166,8 @@ public class XMLReaderUtilsTest { try { doc = XMLReaderUtils.buildDOM(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)), new ParseContext()); } catch (SAXException e) { - if (e.getMessage() != null && e.getMessage().contains("entity expansions")) { - //do nothing - continue; - } else { - throw e; - } + limitCheck(e); + continue; } NodeList nodeList = doc.getChildNodes(); StringBuilder sb = new StringBuilder(); @@ -215,4 +220,23 @@ public class XMLReaderUtilsTest { } } } + + private void limitCheck(SAXException e) throws SAXException { + String msg = e.getLocalizedMessage(); + if (msg == null) { + throw e; + } + + //depending on the flavor/version of the jdk, entity expansions may be triggered + // OR entitySizeLimit may be triggered + //See TIKA-4471 + if (msg.contains("JAXP00010001") || //entity expansions + msg.contains("JAXP00010003") || //max entity size limit + msg.contains("JAXP00010004") || //TotalEntitySizeLimit + msg.contains("entity expansions") || + e.getMessage().contains("maxGeneralEntitySizeLimit")) { + return; + } + throw e; + } } diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml index 9ac3a68f5..1c0a64154 100644 --- a/tika-parent/pom.xml +++ b/tika-parent/pom.xml @@ -1306,7 +1306,9 @@ <artifactId>maven-surefire-plugin</artifactId> <version>${maven.surefire.version}</version> <configuration> - <argLine>-Xmx4g -Djava.awt.headless=true</argLine> + <!-- for manual testing of i18n, try for example: -Duser.language=zh -Duser.region=CN or + -Duser.language=de -Duser.country=DE --> + <argLine>-Xmx4g -Djava.awt.headless=true @{surefireArgLine}</argLine> </configuration> </plugin> <plugin>