This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/branch_3x by this push:
     new ba53077ba TIKA-4471 -- fix i18n for new unit tests (#2319)
ba53077ba is described below

commit ba53077ba3cd49472864ec207aa25c999dac9e84
Author: Tim Allison <talli...@apache.org>
AuthorDate: Tue Sep 9 08:51:50 2025 -0400

    TIKA-4471 -- fix i18n for new unit tests (#2319)
    
    (cherry picked from commit 9db2fb8777be0702bd7fc7e634ba725099b7faf6)
---
 .../org/apache/tika/utils/XMLReaderUtilsTest.java  | 46 ++++++++++++++++------
 tika-parent/pom.xml                                |  4 +-
 2 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
index 9e58b0da9..9f14f6636 100644
--- a/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
+++ b/tika-core/src/test/java/org/apache/tika/utils/XMLReaderUtilsTest.java
@@ -22,10 +22,12 @@ import static org.junit.jupiter.api.Assertions.fail;
 import java.io.ByteArrayInputStream;
 import java.net.ConnectException;
 import java.nio.charset.StandardCharsets;
+import java.util.Locale;
 import java.util.NoSuchElementException;
 import javax.xml.stream.XMLEventReader;
 import javax.xml.stream.XMLStreamException;
 
+import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.Test;
 import org.w3c.dom.Document;
 import org.w3c.dom.Node;
@@ -37,6 +39,12 @@ import org.apache.tika.sax.ToTextContentHandler;
 
 public class XMLReaderUtilsTest {
 
+    private static final Locale defaultLocale = Locale.getDefault();
+    static {
+        //tests on content of Exception msgs require specifying locale.
+        //even this, though is not sufficient for the billion laughs tests ?!
+        Locale.setDefault(Locale.US);
+    }
     private static final String EXTERNAL_DTD_SIMPLE_FILE = "<?xml version=\"1.0\" standalone=\"no\"?><!DOCTYPE foo SYSTEM \"tutorials.dtd\"><foo/>";
     private static final String EXTERNAL_DTD_SIMPLE_URL = "<?xml version=\"1.0\" standalone=\"no\"?><!DOCTYPE foo SYSTEM \"http://127.234.172.38:7845/bar\"><foo/>";
     private static final String EXTERNAL_ENTITY =  "<!DOCTYPE foo [" + " <!ENTITY bar SYSTEM \"http://127.234.172.38:7845/bar\">" +
@@ -78,6 +86,11 @@ public class XMLReaderUtilsTest {
 
     private static final String[] BILLION_LAUGHS = new String[]{ BILLION_LAUGHS_CLASSICAL, BILLION_LAUGHS_VARIANT };
 
+    @AfterAll
+    public static void tearDown() {
+        Locale.setDefault(defaultLocale);
+    }
+
     //make sure that parseSAX actually defends against external entities
     @Test
     public void testSAX() throws Exception {
@@ -136,11 +149,7 @@ public class XMLReaderUtilsTest {
                 XMLReaderUtils.parseSAX(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)),
                         new ToTextContentHandler(), new ParseContext());
             } catch (SAXException e) {
-                if (e.getMessage() != null && e.getMessage().contains("entity expansions")) {
-                    //do nothing
-                } else {
-                    throw e;
-                }
+                limitCheck(e);
             }
         }
     }
@@ -157,12 +166,8 @@ public class XMLReaderUtilsTest {
             try {
                 doc = XMLReaderUtils.buildDOM(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)), new ParseContext());
             } catch (SAXException e) {
-                if (e.getMessage() != null && e.getMessage().contains("entity expansions")) {
-                    //do nothing
-                    continue;
-                } else {
-                    throw e;
-                }
+                limitCheck(e);
+                continue;
             }
             NodeList nodeList = doc.getChildNodes();
             StringBuilder sb = new StringBuilder();
@@ -215,4 +220,23 @@ public class XMLReaderUtilsTest {
             }
         }
     }
+
+    private void limitCheck(SAXException e) throws SAXException {
+        String msg = e.getLocalizedMessage();
+        if (msg == null) {
+            throw e;
+        }
+
+        //depending on the flavor/version of the jdk, entity expansions may be triggered
+        // OR entitySizeLimit may be triggered
+        //See TIKA-4471
+        if (msg.contains("JAXP00010001") || //entity expansions
+                msg.contains("JAXP00010003") || //max entity size limit
+                msg.contains("JAXP00010004") || //TotalEntitySizeLimit
+                msg.contains("entity expansions") ||
+                e.getMessage().contains("maxGeneralEntitySizeLimit")) {
+            return;
+        }
+        throw e;
+    }
 }
diff --git a/tika-parent/pom.xml b/tika-parent/pom.xml
index 9ac3a68f5..1c0a64154 100644
--- a/tika-parent/pom.xml
+++ b/tika-parent/pom.xml
@@ -1306,7 +1306,9 @@
         <artifactId>maven-surefire-plugin</artifactId>
         <version>${maven.surefire.version}</version>
         <configuration>
-          <argLine>-Xmx4g -Djava.awt.headless=true</argLine>
+          <!-- for manual testing of i18n, try for example: -Duser.language=zh -Duser.region=CN or
+          -Duser.language=de -Duser.country=DE -->
+          <argLine>-Xmx4g -Djava.awt.headless=true @{surefireArgLine}</argLine>
         </configuration>
       </plugin>
       <plugin>

Reply via email to