This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-1.x by this push:
     new 584438942 [1.x] OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with XmlUtil helper (#1080)
584438942 is described below

commit 584438942c87a84d1be0ffa0daf5309bbd20e3d5
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:26:52 2026 +0200

    [1.x] OPENNLP-1819: Align DictionaryEntryPersistor XML parsing with XmlUtil helper (#1080)
    
    Backport of #1020 to opennlp-1.x.
    
    DictionaryEntryPersistor.create() built its reader via the deprecated and
    insecure XMLReaderFactory.createXMLReader(), bypassing the secure parser
    configuration. Route it through XmlUtil.createSaxParser().getXMLReader()
    so dictionary parsing benefits from the hardened, XXE-safe configuration
    (namespace awareness is now set on the factory).
    
    Also harden XmlUtil itself: disable external DTD/schema access and
    external general/parameter entities, disallow DOCTYPE declarations, turn
    off XInclude and entity-reference expansion. FEATURE_SECURE_PROCESSING is
    attempted in a guarded block so platforms that do not support it (e.g.
    Android) still work.
    
    Adapted for opennlp-1.x: no slf4j on this branch, so the unsupported-
    feature warning is emitted via System.err (the branch's logging idiom).
---
 .../serializer/DictionaryEntryPersistor.java       |  4 +-
 .../src/main/java/opennlp/tools/util/XmlUtil.java  | 51 +++++++++++++++++++---
 2 files changed, 46 insertions(+), 9 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
 
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index 603afade6..07e5aa16e 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -38,10 +38,10 @@ import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.AttributesImpl;
-import org.xml.sax.helpers.XMLReaderFactory;
 
 import opennlp.tools.util.InvalidFormatException;
 import opennlp.tools.util.StringList;
+import opennlp.tools.util.XmlUtil;
 import opennlp.tools.util.model.UncloseableInputStream;
 
 /**
@@ -217,7 +217,7 @@ public class DictionaryEntryPersistor {
 
     XMLReader xmlReader;
     try {
-      xmlReader = XMLReaderFactory.createXMLReader();
+      xmlReader = XmlUtil.createSaxParser().getXMLReader();
       xmlReader.setContentHandler(profileContentHandler);
       xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
     }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java 
b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
index 39cc8dbe0..39ed353a0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java
@@ -29,14 +29,34 @@ import org.xml.sax.SAXException;
 public class XmlUtil {
 
   /**
-   * Create a new DocumentBuilder which processes XML securely.
+   * Create a new {@link DocumentBuilder} which processes XML securely.
    *
-   * @return a DocumentBuilder
+   * @return A valid {@link DocumentBuilder} instance.
+   * @throws IllegalStateException Thrown if errors occurred creating the 
builder.
    */
   public static DocumentBuilder createDocumentBuilder() {
+    final DocumentBuilderFactory documentBuilderFactory = 
DocumentBuilderFactory.newInstance();
     try {
-      DocumentBuilderFactory documentBuilderFactory = 
DocumentBuilderFactory.newInstance();
       
documentBuilderFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
+    } catch (ParserConfigurationException e) {
+      // XMLConstants.FEATURE_SECURE_PROCESSING is not supported on Android.
+      // See DocumentBuilderFactory#setFeature
+      System.err.println("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, " +
+          "it's unsupported on this platform: " + e.getMessage());
+    }
+    try {
+      documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, 
"");
+      documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, 
"");
+      documentBuilderFactory.setFeature(
+          "http://apache.org/xml/features/disallow-doctype-decl", true);
+      documentBuilderFactory.setFeature(
+          "http://xml.org/sax/features/external-general-entities", false);
+      documentBuilderFactory.setFeature(
+          "http://xml.org/sax/features/external-parameter-entities", false);
+      documentBuilderFactory.setFeature(
+          "http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+      documentBuilderFactory.setXIncludeAware(false);
+      documentBuilderFactory.setExpandEntityReferences(false);
       return documentBuilderFactory.newDocumentBuilder();
     } catch (ParserConfigurationException e) {
       throw new IllegalStateException(e);
@@ -44,15 +64,32 @@ public class XmlUtil {
   }
 
   /**
-   * Create a new SAXParser which processes XML securely.
+   * Create a new {@link SAXParser} which processes XML securely.
    *
-   * @return a SAXParser
+   * @return A valid {@link SAXParser} instance.
+   * @throws IllegalStateException Thrown if errors occurred creating the 
parser.
    */
   public static SAXParser createSaxParser() {
-    SAXParserFactory spf = SAXParserFactory.newInstance();
+    final SAXParserFactory spf = SAXParserFactory.newInstance();
+    spf.setNamespaceAware(true);
+    spf.setXIncludeAware(false);
     try {
       spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
-      return spf.newSAXParser();
+    } catch (ParserConfigurationException | SAXException e) {
+      // XMLConstants.FEATURE_SECURE_PROCESSING is not supported on Android.
+      // See SAXParserFactory#setFeature
+      System.err.println("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, " +
+          "it's unsupported on this platform: " + e.getMessage());
+    }
+    try {
+      spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true);
+      spf.setFeature("http://xml.org/sax/features/external-general-entities", false);
+      spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
+      spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
+      final SAXParser parser = spf.newSAXParser();
+      parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, "");
+      parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, "");
+      return parser;
     } catch (ParserConfigurationException | SAXException e) {
       throw new IllegalStateException(e);
     }

Reply via email to