This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1835-1.x in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit fe68c8ef77d495222183bc3d651f1b9465e45b9d Author: Richard Zowalla <[email protected]> AuthorDate: Fri Jun 12 16:24:32 2026 +0200 OPENNLP-1835: Tolerate unsupported XML parser security options Backport of #1066 to opennlp-1.x. Some XML parser implementations (notably on Android, or alternative JAXP providers) do not support every hardening option set in XmlUtil. Setting them unconditionally made createDocumentBuilder()/createSaxParser() throw, breaking otherwise-valid parsing. Move each setFeature/setAttribute/setXIncludeAware/setProperty call into a "set...IfSupported" helper that catches the platform-specific failure (ParserConfigurationException, SAXNotRecognizedException/SAXNotSupportedException, IllegalArgumentException, UnsupportedOperationException), logs a warning and continues. The remaining options are still applied. Adapted for opennlp-1.x: JUnit 4 test; warnings via System.err (no slf4j on this branch); the throwing test factory resolves its delegate without Java 9's DocumentBuilderFactory.newDefaultInstance() so it compiles on Java 8. 
--- .../src/main/java/opennlp/tools/util/XmlUtil.java | 87 +++++++++++++--- .../test/java/opennlp/tools/util/XmlUtilTest.java | 112 +++++++++++++++++++++ 2 files changed, 185 insertions(+), 14 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java index 39ed353a0..0442b1250 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/XmlUtil.java @@ -25,6 +25,8 @@ import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; public class XmlUtil { @@ -45,17 +47,17 @@ public class XmlUtil { "it's unsupported on this platform: " + e.getMessage()); } try { - documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - documentBuilderFactory.setAttribute(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); - documentBuilderFactory.setFeature( + setAttributeIfSupported(documentBuilderFactory, XMLConstants.ACCESS_EXTERNAL_DTD, ""); + setAttributeIfSupported(documentBuilderFactory, XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + setFeatureIfSupported(documentBuilderFactory, "http://apache.org/xml/features/disallow-doctype-decl", true); - documentBuilderFactory.setFeature( + setFeatureIfSupported(documentBuilderFactory, "http://xml.org/sax/features/external-general-entities", false); - documentBuilderFactory.setFeature( + setFeatureIfSupported(documentBuilderFactory, "http://xml.org/sax/features/external-parameter-entities", false); - documentBuilderFactory.setFeature( + setFeatureIfSupported(documentBuilderFactory, "http://apache.org/xml/features/nonvalidating/load-external-dtd", false); - documentBuilderFactory.setXIncludeAware(false); + setXIncludeAwareIfSupported(documentBuilderFactory, false); documentBuilderFactory.setExpandEntityReferences(false); return 
documentBuilderFactory.newDocumentBuilder(); } catch (ParserConfigurationException e) { @@ -72,7 +74,7 @@ public class XmlUtil { public static SAXParser createSaxParser() { final SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setNamespaceAware(true); - spf.setXIncludeAware(false); + setXIncludeAwareIfSupported(spf, false); try { spf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (ParserConfigurationException | SAXException e) { @@ -81,17 +83,74 @@ public class XmlUtil { System.err.println("Failed to enable XMLConstants.FEATURE_SECURE_PROCESSING, " + "it's unsupported on this platform: " + e.getMessage()); } + setFeatureIfSupported(spf, "http://apache.org/xml/features/disallow-doctype-decl", true); + setFeatureIfSupported(spf, "http://xml.org/sax/features/external-general-entities", false); + setFeatureIfSupported(spf, "http://xml.org/sax/features/external-parameter-entities", false); + setFeatureIfSupported(spf, "http://apache.org/xml/features/nonvalidating/load-external-dtd", + false); try { - spf.setFeature("http://apache.org/xml/features/disallow-doctype-decl", true); - spf.setFeature("http://xml.org/sax/features/external-general-entities", false); - spf.setFeature("http://xml.org/sax/features/external-parameter-entities", false); - spf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); final SAXParser parser = spf.newSAXParser(); - parser.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, ""); - parser.setProperty(XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); + setPropertyIfSupported(parser, XMLConstants.ACCESS_EXTERNAL_DTD, ""); + setPropertyIfSupported(parser, XMLConstants.ACCESS_EXTERNAL_SCHEMA, ""); return parser; } catch (ParserConfigurationException | SAXException e) { throw new IllegalStateException(e); } } + + private static void setFeatureIfSupported(DocumentBuilderFactory factory, String name, + boolean value) { + try { + factory.setFeature(name, value); + } catch (ParserConfigurationException e) 
{ + System.err.println("Failed to set XML parser feature " + name + + ", it's unsupported on this platform: " + e.getMessage()); + } + } + + private static void setAttributeIfSupported(DocumentBuilderFactory factory, String name, + Object value) { + try { + factory.setAttribute(name, value); + } catch (IllegalArgumentException e) { + System.err.println("Failed to set XML parser attribute " + name + + ", it's unsupported on this platform: " + e.getMessage()); + } + } + + private static void setXIncludeAwareIfSupported(DocumentBuilderFactory factory, boolean state) { + try { + factory.setXIncludeAware(state); + } catch (UnsupportedOperationException e) { + System.err.println("Failed to set XML parser XInclude awareness, " + + "it's unsupported on this platform: " + e.getMessage()); + } + } + + private static void setPropertyIfSupported(SAXParser parser, String name, Object value) { + try { + parser.setProperty(name, value); + } catch (SAXNotRecognizedException | SAXNotSupportedException e) { + System.err.println("Failed to set XML parser property " + name + + ", it's unsupported on this platform: " + e.getMessage()); + } + } + + private static void setFeatureIfSupported(SAXParserFactory factory, String name, boolean value) { + try { + factory.setFeature(name, value); + } catch (ParserConfigurationException | SAXException e) { + System.err.println("Failed to set XML parser feature " + name + + ", it's unsupported on this platform: " + e.getMessage()); + } + } + + private static void setXIncludeAwareIfSupported(SAXParserFactory factory, boolean state) { + try { + factory.setXIncludeAware(state); + } catch (UnsupportedOperationException e) { + System.err.println("Failed to set XML parser XInclude awareness, " + + "it's unsupported on this platform: " + e.getMessage()); + } + } } diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/XmlUtilTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/XmlUtilTest.java new file mode 100644 index 000000000..95cfcbcfd --- 
/dev/null +++ b/opennlp-tools/src/test/java/opennlp/tools/util/XmlUtilTest.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.util; + +import java.io.StringReader; + +import javax.xml.XMLConstants; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; + +import org.junit.Assert; +import org.junit.Test; +import org.xml.sax.InputSource; + +public class XmlUtilTest { + + @Test + public void testCreateDocumentBuilderWithUnsupportedSecurityOptions() throws Exception { + String property = DocumentBuilderFactory.class.getName(); + String oldFactory = System.getProperty(property); + System.setProperty(property, ThrowingSecurityOptionsDocumentBuilderFactory.class.getName()); + try { + DocumentBuilder documentBuilder = XmlUtil.createDocumentBuilder(); + + Assert.assertEquals("root", documentBuilder.parse( + new InputSource(new StringReader("<root/>"))).getDocumentElement().getTagName()); + } finally { + if (oldFactory == null) { + System.clearProperty(property); + } else { + System.setProperty(property, oldFactory); + } + } + } + + public static class 
ThrowingSecurityOptionsDocumentBuilderFactory + extends DocumentBuilderFactory { + + private final DocumentBuilderFactory delegate = newRealFactory(); + + /** + * Resolves the platform default {@link DocumentBuilderFactory} without recursing into this + * class. {@link DocumentBuilderFactory#newDefaultInstance()} would do this directly, but it + * is only available since Java 9; on Java 8 we temporarily clear the factory system property + * so {@code newInstance()} falls back to the JAXP default implementation. + */ + private static DocumentBuilderFactory newRealFactory() { + String property = DocumentBuilderFactory.class.getName(); + String saved = System.getProperty(property); + System.clearProperty(property); + try { + return DocumentBuilderFactory.newInstance(); + } finally { + if (saved != null) { + System.setProperty(property, saved); + } + } + } + + @Override + public DocumentBuilder newDocumentBuilder() throws ParserConfigurationException { + return delegate.newDocumentBuilder(); + } + + @Override + public void setAttribute(String name, Object value) { + if (XMLConstants.ACCESS_EXTERNAL_DTD.equals(name)) { + throw new IllegalArgumentException(name); + } + delegate.setAttribute(name, value); + } + + @Override + public Object getAttribute(String name) { + return delegate.getAttribute(name); + } + + @Override + public void setFeature(String name, boolean value) throws ParserConfigurationException { + if ("http://apache.org/xml/features/disallow-doctype-decl".equals(name)) { + throw new ParserConfigurationException(name); + } + delegate.setFeature(name, value); + } + + @Override + public void setXIncludeAware(boolean state) { + throw new UnsupportedOperationException("XInclude"); + } + + @Override + public boolean getFeature(String name) throws ParserConfigurationException { + return delegate.getFeature(name); + } + } +}
