This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1476_Modernize_DictionaryEntryPersistor_to_create_XMLReader_via_javax.xml.parsers.SAXParserFactory
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit ad437e12a22b37f4006dbb9493da8be5bd244400
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Mar 3 22:04:09 2023 +0100

    OPENNLP-1476 Modernize DictionaryEntryPersistor to create XMLReader via javax.xml.parsers.SAXParserFactory
---
 .../serializer/DictionaryEntryPersistor.java       | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
index 3394604c..af210726 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/dictionary/serializer/DictionaryEntryPersistor.java
@@ -24,6 +24,8 @@ import java.nio.charset.StandardCharsets;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.parsers.SAXParserFactory;
 import javax.xml.transform.OutputKeys;
 import javax.xml.transform.Transformer;
 import javax.xml.transform.TransformerConfigurationException;
@@ -37,7 +39,6 @@ import org.xml.sax.Locator;
 import org.xml.sax.SAXException;
 import org.xml.sax.XMLReader;
 import org.xml.sax.helpers.AttributesImpl;
-import org.xml.sax.helpers.XMLReaderFactory;
 
 import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.util.InvalidFormatException;
@@ -51,6 +52,9 @@ import opennlp.tools.util.model.UncloseableInputStream;
  * @see Dictionary
  */
 public class DictionaryEntryPersistor {
+  
+  private static final SAXParserFactory SAX_PARSER_FACTORY = SAXParserFactory.newInstance();
+  private static final String SAX_FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";
 
   // TODO: should check for invalid format, make it save
   private static class DictionaryContenthandler implements ContentHandler {
@@ -199,14 +203,11 @@ public class DictionaryEntryPersistor {
     }
   }
 
-  private static final String CHARSET = StandardCharsets.UTF_8.name();
-
   private static final String DICTIONARY_ELEMENT = "dictionary";
   private static final String ENTRY_ELEMENT = "entry";
   private static final String TOKEN_ELEMENT = "token";
   private static final String ATTRIBUTE_CASE_SENSITIVE = "case_sensitive";
 
-
   /**
    * Creates {@link Entry}s from the given {@link InputStream} and
    * forwards these {@link Entry}s to the {@link EntryInserter}.
@@ -225,16 +226,19 @@ public class DictionaryEntryPersistor {
   public static boolean create(InputStream in, EntryInserter inserter)
       throws IOException {
 
-    DictionaryContenthandler profileContentHandler =
-        new DictionaryContenthandler(inserter);
+    DictionaryContenthandler profileContentHandler = new DictionaryContenthandler(inserter);
 
     XMLReader xmlReader;
     try {
-      xmlReader = XMLReaderFactory.createXMLReader();
+      xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader();
+      // Note:
+      // There is a compatibility problem here: JAXP default is false while SAX 2 default is true!
+      // OpenNLP requires it activated!
+      xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true);
       xmlReader.setContentHandler(profileContentHandler);
       xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
     }
-    catch (SAXException e) {
+    catch (ParserConfigurationException | SAXException e) {
       throw new InvalidFormatException("The profile data stream has " +
           "an invalid format!", e);
     }
@@ -290,7 +294,7 @@ public class DictionaryEntryPersistor {
     }
 
     Transformer serializer = hd.getTransformer();
-    serializer.setOutputProperty(OutputKeys.ENCODING, CHARSET);
+    serializer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
     serializer.setOutputProperty(OutputKeys.INDENT, "yes");
 
     hd.setResult(streamResult);

Reply via email to