jenkins-bot has submitted this change and it was merged. ( https://gerrit.wikimedia.org/r/381733 )
Change subject: Workaround to use JVM's UTF-8 decoder in place of Xerces one
......................................................................

Workaround to use JVM's UTF-8 decoder in place of Xerces one

It's apparently still failing in some edge cases.
Applying workaround recommended on the talk page in 2013:
https://www.mediawiki.org/wiki/Manual_talk:MWDumper

Bug: T176829
Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
---
M src/org/mediawiki/importer/XmlDumpReader.java
1 file changed, 7 insertions(+), 1 deletion(-)

Approvals:
  Brion VIBBER: Looks good to me, approved
  jenkins-bot: Verified

diff --git a/src/org/mediawiki/importer/XmlDumpReader.java b/src/org/mediawiki/importer/XmlDumpReader.java
index aa25b12..e2c33c6 100644
--- a/src/org/mediawiki/importer/XmlDumpReader.java
+++ b/src/org/mediawiki/importer/XmlDumpReader.java
@@ -27,6 +27,8 @@
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
 import java.io.UnsupportedEncodingException;
 import java.util.Calendar;
 import java.util.GregorianCalendar;
@@ -41,6 +43,7 @@
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
+import org.xml.sax.InputSource;
 public class XmlDumpReader extends DefaultHandler {
 InputStream input;
@@ -87,8 +90,11 @@
 try {
 SAXParserFactory factory = SAXParserFactory.newInstance();
 SAXParser parser = factory.newSAXParser();
+ Reader reader = new InputStreamReader(input, "UTF-8");
+ InputSource is = new InputSource(reader);
+ is.setEncoding("UTF-8");
- parser.parse(input, this);
+ parser.parse(is, this);
 } catch (ParserConfigurationException e) {
 throw (IOException)new IOException(e.getMessage()).initCause(e);
 } catch (SAXException e) {

--
To view, visit https://gerrit.wikimedia.org/r/381733
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I07cef81fa997af699ace105ae72e4fe67fc7eef0
Gerrit-PatchSet: 1
Gerrit-Project: mediawiki/tools/mwdumper
Gerrit-Branch: master
Gerrit-Owner: Brion VIBBER <[email protected]>
Gerrit-Reviewer: Brion VIBBER <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits
