Xqt has uploaded a new change for review.
(https://gerrit.wikimedia.org/r/392314)
Change subject: [bugfix] fix xml decoding
......................................................................
[bugfix] fix xml decoding
- Read the encoding from the XML declaration in the file if no charset is given
- Use UTF-8 as the default if the file declares no encoding
Bug: T180915
Change-Id: I4091eb8428b2c0bffbda657ee59583857363e1d5
---
M pywikibot/comms/threadedhttp.py
1 file changed, 9 insertions(+), 1 deletion(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/14/392314/1
diff --git a/pywikibot/comms/threadedhttp.py b/pywikibot/comms/threadedhttp.py
index 4e1f9e1..a52b758 100644
--- a/pywikibot/comms/threadedhttp.py
+++ b/pywikibot/comms/threadedhttp.py
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
"""Http backend layer, formerly providing a httplib2 wrapper."""
from __future__ import absolute_import, unicode_literals
-# (C) Pywikibot team, 2007-2015
+# (C) Pywikibot team, 2007-2017
__version__ = '$Id$'
__docformat__ = 'epytext'
# standard python libraries
import codecs
+import re
import sys
if sys.version_info[0] > 2:
@@ -127,6 +128,13 @@
elif 'json' in content_type:
# application/json | application/sparql-results+json
self._header_encoding = 'utf-8'
+ elif 'xml' in content_type:
+ header = self.raw[:100].splitlines()[0]
+ m = re.search('encoding="(?P<encoding>.+)"', header)
+ if m:
+ self._header_encoding = m.group('encoding')
+ else:
+ self._header_encoding = 'utf-8'
else:
self._header_encoding = None
return self._header_encoding
--
To view, visit https://gerrit.wikimedia.org/r/392314
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I4091eb8428b2c0bffbda657ee59583857363e1d5
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits