Revision: 7366
Author: nicdumz
Date: 2009-10-04 15:28:18 +0000 (Sun, 04 Oct 2009)
Log Message:
-----------
xmlreader: parse redirect information from xml
Original patch from Santiago Mola
Modified Paths:
--------------
trunk/pywikipedia/tests/test_xmlreader.py
trunk/pywikipedia/xmlreader.py
Added Paths:
-----------
trunk/pywikipedia/tests/data/article-pyrus.xml
Added: trunk/pywikipedia/tests/data/article-pyrus.xml
===================================================================
--- trunk/pywikipedia/tests/data/article-pyrus.xml (rev 0)
+++ trunk/pywikipedia/tests/data/article-pyrus.xml 2009-10-04 15:28:18 UTC (rev 7366)
@@ -0,0 +1,101 @@
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+ <siteinfo>
+ <sitename>Wikipedia</sitename>
+ <base>http://en.wikipedia.org/wiki/Main_Page</base>
+ <generator>MediaWiki 1.16alpha-wmf</generator>
+ <case>first-letter</case>
+ <namespaces>
+ <namespace key="-2">Media</namespace>
+ <namespace key="-1">Special</namespace>
+ <namespace key="0" />
+ <namespace key="1">Talk</namespace>
+ <namespace key="2">User</namespace>
+ <namespace key="3">User talk</namespace>
+ <namespace key="4">Wikipedia</namespace>
+ <namespace key="5">Wikipedia talk</namespace>
+ <namespace key="6">File</namespace>
+ <namespace key="7">File talk</namespace>
+ <namespace key="8">MediaWiki</namespace>
+ <namespace key="9">MediaWiki talk</namespace>
+ <namespace key="10">Template</namespace>
+ <namespace key="11">Template talk</namespace>
+ <namespace key="12">Help</namespace>
+ <namespace key="13">Help talk</namespace>
+ <namespace key="14">Category</namespace>
+ <namespace key="15">Category talk</namespace>
+ <namespace key="100">Portal</namespace>
+ <namespace key="101">Portal talk</namespace>
+ </namespaces>
+ </siteinfo>
+ <page>
+ <title>Pyrus</title>
+ <id>9261472</id>
+ <redirect />
+ <revision>
+ <id>104997415</id>
+ <timestamp>2007-02-02T02:39:52Z</timestamp>
+ <contributor>
+ <username>Melburnian</username>
+ <id>555187</id>
+ </contributor>
+ <comment>moved [[Pyrus]] to [[Pyrus (brand)]]: all links to ''Pyrus'' are related to the pear tree or fruit</comment>
+ <text xml:space="preserve">#REDIRECT [[Pyrus (brand)]]</text>
+ </revision>
+ <revision>
+ <id>104997738</id>
+ <timestamp>2007-02-02T02:41:24Z</timestamp>
+ <contributor>
+ <username>Melburnian</username>
+ <id>555187</id>
+ </contributor>
+ <comment>all links to ''Pyrus'' are related to the pear tree or fruit</comment>
+ <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+ </revision>
+ <revision>
+ <id>189729426</id>
+ <timestamp>2008-02-07T14:06:10Z</timestamp>
+ <contributor>
+ <username>Jkokemueller</username>
+ <id>6303952</id>
+ </contributor>
+ <comment>Added disambiguation</comment>
+ <text xml:space="preserve">'''Pyrus''' may refer to:
+
+* [[Pear]], trees of the genus ''Pyrus'' and the fruit of that tree, edible in some species
+* [[Main//Pyrus DMS]], a [[Document Management System]]</text>
+ </revision>
+ <revision>
+ <id>190346463</id>
+ <timestamp>2008-02-10T07:21:12Z</timestamp>
+ <contributor>
+ <username>IceCreamAntisocial</username>
+ <id>346507</id>
+ </contributor>
+ <minor/>
+ <comment>rv</comment>
+ <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+ </revision>
+ <revision>
+ <id>238138507</id>
+ <timestamp>2008-09-13T12:57:33Z</timestamp>
+ <contributor>
+ <username>Cottonapple4</username>
+ <id>7707615</id>
+ </contributor>
+ <comment>[[WP:AES|←]] Redirected page to [[Pear]]</comment>
+ <text xml:space="preserve">#REDIRECT [[Pear]]
+[[Category:Maloideae]]</text>
+ </revision>
+ <revision>
+ <id>238392911</id>
+ <timestamp>2008-09-14T17:08:56Z</timestamp>
+ <contributor>
+ <username>Rkitko</username>
+ <id>536375</id>
+ </contributor>
+ <minor/>
+ <comment>Reverted edits by [[Special:Contributions/Cottonapple4|Cottonapple4]] ([[User talk:Cottonapple4|talk]]) to last version by IceCreamAntisocial</comment>
+ <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+ </revision>
+ </page>
+</mediawiki>
Modified: trunk/pywikipedia/tests/test_xmlreader.py
===================================================================
--- trunk/pywikipedia/tests/test_xmlreader.py 2009-10-04 15:11:01 UTC (rev 7365)
+++ trunk/pywikipedia/tests/test_xmlreader.py 2009-10-04 15:28:18 UTC (rev 7366)
@@ -14,6 +14,7 @@
self.assertEquals(u"24278", pages[0].id)
self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of'))
self.assertEquals(u"Quercusrobur", pages[1].username)
+ self.assertEquals(u"Pear", pages[0].title)
def test_XmlDumpFirstRev(self):
pages = [r for r in xmlreader.XmlDump("data/article-pear.xml").parse()]
@@ -22,7 +23,12 @@
self.assertEquals(u"Pear", pages[0].title)
self.assertEquals(u"24278", pages[0].id)
self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of'))
+ self.assertTrue(not pages[0].isredirect)
+ def test_XmlDumpRedirect(self):
+ pages = [r for r in xmlreader.XmlDump("data/article-pyrus.xml").parse()]
+ self.assertTrue(pages[0].isredirect)
+
def test_MediaWikiXmlHandler(self):
handler = xmlreader.MediaWikiXmlHandler()
pages = []
@@ -30,9 +36,9 @@
pages.append(page)
handler.setCallback(pageDone)
xml.sax.parse("data/article-pear.xml", handler)
+ self.assertEquals(u"Pear", pages[0].title)
self.assertEquals(4, len(pages))
self.assertNotEquals("", pages[0].comment)
-
if __name__ == '__main__':
unittest.main()
Modified: trunk/pywikipedia/xmlreader.py
===================================================================
--- trunk/pywikipedia/xmlreader.py 2009-10-04 15:11:01 UTC (rev 7365)
+++ trunk/pywikipedia/xmlreader.py 2009-10-04 15:28:18 UTC (rev 7366)
@@ -56,7 +56,7 @@
"""
Represents a page.
"""
- def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment):
+ def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment, redirect):
# TODO: there are more tags we can read.
self.title = title
self.id = id
@@ -68,6 +68,7 @@
self.moveRestriction = moveRestriction
self.revisionid = revisionid
self.comment = comment
+ self.isredirect = redirect
class XmlHeaderEntry:
@@ -94,6 +95,7 @@
self.id = u''
self.revisionid = u''
self.comment = u''
+ self.isredirect = False
def setCallback(self, callback):
self.callback = callback
@@ -159,6 +161,8 @@
self.inContributorTag = False
elif name == 'restrictions':
self.editRestriction, self.moveRestriction = parseRestrictions(self.restrictions)
+ elif name == 'redirect':
+ self.isredirect = True
elif name == 'revision':
# All done for this.
# Remove trailing newlines and spaces
@@ -178,7 +182,7 @@
text, self.username,
self.ipedit, timestamp,
self.editRestriction, self.moveRestriction,
- self.revisionid, self.comment)
+ self.revisionid, self.comment, self.isredirect)
self.inRevisionTag = False
self.callback(entry)
elif self.headercallback:
@@ -313,6 +317,7 @@
self.title = elem.findtext("{%s}title" % self.uri)
self.pageid = elem.findtext("{%s}id" % self.uri)
self.restrictions = elem.findtext("{%s}restrictions" % self.uri)
+ self.isredirect = elem.findtext("{%s}redirect" % self.uri) is not None
def _create_revision(self, revision):
"""Creates a Single revision"""
@@ -332,7 +337,8 @@
editRestriction=editRestriction,
moveRestriction=moveRestriction,
revisionid=revisionid,
- comment=comment
+ comment=comment,
+ redirect=self.isredirect
)
def regex_parse(self):
_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn