Revision: 7366
Author:   nicdumz
Date:     2009-10-04 15:28:18 +0000 (Sun, 04 Oct 2009)

Log Message:
-----------
xmlreader: parse redirect information from xml
Original patch from Santiago Mola

Modified Paths:
--------------
    trunk/pywikipedia/tests/test_xmlreader.py
    trunk/pywikipedia/xmlreader.py

Added Paths:
-----------
    trunk/pywikipedia/tests/data/article-pyrus.xml

Added: trunk/pywikipedia/tests/data/article-pyrus.xml
===================================================================
--- trunk/pywikipedia/tests/data/article-pyrus.xml                              
(rev 0)
+++ trunk/pywikipedia/tests/data/article-pyrus.xml      2009-10-04 15:28:18 UTC 
(rev 7366)
@@ -0,0 +1,101 @@
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
+  <siteinfo>
+    <sitename>Wikipedia</sitename>
+    <base>http://en.wikipedia.org/wiki/Main_Page</base>
+    <generator>MediaWiki 1.16alpha-wmf</generator>
+    <case>first-letter</case>
+    <namespaces>
+      <namespace key="-2">Media</namespace>
+      <namespace key="-1">Special</namespace>
+      <namespace key="0" />
+      <namespace key="1">Talk</namespace>
+      <namespace key="2">User</namespace>
+      <namespace key="3">User talk</namespace>
+      <namespace key="4">Wikipedia</namespace>
+      <namespace key="5">Wikipedia talk</namespace>
+      <namespace key="6">File</namespace>
+      <namespace key="7">File talk</namespace>
+      <namespace key="8">MediaWiki</namespace>
+      <namespace key="9">MediaWiki talk</namespace>
+      <namespace key="10">Template</namespace>
+      <namespace key="11">Template talk</namespace>
+      <namespace key="12">Help</namespace>
+      <namespace key="13">Help talk</namespace>
+      <namespace key="14">Category</namespace>
+      <namespace key="15">Category talk</namespace>
+      <namespace key="100">Portal</namespace>
+      <namespace key="101">Portal talk</namespace>
+    </namespaces>
+  </siteinfo>
+  <page>
+    <title>Pyrus</title>
+    <id>9261472</id>
+    <redirect />
+    <revision>
+      <id>104997415</id>
+      <timestamp>2007-02-02T02:39:52Z</timestamp>
+      <contributor>
+        <username>Melburnian</username>
+        <id>555187</id>
+      </contributor>
+      <comment>moved [[Pyrus]] to [[Pyrus (brand)]]: all links to ''Pyrus'' are related to the pear tree or fruit</comment>
+      <text xml:space="preserve">#REDIRECT [[Pyrus (brand)]]</text>
+    </revision>
+    <revision>
+      <id>104997738</id>
+      <timestamp>2007-02-02T02:41:24Z</timestamp>
+      <contributor>
+        <username>Melburnian</username>
+        <id>555187</id>
+      </contributor>
+      <comment>all links to ''Pyrus'' are related to the pear tree or fruit</comment>
+      <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+    </revision>
+    <revision>
+      <id>189729426</id>
+      <timestamp>2008-02-07T14:06:10Z</timestamp>
+      <contributor>
+        <username>Jkokemueller</username>
+        <id>6303952</id>
+      </contributor>
+      <comment>Added disambiguation</comment>
+      <text xml:space="preserve">'''Pyrus''' may refer to:
+
+* [[Pear]], trees of the genus ''Pyrus'' and the fruit of that tree, edible in some species
+* [[Main//Pyrus DMS]], a [[Document Management System]]</text>
+    </revision>
+    <revision>
+      <id>190346463</id>
+      <timestamp>2008-02-10T07:21:12Z</timestamp>
+      <contributor>
+        <username>IceCreamAntisocial</username>
+        <id>346507</id>
+      </contributor>
+      <minor/>
+      <comment>rv</comment>
+      <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+    </revision>
+    <revision>
+      <id>238138507</id>
+      <timestamp>2008-09-13T12:57:33Z</timestamp>
+      <contributor>
+        <username>Cottonapple4</username>
+        <id>7707615</id>
+      </contributor>
+      <comment>[[WP:AES|←]] Redirected page to [[Pear]]</comment>
+      <text xml:space="preserve">#REDIRECT [[Pear]]
+[[Category:Maloideae]]</text>
+    </revision>
+    <revision>
+      <id>238392911</id>
+      <timestamp>2008-09-14T17:08:56Z</timestamp>
+      <contributor>
+        <username>Rkitko</username>
+        <id>536375</id>
+      </contributor>
+      <minor/>
+      <comment>Reverted edits by [[Special:Contributions/Cottonapple4|Cottonapple4]] ([[User talk:Cottonapple4|talk]]) to last version by IceCreamAntisocial</comment>
+      <text xml:space="preserve">#REDIRECT [[Pear]]</text>
+    </revision>
+  </page>
+</mediawiki>

Modified: trunk/pywikipedia/tests/test_xmlreader.py
===================================================================
--- trunk/pywikipedia/tests/test_xmlreader.py   2009-10-04 15:11:01 UTC (rev 
7365)
+++ trunk/pywikipedia/tests/test_xmlreader.py   2009-10-04 15:28:18 UTC (rev 
7366)
@@ -14,6 +14,7 @@
         self.assertEquals(u"24278", pages[0].id)
         self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of'))
         self.assertEquals(u"Quercusrobur", pages[1].username)
+        self.assertEquals(u"Pear", pages[0].title)
 
     def test_XmlDumpFirstRev(self):
         pages = [r for r in xmlreader.XmlDump("data/article-pear.xml").parse()]
@@ -22,7 +23,12 @@
         self.assertEquals(u"Pear", pages[0].title)
         self.assertEquals(u"24278", pages[0].id)
         self.assertTrue(pages[0].text.startswith('Pears are [[tree]]s of'))
+        self.assertTrue(not pages[0].isredirect)
 
+    def test_XmlDumpRedirect(self):
+        pages = [r for r in xmlreader.XmlDump("data/article-pyrus.xml").parse()]
+        self.assertTrue(pages[0].isredirect)
+
     def test_MediaWikiXmlHandler(self):
         handler = xmlreader.MediaWikiXmlHandler()
         pages = []
@@ -30,9 +36,9 @@
             pages.append(page)
         handler.setCallback(pageDone)
         xml.sax.parse("data/article-pear.xml", handler)
+        self.assertEquals(u"Pear", pages[0].title)
         self.assertEquals(4, len(pages))
         self.assertNotEquals("", pages[0].comment)
 
-
 if __name__ == '__main__':
     unittest.main()

Modified: trunk/pywikipedia/xmlreader.py
===================================================================
--- trunk/pywikipedia/xmlreader.py      2009-10-04 15:11:01 UTC (rev 7365)
+++ trunk/pywikipedia/xmlreader.py      2009-10-04 15:28:18 UTC (rev 7366)
@@ -56,7 +56,7 @@
     """
     Represents a page.
     """
-    def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment):
+    def __init__(self, title, id, text, username, ipedit, timestamp, editRestriction, moveRestriction, revisionid, comment, redirect):
         # TODO: there are more tags we can read.
         self.title = title
         self.id = id
@@ -68,6 +68,7 @@
         self.moveRestriction = moveRestriction
         self.revisionid = revisionid
         self.comment = comment
+        self.isredirect = redirect
 
 
 class XmlHeaderEntry:
@@ -94,6 +95,7 @@
         self.id = u''
         self.revisionid = u''
         self.comment = u''
+        self.isredirect = False
 
     def setCallback(self, callback):
         self.callback = callback
@@ -159,6 +161,8 @@
             self.inContributorTag = False
         elif name == 'restrictions':
             self.editRestriction, self.moveRestriction = parseRestrictions(self.restrictions)
+        elif name == 'redirect':
+            self.isredirect = True
         elif name == 'revision':
             # All done for this.
             # Remove trailing newlines and spaces
@@ -178,7 +182,7 @@
                              text, self.username, 
                              self.ipedit, timestamp, 
                              self.editRestriction, self.moveRestriction, 
-                             self.revisionid, self.comment)
+                             self.revisionid, self.comment, self.isredirect)
             self.inRevisionTag = False
             self.callback(entry)
         elif self.headercallback:
@@ -313,6 +317,7 @@
         self.title = elem.findtext("{%s}title" % self.uri)
         self.pageid = elem.findtext("{%s}id" % self.uri)
         self.restrictions = elem.findtext("{%s}restrictions" % self.uri)
+        self.isredirect = elem.findtext("{%s}redirect" % self.uri) is not None
 
     def _create_revision(self, revision):
         """Creates a Single revision"""
@@ -332,7 +337,8 @@
                        editRestriction=editRestriction,
                        moveRestriction=moveRestriction,
                        revisionid=revisionid,
-                       comment=comment
+                       comment=comment,
+                       redirect=self.isredirect
                       )
 
     def regex_parse(self):



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to