Revision: 6823
Author:   philip
Date:     2009-05-05 03:48:13 +0000 (Tue, 05 May 2009)

Log Message:
-----------
* Modified Site.search. We can't get a page title through '-search' due to an 
HTML change in MediaWiki.
* Fixed a bug in Site.search. The query string should be encoded to UTF-8; 
otherwise we can't search for a Unicode string with this function.
* Fixed a bug in Family.search_address. The namespace index i (ns) must start 
from 0 because the main namespace's id is 0.

Modified Paths:
--------------
    trunk/pywikipedia/family.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/family.py
===================================================================
--- trunk/pywikipedia/family.py 2009-05-04 22:04:07 UTC (rev 6822)
+++ trunk/pywikipedia/family.py 2009-05-05 03:48:13 UTC (rev 6823)
@@ -3580,7 +3580,7 @@
                     # add all namespaces
                     namespaces = self.namespaces.keys()
                 for i in namespaces:
-                    if i > 0:
+                    if i >= 0:
                         namespace_params = namespace_params + '&ns%d=1' % i
 
         return 
"%s?title=%s:Search&search=%s&limit=%d%s&fulltext=1&useskin=monobook" % 
(self.path(code),

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py      2009-05-04 22:04:07 UTC (rev 6822)
+++ trunk/pywikipedia/wikipedia.py      2009-05-05 03:48:13 UTC (rev 6823)
@@ -5052,14 +5052,14 @@
     def search(self, query, number = 10, namespaces = None):
         """Yield search results (using Special:Search page) for query."""
         throttle = True
-        path = self.search_address(urllib.quote_plus(query),
+        path = self.search_address(urllib.quote_plus(query.encode('utf-8')),
                                    n=number, ns=namespaces)
         get_throttle()
         html = self.getUrl(path)
 
-        entryR = re.compile(ur'<li[^>]*><a href=".+?" 
title="(?P<title>.+?)">.+?</a>'
-                              '<br />(?P<match>.*?)<span 
style="color[^>]*>.+?: '
-                              '(?P<relevance>[0-9.]+)% - '
+        entryR = re.compile(ur'<li><a href=".+?" 
title="(?P<title>.+?)">.+?</a>'
+#                              '<br />(?P<match>.*?)<span 
style="color[^>]*>.+?: '
+#                              '(?P<relevance>[0-9.]+)% - '
 #                              '(?P<size>[0-9.]*) '
 #                              '(?P<sizeunit>[A-Za-z]) '
 #                              '\((?P<words>.+?) \w+\) - '
@@ -5068,8 +5068,8 @@
 
         for m in entryR.finditer(html):
             page = Page(self, m.group('title'))
-            match = m.group('match')
-            relevance = m.group('relevance')
+            #match = m.group('match')
+            #relevance = m.group('relevance')
             #size = m.group('size')
             ## sizeunit appears to always be "KB"
             #words = m.group('words')
@@ -5078,7 +5078,7 @@
             #print "%s - %s %s (%s words) - %s" % (relevance, size, sizeunit, 
words, date)
 
             #yield page, match, relevance, size, words, date
-            yield page, match, relevance, '', '', ''
+            yield page, '', '', '', '', ''
 
     # TODO: avoid code duplication for the following methods
     def newpages(self, number = 10, get_redirect = False, repeat = False, 
namespace = 0):



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to