Revision: 7572
Author:   alexsh
Date:     2009-10-29 21:02:26 +0000 (Thu, 29 Oct 2009)

Log Message:
-----------
Simple handle google search from AJAX Search API

Modified Paths:
--------------
    trunk/pywikipedia/config.py
    trunk/pywikipedia/pagegenerators.py
    trunk/pywikipedia/wikipedia.py

Modified: trunk/pywikipedia/config.py
===================================================================
--- trunk/pywikipedia/config.py 2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/config.py 2009-10-29 21:02:26 UTC (rev 7572)
@@ -324,8 +324,16 @@
 # you must install the pyGoogle module from http://pygoogle.sf.net/ and have a
 # Google Web API license key. Note that Google doesn't give out license keys
 # anymore.
+# --------------------
+# Google web API is obsoleted for long time, now we can use Google AJAX Search API,
+# You can signup an API key from http://code.google.com/apis/ajaxsearch/signup.html.
 google_key = ''
 
+
+# using Google AJAX Search API, it require the refer website, this variable save the refer web address
+# when you sign up the Key.
+google_api_refer = ''
+
 # Some scripts allow using the Yahoo! Search Web Services. To use this feature,
 # you must install the pYsearch module from http://pysearch.sourceforge.net/
 # and get a Yahoo AppID from http://developer.yahoo.com

Modified: trunk/pywikipedia/pagegenerators.py
===================================================================
--- trunk/pywikipedia/pagegenerators.py 2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/pagegenerators.py 2009-10-29 21:02:26 UTC (rev 7572)
@@ -551,18 +551,43 @@
     def queryGoogle(self, query):
         #if config.google_key:
         if True:
-            #try:
+            try:
                 for url in self.queryViaSoapApi(query):
                     yield url
                 return
-            #except ImportError:
-                #pass
+            except ImportError:
+                for u in self.queryViaAPI(query):
+                    yield u
+                return
         # No google license key, or pygoogle not installed. Do it the ugly way.
         #for url in self.queryViaWeb(query):
         #    yield url
 
+    def queryViaAPI(self, query):
+        import json
+        url = u'http://ajax.googleapis.com/ajax/services/search/web?'
+        params = {
+            'key': config.google_key,
+            'v':'1.0',
+            'q': query,
+        }
+        url += urllib.urlencode(params)
+        
+        while True:
+            try:
+                wikipedia.output(u'Querying Google AJAX Search API...') #, offset %i' % offset)
+                result = json.loads(self.site.getUrl(url, refer = config.google_api_refer, no_hostname=True))
+                for res in result['responseData']['results']:
+                    yield res['url']
+            except:
+                wikipedia.output(u"An error occured. Retrying in 10 seconds...")
+                time.sleep(10)
+                continue
+        
+    
     def queryViaSoapApi(self, query):
         import google
+        
         google.LICENSE_KEY = config.google_key
         offset = 0
         estimatedTotalResultsCount = None

Modified: trunk/pywikipedia/wikipedia.py
===================================================================
--- trunk/pywikipedia/wikipedia.py      2009-10-29 20:48:49 UTC (rev 7571)
+++ trunk/pywikipedia/wikipedia.py      2009-10-29 21:02:26 UTC (rev 7572)
@@ -5448,8 +5448,8 @@
 
         return response, data
 
-    def getUrl(self, path, retry = None, sysop = False, data = None,
-               compress = True, no_hostname = False, cookie_only=False, back_response=False):
+    def getUrl(self, path, retry = None, sysop = False, data = None, compress = True,
+               no_hostname = False, cookie_only=False, refer=None, back_response=False):
         """
         Low-level routine to get a URL from the wiki.
 
@@ -5486,6 +5486,8 @@
             url = path          # and other useful pages without using some other functions.
         else:
             url = '%s://%s%s' % (self.protocol(), self.hostname(), path)
+        if refer:
+            uo.addheader('Refer', refer)
         data = self.urlEncode(data)
 
         # Try to retrieve the page until it was successfully loaded (just in



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

Reply via email to