Am 21.11.2010 11:01, schrieb Petar Milin:
On 20/11/10 22:34, Chris Rebert wrote:Thanks for believing in me ( ;-) ), but I am a newbie in Python world, although with some experience in other prog. languages. So, if I read pygoogle.py well, I should add lr parameter in init and then after lines 68 and 102?On Thu, Nov 18, 2010 at 3:26 AM, neocortex<pmi...@gmail.com> wrote:The library doesn't seem to have built-in support for filtering by language (and Google lacks a search query-string-based operator for that), but it looks like you could implement that feature by adding an "lr" parameter with an appropriate value to the query `args` dictionary. See the "lr?" entry under "Web Search Specific Arguments" on http://code.google.com/apis/websearch/docs/reference.html, and lines 68 & 102 of pygoogle.py.From those lines, it can be concluded that lr=lang_?? is not supported, unfortunately.Right; that's why I said "you could implement that feature". Pretty easily it would seem.
Correct. I just did not read source code enough to realize the parameter is needed twice. You just could hack that out, too :-) Normally (what is normal, anyway ?) it's good practice to create a constructor (__init__) in python which has the most common options as keyword arguments and default values assigned, like so: class Foo: def __init__(self,lr='lang_en',url='http://some.of.this'): self.lr = lr self.url = url ...
Thanks again! You guys here are very kind and helpful! Best, Petar
#!/usr/bin/python
"""
Google AJAX Search Module

http://code.google.com/apis/ajaxsearch/documentation/reference.html
"""
# simplejson is faster than the stdlib json on old interpreters; fall back
# to the standard library when it is not installed.  (Was a bare `except:`,
# which would also have hidden unrelated import-time failures.)
try:
    import simplejson as json
except ImportError:
    import json

# Compatibility shim: the urllib module was split in Python 3.
try:  # Python 2
    from urllib import urlencode, unquote, urlopen
except ImportError:  # Python 3
    from urllib.parse import urlencode, unquote
    from urllib.request import urlopen

__author__ = "Kiran Bandla"
__version__ = "0.1"

# Base endpoint of the (deprecated) Google AJAX web-search service.
URL = 'http://ajax.googleapis.com/ajax/services/search/web?'

# Web Search Specific Arguments
# http://code.google.com/apis/ajaxsearch/documentation/reference.html#_fonje_web
#
# SAFE: search safety level.
#   * safe=active   - highest level of safe-search filtering
#   * safe=moderate - moderate safe-search filtering (service default)
#   * safe=off      - disables safe-search filtering
SAFE_ACTIVE = "active"
SAFE_MODERATE = "moderate"
SAFE_OFF = "off"

# FILTER: duplicate-content filter.
#   * filter=0 - off
#   * filter=1 - on (service default)
FILTER_OFF = 0
FILTER_ON = 1

# Standard URL Arguments
# http://code.google.com/apis/ajaxsearch/documentation/reference.html#_fonje_args
#
# RSZ: result-set size per request: "small" = 4 results, "large" = 8 results.
RSZ_SMALL = "small"
RSZ_LARGE = "large"


class pygoogle:
    """Thin client for the Google AJAX web-search JSON API.

    Network access happens lazily, inside the search methods; constructing
    an instance only records the query and options.
    """

    def __init__(self, query, pages=10, lr='lang_en',
                 safe=SAFE_OFF, filter=FILTER_ON, rsz=RSZ_LARGE):
        """Record the query and search options.

        query  -- search terms (plain string; urlencoded later)
        pages  -- number of result pages to fetch (default 10)
        lr     -- language restriction, e.g. 'lang_en', 'lang_de'
                  (previously hard-coded; now a keyword argument)
        safe   -- one of SAFE_ACTIVE / SAFE_MODERATE / SAFE_OFF
        filter -- FILTER_ON (1) or FILTER_OFF (0), duplicate-content filter
        rsz    -- RSZ_SMALL (4/page) or RSZ_LARGE (8/page)
        """
        self.pages = pages
        self.query = query
        self.filter = filter
        self.rsz = rsz
        self.safe = safe
        self.lr = lr

    def __search__(self, print_results=False):
        """Fetch all configured pages; return the list of raw JSON dicts.

        When print_results is true, also pretty-print each hit (CLI use).
        """
        results = []
        for page in range(0, self.pages):
            # Page offset depends on the per-page result count.
            rsz = 8
            if self.rsz == RSZ_SMALL:
                rsz = 4
            args = {'q': self.query,
                    'v': '1.0',
                    'start': page * rsz,
                    'rsz': self.rsz,
                    'safe': self.safe,
                    'filter': self.filter,
                    'lr': self.lr}
            q = urlencode(args)
            search_results = urlopen(URL + q)
            data = json.loads(search_results.read())
            if print_results:
                if data['responseStatus'] == 200:
                    for result in data['responseData']['results']:
                        if result:
                            print('[%s]' % (unquote(result['titleNoFormatting'])))
                            # NOTE(review): the .replace("'","'") below is a no-op;
                            # it looks mangled by the list archive (probably
                            # originally .replace("&#39;","'")) -- confirm against
                            # the upstream source before "fixing" it.
                            print(result['content'].strip("<b>...</b>").replace("<b>", '').replace("</b>", '').replace("'", "'").strip())
                            print(unquote(result['unescapedUrl']) + '\n')
            results.append(data)
        return results

    def search(self):
        """Returns a dict of Title/URLs"""
        results = {}
        for data in self.__search__():
            for result in data['responseData']['results']:
                if result:
                    title = unquote(result['titleNoFormatting'])
                    results[title] = unquote(result['unescapedUrl'])
        return results

    def search_page_wise(self):
        """Returns a dict of page-wise urls"""
        results = {}
        for page in range(0, self.pages):
            # Bug fix: start must advance by the page size (8 for RSZ_LARGE);
            # the original passed start=page, so consecutive "pages"
            # overlapped by 7 results.
            args = {'q': self.query,
                    'v': '1.0',
                    'start': page * 8,
                    'rsz': RSZ_LARGE,
                    'safe': SAFE_OFF,
                    'filter': FILTER_ON,
                    'lr': self.lr}
            q = urlencode(args)
            search_results = urlopen(URL + q)
            data = json.loads(search_results.read())
            urls = []
            for result in data['responseData']['results']:
                if result:
                    url = unquote(result['unescapedUrl'])
                    urls.append(url)
            results[page] = urls
        return results

    def get_urls(self):
        """Returns list of result URLs"""
        results = []
        for data in self.__search__():
            for result in data['responseData']['results']:
                if result:
                    results.append(unquote(result['unescapedUrl']))
        return results

    def get_result_count(self):
        """Returns the number of results"""
        temp = self.pages
        self.pages = 1  # only one request is needed for the estimate
        result_count = 0
        try:
            result_count = self.__search__()[0]['responseData']['cursor']['estimatedResultCount']
        except Exception as e:
            # Best-effort: report the problem but still return 0.
            print(e)
        finally:
            # Always restore the caller's page count.
            self.pages = temp
        return result_count

    def display_results(self):
        """Prints results (for command line)"""
        self.__search__(True)


if __name__ == "__main__":
    import sys
    query = ' '.join(sys.argv[1:])
    g = pygoogle(query)
    print('*Found %s results*' % (g.get_result_count()))
    g.pages = 1
    g.display_results()
<<attachment: stefan_sonnenberg.vcf>>
-- http://mail.python.org/mailman/listinfo/python-list