Hi,
A few people asked earlier on this group how to schedule jobs the way
cron does. Here is code that provides cron-like functionality. The
example program fetches a list of URLs once every hour.
import os
from cStringIO import StringIO
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext.webapp import template
from google.appengine.api import urlfetch
from google.appengine.api import memcache
import time
import logging
import md5
def mysleep(r):
    """Pause the current request for a fixed three seconds.

    ``r`` is accepted (callers pass their retry counter) but it does not
    influence the length of the pause.
    """
    pause_seconds = 3
    time.sleep(pause_seconds)
def geturl(url):
    """Fetch ``url`` on a best-effort basis and discard the response.

    Failures never propagate to the caller, but they are logged instead of
    being swallowed silently.  Only ``Exception`` subclasses are caught, so
    interpreter-level signals (``SystemExit``, ``KeyboardInterrupt``) are no
    longer hidden.

    Args:
        url: absolute URL to request.
    """
    try:
        urlfetch.fetch(url)
    except Exception:
        # Logged at info level: callers treat a failed fetch as non-fatal.
        logging.info('geturl: fetch of %s did not complete', url,
                     exc_info=True)
def geturlkey(url):
    """Return the hexadecimal MD5 digest of ``url`` for use as a cache key.

    Uses ``hashlib`` rather than the deprecated ``md5`` module.  Text
    (unicode) input is encoded as UTF-8 before hashing, so non-ASCII URLs
    no longer raise; byte strings are hashed unchanged.

    Args:
        url: the URL, as a byte string or a text string.

    Returns:
        A 32-character lowercase hex string.
    """
    import hashlib

    # type(u'') is ``unicode`` on Python 2 and ``str`` on Python 3.
    if isinstance(url, type(u'')):
        url = url.encode('utf-8')
    return hashlib.md5(url).hexdigest()
def timer(func, interval):
    """Register ``func`` to be run once every ``interval`` seconds.

    The registration is appended to the list stored under the memcache key
    ``'timer'``.  A marker key ``'timer-<func>'`` is also written; the
    function is considered due whenever that marker is absent.

    Args:
        func: name of the function to call, as a string.
        interval: number of seconds between runs.
    """
    timerlist = memcache.get('timer')
    if timerlist is None:
        timerlist = []
    timerlist.append({'func': func, 'interval': interval})
    # The marker has to *expire* after ``interval`` seconds.  Passing the
    # interval only as the value (as before) stored a key that never
    # expired, so the function never became due.
    memcache.set('timer-' + func, interval, time=interval)
    memcache.set('timer', timerlist)
def loop(func, args):
    """Queue one call of ``func`` for every element of ``args``.

    The work item is appended to the list stored under the memcache key
    ``'loop'``.  An empty ``args`` means there is nothing to call, so
    nothing is queued; a queued entry without arguments would otherwise
    make the consumer pop from an empty list.

    Args:
        func: name of the function to call, as a string.
        args: iterable of arguments; the function is called once per item.
    """
    pending = list(args)
    if not pending:
        return
    looplist = memcache.get('loop')
    if looplist is None:
        looplist = []
    looplist.append({'func': func, 'args': pending})
    memcache.set('loop', looplist)
def handletimer(host, uindex):
    """Run the first due timer, scanning round-robin from ``uindex``.

    A timer is due when its ``'timer-<func>'`` marker is missing from
    memcache.  The scan covers ``uindex`` to the end of the list, then
    wraps to the start.  When a due timer is found its marker is re-armed,
    the function is called, and ``/next`` is requested with the index that
    follows the one just handled.

    Args:
        host: scheme and host prefix (e.g. ``http://example.com``) used to
            build the follow-up request.
        uindex: list index at which to start scanning.

    Returns:
        True if a timer was run, False if none was due or none registered.
    """
    timerlist = memcache.get('timer')
    if not timerlist:
        return False

    count = len(timerlist)
    # An out-of-range start index previously indexed past the end of the
    # list; fall back to scanning from the beginning.
    start = uindex if 0 <= uindex < count else 0

    for offset in range(count):
        index = (start + offset) % count
        current = timerlist[index]
        if memcache.get('timer-' + current['func']) is None:
            break
    else:
        return False

    # Re-arm the marker so that it expires after the interval; without the
    # expiry the key would persist and the timer would never be due again.
    memcache.set('timer-' + current['func'], current['interval'],
                 time=current['interval'])
    try:
        # NOTE(review): eval() executes a string read back from memcache.
        # The names originate from timer() callers, but a name-to-function
        # dispatch table would be safer.
        eval(current['func'] + '()')
    except Exception:
        logging.exception('handletimer: %s() failed', current['func'])

    next_index = (index + 1) % count
    geturl(host + '/next?t=t&i=' + str(next_index))
    return True
def handleloop(host, uindex):
    """Run one queued loop call and schedule the next step.

    Takes the first argument of the first queued entry, writes the
    remaining queue back to memcache (or deletes the key once nothing is
    left), calls the function with that argument, and then requests
    ``/next``.

    Entries whose argument list is empty are discarded instead of raising
    ``IndexError``, and an empty entry no longer causes the entries queued
    behind it to be deleted.

    Args:
        host: scheme and host prefix used to build the follow-up request.
        uindex: counter forwarded, incremented by one, as the ``i``
            parameter of the follow-up request.

    Returns:
        True if a call was made, False if there was nothing to do.
    """
    looplist = memcache.get('loop')
    if looplist is None:
        return False

    # Ignore entries that carry no work.
    looplist = [entry for entry in looplist if entry['args']]
    if not looplist:
        memcache.delete('loop')
        return False

    head = looplist[0]
    func = head['func']
    arg = head['args'].pop(0)
    if not head['args']:
        looplist.pop(0)

    # Persist the remaining queue before running the call.
    if looplist:
        memcache.set('loop', looplist)
    else:
        memcache.delete('loop')

    try:
        # NOTE(review): eval() executes a string built from memcache data.
        # The names originate from loop() callers, but a name-to-function
        # dispatch table would be safer.
        eval(func + '(' + repr(arg) + ')')
    except Exception:
        logging.exception('handleloop: %s(%r) failed', func, arg)

    geturl(host + '/next?t=l&i=' + str(uindex + 1))
    return True
class MainPage(webapp.RequestHandler):
    """Handler mapped to ``/``: replies with a fixed greeting."""

    def get(self):
        """Write the greeting to the response body."""
        greeting = 'hello world'
        self.response.out.write(greeting)
class StartPage(webapp.RequestHandler):
    """Handler mapped to ``/start``: starts the scheduler."""

    def get(self):
        """Mark the scheduler as running and request the first ``/task``.

        Responds ``fail`` without doing anything when the status flag in
        memcache already reads ``running``; otherwise responds ``ok``.
        """
        already_running = memcache.get('status') == 'running'
        if already_running:
            self.response.out.write("fail")
            return

        memcache.set('status', 'running')
        # Clear registrations left over from an earlier run.
        for stale_key in ('timer', 'loop'):
            memcache.delete(stale_key)

        startfunction()
        first_step = "http://" + self.request.headers["HOST"] + '/task'
        geturl(first_step)
        self.response.out.write("ok")
class StopPage(webapp.RequestHandler):
    """Handler mapped to ``/stop``: stops the scheduler."""

    def get(self):
        """Set the status flag to ``stop`` and respond ``ok``.

        The other handlers return early unless the flag reads ``running``.
        """
        new_status = 'stop'
        memcache.set('status', new_status)
        self.response.out.write("ok")
class TimerPage(webapp.RequestHandler):
    """Handler mapped to ``/timer``: runs the next due timer, if any."""

    def get(self):
        """Handle timers starting at index ``i``.

        When no timer was run, sleep and then request ``/task`` with the
        retry counter ``r`` incremented.
        """
        self.response.out.write("ok")
        if memcache.get('status') != 'running':
            return

        index = int(self.request.get('i', '0'))
        base = "http://" + self.request.headers["HOST"]
        if handletimer(base, index):
            return

        retry = int(self.request.get('r', '0'))
        mysleep(retry)
        geturl(base + '/task?r=' + str(retry + 1))
class TaskPage(webapp.RequestHandler):
    """Handler mapped to ``/task``: runs loop work first, then timers."""

    def get(self):
        """Try one loop call, then one timer; otherwise sleep and retry.

        When neither produced work, sleep and then request ``/sleep`` with
        the retry counter ``r`` incremented.
        """
        self.response.out.write("ok")
        if memcache.get('status') != 'running':
            return

        base = "http://" + self.request.headers["HOST"]
        # The timer is only consulted when no loop work was run.
        if handleloop(base, 0) or handletimer(base, 0):
            return

        retry = int(self.request.get('r', '0'))
        mysleep(retry)
        geturl(base + '/sleep?r=' + str(retry + 1))
class LoopPage(webapp.RequestHandler):
    """Handler mapped to ``/loop``: continues queued loop work."""

    def get(self):
        """Run one loop call with counter ``i``, falling back to timers.

        When neither produced work, request ``/task`` with the retry
        counter ``r`` incremented (no sleep on this path).
        """
        self.response.out.write("ok")
        if memcache.get('status') != 'running':
            return

        index = int(self.request.get('i', '0'))
        base = "http://" + self.request.headers["HOST"]
        # The timer is only consulted when no loop work was run.
        if handleloop(base, index) or handletimer(base, 0):
            return

        retry = int(self.request.get('r', '0'))
        geturl(base + '/task?r=' + str(retry + 1))
class NextPage(webapp.RequestHandler):
    """Handler mapped to ``/next``: forwards to ``/timer`` or ``/loop``."""

    def get(self):
        """Request ``/timer`` when ``t`` is ``'t'``, else ``/loop``.

        The index ``i`` is passed through unchanged.
        """
        self.response.out.write("ok")
        if memcache.get('status') != 'running':
            return

        index = int(self.request.get('i', '0'))
        kind = self.request.get('t', 'l')
        target = "timer" if kind == 't' else "loop"
        base = "http://" + self.request.headers["HOST"]
        geturl(base + '/' + target + '?i=' + str(index))
class SleepPage(webapp.RequestHandler):
    """Handler mapped to ``/sleep``: pauses, then returns to ``/task``."""

    def get(self):
        """Sleep, then request ``/task`` with the retry counter incremented.

        Does nothing beyond responding ``ok`` unless the status flag in
        memcache reads ``running``.
        """
        self.response.out.write("ok")
        if memcache.get('status') != 'running':
            return

        retry = int(self.request.get('r', '0'))
        mysleep(retry)
        # The query string literal was previously split across two lines,
        # leaving an unterminated string; it is a single '/task?r=' here.
        base = "http://" + self.request.headers["HOST"]
        geturl(base + '/task?r=' + str(retry + 1))
# URL routing table: each path is served by the handler class beside it.
application = webapp.WSGIApplication(
    [
        ('/', MainPage),
        ('/start', StartPage),
        ('/stop', StopPage),
        ('/task', TaskPage),
        ('/loop', LoopPage),
        ('/timer', TimerPage),
        ('/next', NextPage),
        ('/sleep', SleepPage),
    ],
    debug=True)
def main():
    """Hand the WSGI application to the App Engine runner."""
    run_wsgi_app(application)


# NOTE(review): this guard sits above the user's code that follows it, so
# when the file is executed as a script main() runs before those functions
# are defined -- confirm the intended ordering.
if __name__ == "__main__":
    main()
# --- user's code ---
# URLs that the scheduled job fetches.
urllist = [
    'http://www.google.com/',
    'http://www.cnn.com/',
]
def getone(url):
    """Fetch ``url`` and, on HTTP 200, mark it as fetched for one hour.

    The marker is a memcache entry keyed by ``geturlkey(url)`` that expires
    after 60*60 seconds.  Errors and non-200 responses are logged rather
    than silently ignored; nothing is raised to the caller.

    Args:
        url: absolute URL to fetch.
    """
    try:
        result = urlfetch.fetch(url)
        if result.status_code == 200:
            memcache.set(geturlkey(url), '1', 60 * 60)
        else:
            logging.warning('getone: %s returned status %s',
                            url, result.status_code)
    except Exception:
        # Best effort: no marker is written, and the failure is recorded.
        logging.warning('getone: fetch of %s failed', url, exc_info=True)
def getallurl():
    """Queue a ``getone`` call for every URL that has no fetched marker.

    A URL is selected when memcache holds no entry under its
    ``geturlkey`` digest.
    """
    pending = [
        candidate
        for candidate in urllist
        if memcache.get(geturlkey(candidate)) is None
    ]
    # Queued through loop(); the effect matches calling getone(url) for
    # each pending URL in turn.
    loop('getone', pending)
def startfunction():
    """Register the user's periodic jobs; called by the ``/start`` handler."""
    # getallurl is scheduled with an interval of 60*60 seconds.
    every_hour = 60 * 60
    timer('getallurl', every_hour)
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"Google App Engine" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://groups.google.com/group/google-appengine?hl=en
-~----------~----~----~----~------~----~------~--~---