Hi,

A few people asked earlier on this group how to schedule jobs like
cron. Here is some code that provides cron-like functionality. The
example program fetches a list of URLs once every hour.

import os
from cStringIO import StringIO
from google.appengine.ext import webapp
from google.appengine.ext.webapp.util import run_wsgi_app
from google.appengine.ext.webapp import template
from google.appengine.api import urlfetch
from google.appengine.api import memcache
import time
import logging
import md5

def mysleep(r):
        """Block the current request for a fixed three seconds.

        The ``r`` argument (callers in this file pass their retry counter)
        is accepted but not used; the pause length never varies.
        """
        pause_seconds = 3
        time.sleep(pause_seconds)

def geturl(url):
        """Fetch ``url`` on a best-effort basis and discard the response.

        Any ordinary failure of the fetch is swallowed so that the caller
        keeps running; nothing is returned.

        Args:
            url: absolute URL to request.
        """
        try:
                urlfetch.fetch(url)
        except Exception:
                # Deliberately best effort.  Catching ``Exception`` rather
                # than using a bare ``except:`` lets SystemExit and
                # KeyboardInterrupt propagate.  The handlers in this file
                # call geturl() on their own host, so failures here are
                # presumably routine -- hence debug level, not error.
                logging.debug('geturl: fetch of %s did not complete', url,
                              exc_info=True)

def geturlkey(url):
        """Return the hexadecimal MD5 digest of ``url``.

        The digest is used in this file as a memcache key for the URL.

        Args:
            url: the URL as a byte string or a text string; text is
                encoded as UTF-8 before hashing.

        Returns:
            A 32-character lowercase hexadecimal string.
        """
        # hashlib supersedes the deprecated ``md5`` module, which no longer
        # exists in Python 3.  Imported locally so this function does not
        # depend on the file's import block.
        import hashlib

        if isinstance(url, type(u'')):
                # Hash functions operate on bytes; encode text explicitly
                # instead of relying on an implicit ASCII conversion.
                url = url.encode('utf-8')
        return hashlib.md5(url).hexdigest()

def timer(func, interval):
        """Register ``func`` to be run periodically.

        Appends a ``{'func', 'interval'}`` record to the list stored under
        the memcache key ``'timer'`` and arms the per-function marker key
        ``'timer-' + func``.

        Args:
            func: name of the function to run, as a string.
            interval: number of seconds between runs.
        """
        timerlist = memcache.get('timer')
        if timerlist is None:
                timerlist = []
        timerlist.append({'func': func, 'interval': interval})
        # The marker must *expire* after ``interval`` seconds: a timer is
        # treated as due only when its marker key is missing.  The third
        # argument of memcache.set() is the expiry; without it the marker
        # would stay until evicted and the timer would never come due.
        memcache.set('timer-' + func, interval, interval)
        memcache.set('timer', timerlist)

def loop(func, args):
        """Queue one call of ``func`` for every item in ``args``.

        Appends a ``{'func', 'args'}`` record to the list stored under the
        memcache key ``'loop'``.

        Args:
            func: name of the function to call, as a string.
            args: list of arguments; each item is passed to one call.
                An empty (or None) ``args`` queues nothing.
        """
        if not args:
                # Nothing to run.  Queuing an empty batch would leave an
                # entry whose argument list cannot be popped from.
                return
        looplist = memcache.get('loop')
        if looplist is None:
                looplist = []
        looplist.append({'func': func, 'args': args})
        memcache.set('loop', looplist)

def handletimer(host, uindex):
        """Run the next due timer, if any, and chain to the next request.

        Timers are read from the memcache key ``'timer'``.  A timer is due
        when its marker key ``'timer-' + func`` is absent.  The scan starts
        at position ``uindex`` and wraps around the list once.

        Args:
            host: scheme and host prefix (e.g. ``"http://example.com"``)
                used to build the follow-up URL.
            uindex: position in the timer list at which to start scanning.

        Returns:
            True if a timer was due and was run (a request to
            ``/next?t=t&i=<following index>`` is issued afterwards),
            False if no timers are registered or none is due.
        """
        timerlist = memcache.get('timer')
        if not timerlist:
                return False
        count = len(timerlist)
        if not 0 <= uindex < count:
                # ``uindex`` comes from a query parameter; an out-of-range
                # value would otherwise index past the end of the list.
                uindex = 0

        due_index = None
        for offset in range(count):
                candidate = (uindex + offset) % count
                if memcache.get('timer-' + timerlist[candidate]['func']) is None:
                        due_index = candidate
                        break
        if due_index is None:
                return False

        current = timerlist[due_index]
        # Re-arm the marker with an expiry (third argument) so the timer
        # becomes due again after ``interval`` seconds; without the expiry
        # the marker would persist and the timer would not fire again.
        memcache.set('timer-' + current['func'], current['interval'],
                     current['interval'])
        try:
                # NOTE(review): the function name is read back from memcache
                # and evaluated as code; make sure only trusted code can
                # write the 'timer' key.
                eval(current['func'] + '()')
        except Exception:
                # A failing job must not break the request chain, but it
                # should leave a trace in the logs.
                logging.exception('timer job %r failed', current['func'])
        # Continue with the entry after this one, wrapping to the start.
        geturl(host + '/next?t=t&i=' + str((due_index + 1) % count))
        return True

def handleloop(host, uindex):
        """Run one queued loop call, if any, and chain to the next request.

        The queue is the list stored under the memcache key ``'loop'``;
        each entry holds a function name and its remaining arguments.  One
        argument is taken from the first entry per invocation.

        Args:
            host: scheme and host prefix used to build the follow-up URL.
            uindex: counter echoed (incremented by one) in the follow-up
                URL's ``i`` parameter.

        Returns:
            True if a call was made (a request to
            ``/next?t=l&i=<uindex + 1>`` is issued afterwards), False if
            the queue is missing or has nothing left to run.
        """
        looplist = memcache.get('loop')
        if looplist is None:
                return False
        # Skip entries with no arguments left; popping from one would raise
        # IndexError and fail the request.
        while looplist and not looplist[0]['args']:
                looplist.pop(0)
        if not looplist:
                memcache.delete('loop')
                return False

        entry = looplist[0]
        func = entry['func']
        arg = entry['args'].pop(0)
        # Drop whatever is exhausted now, but keep later entries that
        # still have work queued.
        while looplist and not looplist[0]['args']:
                looplist.pop(0)
        if looplist:
                memcache.set('loop', looplist)
        else:
                memcache.delete('loop')

        # The queue is saved before the call, so a failing call is not
        # retried.
        try:
                # NOTE(review): the function name and argument are read back
                # from memcache and evaluated as code; make sure only trusted
                # code can write the 'loop' key.
                eval(func + '(' + repr(arg) + ')')
        except Exception:
                logging.exception('loop job %s(%r) failed', func, arg)
        geturl(host + '/next?t=l&i=' + str(uindex + 1))
        return True

class MainPage(webapp.RequestHandler):
        """Handler for ``/``: replies with a fixed greeting."""

        def get(self):
                body = self.response.out
                body.write('hello world')

class StartPage(webapp.RequestHandler):
        """Handler for ``/start``: begins scheduling unless already running.

        Writes "fail" when the stored status is already 'running';
        otherwise marks the scheduler running, clears the stored timer and
        loop lists, calls startfunction(), requests ``/task`` on this host
        and writes "ok".
        """

        def get(self):
                if memcache.get('status') == 'running':
                        self.response.out.write("fail")
                        return
                memcache.set('status', 'running')
                # Start from a clean slate before jobs are registered.
                for stale_key in ('timer', 'loop'):
                        memcache.delete(stale_key)
                startfunction()
                task_url = "http://" + self.request.headers["HOST"] + '/task'
                geturl(task_url)
                self.response.out.write("ok")

class StopPage(webapp.RequestHandler):
        """Handler for ``/stop``: sets the stored status to 'stop'.

        The other handlers return early whenever the status is not
        'running'.
        """

        def get(self):
                memcache.set('status', 'stop')
                reply = self.response.out
                reply.write("ok")

class TimerPage(webapp.RequestHandler):
        """Handler for ``/timer``: runs the next due timer.

        Query parameters:
            i: index in the timer list at which to start scanning
               (default 0).
            r: retry counter, used only when no timer was due (default 0).
        """

        def get(self):
                self.response.out.write("ok")
                if memcache.get('status') != 'running':
                        return
                index = int(self.request.get('i', '0'))
                host = "http://" + self.request.headers["HOST"]
                if not handletimer(host, index):
                        # Nothing due: pause, then go back to the task
                        # dispatcher with the retry counter bumped.
                        retry = int(self.request.get('r', '0'))
                        mysleep(retry)
                        geturl(host + '/task?r=' + str(retry + 1))

class TaskPage(webapp.RequestHandler):
        """Handler for ``/task``: dispatches queued work.

        Tries the loop queue first and the timers second.  When neither
        has anything to do it pauses and requests ``/sleep`` with the
        retry counter (query parameter ``r``, default 0) incremented.
        """

        def get(self):
                self.response.out.write("ok")
                if memcache.get('status') != 'running':
                        return
                host = "http://" + self.request.headers["HOST"]
                # Loop work takes priority; timers are checked only when
                # the loop queue had nothing to run.
                if handleloop(host, 0) or handletimer(host, 0):
                        return
                retry = int(self.request.get('r', '0'))
                mysleep(retry)
                geturl(host + '/sleep?r=' + str(retry + 1))

class LoopPage(webapp.RequestHandler):
        """Handler for ``/loop``: runs the next queued loop call.

        Query parameters:
            i: counter passed through to handleloop() (default 0).
            r: retry counter, used only when nothing was run (default 0).

        When neither the loop queue nor the timers have work, ``/task`` is
        requested with the retry counter incremented.
        """

        def get(self):
                self.response.out.write("ok")
                if memcache.get('status') != 'running':
                        return
                index = int(self.request.get('i', '0'))
                host = "http://" + self.request.headers["HOST"]
                if handleloop(host, index) or handletimer(host, 0):
                        return
                retry = int(self.request.get('r', '0'))
                geturl(host + '/task?r=' + str(retry + 1))

class NextPage(webapp.RequestHandler):
        """Handler for ``/next``: forwards to ``/timer`` or ``/loop``.

        Query parameters:
            t: 't' selects ``/timer``; any other value selects ``/loop``
               (default 'l').
            i: index passed along unchanged (default 0).
        """

        def get(self):
                self.response.out.write("ok")
                if memcache.get('status') != 'running':
                        return
                index = int(self.request.get('i', '0'))
                kind = self.request.get('t', 'l')
                if kind == 't':
                        target = "timer"
                else:
                        target = "loop"
                pieces = ["http://", self.request.headers["HOST"], '/',
                          target, '?i=', str(index)]
                geturl(''.join(pieces))

class SleepPage(webapp.RequestHandler):
        """Handler for ``/sleep``: pauses, then returns to ``/task``.

        Query parameters:
            r: retry counter (default 0); passed on incremented by one.
        """

        def get(self):
                self.response.out.write("ok")
                if memcache.get('status') != 'running':
                        return
                retry = int(self.request.get('r', '0'))
                mysleep(retry)
                host = "http://" + self.request.headers["HOST"]
                geturl(host + '/task?r=' + str(retry + 1))

# URL routing table: each path is served by the handler class next to it.
application = webapp.WSGIApplication(
        [
                ('/', MainPage),
                ('/start', StartPage),
                ('/stop', StopPage),
                ('/task', TaskPage),
                ('/loop', LoopPage),
                ('/timer', TimerPage),
                ('/next', NextPage),
                ('/sleep', SleepPage),
        ],
        debug=True)

def main():
        """Serve ``application`` through run_wsgi_app()."""
        run_wsgi_app(application)

# NOTE(review): this guard sits above the job functions defined further
# down the file (getone, getallurl, startfunction).  When the file is run
# as a script, main() is called before those definitions have executed, so
# a request handled during that first call presumably cannot resolve them
# -- consider moving this guard to the very end of the file.  TODO confirm
# against the runtime's script-loading behaviour.
if __name__ == "__main__":
        main()

# User's code starts here.
# URLs to be fetched by the scheduled job.
urllist = [
        'http://www.google.com/',
        'http://www.cnn.com/',
]
def getone(url):
        """Fetch ``url`` and, on HTTP 200, mark it as fetched for one hour.

        The mark is the memcache key ``geturlkey(url)`` with value ``'1'``
        and a 60*60 second expiry.  Failures are logged and otherwise
        ignored; nothing is returned.

        Args:
            url: absolute URL to fetch.
        """
        try:
                result = urlfetch.fetch(url)
                if result.status_code == 200:
                        memcache.set(geturlkey(url), '1', 60*60)
        except Exception:
                # Best effort: one bad URL must not abort the batch.
                # Catching ``Exception`` rather than using a bare ``except:``
                # lets SystemExit and KeyboardInterrupt propagate, and the
                # failure is logged instead of vanishing silently.
                logging.warning('getone: fetching %s failed', url,
                                exc_info=True)

def getallurl():
        """Queue a fetch for every URL in ``urllist`` not marked as fetched.

        A URL counts as fetched while the memcache key ``geturlkey(url)``
        exists.  The remaining URLs are handed to loop(), which is
        equivalent to ``for url in fetchlist: getone(url)`` spread over
        several requests.
        """
        fetchlist = [url for url in urllist
                     if memcache.get(geturlkey(url)) is None]
        if fetchlist:
                # Queue only when something needs fetching; an empty batch
                # has no argument for the loop runner to take.
                loop('getone', fetchlist)

def startfunction():
        """Register the periodic jobs; StartPage calls this on ``/start``."""
        # getallurl is scheduled with an interval of 60*60 seconds.
        one_hour = 60*60
        timer('getallurl', one_hour)

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Google App Engine" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at 
http://groups.google.com/group/google-appengine?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to