better scheduler with correct sleep times
I need a scheduler which can delay execution of a function for a certain period of time. My attempt was something like this: def delay(self, func, arg, delay_sec=0): fire_at = wallclock() + delay_sec self.queue.put((fire_at, func, arg)) def runner(self): while self.alive: fire_at, func, arg = self.queue.get(block=True) try: now = wallclock() if now < fire_at: time.sleep(fire_at - now) func(arg) except Exception, e: log('DelayedTaskManager %s: %s\n' % (self.name, e)) finally: self.queue.task_done() But then I came up with the following case: 1. I call delay with delay_sec = 10 2. The scheduler goes to sleep for 10 seconds 3. In the meantime (let's say 1 second later) I delay another func but this time with delay_sec=0.5 4. The scheduler is sleeping and won't call my second function for another 9 seconds instead of 0.5 I started googling for a scheduler and found one in the standard library but it has the same code as mine (it calls the functions in the right order and mine doesn't, but it still waits too long). The other schedulers from the web are dealing with repeating tasks and such. 
So, I wrote this: # modification of http://code.activestate.com/recipes/87369/ class PriorityMinQueue(Queue): def top(self): try: return self.queue[0] except IndexError: return None def _init(self, maxsize): self.maxsize = maxsize self.queue = [] def _put(self, item): return heappush(self.queue, item) def _get(self): return heappop(self.queue) class DelayedTaskManager: def __init__(self, name): self.name = name self.queue = PriorityMinQueue() # can't use queue.not_empty condition because it isn't # signaled with notifyAll so I have to use my own self.sleeper = threading.Condition() def start(self): log('start delayed task manager %s with %d elements\n' % (self.name, self.queue.qsize())) self.alive = True self.thread = threading.Thread(target=self.runner) self.thread.setDaemon(True) self.thread.start() def stop(self): log('stop delayed task manager %s with %d elements\n' % (self.name, self.queue.qsize())) self.alive = False self._wake() self.thread.join() def delay(self, delay_sec, func, *arg, **kw): # even if delay is 0 or less, put to queue # so the function gets executed concurrently fire_at = wallclock() + delay_sec self.queue.put((fire_at, func, arg, kw)) self._wake() def _wake(self): with self.sleeper: self.sleeper.notify() def _wait(self, timeout): with self.sleeper: self.sleeper.wait(timeout) def runner(self): while self.alive: fire_at, func, arg, kw = self.queue.get(block=True) try: now = wallclock() while now < fire_at: self._wait(fire_at - now) if not self.alive: # canceled log('delayed task manager %s was stopped\n', self.name) return self.queue.put((fire_at, func, arg, kw)) top = self.queue.top() if top is not None and top[0] < fire_at: # temporally closer item, put back the old one self.queue.put((fire_at, func, arg, kw)) self.queue.task_done() fire_at, func, arg, kw = self.queue.get() now = wallclock() func(*arg, **kw) except Exception, e: log('delayed task manager %s: %s\n', self.name, e) finally: self.queue.task_done() Is there a better way or some 
library that does that? My observations: 1. Threading module uses time.sleep instead of time.clock which results in less precise results (on windows platform) if sys.platform=='win32': #take care of differences in clock accuracy wallclock = time.clock else: wallclock = time.time 2. while analyzing the threading module I noticed that wait() is implemented via a loop and tiny sleep periods. I was expecting the usage of underlying OS primitives and functions but then I remembered about the GIL and quasi-multithreaded nature of Python. But still, isn't there a more precise method that the interpreter itself could implement? Thanks, Tvrtko P.S. This was Python 2.5 -- http://mail.python.org/mailman/listinfo/python-list
Re: I CAN connect socket to any localhost port but I shouldn't be able to
On Jul 30, 4:48 am, Gabriel Genellina [EMAIL PROTECTED] wrote: En Tue, 29 Jul 2008 14:56:08 -0300, qvx [EMAIL PROTECTED] escribió: I don't have a server listening on port 8084 but I can open a socket to it (and to many other ports, tested for all ports < 8000) Your example fails -as expected- on my PC running Python 2.5.2 + Windows XP SP2. It may be something specific to your setup or your platform. py> test(8084) Traceback (most recent call last): File "<stdin>", line 1, in <module> File "<stdin>", line 5, in test File "<string>", line 1, in connect socket.error: (10061, 'Connection refused') -- Gabriel Genellina Thanks for confirmation. There is a similar function in CherryPy server which won't start anymore but it used to. I am currently examining Windows and any recently installed software. -- Tvrtko -- http://mail.python.org/mailman/listinfo/python-list
I CAN connect socket to any localhost port but I shouldn't be able to
Hi, I don't have a server listening on port 8084 but I can open a socket to it (and to many other ports, tested for all ports < 8000) import socket def test(port): af, socktype, proto, canonname, sa = socket.getaddrinfo('localhost', port, socket.AF_INET, socket.SOCK_STREAM)[0] s = socket.socket(af, socktype, proto) s.settimeout(1.0) s.connect(('localhost', port)) s.close() # This doesn't throw socket.error, it happily finishes for x in range(1, 8000): test(x) Thanks, Tvrtko -- http://mail.python.org/mailman/listinfo/python-list
Re: Simulating low bandwidth network on localhost
I forgot to tell you that I'm using Windows. -- http://mail.python.org/mailman/listinfo/python-list
Simulating low bandwidth network on localhost
I would like to test my CherryPy application in varying network conditions, ranging from localhost full speed to low bandwidth (i.e. 14.4kbps) and variable latency from the milliseconds range to the seconds range. How can I simulate this? Are there some tricks to be played with Python or is there specialized software for this kind of thing (preferably free)? Thanks, qvx -- http://mail.python.org/mailman/listinfo/python-list
Re: Simulating low bandwidth network on localhost
Thanks. I'll try to find a version that works under Windows. So far I had little luck finding it. qvx -- http://mail.python.org/mailman/listinfo/python-list
Re: sqlobject performance problems (really)
qvx wrote: autocommit off attempt: connection_string = 'sqlite:/' + db_filename +'?autoCommit=0' no select attempt: t1 = T1(id=t1id, col1=r.col1, ...) I changed : conn_string = 'sqlite:/' + db_datoteka +'?autoCommit=0' conn = connectionForURI(conn_string) SQLObject._connection = conn into: conn_string = 'sqlite:/' + db_datoteka +'?autoCommit=0' conn = connectionForURI(conn_string) conn.autoCommit = False tran = conn.transaction() sqlhub.threadConnection = tran This seems to help in performance department. But it now fails with exception: * when input is unicode for ex: t1.col1 = unicode(col1, encoding='dbcs') = UnicodeEncodeError: 'ascii' codec can't encode character u'\u0107' ... File sqlobject\col.py, line 498, in from_python return value.encode(ascii) * when input is 'utf8' for ex: t1.col1 = unicode(col1, encoding='dbcs').encode('utf8') = UnicodeEncodeError: 'ascii' codec can't encode character u'\u017d' ... File sqlobject\col.py, line 489, in to_python return value.encode(ascii) * when input is 'dbcs' for ex: t1.col1 = col1_var # col1_var it is already in 'dbcs' = UnicodeDecodeError: 'utf8' codec can't decode bytes ... File sqlobject\dbconnection.py, line 295, in _executeRetry return cursor.execute(query) P.S. I'm a Windows Central European (windows-1250) user and my sys.setdefaultencoding is 'dbcs'. -- http://mail.python.org/mailman/listinfo/python-list
Help with unicode and sqlobject/pysqlite2
I really can't seem to make sqlobject/pysqlite2 save my local Eastern European characters. I am a Windows-1250 user and I have sys.setdefaultencoding('dbcs') in my sitecustomize. How can I save data like šđčćž? This is equivalent to '\x9a\xf0\xe8\xe6\x9e' I'm using the latest version of sqlobject from SVN. qvx -- http://mail.python.org/mailman/listinfo/python-list
sqlobject performance problems (really)
I'm writing a small project and I decided to try pysqlite. The database consists of one master table with five columns and two detail tables with one and two columns each (not counting foreign key columns). The program scans an input file and inserts data into those three tables. First I used pysqlite (ver 2). It took a few seconds to parse and populate one thousand main records and five-six thousand detail records (including a print statement for each main record). This is acceptable. Then I decided to give a try to sqlobject (I had to revert to pysqlite 1.x). I created something like this: class T1(SQLObject): col1 = StringCol() col2 = StringCol(length=5) ... det1 = MultipleJoin('T2') det2 = MultipleJoin('T3') class T2(SQLObject): ... t1 = ForeignKey('T1') class T3(SQLObject): ... t1 = ForeignKey('T1') My main loop looks like this: for r1 in par.parse(filename): # r1 is an intermediary object because I didn't # know how to instantiate an instance without # creating a record automatically, especially # because I didn't have all mandatory values # up until the end of the parse so I had to # keep the values in parallel instead of # storing them directly to my brand new class print r t1 = T1(col1=r.col1, ...) for r2 in r1.det1: t2 = T2(..., t1=t1) for r3 in r1.det2: t2 = T3(..., t1=t1) Now this takes around half a second for ONE master record!!! When I turned on the debug mode of connection I could see lots of *select* and *commit* statements. I tried to disable autocommit but with no success. I also tried to explicitly provide ID column (hoping to avoid select) but also with no success. autocommit off attempt: connection_string = 'sqlite:/' + db_filename +'?autoCommit=0' no select attempt: t1 = T1(id=t1id, col1=r.col1, ...) Any ideas how to make sqlobject work as fast as plain pysqlite. P.S. 
I used 0.6.1 version of sqlobject, but later I downloaded fresh version from SVN (upgraded pysqlite to 2.x, downloaded formencode, ez_setup, setuptools and maybe others) but it still doesn't work any better. qvx -- http://mail.python.org/mailman/listinfo/python-list
Poor man's OCR: need performance improvement tips
Hi all, I have a performance problem in my app. It is a poor man's version of OCR app. I started this app as a prototype before implementing it in C++. But now, after I have a working copy in Python, I don't feel like doing the job again in C++. A speed improvement of at least 5 times would be necessary (I have to process several hundreds of thousands of images). The application works like this: 1. Load a raster image of alphabet. This image is accompanied with xml description of that image (positions of each letter etc.). No problems here. Using this information load raster data (data[][]) for each letter. 2. Load image which must be recognized and transform it into 1 bit image (to avoid palette issues). No problems here. 3. Detect lines of text in input picture. No problems here. 4. Process each line: compare pixels of each letter of alphabet with corresponding pixels in line of input picture. This consists of loops comparing pixel by pixel. This is my performance bottleneck. I'm using PIL for initial image processing. But then I use plain Python loops for pixel matrix comparision. One nice optimization was to call PIL.Image.getdata() at the begining and then use data[y*w+x] instead of PIL.Image.getpixel(xy). I would like to compare each character raster with corresponding image pixels in a single operation and avoid (Python) loops. Oh, one more thing. Letter raster matrices have varying width and constant height (due to proportional width font which is used). This compare function should signal if there is a single different pixel. Any library that can do that? Here is how I expected to solve this problem in C++. Each line of text (and letter) has height below 16 pixels. It can be artificially made into 16 pixels. I planned to linearize each letter's matrix by columns. Imagine leter with pixel indices numbered like this: 00 10 20 01 11 21 02 12 22 03 13 23 .. .. .. 0f 1f 2f I would convert it into 00 01 02 03 04 05 ... 2e 2f. 
Since each pixel is one bit wide, each column would be 2 octets long. I would do the same to the line of text of the input picture. Then I would have to compare two buffers of length equal to the length of the character. After a successful match, I would advance the input stream by that number of bytes. Thanx qvx -- http://mail.python.org/mailman/listinfo/python-list
Re: Poor man's OCR: need performance improvement tips
I also have 0 OCR experience, but the case is simple enough. I know about scipy but I have 0 experience with it. I was actually hoping somebody who knows about it might have some recipe. I also tried psyco, but unfortunately, the speedup is only a few percent. I will check out ADaM's site. I was hoping to replace matrix comparison with something more efficient (minimal code change). I like the idea of dictionary lookup but it requires more code to be changed. If nothing else comes up I will probably try this method. Of course I will have to check the wider characters first so there will be presumably several lookups for each position. The only problem here is how to efficiently transform portions of the input picture into a suitable format (some kind of list/buffer). Thanks. -- http://mail.python.org/mailman/listinfo/python-list
Starting twisted service manually
I want to start a twisted app from another GUI application and not via twistd. It works fine when started via twistd (1 + 2) but not when I try to start it manually (1 + 3) - nothing is listening to 8080 port. # (1) common part from nevow import rend, appserver from twisted.application import service, internet from twisted.internet import reactor class Index(rend.Page): ... # (2) part used when running via twistd application = service.Application('my-app') internet.TCPServer(8080, appserver.NevowSite(Index(r'D:\www'))).setServiceParent(application) # (3) attempt to start the same thing but inside a larger (wxPython) app def run_in_thread(): def runner(): application = service.Application('my-app') internet.TCPServer(8080, appserver.NevowSite(Index(r'D:\www'))).setServiceParent(application) reactor.run(0) thread.start_new_thread(runner, ()) I feel lost in twisted documentation and HOWTOs. Please help! Qvx -- http://mail.python.org/mailman/listinfo/python-list
Re: PEP on path module for standard library
Ron Adam wrote: Bengt Richter wrote: indulging what=my penchant for seeking the general behind the specific ;-) There is a thing called Asynchronous pluggable protocol. It is Microsoft's technology (don't flame me now): Asynchronous pluggable protocols enable developers to create pluggable protocol handlers, MIME filters, and namespace handlers that work with Microsoft® Internet Explorer... Applications can use pluggable protocol handlers to handle a custom Uniform Resource Locator (URL) protocol scheme or filter data for a designated MIME type. In other words you can develop you own plugin which would allow Internet Explorer to open URLs like rar://c/my/doc/book.rar. (I was going to write plugin for .rar in order to enable offsite browsing of downloaded portions of web sites, all from an archive file). You could give it a look. If only to see that it is Mycrosofthonic: http://msdn.microsoft.com/workshop/networking/pluggable/overview/aplugprot_overviews_entry.asp. Qvx -- http://mail.python.org/mailman/listinfo/python-list
Daylight savings and getmtime
Hello, I've written a simple web deployment program which scans for the changes made to a local copy of a web site. Changed files are then packaged into a zip file and deployed to the web server. Now here's the catch. Changes are computed using (1) the log file from the last deployment and (2) the local file system. The log file contains datestamps (integers) returned from the os.path.getmtime(f) function at the time of last deployment. So I'm comparing two getmtime() values. The problem is when Daylight saving kicks in: suddenly all local files are reported as older than they were at the time of deployment. How do I compensate for this? Thanks, Tvrtko For those curious, here is the script. I apologize for Croatian comments and literals and missing private libraries, but I think the code is self-explanatory. __ # -*- coding: windows-1250 -*- from os.path import getmtime, join from os import walk, rename from zipfile import ZipFile, ZIP_DEFLATED from sets import Set from StringIO import StringIO from ftplib import FTP from qvx.io import adapt_stdout, unadapt_stdout from qvx.composite import Dot from qvx.compositeutil import read_composite import sys import time class DeploymentError(Exception): pass class Deployer: def __init__ (self, cfg_file): self.reset(cfg_file) def reset (self, cfg_file): self.read_cfg(cfg_file) self.local_files = [] self.remote_files = [] self.new_files= [] self.deleted_files= [] self.newer_files = [] self.older_files = [] self.www_all_time = None self.old_deployed = False def read_cfg (self, cfg_file): tree = read_composite(cfg_file) self.cfg = Dot(tree).DEPLOYMENT def prepare_file_lists (self): # Sastavi popis _datoteka_ u DIR direktoriju. 
# Izostavi datoteke iz _notes direktorija self.local_files = [] for root, dirs, files in walk(self.cfg.DIR): filtered = [join(root, f).replace('\\', '/') for f in files if f not in self.cfg.SKIP_FILES] self.local_files.extend(filtered) for skip_dir in self.cfg.SKIP_DIRS.split(','): if skip_dir.strip() in dirs: dirs.remove(skip_dir) # Sastavi popis datoteka na serveru # Koristi se sa informacijama od zadnjeg deploymenta # Popis se nalazi u www_all.txt datoteci self.remote_files = [] remote_stamps = {} zip = ZipFile(self.cfg.FILE, 'r') for line in zip.read('www_all.txt').split('\n'): name, stamp = line.split('\t') remote_stamps[name] = int(stamp) self.remote_files.append(name) self.www_all_time = zip.getinfo('www_all.txt').date_time # Deployment nije obavljen ako nije zapisan log self.old_deployed = 'deployment.log' in zip.namelist() zip.close() # Rastavi datoteke u tri kategorije: nove, obrisane i iste lset = Set(self.local_files) rset = Set(self.remote_files) self.new_files = list(lset - rset) self.deleted_files = list(rset - lset) common_files = list(lset & rset) # Pogledaj što se promijenilo u zajedničkim datotekama self.newer_files = [] self.older_files = [] for name in common_files: remotetime = remote_stamps[name] localtime = getmtime(name) #+ 3600 # Ako je razlika unutar sekunde, zanemari if abs(remotetime-localtime) > int(self.cfg.IGNORE_SEC): if remotetime > localtime: self.older_files.append(name) elif localtime > remotetime: self.newer_files.append(name) def need_redeployment (self): return not self.old_deployed def check_changes (self): # Ne bi trebalo biti starijih if self.older_files: raise DeploymentError('Ne smije biti starijih datoteka!') if not (self.new_files or self.deleted_files or self.newer_files): raise DeploymentError('Nema promjena!') def make_deployment_file (self): # Uključi potrebne datoteke deployment = ZipFile('new_'+self.cfg.FILE, 'w', ZIP_DEFLATED) for name in self.new_files + self.newer_files: deployment.write(name) # Uključi 
popis svih datoteka all_files = '\n'.join([f+'\t'+str(getmtime(f)) for f in self.local_files]) deployment.writestr('www_all.txt', all_files) # Uključi popis datoteka za obrisati for_delete = '\n'.join(self.deleted_files) if for_delete: deployment.writestr('www_delete.txt', for_delete) deployment.close() print '\nNapravljena je nova deployment datoteka.' # Preimenuj deployment datoteke timestr = '%04d-%02d-%02d_%02d-%02d-%02d' % self.www_all_time