Hi Andreas,
I'm noticing that once in a while, the mem_alloc function seems to be
taking a long time (0.46 seconds for me); it doesn't happen in the first
allocation, and seems to occur irrespective of allocation size. Can you tell
me if you notice the same thing in any of your larger projects? Below is
some code (maybe excessive for small debugging) that wraps functions in a
timer via the decorator @time_fcn(name). It uses time.time() so that real
(wall-clock) time is measured instead of CPU time; I also found that a short
time.sleep call was necessary to get better measurements.
regards,
Nicholas
# Accumulated elapsed seconds per timer name (name -> float total).
stats = dict()
# Timers that have been entered but not yet exited.
running_timers = list()
class RealElapsedTimer:
    """Context manager that adds elapsed wall-clock time to a named counter.

    Entering the context records a start timestamp and registers the timer
    in the module-level ``running_timers`` list.  Leaving it adds the
    elapsed seconds to ``stats[name]`` and unregisters the timer.  When a
    single timed section takes longer than ``slow_threshold`` seconds, the
    timer and the current call stack are printed.
    """

    # Elapsed seconds above which __exit__ prints the timer and a stack trace.
    slow_threshold = 0.1

    def __init__(self, name):
        """Create a timer that accumulates into ``stats[name]``.

        The counter is created with value 0.0 if it does not exist yet.
        """
        # ``in`` instead of dict.has_key(), which no longer exists in Python 3.
        if name not in stats:
            stats[name] = 0.0
        self.name = name
        # None until __enter__ runs; lets __repr__ work on an unstarted timer.
        self.start = None

    def __repr__(self):
        """Return the timer name and the time elapsed since it was started."""
        if self.start is None:
            elapsed = 0.0
        else:
            elapsed = self.time() - self.start
        return "RealElapsedTimer[name = '%s', elapsed = %05.2f]" % (
            self.name, elapsed)

    def __enter__(self):
        """Start timing, register the timer as running and return it."""
        self.start = self.time()
        running_timers.append(self)
        # Returning self makes ``with RealElapsedTimer(n) as t`` usable.
        return self

    def __exit__(self, type_, value, tb):
        """Stop timing and add the elapsed seconds to ``stats[self.name]``.

        Returns None, so an exception raised inside the ``with`` body is
        not suppressed.
        """
        # time.time() is not monotonic; clamp so a clock step backwards
        # never subtracts from the accumulated total.
        elapsed = max(0, self.time() - self.start)
        stats[self.name] += elapsed
        running_timers.remove(self)
        if elapsed > self.slow_threshold:
            print("stopping", self)
            import traceback
            print(*traceback.format_stack(), sep="\n")

    @classmethod
    def time(cls):
        """Return the current wall-clock time in seconds.

        Sleeps for 0.1 ms before reading the clock; the author reports
        that this was needed to get better measurements.
        """
        # Imported locally under another name: the method itself is called
        # ``time`` and the enclosing file shows no module-level import.
        import time as _time
        _time.sleep(0.0001)
        return _time.time()
def time_fcn(name=None):
    """Return a decorator that times every call of the decorated function.

    Each call runs inside a ``RealElapsedTimer``, so its elapsed time is
    added to the counter for ``name``.

    name -- label of the counter; when omitted (or otherwise falsy) the
            decorated function's ``__name__`` is used instead.

    Usage::

        @time_fcn("alloc")
        def allocate(...): ...
    """
    # Local import: the enclosing file shows no module-level import block.
    import functools

    def inner(f):
        realname = name or f.__name__

        # wraps() keeps f's __name__ and __doc__ on the wrapper, so stacked
        # decorators and introspection still see the original function.
        @functools.wraps(f)
        def inner2(*argv, **kwargs):
            with RealElapsedTimer(realname):
                return f(*argv, **kwargs)

        return inner2

    return inner
def print_stats():
    """Print every accumulated timer total, smallest first.

    A header line is printed first.  If any timers are still running,
    they are listed before the totals.  Totals are ordered by value, with
    ties broken by timer name.
    """
    print("=== stats timers ===")
    if running_timers:
        print("running timers:", running_timers)
    # Sort (total, label) pairs so the largest totals end up at the bottom.
    ordered = sorted((total, label) for label, total in stats.items())
    for total, label in ordered:
        print(" %20s: %05.2f s" % (label, total))
_______________________________________________
PyCuda mailing list
[email protected]
http://tiker.net/mailman/listinfo/pycuda_tiker.net