Support system-wide profiling on multiple CPUs.

This is accomplished by vectorizing the session interfaces and using one thread per CPU.

Signed-off-by: Arun Sharma <arun.sharma@google.com>

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/python/README	Fri Mar 28 06:12:08 2008 +0000
@@ -0,0 +1,8 @@
+Requirements:
+
+To use the Python bindings, you need the following packages:
+
+1. swig (http://www.swig.org)
+2. python-dev (http://www.python.org)
+3. pycpuid (http://code.google.com/p/pycpuid)
+   linux.sched is a Python package that comes with pycpuid.
diff -r f38807a86f76 python/self.py
--- a/python/self.py	Thu Mar 27 15:25:30 2008 -0700
+++ b/python/self.py	Fri Mar 28 06:12:08 2008 +0000
@@ -55,4 +55,4 @@ if __name__ == '__main__':
 
   # read the counts
   for i in xrange(s.npmds):
-    print """PMD%d\t%lu""" % (s.pmds[i].reg_num, s.pmds[i].reg_value)
+    print """PMD%d\t%lu""" % (s.pmds[0][i].reg_num, s.pmds[0][i].reg_value)
diff -r f38807a86f76 python/src/session.py
--- a/python/src/session.py	Thu Mar 27 15:25:30 2008 -0700
+++ b/python/src/session.py	Fri Mar 28 06:12:08 2008 +0000
@@ -23,11 +23,15 @@
 # 
 
 from perfmon import *
+from linux import sched
 import os
+from threading import Thread
+# Queue25 shouldn't be necessary for Python >= 2.5, whose Queue has task_done()/join()
+from Queue25 import Queue
 
 # Common base class
 class Session:
-  def __init__(self):
+  def __init__(self, n):
     self.system = System()
     # Initialize
     opts = pfmlib_options_t()
@@ -36,30 +40,41 @@ class Session:
     pfm_initialize()
 
     # Setup context
-    self.ctx = pfarg_ctx_t()
-    self.ctx.zero()
-    self.ctx.ctx_flags = self.ctx_flags
-    self.fd = pfm_create_context(self.ctx, None, None, 0)
-
-  def __del__(self):
-    os.close(self.fd)
-
-  def dispatch_events(self, events):
+    self.ctxts = []
+    self.fds = []
+    self.inps = []
+    self.outps = []
+    self.pmcs = []
+    self.pmds = []
+    for i in xrange(n):
+      ctx = pfarg_ctx_t()
+      ctx.zero()
+      ctx.ctx_flags = self.ctx_flags
+      fd = pfm_create_context(ctx, None, None, 0)
+      self.ctxts.append(ctx)
+      self.fds.append(fd)
+
+  def __del__(self):
+    if self.__dict__.has_key("fds"):
+      for fd in self.fds:
+        os.close(fd)
+
+  def dispatch_event_one(self, events, which):
     # Select and dispatch events
-    self.inp = inp = pfmlib_input_param_t()
+    inp = pfmlib_input_param_t()
     for i in xrange(0, len(events)):
       pfm_find_full_event(events[i], inp.pfp_events[i])
     inp.pfp_dfl_plm = self.default_pl
     inp.pfp_flags = self.pfp_flags
-    self.outp = outp = pfmlib_output_param_t()
+    outp = pfmlib_output_param_t()
     cnt = len(events)
     inp.pfp_event_count = cnt
     pfm_dispatch_events(inp, None, outp, None)
 
     # pfp_pm_count may be > cnt
     cnt = outp.pfp_pmc_count
-    self.pmcs = pmcs = pmc(outp.pfp_pmc_count)
-    self.pmds = pmds = pmd(outp.pfp_pmd_count)
+    pmcs = pmc(outp.pfp_pmc_count)
+    pmds = pmd(outp.pfp_pmd_count)
     for i in xrange(outp.pfp_pmc_count):
       npmc = pfarg_pmc_t()
       npmc.reg_num = outp.pfp_pmcs[i].reg_num
@@ -74,53 +89,121 @@ class Session:
       pmds[i] = npmd
 
     # Program PMCs and PMDs
-    fd = self.fd
+    fd = self.fds[which]
     pfm_write_pmcs(fd, pmcs, outp.pfp_pmc_count)
     pfm_write_pmds(fd, pmds, outp.pfp_pmd_count)
 
-  def load(self):
-    fd = self.fd
+    # Save all the state in various vectors
+    self.inps.append(inp)
+    self.outps.append(outp)
+    self.pmcs.append(pmcs)
+    self.pmds.append(pmds)
+
+  def dispatch_events(self, events):
+    for i in xrange(len(self.fds)):
+      self.dispatch_event_one(events, i)
+
+  def load_one(self, i):
+    fd = self.fds[i]
     load = pfarg_load_t()
     load.zero()
-    load.load_pid = self.target
+    load.load_pid = self.targets[i]
     try:
       pfm_load_context(fd, load)
     except OSError, err:
       import errno
       if (err.errno == errno.EBUSY):
         err.strerror = "Another conflicting perfmon session?"
-      if (err.errno == errno.EINVAL) and self.ctx_flags == PFM_FL_SYSTEM_WIDE:
-        err.strerror = "Run this program on the same cpu as the target. " + \
-	  "Eg: taskset -c cpu script.py"
       raise err
 
+  def load(self):
+    for i in xrange(len(self.fds)):
+      self.load_one(i)
+    
+  def start_one(self, i):
+    pfm_start(self.fds[i], None)
+
   def start(self):
-    pfm_start(self.fd, None)
+    for i in xrange(len(self.fds)):
+      self.start_one(i)
+
+  def stop_one(self, i):
+    fd = self.fds[i]
+    pmds = self.pmds[i]
+    pfm_stop(fd)
+    pfm_read_pmds(fd, pmds, self.npmds)
 
   def stop(self):
-    fd = self.fd
-    cnt = self.outp.pfp_pmd_count
-    pfm_stop(fd)
-    pfm_read_pmds(fd, self.pmds, cnt)
+    for i in xrange(len(self.fds)):
+      self.stop_one(i)
+
+class PerfmonThread(Thread):
+  def __init__(self, session, i, cpu):
+    Thread.__init__(self)
+    self.cpu = cpu
+    self.session = session
+    self.index = i
+    self.done = 0
+
+  def run(self):
+    queue = self.session.queues[self.index]
+    cpu_set = sched.cpu_set_t()
+    cpu_set.set(self.cpu)
+    sched.setaffinity(0, cpu_set)
+    while not self.done:
+      # wait for a command from the master
+      method = queue.get()
+      method(self.session, self.index)
+      queue.task_done()
 
 class SystemWideSession(Session):
-  def __init__(self, cpu):
+  def __init__(self, cpulist):
     self.default_pl =  PFM_PLM3 | PFM_PLM0
-    self.target = cpu
+    self.targets = cpulist
     self.ctx_flags = PFM_FL_SYSTEM_WIDE
     self.pfp_flags = PFMLIB_PFP_SYSTEMWIDE
-    Session.__init__(self)
-
-  def __del__(self):
+    self.threads = []
+    self.queues = []
+    n = len(cpulist)
+    for i in xrange(n):
+      t = PerfmonThread(self, i, cpulist[i])
+      self.threads.append(t)
+      self.queues.append(Queue(0))
+      t.start()
+    Session.__init__(self, n)
+
+  def __del__(self):
+    self.cleanup()
     Session.__del__(self)
+
+  def cleanup(self):
+    for t in self.threads:
+      t.done = 1
+      self.stop_one(t.index)
+      t.join()
+
+  def load_one(self, i):
+    # Tell thread i to call Session.load_one()
+    self.queues[i].put(Session.load_one)
+    self.queues[i].join()
+
+  def start_one(self, i):
+    # Tell thread i to call Session.start_one()
+    self.queues[i].put(Session.start_one)
+    self.queues[i].join()
+
+  def stop_one(self, i):
+    # Tell thread i to call Session.stop_one()
+    self.queues[i].put(Session.stop_one)
+    self.queues[i].join()
 
 class PerThreadSession(Session):
   def __init__(self, pid):
-    self.target = pid
+    self.targets = [pid]
     self.default_pl =  PFM_PLM3
     self.ctx_flags = 0
     self.pfp_flags = 0
-    Session.__init__(self)
+    Session.__init__(self, 1)
 
   def __del__(self):
     Session.__del__(self)
diff -r f38807a86f76 python/sys.py
--- a/python/sys.py	Thu Mar 27 15:25:30 2008 -0700
+++ b/python/sys.py	Fri Mar 28 06:13:34 2008 +0000
@@ -23,42 +23,45 @@
 # 
 # System wide monitoring example. Copied from syst.c
 #
-# Prints the CPI (cycles per instruction) on a given cpu
-# Run as: taskset -c cpu ./sys.py -c cpu
+# Run as: ./sys.py -c cpulist -e eventlist
 
+import sys
 import os
 from optparse import OptionParser
-import random
 import time 
-import errno
 from perfmon import *
 
 if __name__ == '__main__':
-  parser = OptionParser()
-  parser.add_option("-c", "--cpu", help="Monitor this cpu",
-		    action="store", dest="cpu")
-  parser.add_option("-e", "--events", help="Events to use",
-                    action="store", dest="events")
-  parser.set_defaults(cpu=0)
-  (options, args) = parser.parse_args()
+  try:
+    parser = OptionParser()
+    parser.add_option("-e", "--events", help="Events to use",
+		       action="store", dest="events")
+    parser.add_option("-c", "--cpulist", help="CPUs to monitor",
+		       action="store", dest="cpulist")
+    parser.set_defaults(cpu=0)
+    (options, args) = parser.parse_args()
 
-  s = SystemWideSession(int(options.cpu))
+    cpus = options.cpulist.split(',')
+    cpus = [ int(c) for c in cpus ] 
+    s = SystemWideSession(cpus)
 
-  if options.events:
-    events = options.events.split(",")
-  else:
-    raise "You need to specify events to monitor"
+    if options.events:
+      events = options.events.split(",")
+    else:
+      raise "You need to specify events to monitor"
 
-  s.dispatch_events(events)
-  s.load()
+    s.dispatch_events(events)
+    s.load()
 
-  # Measuring loop
-  print "Printing CPI"
-  for i in range(1, 10):
-    s.start()
-    time.sleep(1)
-    s.stop()
-    # Print the counts
-    #for i in xrange(cnt):
-    #  print pmds[i].reg_value,
-    print 1.0 * s.pmds[0].reg_value/s.pmds[1].reg_value
+    # Measuring loop
+    for i in range(1, 10):
+      s.start()
+      time.sleep(1)
+      s.stop()
+      # Print the counts
+      for cpu in xrange(len(cpus)):
+	for i in xrange(s.npmds):
+	  print "CPU%d.PMD%d\t%lu""" % (cpu, s.pmds[cpu][i].reg_num, 
+				        s.pmds[cpu][i].reg_value)
+  finally:
+    s.cleanup()
