This patch implements the changes we have discussed so far, apart from choosing the <atomic> std library atomics over the GNU ones.

The behaviour and profiling coverage changes remains as described earlier. Just implementation details different to the last patch.

If I missed anything or am still getting it wrong, please inform.

Thank you
Amos
=== modified file 'configure.ac'
--- configure.ac        2012-06-12 08:50:53 +0000
+++ configure.ac        2012-06-13 03:32:10 +0000
@@ -378,7 +378,8 @@
 dnl
 dnl Check for atomic operations support in the compiler
 dnl
-AC_MSG_CHECKING([for atomic operations support])
+AC_CHECK_HEADERS(atomic)
+AC_MSG_CHECKING([for GNU atomic operations support])
 AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
     int n = 0;
 ]],[[

=== modified file 'lib/profiler/Profiler.cc'
--- lib/profiler/Profiler.cc    2012-07-02 12:28:10 +0000
+++ lib/profiler/Profiler.cc    2012-07-04 02:31:14 +0000
@@ -114,130 +114,78 @@
  */
 
 #include "squid.h"
+
+#if USE_XPROF_STATS
+
 #include "profiler/Profiler.h"
 
-#if USE_XPROF_STATS
-
-
-#if HAVE_ASSERT_H
-#include <assert.h>
-#endif
-#if HAVE_GNUMALLLOC_H
-#include <gnumalloc.h>
-#elif HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-/* Exported Data */
-TimersArray *xprof_Timers = NULL;
-
-/* Private stuff */
-
-
-/* new stuff */
-#define        MAXSTACKDEPTH   512
-
-struct _callstack_entry {
-    int timer;         /* index into timers array */
-    const char *name;
-    hrtime_t start, stop, accum;
-};
-
-struct _callstack_entry cstack[MAXSTACKDEPTH];
-int cstack_head = 0;
-
-#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
-static __inline void
-#else
-static inline void
-#endif
-xprof_update(xprof_stats_data * head)
-{
-    if (head->delta < head->best)
-        head->best = head->delta;
-    if (head->worst < head->delta)
-        head->worst = head->delta;
-    head->summ += head->delta;
-    ++head->count;
-}
-
+std::atomic<uint64_t> *xprof_ActiveTimers = NULL;
+std::atomic<uint64_t> *xprof_ActiveTimerCount = NULL;
 static xprof_stats_data *xp_UNACCOUNTED;
-static int xprof_inited = 0;
-
-static void
-xprof_InitLib(void)
-{
-    if (xprof_inited)
+static std::atomic<bool> xprof_inited = false;
+
+void
+XProfilerNode::Initialize()
+{
+    // init may only be done once. potential callers need
+    static std::atomic_flag xprof_GlobalLock = ATOMIC_FLAG_INIT;
+    while(std::atomic_flag_test_and_set_explicit(&xprof_globalLock, 
std::memory_order_acquire))
+         ; // spin until the global lock is acquired
+    // if still not set we now have a lock on it and can do the expensive 
allocations
+    // NP: xprof_inited is only set to true while locked.
+    if (!xprof_inited.load()) {
+        xprof_TimerStats = new TimersArray(XPROF_LAST + 2);
+        xprof_ActiveTimers = new std::atomic<uint64_t>(XPROF_LAST + 2);
+        xprof_ActiveTimerCount = new std::atomic<uint64_t>(XPROF_LAST + 2);
+        xprof_inited.store(true);
+    }
+    std::atomic_flag_clear_explicit(&xprof_globalLock, 
std::memory_order_release);
+}
+
+void
+XProfilerNode::start()
+{
+    if (started_ > 0)
         return;
 
-    xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, 
sizeof(xprof_stats_node)));
-
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
-    xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
-    cstack_head = 0;
-    xprof_inited = 1;
-}
-
-void
-xprof_start(xprof_type type, const char *timer)
-{
-    hrtime_t tt = get_tick();
-    if (!xprof_inited)
+    started_ = get_tick();
+
+    // kick initialization of timers statistics (if needed)
+    if (!xprof_inited.load()) {
         xprof_InitLib();
-
-    /* If nested, stop current stack frame */
-    if (cstack_head > 0) {
-        cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 
1].start;
-        cstack[cstack_head - 1].start = -1;
-    }
-
-    /* Are we at the first level? If so; stop the unaccounted timer */
-    if (cstack_head == 0) {
-        assert(xp_UNACCOUNTED->start != -1);
-        xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
-        xp_UNACCOUNTED->start = -1;
-        xprof_update(xp_UNACCOUNTED);
-    }
-
-    /* Allocate new stack frame */
-    cstack[cstack_head].start = tt;
-    cstack[cstack_head].stop = -1;
-    cstack[cstack_head].accum = 0;
-    cstack[cstack_head].timer = type;
-    cstack[cstack_head].name = timer;
-    ++cstack_head;
-    assert(cstack_head < MAXSTACKDEPTH);
-
+    }
+
+    xprof_ActiveTimers[type_] += started_;
+    ++xprof_ActiveTimerCount[type_];
+
+    xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+    head->overheads += get_tick() - started_;
 }
 
 void
-xprof_stop(xprof_type type, const char *timer)
+XProfilerNode::stop()
 {
-    hrtime_t tt = get_tick();
-    assert(cstack_head > 0);
-    --cstack_head;
-    assert(cstack[cstack_head].timer == type);
-
-    /* Record timer details */
-    cstack[cstack_head].accum += tt - cstack[cstack_head].start;
-    xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
-    xprof_Timers[type]->name = timer;
-
-    /* Update */
-    xprof_update(&xprof_Timers[type]->accu);
-
-    /* Restart previous timer if we're not at the top level */
-    if (cstack_head > 0) {
-        cstack[cstack_head - 1].start = tt;
-        cstack[cstack_head - 1].stop = 0;
+    if (started_ < 0)
         return;
-    }
-    /* Get here? We're at the top level; unaccounted */
-    xp_UNACCOUNTED->start = tt;
+
+    hrtime_t stopped_ = get_tick();
+
+    // calculate how long this timer was running.
+    hrtime_t delta = stopped_ - started_;
+
+    // Record timer details into global counters
+
+    xprof_ActiveTimers[type_] -= started_;
+    --xprof_ActiveTimerCount[type_];
+
+    xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+    if (delta < head->best.load())
+        head->best = delta;
+    if (delta > head->worst.load())
+        head->worst = delta;
+    head->summ += delta;
+    ++head->count;
+    head->overheads += get_tick() - stopped_;
 }
 
 #endif /* USE_XPROF_STATS */

=== modified file 'lib/profiler/Profiler.h'
--- lib/profiler/Profiler.h     2011-01-11 07:31:04 +0000
+++ lib/profiler/Profiler.h     2012-06-12 02:25:24 +0000
@@ -18,6 +18,10 @@
 
 #else /* USE_XPROF_STATS */
 
+#if HAVE_ATOMIC
+#include <atomic>
+#endif
+
 #define XP_NOBEST (hrtime_t)-1
 
     typedef struct _xprof_stats_node xprof_stats_node;
@@ -25,14 +29,16 @@
     typedef struct _xprof_stats_data xprof_stats_data;
 
     struct _xprof_stats_data {
-        hrtime_t start;
-        hrtime_t stop;
-        hrtime_t delta;
-        hrtime_t best;
-        hrtime_t worst;
-        hrtime_t count;
-        hrtime_t accum;
-        int64_t summ;
+        std::atomic_flag lock;
+        std::atomic<hrtime_t> start;
+        std::atomic<hrtime_t> stop;
+//        std::atomic<hrtime_t> delta; // unused?
+        std::atomic<hrtime_t> best;
+        std::atomic<hrtime_t> worst;
+        std::atomic<hrtime_t> count;
+        std::atomic<hrtime_t> accum;
+        std::atomic<int64_t> overheads;
+        std::atomic<int64_t> summ;
     };
 
     struct _xprof_stats_node {
@@ -44,15 +50,48 @@
     typedef xprof_stats_node TimersArray[1];
 
     /* public Data */
+    /// timer statistics histograms
     extern TimersArray *xprof_Timers;
 
+    /// count of active timers
+    extern std::atomic<uint64_t> *xprof_ActiveTimerCount;
+    /// currently running timer accumulations
+    extern std::atomic<uint64_t> *xprof_ActiveTimers;
+
     /* Exported functions */
-    extern void xprof_start(xprof_type type, const char *timer);
-    extern void xprof_stop(xprof_type type, const char *timer);
     extern void xprof_event(void *data);
 
-#define PROF_start(probename) xprof_start(XPROF_##probename, #probename)
-#define PROF_stop(probename) xprof_stop(XPROF_##probename, #probename)
+/*
+/// This class provides a thread-safe timer for use in code profiling
+/// Timer kicks off when the class is creaated and terminates on destruct
+/// or if stop() method is called explicitly.
+///
+/// Use PROF_start(name) and PROF_stop(name) macros
+*/
+class XProfilerNode
+{
+public:
+    XProfilerNode(xprof_type aType, const char *aName) : type_(aType), 
name_(aName) { start(); }
+    ~XProfilerNode() { stop(); }
+
+    // Initialize the xprof data arrays
+    // This function protects itself against re-entrant behaviour
+    // but in an expensive locking way. Potential callers should
+    // check with xprof_inited.load() to see if its needed first.
+    void initialize();
+
+    void start();
+    void stop();
+
+private:
+    xprof_type type_;     ///< which profile type to update when stopped
+    const char *name_;    ///< printable version of the timers type name
+    hrtime_t started_;    ///< when this timer was started
+};
+
+#define PROF_start(probename) XProfilerNode 
XProfTimer_##probename(XPROF_##probename, #probename)
+#define PROF_restart(probename) XProfTimer_##probename.start()
+#define PROF_stop(probename) XProfTimer_##probename.stop()
 
 #endif /* USE_XPROF_STATS */
 

=== modified file 'src/ProfStats.cc'
--- src/ProfStats.cc    2012-01-20 18:55:04 +0000
+++ src/ProfStats.cc    2012-06-12 02:34:06 +0000
@@ -39,6 +39,7 @@
 
 #include "event.h"
 #include "mgr/Registration.h"
+#include "profiler/Profiler.h"
 #include "SquidMath.h"
 #include "Store.h"
 
@@ -51,7 +52,6 @@
 static hrtime_t xprof_verystart = 0;
 static hrtime_t xprof_average_delta = 0;
 static int xprof_events = 0;
-static int xprof_inited = 0;
 static xprof_stats_data Totals;
 
 static TimersArray *xprof_stats_avg1sec = NULL;
@@ -113,13 +113,14 @@
 xprof_show_item(StoreEntry * sentry, const char *name, xprof_stats_data * hist)
 {
     storeAppendPrintf(sentry,
-                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" 
PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n",
+                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" 
PRIu64 "\t %" PRIu64 "\t" PRIu64 "\t %.2f\t %6.3f\t\n",
                       name,
                       hist->count,
                       hist->summ,
                       (hist->best != XP_NOBEST ? hist->best : 0),
                       hist->count ? hist->summ / hist->count : 0,
                       hist->worst,
+                      hist->overheads,
                       hist->count / time_frame,
                       Math::doublePercent((double) hist->summ, (double) 
hist->delta));
 }
@@ -149,7 +150,7 @@
                      );
 
     storeAppendPrintf(sentry,
-                      "Probe Name\t  Events\t cumulated time \t best case \t 
average \t worst case\t Rate / sec \t %% in int\n");
+                      "Probe Name\t  Events\t cumulated time \t best case \t 
average \t worst case\toverheads\t Rate / sec \t %% in int\n");
 
     for (i = 0; i < XPROF_LAST; i++) {
         if (!hist[i]->name)
@@ -166,6 +167,8 @@
 
         Totals.summ += show->summ;
 
+        Totals.overheads += show->overheads;
+
         Totals.best += (show->best != XP_NOBEST ? show->best : 0);
 
         Totals.worst += show->worst;
@@ -187,7 +190,7 @@
     int doavg = (xprof_events % secs);
 
     if (!*list)
-        *list = (TimersArray *)xcalloc(XPROF_LAST, sizeof(xprof_stats_node));
+        *list = new TimersArray(XPROF_LAST);
 
     hist = *list;
 
@@ -195,6 +198,7 @@
 
     for (i = 0; i < XPROF_LAST; i++) {
         hist[i]->name = head[i]->name;
+        hist[i]->accu.overheads += head[i]->accu.overheads;
         hist[i]->accu.summ += head[i]->accu.summ;
         hist[i]->accu.count += head[i]->accu.count;    /* accumulate multisec 
*/
 
@@ -259,40 +263,31 @@
 static inline void
 xprof_chk_overhead(int samples)
 {
+    PROF_start(PROF_OVERHEAD);
     while (samples--) {
-        PROF_start(PROF_OVERHEAD);
+        PROF_restart(PROF_OVERHEAD);
         PROF_stop(PROF_OVERHEAD);
     }
 }
 
 static void
-xprofRegisterWithCacheManager(void)
+xprof_Init(void)
 {
+    xprof_delta = xprof_verystart = xprof_start_t = get_tick();
     Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary, 
0, 1);
 }
 
-// FIXME:
-// this gets colled once per event. This doesn't seem to make much sense,
-// does it?
-static hrtime_t now;
-static void
-xprof_Init(void)
-{
-    if (xprof_inited)
-        return;
-
-    xprof_delta = xprof_verystart = xprof_start_t = now;
-
-    xprof_inited = 1;
-
-    xprofRegisterWithCacheManager(); //moved here so it's not double-init'ed
-}
-
 void
 xprof_event(void *data)
 {
-    now = get_tick();
-    xprof_Init();
+    static bool inited = false;
+    if (inited) {
+        xprof_Init();
+        inited = true;
+    }
+
+    hrtime_t now = get_tick();
+
     xprof_delta = now - xprof_start_t;
     xprof_start_t = now;
     xprof_events++;

Reply via email to