This patch implements the changes we have discussed so far, apart from
choosing the <atomic> std library atomics over the GNU ones.
The behaviour and profiling coverage changes remains as described
earlier. Just implementation details different to the last patch.
If I missed anything or am still getting it wrong, please inform.
Thank you
Amos
=== modified file 'configure.ac'
--- configure.ac 2012-06-12 08:50:53 +0000
+++ configure.ac 2012-06-13 03:32:10 +0000
@@ -378,7 +378,8 @@
dnl
dnl Check for atomic operations support in the compiler
dnl
-AC_MSG_CHECKING([for atomic operations support])
+AC_CHECK_HEADERS(atomic)
+AC_MSG_CHECKING([for GNU atomic operations support])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
int n = 0;
]],[[
=== modified file 'lib/profiler/Profiler.cc'
--- lib/profiler/Profiler.cc 2012-07-02 12:28:10 +0000
+++ lib/profiler/Profiler.cc 2012-07-04 02:31:14 +0000
@@ -114,130 +114,78 @@
*/
#include "squid.h"
+
+#if USE_XPROF_STATS
+
#include "profiler/Profiler.h"
-#if USE_XPROF_STATS
-
-
-#if HAVE_ASSERT_H
-#include <assert.h>
-#endif
-#if HAVE_GNUMALLLOC_H
-#include <gnumalloc.h>
-#elif HAVE_MALLOC_H
-#include <malloc.h>
-#endif
-#if HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-
-/* Exported Data */
-TimersArray *xprof_Timers = NULL;
-
-/* Private stuff */
-
-
-/* new stuff */
-#define MAXSTACKDEPTH 512
-
-struct _callstack_entry {
- int timer; /* index into timers array */
- const char *name;
- hrtime_t start, stop, accum;
-};
-
-struct _callstack_entry cstack[MAXSTACKDEPTH];
-int cstack_head = 0;
-
-#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
-static __inline void
-#else
-static inline void
-#endif
-xprof_update(xprof_stats_data * head)
-{
- if (head->delta < head->best)
- head->best = head->delta;
- if (head->worst < head->delta)
- head->worst = head->delta;
- head->summ += head->delta;
- ++head->count;
-}
-
+std::atomic<uint64_t> *xprof_ActiveTimers = NULL;
+std::atomic<uint64_t> *xprof_ActiveTimerCount = NULL;
static xprof_stats_data *xp_UNACCOUNTED;
-static int xprof_inited = 0;
-
-static void
-xprof_InitLib(void)
-{
- if (xprof_inited)
+static std::atomic<bool> xprof_inited = false;
+
+void
+XProfilerNode::Initialize()
+{
+ // init may only be done once. potential callers need
+ static std::atomic_flag xprof_GlobalLock = ATOMIC_FLAG_INIT;
+ while(std::atomic_flag_test_and_set_explicit(&xprof_globalLock,
std::memory_order_acquire))
+ ; // spin until the global lock is acquired
+ // if still not set we now have a lock on it and can do the expensive
allocations
+ // NP: xprof_inited is only set to true while locked.
+ if (!xprof_inited.load()) {
+ xprof_TimerStats = new TimersArray(XPROF_LAST + 2);
+ xprof_ActiveTimers = new std::atomic<uint64_t>(XPROF_LAST + 2);
+ xprof_ActiveTimerCount = new std::atomic<uint64_t>(XPROF_LAST + 2);
+ xprof_inited.store(true);
+ }
+ std::atomic_flag_clear_explicit(&xprof_globalLock,
std::memory_order_release);
+}
+
+void
+XProfilerNode::start()
+{
+ if (started_ > 0)
return;
- xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2,
sizeof(xprof_stats_node)));
-
- xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
- xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
- xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
- cstack_head = 0;
- xprof_inited = 1;
-}
-
-void
-xprof_start(xprof_type type, const char *timer)
-{
- hrtime_t tt = get_tick();
- if (!xprof_inited)
+ started_ = get_tick();
+
+ // kick initialization of timers statistics (if needed)
+ if (!xprof_inited.load()) {
xprof_InitLib();
-
- /* If nested, stop current stack frame */
- if (cstack_head > 0) {
- cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head -
1].start;
- cstack[cstack_head - 1].start = -1;
- }
-
- /* Are we at the first level? If so; stop the unaccounted timer */
- if (cstack_head == 0) {
- assert(xp_UNACCOUNTED->start != -1);
- xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
- xp_UNACCOUNTED->start = -1;
- xprof_update(xp_UNACCOUNTED);
- }
-
- /* Allocate new stack frame */
- cstack[cstack_head].start = tt;
- cstack[cstack_head].stop = -1;
- cstack[cstack_head].accum = 0;
- cstack[cstack_head].timer = type;
- cstack[cstack_head].name = timer;
- ++cstack_head;
- assert(cstack_head < MAXSTACKDEPTH);
-
+ }
+
+ xprof_ActiveTimers[type_] += started_;
+ ++xprof_ActiveTimerCount[type_];
+
+ xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+ head->overheads += get_tick() - started_;
}
void
-xprof_stop(xprof_type type, const char *timer)
+XProfilerNode::stop()
{
- hrtime_t tt = get_tick();
- assert(cstack_head > 0);
- --cstack_head;
- assert(cstack[cstack_head].timer == type);
-
- /* Record timer details */
- cstack[cstack_head].accum += tt - cstack[cstack_head].start;
- xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
- xprof_Timers[type]->name = timer;
-
- /* Update */
- xprof_update(&xprof_Timers[type]->accu);
-
- /* Restart previous timer if we're not at the top level */
- if (cstack_head > 0) {
- cstack[cstack_head - 1].start = tt;
- cstack[cstack_head - 1].stop = 0;
+ if (started_ < 0)
return;
- }
- /* Get here? We're at the top level; unaccounted */
- xp_UNACCOUNTED->start = tt;
+
+ hrtime_t stopped_ = get_tick();
+
+ // calculate how long this timer was running.
+ hrtime_t delta = stopped_ - started_;
+
+ // Record timer details into global counters
+
+ xprof_ActiveTimers[type_] -= started_;
+ --xprof_ActiveTimerCount[type_];
+
+ xprof_stats_data * head = &(xprof_Timers[type_]->accu);
+ if (delta < head->best.load())
+ head->best = delta;
+ if (delta > head->worst.load())
+ head->worst = delta;
+ head->summ += delta;
+ ++head->count;
+ head->overheads += get_tick() - stopped_;
}
#endif /* USE_XPROF_STATS */
=== modified file 'lib/profiler/Profiler.h'
--- lib/profiler/Profiler.h 2011-01-11 07:31:04 +0000
+++ lib/profiler/Profiler.h 2012-06-12 02:25:24 +0000
@@ -18,6 +18,10 @@
#else /* USE_XPROF_STATS */
+#if HAVE_ATOMIC
+#include <atomic>
+#endif
+
#define XP_NOBEST (hrtime_t)-1
typedef struct _xprof_stats_node xprof_stats_node;
@@ -25,14 +29,16 @@
typedef struct _xprof_stats_data xprof_stats_data;
struct _xprof_stats_data {
- hrtime_t start;
- hrtime_t stop;
- hrtime_t delta;
- hrtime_t best;
- hrtime_t worst;
- hrtime_t count;
- hrtime_t accum;
- int64_t summ;
+ std::atomic_flag lock;
+ std::atomic<hrtime_t> start;
+ std::atomic<hrtime_t> stop;
+// std::atomic<hrtime_t> delta; // unused?
+ std::atomic<hrtime_t> best;
+ std::atomic<hrtime_t> worst;
+ std::atomic<hrtime_t> count;
+ std::atomic<hrtime_t> accum;
+ std::atomic<int64_t> overheads;
+ std::atomic<int64_t> summ;
};
struct _xprof_stats_node {
@@ -44,15 +50,48 @@
typedef xprof_stats_node TimersArray[1];
/* public Data */
+ /// timer statistics histograms
extern TimersArray *xprof_Timers;
+ /// count of active timers
+ extern std::atomic<uint64_t> *xprof_ActiveTimerCount;
+ /// currently running timer accumulations
+ extern std::atomic<uint64_t> *xprof_ActiveTimers;
+
/* Exported functions */
- extern void xprof_start(xprof_type type, const char *timer);
- extern void xprof_stop(xprof_type type, const char *timer);
extern void xprof_event(void *data);
-#define PROF_start(probename) xprof_start(XPROF_##probename, #probename)
-#define PROF_stop(probename) xprof_stop(XPROF_##probename, #probename)
+/*
+/// This class provides a thread-safe timer for use in code profiling
+/// Timer kicks off when the class is creaated and terminates on destruct
+/// or if stop() method is called explicitly.
+///
+/// Use PROF_start(name) and PROF_stop(name) macros
+*/
+class XProfilerNode
+{
+public:
+ XProfilerNode(xprof_type aType, const char *aName) : type_(aType),
name_(aName) { start(); }
+ ~XProfilerNode() { stop(); }
+
+ // Initialize the xprof data arrays
+ // This function protects itself against re-entrant behaviour
+ // but in an expensive locking way. Potential callers should
+ // check with xprof_inited.load() to see if its needed first.
+ void initialize();
+
+ void start();
+ void stop();
+
+private:
+ xprof_type type_; ///< which profile type to update when stopped
+ const char *name_; ///< printable version of the timers type name
+ hrtime_t started_; ///< when this timer was started
+};
+
+#define PROF_start(probename) XProfilerNode
XProfTimer_##probename(XPROF_##probename, #probename)
+#define PROF_restart(probename) XProfTimer_##probename.start()
+#define PROF_stop(probename) XProfTimer_##probename.stop()
#endif /* USE_XPROF_STATS */
=== modified file 'src/ProfStats.cc'
--- src/ProfStats.cc 2012-01-20 18:55:04 +0000
+++ src/ProfStats.cc 2012-06-12 02:34:06 +0000
@@ -39,6 +39,7 @@
#include "event.h"
#include "mgr/Registration.h"
+#include "profiler/Profiler.h"
#include "SquidMath.h"
#include "Store.h"
@@ -51,7 +52,6 @@
static hrtime_t xprof_verystart = 0;
static hrtime_t xprof_average_delta = 0;
static int xprof_events = 0;
-static int xprof_inited = 0;
static xprof_stats_data Totals;
static TimersArray *xprof_stats_avg1sec = NULL;
@@ -113,13 +113,14 @@
xprof_show_item(StoreEntry * sentry, const char *name, xprof_stats_data * hist)
{
storeAppendPrintf(sentry,
- "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %"
PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n",
+ "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %"
PRIu64 "\t %" PRIu64 "\t" PRIu64 "\t %.2f\t %6.3f\t\n",
name,
hist->count,
hist->summ,
(hist->best != XP_NOBEST ? hist->best : 0),
hist->count ? hist->summ / hist->count : 0,
hist->worst,
+ hist->overheads,
hist->count / time_frame,
Math::doublePercent((double) hist->summ, (double)
hist->delta));
}
@@ -149,7 +150,7 @@
);
storeAppendPrintf(sentry,
- "Probe Name\t Events\t cumulated time \t best case \t
average \t worst case\t Rate / sec \t %% in int\n");
+ "Probe Name\t Events\t cumulated time \t best case \t
average \t worst case\toverheads\t Rate / sec \t %% in int\n");
for (i = 0; i < XPROF_LAST; i++) {
if (!hist[i]->name)
@@ -166,6 +167,8 @@
Totals.summ += show->summ;
+ Totals.overheads += show->overheads;
+
Totals.best += (show->best != XP_NOBEST ? show->best : 0);
Totals.worst += show->worst;
@@ -187,7 +190,7 @@
int doavg = (xprof_events % secs);
if (!*list)
- *list = (TimersArray *)xcalloc(XPROF_LAST, sizeof(xprof_stats_node));
+ *list = new TimersArray(XPROF_LAST);
hist = *list;
@@ -195,6 +198,7 @@
for (i = 0; i < XPROF_LAST; i++) {
hist[i]->name = head[i]->name;
+ hist[i]->accu.overheads += head[i]->accu.overheads;
hist[i]->accu.summ += head[i]->accu.summ;
hist[i]->accu.count += head[i]->accu.count; /* accumulate multisec
*/
@@ -259,40 +263,31 @@
static inline void
xprof_chk_overhead(int samples)
{
+ PROF_start(PROF_OVERHEAD);
while (samples--) {
- PROF_start(PROF_OVERHEAD);
+ PROF_restart(PROF_OVERHEAD);
PROF_stop(PROF_OVERHEAD);
}
}
static void
-xprofRegisterWithCacheManager(void)
+xprof_Init(void)
{
+ xprof_delta = xprof_verystart = xprof_start_t = get_tick();
Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary,
0, 1);
}
-// FIXME:
-// this gets colled once per event. This doesn't seem to make much sense,
-// does it?
-static hrtime_t now;
-static void
-xprof_Init(void)
-{
- if (xprof_inited)
- return;
-
- xprof_delta = xprof_verystart = xprof_start_t = now;
-
- xprof_inited = 1;
-
- xprofRegisterWithCacheManager(); //moved here so it's not double-init'ed
-}
-
void
xprof_event(void *data)
{
- now = get_tick();
- xprof_Init();
+ static bool inited = false;
+ if (inited) {
+ xprof_Init();
+ inited = true;
+ }
+
+ hrtime_t now = get_tick();
+
xprof_delta = now - xprof_start_t;
xprof_start_t = now;
xprof_events++;