On 11/06/2012 4:47 a.m., Alex Rousskov wrote:
On 06/08/2012 07:02 PM, Amos Jeffries wrote:
On 8/06/2012 2:46 a.m., Alex Rousskov wrote:
On 06/06/2012 07:25 PM, Amos Jeffries wrote:
+ if (delta< head->best)
+ head->best = delta;
+ if (delta> head->worst)
+ head->worst = delta;
+ head->summ += delta;
+ head->count++;
+ head->overheads += get_tick() - stopped_;
The above XProfilerNode::stop() code is not thread-safe because
multiple
threads may update the same "head" and you are not using atomics for
*head members.
Here is the patch implementing the above.
Ok, for now this is the second major TODO.
I'm torn between GCC __sync_* and C++11 atomic_* types.
I think a temporary compat wrapper layer may be needed.
Since SMP Squid already uses GCC __sync_* primitives and has the
corresponding wrappers, consider reusing them for your project. See
ipc/AtomicWord.h for a starting point.
The existing wrappers can be adjusted to use C++11 atomic_* types when
those are provided by the build environment, but that sounds like a very
different project to me.
The __sync primitives seem to need a lot of individual data-entry locks
and unlocks for this.
Based on the code you have posted and my recollection of the old
profiling code, no explicit locks are needed. Perhaps my recollection is
wrong or your code is different now.
Alex.
As you wrote earlier:
"
The above XProfilerNode::stop() code is not thread-safe because multiple
threads may update the same "head" and you are not using atomics for
*head members.
"
The __sync_* atomics seem to be okay for simple boolean equality test,
add or subtract. But not so much for protecting against
less/greater-than tests or arbitrary alterations.
Here is the patch implementing the spinlocks on *head before multiple
read/compare/change operations.
Amos
=== modified file 'configure.ac'
--- configure.ac 2012-05-29 12:20:02 +0000
+++ configure.ac 2012-06-09 00:28:37 +0000
@@ -378,7 +378,8 @@
dnl
dnl Check for atomic operations support in the compiler
dnl
-AC_MSG_CHECKING([for atomic operations support])
+AC_CHECK_HEADERS(atomic)
+AC_MSG_CHECKING([for GNU atomic operations support])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[
int n = 0;
]],[[
=== modified file 'lib/profiler/Profiler.cc'
--- lib/profiler/Profiler.cc 2012-01-20 18:55:04 +0000
+++ lib/profiler/Profiler.cc 2012-06-07 00:52:43 +0000
@@ -118,10 +118,6 @@
#if USE_XPROF_STATS
-
-#if HAVE_ASSERT_H
-#include <assert.h>
-#endif
#if HAVE_GNUMALLLOC_H
#include <gnumalloc.h>
#elif HAVE_MALLOC_H
@@ -131,39 +127,10 @@
#include <unistd.h>
#endif
-/* Exported Data */
TimersArray *xprof_Timers = NULL;
-
-/* Private stuff */
-
-
-/* new stuff */
-#define MAXSTACKDEPTH 512
-
-struct _callstack_entry {
- int timer; /* index into timers array */
- const char *name;
- hrtime_t start, stop, accum;
-};
-
-struct _callstack_entry cstack[MAXSTACKDEPTH];
-int cstack_head = 0;
-
-#if defined(_MSC_VER) /* Microsoft C Compiler ONLY */
-static __inline void
-#else
-static inline void
-#endif
-xprof_update(xprof_stats_data * head)
-{
- if (head->delta < head->best)
- head->best = head->delta;
- if (head->worst < head->delta)
- head->worst = head->delta;
- head->summ += head->delta;
- head->count++;
-}
-
+uint64_t *xprof_ActiveTimers = NULL;
+/* Per-timer-type count of currently running instances; allocated in
+ * xprof_InitLib(). NB: must be a pointer (array), matching the
+ * 'extern uint64_t *xprof_ActiveTimerCount' declaration in Profiler.h
+ * and the per-type indexing in XProfilerNode::stop(). */
+uint64_t *xprof_ActiveTimerCount = NULL;
+/* Global spinlock guarding the xprof_ActiveTimer* tables. */
+std::atomic_flag xprof_GlobalLock = ATOMIC_FLAG_INIT;
static xprof_stats_data *xp_UNACCOUNTED;
static int xprof_inited = 0;
@@ -173,71 +140,72 @@
if (xprof_inited)
return;
-    xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
-
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED";
-    xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick();
-    xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu;
-    cstack_head = 0;
+    while (std::atomic_flag_test_and_set_explicit(&xprof_GlobalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    if (xprof_inited) {
+        // lost the race: another thread initialized while we spun for the lock
+        std::atomic_flag_clear_explicit(&xprof_GlobalLock, std::memory_order_release);
+        return;
+    }
+    // NB: the stats array global is named xprof_Timers (not xprof_TimerStats)
+    xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node)));
+    xprof_ActiveTimers = static_cast<uint64_t *>(calloc(XPROF_LAST + 2, sizeof(uint64_t)));
+    // per-type counters, matching 'extern uint64_t *xprof_ActiveTimerCount' in Profiler.h
+    xprof_ActiveTimerCount = static_cast<uint64_t *>(calloc(XPROF_LAST + 2, sizeof(uint64_t)));
xprof_inited = 1;
-}
-
-void
-xprof_start(xprof_type type, const char *timer)
-{
- hrtime_t tt = get_tick();
- if (!xprof_inited)
- xprof_InitLib();
-
- /* If nested, stop current stack frame */
- if (cstack_head > 0) {
- cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head -
1].start;
- cstack[cstack_head - 1].start = -1;
- }
-
- /* Are we at the first level? If so; stop the unaccounted timer */
- if (cstack_head == 0) {
- assert(xp_UNACCOUNTED->start != -1);
- xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start;
- xp_UNACCOUNTED->start = -1;
- xprof_update(xp_UNACCOUNTED);
- }
-
- /* Allocate new stack frame */
- cstack[cstack_head].start = tt;
- cstack[cstack_head].stop = -1;
- cstack[cstack_head].accum = 0;
- cstack[cstack_head].timer = type;
- cstack[cstack_head].name = timer;
- cstack_head++;
- assert(cstack_head < MAXSTACKDEPTH);
-
-}
-
-void
-xprof_stop(xprof_type type, const char *timer)
-{
- hrtime_t tt = get_tick();
- assert(cstack_head > 0);
- cstack_head--;
- assert(cstack[cstack_head].timer == type);
-
- /* Record timer details */
- cstack[cstack_head].accum += tt - cstack[cstack_head].start;
- xprof_Timers[type]->accu.delta = cstack[cstack_head].accum;
- xprof_Timers[type]->name = timer;
-
- /* Update */
- xprof_update(&xprof_Timers[type]->accu);
-
- /* Restart previous timer if we're not at the top level */
- if (cstack_head > 0) {
- cstack[cstack_head - 1].start = tt;
- cstack[cstack_head - 1].stop = 0;
- return;
- }
- /* Get here? We're at the top level; unaccounted */
- xp_UNACCOUNTED->start = tt;
+    // release the global lock taken at the top of xprof_InitLib()
+    // (name must match the 'xprof_GlobalLock' definition above)
+    std::atomic_flag_clear_explicit(&xprof_GlobalLock, std::memory_order_release);
+}
+
+/// Start (or restart) this probe's timer. No-op if already running.
+void
+XProfilerNode::start()
+{
+    if (started_ > 0)
+        return; // already running
+
+    started_ = get_tick();
+
+    // kick initialization of timers statistics (just in case)
+    xprof_InitLib();
+
+    while (std::atomic_flag_test_and_set_explicit(&xprof_GlobalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    xprof_ActiveTimers[type_] += started_;  // member is 'type_', not 'type'
+    ++xprof_ActiveTimerCount[type_];        // per-type count; stop() decrements [type_]
+    std::atomic_flag_clear_explicit(&xprof_GlobalLock, std::memory_order_release);
+
+    xprof_stats_data *head = &(xprof_Timers[type_]->accu);
+    while (std::atomic_flag_test_and_set_explicit(&head->lock, std::memory_order_acquire))
+        ; // spin until the list head lock is acquired
+    head->overheads += get_tick() - started_;
+    std::atomic_flag_clear_explicit(&head->lock, std::memory_order_release);
+}
+
+/// Stop this probe's timer and fold the elapsed interval into the
+/// per-type statistics. No-op if the timer is not running.
+void
+XProfilerNode::stop()
+{
+    if (started_ < 0)
+        return; // never started, or already stopped
+
+    const hrtime_t stopped = get_tick(); // plain local: no trailing '_'
+
+    // how long this timer was running
+    const hrtime_t delta = stopped - started_;
+
+    // kick initialization of timers statistics (just in case)
+    xprof_InitLib();
+
+    // Record timer details into global counters
+    while (std::atomic_flag_test_and_set_explicit(&xprof_GlobalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+    xprof_ActiveTimers[type_] -= started_;
+    --xprof_ActiveTimerCount[type_];
+    std::atomic_flag_clear_explicit(&xprof_GlobalLock, std::memory_order_release);
+
+    // mark as stopped so a later start() (PROF_restart) actually restarts
+    // and a second stop() cannot double-count the same interval
+    started_ = -1;
+
+    xprof_stats_data *head = &(xprof_Timers[type_]->accu);
+    while (std::atomic_flag_test_and_set_explicit(&head->lock, std::memory_order_acquire))
+        ; // spin until the list head lock is acquired
+    // head is calloc()ed, so best starts at 0 and 'delta < best' would
+    // never fire; treat the first sample as the initial best
+    if (!head->count || delta < head->best)
+        head->best = delta;
+    if (delta > head->worst)
+        head->worst = delta;
+    head->summ += delta;
+    head->count++;
+    head->overheads += get_tick() - stopped;
+    std::atomic_flag_clear_explicit(&head->lock, std::memory_order_release);
}
#endif /* USE_XPROF_STATS */
=== modified file 'lib/profiler/Profiler.h'
--- lib/profiler/Profiler.h 2011-01-11 07:31:04 +0000
+++ lib/profiler/Profiler.h 2012-06-07 00:33:42 +0000
@@ -18,6 +18,10 @@
#else /* USE_XPROF_STATS */
+#if HAVE_ATOMIC
+#include <atomic>
+#endif
+
#define XP_NOBEST (hrtime_t)-1
typedef struct _xprof_stats_node xprof_stats_node;
@@ -25,13 +29,15 @@
typedef struct _xprof_stats_data xprof_stats_data;
struct _xprof_stats_data {
+    /// spinlock guarding all other members of this record.
+    /// NOTE(review): these records are calloc()ed; zero-fill leaves the
+    /// flag clear in practice, but ATOMIC_FLAG_INIT is the only portable
+    /// initializer -- TODO confirm every allocation site.
+    std::atomic_flag lock;
hrtime_t start;
hrtime_t stop;
-    hrtime_t delta;
+    hrtime_t delta; ///< interval length; still read by src/ProfStats.cc (percentage column)
hrtime_t best;
hrtime_t worst;
hrtime_t count;
hrtime_t accum;
+    int64_t overheads; ///< accumulated profiler self-overhead (ticks)
int64_t summ;
};
@@ -44,15 +50,42 @@
typedef xprof_stats_node TimersArray[1];
/* public Data */
+/// timer statistics histograms
extern TimersArray *xprof_Timers;
+/// per-timer-type count of currently running timers
+extern uint64_t *xprof_ActiveTimerCount;
+/// currently running timer accumulations
+extern uint64_t *xprof_ActiveTimers;
+/// global spinlock guarding the xprof_ActiveTimer* tables.
+/// Exported because src/ProfStats.cc also acquires it.
+extern std::atomic_flag xprof_GlobalLock;
+
/* Exported functions */
-extern void xprof_start(xprof_type type, const char *timer);
-extern void xprof_stop(xprof_type type, const char *timer);
extern void xprof_event(void *data);
-#define PROF_start(probename) xprof_start(XPROF_##probename, #probename)
-#define PROF_stop(probename) xprof_stop(XPROF_##probename, #probename)
+/// This class provides a thread-safe timer for use in code profiling.
+/// The timer starts when the object is created and stops on destruction
+/// or when the stop() method is called explicitly.
+///
+/// Use the PROF_start(name) and PROF_stop(name) macros.
+class XProfilerNode
+{
+public:
+    // started_ must be initialized before start() runs: start() reads it
+    // in its "already running?" guard, which was an uninitialized read.
+    XProfilerNode(xprof_type aType, const char *aName) :
+        type_(aType), name_(aName), started_(-1) { start(); }
+    ~XProfilerNode() { stop(); }
+
+    void start();
+    void stop();
+
+private:
+    xprof_type type_;   ///< which profile type to update when stopped
+    const char *name_;  ///< printable version of the timers type name
+    hrtime_t started_;  ///< when this timer was started; -1 while stopped
+};
+
+// NOTE: PROF_start() declares a local object, so PROF_restart()/PROF_stop()
+// must appear in the same (or an inner) scope as the matching PROF_start().
+#define PROF_start(probename) XProfilerNode XProfTimer_##probename(XPROF_##probename, #probename)
+#define PROF_restart(probename) XProfTimer_##probename.start()
+#define PROF_stop(probename) XProfTimer_##probename.stop()
#endif /* USE_XPROF_STATS */
=== modified file 'src/ProfStats.cc'
--- src/ProfStats.cc 2012-01-20 18:55:04 +0000
+++ src/ProfStats.cc 2012-06-07 01:24:08 +0000
@@ -46,12 +46,12 @@
#define MAX_SORTLIST 200
+static hrtime_t now;
static hrtime_t xprof_delta = 0;
static hrtime_t xprof_start_t = 0;
static hrtime_t xprof_verystart = 0;
static hrtime_t xprof_average_delta = 0;
static int xprof_events = 0;
-static int xprof_inited = 0;
static xprof_stats_data Totals;
static TimersArray *xprof_stats_avg1sec = NULL;
@@ -113,13 +113,14 @@
xprof_show_item(StoreEntry * sentry, const char *name, xprof_stats_data * hist)
{
storeAppendPrintf(sentry,
-                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %.2f\t %6.3f\t\n",
+                      // the new overheads column needs its own '%' conversion;
+                      // PRId64 because overheads is int64_t
+                      "%s\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRIu64 "\t %" PRId64 "\t %.2f\t %6.3f\t\n",
name,
hist->count,
hist->summ,
(hist->best != XP_NOBEST ? hist->best : 0),
hist->count ? hist->summ / hist->count : 0,
hist->worst,
+                      hist->overheads,
hist->count / time_frame,
Math::doublePercent((double) hist->summ, (double) hist->delta));
}
@@ -149,7 +150,7 @@
);
storeAppendPrintf(sentry,
- "Probe Name\t Events\t cumulated time \t best case \t
average \t worst case\t Rate / sec \t %% in int\n");
+ "Probe Name\t Events\t cumulated time \t best case \t
average \t worst case\toverheads\t Rate / sec \t %% in int\n");
for (i = 0; i < XPROF_LAST; i++) {
if (!hist[i]->name)
@@ -166,6 +167,8 @@
Totals.summ += show->summ;
+ Totals.overheads += show->overheads;
+
Totals.best += (show->best != XP_NOBEST ? show->best : 0);
Totals.worst += show->worst;
@@ -194,7 +197,11 @@
now = get_tick();
for (i = 0; i < XPROF_LAST; i++) {
+        // the spinlock added by this patch lives in _xprof_stats_data,
+        // i.e. in the node's 'accu' member, not in the node itself
+        while (std::atomic_flag_test_and_set_explicit(&(head[i]->accu.lock), std::memory_order_acquire))
+            ; // spin until this record's lock is acquired
+
hist[i]->name = head[i]->name;
+        hist[i]->accu.overheads += head[i]->accu.overheads;
hist[i]->accu.summ += head[i]->accu.summ;
hist[i]->accu.count += head[i]->accu.count; /* accumulate multisec */
@@ -225,6 +232,8 @@
hist[i]->accu.delta = 0;
head[i]->accu.start = keep;
}
+
+        // unlock this record (lock member is on accu, see Profiler.h)
+        std::atomic_flag_clear_explicit(&(head[i]->accu.lock), std::memory_order_release);
}
}
@@ -259,41 +268,37 @@
static inline void
xprof_chk_overhead(int samples)
{
+ // NOTE: the PROF_ macros below acquire the global and per-record xprof
+ // spinlocks themselves, so this must NOT be called while either lock is
+ // already held -- the atomic_flag spinlocks are not re-entrant.
+ PROF_start(PROF_OVERHEAD);
while (samples--) {
- PROF_start(PROF_OVERHEAD);
+ PROF_restart(PROF_OVERHEAD);
PROF_stop(PROF_OVERHEAD);
}
}
static void
-xprofRegisterWithCacheManager(void)
-{
- Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary,
0, 1);
-}
-
-// FIXME:
-// this gets colled once per event. This doesn't seem to make much sense,
-// does it?
-static hrtime_t now;
-static void
xprof_Init(void)
{
+ static int xprof_inited = 0;
if (xprof_inited)
return;
- xprof_delta = xprof_verystart = xprof_start_t = now;
-
+ xprof_delta = xprof_verystart = xprof_start_t = get_tick();
xprof_inited = 1;
-
- xprofRegisterWithCacheManager(); //moved here so it's not double-init'ed
+ Mgr::RegisterAction("cpu_profile", "CPU Profiling Stats", xprof_summary,
0, 1);
}
void
xprof_event(void *data)
{
-    now = get_tick();
+    hrtime_t now = get_tick(); // local; shadows the file-scope 'now' used by xprof_summary
xprof_Init();
xprof_delta = now - xprof_start_t;
+    // measure profiler overhead BEFORE taking the lock: the PROF_ macros
+    // acquire the global spinlock themselves (name is 'xprof_GlobalLock')
+    xprof_chk_overhead(2);
+
+    while (std::atomic_flag_test_and_set_explicit(&xprof_GlobalLock, std::memory_order_acquire))
+        ; // spin until the global lock is acquired
+
xprof_start_t = now;
xprof_events++;
@@ -303,8 +308,6 @@
if (xprof_average_delta > (xprof_delta >> 1))
xprof_average_delta = xprof_average_delta - (xprof_average_delta >> 8)
+ (xprof_delta >> 8);
- xprof_chk_overhead(2);
-
xprof_average(&xprof_stats_avg24hour, 24 * 3600);
xprof_average(&xprof_stats_avg5hour, 5 * 3600);
@@ -323,6 +326,8 @@
xprof_average(&xprof_stats_avg1sec, 1);
+    // release before xprof_chk_overhead(): the PROF_ macros re-acquire this
+    // lock (name must match the 'xprof_GlobalLock' definition in Profiler.cc)
+    std::atomic_flag_clear_explicit(&xprof_GlobalLock, std::memory_order_release);
+
xprof_chk_overhead(30);
eventAdd("cpuProfiling", xprof_event, NULL, 1.0, 1);