Just trying share an alternative, could we still do the coverage clear inside 'pmd_thread_main()'. However, instead of contending for the 'coverage_mutex', use 'ovs_mutex_trylock()'. Since pmd thread runs in infinite loop, it can constantly try grabbing the lock and will eventually acquire it.
Something like this: diff --git a/lib/coverage.c b/lib/coverage.c index 6121956..9f23f99 100644 --- a/lib/coverage.c +++ b/lib/coverage.c @@ -272,6 +272,37 @@ coverage_clear(void) } } +/* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to + * synchronize per-thread counters with global counters. Exits immediately + * without updating the 'thread_time' if cannot acquire the 'coverage_mutex'. + * This is to avoid lock contention for some performance-critical threads. + */ +void +coverage_try_clear(void) +{ + long long int now, *thread_time; + + now = time_msec(); + thread_time = coverage_clear_time_get(); + + /* Initialize the coverage_clear_time. */ + if (*thread_time == LLONG_MIN) { + *thread_time = now + COVERAGE_CLEAR_INTERVAL; + } + + if (now >= *thread_time) { + if (ovs_mutex_trylock(&coverage_mutex)) { + size_t i; + for (i = 0; i < n_coverage_counters; i++) { + struct coverage_counter *c = coverage_counters[i]; + c->total += c->count(); + } + ovs_mutex_unlock(&coverage_mutex); + *thread_time = now + COVERAGE_CLEAR_INTERVAL; + } + } +} + /* Runs approximately every COVERAGE_RUN_INTERVAL amount of time to update the * coverage counters' 'min' and 'hr' array. 'min' array is for cumulating * per second counts into per minute count. 'hr' array is for cumulating per diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index c144352..96884ec 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -2695,6 +2695,7 @@ reload: lc = 0; + coverage_try_clear(); emc_cache_slow_sweep(&pmd->flow_cache); ovsrcu_quiesce(); On Tue, Aug 11, 2015 at 6:24 PM, Alex Wang <al...@nicira.com> wrote: > Hi IIya, > > Thx a lot for writing up this fix, > > The reason that you did not see the pmd related coverage counter stats > (in "ovs-appctl coverage/show") is that we do not call coverage_clear() > anywhere in pmd_thread_main(). This is in that the coverage_clear() > requires holding the global mutex in the coverage module which can > be super expensive for dpdk packet processing. > > Want to admit that your fix is more complicated than I expect. Especially, > changing the global (extern) struct 'counter_##COUNTER' to per-thread > and expanding the array size by number of threads times can also be > expensive for main thread... > > Do you think there could be a simpler way to achieve this? I'd like to > think > about other possible solutions more, > > Thanks, > Alex Wang, > > > On Mon, Aug 10, 2015 at 7:45 AM, Ilya Maximets <i.maxim...@samsung.com> > wrote: > >> Currently coverage_counter_register() is called only from >> constructor functions at program initialization time by >> main thread. Coverage counter values are static and >> thread local. >> >> This means, that all COVERAGE_INC() calls from pmd threads >> leads to increment of thread local variables of that threads >> that can't be accessed by anyone else. And they are not used >> in final coverage accounting. >> >> Fix that by adding ability to add/remove coverage counters >> in runtime for newly created threads and counting coverage >> using counters from all threads. >> >> Signed-off-by: Ilya Maximets <i.maxim...@samsung.com> >> --- >> lib/coverage.c | 83 >> +++++++++++++++++++++++++++++++++++++++++++++---------- >> lib/coverage.h | 27 ++++++++++++------ >> lib/dpif-netdev.c | 4 ++- >> 3 files changed, 91 insertions(+), 23 deletions(-) >> >> diff --git a/lib/coverage.c b/lib/coverage.c >> index 6121956..2ae9e7a 100644 >> --- a/lib/coverage.c >> +++ b/lib/coverage.c >> @@ -30,14 +30,22 @@ VLOG_DEFINE_THIS_MODULE(coverage); >> >> /* The coverage counters. */ >> static struct coverage_counter **coverage_counters = NULL; >> -static size_t n_coverage_counters = 0; >> static size_t allocated_coverage_counters = 0; >> >> +static void (**coverage_initializers)(void) = NULL; >> +static size_t n_coverage_initializers = 0; >> +static size_t allocated_coverage_initializers = 0; >> + >> static struct ovs_mutex coverage_mutex = OVS_MUTEX_INITIALIZER; >> >> DEFINE_STATIC_PER_THREAD_DATA(long long int, coverage_clear_time, >> LLONG_MIN); >> static long long int coverage_run_time = LLONG_MIN; >> >> +volatile unsigned int *coverage_val[2048] = {NULL}; >> +volatile size_t n_coverage_counters = 0; >> + >> +DEFINE_STATIC_PER_THREAD_DATA(unsigned int, coverage_thread_start, 0); >> + >> /* Index counter used to compute the moving average array's index. */ >> static unsigned int idx_count = 0; >> >> @@ -54,7 +62,44 @@ coverage_counter_register(struct coverage_counter* >> counter) >> &allocated_coverage_counters, >> sizeof(struct coverage_counter*)); >> } >> - coverage_counters[n_coverage_counters++] = counter; >> + coverage_counters[n_coverage_counters] = counter; >> + coverage_val[n_coverage_counters++] = NULL; >> +} >> + >> +void >> +coverage_initializer_register(void (*f)(void)) >> +{ >> + if (n_coverage_initializers >= allocated_coverage_initializers) { >> + coverage_initializers = x2nrealloc(coverage_initializers, >> + &allocated_coverage_initializers, >> + sizeof *coverage_initializers); >> + } >> + coverage_initializers[n_coverage_initializers++] = f; >> +} >> + >> +void >> +coverage_register_new_thread(void) >> +{ >> + int i; >> + ovs_mutex_lock(&coverage_mutex); >> + *coverage_thread_start_get() = n_coverage_counters; >> + for (i = 0; i < n_coverage_initializers; i++) >> + coverage_initializers[i](); >> + ovs_mutex_unlock(&coverage_mutex); >> +} >> + >> +void >> +coverage_unregister_thread(void) >> +{ >> + int i; >> + ovs_mutex_lock(&coverage_mutex); >> + for (i = *coverage_thread_start_get() + n_coverage_initializers; >> + i < n_coverage_counters; i++) { >> + coverage_counters[i - n_coverage_initializers] = >> coverage_counters[i]; >> + coverage_val[i - n_coverage_initializers] = coverage_val[i]; >> + } >> + n_coverage_counters -= n_coverage_initializers; >> + ovs_mutex_unlock(&coverage_mutex); >> } >> >> static void >> @@ -102,13 +147,12 @@ coverage_hash(void) >> uint32_t hash = 0; >> int n_groups, i; >> >> + ovs_mutex_lock(&coverage_mutex); >> /* Sort coverage counters into groups with equal totals. */ >> c = xmalloc(n_coverage_counters * sizeof *c); >> - ovs_mutex_lock(&coverage_mutex); >> for (i = 0; i < n_coverage_counters; i++) { >> c[i] = coverage_counters[i]; >> } >> - ovs_mutex_unlock(&coverage_mutex); >> qsort(c, n_coverage_counters, sizeof *c, compare_coverage_counters); >> >> /* Hash the names in each group along with the rank. */ >> @@ -130,6 +174,7 @@ coverage_hash(void) >> i = j; >> } >> >> + ovs_mutex_unlock(&coverage_mutex); >> free(c); >> >> return hash_int(n_groups, hash); >> @@ -200,9 +245,11 @@ coverage_read(struct svec *lines) >> { >> struct coverage_counter **c = coverage_counters; >> unsigned long long int *totals; >> + unsigned long long int *total_sec, *total_min, *total_hr; >> size_t n_never_hit; >> uint32_t hash; >> size_t i; >> + int initial_n = n_coverage_initializers; >> >> hash = coverage_hash(); >> >> @@ -213,14 +260,20 @@ coverage_read(struct svec *lines) >> "hash=%08"PRIx32":", >> COVERAGE_RUN_INTERVAL/1000, hash)); >> >> - totals = xmalloc(n_coverage_counters * sizeof *totals); >> ovs_mutex_lock(&coverage_mutex); >> + totals = xcalloc(n_coverage_counters, sizeof *totals); >> + total_sec = xcalloc(n_coverage_counters, sizeof *total_sec); >> + total_min = xcalloc(n_coverage_counters, sizeof *total_min); >> + total_hr = xcalloc(n_coverage_counters, sizeof *total_hr); >> + >> for (i = 0; i < n_coverage_counters; i++) { >> - totals[i] = c[i]->total; >> + totals[i % initial_n] += c[i]->total; >> + total_sec[i % initial_n] += c[i]->min[(idx_count - 1) % >> MIN_AVG_LEN]; >> + total_min[i % initial_n] += coverage_array_sum(c[i]->min, >> MIN_AVG_LEN); >> + total_hr[i % initial_n] += coverage_array_sum(c[i]->hr, >> HR_AVG_LEN); >> } >> - ovs_mutex_unlock(&coverage_mutex); >> >> - for (i = 0; i < n_coverage_counters; i++) { >> + for (i = 0; i < initial_n; i++) { >> if (totals[i]) { >> /* Shows the averaged per-second rates for the last >> * COVERAGE_RUN_INTERVAL interval, the last minute and >> @@ -229,18 +282,22 @@ coverage_read(struct svec *lines) >> xasprintf("%-24s %5.1f/sec %9.3f/sec " >> "%13.4f/sec total: %llu", >> c[i]->name, >> - (c[i]->min[(idx_count - 1) % MIN_AVG_LEN] >> + (total_sec[i] >> * 1000.0 / COVERAGE_RUN_INTERVAL), >> - coverage_array_sum(c[i]->min, MIN_AVG_LEN) / >> 60.0, >> - coverage_array_sum(c[i]->hr, HR_AVG_LEN) / >> 3600.0, >> + total_min[i] / 60.0, >> + total_hr[i] / 3600.0, >> totals[i])); >> } else { >> n_never_hit++; >> } >> } >> + ovs_mutex_unlock(&coverage_mutex); >> >> svec_add_nocopy(lines, xasprintf("%"PRIuSIZE" events never hit", >> n_never_hit)); >> free(totals); >> + free(total_sec); >> + free(total_min); >> + free(total_hr); >> } >> >> /* Runs approximately every COVERAGE_CLEAR_INTERVAL amount of time to >> @@ -265,7 +322,7 @@ coverage_clear(void) >> ovs_mutex_lock(&coverage_mutex); >> for (i = 0; i < n_coverage_counters; i++) { >> struct coverage_counter *c = coverage_counters[i]; >> - c->total += c->count(); >> + c->total += c->count(i); >> } >> ovs_mutex_unlock(&coverage_mutex); >> *thread_time = now + COVERAGE_CLEAR_INTERVAL; >> @@ -340,10 +397,8 @@ coverage_array_sum(const unsigned int *arr, const >> unsigned int len) >> unsigned int sum = 0; >> size_t i; >> >> - ovs_mutex_lock(&coverage_mutex); >> for (i = 0; i < len; i++) { >> sum += arr[i]; >> } >> - ovs_mutex_unlock(&coverage_mutex); >> return sum; >> } >> diff --git a/lib/coverage.h b/lib/coverage.h >> index 832c433..1f5ae17 100644 >> --- a/lib/coverage.h >> +++ b/lib/coverage.h >> @@ -45,9 +45,9 @@ BUILD_ASSERT_DECL(COVERAGE_RUN_INTERVAL % >> COVERAGE_CLEAR_INTERVAL == 0); >> >> /* A coverage counter. */ >> struct coverage_counter { >> - const char *const name; /* Textual name. */ >> - unsigned int (*const count)(void); /* Gets, zeros this thread's >> count. */ >> - unsigned long long int total; /* Total count. */ >> + const char *const name; /* Textual name. */ >> + unsigned int (*const count)(unsigned int); /* Gets, zeros this >> thread's count. */ >> + unsigned long long int total; /* Total count. */ >> unsigned long long int last_total; >> /* The moving average arrays. */ >> unsigned int min[MIN_AVG_LEN]; >> @@ -55,15 +55,20 @@ struct coverage_counter { >> }; >> >> void coverage_counter_register(struct coverage_counter*); >> +void coverage_initializer_register(void (*f)(void)); >> +void coverage_register_new_thread(void); >> +void coverage_unregister_thread(void); >> >> +extern volatile unsigned int *coverage_val[2048]; >> +extern volatile size_t n_coverage_counters; >> /* Defines COUNTER. There must be exactly one such definition at file >> scope >> * within a program. */ >> #define COVERAGE_DEFINE(COUNTER) \ >> DEFINE_STATIC_PER_THREAD_DATA(unsigned int, \ >> counter_##COUNTER, 0); \ >> - static unsigned int COUNTER##_count(void) \ >> + static unsigned int COUNTER##_count(unsigned int i) \ >> { \ >> - unsigned int *countp = counter_##COUNTER##_get(); \ >> + volatile unsigned int *countp = coverage_val[i]; \ >> unsigned int count = *countp; \ >> *countp = 0; \ >> return count; \ >> @@ -72,11 +77,17 @@ void coverage_counter_register(struct >> coverage_counter*); >> { \ >> *counter_##COUNTER##_get() += n; \ >> } \ >> - extern struct coverage_counter counter_##COUNTER; \ >> - struct coverage_counter counter_##COUNTER \ >> + extern thread_local struct coverage_counter counter_##COUNTER; \ >> + thread_local struct coverage_counter counter_##COUNTER \ >> = { #COUNTER, COUNTER##_count, 0, 0, {0}, {0} }; \ >> - OVS_CONSTRUCTOR(COUNTER##_init) { \ >> + static void COUNTER##_initializer(void) { \ >> coverage_counter_register(&counter_##COUNTER); \ >> + coverage_val[n_coverage_counters - 1] = \ >> + counter_##COUNTER##_get(); \ >> + } \ >> + OVS_CONSTRUCTOR(COUNTER##_init) { \ >> + coverage_initializer_register(&COUNTER##_initializer); \ >> + COUNTER##_initializer(); \ >> } > > /* Adds 1 to COUNTER. */ >> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c >> index c144352..a92dd6a 100644 >> --- a/lib/dpif-netdev.c >> +++ b/lib/dpif-netdev.c >> @@ -34,6 +34,7 @@ >> #include "bitmap.h" >> #include "cmap.h" >> #include "csum.h" >> +#include "coverage.h" >> #include "dp-packet.h" >> #include "dpif.h" >> #include "dpif-provider.h" >> @@ -2666,7 +2667,7 @@ pmd_thread_main(void *f_) >> >> poll_cnt = 0; >> poll_list = NULL; >> - >> + coverage_register_new_thread(); >> /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */ >> ovsthread_setspecific(pmd->dp->per_pmd_key, pmd); >> pmd_thread_setaffinity_cpu(pmd->core_id); >> @@ -2717,6 +2718,7 @@ reload: >> } >> >> dp_netdev_pmd_reload_done(pmd); >> + coverage_unregister_thread(); >> >> free(poll_list); >> return NULL; >> -- >> 2.1.4 >> >> > _______________________________________________ dev mailing list dev@openvswitch.org http://openvswitch.org/mailman/listinfo/dev