On 08/21, Jim Harris wrote: >Ideally, get_tsc_freq_arch() is able to provide the >TSC rate using architecture-specific means. When that >is not possible, DPDK reverts to calculating the >TSC rate with a 100ms nanosleep or 1s sleep. The latter >occurs more frequently in VMs which often do not have >access to the data they need from arch-specific means >(CPUID leaf 0x15 or MSR 0xCE on x86). > >In secondary processes, the extra 100ms is especially >noticeable and consumes the bulk of rte_eal_init() >execution time. So in secondary processes, if >we cannot get the TSC rate using get_tsc_freq_arch(), >try to get the TSC rate from the primary process >instead using rte_mp_msg. This is much faster than >100ms. > >Reduces rte_eal_init() execution time in a secondary >process from 165ms to 66ms on my test system. > >Signed-off-by: Jim Harris <james.r.har...@intel.com> >Change-Id: I584419ed1c7d6f47841e0a0eb23f34c9f1186d35
The Change-Id line above is unnecessary; please remove it from the commit message. Thanks, Xiaolong >--- > lib/librte_eal/common/eal_common_timer.c | 62 ++++++++++++++++++++++++++++++ > 1 file changed, 62 insertions(+) > >diff --git a/lib/librte_eal/common/eal_common_timer.c >b/lib/librte_eal/common/eal_common_timer.c >index 145543de7..ad965455d 100644 >--- a/lib/librte_eal/common/eal_common_timer.c >+++ b/lib/librte_eal/common/eal_common_timer.c >@@ -15,9 +15,17 @@ > #include <rte_log.h> > #include <rte_cycles.h> > #include <rte_pause.h> >+#include <rte_eal.h> >+#include <rte_errno.h> > > #include "eal_private.h" > >+#define EAL_TIMER_MP "eal_timer_mp_sync" >+ >+struct timer_mp_param { >+ uint64_t tsc_hz; >+}; >+ > /* The frequency of the RDTSC timer resolution */ > static uint64_t eal_tsc_resolution_hz; > >@@ -74,12 +82,58 @@ estimate_tsc_freq(void) > return RTE_ALIGN_MUL_NEAR(rte_rdtsc() - start, CYC_PER_10MHZ); > } > >+static uint64_t >+get_tsc_freq_from_primary(void) >+{ >+ struct rte_mp_msg mp_req = {0}; >+ struct rte_mp_reply mp_reply = {0}; >+ struct timer_mp_param *r; >+ struct timespec ts = {.tv_sec = 1, .tv_nsec = 0}; >+ uint64_t tsc_hz; >+ >+ strcpy(mp_req.name, EAL_TIMER_MP); >+ if (rte_mp_request_sync(&mp_req, &mp_reply, &ts) || >+ mp_reply.nb_received != 1) { >+ tsc_hz = 0; >+ } else { >+ r = (struct timer_mp_param *)mp_reply.msgs[0].param; >+ tsc_hz = r->tsc_hz; >+ } >+ >+ free(mp_reply.msgs); >+ return tsc_hz; >+} >+ >+static int >+timer_mp_primary(__attribute__((unused)) const struct rte_mp_msg *msg, >+ const void *peer) >+{ >+ struct rte_mp_msg reply = {0}; >+ struct timer_mp_param *r = (struct timer_mp_param *)reply.param; >+ >+ r->tsc_hz = eal_tsc_resolution_hz; >+ strcpy(reply.name, EAL_TIMER_MP); >+ reply.len_param = sizeof(*r); >+ >+ return rte_mp_reply(&reply, peer); >+} >+ > void > set_tsc_freq(void) > { > uint64_t freq; >+ int rc; > > freq = get_tsc_freq_arch(); >+ if (!freq && rte_eal_process_type() != RTE_PROC_PRIMARY) { >+ /* We couldn't get the TSC frequency through arch-specific 
>+ * means. If this is a secondary process, try to get the >+ * TSC frequency from the primary process - this will >+ * be much faster than get_tsc_freq() or estimate_tsc_freq() >+ * below. >+ */ >+ freq = get_tsc_freq_from_primary(); >+ } > if (!freq) > freq = get_tsc_freq(); > if (!freq) >@@ -87,6 +141,14 @@ set_tsc_freq(void) > > RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); > eal_tsc_resolution_hz = freq; >+ if (rte_eal_process_type() == RTE_PROC_PRIMARY) { >+ rc = rte_mp_action_register(EAL_TIMER_MP, timer_mp_primary); >+ if (rc && rte_errno != ENOTSUP) { >+ RTE_LOG(WARNING, EAL, "Could not register mp_action - " >+ "secondary processes will calculate TSC rate " >+ "independently.\n"); >+ } >+ } > } > > void rte_delay_us_callback_register(void (*userfunc)(unsigned int)) >