For SA outbound packets, rte_atomic64_add_return is used to generate SQN atomically. This introduced an unnecessary full barrier by calling the '__sync' builtin implemented rte_atomic_XX API on aarch64. This patch optimized it with c11 atomic and eliminated the expensive barrier for aarch64.
Signed-off-by: Phil Yang <phil.y...@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> Reviewed-by: Gavin Hu <gavin...@arm.com> --- lib/librte_ipsec/ipsec_sqn.h | 3 ++- lib/librte_ipsec/sa.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/lib/librte_ipsec/ipsec_sqn.h b/lib/librte_ipsec/ipsec_sqn.h index 0c2f76a..e884af7 100644 --- a/lib/librte_ipsec/ipsec_sqn.h +++ b/lib/librte_ipsec/ipsec_sqn.h @@ -128,7 +128,8 @@ esn_outb_update_sqn(struct rte_ipsec_sa *sa, uint32_t *num) n = *num; if (SQN_ATOMIC(sa)) - sqn = (uint64_t)rte_atomic64_add_return(&sa->sqn.outb.atom, n); + sqn = __atomic_add_fetch(&sa->sqn.outb.atom, n, + __ATOMIC_RELAXED); else { sqn = sa->sqn.outb.raw + n; sa->sqn.outb.raw = sqn; diff --git a/lib/librte_ipsec/sa.h b/lib/librte_ipsec/sa.h index d22451b..cab9a2e 100644 --- a/lib/librte_ipsec/sa.h +++ b/lib/librte_ipsec/sa.h @@ -120,7 +120,7 @@ struct rte_ipsec_sa { */ union { union { - rte_atomic64_t atom; + uint64_t atom; uint64_t raw; } outb; struct { -- 2.7.4