ovs_pcpu_storage is a per-CPU variable and relies on disabled BH for its locking. Without per-CPU locking in local_bh_disable() on PREEMPT_RT this data structure requires explicit locking. The data structure can be referenced recursive and there is a recursion counter to avoid too many recursions.
Add a local_lock_t to the data structure and use local_lock_nested_bh() for locking. Add an owner of the struct which is the current task and acquire the lock only if the structure is not owned by the current task. Cc: Aaron Conole <acon...@redhat.com> Cc: Eelco Chaudron <echau...@redhat.com> Cc: Ilya Maximets <i.maxim...@ovn.org> Cc: d...@openvswitch.org Signed-off-by: Sebastian Andrzej Siewior <bige...@linutronix.de> --- net/openvswitch/actions.c | 31 ++----------------------------- net/openvswitch/datapath.c | 24 ++++++++++++++++++++++++ net/openvswitch/datapath.h | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 29 deletions(-) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7e4a8d41b9ed6..435725c27a557 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -39,15 +39,6 @@ #include "flow_netlink.h" #include "openvswitch_trace.h" -struct deferred_action { - struct sk_buff *skb; - const struct nlattr *actions; - int actions_len; - - /* Store pkt_key clone when creating deferred action. */ - struct sw_flow_key pkt_key; -}; - #define MAX_L2_LEN (VLAN_ETH_HLEN + 3 * MPLS_HLEN) struct ovs_frag_data { unsigned long dst; @@ -64,28 +55,10 @@ struct ovs_frag_data { static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); -#define DEFERRED_ACTION_FIFO_SIZE 10 -#define OVS_RECURSION_LIMIT 5 -#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) -struct action_fifo { - int head; - int tail; - /* Deferred action fifo queue storage. */ - struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; +DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), }; -struct action_flow_keys { - struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; -}; - -struct ovs_pcpu_storage { - struct action_fifo action_fifos; - struct action_flow_keys flow_keys; - int exec_level; -}; - -static DEFINE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); - /* Make a clone of the 'key', using the pre-allocated percpu 'flow_keys' * space. Return NULL if out of key spaces. */ diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index aaa6277bb49c2..6a304ae2d959c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -244,11 +244,13 @@ void ovs_dp_detach_port(struct vport *p) /* Must be called with rcu_read_lock. */ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) { + struct ovs_pcpu_storage *ovs_pcpu = this_cpu_ptr(&ovs_pcpu_storage); const struct vport *p = OVS_CB(skb)->input_vport; struct datapath *dp = p->dp; struct sw_flow *flow; struct sw_flow_actions *sf_acts; struct dp_stats_percpu *stats; + bool ovs_pcpu_locked = false; u64 *stats_counter; u32 n_mask_hit; u32 n_cache_hit; @@ -290,10 +292,26 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) ovs_flow_stats_update(flow, key->tp.flags, skb); sf_acts = rcu_dereference(flow->sf_acts); + /* This path can be invoked recursively: Use the current task to + * identify recursive invocation - the lock must be acquired only once. + * Even with disabled bottom halves this can be preempted on PREEMPT_RT. + * Limit the locking to RT to avoid assigning `owner' if it can be + * avoided. + */ + if (IS_ENABLED(CONFIG_PREEMPT_RT) && ovs_pcpu->owner != current) { + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + ovs_pcpu->owner = current; + ovs_pcpu_locked = true; + } + error = ovs_execute_actions(dp, skb, sf_acts, key); if (unlikely(error)) net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n", ovs_dp_name(dp), error); + if (ovs_pcpu_locked) { + ovs_pcpu->owner = NULL; + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); + } stats_counter = &stats->n_hit; @@ -671,7 +689,13 @@ static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info) sf_acts = rcu_dereference(flow->sf_acts); local_bh_disable(); + local_lock_nested_bh(&ovs_pcpu_storage.bh_lock); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, current); err = ovs_execute_actions(dp, packet, sf_acts, &flow->key); + if (IS_ENABLED(CONFIG_PREEMPT_RT)) + this_cpu_write(ovs_pcpu_storage.owner, NULL); + local_unlock_nested_bh(&ovs_pcpu_storage.bh_lock); local_bh_enable(); rcu_read_unlock(); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index a126407926058..4a665c3cfa906 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -173,6 +173,39 @@ struct ovs_net { bool xt_label; }; +struct deferred_action { + struct sk_buff *skb; + const struct nlattr *actions; + int actions_len; + + /* Store pkt_key clone when creating deferred action. */ + struct sw_flow_key pkt_key; +}; + +#define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) + +struct action_fifo { + int head; + int tail; + /* Deferred action fifo queue storage. */ + struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; +}; + +struct action_flow_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + +struct ovs_pcpu_storage { + struct action_fifo action_fifos; + struct action_flow_keys flow_keys; + int exec_level; + struct task_struct *owner; + local_lock_t bh_lock; +}; +DECLARE_PER_CPU(struct ovs_pcpu_storage, ovs_pcpu_storage); + /** * enum ovs_pkt_hash_types - hash info to include with a packet * to send to userspace. -- 2.49.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev