Pablo Neira Ayuso wrote:
> +static void flow_offload_work(struct work_struct *work)
> +{
> +	struct flow_hw_offload *offload, *next;
> +
> +	spin_lock_bh(&flow_hw_offload_lock);
> +	list_for_each_entry_safe(offload, next, &flow_hw_offload_pending_list, list) {
> +		do_flow_offload(offload->flow);
This should not offload flows that already have the DYING bit set.
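If that refers to the conntrack DYING bit, an untested sketch using
nf_ct_is_dying() could look like:

	list_for_each_entry_safe(offload, next, &flow_hw_offload_pending_list, list) {
		/* ct is already on its way out: drop the entry instead
		 * of programming a dead flow into the hardware.
		 */
		if (!nf_ct_is_dying(offload->ct))
			do_flow_offload(offload->flow);

		nf_conntrack_put(&offload->ct->ct_general);
		list_del(&offload->list);
		kfree(offload);
	}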
> +		nf_conntrack_put(&offload->ct->ct_general);
> +		list_del(&offload->list);
> +		kfree(offload);
> +	}
> +	spin_unlock_bh(&flow_hw_offload_lock);
> +
> +	queue_delayed_work(system_power_efficient_wq, &nft_flow_offload_dwork, HZ);
> +}
Missed this on the first round: 1 second is quite large. A new flow keeps
taking the software path until the next run of the work, so offload can
lag behind by up to a full second.
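If a delay is wanted at all, a much shorter period would bound that window,
e.g. (sketch, value picked arbitrarily):

	queue_delayed_work(system_power_efficient_wq, &nft_flow_offload_dwork,
			   msecs_to_jiffies(10));

But see below, I don't think the delayed work is needed in the first place.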
[..]
> static int nft_flow_route(const struct nft_pktinfo *pkt,
>                           const struct nf_conn *ct,
>                           union flow_gateway *orig_gw,
> @@ -211,6 +290,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	union flow_gateway orig_gateway, reply_gateway;
>  	struct net_device *outdev = pkt->xt.state->out;
>  	struct net_device *indev = pkt->xt.state->in;
> +	struct flow_hw_offload *offload;
>  	enum ip_conntrack_info ctinfo;
>  	struct flow_offload *flow;
>  	struct nf_conn *ct;
> @@ -250,6 +330,21 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
>  	if (ret < 0)
>  		goto err2;
> 
> +	if (!indev->netdev_ops->ndo_flow_add)
> +		return;
> +
> +	offload = kmalloc(sizeof(struct flow_hw_offload), GFP_ATOMIC);
> +	if (!offload)
> +		return;
> +
> +	nf_conntrack_get(&ct->ct_general);
> +	offload->ct = ct;
> +	offload->flow = flow;
> +
> +	spin_lock_bh(&flow_hw_offload_lock);
> +	list_add_tail(&offload->list, &flow_hw_offload_pending_list);
> +	spin_unlock_bh(&flow_hw_offload_lock);
> +
>  	return;
So this aims for lazy offloading (up to a 1 second delay).
Is this intentional, e.g. to avoid offloading short-lived request/response
('RR') flows?

I would have expected this to schedule the work right here instead of
using a delayed workqueue at all (i.e., also no self-rescheduling from
flow_offload_work()).
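Something like this is what I had in mind (untested sketch; the work item
name is made up, the rest reuses your struct and list names):

	static void flow_offload_work(struct work_struct *work)
	{
		struct flow_hw_offload *offload, *next;
		LIST_HEAD(pending);

		/* Splice the pending entries under the lock, then program
		 * the hardware without holding it.
		 */
		spin_lock_bh(&flow_hw_offload_lock);
		list_splice_init(&flow_hw_offload_pending_list, &pending);
		spin_unlock_bh(&flow_hw_offload_lock);

		list_for_each_entry_safe(offload, next, &pending, list) {
			do_flow_offload(offload->flow);
			nf_conntrack_put(&offload->ct->ct_general);
			list_del(&offload->list);
			kfree(offload);
		}
		/* no self-rescheduling: this only runs when there is work */
	}
	static DECLARE_WORK(nft_flow_offload_work, flow_offload_work);

and in nft_flow_offload_eval(), right after adding to the pending list:

	schedule_work(&nft_flow_offload_work);

schedule_work() is a no-op if the work is already queued, so the
per-packet cost stays small.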