Re: [ovs-dev] [PATCH] netdev-offload-tc: Reserve lower tc prios for ip ethertypes

2022-10-26 Thread Paul Blakey via dev




On 26/10/2022 17:13, Ilya Maximets wrote:

On 10/25/22 11:59, Roi Dayan wrote:

From: Paul Blakey 

Currently ethertype to prio hmap is static and the first ethertype
being used gets a lower priority. Usually there is an arp request
before the ip traffic and the arp ethertype gets a lower tc priority
while the ip traffic proto gets a higher priority.
In this case ip traffic will go through more hops in tc and HW.
Instead, reserve lower priorities for ip ethertypes.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---
  lib/netdev-offload-tc.c | 31 +--
  lib/tc.h|  2 ++
  2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index f6f90a741fde..5b3f29d04c2a 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -325,6 +325,26 @@ struct prio_map_data {
  uint16_t prio;
  };
  
+static uint16_t

+get_next_available_prio(ovs_be16 protocol)
+{
+static uint16_t last_prio = TC_RESERVED_PRIORITY_MAX;
+
+if (protocol == htons(ETH_P_IP)) {
+return TC_RESERVED_PRIORITY_IPV4;
+} else if (protocol == htons(ETH_P_IPV6)) {
+return TC_RESERVED_PRIORITY_IPV6;
+}


Wouldn't this cause problems if multi_mask_per_prio is not supported?
i.e. we should not use the same prio for all IPv4 if they have different
masks in this case.

Or am I missing something?

Best regards, Ilya Maximets.



Good catch will fix.


+
+/* last_prio can overflow if there will be many different kinds of
+ * flows which shouldn't happen organically. */
+if (last_prio == UINT16_MAX) {
+return 0;
+}
+
+return ++last_prio;
+}
+
  /* Get free prio for tc flower
   * If prio is already allocated for mask/eth_type combination then return it.
   * If not assign new prio.
@@ -336,11 +356,11 @@ get_prio_for_tc_flower(struct tc_flower *flower)
  {
  static struct hmap prios = HMAP_INITIALIZER();
  static struct ovs_mutex prios_lock = OVS_MUTEX_INITIALIZER;
-static uint16_t last_prio = TC_RESERVED_PRIORITY_MAX;
  size_t key_len = sizeof(struct tc_flower_key);
  size_t hash = hash_int((OVS_FORCE uint32_t) flower->key.eth_type, 0);
  struct prio_map_data *data;
  struct prio_map_data *new_data;
+uint16_t prio;
  
  if (!multi_mask_per_prio) {

  hash = hash_bytes(>mask, key_len, hash);
@@ -359,21 +379,20 @@ get_prio_for_tc_flower(struct tc_flower *flower)
  }
  }
  
-if (last_prio == UINT16_MAX) {

-/* last_prio can overflow if there will be many different kinds of
- * flows which shouldn't happen organically. */
+prio = get_next_available_prio(flower->key.eth_type);
+if (!prio) {
  ovs_mutex_unlock(_lock);
  return 0;
  }
  
  new_data = xzalloc(sizeof *new_data);

  memcpy(_data->mask, >mask, key_len);
-new_data->prio = ++last_prio;
+new_data->prio = prio;
  new_data->protocol = flower->key.eth_type;
  hmap_insert(, _data->node, hash);
  ovs_mutex_unlock(_lock);
  
-return new_data->prio;

+return prio;
  }
  
  static uint32_t

diff --git a/lib/tc.h b/lib/tc.h
index 2e64ad372592..12753c16d405 100644
--- a/lib/tc.h
+++ b/lib/tc.h
@@ -49,6 +49,8 @@
  enum tc_flower_reserved_prio {
  TC_RESERVED_PRIORITY_NONE,
  TC_RESERVED_PRIORITY_POLICE,
+TC_RESERVED_PRIORITY_IPV4,
+TC_RESERVED_PRIORITY_IPV6,
  __TC_RESERVED_PRIORITY_MAX
  };
  #define TC_RESERVED_PRIORITY_MAX (__TC_RESERVED_PRIORITY_MAX -1)



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 4/5] netdev-offload-tc: Fix ignoring unknown tunnel keys.

2022-08-04 Thread Paul Blakey via dev




On 29/07/2022 17:53, Ilya Maximets wrote:

Current offloading code supports only limited number of tunnel keys
and silently ignores everything it doesn't understand.  This is
causing, for example, offloaded ERSPAN tunnels to not work, because
flow is offloaded, but ERSPAN options are not provided to TC.

There is a number of tunnel keys, which are supported by the userspace,
but silently ignored during offloading:

   OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT
   OVS_TUNNEL_KEY_ATTR_OAM
   OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS
   OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS

OVS_TUNNEL_KEY_ATTR_CSUM is kind of supported, but only for actions
and for some reason is set from the tunnel port instead of the
provided action, and not currently supported for the tunnel key in
the match.

Addig a default case to fail offloading of unknown attributes.  For
now explicitly allowing incorrect behavior for the DONT_FRAGMENT flag,
otherwise we'll break all tunnel offloading by default.  VXLAN and
ERSPAN options has to fail offloading, because the tunnel will not
work otherwise.  OAM is not a default configurations, so failing it
as well. The missing DONT_FRAGMENT flag though should, probably,
cause frequent flow revalidation, but that is not new with this patch.

Same for the 'match' key, only clearing masks that was actually
consumed, except for the DONT_FRAGMENT and CSUM flags, which are
explicitly allowed and highlighted as broken.

Also, destination port as well as CSUM configuration for unknown
reason was not taken from the actions list and were passed via HW
offload info instead of being consumed from the set() action.

Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2022-July/395522.html
Reported-by: Eelco Chaudron 
Fixes: 8f283af89298 ("netdev-tc-offloads: Implement netdev flow put using tc 
interface")
Signed-off-by: Ilya Maximets 
---
  lib/dpif-netlink.c  | 14 +-
  lib/netdev-offload-tc.c | 61 -
  lib/netdev-offload.h|  3 --
  3 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index a498d5667..d9243a735 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -2237,8 +2237,6 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
dpif_flow_put *put)
  size_t left;
  struct netdev *dev;
  struct offload_info info;
-ovs_be16 dst_port = 0;
-uint8_t csum_on = false;
  int err;
  
  info.tc_modify_flow_deleted = false;

@@ -2258,10 +2256,9 @@ parse_flow_put(struct dpif_netlink *dpif, struct 
dpif_flow_put *put)
  return EOPNOTSUPP;
  }
  
-/* Get tunnel dst port */

+/* Check the output port for a tunnel. */
  NL_ATTR_FOR_EACH(nla, left, put->actions, put->actions_len) {
  if (nl_attr_type(nla) == OVS_ACTION_ATTR_OUTPUT) {
-const struct netdev_tunnel_config *tnl_cfg;
  struct netdev *outdev;
  odp_port_t out_port;
  
@@ -2271,19 +2268,10 @@ parse_flow_put(struct dpif_netlink *dpif, struct dpif_flow_put *put)

  err = EOPNOTSUPP;
  goto out;
  }
-tnl_cfg = netdev_get_tunnel_config(outdev);
-if (tnl_cfg && tnl_cfg->dst_port != 0) {
-dst_port = tnl_cfg->dst_port;
-}
-if (tnl_cfg) {
-csum_on = tnl_cfg->csum;
-}
  netdev_close(outdev);
  }
  }
  
-info.tp_dst_port = dst_port;

-info.tunnel_csum_on = csum_on;
  info.recirc_id_shared_with_tc = (dpif->user_features
   & OVS_DP_F_TC_RECIRC_SHARING);
  err = netdev_flow_put(dev, ,
diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index a3eee8df3..57385597a 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -1389,9 +1389,11 @@ static int
  parse_put_flow_set_action(struct tc_flower *flower, struct tc_action *action,
const struct nlattr *set, size_t set_len)
  {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
  const struct nlattr *tunnel;
  const struct nlattr *tun_attr;
  size_t tun_left, tunnel_len;
+bool attr_csum_present;
  
  if (nl_attr_type(set) == OVS_KEY_ATTR_MPLS) {

  return parse_mpls_set_action(flower, action, set);
@@ -1405,6 +1407,7 @@ parse_put_flow_set_action(struct tc_flower *flower, 
struct tc_action *action,
  tunnel = nl_attr_get(set);
  tunnel_len = nl_attr_get_size(set);
  
+attr_csum_present = false;

  action->type = TC_ACT_ENCAP;
  action->encap.id_present = false;
  flower->action_count++;
@@ -1431,6 +1434,19 @@ parse_put_flow_set_action(struct tc_flower *flower, 
struct tc_action *action,
  action->encap.ttl = nl_attr_get_u8(tun_attr);
  }
  break;
+case OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT: {
+/* XXX: This is wrong!  We're ignoring the DF flag configuration
+ * 

[ovs-dev] [PATCH net v4 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-28 Thread Paul Blakey via dev
After cited commit optimizted hw insertion, flow table entries are
populated with ifindex information which was intended to only be used
for HW offload. This tuple ifindex is hashed in the flow table key, so
it must be filled for lookup to be successful. But tuple ifindex is only
relevant for the netfilter flowtables (nft), so it's not filled in
act_ct flow table lookup, resulting in lookup failure, and no SW
offload and no offload teardown for TCP connection FIN/RST packets.

To fix this, add new tc ifindex field to tuple, which will
only be used for offloading, not for lookup, as it will not be
part of the tuple hash.

Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx")
Signed-off-by: Paul Blakey 
---
Changelog:
   v3->v4:
 Accidently sent v2 as v3, Resent with correct patch as v4.
   v2->v3:
 As suggested by pablo, moved tc specific hardware offload related ifindex
 to a tc specific field, so wont be part of hash/lookup.
   v1->v2:
 Replaced flag withdx being zero at lookup().
 Fixed commit msg Fixes header subject

 include/net/netfilter/nf_flow_table.h |  6 +-
 net/netfilter/nf_flow_table_offload.c |  6 +-
 net/sched/act_ct.c| 13 +
 3 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h 
b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..bd59e950f4d6 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -96,6 +96,7 @@ enum flow_offload_xmit_type {
FLOW_OFFLOAD_XMIT_NEIGH,
FLOW_OFFLOAD_XMIT_XFRM,
FLOW_OFFLOAD_XMIT_DIRECT,
+   FLOW_OFFLOAD_XMIT_TC,
 };
 
 #define NF_FLOW_TABLE_ENCAP_MAX2
@@ -127,7 +128,7 @@ struct flow_offload_tuple {
struct { }  __hash;
 
u8  dir:2,
-   xmit_type:2,
+   xmit_type:3,
encap_num:2,
in_vlan_ingress:2;
u16 mtu;
@@ -142,6 +143,9 @@ struct flow_offload_tuple {
u8  h_source[ETH_ALEN];
u8  h_dest[ETH_ALEN];
} out;
+   struct {
+   u32 iifidx;
+   } tc;
};
 };
 
diff --git a/net/netfilter/nf_flow_table_offload.c 
b/net/netfilter/nf_flow_table_offload.c
index b561e0a44a45..fc4265acd9c4 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -110,7 +110,11 @@ static int nf_flow_rule_match(struct nf_flow_match *match,
nf_flow_rule_lwt_match(match, tun_info);
}
 
-   key->meta.ingress_ifindex = tuple->iifidx;
+   if (tuple->xmit_type == FLOW_OFFLOAD_XMIT_TC)
+   key->meta.ingress_ifindex = tuple->tc.iifidx;
+   else
+   key->meta.ingress_ifindex = tuple->iifidx;
+
mask->meta.ingress_ifindex = 0x;
 
if (tuple->encap_num > 0 && !(tuple->in_vlan_ingress & BIT(0)) &&
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..0d779e48e78d 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -361,6 +361,13 @@ static void tcf_ct_flow_table_put(struct tcf_ct_params 
*params)
}
 }
 
+static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
+struct nf_conn_act_ct_ext *act_ct_ext, u8 dir)
+{
+   entry->tuplehash[dir].tuple.xmit_type = FLOW_OFFLOAD_XMIT_TC;
+   entry->tuplehash[dir].tuple.tc.iifidx = act_ct_ext->ifindex[dir];
+}
+
 static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
  struct nf_conn *ct,
  bool tcp)
@@ -385,10 +392,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table 
*ct_ft,
 
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
-   entry->tuplehash[FLOW_OFFLOAD_DIR_ORIGINAL].tuple.iifidx =
-   act_ct_ext->ifindex[IP_CT_DIR_ORIGINAL];
-   entry->tuplehash[FLOW_OFFLOAD_DIR_REPLY].tuple.iifidx =
-   act_ct_ext->ifindex[IP_CT_DIR_REPLY];
+   tcf_ct_flow_tc_ifidx(entry, act_ct_ext, 
FLOW_OFFLOAD_DIR_ORIGINAL);
+   tcf_ct_flow_tc_ifidx(entry, act_ct_ext, FLOW_OFFLOAD_DIR_REPLY);
}
 
err = flow_offload_add(_ft->nf_ft, entry);
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net v3 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-28 Thread Paul Blakey via dev
Sent old v2 by mistake, please ignore, resending actual v3 as v4.
Thanks,
Paul.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-28 Thread Paul Blakey via dev
After cited commit optimizted hw insertion, flow table entries are
populated with ifindex information which was intended to only be used
for HW offload. This tuple ifindex is hashed in the flow table key, so
it must be filled for lookup to be successful. But tuple ifindex is only
relevant for the netfilter flowtables (nft), so it's not filled in
act_ct flow table lookup, resulting in lookup failure, and no SW
offload and no offload teardown for TCP connection FIN/RST packets.

To fix this, remove ifindex from hash, and allow lookup without
the ifindex. Act ct will lookup without the ifindex filled.

Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx")
Signed-off-by: Paul Blakey 
---
Changelog:
   v2->v3:
 As suggested by pablo, moved tc specific hardware offload related ifindex
 to a tc specific field, so wont be part of hash/lookup.
   v1->v2:
 Replaced flag withdx being zero at lookup().
 Fixed commit msg Fixes header subject

 include/net/netfilter/nf_flow_table.h | 3 +--
 net/netfilter/nf_flow_table_core.c| 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h 
b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..61dc5e833557 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -114,8 +114,6 @@ struct flow_offload_tuple {
__be16  dst_port;
};
 
-   int iifidx;
-
u8  l3proto;
u8  l4proto;
struct {
@@ -126,6 +124,7 @@ struct flow_offload_tuple {
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { }  __hash;
 
+   int iifidx;
u8  dir:2,
xmit_type:2,
encap_num:2,
diff --git a/net/netfilter/nf_flow_table_core.c 
b/net/netfilter/nf_flow_table_core.c
index b90eca7a2f22..01d32f08a1fd 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -257,6 +257,9 @@ static int flow_offload_hash_cmp(struct 
rhashtable_compare_arg *arg,
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
 
+   if (tuple->iifidx && tuple->iifidx != x->tuple.iifidx)
+   return 1;
+
if (memcmp(>tuple, tuple, offsetof(struct flow_offload_tuple, 
__hash)))
return 1;
 
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v5 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-23 Thread Paul Blakey via dev
Ipv6 ttl, label and tos fields are modified without first
pulling/pushing the ipv6 header, which would have updated
the hw csum (if available). This might cause csum validation
when sending the packet to the stack, as can be seen in
the trace below.

Fix this by updating skb->csum if available.

Trace resulted by ipv6 ttl dec and then sending packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:7f9022282b20 EFLAGS: 0246 ORIG_RAX: 
ffda
[295242.132900] RAX: 0005 RBX: 0010 RCX: 

[295242.140120] RDX: 7f9022282ba8 RSI: 7f9022282a30 RDI: 
7f9014005c30
[295242.147337] RBP: 7f9014014d60 R08: 0020 R09: 
7f90254a8340
[295242.154557] R10: 7f9022282a28 R11: 0246 R12: 

[295242.161775] R13: 7f902308c000 R14: 002b R15: 
7f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey 
---
Changelog:
v5->v4:
  Replaced v4 helper with csum_replace.
  Removed extra space
v4->v3:
  Use new helper csum_block_replace
v2->v3:
  Use u8 instead of __u8
  Fix sparse warnings on conversions
v1->v2:
  Replaced push pull rcsum checksum calc with actual checksum calc

 include/net/checksum.h|  5 +
 net/openvswitch/actions.c | 46 ---
 2 files changed, 43 insertions(+), 8 deletions(-)

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5218041e5c8f..b6d9f9c4ab49 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -141,6 +141,11 @@ static inline void csum_replace2(__sum16 *sum, __be16 old, 
__be16 new)
*sum = ~csum16_add(csum16_sub(~(*sum), old), new);
 }
 
+static inline void csum_replace(__wsum *csum, __wsum old, __wsum new)
+{
+   *csum = csum_add(csum_sub(*csum, old), new);
+}
+
 struct sk_buff;
 void inet_proto_csum_replace4(__sum16 *sum, struct sk_buff *skb,
  __be32 from, __be32 to, bool pseudohdr);
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..780d9e2246f3 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,43 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 
ipv6_tclass, u8 mask)
 {
+   u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+   ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   csum_replace(>csum, (__force __wsum)(old_ipv6_tclass << 
12),
+(__force __wsum)(ipv6_tclass << 12));
+
+   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 
mask)
+{
+   u32 ofl;
+
+   ofl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  nh->flow_lbl[2];
+   fl = OVS_MASKED(ofl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
-   OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
-   OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
-   OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+   nh->flow_lbl[0] = (u8)(fl >> 16);
+   nh->flow_lbl[1] = (u8)(fl >> 8);
+   nh->flow_lbl[2] = (u8)fl;
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   csum_replace(>csum, (__force __wsum)htonl(ofl), 

Re: [ovs-dev] [PATCH net v2 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-21 Thread Paul Blakey via dev




On Sun, 20 Feb 2022, Pablo Neira Ayuso wrote:

> Hi Paul,
> 
> On Sun, Feb 20, 2022 at 11:32:26AM +0200, Paul Blakey wrote:
> > After cited commit optimizted hw insertion, flow table entries are
> > populated with ifindex information which was intended to only be used
> > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > it must be filled for lookup to be successful. But tuple ifindex is only
> > relevant for the netfilter flowtables (nft), so it's not filled in
> > act_ct flow table lookup, resulting in lookup failure, and no SW
> > offload and no offload teardown for TCP connection FIN/RST packets.
> > 
> > To fix this, remove ifindex from hash, and allow lookup without
> > the ifindex. Act ct will lookup without the ifindex filled.
> 
> I think it is good to add FLOW_OFFLOAD_XMIT_TC (instead of relying on
> FLOW_OFFLOAD_XMIT_UNSPEC), this allows for more tc specific fields in
> the future.
> 
> See attached patch.
> 
> Thanks.
> 

This patch will fix it, but ifindex which we fill is for the input device 
and not related to XMIT, exactly what tuple->iifidx means. We don't have 
XMIT, so I think it was ok to use  UNSPEC for now. If I use 
tuple->tc.iifidx as you suggest, tuple->iifidx  will remain unused.

I think once we have more fields that are really specific to TC, we 
can do what you sugguest, right now we can share the ifindex.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net v4 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-21 Thread Paul Blakey via dev




On Sun, 20 Feb 2022, David Laight wrote:

> From: Paul Blakey
> > Sent: 20 February 2022 13:21
> > 
> > Ipv6 ttl, label and tos fields are modified without first
> > pulling/pushing the ipv6 header, which would have updated
> > the hw csum (if available). This might cause csum validation
> > when sending the packet to the stack, as can be seen in
> > the trace below.
> > 
> > Fix this by updating skb->csum if available.
> ...
> > +static inline __wsum
> > +csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
> > +{
> > +   return csum_block_add(csum_block_sub(csum, old, offset), new, offset);
> > +}
> 
> That look computationally OTT for sub 32bit adjustments.
> 
> It ought to be enough to do:
>   return csum_add(old_csum, 0xffff + new - old);
> 
> Although it will need 'tweaking' for odd aligned 24bit values.
> 
>   David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 
> 1PT, UK
> Registration No: 1397386 (Wales)
> 
> 

I'm not comfortable with manually implementing some low-level
bit fideling checksum calculation, while covering all cases.
And as you said it also doesnt have the offset param (odd aligned).
The same could be said about csum_block_add()/sub(), replace()
just uses them. I think the csum operations are more readable and
should  be inlined/optimize, so can we do it the readable way,
and then,  if needed, optimize it?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v4 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-20 Thread Paul Blakey via dev
Ipv6 ttl, label and tos fields are modified without first
pulling/pushing the ipv6 header, which would have updated
the hw csum (if available). This might cause csum validation
when sending the packet to the stack, as can be seen in
the trace below.

Fix this by updating skb->csum if available.

Trace resulted by ipv6 ttl dec and then sending packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:7f9022282b20 EFLAGS: 0246 ORIG_RAX: 
ffda
[295242.132900] RAX: 0005 RBX: 0010 RCX: 

[295242.140120] RDX: 7f9022282ba8 RSI: 7f9022282a30 RDI: 
7f9014005c30
[295242.147337] RBP: 7f9014014d60 R08: 0020 R09: 
7f90254a8340
[295242.154557] R10: 7f9022282a28 R11: 0246 R12: 

[295242.161775] R13: 7f902308c000 R14: 002b R15: 
7f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey 
---
 Changelog:
v4->v3:
  Use new helper csum_block_replace
v2->v3:
  Use u8 instead of __u8
  Fix sparse warnings on conversions
v1->v2:
  Replaced push pull rcsum checksum calc with actual checksum calc

 include/net/checksum.h|  7 ++
 net/openvswitch/actions.c | 47 ---
 2 files changed, 46 insertions(+), 8 deletions(-)

diff --git a/include/net/checksum.h b/include/net/checksum.h
index 5218041e5c8f..ce39e47b2881 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -106,6 +106,12 @@ csum_block_sub(__wsum csum, __wsum csum2, int offset)
return csum_block_add(csum, ~csum2, offset);
 }
 
+static inline __wsum
+csum_block_replace(__wsum csum, __wsum old, __wsum new, int offset)
+{
+   return csum_block_add(csum_block_sub(csum, old, offset), new, offset);
+}
+
 static inline __wsum csum_unfold(__sum16 n)
 {
return (__force __wsum)n;
@@ -184,4 +190,5 @@ static inline __wsum wsum_negate(__wsum val)
 {
return (__force __wsum)-((__force u32)val);
 }
+
 #endif
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..1bc9037e4b9e 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,44 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 
ipv6_tclass, u8 mask)
 {
+   u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+   ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = csum_block_replace(skb->csum, (__force 
__wsum)(old_ipv6_tclass << 4),
+  (__force __wsum)(ipv6_tclass << 
4), 1);
+
+   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 
mask)
+{
+   u32 old_fl;
+
+   old_fl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  
nh->flow_lbl[2];
+   fl = OVS_MASKED(old_fl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
-   OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
-   OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
-   OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+   nh->flow_lbl[0] = (u8)(fl >> 16);
+   nh->flow_lbl[1] = (u8)(fl >> 8);
+   nh->flow_lbl[2] = (u8)fl;
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+  

[ovs-dev] [PATCH net v2 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-20 Thread Paul Blakey via dev
After cited commit optimizted hw insertion, flow table entries are
populated with ifindex information which was intended to only be used
for HW offload. This tuple ifindex is hashed in the flow table key, so
it must be filled for lookup to be successful. But tuple ifindex is only
relevant for the netfilter flowtables (nft), so it's not filled in
act_ct flow table lookup, resulting in lookup failure, and no SW
offload and no offload teardown for TCP connection FIN/RST packets.

To fix this, remove ifindex from hash, and allow lookup without
the ifindex. Act ct will lookup without the ifindex filled.

Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tuple iifidx")
Signed-off-by: Paul Blakey 
---
 Changelog:
v1->v2:
Replaced flag with iifidx being zero at lookup().
Fixed commit msg Fixes header subject

 include/net/netfilter/nf_flow_table.h | 3 +--
 net/netfilter/nf_flow_table_core.c| 3 +++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h 
b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..61dc5e833557 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -114,8 +114,6 @@ struct flow_offload_tuple {
__be16  dst_port;
};
 
-   int iifidx;
-
u8  l3proto;
u8  l4proto;
struct {
@@ -126,6 +124,7 @@ struct flow_offload_tuple {
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { }  __hash;
 
+   int iifidx;
u8  dir:2,
xmit_type:2,
encap_num:2,
diff --git a/net/netfilter/nf_flow_table_core.c 
b/net/netfilter/nf_flow_table_core.c
index b90eca7a2f22..01d32f08a1fd 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -257,6 +257,9 @@ static int flow_offload_hash_cmp(struct 
rhashtable_compare_arg *arg,
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
 
+   if (tuple->iifidx && tuple->iifidx != x->tuple.iifidx)
+   return 1;
+
if (memcmp(>tuple, tuple, offsetof(struct flow_offload_tuple, 
__hash)))
return 1;
 
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-20 Thread Paul Blakey via dev



On Fri, 18 Feb 2022, Pablo Neira Ayuso wrote:

> On Fri, Feb 18, 2022 at 12:55:07AM +0100, Pablo Neira Ayuso wrote:
> > On Thu, Feb 17, 2022 at 08:27:08PM -0300, Marcelo Ricardo Leitner wrote:
> > > On Thu, Feb 17, 2022 at 02:55:27PM +0100, Pablo Neira Ayuso wrote:
> > > > On Thu, Feb 17, 2022 at 11:34:24AM +0200, Paul Blakey wrote:
> > > > > After cited commit optimizted hw insertion, flow table entries are
> > > > > populated with ifindex information which was intended to only be used
> > > > > for HW offload. This tuple ifindex is hashed in the flow table key, so
> > > > > it must be filled for lookup to be successful. But tuple ifindex is 
> > > > > only
> > > > > relevant for the netfilter flowtables (nft), so it's not filled in
> > > > > act_ct flow table lookup, resulting in lookup failure, and no SW
> > > > > offload and no offload teardown for TCP connection FIN/RST packets.
> > > > > 
> > > > > To fix this, allow flow tables that don't hash the ifindex.
> > > > > Netfilter flow tables will keep using ifindex for a more specific
> > > > > offload, while act_ct will not.
> > > > 
> > > > Using iif == zero should be enough to specify not set?
> > > 
> > > You mean, when searching, if search input iif == zero, to simply not
> > > check it? That seems dangerous somehow.
> > 
> > dev_new_index() does not allocate ifindex as zero.
> > 
> > Anyway, @Paul: could you add a tc_ifidx field instead in the union
> > right after __hash instead to fix 9795ded7f924?
> 
> I mean this incomplete patch below:
> 
> diff --git a/include/net/netfilter/nf_flow_table.h 
> b/include/net/netfilter/nf_flow_table.h
> index a3647fadf1cc..d4fa4f716f68 100644
> --- a/include/net/netfilter/nf_flow_table.h
> +++ b/include/net/netfilter/nf_flow_table.h
> @@ -142,6 +142,7 @@ struct flow_offload_tuple {
> u8  h_source[ETH_ALEN];
> u8  h_dest[ETH_ALEN];
> } out;
> +   u32 tc_ifidx;
> };
>  };
> 
> You will need to update nf_flow_rule_match() to set key->meta.ingress_ifindex 
> to
> use tc_ifidx if it is set to non-zero value.
> 

I  understand how it could fix the original issue, but I don't think this
is better, because it makes tuple less generic. What you suggested with 
using 0 to avoid needing the new flag is good enough for me, and is 
cleaner in my opinion.

I'll send the == 0 one as V2 for chance you agree, and if you want to 
change to this, I won't mind sending it as V3.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 1/1] net/sched: act_ct: Fix flow table lookup failure with no originating ifindex

2022-02-17 Thread Paul Blakey via dev
After cited commit optimizted hw insertion, flow table entries are
populated with ifindex information which was intended to only be used
for HW offload. This tuple ifindex is hashed in the flow table key, so
it must be filled for lookup to be successful. But tuple ifindex is only
relevant for the netfilter flowtables (nft), so it's not filled in
act_ct flow table lookup, resulting in lookup failure, and no SW
offload and no offload teardown for TCP connection FIN/RST packets.

To fix this, allow flow tables that don't hash the ifindex.
Netfilter flow tables will keep using ifindex for a more specific
offload, while act_ct will not.

Fixes: 9795ded7f924 ("net/sched: act_ct: Fill offloading tupledx")
Signed-off-by: Paul Blakey 
---
 include/net/netfilter/nf_flow_table.h | 8 
 net/netfilter/nf_flow_table_core.c| 6 ++
 net/sched/act_ct.c| 3 ++-
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h 
b/include/net/netfilter/nf_flow_table.h
index a3647fadf1cc..9b474414a936 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -64,8 +64,9 @@ struct nf_flowtable_type {
 };
 
 enum nf_flowtable_flags {
-   NF_FLOWTABLE_HW_OFFLOAD = 0x1,  /* NFT_FLOWTABLE_HW_OFFLOAD */
-   NF_FLOWTABLE_COUNTER= 0x2,  /* NFT_FLOWTABLE_COUNTER */
+   NF_FLOWTABLE_HW_OFFLOAD = 0x1,  /* 
NFT_FLOWTABLE_HW_OFFLOAD */
+   NF_FLOWTABLE_COUNTER= 0x2,  /* 
NFT_FLOWTABLE_COUNTER */
+   NF_FLOWTABLE_NO_IFINDEX_FILTERING   = 0x4,  /* Only used by act_ct 
*/
 };
 
 struct nf_flowtable {
@@ -114,8 +115,6 @@ struct flow_offload_tuple {
__be16  dst_port;
};
 
-   int iifidx;
-
u8  l3proto;
u8  l4proto;
struct {
@@ -126,6 +125,7 @@ struct flow_offload_tuple {
/* All members above are keys for lookups, see flow_offload_hash(). */
struct { }  __hash;
 
+   int iifidx;
u8  dir:2,
xmit_type:2,
encap_num:2,
diff --git a/net/netfilter/nf_flow_table_core.c 
b/net/netfilter/nf_flow_table_core.c
index b90eca7a2f22..f0cb2c7075c0 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -254,9 +254,15 @@ static u32 flow_offload_hash_obj(const void *data, u32 
len, u32 seed)
 static int flow_offload_hash_cmp(struct rhashtable_compare_arg *arg,
const void *ptr)
 {
+   const struct nf_flowtable *flow_table = container_of(arg->ht, struct 
nf_flowtable,
+rhashtable);
const struct flow_offload_tuple *tuple = arg->key;
const struct flow_offload_tuple_rhash *x = ptr;
 
+   if (!(flow_table->flags & NF_FLOWTABLE_NO_IFINDEX_FILTERING) &&
+   x->tuple.iifidx != tuple->iifidx)
+   return 1;
+
if (memcmp(>tuple, tuple, offsetof(struct flow_offload_tuple, 
__hash)))
return 1;
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..22cd32ec9889 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -305,7 +305,8 @@ static int tcf_ct_flow_table_get(struct tcf_ct_params 
*params)
 
ct_ft->nf_ft.type = _ct;
ct_ft->nf_ft.flags |= NF_FLOWTABLE_HW_OFFLOAD |
- NF_FLOWTABLE_COUNTER;
+ NF_FLOWTABLE_COUNTER |
+ NF_FLOWTABLE_NO_IFINDEX_FILTERING;
err = nf_flow_table_init(_ft->nf_ft);
if (err)
goto err_init;
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 1/1] net/sched: act_ct: Fix flow table lookup after ct clear or switching zones

2022-02-17 Thread Paul Blakey via dev
Flow table lookup is skipped if packet either went through ct clear
action (which set the IP_CT_UNTRACKED flag on the packet), or while
switching zones and there is already a connection associated with
the packet. This will result in no SW offload of the connection,
and the and connection not being removed from flow table with
TCP teardown (fin/rst packet).

To fix the above, remove these unneccary checks in flow
table lookup.

Fixes: 46475bb20f4b ("net/sched: act_ct: Software offload of established flows")
Signed-off-by: Paul Blakey 
---
 net/sched/act_ct.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f99247fc6468..33e70d60f0bf 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -533,11 +533,6 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params 
*p,
struct nf_conn *ct;
u8 dir;
 
-   /* Previously seen or loopback */
-   ct = nf_ct_get(skb, );
-   if ((ct && !nf_ct_is_template(ct)) || ctinfo == IP_CT_UNTRACKED)
-   return false;
-
switch (family) {
case NFPROTO_IPV4:
if (!tcf_ct_flow_table_fill_tuple_ipv4(skb, , ))
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-16 Thread Paul Blakey via dev
Ipv6 ttl, label and tos fields are modified without first
pulling/pushing the ipv6 header, which would have updated
the hw csum (if available). This might cause csum validation
when sending the packet to the stack, as can be seen in
the trace below.

Fix this by updating skb->csum if available.

Trace resulted by ipv6 ttl dec and then sending packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:7f9022282b20 EFLAGS: 0246 ORIG_RAX: 
ffda
[295242.132900] RAX: 0005 RBX: 0010 RCX: 

[295242.140120] RDX: 7f9022282ba8 RSI: 7f9022282a30 RDI: 
7f9014005c30
[295242.147337] RBP: 7f9014014d60 R08: 0020 R09: 
7f90254a8340
[295242.154557] R10: 7f9022282a28 R11: 0246 R12: 

[295242.161775] R13: 7f902308c000 R14: 002b R15: 
7f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey 
---
 Changelog:
v2->v3:
  Use u8 instead of __u8
  Fix sparse warnings on conversions
v1->v2:
  Replaced push pull rcsum checksum calc with actual checksum calc

 net/openvswitch/actions.c | 51 +--
 1 file changed, 43 insertions(+), 8 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..02c63308433a 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,48 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, u8 
ipv6_tclass, u8 mask)
 {
+   u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+   ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum =
+   ~csum_block_add(csum_block_sub(~skb->csum,
+  (__force __wsum) 
(ipv6_tclass << 4), 1),
+   (__force __wsum) (old_ipv6_tclass << 
4), 1);
+
+   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 
mask)
+{
+   u32 old_fl;
+
+   old_fl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  
nh->flow_lbl[2];
+   fl = OVS_MASKED(old_fl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
-   OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
-   OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
-   OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+   nh->flow_lbl[0] = (u8)(fl >> 16);
+   nh->flow_lbl[1] = (u8)(fl >> 8);
+   nh->flow_lbl[2] = (u8)fl;
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = ~csum_block_add(csum_block_sub(~skb->csum,
+  (__force __wsum) 
htonl(fl), 0),
+   (__force __wsum) htonl(old_fl), 0);
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, 
u8 mask)
+{
+   new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = ~csum_block_add(csum_block_sub(~skb->csum, (__force 
__wsum) new_ttl,
+  1),
+   (__force __wsum) nh->hop_limit, 1);
+

[ovs-dev] [PATCH net v2 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-14 Thread Paul Blakey via dev
Ipv6 ttl, label and tos fields are modified without first
pulling/pushing the ipv6 header, which would have updated
the hw csum (if available). This might cause csum validation
when sending the packet to the stack, as can be seen in
the trace below.

Fix this by updating skb->csum if available.

Trace resulted by ipv6 ttl dec and then sending packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:7f9022282b20 EFLAGS: 0246 ORIG_RAX: 
ffda
[295242.132900] RAX: 0005 RBX: 0010 RCX: 

[295242.140120] RDX: 7f9022282ba8 RSI: 7f9022282a30 RDI: 
7f9014005c30
[295242.147337] RBP: 7f9014014d60 R08: 0020 R09: 
7f90254a8340
[295242.154557] R10: 7f9022282a28 R11: 0246 R12: 

[295242.161775] R13: 7f902308c000 R14: 002b R15: 
7f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey 
---
 Changelog:
v1->v2:
  Replaced push pull rcsum checksum calc with actual checksum calc
 
 net/openvswitch/actions.c | 47 ---
 1 file changed, 39 insertions(+), 8 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..3725801cb040 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,44 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, __u8 
ipv6_tclass, __u8 mask)
 {
+   __u8 old_ipv6_tclass = ipv6_get_dsfield(nh);
+
+   ipv6_tclass = OVS_MASKED(old_ipv6_tclass, ipv6_tclass, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = ~csum_block_add(csum_block_sub(~skb->csum, 
ipv6_tclass << 4, 1),
+   old_ipv6_tclass << 4, 1);
+
+   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 
mask)
+{
+   u32 old_fl;
+
+   old_fl = nh->flow_lbl[0] << 16 |  nh->flow_lbl[1] << 8 |  
nh->flow_lbl[2];
+   fl = OVS_MASKED(old_fl, fl, mask);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
-   OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
-   OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
-   OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+   nh->flow_lbl[0] = (u8)(fl >> 16);
+   nh->flow_lbl[1] = (u8)(fl >> 8);
+   nh->flow_lbl[2] = (u8)fl;
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = ~csum_block_add(csum_block_sub(~skb->csum, 
htonl(fl), 0),
+   htonl(old_fl), 0);
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, 
u8 mask)
+{
+   new_ttl = OVS_MASKED(nh->hop_limit, new_ttl, mask);
+
+   if (skb->ip_summed == CHECKSUM_COMPLETE)
+   skb->csum = ~csum_block_add(csum_block_sub(~skb->csum, new_ttl, 
1),
+   nh->hop_limit, 1);
+   nh->hop_limit = new_ttl;
 }
 
 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +578,17 @@ static int set_ipv6(struct sk_buff *skb, struct 
sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
-   ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+   

Re: [ovs-dev] [PATCH net 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-12 Thread Paul Blakey via dev



On Thu, 10 Feb 2022, Jakub Kicinski wrote:

> On Thu, 10 Feb 2022 10:53:24 +0200 Paul Blakey wrote:
> > > The calls seem a little heavy for single byte replacements.
> > > Can you instead add a helper based on csum_replace4() maybe?
> > > 
> > > BTW doesn't pedit have the same problem?
> > 
> > I don't think they are heavier then csum_replace4,
> 
> csum_replace4 is a handful of instructions all of which will be inlined.
> csum_partial() is a function call and handles variable lengths.
> 
> > but they are more bulletproof in my opinion, since they handle both
> > the COMPLETE and PARTIAL csum cases (in __skb_postpull_rcsum())
> 
> Yes, that's why I said "add a helper based on", a skb helper which
> checks the csum type of the packet but instead of calling csum_partial
> for no reason does the adjustment directly.

Then sure, I will do that and send v2.

> 
> > and resemble what editing of the packet should have done - pull the
> > header, edit, and then push it back.
> 
> That's not what this code is doing, so the argument does not stand IMO.
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-10 Thread Paul Blakey via dev



On Wed, 9 Feb 2022, Jakub Kicinski wrote:

> On Mon, 7 Feb 2022 16:41:01 +0200 Paul Blakey wrote:
> > Ipv6 ttl, label and tos fields are modified without first
> > pulling/pushing the ipv6 header, which would have updated
> > the hw csum (if available). This might cause csum validation
> > when sending the packet to the stack, as can be seen in
> > the trace below.
> > 
> > Fix this by calling postpush/postpull checksum calculation
> > which will update the hw csum if needed.
> 
> > -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
> > +static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, __u8 
> > ipv6_tclass, __u8 mask)
> >  {
> > +   skb_postpull_rcsum(skb, nh, 4);
> > +
> > +   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
> > +
> > +   skb_postpush_rcsum(skb, nh, 4);
> > +}
> 
> The calls seem a little heavy for single byte replacements.
> Can you instead add a helper based on csum_replace4() maybe?
> 
> BTW doesn't pedit have the same problem?
> 


I don't think they are heavier then csum_replace4, but they are more
bulletproof in my opinion, since they handle both the COMPLETE and PARTIAL
csum cases (in __skb_postpull_rcsum()) and resemble what editing of the 
packet should have done - pull the header, edit, and then push it back.

RE pedit, not really the issue here, but i guess pedit should be followed 
by act_csum with the relevant checksum fields (as we do when offloading 
ovs set() action to tc pedit + csum actions).
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 1/1] openvswitch: Fix setting ipv6 fields causing hw csum failure

2022-02-07 Thread Paul Blakey via dev
Ipv6 ttl, label and tos fields are modified without first
pulling/pushing the ipv6 header, which would have updated
the hw csum (if available). This might cause csum validation
when sending the packet to the stack, as can be seen in
the trace below.

Fix this by calling postpush/postpull checksum calculation
which will update the hw csum if needed.

Trace resulted by ipv6 ttl dec and then sending packet
to conntrack [actions: set(ipv6(hlimit=63)),ct(zone=99)]:
[295241.900063] s_pf0vf2: hw csum failure
[295241.923191] Call Trace:
[295241.925728]  
[295241.927836]  dump_stack+0x5c/0x80
[295241.931240]  __skb_checksum_complete+0xac/0xc0
[295241.935778]  nf_conntrack_tcp_packet+0x398/0xba0 [nf_conntrack]
[295241.953030]  nf_conntrack_in+0x498/0x5e0 [nf_conntrack]
[295241.958344]  __ovs_ct_lookup+0xac/0x860 [openvswitch]
[295241.968532]  ovs_ct_execute+0x4a7/0x7c0 [openvswitch]
[295241.979167]  do_execute_actions+0x54a/0xaa0 [openvswitch]
[295242.001482]  ovs_execute_actions+0x48/0x100 [openvswitch]
[295242.006966]  ovs_dp_process_packet+0x96/0x1d0 [openvswitch]
[295242.012626]  ovs_vport_receive+0x6c/0xc0 [openvswitch]
[295242.028763]  netdev_frame_hook+0xc0/0x180 [openvswitch]
[295242.034074]  __netif_receive_skb_core+0x2ca/0xcb0
[295242.047498]  netif_receive_skb_internal+0x3e/0xc0
[295242.052291]  napi_gro_receive+0xba/0xe0
[295242.056231]  mlx5e_handle_rx_cqe_mpwrq_rep+0x12b/0x250 [mlx5_core]
[295242.062513]  mlx5e_poll_rx_cq+0xa0f/0xa30 [mlx5_core]
[295242.067669]  mlx5e_napi_poll+0xe1/0x6b0 [mlx5_core]
[295242.077958]  net_rx_action+0x149/0x3b0
[295242.086762]  __do_softirq+0xd7/0x2d6
[295242.090427]  irq_exit+0xf7/0x100
[295242.093748]  do_IRQ+0x7f/0xd0
[295242.096806]  common_interrupt+0xf/0xf
[295242.100559]  
[295242.102750] RIP: 0033:0x7f9022e88cbd
[295242.125246] RSP: 002b:7f9022282b20 EFLAGS: 0246 ORIG_RAX: 
ffda
[295242.132900] RAX: 0005 RBX: 0010 RCX: 

[295242.140120] RDX: 7f9022282ba8 RSI: 7f9022282a30 RDI: 
7f9014005c30
[295242.147337] RBP: 7f9014014d60 R08: 0020 R09: 
7f90254a8340
[295242.154557] R10: 7f9022282a28 R11: 0246 R12: 

[295242.161775] R13: 7f902308c000 R14: 002b R15: 
7f9022b71f40

Fixes: 3fdbd1ce11e5 ("openvswitch: add ipv6 'set' action")
Signed-off-by: Paul Blakey 
---
 net/openvswitch/actions.c | 29 -
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index 076774034bb9..77b99dc82f95 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -423,12 +423,32 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 
l4_proto,
memcpy(addr, new_addr, sizeof(__be32[4]));
 }
 
-static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl, u32 mask)
+static void set_ipv6_dsfield(struct sk_buff *skb, struct ipv6hdr *nh, __u8 
ipv6_tclass, __u8 mask)
 {
+   skb_postpull_rcsum(skb, nh, 4);
+
+   ipv6_change_dsfield(nh, ~mask, ipv6_tclass);
+
+   skb_postpush_rcsum(skb, nh, 4);
+}
+
+static void set_ipv6_fl(struct sk_buff *skb, struct ipv6hdr *nh, u32 fl, u32 
mask)
+{
+   skb_postpull_rcsum(skb, nh, 4);
+
/* Bits 21-24 are always unmasked, so this retains their values. */
OVS_SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16));
OVS_SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8));
OVS_SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask);
+
+   skb_postpush_rcsum(skb, nh, 4);
+}
+
+static void set_ipv6_ttl(struct sk_buff *skb, struct ipv6hdr *nh, u8 new_ttl, 
u8 mask)
+{
+   __skb_postpull_rcsum(skb, >hop_limit, sizeof(nh->hop_limit), 1);
+   OVS_SET_MASKED(nh->hop_limit, new_ttl, mask);
+   __skb_postpush_rcsum(skb, >hop_limit, sizeof(nh->hop_limit), 1);
 }
 
 static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl,
@@ -546,18 +566,17 @@ static int set_ipv6(struct sk_buff *skb, struct 
sw_flow_key *flow_key,
}
}
if (mask->ipv6_tclass) {
-   ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass);
+   set_ipv6_dsfield(skb, nh, key->ipv6_tclass, mask->ipv6_tclass);
flow_key->ip.tos = ipv6_get_dsfield(nh);
}
if (mask->ipv6_label) {
-   set_ipv6_fl(nh, ntohl(key->ipv6_label),
+   set_ipv6_fl(skb, nh, ntohl(key->ipv6_label),
ntohl(mask->ipv6_label));
flow_key->ipv6.label =
*(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
}
if (mask->ipv6_hlimit) {
-   OVS_SET_MASKED(nh->hop_limit, key->ipv6_hlimit,
-  mask->ipv6_hlimit);
+   set_ipv6_ttl(skb, nh, key->ipv6_hlimit, mask->ipv6_hlimit);
flow_key->ip.ttl = nh->hop_limit;
}
return 0;
-- 
2.30.1


[ovs-dev] [PATCH net-next v3 1/1] net/sched: Enable tc skb ext allocation on chain miss only when needed

2022-02-03 Thread Paul Blakey via dev
Currently tc skb extension is used to send miss info from
tc to ovs datapath module, and driver to tc. For the tc to ovs
miss it is currently always allocated even if it will not
be used by ovs datapath (as it depends on a requested feature).

Export the static key which is used by openvswitch module to
guard this code path as well, so it will be skipped if ovs
datapath doesn't need it. Enable this code path once
ovs datapath needs it.

Signed-off-by: Paul Blakey 
---
 Changelog:
   v1->v2:
 Added ifdef on exports so inline stubs won't be duplicated in !CLS_ACT case
   v2->v3:
 Renamed funcs 's/tc_skb_ext_tc_ovs/tc_skb_ext_tc/'

 include/net/pkt_cls.h  | 11 ++
 net/openvswitch/datapath.c | 18 +--
 net/openvswitch/datapath.h |  2 --
 net/openvswitch/flow.c |  3 ++-
 net/sched/cls_api.c| 45 +++---
 5 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 676cb8ea9e15..a3b57a93228a 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload {
};
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc);
+void tc_skb_ext_tc_enable(void);
+void tc_skb_ext_tc_disable(void);
+#define tc_skb_ext_tc_enabled() static_branch_unlikely(_skb_ext_tc)
+#else /* CONFIG_NET_CLS_ACT */
+static inline void tc_skb_ext_tc_enable(void) { }
+static inline void tc_skb_ext_tc_disable(void) { }
+#define tc_skb_ext_tc_enabled() false
+#endif
+
 #endif
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..7e8a39a35627 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "datapath.h"
 #include "flow.h"
@@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff 
*skb,
dp->user_features = 0;
 }
 
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 static int ovs_dp_set_upcall_portids(struct datapath *dp,
  const struct nlattr *ids)
 {
@@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, 
uint32_t cpu_id)
 
 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-   u32 user_features = 0;
+   u32 user_features = 0, old_features = dp->user_features;
int err;
 
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct 
nlattr *a[])
return err;
}
 
-   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
-   static_branch_enable(_recirc_sharing_support);
-   else
-   static_branch_disable(_recirc_sharing_support);
+   if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+   !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_enable();
+   else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_disable();
 
return 0;
 }
@@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = >table;
int i;
 
+   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+   tc_skb_ext_tc_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int 
dp_ifindex)
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..f6cd24fd530c 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "conntrack.h"
@@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->mac_proto = res;
 
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-   if (static_branch_unlikely(_recirc_sharing_support)) {
+   if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..29af45964b99 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,23 @@ static 

[ovs-dev] [PATCH net-next v2 1/1] net/sched: Enable tc skb ext allocation on chain miss only when needed

2022-02-01 Thread Paul Blakey via dev
Tc skb extension is used to send miss info from tc to ovs datapath
module, and is currently always allocated even if it will not
be used by ovs datapath (as it depends on a requested feature).

Export the static key which is used by openvswitch module to
guard this code path as well, so it will be skipped if ovs
datapath doesn't need it. Enable this code path once
ovs datapath needs it.

Signed-off-by: Paul Blakey 
---
 Changelog:
   v1->v2:
 Added ifdef on exports so inline stubs won't be duplicated in !CLS_ACT case

 include/net/pkt_cls.h  | 11 ++
 net/openvswitch/datapath.c | 18 +--
 net/openvswitch/datapath.h |  2 --
 net/openvswitch/flow.c |  3 ++-
 net/sched/cls_api.c| 45 +++---
 5 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 676cb8ea9e15..b4a34d3325d2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload {
};
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc_ovs);
+void tc_skb_ext_tc_ovs_enable(void);
+void tc_skb_ext_tc_ovs_disable(void);
+#define tc_skb_ext_tc_ovs_enabled() static_branch_unlikely(_skb_ext_tc_ovs)
+#else /* CONFIG_NET_CLS_ACT */
+static inline void tc_skb_ext_tc_ovs_enable(void) { }
+static inline void tc_skb_ext_tc_ovs_disable(void) { }
+#define tc_skb_ext_tc_ovs_enabled() false
+#endif
+
 #endif
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..4c73a768abc5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "datapath.h"
 #include "flow.h"
@@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff 
*skb,
dp->user_features = 0;
 }
 
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 static int ovs_dp_set_upcall_portids(struct datapath *dp,
  const struct nlattr *ids)
 {
@@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, 
uint32_t cpu_id)
 
 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-   u32 user_features = 0;
+   u32 user_features = 0, old_features = dp->user_features;
int err;
 
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct 
nlattr *a[])
return err;
}
 
-   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
-   static_branch_enable(_recirc_sharing_support);
-   else
-   static_branch_disable(_recirc_sharing_support);
+   if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+   !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_ovs_enable();
+   else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_ovs_disable();
 
return 0;
 }
@@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = >table;
int i;
 
+   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+   tc_skb_ext_tc_ovs_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int 
dp_ifindex)
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..5839b00c99bc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "conntrack.h"
@@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->mac_proto = res;
 
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-   if (static_branch_unlikely(_recirc_sharing_support)) {
+   if (tc_skb_ext_tc_ovs_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..781dbabdb0df 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,23 @@ static LIST_HEAD(tcf_proto_base);
 /* Protects list of registered TC modules. It is pure 

[ovs-dev] [PATCH net-next 1/1] net/sched: Enable tc skb ext allocation on chain miss only when needed

2022-01-30 Thread Paul Blakey via dev
Tc skb extension is used to send miss info from tc to ovs datapath
module, and is currently always allocated even if it will not
be used by ovs datapath (as it depends on a requested feature).

Export the static key which is used by openvswitch module to
guard this code path as well, so it will be skipped if ovs
datapath doesn't need it. Enable this code path once
ovs datapath needs it.

Signed-off-by: Paul Blakey 
---
 include/net/pkt_cls.h  | 11 ++
 net/openvswitch/datapath.c | 18 +---
 net/openvswitch/datapath.h |  2 --
 net/openvswitch/flow.c |  3 ++-
 net/sched/cls_api.c| 43 ++
 5 files changed, 54 insertions(+), 23 deletions(-)

diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 676cb8ea9e15..b4a34d3325d2 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -1028,4 +1028,15 @@ struct tc_fifo_qopt_offload {
};
 };
 
+#ifdef CONFIG_NET_CLS_ACT
+DECLARE_STATIC_KEY_FALSE(tc_skb_ext_tc_ovs);
+void tc_skb_ext_tc_ovs_enable(void);
+void tc_skb_ext_tc_ovs_disable(void);
+#define tc_skb_ext_tc_ovs_enabled() static_branch_unlikely(_skb_ext_tc_ovs)
+#else /* CONFIG_NET_CLS_ACT */
+static inline void tc_skb_ext_tc_ovs_enable(void) { }
+static inline void tc_skb_ext_tc_ovs_disable(void) { }
+#define tc_skb_ext_tc_ovs_enabled() false
+#endif
+
 #endif
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 67ad08320886..4c73a768abc5 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -37,6 +37,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "datapath.h"
 #include "flow.h"
@@ -1601,8 +1602,6 @@ static void ovs_dp_reset_user_features(struct sk_buff 
*skb,
dp->user_features = 0;
 }
 
-DEFINE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 static int ovs_dp_set_upcall_portids(struct datapath *dp,
  const struct nlattr *ids)
 {
@@ -1657,7 +1656,7 @@ u32 ovs_dp_get_upcall_portid(const struct datapath *dp, 
uint32_t cpu_id)
 
 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
 {
-   u32 user_features = 0;
+   u32 user_features = 0, old_features = dp->user_features;
int err;
 
if (a[OVS_DP_ATTR_USER_FEATURES]) {
@@ -1696,10 +1695,12 @@ static int ovs_dp_change(struct datapath *dp, struct 
nlattr *a[])
return err;
}
 
-   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
-   static_branch_enable(_recirc_sharing_support);
-   else
-   static_branch_disable(_recirc_sharing_support);
+   if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+   !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_ovs_enable();
+   else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
+(old_features & OVS_DP_F_TC_RECIRC_SHARING))
+   tc_skb_ext_tc_ovs_disable();
 
return 0;
 }
@@ -1839,6 +1840,9 @@ static void __dp_destroy(struct datapath *dp)
struct flow_table *table = >table;
int i;
 
+   if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
+   tc_skb_ext_tc_ovs_disable();
+
for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
struct vport *vport;
struct hlist_node *n;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index fcfe6cb46441..0cd29971a907 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -253,8 +253,6 @@ static inline struct datapath *get_dp(struct net *net, int 
dp_ifindex)
 extern struct notifier_block ovs_dp_device_notifier;
 extern struct genl_family dp_vport_genl_family;
 
-DECLARE_STATIC_KEY_FALSE(tc_recirc_sharing_support);
-
 void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key);
 void ovs_dp_detach_port(struct vport *);
 int ovs_dp_upcall(struct datapath *, struct sk_buff *,
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 02096f2ec678..5839b00c99bc 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include "conntrack.h"
@@ -895,7 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->mac_proto = res;
 
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-   if (static_branch_unlikely(_recirc_sharing_support)) {
+   if (tc_skb_ext_tc_ovs_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index d4e27c679123..683e870b11fa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -49,6 +49,21 @@ static LIST_HEAD(tcf_proto_base);
 /* Protects list of registered TC modules. It is pure SMP lock. */
 static DEFINE_RWLOCK(cls_mod_lock);
 
+DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc_ovs);

[ovs-dev] [PATCH net v2 1/1] net: openvswitch: Fix ct_state nat flags for conns arriving from tc

2022-01-06 Thread Paul Blakey via dev
Netfilter conntrack maintains NAT flags per connection indicating
whether NAT was configured for the connection. Openvswitch maintains
NAT flags on the per packet flow key ct_state field, indicating
whether NAT was actually executed on the packet.

When a packet misses from tc to ovs the conntrack NAT flags are set.
However, NAT was not necessarily executed on the packet because the
connection's state might still be in NEW state. As such, openvswitch
wrongly assumes that NAT was executed and sets an incorrect flow key
NAT flags.

Fix this, by flagging to openvswitch which NAT was actually done in
act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so
the packet flow key NAT flags will be correctly set.

Fixes: b57dc7c13ea9 ("net/sched: Introduce action ct")
Signed-off-by: Paul Blakey 
---
 Changelog:
   v1->v2:
 changed bool bitfield to u8
 removed extra space in struct
 added fixes line

 include/linux/skbuff.h  |  4 +++-
 include/net/pkt_sched.h |  4 +++-
 net/openvswitch/flow.c  | 16 +---
 net/sched/act_ct.c  |  6 ++
 net/sched/cls_api.c |  2 ++
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4507d77d6941..60ab0c2fe567 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -287,7 +287,9 @@ struct tc_skb_ext {
__u32 chain;
__u16 mru;
__u16 zone;
-   bool post_ct;
+   u8 post_ct:1;
+   u8 post_ct_snat:1;
+   u8 post_ct_dnat:1;
 };
 #endif
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9e71691c491b..9e7b21c0b3a6 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -197,7 +197,9 @@ struct tc_skb_cb {
struct qdisc_skb_cb qdisc_cb;
 
u16 mru;
-   bool post_ct;
+   u8 post_ct:1;
+   u8 post_ct_snat:1;
+   u8 post_ct_dnat:1;
u16 zone; /* Only valid if post_ct = true */
 };
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 6d262d9aa10e..02096f2ec678 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -859,7 +859,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
struct tc_skb_ext *tc_ext;
 #endif
-   bool post_ct = false;
+   bool post_ct = false, post_ct_snat = false, post_ct_dnat = false;
int res, err;
u16 zone = 0;
 
@@ -900,6 +900,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
+   post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false;
zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
@@ -911,8 +913,16 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
err = key_extract(skb, key);
if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
-   if (post_ct && !skb_get_nfct(skb))
-   key->ct_zone = zone;
+   if (post_ct) {
+   if (!skb_get_nfct(skb)) {
+   key->ct_zone = zone;
+   } else {
+   if (!post_ct_dnat)
+   key->ct_state &= ~OVS_CS_F_DST_NAT;
+   if (!post_ct_snat)
+   key->ct_state &= ~OVS_CS_F_SRC_NAT;
+   }
+   }
}
return err;
 }
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ab3591408419..2a17eb77c904 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -839,6 +839,12 @@ static int ct_nat_execute(struct sk_buff *skb, struct 
nf_conn *ct,
}
 
err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+   if (err == NF_ACCEPT) {
+   if (maniptype == NF_NAT_MANIP_SRC)
+   tc_skb_cb(skb)->post_ct_snat = 1;
+   if (maniptype == NF_NAT_MANIP_DST)
+   tc_skb_cb(skb)->post_ct_dnat = 1;
+   }
 out:
return err;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 35c74bdde848..cc9409aa755e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,8 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->post_ct_snat = cb->post_ct_snat;
+   ext->post_ct_dnat = cb->post_ct_dnat;
ext->zone = cb->zone;
}
 
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org

Re: [ovs-dev] [PATCH net 1/1] net: openvswitch: Fix ct_state nat flags for conns arriving from tc

2022-01-05 Thread Paul Blakey via dev


On Wed, 5 Jan 2022, Daniel Borkmann wrote:

> On 1/5/22 3:57 PM, Jamal Hadi Salim wrote:
> > On 2022-01-04 03:28, Paul Blakey wrote:
> > [..]
> >> --- a/include/linux/skbuff.h
> >> +++ b/include/linux/skbuff.h
> >> @@ -287,7 +287,9 @@ struct tc_skb_ext {
> >>   __u32 chain;
> >>   __u16 mru;
> >>   __u16 zone;
> >> -    bool post_ct;
> >> +    bool post_ct:1;
> >> +    bool post_ct_snat:1;
> >> +    bool post_ct_dnat:1;
> >>   };
> > 
> > is skb_ext intended only for ovs? If yes, why does it belong
> > in the core code? Ex: Looking at tcf_classify() which is such
> > a core function in the fast path any packet going via tc, it
> > is now encumbered with with checking presence of skb_ext.
> > I know passing around metadata is a paramount requirement
> > for programmability but this is getting messier with speacial
> > use cases for ovs and/or offload...
> 
> Full ack on the bloat for corner cases like ovs offload, especially
> given distros just enable most stuff anyway and therefore no light
> fast path as with !CONFIG_NET_TC_SKB_EXT. :(
> 
> Could this somehow be hidden behind static key or such if offloads
> are not used, so we can shrink it back to just calling into plain
> __tcf_classify() for sw-only use cases (like BPF)?
> 
> 

It is used for both tc -> ovs and driver -> tc path.

I think I can do what you suggest adn will work on something like
that,  but this specific patch  doesn't really change the ext 
allocation/derefences count (and probably  not the size as well).
So can  we take  this (not yet posted v2 after fixing what already 
mentioned) and I'll do a patch of what you suggest in net-next?

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net 1/1] net: openvswitch: Fix ct_state nat flags for conns arriving from tc

2022-01-05 Thread Paul Blakey via dev




On Tue, 4 Jan 2022, Jakub Kicinski wrote:

> On Tue, 4 Jan 2022 10:28:21 +0200 Paul Blakey wrote:
> > Netfilter conntrack maintains NAT flags per connection indicating
> > whether NAT was configured for the connection. Openvswitch maintains
> > NAT flags on the per packet flow key ct_state field, indicating
> > whether NAT was actually executed on the packet.
> > 
> > When a packet misses from tc to ovs the conntrack NAT flags are set.
> > However, NAT was not necessarily executed on the packet because the
> > connection's state might still be in NEW state. As such, openvswitch wrongly
> > assumes that NAT was executed and sets an incorrect flow key NAT flags.
> > 
> > Fix this, by flagging to openvswitch which NAT was actually done in
> > act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so
> > the packet flow key NAT flags will be correctly set.
> 
> Fixes ?

I wasn't sure which patches to blame, I guess the bug was there from the
introduction of action ct in tc, so I'll blame that. 

> 
> > Signed-off-by: Paul Blakey 
> 
> > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> > index 4507d77d6941..bab45a009310 100644
> > --- a/include/linux/skbuff.h
> > +++ b/include/linux/skbuff.h
> > @@ -287,7 +287,9 @@ struct tc_skb_ext {
> > __u32 chain;
> > __u16 mru;
> > __u16 zone;
> > -   bool post_ct;
> > +   bool post_ct:1;
> > +   bool post_ct_snat:1;
> > +   bool post_ct_dnat:1;
> 
> single bit bool variables seem weird, use a unsigned int type, like u8.
> 
> >  };
> >  #endif
> >  
> > diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
> > index 9e71691c491b..a171dfa91910 100644
> > --- a/include/net/pkt_sched.h
> > +++ b/include/net/pkt_sched.h
> > @@ -197,7 +197,9 @@ struct tc_skb_cb {
> > struct qdisc_skb_cb qdisc_cb;
> >  
> > u16 mru;
> > -   bool post_ct;
> > +   bool post_ct: 1;
> 
> extra space

Will remove, and send v2.

> 
> > +   bool post_ct_snat:1;
> > +   bool post_ct_dnat:1;
> > u16 zone; /* Only valid if post_ct = true */
> >  };
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 1/1] net: openvswitch: Fix ct_state nat flags for conns arriving from tc

2022-01-04 Thread Paul Blakey via dev
Netfilter conntrack maintains NAT flags per connection indicating
whether NAT was configured for the connection. Openvswitch maintains
NAT flags on the per packet flow key ct_state field, indicating
whether NAT was actually executed on the packet.

When a packet misses from tc to ovs the conntrack NAT flags are set.
However, NAT was not necessarily executed on the packet because the
connection's state might still be in NEW state. As such, openvswitch wrongly
assumes that NAT was executed and sets an incorrect flow key NAT flags.

Fix this, by flagging to openvswitch which NAT was actually done in
act_ct via tc_skb_ext and tc_skb_cb to the openvswitch module, so
the packet flow key NAT flags will be correctly set.

Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h  |  4 +++-
 include/net/pkt_sched.h |  4 +++-
 net/openvswitch/flow.c  | 16 +---
 net/sched/act_ct.c  |  6 ++
 net/sched/cls_api.c |  2 ++
 5 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4507d77d6941..bab45a009310 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -287,7 +287,9 @@ struct tc_skb_ext {
__u32 chain;
__u16 mru;
__u16 zone;
-   bool post_ct;
+   bool post_ct:1;
+   bool post_ct_snat:1;
+   bool post_ct_dnat:1;
 };
 #endif
 
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 9e71691c491b..a171dfa91910 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -197,7 +197,9 @@ struct tc_skb_cb {
struct qdisc_skb_cb qdisc_cb;
 
u16 mru;
-   bool post_ct;
+   bool post_ct: 1;
+   bool post_ct_snat:1;
+   bool post_ct_dnat:1;
u16 zone; /* Only valid if post_ct = true */
 };
 
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 6d262d9aa10e..02096f2ec678 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -859,7 +859,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
struct tc_skb_ext *tc_ext;
 #endif
-   bool post_ct = false;
+   bool post_ct = false, post_ct_snat = false, post_ct_dnat = false;
int res, err;
u16 zone = 0;
 
@@ -900,6 +900,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
+   post_ct_dnat = post_ct ? tc_ext->post_ct_dnat : false;
zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
@@ -911,8 +913,16 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
err = key_extract(skb, key);
if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
-   if (post_ct && !skb_get_nfct(skb))
-   key->ct_zone = zone;
+   if (post_ct) {
+   if (!skb_get_nfct(skb)) {
+   key->ct_zone = zone;
+   } else {
+   if (!post_ct_dnat)
+   key->ct_state &= ~OVS_CS_F_DST_NAT;
+   if (!post_ct_snat)
+   key->ct_state &= ~OVS_CS_F_SRC_NAT;
+   }
+   }
}
return err;
 }
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index ab3591408419..2a17eb77c904 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -839,6 +839,12 @@ static int ct_nat_execute(struct sk_buff *skb, struct 
nf_conn *ct,
}
 
err = nf_nat_packet(ct, ctinfo, hooknum, skb);
+   if (err == NF_ACCEPT) {
+   if (maniptype == NF_NAT_MANIP_SRC)
+   tc_skb_cb(skb)->post_ct_snat = 1;
+   if (maniptype == NF_NAT_MANIP_DST)
+   tc_skb_cb(skb)->post_ct_dnat = 1;
+   }
 out:
return err;
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 35c74bdde848..cc9409aa755e 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,8 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->post_ct_snat = cb->post_ct_snat;
+   ext->post_ct_dnat = cb->post_ct_dnat;
ext->zone = cb->zone;
}
 
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next 3/3] net/mlx5: CT: Set flow source hint from provided tuple device

2022-01-03 Thread Paul Blakey via dev
Get originating device from tuple offload metadata match ingress_ifindex,
and set flow_source hint to either LOCAL for vf/sf reps, UPLINK for
uplink/wire/tunnel devices/bond, or ANY (as before this patch)
for all others.

This allows lower layer (software steering or firmware) to insert the tuple
rule only in one table (either rx or tx) instead of two (rx and tx).

Signed-off-by: Paul Blakey 
---
 drivers/net/ethernet/mellanox/mlx5/core/dev.c |  2 +-
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c| 51 +--
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  1 +
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c 
b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
index a8b84d53dfb0..ba6dad97e308 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c
@@ -538,7 +538,7 @@ int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev)
return add_drivers(dev);
 }
 
-static bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev 
*peer_dev)
+bool mlx5_same_hw_devs(struct mlx5_core_dev *dev, struct mlx5_core_dev 
*peer_dev)
 {
u64 fsystem_guid, psystem_guid;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 9f33729e7fc4..4a0d38d219ed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "lib/fs_chains.h"
 #include "en/tc_ct.h"
@@ -326,7 +327,33 @@ mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
 }
 
 static int
-mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec 
*spec,
+mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
+struct net_device *ndev)
+{
+   struct mlx5e_priv *other_priv = netdev_priv(ndev);
+   struct mlx5_core_dev *mdev = ct_priv->dev;
+   bool vf_rep, uplink_rep;
+
+   vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, 
other_priv->mdev);
+   uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, 
other_priv->mdev);
+
+   if (vf_rep)
+   return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+   if (uplink_rep)
+   return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+   if (is_vlan_dev(ndev))
+   return mlx5_tc_ct_get_flow_source_match(ct_priv, 
vlan_dev_real_dev(ndev));
+   if (netif_is_macvlan(ndev))
+   return mlx5_tc_ct_get_flow_source_match(ct_priv, 
macvlan_dev_real_dev(ndev));
+   if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
+   return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
+
+   return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
+}
+
+static int
+mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
+  struct mlx5_flow_spec *spec,
   struct flow_rule *rule)
 {
void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -341,8 +368,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct 
mlx5_flow_spec *spec,
 
flow_rule_match_basic(rule, );
 
-   mlx5e_tc_set_ethertype(priv->mdev, , true, headers_c,
-  headers_v);
+   mlx5e_tc_set_ethertype(ct_priv->dev, , true, headers_c, 
headers_v);
MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 match.mask->ip_proto);
MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -438,6 +464,23 @@ mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct 
mlx5_flow_spec *spec,
 ntohs(match.key->flags));
}
 
+   if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
+   struct flow_match_meta match;
+
+   flow_rule_match_meta(rule, );
+
+   if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
+   struct net_device *dev;
+
+   dev = dev_get_by_index(_net, 
match.key->ingress_ifindex);
+   if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, 
flow_source))
+   spec->flow_context.flow_source =
+   
mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
+
+   dev_put(dev);
+   }
+   }
+
return 0;
 }
 
@@ -770,7 +813,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
attr->esw_attr->in_mdev = priv->mdev;
 
-   mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, 
flow_rule);
+   mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, 
MLX5_CT_ZONE_MASK);
 

[ovs-dev] [PATCH net-next 1/3] net/sched: act_ct: Fill offloading tuple iifidx

2022-01-03 Thread Paul Blakey via dev
Driver offloading ct tuples can use the information of which devices
received the packets that created the offloaded connections, to
more efficiently offload them only to the relevant device.

Add new act_ct nf conntrack extension, which is used to store the skb
devices before offloading the connection, and then fill in the tuple
iifindex so drivers can get the device via metadata dissector match.

Signed-off-by: Oz Shlomo 
Signed-off-by: Paul Blakey 
---
 include/net/netfilter/nf_conntrack_act_ct.h | 50 +
 include/net/netfilter/nf_conntrack_extend.h |  4 ++
 net/netfilter/nf_conntrack_core.c   |  6 ++-
 net/sched/act_ct.c  | 27 +++
 4 files changed, 86 insertions(+), 1 deletion(-)
 create mode 100644 include/net/netfilter/nf_conntrack_act_ct.h

diff --git a/include/net/netfilter/nf_conntrack_act_ct.h 
b/include/net/netfilter/nf_conntrack_act_ct.h
new file mode 100644
index ..078d3c52c03f
--- /dev/null
+++ b/include/net/netfilter/nf_conntrack_act_ct.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NF_CONNTRACK_ACT_CT_H
+#define _NF_CONNTRACK_ACT_CT_H
+
+#include 
+#include 
+#include 
+
+struct nf_conn_act_ct_ext {
+   int ifindex[IP_CT_DIR_MAX];
+};
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_find(const struct 
nf_conn *ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+   return nf_ct_ext_find(ct, NF_CT_EXT_ACT_CT);
+#else
+   return NULL;
+#endif
+}
+
+static inline struct nf_conn_act_ct_ext *nf_conn_act_ct_ext_add(struct nf_conn 
*ct)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+   struct nf_conn_act_ct_ext *act_ct = nf_ct_ext_find(ct, 
NF_CT_EXT_ACT_CT);
+
+   if (act_ct)
+   return act_ct;
+
+   act_ct = nf_ct_ext_add(ct, NF_CT_EXT_ACT_CT, GFP_ATOMIC);
+   return act_ct;
+#else
+   return NULL;
+#endif
+}
+
+static inline void nf_conn_act_ct_ext_fill(struct sk_buff *skb, struct nf_conn 
*ct,
+  enum ip_conntrack_info ctinfo)
+{
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+   struct nf_conn_act_ct_ext *act_ct_ext;
+
+   act_ct_ext = nf_conn_act_ct_ext_find(ct);
+   if (dev_net(skb->dev) == _net && act_ct_ext)
+   act_ct_ext->ifindex[CTINFO2DIR(ctinfo)] = skb->dev->ifindex;
+#endif
+}
+
+#endif /* _NF_CONNTRACK_ACT_CT_H */
diff --git a/include/net/netfilter/nf_conntrack_extend.h 
b/include/net/netfilter/nf_conntrack_extend.h
index e1e588387103..c7515d82ab06 100644
--- a/include/net/netfilter/nf_conntrack_extend.h
+++ b/include/net/netfilter/nf_conntrack_extend.h
@@ -27,6 +27,9 @@ enum nf_ct_ext_id {
 #endif
 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
NF_CT_EXT_SYNPROXY,
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+   NF_CT_EXT_ACT_CT,
 #endif
NF_CT_EXT_NUM,
 };
@@ -40,6 +43,7 @@ enum nf_ct_ext_id {
 #define NF_CT_EXT_TIMEOUT_TYPE struct nf_conn_timeout
 #define NF_CT_EXT_LABELS_TYPE struct nf_conn_labels
 #define NF_CT_EXT_SYNPROXY_TYPE struct nf_conn_synproxy
+#define NF_CT_EXT_ACT_CT_TYPE struct nf_conn_act_ct_ext
 
 /* Extensions: optional stuff which isn't permanently in struct. */
 struct nf_ct_ext {
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index d7e313548066..01d6589fba6e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -47,6 +47,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -2626,7 +2627,7 @@ int nf_conntrack_set_hashsize(const char *val, const 
struct kernel_param *kp)
 static __always_inline unsigned int total_extension_size(void)
 {
/* remember to add new extensions below */
-   BUILD_BUG_ON(NF_CT_EXT_NUM > 9);
+   BUILD_BUG_ON(NF_CT_EXT_NUM > 10);
 
return sizeof(struct nf_ct_ext) +
   sizeof(struct nf_conn_help)
@@ -2649,6 +2650,9 @@ static __always_inline unsigned int 
total_extension_size(void)
 #endif
 #if IS_ENABLED(CONFIG_NETFILTER_SYNPROXY)
+ sizeof(struct nf_conn_synproxy)
+#endif
+#if IS_ENABLED(CONFIG_NET_ACT_CT)
+   + sizeof(struct nf_conn_act_ct_ext)
 #endif
;
 };
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index f9afb5abff21..ebdf7caf7084 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -32,6 +32,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 static struct workqueue_struct *act_ct_wq;
@@ -56,6 +57,12 @@ static const struct rhashtable_params zones_params = {
.automatic_shrinking = true,
 };
 
+static struct nf_ct_ext_type act_ct_extend __read_mostly = {
+   .len= sizeof(struct nf_conn_act_ct_ext),
+   .align  = __alignof__(struct nf_conn_act_ct_ext),
+   .id = NF_CT_EXT_ACT_CT,
+};
+
 static struct flow_action_entry *
 tcf_ct_flow_table_flow_action_get_next(struct flow_action *flow_action)
 {
@@ -358,6 +365,7 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table 
*ct_ft,
   

[ovs-dev] [PATCH net-next 2/3] net: openvswitch: Fill act ct extension

2022-01-03 Thread Paul Blakey via dev
To give drivers the originating device information for optimized
connection tracking offload, fill in act ct extension with
ifindex from skb.

Signed-off-by: Paul Blakey 
---
 net/openvswitch/conntrack.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 1b5eae57bc90..13294a55073a 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -25,6 +25,8 @@
 #include 
 #endif
 
+#include 
+
 #include "datapath.h"
 #include "conntrack.h"
 #include "flow.h"
@@ -1045,6 +1047,8 @@ static int __ovs_ct_lookup(struct net *net, struct 
sw_flow_key *key,
 */
nf_ct_set_tcp_be_liberal(ct);
}
+
+   nf_conn_act_ct_ext_fill(skb, ct, ctinfo);
}
 
return 0;
@@ -1245,6 +1249,8 @@ static int ovs_ct_commit(struct net *net, struct 
sw_flow_key *key,
 >labels.mask);
if (err)
return err;
+
+   nf_conn_act_ct_ext_add(ct);
} else if (IS_ENABLED(CONFIG_NF_CONNTRACK_LABELS) &&
   labels_nonzero(>labels.mask)) {
err = ovs_ct_set_labels(ct, key, >labels.value,
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next 0/3] net/sched: Pass originating device to drivers offloading ct connection

2022-01-03 Thread Paul Blakey via dev
Hi,

Currently, drivers register to a ct zone that can be shared by multiple
devices. This can be inefficient for the driver to offload, as it
needs to handle all the cases where the tuple can come from,
instead of where it's most likely will arive from.

For example, consider the following tc rules:
tc filter add dev dev1 ... flower action ct commit zone 5 \
   action mirred egress redirect dev dev2

tc filter add dev dev2 ... flower action ct zone 5 \
   action goto chain chain 2
tc filter add dev dev2 ... flower ct_state +trk+est ... \
   action mirred egress redirect dev dev1

Both dev2 and dev1 register to the zone 5 flow table (created
by act_ct). A tuple originating on dev1, going to dev2, will
be offloaded to both devices, and both will need to offload
both directions, resulting in 4 total rules. The traffic
will only hit originiating tuple on dev1, and reply tuple
on dev2.

By passing the originating device that created the connection
with the tuple, dev1 can choose to offload only the originating
tuple, and dev2 only the reply tuple. Resulting in a more
efficient offload.

The first patch adds an act_ct nf conntrack extension, to
temporarily store the originiating device from the skb before
offloading the connection once the connection is established.
Once sent to offload, it fills the tuple originating device.

The second patch get this information from tuples
which pass in openvswitch.

The third patch is Mellanox driver ct offload implementation using
this information to provide a hint to firmware of where this
offloaded tuple packets will arrive from (LOCAL or UPLINK port),
and thus increase insertion rate.

Paul Blakey (3):
  net/sched: act_ct: Fill offloading tuple iifidx
  net: openvswitch: Fill act ct extension
  net/mlx5: CT: Set flow source hint from provided tuple device

 drivers/net/ethernet/mellanox/mlx5/core/dev.c |  2 +-
 .../ethernet/mellanox/mlx5/core/en/tc_ct.c| 51 +--
 .../ethernet/mellanox/mlx5/core/mlx5_core.h   |  1 +
 include/net/netfilter/nf_conntrack_act_ct.h   | 50 ++
 include/net/netfilter/nf_conntrack_extend.h   |  4 ++
 net/netfilter/nf_conntrack_core.c |  6 ++-
 net/openvswitch/conntrack.c   |  6 +++
 net/sched/act_ct.c| 27 ++
 8 files changed, 141 insertions(+), 6 deletions(-)
 create mode 100644 include/net/netfilter/nf_conntrack_act_ct.h

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-14 Thread Paul Blakey via dev
If ct rejects a flow, it removes the conntrack info from the skb.
act_ct sets the post_ct variable so the dissector will see this case
as an +tracked +invalid state, but the zone id is lost with the
conntrack info.

To restore the zone id on such cases, set the last executed zone,
via the tc control block, when passing ct, and read it back in the
dissector if there is no ct info on the skb (invalid connection).

Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h| 2 +-
 include/net/pkt_sched.h   | 1 +
 net/core/flow_dissector.c | 3 ++-
 net/sched/act_ct.c| 1 +
 net/sched/cls_flower.c| 3 ++-
 5 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d47..2ecf8cfd2223 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
u16 *ctinfo_map, size_t mapsize,
-   bool post_ct);
+   bool post_ct, u16 zone);
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 struct flow_dissector *flow_dissector,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 05f18e81f3e8..9e71691c491b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -198,6 +198,7 @@ struct tc_skb_cb {
 
u16 mru;
bool post_ct;
+   u16 zone; /* Only valid if post_ct = true */
 };
 
 static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3255f57f5131..1b094c481f1d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -238,7 +238,7 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
-   size_t mapsize, bool post_ct)
+   size_t mapsize, bool post_ct, u16 zone)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
@@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+   key->ct_zone = zone;
return;
}
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 98e248b9c0b1..ab3591408419 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1049,6 +1049,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
skb_push_rcsum(skb, nh_ofs);
 
tc_skb_cb(skb)->post_ct = true;
+   tc_skb_cb(skb)->zone = p->zone;
 out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9782b93db1b3..ef54ed395874 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -311,6 +311,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
 {
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
bool post_ct = tc_skb_cb(skb)->post_ct;
+   u16 zone = tc_skb_cb(skb)->zone;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -328,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
skb_flow_dissect_ct(skb, >dissector, _key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map),
-   post_ct);
+   post_ct, zone);
skb_flow_dissect_hash(skb, >dissector, _key);
skb_flow_dissect(skb, >dissector, _key,
 FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 3/3] net: openvswitch: Fix matching zone id for invalid conns arriving from tc

2021-12-14 Thread Paul Blakey via dev
Zone id is not restored if we passed ct and ct rejected the connection,
as there is no ct info on the skb.

Save the zone from tc skb cb to tc skb extension and pass it on to
ovs, use that info to restore the zone id for invalid connections.

Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do 
conntrack in act_ct")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h | 1 +
 net/openvswitch/flow.c | 8 +++-
 net/sched/cls_api.c| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ecf8cfd2223..4507d77d6941 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -286,6 +286,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
__u32 chain;
__u16 mru;
+   __u16 zone;
bool post_ct;
 };
 #endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9713035b89e3..6d262d9aa10e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
bool post_ct = false;
int res, err;
+   u16 zone = 0;
 
/* Extract metadata from packet. */
if (tun_info) {
@@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
 
err = key_extract(skb, key);
-   if (!err)
+   if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
+   if (post_ct && !skb_get_nfct(skb))
+   key->ct_zone = zone;
+   }
return err;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a5050999d607..bede2bd47065 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->zone = cb->zone;
}
 
return ret;
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 1/3] net/sched: Extend qdisc control block with tc control block

2021-12-14 Thread Paul Blakey via dev
BPF layer extends the qdisc control block via struct bpf_skb_data_end
and because of that there is no more room to add variables to the
qdisc layer control block without going over the skb->cb size.

Extend the qdisc control block with a tc control block,
and move all tc related variables to there as a pre-step for
extending the tc control block with additional members.

Signed-off-by: Paul Blakey 
---
 include/net/pkt_sched.h   | 15 +++
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/sched/act_ct.c| 14 +++---
 net/sched/cls_api.c   |  6 --
 net/sched/cls_flower.c|  3 ++-
 net/sched/sch_frag.c  |  3 ++-
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
 }
 
+struct tc_skb_cb {
+   struct qdisc_skb_cb qdisc_cb;
+
+   u16 mru;
+   bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+   struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+   BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+   return cb;
+}
+
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..c70e6d2b2fdd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -447,8 +447,6 @@ struct qdisc_skb_cb {
};
 #define QDISC_CB_PRIV_LEN 20
unsigned char   data[QDISC_CB_PRIV_LEN];
-   u16 mru;
-   boolpost_ct;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..c4708e2487fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct 
net_device *dev)
return skb;
 
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
switch (tcf_classify(skb, miniq->block, miniq->filter_list, _res, 
false)) {
@@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct 
packet_type **pt_prev, int *ret,
}
 
qdisc_skb_cb(skb)->pkt_len = skb->len;
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..98e248b9c0b1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, 
struct sk_buff *skb,
   u8 family, u16 zone, bool *defrag)
 {
enum ip_conntrack_info ctinfo;
-   struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+   u16 mru;
 
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, );
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
return err;
 
skb_get(skb);
-   cb = *qdisc_skb_cb(skb);
+   mru = tc_skb_cb(skb)->mru;
 
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IPCB(skb)->frag_max_size;
+   mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IP6CB(skb)->frag_max_size;
+   mru = IP6CB(skb)->frag_max_size;
}
 #else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
}
 
if (err != -EINPROGRESS)
-   *qdisc_skb_cb(skb) = cb;
+   tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
tcf_action_update_bstats(>common, skb);
 
if (clear) {
-   qdisc_skb_cb(skb)->post_ct = false;
+   

[ovs-dev] [PATCH net v3 0/3] net/sched: Fix ct zone matching for invalid conntrack state

2021-12-14 Thread Paul Blakey via dev
Hi,

Currently, when a packet is marked as invalid conntrack_in in act_ct,
post_ct will be set, and connection info (nf_conn) will be removed
from the skb. Later openvswitch and flower matching will parse this
as ct_state=+trk+inv. But because the connection info is missing,
there is also no zone info to match against even though the packet
is tracked.

This series fixes that, by passing the last executed zone by act_ct.
The zone info is passed along from act_ct to the ct flow dissector
(used by flower to extract zone info) and to ovs, the same way as post_ct
is passed, via qdisc layer skb cb to dissector, and via skb extension
to OVS.

Since adding any more data to qdisc skb cb, there will be no room 
for BPF skb cb to extend it and stay under skb->cb size, this series
moves the tc related info from within qdisc skb cb to a tc specific cb
that also extends it.

---
Changelog:
v3->v2:
  Moved tc skb cb details from dissector back to flower
v1->v2:
  Cover letter wording
  Added blamed CCs

Paul Blakey (3):
  net/sched: Extend qdisc control block with tc control block
  net/sched: flow_dissector: Fix matching on zone id for invalid conns
  net: openvswitch: Fix matching zone id for invalid conns arriving from tc

 include/linux/skbuff.h|  3 ++-
 include/net/pkt_sched.h   | 16 
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/core/flow_dissector.c |  3 ++-
 net/openvswitch/flow.c|  8 +++-
 net/sched/act_ct.c| 15 ---
 net/sched/cls_api.c   |  7 +--
 net/sched/cls_flower.c|  6 --
 net/sched/sch_frag.c  |  3 ++-
 10 files changed, 50 insertions(+), 21 deletions(-)

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net v2 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-14 Thread Paul Blakey via dev



On Sat, 11 Dec 2021, Jakub Kicinski wrote:

> On Thu, 9 Dec 2021 09:57:33 +0200 Paul Blakey wrote:
> > @@ -238,10 +239,12 @@ void
> >  skb_flow_dissect_ct(const struct sk_buff *skb,
> > struct flow_dissector *flow_dissector,
> > void *target_container, u16 *ctinfo_map,
> > -   size_t mapsize, bool post_ct)
> > +   size_t mapsize)
> >  {
> >  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> > +   bool post_ct = tc_skb_cb(skb)->post_ct;
> > struct flow_dissector_key_ct *key;
> > +   u16 zone = tc_skb_cb(skb)->zone;
> > enum ip_conntrack_info ctinfo;
> > struct nf_conn_labels *cl;
> > struct nf_conn *ct;
> > @@ -260,6 +263,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
> > if (!ct) {
> > key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
> > TCA_FLOWER_KEY_CT_FLAGS_INVALID;
> > +   key->ct_zone = zone;
> > return;
> > }
> >  
> 
> Why is flow dissector expecting skb cb to be TC now?
> Please keep the appropriate abstractions intact.
> 

Will do. Sending v3.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v2 3/3] net: openvswitch: Fix matching zone id for invalid conns arriving from tc

2021-12-08 Thread Paul Blakey via dev
Zone id is not restored if we passed ct and ct rejected the connection,
as there is no ct info on the skb.

Save the zone from tc skb cb to tc skb extension and pass it on to
ovs, use that info to restore the zone id for invalid connections.

Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do 
conntrack in act_ct")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h | 1 +
 net/openvswitch/flow.c | 8 +++-
 net/sched/cls_api.c| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 155eb2ec54d8..28ad0c6bd0d5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -286,6 +286,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
__u32 chain;
__u16 mru;
+   __u16 zone;
bool post_ct;
 };
 #endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9713035b89e3..6d262d9aa10e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
bool post_ct = false;
int res, err;
+   u16 zone = 0;
 
/* Extract metadata from packet. */
if (tun_info) {
@@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
 
err = key_extract(skb, key);
-   if (!err)
+   if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
+   if (post_ct && !skb_get_nfct(skb))
+   key->ct_zone = zone;
+   }
return err;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a5050999d607..bede2bd47065 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->zone = cb->zone;
}
 
return ret;
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v2 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-08 Thread Paul Blakey via dev
If ct rejects a flow, it removes the conntrack info from the skb.
act_ct sets the post_ct variable so the dissector will see this case
as an +tracked +invalid state, but the zone id is lost with the
conntrack info.

To restore the zone id on such cases, set the last executed zone,
via the tc control block, when passing ct, and read it back in the
dissector if there is no ct info on the skb (invalid connection).

Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h| 3 +--
 include/net/pkt_sched.h   | 1 +
 net/core/flow_dissector.c | 6 +-
 net/sched/act_ct.c| 1 +
 net/sched/cls_flower.c| 5 ++---
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d47..155eb2ec54d8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1379,8 +1379,7 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
-   u16 *ctinfo_map, size_t mapsize,
-   bool post_ct);
+   u16 *ctinfo_map, size_t mapsize);
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 struct flow_dissector *flow_dissector,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 05f18e81f3e8..9e71691c491b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -198,6 +198,7 @@ struct tc_skb_cb {
 
u16 mru;
bool post_ct;
+   u16 zone; /* Only valid if post_ct = true */
 };
 
 static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3255f57f5131..b52a4370162b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,10 +239,12 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
-   size_t mapsize, bool post_ct)
+   size_t mapsize)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+   bool post_ct = tc_skb_cb(skb)->post_ct;
struct flow_dissector_key_ct *key;
+   u16 zone = tc_skb_cb(skb)->zone;
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *cl;
struct nf_conn *ct;
@@ -260,6 +263,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+   key->ct_zone = zone;
return;
}
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 98e248b9c0b1..ab3591408419 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1049,6 +1049,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
skb_push_rcsum(skb, nh_ofs);
 
tc_skb_cb(skb)->post_ct = true;
+   tc_skb_cb(skb)->zone = p->zone;
 out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9782b93db1b3..c1a017822c6e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -310,7 +310,6 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
   struct tcf_result *res)
 {
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
-   bool post_ct = tc_skb_cb(skb)->post_ct;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -327,8 +326,8 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
skb_flow_dissect_tunnel_info(skb, >dissector, _key);
skb_flow_dissect_ct(skb, >dissector, _key,
fl_ct_info_to_flower_map,
-   ARRAY_SIZE(fl_ct_info_to_flower_map),
-   post_ct);
+   ARRAY_SIZE(fl_ct_info_to_flower_map));
+
skb_flow_dissect_hash(skb, >dissector, _key);
skb_flow_dissect(skb, >dissector, _key,
 FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v2 1/3] net/sched: Extend qdisc control block with tc control block

2021-12-08 Thread Paul Blakey via dev
BPF layer extends the qdisc control block via struct bpf_skb_data_end
and because of that there is no more room to add variables to the
qdisc layer control block without going over the skb->cb size.

Extend the qdisc control block with a tc control block,
and move all tc related variables to there as a pre-step for
extending the tc control block with additional members.

Signed-off-by: Paul Blakey 
---
 include/net/pkt_sched.h   | 15 +++
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/sched/act_ct.c| 14 +++---
 net/sched/cls_api.c   |  6 --
 net/sched/cls_flower.c|  3 ++-
 net/sched/sch_frag.c  |  3 ++-
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
 }
 
+struct tc_skb_cb {
+   struct qdisc_skb_cb qdisc_cb;
+
+   u16 mru;
+   bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+   struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+   BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+   return cb;
+}
+
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..c70e6d2b2fdd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -447,8 +447,6 @@ struct qdisc_skb_cb {
};
 #define QDISC_CB_PRIV_LEN 20
unsigned char   data[QDISC_CB_PRIV_LEN];
-   u16 mru;
-   boolpost_ct;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..c4708e2487fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct 
net_device *dev)
return skb;
 
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
switch (tcf_classify(skb, miniq->block, miniq->filter_list, _res, 
false)) {
@@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct 
packet_type **pt_prev, int *ret,
}
 
qdisc_skb_cb(skb)->pkt_len = skb->len;
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..98e248b9c0b1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, 
struct sk_buff *skb,
   u8 family, u16 zone, bool *defrag)
 {
enum ip_conntrack_info ctinfo;
-   struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+   u16 mru;
 
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, );
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
return err;
 
skb_get(skb);
-   cb = *qdisc_skb_cb(skb);
+   mru = tc_skb_cb(skb)->mru;
 
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IPCB(skb)->frag_max_size;
+   mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IP6CB(skb)->frag_max_size;
+   mru = IP6CB(skb)->frag_max_size;
}
 #else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
}
 
if (err != -EINPROGRESS)
-   *qdisc_skb_cb(skb) = cb;
+   tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
tcf_action_update_bstats(>common, skb);
 
if (clear) {
-   qdisc_skb_cb(skb)->post_ct = false;
+   

[ovs-dev] [PATCH net v2 0/3] net/sched: Fix ct zone matching for invalid conntrack state

2021-12-08 Thread Paul Blakey via dev
Hi,

Currently, when a packet is marked as invalid conntrack_in in act_ct,
post_ct will be set, and connection info (nf_conn) will be removed
from the skb. Later openvswitch and flower matching will parse this
as ct_state=+trk+inv. But because the connection info is missing,
there is also no zone info to match against even though the packet
is tracked.

This series fixes that, by passing the last executed zone by act_ct.
The zone info is passed along from act_ct to the ct flow dissector
(used by flower to extract zone info) and to ovs, the same way as post_ct
is passed, via qdisc layer skb cb to dissector, and via skb extension
to OVS.

Since adding any more data to qdisc skb cb, there will be no room 
for BPF skb cb to extend it and stay under skb->cb size, this series
moves the tc related info from within qdisc skb cb to a tc specific cb
that also extends it.

---
Changelog:
1->2:
  Cover letter wording
  Added blamed CCs

Paul Blakey (3):
  net/sched: Extend qdisc control block with tc control block
  net/sched: flow_dissector: Fix matching on zone id for invalid conns
  net: openvswitch: Fix matching zone id for invalid conns arriving from tc

 include/linux/skbuff.h|  4 ++--
 include/net/pkt_sched.h   | 16 
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/core/flow_dissector.c |  6 +-
 net/openvswitch/flow.c|  8 +++-
 net/sched/act_ct.c| 15 ---
 net/sched/cls_api.c   |  7 +--
 net/sched/cls_flower.c|  6 +++---
 net/sched/sch_frag.c  |  3 ++-
 10 files changed, 52 insertions(+), 23 deletions(-)

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net 0/3] net/sched: Fix ct zone matching for invalid conntrack state

2021-12-08 Thread Paul Blakey via dev



On Thu, 9 Dec 2021, Jakub Kicinski wrote:

> On Wed, 8 Dec 2021 19:02:37 +0200 Paul Blakey wrote:
> > Currently, when a packet is marked as invalid conntrack_in in act_ct,
> > post_ct will be set, and connection info (nf_conn) will be removed
> > from the skb. Later openvswitch and flower matching will parse this
> > as ct_state=+trk+inv. But because the connection info is missing,
> > there is also no zone info to match against even though the packet
> > is tracked.
> > 
> > This series fixes that, by passing the last executed zone by act_ct.
> > The zone info is passed along from act_ct to the ct flow dissector
> > (used by flower to extract zone info) and to ovs, the same way as post_ct
> > is passed, via qdisc layer skb cb to dissector, and via skb extension
> > to OVS.
> > 
> > Since there was no more for BPF skb cb to extend the qdisc skb cb,
> > tc info on the qdisc skb cb is moved to a tc specific cb that extend it
> > instead of within it (same as BPF).
> 
> The last paragraph is missing words.
> 
> Please repost and cast a wider net for reviewers, you must CC blamed
> authors.
> 

sure, posted v2. Thanks
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-08 Thread Paul Blakey via dev
If ct rejects a flow, it removes the conntrack info from the skb.
act_ct sets the post_ct variable so the dissector will see this case
as an +tracked +invalid state, but the zone id is lost with the
conntrack info.

To restore the zone id on such cases, set the last executed zone,
via the tc control block, when passing ct, and read it back in the
dissector if there is no ct info on the skb (invalid connection).

Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h| 3 +--
 include/net/pkt_sched.h   | 1 +
 net/core/flow_dissector.c | 6 +-
 net/sched/act_ct.c| 1 +
 net/sched/cls_flower.c| 5 ++---
 5 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d47..155eb2ec54d8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1379,8 +1379,7 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
-   u16 *ctinfo_map, size_t mapsize,
-   bool post_ct);
+   u16 *ctinfo_map, size_t mapsize);
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 struct flow_dissector *flow_dissector,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 05f18e81f3e8..9e71691c491b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -198,6 +198,7 @@ struct tc_skb_cb {
 
u16 mru;
bool post_ct;
+   u16 zone; /* Only valid if post_ct = true */
 };
 
 static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3255f57f5131..b52a4370162b 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -238,10 +239,12 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
-   size_t mapsize, bool post_ct)
+   size_t mapsize)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
+   bool post_ct = tc_skb_cb(skb)->post_ct;
struct flow_dissector_key_ct *key;
+   u16 zone = tc_skb_cb(skb)->zone;
enum ip_conntrack_info ctinfo;
struct nf_conn_labels *cl;
struct nf_conn *ct;
@@ -260,6 +263,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+   key->ct_zone = zone;
return;
}
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 98e248b9c0b1..ab3591408419 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1049,6 +1049,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
skb_push_rcsum(skb, nh_ofs);
 
tc_skb_cb(skb)->post_ct = true;
+   tc_skb_cb(skb)->zone = p->zone;
 out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9782b93db1b3..c1a017822c6e 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -310,7 +310,6 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
   struct tcf_result *res)
 {
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
-   bool post_ct = tc_skb_cb(skb)->post_ct;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -327,8 +326,8 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
skb_flow_dissect_tunnel_info(skb, >dissector, _key);
skb_flow_dissect_ct(skb, >dissector, _key,
fl_ct_info_to_flower_map,
-   ARRAY_SIZE(fl_ct_info_to_flower_map),
-   post_ct);
+   ARRAY_SIZE(fl_ct_info_to_flower_map));
+
skb_flow_dissect_hash(skb, >dissector, _key);
skb_flow_dissect(skb, >dissector, _key,
 FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 3/3] net: openvswitch: Fix matching zone id for invalid conns arriving from tc

2021-12-08 Thread Paul Blakey via dev
Zone id is not restored if we passed ct and ct rejected the connection,
as there is no ct info on the skb.

Save the zone from tc skb cb to tc skb extension and pass it on to
ovs, use that info to restore the zone id for invalid connections.

Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do 
conntrack in act_ct")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h | 1 +
 net/openvswitch/flow.c | 8 +++-
 net/sched/cls_api.c| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 155eb2ec54d8..28ad0c6bd0d5 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -286,6 +286,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
__u32 chain;
__u16 mru;
+   __u16 zone;
bool post_ct;
 };
 #endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9713035b89e3..6d262d9aa10e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
bool post_ct = false;
int res, err;
+   u16 zone = 0;
 
/* Extract metadata from packet. */
if (tun_info) {
@@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
 
err = key_extract(skb, key);
-   if (!err)
+   if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
+   if (post_ct && !skb_get_nfct(skb))
+   key->ct_zone = zone;
+   }
return err;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a5050999d607..bede2bd47065 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->zone = cb->zone;
}
 
return ret;
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net 1/3] net/sched: Extend qdisc control block with tc control block

2021-12-08 Thread Paul Blakey via dev
BPF layer extends the qdisc control block via struct bpf_skb_data_end
and because of that there is no more room to add variables to the
qdisc layer control block without going over the skb->cb size.

Extend the qdisc control block with a tc control block,
and move all tc related variables to there as a pre-step for
extending the tc control block with additional members.

Signed-off-by: Paul Blakey 
---
 include/net/pkt_sched.h   | 15 +++
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/sched/act_ct.c| 14 +++---
 net/sched/cls_api.c   |  6 --
 net/sched/cls_flower.c|  3 ++-
 net/sched/sch_frag.c  |  3 ++-
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
 }
 
+struct tc_skb_cb {
+   struct qdisc_skb_cb qdisc_cb;
+
+   u16 mru;
+   bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+   struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+   BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+   return cb;
+}
+
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..c70e6d2b2fdd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -447,8 +447,6 @@ struct qdisc_skb_cb {
};
 #define QDISC_CB_PRIV_LEN 20
unsigned char   data[QDISC_CB_PRIV_LEN];
-   u16 mru;
-   boolpost_ct;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..c4708e2487fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct 
net_device *dev)
return skb;
 
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
switch (tcf_classify(skb, miniq->block, miniq->filter_list, _res, 
false)) {
@@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct 
packet_type **pt_prev, int *ret,
}
 
qdisc_skb_cb(skb)->pkt_len = skb->len;
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..98e248b9c0b1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, 
struct sk_buff *skb,
   u8 family, u16 zone, bool *defrag)
 {
enum ip_conntrack_info ctinfo;
-   struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+   u16 mru;
 
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, );
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
return err;
 
skb_get(skb);
-   cb = *qdisc_skb_cb(skb);
+   mru = tc_skb_cb(skb)->mru;
 
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IPCB(skb)->frag_max_size;
+   mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IP6CB(skb)->frag_max_size;
+   mru = IP6CB(skb)->frag_max_size;
}
 #else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
}
 
if (err != -EINPROGRESS)
-   *qdisc_skb_cb(skb) = cb;
+   tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
tcf_action_update_bstats(>common, skb);
 
if (clear) {
-   qdisc_skb_cb(skb)->post_ct = false;
+   

[ovs-dev] [PATCH net 0/3] net/sched: Fix ct zone matching for invalid conntrack state

2021-12-08 Thread Paul Blakey via dev
Hi,

Currently, when a packet is marked as invalid conntrack_in in act_ct,
post_ct will be set, and connection info (nf_conn) will be removed
from the skb. Later openvswitch and flower matching will parse this
as ct_state=+trk+inv. But because the connection info is missing,
there is also no zone info to match against even though the packet
is tracked.

This series fixes that, by passing the last executed zone by act_ct.
The zone info is passed along from act_ct to the ct flow dissector
(used by flower to extract zone info) and to ovs, the same way as post_ct
is passed, via qdisc layer skb cb to dissector, and via skb extension
to OVS.

Since there was no more for BPF skb cb to extend the qdisc skb cb,
tc info on the qdisc skb cb is moved to a tc specific cb that extend it
instead of within it (same as BPF).

Paul Blakey (3):
  net/sched: Extend qdisc control block with tc control block
  net/sched: flow_dissector: Fix matching on zone id for invalid conns
  net: openvswitch: Fix matching zone id for invalid conns arriving from tc

 include/linux/skbuff.h|  4 ++--
 include/net/pkt_sched.h   | 16 
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/core/flow_dissector.c |  6 +-
 net/openvswitch/flow.c|  8 +++-
 net/sched/act_ct.c| 15 ---
 net/sched/cls_api.c   |  7 +--
 net/sched/cls_flower.c|  6 +++---
 net/sched/sch_frag.c  |  3 ++-
 10 files changed, 52 insertions(+), 23 deletions(-)

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] tc: Fix stats byte count on fragmented packets

2021-12-01 Thread Paul Blakey via dev



On Mon, 29 Nov 2021, Ilya Maximets wrote:

> On 11/7/21 13:12, Roi Dayan via dev wrote:
> > From: Paul Blakey 
> > 
> > Fragmented packets with offset=0 are defragmented by tc act_ct, and
> > only when assembled pass to next action, in ovs offload case,
> > a goto action. Since stats are overwritten on each action dump,
> > only the stats for last action in the tc filter action priority
> > list is taken, the stats on the goto action, which count
> > only the assembled packets. See below for example.
> > 
> > Hardware updates just part of the actions (gact, ct, mirred) - those
> > that support stats_update() operation. Since datapath rules end
> > with either an output (mirred) or recirc/drop (both gact), tc rule
> > will at least have one action that supports it. For software packets,
> > the first action will have the max software packets count.
> > Tc dumps total packets (hw + sw) and hardware packets, then
> > software packets needs to be calculated from this (total - hw).
> > 
> > To fix the above, get hardware packets and calculate software packets
> > for each action, take the max of each set, then combine back
> > to get the total packets that went through software and hardware.
> > 
> > Example by running ping above MTU (ping  -s 2000):
> > ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=first),
> >   packets:14, bytes:19544,..., actions:ct(zone=1),recirc(0x1)
> > ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=later),
> >   packets:14, bytes:28392,..., actions:ct(zone=1),recirc(0x1)
> > 
> > Second rule should have had bytes=14*, but instead
> > it's bytes=14* > frags>.
> > 
> > Fixes: 576126a931cd ("netdev-offload-tc: Add conntrack support")
> > Signed-off-by: Paul Blakey 
> > Reviewed-by: Roi Dayan 
> > ---
> >  lib/netdev-offload-tc.c | 10 +-
> >  lib/tc.c| 34 --
> >  lib/tc.h|  3 ++-
> >  3 files changed, 35 insertions(+), 12 deletions(-)
> > 
> > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 9845e8d3feae..3f7068c8e066 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower,
> >  }
> >  
> >  memset(stats, 0, sizeof *stats);
> > -stats->n_packets = get_32aligned_u64(>stats.n_packets);
> > -stats->n_bytes = get_32aligned_u64(>stats.n_bytes);
> > +stats->n_packets = get_32aligned_u64(>stats_sw.n_packets);
> > +stats->n_packets += get_32aligned_u64(>stats_hw.n_packets);
> > +stats->n_bytes = get_32aligned_u64(>stats_sw.n_bytes);
> > +stats->n_bytes += get_32aligned_u64(>stats_hw.n_bytes);
> >  stats->used = flower->lastused;
> >  }
> >  
> > @@ -2015,9 +2017,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
> >  if (stats) {
> >  memset(stats, 0, sizeof *stats);
> >  if (!tc_get_flower(, )) {
> > -stats->n_packets = get_32aligned_u64(_packets);
> > -stats->n_bytes = get_32aligned_u64(_bytes);
> > -stats->used = flower.lastused;
> > +parse_tc_flower_to_stats(, stats);
> >  }
> >  }
> >  
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 38a1dfc0ebc8..961aef619815 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -1702,6 +1702,9 @@ static const struct nl_policy stats_policy[] = {
> >  [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
> >.min_len = sizeof(struct gnet_stats_basic),
> >.optional = false, },
> > +[TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC,
> > + .min_len = sizeof(struct gnet_stats_basic),
> > + .optional = true, },
> >  };
> >  
> >  static int
> > @@ -1714,8 +1717,11 @@ nl_parse_single_action(struct nlattr *action, struct 
> > tc_flower *flower,
> >  const char *act_kind;
> >  struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
> >  struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
> > -struct ovs_flow_stats *stats = >stats;
> > -const struct gnet_stats_basic *bs;
> > +struct ovs_flow_stats *stats_sw = >stats_sw;
> > +struct ovs_flow_stats *stats_hw = >stats_hw;
> > +const struct gnet_stats_basic *bs_all = NULL;
> > +const struct gnet_stats_basic *bs_hw = NULL;
> > +struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, };
> >  int err = 0;
> >  
> >  if (!nl_parse_nested(action, act_policy, action_attrs,
> > @@ -1771,10 +1777,26 @@ nl_parse_single_action(struct nlattr *action, 
> > struct tc_flower *flower,
> >  return EPROTO;
> >  }
> >  
> > -bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
> > -if (bs->packets) {
> > -put_32aligned_u64(>n_packets, bs->packets);
> > -put_32aligned_u64(>n_bytes, bs->bytes);
> > +bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof 
> > *bs_all);
> > +if (stats_attrs[TCA_STATS_BASIC_HW]) {

Re: [ovs-dev] [PATCH 2/2] tc: Fix stats byte count on fragmented packets

2021-12-01 Thread Paul Blakey via dev



On Mon, 29 Nov 2021, Ilya Maximets wrote:

> On 11/7/21 13:12, Roi Dayan via dev wrote:
> > From: Paul Blakey 
> > 
> > Fragmented packets with offset=0 are defragmented by tc act_ct, and
> > only when assembled pass to next action, in ovs offload case,
> > a goto action. Since stats are overwritten on each action dump,
> > only the stats for last action in the tc filter action priority
> > list is taken, the stats on the goto action, which count
> > only the assembled packets. See below for example.
> > 
> > Hardware updates just part of the actions (gact, ct, mirred) - those
> > that support stats_update() operation. Since datapath rules end
> > with either an output (mirred) or recirc/drop (both gact), tc rule
> > will at least have one action that supports it. For software packets,
> > the first action will have the max software packets count.
> > Tc dumps total packets (hw + sw) and hardware packets, then
> > software packets needs to be calculated from this (total - hw).
> > 
> > To fix the above, get hardware packets and calculate software packets
> > for each action, take the max of each set, then combine back
> > to get the total packets that went through software and hardware.
> > 
> > Example by running ping above MTU (ping  -s 2000):
> > ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=first),
> >   packets:14, bytes:19544,..., actions:ct(zone=1),recirc(0x1)
> > ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=later),
> >   packets:14, bytes:28392,..., actions:ct(zone=1),recirc(0x1)
> > 
> > Second rule should have had bytes=14*, but instead
> > it's bytes=14* > frags>.
> > 
> > Fixes: 576126a931cd ("netdev-offload-tc: Add conntrack support")
> > Signed-off-by: Paul Blakey 
> > Reviewed-by: Roi Dayan 
> > ---
> >  lib/netdev-offload-tc.c | 10 +-
> >  lib/tc.c| 34 --
> >  lib/tc.h|  3 ++-
> >  3 files changed, 35 insertions(+), 12 deletions(-)
> > 
> > diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
> > index 9845e8d3feae..3f7068c8e066 100644
> > --- a/lib/netdev-offload-tc.c
> > +++ b/lib/netdev-offload-tc.c
> > @@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower,
> >  }
> >  
> >  memset(stats, 0, sizeof *stats);
> > -stats->n_packets = get_32aligned_u64(>stats.n_packets);
> > -stats->n_bytes = get_32aligned_u64(>stats.n_bytes);
> > +stats->n_packets = get_32aligned_u64(>stats_sw.n_packets);
> > +stats->n_packets += get_32aligned_u64(>stats_hw.n_packets);
> > +stats->n_bytes = get_32aligned_u64(>stats_sw.n_bytes);
> > +stats->n_bytes += get_32aligned_u64(>stats_hw.n_bytes);
> >  stats->used = flower->lastused;
> >  }
> >  
> > @@ -2015,9 +2017,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
> >  if (stats) {
> >  memset(stats, 0, sizeof *stats);
> >  if (!tc_get_flower(, )) {
> > -stats->n_packets = get_32aligned_u64(_packets);
> > -stats->n_bytes = get_32aligned_u64(_bytes);
> > -stats->used = flower.lastused;
> > +parse_tc_flower_to_stats(, stats);
> >  }
> >  }
> >  
> > diff --git a/lib/tc.c b/lib/tc.c
> > index 38a1dfc0ebc8..961aef619815 100644
> > --- a/lib/tc.c
> > +++ b/lib/tc.c
> > @@ -1702,6 +1702,9 @@ static const struct nl_policy stats_policy[] = {
> >  [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
> >.min_len = sizeof(struct gnet_stats_basic),
> >.optional = false, },
> > +[TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC,
> > + .min_len = sizeof(struct gnet_stats_basic),
> > + .optional = true, },
> >  };
> >  
> >  static int
> > @@ -1714,8 +1717,11 @@ nl_parse_single_action(struct nlattr *action, struct 
> > tc_flower *flower,
> >  const char *act_kind;
> >  struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
> >  struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
> > -struct ovs_flow_stats *stats = >stats;
> > -const struct gnet_stats_basic *bs;
> > +struct ovs_flow_stats *stats_sw = >stats_sw;
> > +struct ovs_flow_stats *stats_hw = >stats_hw;
> > +const struct gnet_stats_basic *bs_all = NULL;
> > +const struct gnet_stats_basic *bs_hw = NULL;
> > +struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, };
> >  int err = 0;
> >  
> >  if (!nl_parse_nested(action, act_policy, action_attrs,
> > @@ -1771,10 +1777,26 @@ nl_parse_single_action(struct nlattr *action, 
> > struct tc_flower *flower,
> >  return EPROTO;
> >  }
> >  
> > -bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
> > -if (bs->packets) {
> > -put_32aligned_u64(>n_packets, bs->packets);
> > -put_32aligned_u64(>n_bytes, bs->bytes);
> > +bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof 
> > *bs_all);
> > +if (stats_attrs[TCA_STATS_BASIC_HW]) {

[ovs-dev] [PATCH 2/2] tc: Fix stats byte count on fragmented packets

2021-11-07 Thread Paul Blakey via dev
Fragmented packets with offset=0 are defragmented by tc act_ct, and
only when assembled pass to next action, in ovs offload case,
a goto action. Since stats are overwritten on each action dump,
only the stats for last action in the tc filter action priority
list is taken, the stats on the goto action, which count
only the assembled packets. See below for example.

Hardware updates just part of the actions (gact, ct, mirred) - those
that support stats_update() operation. Since datapath rules end
with either an output (mirred) or recirc/drop (both gact), tc rule
will at least have one action that supports it. For software packets,
the first action will have the max software packets count.
Tc dumps total packets (hw + sw) and hardware packets, then
software packets needs to be calculated from this (total - hw).

To fix the above, get hardware packets and calculate software packets
for each action, take the max of each set, then combine back
to get the total packets that went through software and hardware.

Example by running ping above MTU (ping  -s 2000):
ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=first),
  packets:14, bytes:19544,..., actions:ct(zone=1),recirc(0x1)
ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=later),
  packets:14, bytes:28392,..., actions:ct(zone=1),recirc(0x1)

Second rule should have had bytes=14*, but instead
it's bytes=14*.

Fixes: 576126a931cd ("netdev-offload-tc: Add conntrack support")
Signed-off-by: Paul Blakey 
---
 lib/netdev-offload-tc.c | 10 +-
 lib/tc.c| 34 --
 lib/tc.h|  3 ++-
 3 files changed, 35 insertions(+), 12 deletions(-)

diff --git a/lib/netdev-offload-tc.c b/lib/netdev-offload-tc.c
index 9845e8d3f..3f7068c8e 100644
--- a/lib/netdev-offload-tc.c
+++ b/lib/netdev-offload-tc.c
@@ -585,8 +585,10 @@ parse_tc_flower_to_stats(struct tc_flower *flower,
 }
 
 memset(stats, 0, sizeof *stats);
-stats->n_packets = get_32aligned_u64(>stats.n_packets);
-stats->n_bytes = get_32aligned_u64(>stats.n_bytes);
+stats->n_packets = get_32aligned_u64(>stats_sw.n_packets);
+stats->n_packets += get_32aligned_u64(>stats_hw.n_packets);
+stats->n_bytes = get_32aligned_u64(>stats_sw.n_bytes);
+stats->n_bytes += get_32aligned_u64(>stats_hw.n_bytes);
 stats->used = flower->lastused;
 }
 
@@ -2015,9 +2017,7 @@ netdev_tc_flow_del(struct netdev *netdev OVS_UNUSED,
 if (stats) {
 memset(stats, 0, sizeof *stats);
 if (!tc_get_flower(, )) {
-stats->n_packets = get_32aligned_u64(_packets);
-stats->n_bytes = get_32aligned_u64(_bytes);
-stats->used = flower.lastused;
+parse_tc_flower_to_stats(, stats);
 }
 }
 
diff --git a/lib/tc.c b/lib/tc.c
index 38a1dfc0e..961aef619 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1702,6 +1702,9 @@ static const struct nl_policy stats_policy[] = {
 [TCA_STATS_BASIC] = { .type = NL_A_UNSPEC,
   .min_len = sizeof(struct gnet_stats_basic),
   .optional = false, },
+[TCA_STATS_BASIC_HW] = { .type = NL_A_UNSPEC,
+ .min_len = sizeof(struct gnet_stats_basic),
+ .optional = true, },
 };
 
 static int
@@ -1714,8 +1717,11 @@ nl_parse_single_action(struct nlattr *action, struct 
tc_flower *flower,
 const char *act_kind;
 struct nlattr *action_attrs[ARRAY_SIZE(act_policy)];
 struct nlattr *stats_attrs[ARRAY_SIZE(stats_policy)];
-struct ovs_flow_stats *stats = >stats;
-const struct gnet_stats_basic *bs;
+struct ovs_flow_stats *stats_sw = >stats_sw;
+struct ovs_flow_stats *stats_hw = >stats_hw;
+const struct gnet_stats_basic *bs_all = NULL;
+const struct gnet_stats_basic *bs_hw = NULL;
+struct gnet_stats_basic bs_sw = { .packets = 0, .bytes = 0, };
 int err = 0;
 
 if (!nl_parse_nested(action, act_policy, action_attrs,
@@ -1771,10 +1777,26 @@ nl_parse_single_action(struct nlattr *action, struct 
tc_flower *flower,
 return EPROTO;
 }
 
-bs = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs);
-if (bs->packets) {
-put_32aligned_u64(>n_packets, bs->packets);
-put_32aligned_u64(>n_bytes, bs->bytes);
+bs_all = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC], sizeof *bs_all);
+if (stats_attrs[TCA_STATS_BASIC_HW]) {
+bs_hw = nl_attr_get_unspec(stats_attrs[TCA_STATS_BASIC_HW],
+   sizeof *bs_hw);
+
+bs_sw.packets = bs_all->packets - bs_hw->packets;
+bs_sw.bytes = bs_all->bytes - bs_hw->bytes;
+} else {
+bs_sw.packets = bs_all->packets;
+bs_sw.bytes = bs_all->bytes;
+}
+
+if (bs_sw.packets > get_32aligned_u64(_sw->n_packets)) {
+put_32aligned_u64(_sw->n_packets, bs_sw.packets);
+put_32aligned_u64(_sw->n_bytes, bs_sw.bytes);
+}
+
+if (bs_hw && bs_hw->packets > 

[ovs-dev] [PATCH 0/2] tc: Fix stats byte count on fragmented packets

2021-11-07 Thread Paul Blakey via dev
Series fixes the stats count on "later" fragmented packets (frag=later
rule).

Example by running ping above MTU (ping  -s 2000):
ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=first),
  packets:14, bytes:19544,..., actions:ct(zone=1),recirc(0x1)
ct_state(-trk),recirc_id(0),...,ipv4(proto=1,frag=later),
  packets:14, bytes:28392,..., actions:ct(zone=1),recirc(0x1)

Second rule should have had bytes=14*, but instead
it's bytes=14*.

Paul Blakey (2):
  compat: Add gen_stats include to define tc hw stats
  tc: Fix stats byte count on fragmented packets

 acinclude.m4  |  7 
 include/linux/automake.mk |  1 +
 include/linux/gen_stats.h | 81 +++
 lib/netdev-offload-tc.c   | 10 ++---
 lib/tc.c  | 34 +---
 lib/tc.h  |  3 +-
 6 files changed, 124 insertions(+), 12 deletions(-)
 create mode 100644 include/linux/gen_stats.h

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] compat: Add gen_stats include to define tc hw stats

2021-11-07 Thread Paul Blakey via dev
Update kernel UAPI to support dumping hardware stats
of tc filters.

Signed-off-by: Paul Blakey 
---
 acinclude.m4  |  7 
 include/linux/automake.mk |  1 +
 include/linux/gen_stats.h | 81 +++
 3 files changed, 89 insertions(+)
 create mode 100644 include/linux/gen_stats.h

diff --git a/acinclude.m4 b/acinclude.m4
index 8ab690f47..c93ac0f4a 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -305,6 +305,13 @@ AC_DEFUN([OVS_CHECK_LINUX_TC], [
 ])],
 [AC_DEFINE([HAVE_TCA_SKBEDIT_FLAGS], [1],
[Define to 1 if TCA_SKBEDIT_FLAGS is available.])])
+
+  AC_COMPILE_IFELSE([
+AC_LANG_PROGRAM([#include ], [
+int x = TCA_STATS_PKT64;
+])],
+[AC_DEFINE([HAVE_TCA_STATS_PKT64], [1],
+   [Define to 1 if TCA_STATS_PKT64 is available.])])
 ])
 
 dnl OVS_CHECK_LINUX_SCTP_CT
diff --git a/include/linux/automake.mk b/include/linux/automake.mk
index 8f063f482..f857c7e08 100644
--- a/include/linux/automake.mk
+++ b/include/linux/automake.mk
@@ -2,6 +2,7 @@ noinst_HEADERS += \
include/linux/netlink.h \
include/linux/netfilter/nf_conntrack_sctp.h \
include/linux/pkt_cls.h \
+   include/linux/gen_stats.h \
include/linux/tc_act/tc_mpls.h \
include/linux/tc_act/tc_pedit.h \
include/linux/tc_act/tc_skbedit.h \
diff --git a/include/linux/gen_stats.h b/include/linux/gen_stats.h
new file mode 100644
index 0..6fae6f727
--- /dev/null
+++ b/include/linux/gen_stats.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef __LINUX_GEN_STATS_WRAPPER_H
+#define __LINUX_GEN_STATS_WRAPPER_H 1
+
+#if defined(__KERNEL__) || defined(HAVE_TCA_STATS_PKT64)
+#include_next 
+#else
+#include 
+
+enum {
+   TCA_STATS_UNSPEC,
+   TCA_STATS_BASIC,
+   TCA_STATS_RATE_EST,
+   TCA_STATS_QUEUE,
+   TCA_STATS_APP,
+   TCA_STATS_RATE_EST64,
+   TCA_STATS_PAD,
+   TCA_STATS_BASIC_HW,
+   TCA_STATS_PKT64,
+   __TCA_STATS_MAX,
+};
+#define TCA_STATS_MAX (__TCA_STATS_MAX - 1)
+
+/**
+ * struct gnet_stats_basic - byte/packet throughput statistics
+ * @bytes: number of seen bytes
+ * @packets: number of seen packets
+ */
+struct gnet_stats_basic {
+   __u64   bytes;
+   __u32   packets;
+};
+
+/**
+ * struct gnet_stats_rate_est - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est {
+   __u32   bps;
+   __u32   pps;
+};
+
+/**
+ * struct gnet_stats_rate_est64 - rate estimator
+ * @bps: current byte rate
+ * @pps: current packet rate
+ */
+struct gnet_stats_rate_est64 {
+   __u64   bps;
+   __u64   pps;
+};
+
+/**
+ * struct gnet_stats_queue - queuing statistics
+ * @qlen: queue length
+ * @backlog: backlog size of queue
+ * @drops: number of dropped packets
+ * @requeues: number of requeues
+ * @overlimits: number of enqueues over the limit
+ */
+struct gnet_stats_queue {
+   __u32   qlen;
+   __u32   backlog;
+   __u32   drops;
+   __u32   requeues;
+   __u32   overlimits;
+};
+
+/**
+ * struct gnet_estimator - rate estimator configuration
+ * @interval: sampling period
+ * @ewma_log: the log of measurement window weight
+ */
+struct gnet_estimator {
+   signed char interval;
+   unsigned char   ewma_log;
+};
+
+#endif /* __KERNEL__ || !HAVE_TCA_STATS_PKT64 */
+#endif /* __LINUX_GEN_STATS_WRAPPER_H */
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev