Re: [PATCH RFC,WIP 3/5] netfilter: nf_flow_offload: integration with conntrack

2017-11-03 Thread Florian Westphal
Pablo Neira Ayuso  wrote:
> This patch adds the IPS_OFFLOAD status bit, this new bit tells us that
> the conntrack entry is owned by the flow offload infrastructure. The
> timer of such conntrack entries is stopped - the conntrack garbage
> collector skips them - and they display no internal state in the case of
> TCP flows.
>
> Conntrack entries that have been offloaded to the flow table
> infrastructure cannot be deleted/flushed via ctnetlink. The flow table
> infrastructure is also responsible for releasing this conntrack entry.
> 
> Signed-off-by: Pablo Neira Ayuso 
> ---
> Instead of nf_flow_release_ct(), I'd rather keep a pointer reference to
> the conntrack object from the flow_offload entry, so we can skip the
> conntrack look up.

I agree, this would make sense.

> diff --git a/include/net/netfilter/nf_conntrack.h 
> b/include/net/netfilter/nf_conntrack.h
> index 8f3bd30511de..9af4bb0c2f46 100644
> --- a/include/net/netfilter/nf_conntrack.h
> +++ b/include/net/netfilter/nf_conntrack.h
> @@ -272,7 +272,8 @@ static inline unsigned long nf_ct_expires(const struct 
> nf_conn *ct)
>  
>  static inline bool nf_ct_is_expired(const struct nf_conn *ct)
>  {
> - return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
> + return (__s32)(ct->timeout - nfct_time_stamp) <= 0 &&
> +!test_bit(IPS_OFFLOAD_BIT, >status);

An alternative would be to not touch nf_ct_is_expired() and instead ...
>  }
>  
> @@ -1011,12 +1014,14 @@ static void gc_worker(struct work_struct *work)
>   tmp = nf_ct_tuplehash_to_ctrack(h);
>  
>   scanned++;
> + if (test_bit(IPS_OFFLOAD_BIT, >status))
> + continue;
 
... advance/refresh ct->timeout from gc worker, i.e.

 if (test_bit(IPS_OFFLOAD_BIT, >status)) {
 ct->timeout = nfct_time_stamp + (1 DAY);
 continue;
 }

Would prevent normal path to ever see offloaded entry
as 'timed out', without having to check for the flag in lookup path
(OTOH the check should not be an issue either because lookup path
 has to access ct->status anyway).



[PATCH RFC,WIP 3/5] netfilter: nf_flow_offload: integration with conntrack

2017-11-03 Thread Pablo Neira Ayuso
This patch adds the IPS_OFFLOAD status bit, this new bit tells us that
the conntrack entry is owned by the flow offload infrastructure. The
timer of such conntrack entries is stopped - the conntrack garbage
collector skips them - and they display no internal state in the case of
TCP flows.

 # cat /proc/net/nf_conntrack
 ipv4 2 tcp  6 src=10.141.10.2 dst=147.75.205.195 sport=36392 dport=443 
src=147.75.205.195 dst=192.168.2.195 sport=443 dport=36392 [OFFLOAD] mark=0 
zone=0 use=2

Note the [OFFLOAD] tag in the listing.

Conntrack entries that have been offloaded to the flow table
infrastructure cannot be deleted/flushed via ctnetlink. The flow table
infrastructure is also responsible for releasing this conntrack entry.

Signed-off-by: Pablo Neira Ayuso 
---
Instead of nf_flow_release_ct(), I'd rather keep a pointer reference to
the conntrack object from the flow_offload entry, so we can skip the
conntrack look up.

 include/net/netfilter/nf_conntrack.h   |  3 +-
 include/uapi/linux/netfilter/nf_conntrack_common.h |  4 +++
 net/netfilter/nf_conntrack_core.c  |  7 -
 net/netfilter/nf_conntrack_netlink.c   | 15 -
 net/netfilter/nf_conntrack_proto_tcp.c |  3 ++
 net/netfilter/nf_conntrack_standalone.c| 12 +---
 net/netfilter/nf_flow_offload.c| 36 --
 7 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack.h 
b/include/net/netfilter/nf_conntrack.h
index 8f3bd30511de..9af4bb0c2f46 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -272,7 +272,8 @@ static inline unsigned long nf_ct_expires(const struct 
nf_conn *ct)
 
 static inline bool nf_ct_is_expired(const struct nf_conn *ct)
 {
-   return (__s32)(ct->timeout - nfct_time_stamp) <= 0;
+   return (__s32)(ct->timeout - nfct_time_stamp) <= 0 &&
+  !test_bit(IPS_OFFLOAD_BIT, >status);
 }
 
 /* use after obtaining a reference count */
diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h 
b/include/uapi/linux/netfilter/nf_conntrack_common.h
index dc947e59d03a..6b463b88182d 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -100,6 +100,10 @@ enum ip_conntrack_status {
IPS_HELPER_BIT = 13,
IPS_HELPER = (1 << IPS_HELPER_BIT),
 
+   /* Conntrack has been offloaded to flow table. */
+   IPS_OFFLOAD_BIT = 14,
+   IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
+
/* Be careful here, modifying these bits can make things messy,
 * so don't let users modify them directly.
 */
diff --git a/net/netfilter/nf_conntrack_core.c 
b/net/netfilter/nf_conntrack_core.c
index 01130392b7c0..48f36c4fb756 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -901,6 +901,9 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
 
+   if (test_bit(IPS_OFFLOAD_BIT, >status))
+   continue;
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -1011,12 +1014,14 @@ static void gc_worker(struct work_struct *work)
tmp = nf_ct_tuplehash_to_ctrack(h);
 
scanned++;
+   if (test_bit(IPS_OFFLOAD_BIT, >status))
+   continue;
+
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
expired_count++;
continue;
}
-
if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
continue;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c 
b/net/netfilter/nf_conntrack_netlink.c
index de4053d84364..79a74aec7c1e 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1105,6 +1105,14 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] 
= {
.len = NF_CT_LABELS_MAX_SIZE },
 };
 
+static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
+{
+   if (test_bit(IPS_OFFLOAD_BIT, >status))
+   return 0;
+
+   return ctnetlink_filter_match(ct, data);
+}
+
 static int ctnetlink_flush_conntrack(struct net *net,
 const struct nlattr * const cda[],
 u32 portid, int report)
@@ -1117,7 +1125,7 @@ static int ctnetlink_flush_conntrack(struct net *net,
return PTR_ERR(filter);
}
 
-   nf_ct_iterate_cleanup_net(net, ctnetlink_filter_match, filter,
+   nf_ct_iterate_cleanup_net(net,