Re: [PATCH RFC,WIP 2/5] netfilter: add software flow offload infrastructure

2017-11-03 Thread Florian Westphal
Pablo Neira Ayuso  wrote:
> +static int __init nf_flow_offload_module_init(void)
> +{
> + struct rhashtable_params params = flow_offload_rhash_params;
> + struct nf_hook_ops flow_offload_hook = {
> + .hook   = nf_flow_offload_hook,
> + .pf = NFPROTO_NETDEV,
> + .hooknum= NF_NETDEV_INGRESS,
> + .priority   = -100,

Magic number.  Should this be documented in nft?

Alternatively we could reject NETDEV_INGRESS base chains from
userspace if prio < 0 to prevent userspace rules from messing
with this flow offload infrastructure.

I guess the rationale for using an auto-registered built-in hook is to avoid
forcing users to configure this with nftables rules?

> + rtnl_lock();
> + for_each_netdev(&init_net, dev) {
> + entry = kmalloc(sizeof(*entry), GFP_KERNEL);
> + if (!entry) {
> + rtnl_unlock();
> + return -ENOMEM;

This would need error unwinding (Unregistering the already-registered
hooks).

> + err = nf_register_net_hook(&init_net, &entry->ops);
> + if (err < 0)
> + return err;

And here as well.


[PATCH RFC,WIP 2/5] netfilter: add software flow offload infrastructure

2017-11-03 Thread Pablo Neira Ayuso
This patch adds the generic software flow offload infrastructure. This
allows users to configure a fast path for established flows that will not
follow the classic forwarding path.

This adds a new hook at netfilter ingress for each existing interface.
For each packet that hits the hook, we look up an existing flow in
the table; if there is a hit, the packet is forwarded using the
gateway and interfaces that are cached in the flow table entry.

This comes with a kernel thread that releases flow table entries when
no packets have been seen for a little while.

Signed-off-by: Pablo Neira Ayuso 
---
 include/net/flow_offload.h  |  67 +++
 net/netfilter/Kconfig   |   7 +
 net/netfilter/Makefile  |   3 +
 net/netfilter/nf_flow_offload.c | 386 
 4 files changed, 463 insertions(+)
 create mode 100644 include/net/flow_offload.h
 create mode 100644 net/netfilter/nf_flow_offload.c

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
new file mode 100644
index 000000000000..30bfca7ed3f1
--- /dev/null
+++ b/include/net/flow_offload.h
@@ -0,0 +1,67 @@
+#ifndef _FLOW_OFFLOAD_H
+#define _FLOW_OFFLOAD_H
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+enum flow_offload_tuple_dir {   /* the two directions of a flow; each direction has its own tuple */
+   FLOW_OFFLOAD_DIR_ORIGINAL,
+   FLOW_OFFLOAD_DIR_REPLY,
+   __FLOW_OFFLOAD_DIR_MAX  = FLOW_OFFLOAD_DIR_REPLY,   /* alias for the last valid direction, not a new value */
+};
+#define FLOW_OFFLOAD_DIR_MAX   (__FLOW_OFFLOAD_DIR_MAX + 1)   /* number of directions (2); sizes flow_offload::tuplehash[] */
+
+struct flow_offload_tuple {   /* per-direction flow key plus the cached interfaces and gateway used for forwarding */
+   union {   /* source address, stored as either IPv4 or IPv6 */
+   struct in_addr  src_v4;
+   struct in6_addr src_v6;
+   };
+   union {   /* destination address, stored as either IPv4 or IPv6 */
+   struct in_addr  dst_v4;
+   struct in6_addr dst_v6;
+   };
+   struct {   /* ports, big-endian (__be16) */
+   __be16  src_port;
+   __be16  dst_port;
+   };
+
+   u8  l3proto;   /* last member covered by flow_offload_hash(), which hashes up to offsetof(l4proto) */
+   u8  l4proto;   /* NOTE(review): not part of the hashed range in flow_offload_hash() */
+   u8  dir;   /* presumably an enum flow_offload_tuple_dir value - TODO confirm */
+
+   int iifidx;   /* presumably the input interface index - TODO confirm */
+   int oifidx;   /* presumably the output interface index - TODO confirm */
+
+   union {   /* gateway address, stored as either IPv4 (__be32) or IPv6 */
+   __be32  gateway;
+   struct in6_addr gateway6;
+   };
+};
+
+struct flow_offload_tuple_rhash {   /* a flow tuple paired with an rhash_head node */
+   struct rhash_head   node;
+   struct flow_offload_tuple   tuple;
+};
+
+#define FLOW_OFFLOAD_SNAT   0x1
+#define FLOW_OFFLOAD_DNAT   0x2
+#define FLOW_OFFLOAD_HW     0x4
+
+struct flow_offload {   /* one offloaded flow: a hashed tuple per direction, flags, timeout and an RCU head */
+   struct flow_offload_tuple_rhash tuplehash[FLOW_OFFLOAD_DIR_MAX];   /* one entry per direction; presumably indexed by enum flow_offload_tuple_dir - TODO confirm */
+   u32 flags;   /* presumably a mask of the FLOW_OFFLOAD_SNAT/DNAT/HW bits - TODO confirm */
+   union {
+   /* Your private driver data here. */
+   u32 timeout;   /* units and semantics are not visible in this header - TODO confirm */
+   };
+   struct rcu_head rcu_head;
+};
+
+int flow_offload_add(struct flow_offload *flow);
+void flow_offload_del(struct flow_offload *flow);
+struct flow_offload_tuple_rhash *flow_offload_lookup(struct flow_offload_tuple *tuple);
+
+#endif /* _FLOW_OFFLOAD_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index e4a13cc8a2e7..f022ca91f49d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -436,6 +436,13 @@ config NETFILTER_SYNPROXY
 
 endif # NF_CONNTRACK
 
+config NF_FLOW_OFFLOAD
+   tristate "Netfilter Generic Flow Offload (GFO) module"
+   help
+ This option adds the flow table core infrastructure.
+
+ To compile it as a module, choose M here.
+
 config NF_TABLES
select NETFILTER_NETLINK
tristate "Netfilter nf_tables support"
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index d3891c93edd6..518f54113e06 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -69,6 +69,9 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 # generic packet duplication from netdev family
 obj-$(CONFIG_NF_DUP_NETDEV)+= nf_dup_netdev.o
 
+# generic flow table
+obj-$(CONFIG_NF_FLOW_OFFLOAD)+= nf_flow_offload.o
+
 # nf_tables
 nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \
  nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \
diff --git a/net/netfilter/nf_flow_offload.c b/net/netfilter/nf_flow_offload.c
new file mode 100644
index 000000000000..c967b29d11a6
--- /dev/null
+++ b/net/netfilter/nf_flow_offload.c
@@ -0,0 +1,386 @@
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+/* For layer 4 checksum field offset. */
+#include 
+#include 
+#include 
+
+static struct rhashtable flow_table;
+
+static u32 flow_offload_hash(const void *data, u32 len, u32 seed)   /* hash the struct flow_offload_tuple that data points to; len is ignored */
+{
+   const struct flow_offload_tuple *tuple = data;
+
+   return jhash(tuple, offsetof(struct flow_offload_tuple, l4proto), seed);   /* hashes only the bytes before l4proto; NOTE(review): l4proto itself is excluded - confirm this is intended */
+}
+
+static u32 flow_offload_hash_obj(const void *data, u32 len, u32 seed)
+{
+   const struct