This adds a nftables frontend for the IMR->BPF translator.

This doesn't work via UMH yet.

AFAIU it should be possible to get transparent ebpf translation for
nftables, similar to the bpfilter/iptables UMH.

However, at this time I think it's better to get IMR "right".

nftjit.ko currently needs libnftnl/libmnl, but that's a convenience on
my end and not a "must have".

Signed-off-by: Florian Westphal <f...@strlen.de>
---
 net/bpfilter/Makefile   |   7 +-
 net/bpfilter/nftables.c | 679 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 685 insertions(+), 1 deletion(-)
 create mode 100644 net/bpfilter/nftables.c

diff --git a/net/bpfilter/Makefile b/net/bpfilter/Makefile
index 5a85ef7d7a4d..a4064986dc2f 100644
--- a/net/bpfilter/Makefile
+++ b/net/bpfilter/Makefile
@@ -3,7 +3,12 @@
 # Makefile for the Linux BPFILTER layer.
 #
 
-hostprogs-y := bpfilter.ko
+hostprogs-y := nftjit.ko bpfilter.ko
 always := $(hostprogs-y)
 bpfilter.ko-objs := bpfilter.o tgts.o targets.o tables.o init.o ctor.o sockopt.o gen.o
+
+NFT_LIBS = -lnftnl
+nftjit.ko-objs := tgts.o targets.o tables.o init.o ctor.o gen.o nftables.o imr.o
+HOSTLOADLIBES_nftjit.ko = `pkg-config --libs libnftnl libmnl`
+
 HOSTCFLAGS += -I. -Itools/include/
diff --git a/net/bpfilter/nftables.c b/net/bpfilter/nftables.c
new file mode 100644
index 000000000000..5a756ccd03a1
--- /dev/null
+++ b/net/bpfilter/nftables.c
@@ -0,0 +1,679 @@
+/*
+ * based on previous code from:
+ *
+ * Copyright (c) 2013 Arturo Borrero Gonzalez <art...@netfilter.org>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pa...@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <time.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <errno.h>
+#include <utils.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+
+#include <libmnl/libmnl.h>
+#include <libnftnl/common.h>
+#include <libnftnl/ruleset.h>
+#include <libnftnl/table.h>
+#include <libnftnl/chain.h>
+#include <libnftnl/set.h>
+#include <libnftnl/expr.h>
+#include <libnftnl/rule.h>
+
+#include <linux/if_ether.h>
+
+#include "bpfilter_mod.h"
+#include "imr.h"
+
+/* Hack, we don't link bpfilter.o */
+extern long int syscall (long int __sysno, ...);
+
+int sys_bpf(int cmd, union bpf_attr *attr, unsigned int size)
+{
+       return syscall(321, cmd, attr, size);
+}
+
+/*
+ * Netlink sequence number.  main() initialises it from time(NULL); the same
+ * value is placed in every request header and handed to mnl_cb_run().
+ */
+static int seq;
+
+/* Report a failed allocation through perror() and exit with status 1. */
+static void memory_allocation_error(void)
+{
+       perror("allocation failed");
+       exit(1);
+}
+
+/*
+ * Map an nftables register number onto an IMR register.
+ *
+ * Both the old 128 bit register names (NFT_REG_1..3) and the first three
+ * 32 bit names (NFT_REG32_00..02) are accepted.
+ *
+ * Returns the IMR_REG_* value, or -1 when @nfreg has no IMR counterpart
+ * (this includes NFT_REG_4).
+ */
+static int nft_reg_to_imr_reg(int nfreg)
+{
+       int imr_reg = -1;
+
+       switch (nfreg) {
+       case NFT_REG_VERDICT:
+               imr_reg = IMR_REG_0;
+               break;
+       case NFT_REG_1:
+       case NFT_REG32_00:
+               imr_reg = IMR_REG_4;
+               break;
+       case NFT_REG32_01:
+               imr_reg = IMR_REG_5;
+               break;
+       case NFT_REG_2:
+       case NFT_REG32_02:
+               imr_reg = IMR_REG_6;
+               break;
+       case NFT_REG_3:
+               imr_reg = IMR_REG_8;
+               break;
+       default:
+               /* NFT_REG_4 and everything else: unsupported */
+               break;
+       }
+
+       return imr_reg;
+}
+
+/*
+ * Translate an nft "immediate" expression.
+ * @nle: expression to translate
+ * @out: the struct imr_state being built
+ *
+ * A data immediate of 4 or 8 bytes becomes an IMR immediate stored in the
+ * destination register.  A verdict immediate is appended to the current
+ * rule; verdicts that name a chain are rejected, and any verdict other
+ * than NF_ACCEPT/NF_DROP is reported on stderr and translated to a drop.
+ *
+ * Returns 0 on success and a negative value on failure (-ENOMEM when an
+ * IMR object cannot be allocated, -ENOTSUPP for unsupported input).
+ */
+static int netlink_parse_immediate(const struct nftnl_expr *nle, void *out)
+{
+       struct imr_state *state = out;
+       struct imr_object *o = NULL;
+
+       if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_DATA)) {
+               uint32_t len = 0;
+               int reg;
+
+               /* only the length of the data blob is needed here */
+               nftnl_expr_get(nle, NFTNL_EXPR_IMM_DATA, &len);
+
+               switch (len) {
+               case sizeof(uint32_t):
+                       o = imr_object_alloc_imm32(nftnl_expr_get_u32(nle,
+                                                  NFTNL_EXPR_IMM_DATA));
+                       break;
+               case sizeof(uint64_t):
+                       o = imr_object_alloc_imm64(nftnl_expr_get_u64(nle,
+                                                  NFTNL_EXPR_IMM_DATA));
+                       break;
+               default:
+                       return -ENOTSUPP;
+               }
+
+               /* same check netlink_parse_cmp() does for its immediate */
+               if (!o)
+                       return -ENOMEM;
+
+               reg = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle,
+                                            NFTNL_EXPR_IMM_DREG));
+               if (reg < 0) {
+                       imr_object_free(o);
+                       return reg;
+               }
+
+               imr_register_store(state, reg, o);
+               return 0;
+       }
+
+       if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_VERDICT)) {
+               uint32_t verdict;
+               int ret;
+
+               /* jump/goto to another chain is not translated */
+               if (nftnl_expr_is_set(nle, NFTNL_EXPR_IMM_CHAIN))
+                       return -ENOTSUPP;
+
+               verdict = nftnl_expr_get_u32(nle, NFTNL_EXPR_IMM_VERDICT);
+
+               switch (verdict) {
+               case NF_ACCEPT:
+                       o = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+                       break;
+               case NF_DROP:
+                       o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+                       break;
+               default:
+                       /* cast: %d expects an int, verdict is a uint32_t */
+                       fprintf(stderr, "Unhandled verdict %d\n", (int)verdict);
+                       o = imr_object_alloc_verdict(IMR_VERDICT_DROP);
+                       break;
+               }
+
+               if (!o)
+                       return -ENOMEM;
+
+               ret = imr_state_add_obj(state, o);
+               if (ret < 0)
+                       imr_object_free(o);
+
+               return ret;
+       }
+
+       return -ENOTSUPP;
+}
+
+/*
+ * Translate an nft "cmp" expression into an IMR relational object.
+ * @nle: expression to translate
+ * @out: the struct imr_state being built
+ *
+ * Only NFT_CMP_EQ and NFT_CMP_NEQ with 1, 2, 4 or 8 bytes of comparison
+ * data are handled.  The left-hand side is whatever is currently held in
+ * the IMR register that corresponds to the source register.
+ *
+ * Returns the result of imr_state_add_obj() on success, or a negative
+ * value: -ENOTSUPP for an unsupported operator/length, -ENOMEM when an
+ * allocation fails, -EINVAL when the source register is empty, or the
+ * register-mapping error.
+ */
+static int netlink_parse_cmp(const struct nftnl_expr *nle, void *out)
+{
+       struct imr_object *o, *imm, *left;
+       struct imr_state *state = out;
+       enum imr_relop op;
+       uint32_t tmp, len = 0;  /* len stays 0 if the data attribute is absent */
+       int ret;
+
+       switch (nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_OP)) {
+       case NFT_CMP_EQ:
+               op = IMR_RELOP_EQ;
+               break;
+       case NFT_CMP_NEQ:
+               op = IMR_RELOP_NE;
+               break;
+       default:
+               return -ENOTSUPP;
+       }
+
+       /* only the length of the data blob is needed here */
+       nftnl_expr_get(nle, NFTNL_EXPR_CMP_DATA, &len);
+       switch (len) {
+       case sizeof(uint64_t):
+               imm = imr_object_alloc_imm64(nftnl_expr_get_u64(nle,
+                                            NFTNL_EXPR_CMP_DATA));
+               break;
+       case sizeof(uint32_t):
+               imm = imr_object_alloc_imm32(nftnl_expr_get_u32(nle,
+                                            NFTNL_EXPR_CMP_DATA));
+               break;
+       case sizeof(uint16_t):
+               tmp = nftnl_expr_get_u16(nle, NFTNL_EXPR_CMP_DATA);
+
+               imm = imr_object_alloc_imm32(tmp);
+               break;
+       case sizeof(uint8_t):
+               tmp = nftnl_expr_get_u8(nle, NFTNL_EXPR_CMP_DATA);
+
+               imm = imr_object_alloc_imm32(tmp);
+               break;
+       default:
+               return -ENOTSUPP;
+       }
+
+       if (!imm)
+               return -ENOMEM;
+
+       ret = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle, NFTNL_EXPR_CMP_SREG));
+       if (ret < 0) {
+               imr_object_free(imm);
+               return ret;
+       }
+
+       left = imr_register_load(state, ret);
+       if (!left) {
+               /* do not leak the immediate when the register is empty */
+               imr_object_free(imm);
+               return -EINVAL;
+       }
+
+       o = imr_object_alloc_relational(op, left, imm);
+       if (!o) {
+               /*
+                * Operands stay with the caller when the relational object
+                * cannot be built (nft_ipv4_only() frees them likewise).
+                * NOTE(review): assumes imr_register_load() hands out a
+                * reference the caller must drop - confirm against imr.c.
+                */
+               imr_object_free(left);
+               imr_object_free(imm);
+               return -ENOMEM;
+       }
+
+       ret = imr_state_add_obj(state, o);
+       if (ret < 0)
+               imr_object_free(o);
+
+       return ret;
+}
+
+/*
+ * Translate an nft "payload" load expression.
+ * @nle: expression to translate
+ * @out: the struct imr_state being built
+ *
+ * Builds an IMR payload object from the expression's base/offset/length
+ * and stores it in the IMR register matching the destination register.
+ * Expressions that carry a source register or flags are rejected.
+ *
+ * Returns 0 on success, -EOPNOTSUPP, -EINVAL (unknown header base),
+ * -ENOMEM, or the register-mapping error.
+ */
+static int netlink_parse_payload(const struct nftnl_expr *nle, void *out)
+{
+       struct imr_state *state = out;
+       enum imr_payload_base imr_base;
+       struct imr_object *obj;
+       uint32_t base, offset, len;
+       int dreg;
+
+       if (nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_SREG))
+               return -EOPNOTSUPP;
+       if (nftnl_expr_is_set(nle, NFTNL_EXPR_PAYLOAD_FLAGS))
+               return -EOPNOTSUPP;
+
+       base = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_BASE);
+       offset = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_OFFSET);
+       len = nftnl_expr_get_u32(nle, NFTNL_EXPR_PAYLOAD_LEN);
+
+       printf("payload: base %d off %d len %d\n", base, offset, len);
+
+       dreg = nft_reg_to_imr_reg(nftnl_expr_get_u32(nle,
+                                                    NFTNL_EXPR_PAYLOAD_DREG));
+       if (dreg < 0)
+               return dreg;
+
+       switch (base) {
+       case NFT_PAYLOAD_LL_HEADER:
+               imr_base = IMR_PAYLOAD_BASE_LL;
+               break;
+       case NFT_PAYLOAD_NETWORK_HEADER:
+               imr_base = IMR_PAYLOAD_BASE_NH;
+               break;
+       case NFT_PAYLOAD_TRANSPORT_HEADER:
+               imr_base = IMR_PAYLOAD_BASE_TH;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       obj = imr_object_alloc_payload(imr_base, offset, len);
+       if (obj == NULL)
+               return -ENOMEM;
+
+       imr_register_store(state, dreg, obj);
+       return 0;
+}
+
+/*
+ * Expression name -> translator lookup table.  expr_parse_cb() compares the
+ * expression name reported by libnftnl against .name and calls .parse with
+ * the expression and the struct imr_state (as void *).
+ */
+static const struct {
+       const char *name;
+       int (*parse)(const struct nftnl_expr *nle,
+                                void *);
+} netlink_parsers[] = {
+       { .name = "immediate",  .parse = netlink_parse_immediate },
+       { .name = "cmp",        .parse = netlink_parse_cmp },
+       { .name = "payload",    .parse = netlink_parse_payload },
+};
+
+/*
+ * Per-expression callback: hand @expr to the matching translator.
+ * @expr: expression being visited
+ * @data: the struct imr_state being built
+ *
+ * Returns 0 when the expression was translated, or a negative value when
+ * the expression has no name or its translator failed.  The translator
+ * result used to be thrown away, which silently removed untranslatable
+ * matches from the rule.
+ *
+ * Expressions without an entry in netlink_parsers[] are still skipped, but
+ * now with a diagnostic on stderr.
+ * NOTE(review): skipping is only harmless for stateless extras; decide
+ * whether unknown expressions should abort the translation instead.
+ */
+static int expr_parse_cb(struct nftnl_expr *expr, void *data)
+{
+       const char *name = nftnl_expr_get_str(expr, NFTNL_EXPR_NAME);
+       struct imr_state *state = data;
+       unsigned int i;
+
+       if (!name)
+               return -1;
+
+       for (i = 0; i < ARRAY_SIZE(netlink_parsers); i++) {
+               int ret;
+
+               if (strcmp(netlink_parsers[i].name, name))
+                       continue;
+
+               printf("parse: %s\n", name);
+               ret = netlink_parsers[i].parse(expr, state);
+               if (ret < 0) {
+                       fprintf(stderr, "cannot translate expression %s: %d\n",
+                               name, ret);
+                       return ret;
+               }
+
+               /* table names are unique, no need to look further */
+               return 0;
+       }
+
+       fprintf(stderr, "ignoring unsupported expression %s\n", name);
+       return 0;
+}
+
+/*
+ * Per-rule callback: translate every expression of @rule into the IMR state.
+ * @rule: rule being visited
+ * @data: the struct imr_state being built
+ *
+ * Returns the result of imr_state_rule_end() on success, or the negative
+ * value reported by imr_state_rule_begin() or by the expression walk.  The
+ * walk result used to be ignored, so a rule was closed even when one of its
+ * expressions had been rejected.
+ */
+static int rule_parse_cb(struct nftnl_rule *rule, void *data)
+{
+       struct imr_state *state = data;
+       int ret;
+
+       ret = imr_state_rule_begin(state);
+       if (ret < 0)
+               return ret;
+
+       ret = nftnl_expr_foreach(rule, expr_parse_cb, data);
+       if (ret < 0)
+               return ret;
+
+       return imr_state_rule_end(state);
+}
+
+/*
+ * Send one netlink request and run @cb over the replies.
+ * @nf_sock: netlink socket
+ * @data:    request message
+ * @len:     length of @data in bytes
+ * @cb:      callback handed to mnl_cb_run()
+ * @cb_data: opaque pointer handed to @cb
+ *
+ * Replies are read until a receive or mnl_cb_run() returns <= 0.
+ * Returns -1 if sending fails, 0 if the loop ended with a negative value
+ * while errno is EAGAIN, otherwise the last value seen.
+ */
+static int
+mnl_talk(struct mnl_socket *nf_sock, const void *data, unsigned int len,
+        int (*cb)(const struct nlmsghdr *nlh, void *data), void *cb_data)
+{
+       char rcvbuf[MNL_SOCKET_BUFFER_SIZE];
+       uint32_t portid = mnl_socket_get_portid(nf_sock);
+       int rc;
+
+       if (mnl_socket_sendto(nf_sock, data, len) < 0)
+               return -1;
+
+       for (;;) {
+               rc = mnl_socket_recvfrom(nf_sock, rcvbuf, sizeof(rcvbuf));
+               if (rc <= 0)
+                       break;
+
+               rc = mnl_cb_run(rcvbuf, rc, seq, portid, cb, cb_data);
+               if (rc <= 0)
+                       break;
+       }
+
+       if (rc < 0 && errno == EAGAIN)
+               return 0;
+
+       return rc;
+}
+
+/*
+ * Rule
+ */
+/*
+ * mnl_cb_run() callback for a rule dump: parse one message into a new rule
+ * and append it to the rule list passed in @data.  A message that fails to
+ * parse is dropped; MNL_CB_OK is returned either way.
+ */
+static int rule_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct nftnl_rule_list *rules = data;
+       struct nftnl_rule *rule = nftnl_rule_alloc();
+
+       if (rule == NULL)
+               memory_allocation_error();
+
+       if (nftnl_rule_nlmsg_parse(nlh, rule) < 0)
+               nftnl_rule_free(rule);
+       else
+               nftnl_rule_list_add_tail(rule, rules);
+
+       return MNL_CB_OK;
+}
+
+/*
+ * Dump all rules of @family.
+ *
+ * Returns a newly allocated rule list, or NULL when the netlink exchange
+ * fails.  Exits the program when the list itself cannot be allocated.
+ */
+static struct nftnl_rule_list *mnl_rule_dump(struct mnl_socket *nf_sock,
+                                          int family)
+{
+       char req[MNL_SOCKET_BUFFER_SIZE];
+       struct nftnl_rule_list *rules;
+       struct nlmsghdr *nlh;
+
+       rules = nftnl_rule_list_alloc();
+       if (rules == NULL)
+               memory_allocation_error();
+
+       nlh = nftnl_rule_nlmsg_build_hdr(req, NFT_MSG_GETRULE, family,
+                                        NLM_F_DUMP, seq);
+
+       if (mnl_talk(nf_sock, nlh, nlh->nlmsg_len, rule_cb, rules) < 0) {
+               nftnl_rule_list_free(rules);
+               return NULL;
+       }
+
+       return rules;
+}
+
+/*
+ * Chain
+ */
+/*
+ * mnl_cb_run() callback for a chain dump: parse one message into a new
+ * chain and append it to the chain list passed in @data.  A message that
+ * fails to parse is dropped; MNL_CB_OK is returned either way.
+ */
+static int chain_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct nftnl_chain_list *chains = data;
+       struct nftnl_chain *chain = nftnl_chain_alloc();
+
+       if (chain == NULL)
+               memory_allocation_error();
+
+       if (nftnl_chain_nlmsg_parse(nlh, chain) < 0)
+               nftnl_chain_free(chain);
+       else
+               nftnl_chain_list_add_tail(chain, chains);
+
+       return MNL_CB_OK;
+}
+
+/*
+ * Dump all chains of @family.
+ *
+ * Returns a newly allocated chain list, or NULL when the netlink exchange
+ * fails.  Exits the program when the list itself cannot be allocated.
+ */
+static struct nftnl_chain_list *mnl_chain_dump(struct mnl_socket *nf_sock,
+                                            int family)
+{
+       char req[MNL_SOCKET_BUFFER_SIZE];
+       struct nftnl_chain_list *chains;
+       struct nlmsghdr *nlh;
+
+       chains = nftnl_chain_list_alloc();
+       if (chains == NULL)
+               memory_allocation_error();
+
+       nlh = nftnl_chain_nlmsg_build_hdr(req, NFT_MSG_GETCHAIN, family,
+                                       NLM_F_DUMP, seq);
+
+       if (mnl_talk(nf_sock, nlh, nlh->nlmsg_len, chain_cb, chains) < 0) {
+               nftnl_chain_list_free(chains);
+               return NULL;
+       }
+
+       return chains;
+}
+
+/*
+ * Table
+ */
+/*
+ * mnl_cb_run() callback for a table query: parse one message into a new
+ * table and add it to the table list of the ruleset passed in @data.
+ *
+ * NFTNL_RULESET_TABLELIST holds a struct nftnl_table_list, so the table is
+ * appended to such a list (created on first use) rather than being stored
+ * in the ruleset directly.
+ *
+ * Returns MNL_CB_OK, or MNL_CB_ERROR when the message cannot be parsed.
+ */
+static int table_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct nftnl_ruleset *rs = data;
+       struct nftnl_table_list *tables;
+       struct nftnl_table *t;
+
+       t = nftnl_table_alloc();
+       if (t == NULL)
+               memory_allocation_error();
+
+       if (nftnl_table_nlmsg_parse(nlh, t) < 0)
+               goto err_free;
+
+       /* reuse the list if an earlier message already created it */
+       tables = nftnl_ruleset_get(rs, NFTNL_RULESET_TABLELIST);
+       if (tables == NULL) {
+               tables = nftnl_table_list_alloc();
+               if (tables == NULL)
+                       memory_allocation_error();
+
+               nftnl_ruleset_set(rs, NFTNL_RULESET_TABLELIST, tables);
+       }
+
+       nftnl_table_list_add_tail(t, tables);
+
+       return MNL_CB_OK;
+
+err_free:
+       nftnl_table_free(t);
+       return MNL_CB_ERROR;
+}
+
+/*
+ * Set elements
+ */
+/*
+ * mnl_cb_run() callback for a set element dump: parse the elements in @nlh
+ * into the set passed in @data.  The parse result is not checked; the
+ * callback always returns MNL_CB_OK.
+ */
+static int set_elem_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct nftnl_set *set = data;
+
+       (void)nftnl_set_elems_nlmsg_parse(nlh, set);
+
+       return MNL_CB_OK;
+}
+
+/*
+ * Fetch the elements of @nls and add them to it.
+ *
+ * The request is built from the set's own family and attributes.
+ * Returns the result of mnl_talk().
+ */
+static int mnl_setelem_get(struct mnl_socket *nf_sock, struct nftnl_set *nls)
+{
+       char req[MNL_SOCKET_BUFFER_SIZE];
+       uint32_t family = nftnl_set_get_u32(nls, NFTNL_SET_FAMILY);
+       struct nlmsghdr *nlh;
+
+       nlh = nftnl_set_nlmsg_build_hdr(req, NFT_MSG_GETSETELEM, family,
+                                     NLM_F_DUMP | NLM_F_ACK, seq);
+       nftnl_set_nlmsg_build_payload(nlh, nls);
+
+       return mnl_talk(nf_sock, nlh, nlh->nlmsg_len, set_elem_cb, nls);
+}
+
+/*
+ * Set
+ */
+/*
+ * mnl_cb_run() callback for a set dump: parse one message into a new set
+ * and append it to the set list passed in @data.  A message that fails to
+ * parse is dropped; MNL_CB_OK is returned either way.
+ */
+static int set_cb(const struct nlmsghdr *nlh, void *data)
+{
+       struct nftnl_set_list *sets = data;
+       struct nftnl_set *set = nftnl_set_alloc();
+
+       if (set == NULL)
+               memory_allocation_error();
+
+       if (nftnl_set_nlmsg_parse(nlh, set) < 0)
+               nftnl_set_free(set);
+       else
+               nftnl_set_list_add_tail(set, sets);
+
+       return MNL_CB_OK;
+}
+
+/*
+ * Dump all sets of @family together with their elements.
+ *
+ * An empty, temporary set is used only to build the request payload.  After
+ * the set list has been received, the elements of every set are fetched
+ * with mnl_setelem_get().
+ *
+ * Returns the set list, or NULL when the dump or any element query fails.
+ * Exits the program on allocation failure.
+ */
+static struct nftnl_set_list *
+mnl_set_dump(struct mnl_socket *nf_sock, int family)
+{
+       char req[MNL_SOCKET_BUFFER_SIZE];
+       struct nftnl_set_list_iter *iter;
+       struct nftnl_set_list *sets;
+       struct nftnl_set *tmpl, *cur;
+       struct nlmsghdr *nlh;
+
+       tmpl = nftnl_set_alloc();
+       if (tmpl == NULL)
+               memory_allocation_error();
+
+       nlh = nftnl_set_nlmsg_build_hdr(req, NFT_MSG_GETSET, family,
+                                     NLM_F_DUMP | NLM_F_ACK, seq);
+       nftnl_set_nlmsg_build_payload(nlh, tmpl);
+       nftnl_set_free(tmpl);
+
+       sets = nftnl_set_list_alloc();
+       if (sets == NULL)
+               memory_allocation_error();
+
+       if (mnl_talk(nf_sock, nlh, nlh->nlmsg_len, set_cb, sets) < 0)
+               goto err;
+
+       iter = nftnl_set_list_iter_create(sets);
+       if (iter == NULL)
+               memory_allocation_error();
+
+       for (cur = nftnl_set_list_iter_next(iter); cur != NULL;
+            cur = nftnl_set_list_iter_next(iter)) {
+               if (mnl_setelem_get(nf_sock, cur) == 0)
+                       continue;
+
+               perror("E: Unable to get set elements");
+               nftnl_set_list_iter_destroy(iter);
+               goto err;
+       }
+
+       nftnl_set_list_iter_destroy(iter);
+
+       return sets;
+err:
+       nftnl_set_list_free(sets);
+       return NULL;
+}
+
+/*
+ * Query table @table of @family and return a new ruleset that table_cb()
+ * has filled in from the reply.
+ *
+ * A temporary table object carrying only the name is used to build the
+ * request.  Returns NULL when the netlink exchange fails; exits the program
+ * on allocation failure.
+ */
+static struct nftnl_ruleset *mnl_table_ruleset(struct mnl_socket *nf_sock,
+                                              int family,
+                                              const char *table)
+{
+       char req[MNL_SOCKET_BUFFER_SIZE];
+       struct nftnl_ruleset *ruleset;
+       struct nftnl_table *tmpl;
+       struct nlmsghdr *nlh;
+
+       nlh = nftnl_table_nlmsg_build_hdr(req, NFT_MSG_GETTABLE, family,
+                                         NLM_F_ACK, seq);
+
+       tmpl = nftnl_table_alloc();
+       if (tmpl == NULL)
+               memory_allocation_error();
+
+       nftnl_table_set(tmpl, NFTNL_TABLE_NAME, table);
+       nftnl_table_nlmsg_build_payload(nlh, tmpl);
+       nftnl_table_free(tmpl);
+
+       ruleset = nftnl_ruleset_alloc();
+       if (ruleset == NULL)
+               memory_allocation_error();
+
+       if (mnl_talk(nf_sock, nlh, nlh->nlmsg_len, table_cb, ruleset) < 0) {
+               nftnl_ruleset_free(ruleset);
+               return NULL;
+       }
+
+       return ruleset;
+}
+
+/*
+ * Collect the "filter" table of @family plus the chains, sets and rules of
+ * that family into one ruleset, and print it to stdout.
+ *
+ * Returns NULL only when the table query fails; a failed chain, set or rule
+ * dump simply leaves that part of the ruleset unset.
+ */
+static struct nftnl_ruleset *mnl_ruleset_dump(struct mnl_socket *nf_sock,
+                                             int family)
+{
+       struct nftnl_chain_list *chains;
+       struct nftnl_rule_list *rules;
+       struct nftnl_set_list *sets;
+       struct nftnl_ruleset *ruleset;
+
+       ruleset = mnl_table_ruleset(nf_sock, family, "filter");
+       if (ruleset == NULL)
+               return NULL;
+
+       chains = mnl_chain_dump(nf_sock, family);
+       if (chains)
+               nftnl_ruleset_set(ruleset, NFTNL_RULESET_CHAINLIST, chains);
+
+       sets = mnl_set_dump(nf_sock, family);
+       if (sets)
+               nftnl_ruleset_set(ruleset, NFTNL_RULESET_SETLIST, sets);
+
+       rules = mnl_rule_dump(nf_sock, family);
+       if (rules)
+               nftnl_ruleset_set(ruleset, NFTNL_RULESET_RULELIST, rules);
+
+       nftnl_ruleset_fprintf(stdout, ruleset, NFTNL_OUTPUT_DEFAULT, 0);
+       return ruleset;
+}
+
+/*
+ * Add the implicit first rule "ether type ne 0x800 accept".
+ *
+ * The rule compares the 16 bit h_proto field of the link-layer header with
+ * htons(ETH_P_IP) and passes the packet when they differ.
+ *
+ * Returns the result of imr_state_rule_end() on success.  Failures of
+ * imr_state_rule_begin() and imr_state_add_obj() are passed on, and
+ * -ENOMEM is returned when an IMR object cannot be allocated.  Objects
+ * that could not be added to @state are freed here.
+ */
+static int nft_ipv4_only(struct imr_state *state)
+{
+       struct imr_object *eth_p_ip, *lltype, *relop, *verdict;
+       int ret;
+
+       ret = imr_state_rule_begin(state);
+       if (ret < 0)
+               return ret;
+
+       lltype = imr_object_alloc_payload(IMR_PAYLOAD_BASE_LL,
+                                         offsetof(struct ethhdr, h_proto),
+                                         sizeof(uint16_t));
+       if (!lltype)
+               return -ENOMEM;
+
+       eth_p_ip = imr_object_alloc_imm32(htons(ETH_P_IP));
+       if (!eth_p_ip) {
+               imr_object_free(lltype);
+               return -ENOMEM;
+       }
+
+       relop = imr_object_alloc_relational(IMR_RELOP_NE, lltype, eth_p_ip);
+       if (!relop) {
+               imr_object_free(eth_p_ip);
+               imr_object_free(lltype);
+               return -ENOMEM;
+       }
+
+       ret = imr_state_add_obj(state, relop);
+       if (ret != 0) {
+               /* same cleanup netlink_parse_immediate() does on failure */
+               if (ret < 0)
+                       imr_object_free(relop);
+               return ret;
+       }
+
+       verdict = imr_object_alloc_verdict(IMR_VERDICT_PASS);
+       if (!verdict)
+               return -ENOMEM;
+
+       ret = imr_state_add_obj(state, verdict);
+       if (ret != 0) {
+               if (ret < 0)
+                       imr_object_free(verdict);
+               return ret;
+       }
+
+       return imr_state_rule_end(state);
+}
+
+/*
+ * Translate the rules of @rs into an IMR program, print it to stdout and
+ * hand it to imr_do_bpf().
+ *
+ * The implicit "IPv4 only" rule is added first, then every rule of the
+ * ruleset is translated via rule_parse_cb().
+ *
+ * Returns 0 on success, -ENOENT when @rs carries no rule list, -ENOMEM
+ * when the IMR state cannot be allocated, or the negative value reported
+ * while building the rules.
+ */
+static int nft2imr(const struct nftnl_ruleset *rs)
+{
+       struct nftnl_rule_list *l;
+       struct imr_state *state;
+       int ret;
+
+       /* the rule list is missing when the rule dump failed */
+       l = nftnl_ruleset_get(rs, NFTNL_RULESET_RULELIST);
+       if (!l)
+               return -ENOENT;
+
+       state = imr_state_alloc();
+       if (!state)
+               return -ENOMEM;
+
+       /* this result used to be overwritten without being looked at */
+       ret = nft_ipv4_only(state);
+       if (ret < 0)
+               goto out;
+
+       ret = nftnl_rule_list_foreach(l, rule_parse_cb, state);
+       if (ret < 0)
+               goto out;
+
+       imr_state_print(stdout, state);
+       /* NOTE(review): the result of imr_do_bpf() is not checked */
+       imr_do_bpf(state);
+       ret = 0;
+out:
+       imr_state_free(state);
+       return ret;
+}
+
+/*
+ * Dump the IPv4 "filter" ruleset over netlink and translate it.
+ *
+ * At most one command line argument is accepted; it is not used.
+ * The netlink socket is closed as soon as the dump is done.  The exit
+ * status is EXIT_SUCCESS or EXIT_FAILURE instead of a raw negative error
+ * code.  The ruleset itself is not freed; it lives until the process
+ * exits.
+ */
+int main(int argc, char *argv[])
+{
+       struct mnl_socket *nl;
+       struct nftnl_ruleset *rs;
+
+       if (argc > 2) {
+               fprintf(stderr, "%s {json}\n",
+                       argv[0]);
+               exit(EXIT_FAILURE);
+       }
+
+       nl = mnl_socket_open(NETLINK_NETFILTER);
+       if (nl == NULL) {
+               perror("mnl_socket_open");
+               exit(EXIT_FAILURE);
+       }
+
+       if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+               perror("mnl_socket_bind");
+               mnl_socket_close(nl);
+               exit(EXIT_FAILURE);
+       }
+
+       seq = time(NULL);
+
+       rs = mnl_ruleset_dump(nl, NFPROTO_IPV4);
+       if (rs == NULL) {
+               /* report first: closing the socket may change errno */
+               perror("ruleset_dump");
+               mnl_socket_close(nl);
+               exit(EXIT_FAILURE);
+       }
+
+       /* nft2imr() works on the dumped ruleset only */
+       mnl_socket_close(nl);
+
+       return nft2imr(rs) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
+}
-- 
2.16.1

Reply via email to