Hello Stefan,

On Wed, Aug 10, 2022 at 02:38:16PM +0000, Stefan Butz wrote:
> Hi everyone,
> 
> this mail includes a patch to store pf rules in a red-black tree.
> Currently they are stored in a linked list.
> My system configured with 16000 rules takes about 10 minutes
> to print them out using `pfctl -sr`.
> This patch decreases the time to 4 seconds.
> I was not able to measure a time increase for rule insertion.
> Inserting a lot of labels is still very slow.
> This has to be attacked separatly.
> 
> 

    below is a diff which should make DIOCGETRULE fast enough. It introduces
    a transaction (struct pf_trans).  it comes from my 'work-in-progress' tree.
    I'd like to introduce transactions to move all memory allocations done on
    behalf of ioctl(2) in pf(4) outside of PF_LOCK() scope.  Seeing your diff
    I've realized we can use pf_trans also to speed up DIOCGETRULE.


diff below is based on my work-in-progress tree/branch. There might be some
rough edges, but it compiles and seems to work. if people will like the idea
I'll polish the change and commit it. It will also help me to make upcoming
diffs smaller.

At this point I'd like to know if idea presented in diff below makes
kind of sense.

    there is `struct pf_trans` which keeps context for operations
    which involve multiple ioctl(2) calls (such as ADDRULE, GETRULE,...)

    ruleset.ticket is renamed to ruleset.version because ticket is know
    kept in newly introduced pf_trans.

    the workflow for process which wants to read/modify ruleset. In case
    of DIOCGETRULES/DIOCGETRULE operations the workflow looks as follows:

    transaction (pf_trans) is created by DIOCGETRULES

        transaction gets reference to anchor/ruleset

        we also remember current version of ruleset in transaction

        we also rmemeber a pointer to the first rule in ruleset/anchor

        we return transaction ticket to pfctl(8) process

    then pfctl(8) issues DIOCGETRULE, it passes transaction ticket to ioctl(2)

        we use ticket to look up a transaction created by DIOCGETRULES

        we check if version kept in transaction matches the version at
        ruleset.  If there is a mismatch ruleset got modified and we bail out.

        if rule stored in transaction is NULL we return ENOENT
        to indicate ruleset is either empty or there are no more rules

        if version matches then we may safely proceed, the rule pointer
        must be valid because matching version number warrants nothing
        was modified.

        we copy rule to buffer

        and we store a next rule in transaction.

    all transactions created by process are currently destroyed in pfclose()

thanks for thoughts/feedback.

regards
sashan

--------8<---------------8<---------------8<------------------8<--------
diff --git a/sbin/pfctl/pfctl.c b/sbin/pfctl/pfctl.c
index 9f14b955ca7..cf9def71c02 100644
--- a/sbin/pfctl/pfctl.c
+++ b/sbin/pfctl/pfctl.c
@@ -833,7 +833,7 @@ pfctl_show_rules(int dev, char *path, int opts, enum 
pfctl_show format,
     char *anchorname, int depth, int wildcard, long shownr)
 {
        struct pfioc_rule pr;
-       u_int32_t nr, mnr, header = 0;
+       u_int32_t header = 0;
        int len = strlen(path), ret = 0;
        char *npath, *p;
 
@@ -884,24 +884,9 @@ pfctl_show_rules(int dev, char *path, int opts, enum 
pfctl_show format,
                goto error;
        }
 
-       if (shownr < 0) {
-               mnr = pr.nr;
-               nr = 0;
-       } else if (shownr < pr.nr) {
-               nr = shownr;
-               mnr = shownr + 1;
-       } else {
-               warnx("rule %ld not found", shownr);
-               ret = -1;
-               goto error;
-       }
-       for (; nr < mnr; ++nr) {
-               pr.nr = nr;
-               if (ioctl(dev, DIOCGETRULE, &pr) == -1) {
-                       warn("DIOCGETRULE");
-                       ret = -1;
-                       goto error;
-               }
+       while (ioctl(dev, DIOCGETRULE, &pr) != -1) {
+               if ((shownr != -1) && (shownr != pr.nr))
+                       continue;
 
                /* anchor is the same for all rules in it */
                if (pr.rule.anchor_wildcard == 0)
@@ -956,6 +941,13 @@ pfctl_show_rules(int dev, char *path, int opts, enum 
pfctl_show format,
                case PFCTL_SHOW_NOTHING:
                        break;
                }
+               errno = 0;
+       }
+
+       if ((errno != 0) && (errno != ENOENT)) {
+               warn("DIOCGETRULE");
+               ret = -1;
+               goto error;
        }
 
        /*
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index b540afc6fe2..6ef89d162b3 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -116,6 +116,11 @@ void                        pf_qid_unref(u_int16_t);
 int                     pf_states_clr(struct pfioc_state_kill *);
 int                     pf_states_get(struct pfioc_states *);
 
+struct pf_trans        *pf_open_trans(pid_t);
+struct pf_trans                *pf_find_trans(u_int32_t);
+void                    pf_free_trans(struct pf_trans *);
+void                    pf_rollback_trans(struct pf_trans *);
+
 struct pf_rule          pf_default_rule, pf_default_rule_new;
 
 struct {
@@ -165,6 +170,8 @@ int                  pf_rtlabel_add(struct pf_addr_wrap *);
 void                    pf_rtlabel_remove(struct pf_addr_wrap *);
 void                    pf_rtlabel_copyout(struct pf_addr_wrap *);
 
+u_int32_t trans_ticket = 1;
+LIST_HEAD(trans, pf_trans)     pf_ioctl_trans;
 
 void
 pfattach(int num)
@@ -273,8 +280,27 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p)
 int
 pfclose(dev_t dev, int flags, int fmt, struct proc *p)
 {
+       struct pf_trans *w, *s;
+       LIST_HEAD(trans, pf_trans)      tmp_list;
+
        if (minor(dev) >= 1)
                return (ENXIO);
+
+       LIST_INIT(&tmp_list);
+       rw_enter_write(&pfioctl_rw);
+       LIST_FOREACH_SAFE(w, &pf_ioctl_trans, entry, s) {
+               if (w->pid == p->p_p->ps_pid) {
+                       LIST_REMOVE(w, entry);
+                       LIST_INSERT_HEAD(&tmp_list, w, entry);
+               }
+       }
+       rw_exit_write(&pfioctl_rw);
+
+       while ((w = LIST_FIRST(&tmp_list)) != NULL) {
+               LIST_REMOVE(w, entry);
+               pf_free_trans(w);
+       }
+
        return (0);
 }
 
@@ -359,7 +385,7 @@ pf_purge_rule(struct pf_rule *rule)
        ruleset->rules.active.rcount--;
        TAILQ_FOREACH(rule, ruleset->rules.active.ptr, entries)
                rule->nr = nr++;
-       ruleset->rules.active.ticket++;
+       ruleset->rules.active.version++;
        pf_calc_skip_steps(ruleset->rules.active.ptr);
        pf_remove_if_empty_ruleset(ruleset);
 
@@ -525,31 +551,32 @@ pf_qid_unref(u_int16_t qid)
 }
 
 int
-pf_begin_rules(u_int32_t *ticket, const char *anchor)
+pf_begin_rules(u_int32_t *version, const char *anchor)
 {
        struct pf_ruleset       *rs;
        struct pf_rule          *rule;
 
+
        if ((rs = pf_find_or_create_ruleset(anchor)) == NULL)
                return (EINVAL);
        while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
                pf_rm_rule(rs->rules.inactive.ptr, rule);
                rs->rules.inactive.rcount--;
        }
-       *ticket = ++rs->rules.inactive.ticket;
+       *version = ++rs->rules.inactive.version;
        rs->rules.inactive.open = 1;
        return (0);
 }
 
 void
-pf_rollback_rules(u_int32_t ticket, char *anchor)
+pf_rollback_rules(u_int32_t version, char *anchor)
 {
        struct pf_ruleset       *rs;
        struct pf_rule          *rule;
 
        rs = pf_find_ruleset(anchor);
        if (rs == NULL || !rs->rules.inactive.open ||
-           rs->rules.inactive.ticket != ticket)
+           rs->rules.inactive.version != version)
                return;
        while ((rule = TAILQ_FIRST(rs->rules.inactive.ptr)) != NULL) {
                pf_rm_rule(rs->rules.inactive.ptr, rule);
@@ -828,7 +855,7 @@ pf_hash_rule(MD5_CTX *ctx, struct pf_rule *rule)
 }
 
 int
-pf_commit_rules(u_int32_t ticket, char *anchor)
+pf_commit_rules(u_int32_t version, char *anchor)
 {
        struct pf_ruleset       *rs;
        struct pf_rule          *rule;
@@ -840,7 +867,7 @@ pf_commit_rules(u_int32_t ticket, char *anchor)
 
        rs = pf_find_ruleset(anchor);
        if (rs == NULL || !rs->rules.inactive.open ||
-           ticket != rs->rules.inactive.ticket)
+           version != rs->rules.inactive.version)
                return (EBUSY);
 
        if (rs == &pf_main_ruleset)
@@ -855,7 +882,7 @@ pf_commit_rules(u_int32_t ticket, char *anchor)
        rs->rules.inactive.ptr = old_rules;
        rs->rules.inactive.rcount = old_rcount;
 
-       rs->rules.active.ticket = rs->rules.inactive.ticket;
+       rs->rules.active.version = rs->rules.inactive.version;
        pf_calc_skip_steps(rs->rules.active.ptr);
 
 
@@ -1148,10 +1175,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        return (EACCES);
                }
 
-       if (flags & FWRITE)
-               rw_enter_write(&pfioctl_rw);
-       else
-               rw_enter_read(&pfioctl_rw);
+       rw_enter_write(&pfioctl_rw);
 
        switch (cmd) {
 
@@ -1197,7 +1221,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
                NET_LOCK();
                PF_LOCK();
-               pq->ticket = pf_main_ruleset.rules.active.ticket;
+               pq->ticket = pf_main_ruleset.rules.active.version;
 
                /* save state to not run over them all each time? */
                qs = TAILQ_FIRST(pf_queues_active);
@@ -1218,7 +1242,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
                NET_LOCK();
                PF_LOCK();
-               if (pq->ticket != pf_main_ruleset.rules.active.ticket) {
+               if (pq->ticket != pf_main_ruleset.rules.active.version) {
                        error = EBUSY;
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1249,7 +1273,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
                NET_LOCK();
                PF_LOCK();
-               if (pq->ticket != pf_main_ruleset.rules.active.ticket) {
+               if (pq->ticket != pf_main_ruleset.rules.active.version) {
                        error = EBUSY;
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1296,7 +1320,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
 
                NET_LOCK();
                PF_LOCK();
-               if (q->ticket != pf_main_ruleset.rules.inactive.ticket) {
+               if (q->ticket != pf_main_ruleset.rules.inactive.version) {
                        error = EBUSY;
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1392,7 +1416,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        pf_rule_free(rule);
                        goto fail;
                }
-               if (pr->ticket != ruleset->rules.inactive.ticket) {
+               if (pr->ticket != ruleset->rules.inactive.version) {
                        error = EBUSY;
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1454,7 +1478,9 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
        case DIOCGETRULES: {
                struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
                struct pf_ruleset       *ruleset;
-               struct pf_rule          *tail;
+               struct pf_rule          *rule;
+               struct pf_trans         *t;
+               u_int32_t                ruleset_version;
 
                NET_LOCK();
                PF_LOCK();
@@ -1466,14 +1492,25 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        NET_UNLOCK();
                        goto fail;
                }
-               tail = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue);
-               if (tail)
-                       pr->nr = tail->nr + 1;
+               rule = TAILQ_LAST(ruleset->rules.active.ptr, pf_rulequeue);
+               if (rule)
+                       pr->nr = rule->nr + 1;
                else
                        pr->nr = 0;
-               pr->ticket = ruleset->rules.active.ticket;
+               ruleset_version = ruleset->rules.active.version;
+               pf_anchor_take(ruleset->anchor);
+               rule = TAILQ_FIRST(ruleset->rules.active.ptr);
                PF_UNLOCK();
                NET_UNLOCK();
+
+               t = pf_open_trans(p->p_p->ps_pid);
+               t->getrule.anchor = (ruleset == &pf_main_ruleset) ?
+                   &pf_main_anchor : ruleset->anchor;
+               t->getrule.version = ruleset_version;
+               t->getrule.rule = rule;
+               pr->ticket = t->ticket;
+               DPFPRINTF(LOG_DEBUG, "DIOCGETRULES: tversion: %u vs. rversion: 
%u\n", t->getrule.version, ruleset_version);
+
                break;
        }
 
@@ -1481,29 +1518,27 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                struct pfioc_rule       *pr = (struct pfioc_rule *)addr;
                struct pf_ruleset       *ruleset;
                struct pf_rule          *rule;
+               struct pf_trans         *t;
                int                      i;
 
+               t = pf_find_trans(pr->ticket);
+               if (t == NULL)
+                       return (EINVAL);
+               KASSERT(t->pid == p->p_p->ps_pid);
+
                NET_LOCK();
                PF_LOCK();
-               pr->anchor[sizeof(pr->anchor) - 1] = '\0';
-               ruleset = pf_find_ruleset(pr->anchor);
-               if (ruleset == NULL) {
-                       error = EINVAL;
-                       PF_UNLOCK();
-                       NET_UNLOCK();
-                       goto fail;
-               }
-               if (pr->ticket != ruleset->rules.active.ticket) {
+               KASSERT(t->getrule.anchor != NULL);
+               ruleset = &t->getrule.anchor->ruleset;
+               if (t->getrule.version != ruleset->rules.active.version) {
                        error = EBUSY;
                        PF_UNLOCK();
                        NET_UNLOCK();
                        goto fail;
                }
-               rule = TAILQ_FIRST(ruleset->rules.active.ptr);
-               while ((rule != NULL) && (rule->nr != pr->nr))
-                       rule = TAILQ_NEXT(rule, entries);
+               rule = t->getrule.rule;
                if (rule == NULL) {
-                       error = EBUSY;
+                       error = ENOENT;
                        PF_UNLOCK();
                        NET_UNLOCK();
                        goto fail;
@@ -1544,6 +1579,8 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        rule->bytes[0] = rule->bytes[1] = 0;
                        rule->states_tot = 0;
                }
+               pr->nr = rule->nr;
+               t->getrule.rule = TAILQ_NEXT(rule, entries);
                PF_UNLOCK();
                NET_UNLOCK();
                break;
@@ -1569,7 +1606,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        if (ruleset == NULL)
                                error = EINVAL;
                        else
-                               pcr->ticket = ++ruleset->rules.active.ticket;
+                               pcr->ticket = ++ruleset->rules.active.version;
 
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1619,7 +1656,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                        goto fail;
                }
 
-               if (pcr->ticket != ruleset->rules.active.ticket) {
+               if (pcr->ticket != ruleset->rules.active.version) {
                        error = EINVAL;
                        PF_UNLOCK();
                        NET_UNLOCK();
@@ -1717,7 +1754,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                TAILQ_FOREACH(oldrule, ruleset->rules.active.ptr, entries)
                        oldrule->nr = nr++;
 
-               ruleset->rules.active.ticket++;
+               ruleset->rules.active.version++;
 
                pf_calc_skip_steps(ruleset->rules.active.ptr);
                pf_remove_if_empty_ruleset(ruleset);
@@ -2655,7 +2692,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                                rs = pf_find_ruleset(ioe->anchor);
                                if (rs == NULL ||
                                    !rs->rules.inactive.open ||
-                                   rs->rules.inactive.ticket !=
+                                   rs->rules.inactive.version !=
                                    ioe->ticket) {
                                        PF_UNLOCK();
                                        NET_UNLOCK();
@@ -3031,10 +3068,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
struct proc *p)
                break;
        }
 fail:
-       if (flags & FWRITE)
-               rw_exit_write(&pfioctl_rw);
-       else
-               rw_exit_read(&pfioctl_rw);
+       rw_exit_write(&pfioctl_rw);
 
        return (error);
 }
@@ -3253,3 +3287,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp, 
size_t newlen)
 
        return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs));
 }
+
+struct pf_trans *
+pf_open_trans(pid_t pid)
+{
+       struct pf_trans *t;
+
+       rw_assert_wrlock(&pfioctl_rw);
+
+       t = malloc(sizeof(*t), M_TEMP, M_WAITOK|M_ZERO);
+       t->pid = pid;
+       t->ticket = trans_ticket++;
+       LIST_INSERT_HEAD(&pf_ioctl_trans, t, entry);
+
+       return (t);
+}
+
+struct pf_trans *
+pf_find_trans(u_int32_t ticket)
+{
+       struct pf_trans *t;
+
+       rw_assert_anylock(&pfioctl_rw);
+
+       LIST_FOREACH(t, &pf_ioctl_trans, entry) {
+               if (t->ticket == ticket)
+                       break;
+       }
+
+       return (t);
+}
+
+void
+pf_free_trans(struct pf_trans *t)
+{
+       /*
+        * remove anchors, and main ruleset
+        */
+       pf_anchor_rele(t->getrule.anchor);
+       free(t, M_TEMP, sizeof(*t));
+}
+
+void
+pf_rollback_trans(struct pf_trans *t)
+{
+       if (t != NULL) {
+               LIST_REMOVE(t, entry);
+               pf_free_trans(t);
+       }
+}
diff --git a/sys/net/pf_ruleset.c b/sys/net/pf_ruleset.c
index 6131895751e..9d17d71e124 100644
--- a/sys/net/pf_ruleset.c
+++ b/sys/net/pf_ruleset.c
@@ -233,6 +233,9 @@ pf_create_anchor(struct pf_anchor *parent, const char 
*aname)
 
        pf_init_ruleset(&anchor->ruleset);
        anchor->ruleset.anchor = anchor;
+#ifdef _KERNEL
+       refcnt_init(&anchor->ref);
+#endif
 
        return (anchor);
 }
@@ -308,7 +311,7 @@ pf_remove_if_empty_ruleset(struct pf_ruleset *ruleset)
                if ((parent = ruleset->anchor->parent) != NULL)
                        RB_REMOVE(pf_anchor_node, &parent->children,
                            ruleset->anchor);
-               rs_pool_put_anchor(ruleset->anchor);
+               pf_anchor_rele(ruleset->anchor);
                if (parent == NULL)
                        return;
                ruleset = &parent->ruleset;
@@ -431,3 +434,27 @@ pf_remove_anchor(struct pf_rule *r)
                pf_remove_if_empty_ruleset(&r->anchor->ruleset);
        r->anchor = NULL;
 }
+
+void
+pf_anchor_rele(struct pf_anchor *anchor)
+{
+       if ((anchor == NULL) || (anchor == &pf_main_anchor))
+               return;
+
+#ifdef _KERNEL
+       if (refcnt_rele(&anchor->ref))
+               rs_pool_put_anchor(anchor);
+#else
+       rs_pool_put_anchor(anchor);
+#endif
+}
+
+struct pf_anchor *
+pf_anchor_take(struct pf_anchor *anchor)
+{
+#ifdef _KERNEL
+       if ((anchor != NULL) && (anchor != &pf_main_anchor))
+               refcnt_take(&anchor->ref);
+#endif
+       return (anchor);
+}
diff --git a/sys/net/pfvar.h b/sys/net/pfvar.h
index 6f88ff687ed..42d664e0133 100644
--- a/sys/net/pfvar.h
+++ b/sys/net/pfvar.h
@@ -928,7 +928,7 @@ struct pf_ruleset {
                struct {
                        struct pf_rulequeue     *ptr;
                        u_int32_t                rcount;
-                       u_int32_t                ticket;
+                       u_int32_t                version;
                        int                      open;
                }                        active, inactive;
        }                        rules;
@@ -950,6 +950,7 @@ struct pf_anchor {
        struct pf_ruleset        ruleset;
        int                      refcnt;        /* anchor rules */
        int                      match;
+       struct refcnt            ref;           /* memory references */
 };
 RB_PROTOTYPE(pf_anchor_global, pf_anchor, entry_global, pf_anchor_compare)
 RB_PROTOTYPE(pf_anchor_node, pf_anchor, entry_node, pf_anchor_compare)
@@ -1929,6 +1930,8 @@ struct pf_ruleset         *pf_get_leaf_ruleset(char *, 
char **);
 struct pf_anchor       *pf_create_anchor(struct pf_anchor *, const char *);
 struct pf_ruleset      *pf_find_or_create_ruleset(const char *);
 void                    pf_rs_initialize(void);
+void                    pf_anchor_rele(struct pf_anchor *);
+struct pf_anchor       *pf_anchor_take(struct pf_anchor *);
 
 /* The fingerprint functions can be linked into userland programs (tcpdump) */
 int    pf_osfp_add(struct pf_osfp_ioctl *);
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index ec8f61a800d..37ef8a28631 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -39,6 +39,8 @@
 
 #include <sys/rwlock.h>
 #include <sys/mutex.h>
+#include <sys/queue.h>
+#include <net/pfvar.h>
 
 /*
  *
@@ -209,6 +211,17 @@ struct pf_pdesc {
        } hdr;
 };
 
+struct pf_trans {
+       pid_t                   pid;            /* process id */
+       u_int32_t               ticket;
+       LIST_ENTRY(pf_trans)    entry;
+       struct {
+                       u_int32_t                version;
+                       struct pf_anchor        *anchor;
+                       struct pf_rule          *rule;
+       } getrule;
+};
+
 extern struct task     pf_purge_task;
 extern struct timeout  pf_purge_to;
 

Reply via email to