Hello, </snip> On Wed, Apr 26, 2023 at 11:37:58AM +1000, David Gwynne wrote: > > fail: > > - if (flags & FWRITE) > > - rw_exit_write(&pfioctl_rw); > > - else > > - rw_exit_read(&pfioctl_rw); > > + rw_exit_write(&pfioctl_rw); > > i dont think having the open mode flags affect whether you take a shared > or exclusive lock makes sense anyway, so im happy with these bits.
while updating my diff I've noticed pfclose() needs same change: dropping 'flags & FWRITE' test. This is fixed i new diff below. </snip> > > int unit = minor(dev); > > if (unit & ((1 << CLONE_SHIFT) - 1)) > return (ENXIO); > > this has some ties into the second issue, which is that we shouldn't > use the pid as an identifier for state across syscalls like this > in the kernel. the reason is that userland could be weird or buggy > (or hostile) and fork in between creating a transaction and ending > a transaction, but it will still have the fd it from from open(/dev/pf). > instead, we should be using the whole dev_t or the minor number, > as long as it includes the bits that the cloning infrastructure > uses, as the identifier. in new diff I'm using a minor(dev) to identify transaction owner. > > i would also check that the process^Wminor number that created a > transaction is the same when looking up a transaction in pf_find_trans. the pf_find_trans() is a dead code in diff below as it is not being used there. Just to make sure I got things right so I can update '3/3' diff in stack accordingly. Let's assume snippet below comes from pfioctl() function int pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) { ... t = pf_find_trans(ticket); if (t == NULL) return (ENXIO); KASSERT(t->pft_unit == minor(dev)); is that kind of check in KASSET() something you have on your mind? perhaps I can trade KASSERT() to regular code: if (t->pft_unit != minor(dev)) return (EPERM); thank you for pointers to bpf.c updated diff is below. regards sashan --------8<---------------8<---------------8<------------------8<-------- diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index 1141069dcf6..b9904c545c5 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -117,6 +117,11 @@ void pf_qid_unref(u_int16_t); int pf_states_clr(struct pfioc_state_kill *); int pf_states_get(struct pfioc_states *); +struct pf_trans *pf_open_trans(uint32_t); +struct pf_trans *pf_find_trans(uint64_t); +void pf_free_trans(struct pf_trans *); +void pf_rollback_trans(struct pf_trans *); + struct pf_rule pf_default_rule, pf_default_rule_new; struct { @@ -168,6 +173,8 @@ int pf_rtlabel_add(struct pf_addr_wrap *); void pf_rtlabel_remove(struct pf_addr_wrap *); void pf_rtlabel_copyout(struct pf_addr_wrap *); +uint64_t trans_ticket = 1; +LIST_HEAD(, pf_trans) pf_ioctl_trans = LIST_HEAD_INITIALIZER(pf_trans); void pfattach(int num) @@ -293,6 +300,28 @@ pfopen(dev_t dev, int flags, int fmt, struct proc *p) int pfclose(dev_t dev, int flags, int fmt, struct proc *p) { + struct pf_trans *w, *s; + LIST_HEAD(, pf_trans) tmp_list; + uint32_t unit = minor(dev); + + if (minor(dev) >= 1) + return (ENXIO); + + LIST_INIT(&tmp_list); + rw_enter_write(&pfioctl_rw); + LIST_FOREACH_SAFE(w, &pf_ioctl_trans, pft_entry, s) { + if (w->pft_unit == unit) { + LIST_REMOVE(w, pft_entry); + LIST_INSERT_HEAD(&tmp_list, w, pft_entry); + } + } + rw_exit_write(&pfioctl_rw); + + while ((w = LIST_FIRST(&tmp_list)) != NULL) { + LIST_REMOVE(w, pft_entry); + pf_free_trans(w); + } + return (0); } @@ -1142,10 +1171,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) return (EACCES); } - if (flags & FWRITE) - rw_enter_write(&pfioctl_rw); - else - rw_enter_read(&pfioctl_rw); + rw_enter_write(&pfioctl_rw); switch (cmd) { @@ -3022,10 +3048,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } fail: - if (flags & FWRITE) - rw_exit_write(&pfioctl_rw); - else - rw_exit_read(&pfioctl_rw); + rw_exit_write(&pfioctl_rw); return (error); } @@ -3244,3 +3267,52 @@ pf_sysctl(void *oldp, size_t *oldlenp, void *newp, size_t newlen) return sysctl_rdstruct(oldp, oldlenp, newp, &pfs, sizeof(pfs)); } + +struct pf_trans * +pf_open_trans(uint32_t unit) +{ + static uint64_t ticket = 1; + struct pf_trans *t; + + rw_assert_wrlock(&pfioctl_rw); + + t = malloc(sizeof(*t), M_TEMP, M_WAITOK); + memset(t, 0, sizeof(struct pf_trans)); + t->pft_unit = unit; + t->pft_ticket = ticket++; + + LIST_INSERT_HEAD(&pf_ioctl_trans, t, pft_entry); + + return (t); +} + +struct pf_trans * +pf_find_trans(uint64_t ticket) +{ + struct pf_trans *t; + + rw_assert_anylock(&pfioctl_rw); + + LIST_FOREACH(t, &pf_ioctl_trans, pft_entry) { + if (t->pft_ticket == ticket) + break; + } + + return (t); +} + +void +pf_free_trans(struct pf_trans *t) +{ + free(t, M_TEMP, sizeof(*t)); +} + +void +pf_rollback_trans(struct pf_trans *t) +{ + if (t != NULL) { + rw_assert_wrlock(&pfioctl_rw); + LIST_REMOVE(t, pft_entry); + pf_free_trans(t); + } +} diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index 38fff6473aa..5af2027733a 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -322,6 +322,17 @@ enum { extern struct cpumem *pf_anchor_stack; +enum pf_trans_type { + PF_TRANS_NONE, + PF_TRANS_MAX +}; + +struct pf_trans { + LIST_ENTRY(pf_trans) pft_entry; + uint32_t pft_unit; /* process id */ + uint64_t pft_ticket; + enum pf_trans_type pft_type; +}; extern struct task pf_purge_task; extern struct timeout pf_purge_to;