Re: uvm: __inline -> inline

2020-09-22 Thread Vitaliy Makkoveev
ok mvs

> On 22 Sep 2020, at 10:15, Martin Pieuchot  wrote:
> 
> Spell inline correctly, also reduce the diff with NetBSD for uvm_amap.c
> and uvm_fault.c.
> 
> ok?
> 
> Index: uvm/uvm_addr.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_addr.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 uvm_addr.c
> --- uvm/uvm_addr.c13 Sep 2020 10:05:25 -  1.28
> +++ uvm/uvm_addr.c22 Sep 2020 07:12:10 -
> @@ -186,7 +186,7 @@ uvm_addr_entrybyspace(struct uaddr_free_
> }
> #endif /* !SMALL_KERNEL */
> 
> -static __inline vaddr_t
> +static inline vaddr_t
> uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
> {
>   vaddr_t adjusted;
> @@ -201,7 +201,7 @@ uvm_addr_align_forward(vaddr_t addr, vad
>   return (adjusted < addr ? adjusted + align : adjusted);
> }
> 
> -static __inline vaddr_t
> +static inline vaddr_t
> uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
> {
>   vaddr_t adjusted;
> Index: uvm/uvm_amap.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 uvm_amap.c
> --- uvm/uvm_amap.c4 Jan 2020 16:17:29 -   1.82
> +++ uvm/uvm_amap.c22 Sep 2020 07:07:45 -
> @@ -63,20 +63,20 @@ static char amap_small_pool_names[UVM_AM
>  */
> 
> static struct vm_amap *amap_alloc1(int, int, int);
> -static __inline void amap_list_insert(struct vm_amap *);
> -static __inline void amap_list_remove(struct vm_amap *);   
> +static inline void amap_list_insert(struct vm_amap *);
> +static inline void amap_list_remove(struct vm_amap *);   
> 
> struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int, int);
> void amap_chunk_free(struct vm_amap *, struct vm_amap_chunk *);
> void amap_wiperange_chunk(struct vm_amap *, struct vm_amap_chunk *, int, int);
> 
> -static __inline void
> +static inline void
> amap_list_insert(struct vm_amap *amap)
> {
>   LIST_INSERT_HEAD(_list, amap, am_list);
> }
> 
> -static __inline void
> +static inline void
> amap_list_remove(struct vm_amap *amap)
> { 
>   LIST_REMOVE(amap, am_list);
> @@ -190,13 +190,10 @@ amap_chunk_free(struct vm_amap *amap, st
>  * here are some in-line functions to help us.
>  */
> 
> -static __inline void pp_getreflen(int *, int, int *, int *);
> -static __inline void pp_setreflen(int *, int, int, int);
> -
> /*
>  * pp_getreflen: get the reference and length for a specific offset
>  */
> -static __inline void
> +static inline void
> pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
> {
> 
> @@ -212,7 +209,7 @@ pp_getreflen(int *ppref, int offset, int
> /*
>  * pp_setreflen: set the reference and length for a specific offset
>  */
> -static __inline void
> +static inline void
> pp_setreflen(int *ppref, int offset, int ref, int len)
> {
>   if (len == 1) {
> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.86
> diff -u -p -r1.86 uvm_aobj.c
> --- uvm/uvm_aobj.c18 Jul 2019 23:47:33 -  1.86
> +++ uvm/uvm_aobj.c22 Sep 2020 07:11:50 -
> @@ -256,7 +256,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
> /*
>  * uao_find_swslot: find the swap slot number for an aobj/pageidx
>  */
> -__inline static int
> +inline static int
> uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
> {
> 
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 uvm_fault.c
> --- uvm/uvm_fault.c   12 Sep 2020 17:08:49 -  1.98
> +++ uvm/uvm_fault.c   22 Sep 2020 07:07:59 -
> @@ -159,7 +159,7 @@ static struct uvm_advice uvmadvice[MADV_
>  * private prototypes
>  */
> static void uvmfault_amapcopy(struct uvm_faultinfo *);
> -static __inline void uvmfault_anonflush(struct vm_anon **, int);
> +static inline void uvmfault_anonflush(struct vm_anon **, int);
> void  uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
> void  uvmfault_update_stats(struct uvm_faultinfo *);
> 
> @@ -171,7 +171,7 @@ void  uvmfault_update_stats(struct uvm_fa
>  *
>  * => does not have to deactivate page if it is busy
>  */
> -static __inline void
> +static inline void
> uvmfault_anonflush(struct vm_anon **anons, int n)
> {
>   int lcv;
> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.267
> diff -u -p -r1.267 uvm_map.c
> --- uvm/uvm_map.c 14 Sep 2020 20:31:09 -  1.267
> +++ uvm/uvm_map.c 22 Sep 2020 07:11:47 -
> @@ -167,7 +167,7 @@ boolean_t  uvm_map_inentry_fix(struct p
>  * Tree management functions.
>  */
> 
> -static __inline void  uvm_mapent_copy(struct vm_map_entry*,
> +static inline voiduvm_mapent_copy(struct vm_map_entry*,
>   

Re: pppoe: move softc list out of NET_LOCK() into new pppoe lock

2020-09-13 Thread Vitaliy Makkoveev
Hello Klemens.

The pppoe(4) input path and the pppoe(4) config path (I mean clone/destroy)
always run in different contexts. Your diff introduces a new lock which
protects the `pppoe_softc_list’ list, but what protects the `sc’ you got
from this list after you released `pppoe_lock’?

I mean this dereference is not safe because a concurrent thread can
destroy this `sc’ (at least in theory).

> @@ -460,8 +463,10 @@ static void pppoe_dispatch_disc_pkt(stru
>   err_msg = "TAG HUNIQUE ERROR";
>   break;
>   }
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_hunique(mtod(n, caddr_t) + 
> noff,
>   len, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc != NULL)
>   devname = sc->sc_sppp.pp_if.if_xname;

This time we have multiple locks around the input and config paths, so I
guess this is the real reason you didn’t catch the use-after-free issue.

Also, witness allows us to debug lock order, but not concurrent access.


> On 13 Sep 2020, at 16:12, Klemens Nanni  wrote:
> 
> This is my first try trading global locks for interface specific ones.
> 
> pppoe(4) keeps a list of all its interfaces which is then obviously
> traversed during create and destroy.
> 
> Currently, the net lock is grabbed for this, but there seems to be no
> justification other than reusing^Wabusing an existing lock.
> 
> I run this diff with WITNESS and kern.witness=2 on my edgerouter 4
> providing my home uplink via pppoe0:  the kernel runs stable, there's
> no witness log showing up and creating and destroying hundreds of
> additional pppoe(4) devices works without disruption.
> 
> Is this the right direction?
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 if_pppoe.c
> --- if_pppoe.c13 Sep 2020 11:00:40 -  1.73
> +++ if_pppoe.c13 Sep 2020 11:31:12 -
> @@ -114,15 +114,18 @@ struct pppoetag {
> #define   PPPOE_DISC_MAXPADI  4   /* retry PADI four times 
> (quickly) */
> #define   PPPOE_DISC_MAXPADR  2   /* retry PADR twice */
> 
> +struct rwlock pppoe_lock = RWLOCK_INITIALIZER("pppoe");
> +
> /*
>  * Locks used to protect struct members and global data
>  *   I   immutable after creation
>  *   N   net lock
> + *   p   pppoe lock
>  */
> 
> struct pppoe_softc {
>   struct sppp sc_sppp;/* contains a struct ifnet as first 
> element */
> - LIST_ENTRY(pppoe_softc) sc_list;/* [N] */
> + LIST_ENTRY(pppoe_softc) sc_list;/* [p] */
>   unsigned int sc_eth_ifidx;  /* [N] */
> 
>   int sc_state;   /* [N] discovery phase or session 
> connected */
> @@ -233,7 +236,7 @@ pppoe_clone_create(struct if_clone *ifc,
>   bpfattach(>sc_sppp.pp_if.if_bpf, >sc_sppp.pp_if, DLT_PPP_ETHER, 
> 0);
> #endif
> 
> - NET_LOCK();
> + rw_enter_write(_lock);
> retry:
>   unique = arc4random();
>   LIST_FOREACH(tmpsc, _softc_list, sc_list)
> @@ -241,7 +244,7 @@ retry:
>   goto retry;
>   sc->sc_unique = unique;
>   LIST_INSERT_HEAD(_softc_list, sc, sc_list);
> - NET_UNLOCK();
> + rw_exit_write(_lock);
> 
>   return (0);
> }
> @@ -252,9 +255,9 @@ pppoe_clone_destroy(struct ifnet *ifp)
> {
>   struct pppoe_softc *sc = ifp->if_softc;
> 
> - NET_LOCK();
> + rw_enter_write(_lock);
>   LIST_REMOVE(sc, sc_list);
> - NET_UNLOCK();
> + rw_exit_write(_lock);
> 
>   timeout_del(>sc_timeout);
> 
> @@ -460,8 +463,10 @@ static void pppoe_dispatch_disc_pkt(stru
>   err_msg = "TAG HUNIQUE ERROR";
>   break;
>   }
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_hunique(mtod(n, caddr_t) + 
> noff,
>   len, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc != NULL)
>   devname = sc->sc_sppp.pp_if.if_xname;
>   break;
> @@ -668,8 +673,12 @@ pppoe_data_input(struct mbuf *m)
> #ifdef PPPOE_TERM_UNKNOWN_SESSIONS
>   u_int8_t shost[ETHER_ADDR_LEN];
> #endif
> - if (LIST_EMPTY(_softc_list))
> + rw_enter_read(_lock);
> + if (LIST_EMPTY(_softc_list)) {
> + rw_exit_read(_lock);
>   goto drop;
> + }
> + rw_exit_read(_lock);
> 
>   KASSERT(m->m_flags & M_PKTHDR);
> 
> @@ -699,7 +708,9 @@ pppoe_data_input(struct mbuf *m)
>   goto drop;
> 
>   session = ntohs(ph->session);
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_session(session, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc == NULL) {
> #ifdef 

Re: incorrect result from getppid for ptraced processes

2020-09-05 Thread Vitaliy Makkoveev



> On 5 Sep 2020, at 03:22, Philip Guenther  wrote:
> 
> On Fri, Sep 4, 2020 at 2:59 PM Mateusz Guzik  wrote:
> 
>> On 9/5/20, Philip Guenther  wrote:
>>> On Fri, Sep 4, 2020 at 1:06 PM Mateusz Guzik  wrote:
>>> 
>>>> On 9/4/20, Vitaliy Makkoveev  wrote:
>>>>> On Fri, Sep 04, 2020 at 05:24:42PM +0200, Mateusz Guzik wrote:
>>>>>> getppid blindly follows the parent pointer and reads the pid.
>>>>>> 
>>>>>> The problem is that ptrace reparents the traced process, so in
>>>>>> particular if you gdb -p $something, the target proc will start
>> seeing
>>>>>> gdb instead of its actual parent.
>>>>>> 
>>>>>> There is a lot to say about the entire reparenting business or
>> storing
>>>>>> the original pid in ps_oppid (instead of some form of a reference to
>>>>>> the process).
>>>>>> 
>>>>>> However, I think the most feasible fix for now is the same thing
>>>>>> FreeBSD did: *always* store the actual parent pid in ps_oppid. This
>>>>>> means all reparenting will keep updating it (most notably when
>>>>>> abandoning children on exit), while ptrace will skip that part.
>>>>>> 
>>>>>> A side effect of such a change would be that getppid will stop requiring the
>>>>>> kernel lock.
>>>>>> 
>>>>> 
>>>>> Thanks for report. But we are in beta stage now so such modification
>> is
>>>>> impossible until next iteration.
>>>>> 
>>>>> Since original parent identifier is stored as `ps_oppid' while process
>>>>> is traced we just return it to userland for this case. This is the way
>>>>> I
>>>>> propose to fix this bug for now.
>>>>> 
>>>>> Comments? OKs?
>>>>> 
>>>>> Index: sys/kern/kern_prot.c
>>>>> ===
>>>>> RCS file: /cvs/src/sys/kern/kern_prot.c,v
>>>>> retrieving revision 1.76
>>>>> diff -u -p -r1.76 kern_prot.c
>>>>> --- sys/kern/kern_prot.c  9 Jul 2019 12:23:25 -   1.76
>>>>> +++ sys/kern/kern_prot.c  4 Sep 2020 21:12:15 -
>>>>> @@ -84,7 +84,11 @@ int
>>>>> sys_getppid(struct proc *p, void *v, register_t *retval)
>>>>> {
>>>>> 
>>>>> - *retval = p->p_p->ps_pptr->ps_pid;
>>>>> + if (p->p_p->ps_flags & PS_TRACED)
>>>>> + *retval = p->p_p->ps_oppid;
>>>>> + else
>>>>> + *retval = p->p_p->ps_pptr->ps_pid;
>>>>> +
>>>>>  return (0);
>>>>> }
>>>> 
>>>> This is definitely a bare minimum fix, but it does the job.
>>>> 
>>> 
>>> ptrace() has behaved like this for the life of OpenBSD and an indefinite
>>> number of years previous in the BSD releases.  What has happened that a
>>> definitely incomplete fix is needed Right Now?
>> 
>> I don't see how this reads as a demand this is fixed Right Now.
>> 
> 
> I didn't call it a demand, but the point stands: what has changed?
> 
> 
> I don't see how the fix is incomplete either. It can be done better
>> with more effort, but AFAICS the above results in correct behavior.
>> 
> 
> There are at least 2 other uses of ps_pptr->ps_pid that should also change,
> unless you like coredumps and ps disagreeing with getppid(), and someone
> needs to think how it affects doas.
> 

Thanks for pointing that out. I missed these two places. However, doas(1)
was not affected because this diff doesn’t modify tty(4) behaviour:
TIOC{GET,SET}VERAUTH still use ps_pptr->ps_pid.

I checked the other BSDs. NetBSD, DragonflyBSD and OSX have the same
getppid(2) behaviour, and this behaviour doesn’t contradict POSIX.1 [1]. So
I guess Philip is right: there is no reason to go this way.

1. https://pubs.opengroup.org/onlinepubs/9699919799/functions/getppid.html


Re: incorrect result from getppid for ptraced processes

2020-09-04 Thread Vitaliy Makkoveev
On Fri, Sep 04, 2020 at 05:24:42PM +0200, Mateusz Guzik wrote:
> getppid blindly follows the parent pointer and reads the pid.
> 
> The problem is that ptrace reparents the traced process, so in
> particular if you gdb -p $something, the target proc will start seeing
> gdb instead of its actual parent.
> 
> There is a lot to say about the entire reparenting business or storing
> the original pid in ps_oppid (instead of some form of a reference to
> the process).
> 
> However, I think the most feasible fix for now is the same thing
> FreeBSD did: *always* store the actual parent pid in ps_oppid. This
> means all reparenting will keep updating it (most notably when
> abandoning children on exit), while ptrace will skip that part.
> 
> A side effect of such a change would be that getppid will stop requiring the
> kernel lock.
> 

Thanks for the report. But we are in the beta stage now, so such a
modification is impossible until the next iteration.

Since the original parent identifier is stored in `ps_oppid' while the
process is traced, we can just return it to userland in this case. This is
the way I propose to fix this bug for now.

Comments? OKs?

Index: sys/kern/kern_prot.c
===
RCS file: /cvs/src/sys/kern/kern_prot.c,v
retrieving revision 1.76
diff -u -p -r1.76 kern_prot.c
--- sys/kern/kern_prot.c9 Jul 2019 12:23:25 -   1.76
+++ sys/kern/kern_prot.c4 Sep 2020 21:12:15 -
@@ -84,7 +84,11 @@ int
 sys_getppid(struct proc *p, void *v, register_t *retval)
 {
 
-   *retval = p->p_p->ps_pptr->ps_pid;
+   if (p->p_p->ps_flags & PS_TRACED)
+   *retval = p->p_p->ps_oppid;
+   else
+   *retval = p->p_p->ps_pptr->ps_pid;
+
return (0);
 }
 



pipex(4)/ppp{ac,x}(4): don't include "net/netisr.h"

2020-08-28 Thread Vitaliy Makkoveev
It's not needed here.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.102
diff -u -p -r1.102 if_pppx.c
--- sys/net/if_pppx.c   27 Aug 2020 10:47:52 -  1.102
+++ sys/net/if_pppx.c   28 Aug 2020 08:18:08 -
@@ -62,7 +62,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.125
diff -u -p -r1.125 pipex.c
--- sys/net/pipex.c 27 Aug 2020 10:47:52 -  1.125
+++ sys/net/pipex.c 28 Aug 2020 08:18:08 -
@@ -48,7 +48,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 



pppx(4)/pipex(4): use per cpu counters with ifnet

2020-08-28 Thread Vitaliy Makkoveev
pppac(4) uses per-CPU counters to collect `ifnet' statistics, but in the
pipex(4) layer this `ifnet' still uses `if_data'. Also, pppx(4) doesn't
use per-CPU counters but `if_data'. I propose to use per-CPU counters for
pppx(4) and pipex(4) to avoid mixing the two ways of collecting statistics.

This will also be a step towards removing the netlock from pppx(4) output.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.102
diff -u -p -r1.102 if_pppx.c
--- sys/net/if_pppx.c   27 Aug 2020 10:47:52 -  1.102
+++ sys/net/if_pppx.c   28 Aug 2020 07:48:08 -
@@ -658,6 +658,7 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp->if_type = IFT_PPP;
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
+   if_counters_alloc(ifp);
 
/* XXXSMP breaks atomicity */
NET_UNLOCK();
@@ -878,7 +879,7 @@ pppx_if_output(struct ifnet *ifp, struct
 
 out:
if (error)
-   ifp->if_oerrors++;
+   counters_inc(ifp->if_counters, ifc_oerrors);
return (error);
 }
 
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.125
diff -u -p -r1.125 pipex.c
--- sys/net/pipex.c 27 Aug 2020 10:47:52 -  1.125
+++ sys/net/pipex.c 28 Aug 2020 07:48:08 -
@@ -912,8 +912,7 @@ pipex_ip_input(struct mbuf *m0, struct p
bpf_mtap_af(ifp->if_bpf, AF_INET, m0, BPF_DIRECTION_IN);
 #endif
 
-   ifp->if_ipackets++;
-   ifp->if_ibytes += len;
+   counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes, len);
session->stat.ipackets++;
session->stat.ibytes += len;
ipv4_input(ifp, m0);
@@ -962,8 +961,7 @@ pipex_ip6_input(struct mbuf *m0, struct 
bpf_mtap_af(ifp->if_bpf, AF_INET6, m0, BPF_DIRECTION_IN);
 #endif
 
-   ifp->if_ipackets++;
-   ifp->if_ibytes += len;
+   counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes, len);
session->stat.ipackets++;
session->stat.ibytes += len;
ipv6_input(ifp, m0);



Re: Make pipex more common for pppac and pppx

2020-08-24 Thread Vitaliy Makkoveev
On Thu, Aug 20, 2020 at 02:32:57PM +0900, YASUOKA Masahiko wrote:

Hello.

I put some comments inline.

> Hi,
> 
> Thank you for your comments.
> 
> On Mon, 17 Aug 2020 00:15:08 +0300
> Vitaliy Makkoveev  wrote:
> > I like your idea to kill `pipex_iface_context'. I had been trying to keep
> > it myself and this was the wrong way. Could you rework your diff to be
> > against the recent sources?
> 
> I'm sorry the diff was for the old version.
> 
> >> @@ -1122,8 +1051,11 @@ pppacopen(dev_t dev, int flags, int mode, struct 
> >> proc *p)
> >>  #if NBPFILTER > 0
> >>bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
> >>  #endif
> >> -
> >> -  pipex_iface_init(>sc_pipex_iface, ifp->if_index);
> >> +  /* virtual pipex_session entry for multicast */
> >> +  session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
> >> +  session->is_multicast = 1;
> >> +  session->ifindex = ifp->if_index;
> >> +  sc->sc_multicast_session = session;
> >>  
> > The interface index is not required for the multicast session, because
> > it's never used. Also, I would like `sc_multicast_session' to be
> > allocated before if_attach().
> 
> The diff was to use `ifindex' to select all sessions associated with the
> same pppac(4).  But the latest diff uses `ownersc' instead for the
> same purpose.  Also the allocation was moved to earlier part of the
> function.
> 
> >> @@ -1382,7 +1340,10 @@ pppacclose(dev_t dev, int flags, int mode, struct 
> >> proc *p)
> >>klist_invalidate(>sc_wsel.si_note);
> >>splx(s);
> >>  
> >> -  pipex_iface_fini(>sc_pipex_iface);
> >> +  pool_put(_session_pool, sc->sc_multicast_session);
> >> +  NET_LOCK();
> >> +  pipex_destroy_all_sessions(sc);
> >> +  NET_UNLOCK();
> >>  
> >>if_detach(ifp);
> > 
> > The recent sources have pppac(4) with an unlocked start routine. I would
> > like you to detach `ifp' before destroying `sc_multicast_session'.
> 
> The lines were moved after if_detach().
> 
> I'll test this more on this weekend, then I'll ask ok for this.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.101
> diff -u -p -r1.101 if_pppx.c
> --- sys/net/if_pppx.c 14 Aug 2020 11:05:38 -  1.101
> +++ sys/net/if_pppx.c 20 Aug 2020 05:19:55 -
> @@ -163,7 +163,6 @@ struct pppx_if {
>   struct ifnetpxi_if;
>   struct pppx_dev *pxi_dev;   /* [I] */
>   struct pipex_session*pxi_session;   /* [I] */
> - struct pipex_iface_context  pxi_ifcontext;  /* [N] */
>  };
>  
>  static inline int
> @@ -181,12 +180,6 @@ int  pppx_add_session(struct pppx_dev *,
>   struct pipex_session_req *);
>  int  pppx_del_session(struct pppx_dev *,
>   struct pipex_session_close_req *);
> -int  pppx_config_session(struct pppx_dev *,
> - struct pipex_session_config_req *);
> -int  pppx_get_stat(struct pppx_dev *,
> - struct pipex_session_stat_req *);
> -int  pppx_get_closed(struct pppx_dev *,
> - struct pipex_session_list_req *);
>  int  pppx_set_session_descr(struct pppx_dev *,
>   struct pipex_session_descr_req *);
>  
> @@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>  
>   NET_LOCK();
>   switch (cmd) {
> - case PIPEXSMODE:
> - /*
> -  * npppd always enables on open, and only disables before
> -  * closing. we cheat and let open and close do that, so lie
> -  * to npppd.
> -  */
> - break;
> - case PIPEXGMODE:
> - *(int *)addr = 1;
> - break;
> -
>   case PIPEXASESSION:
>   error = pppx_add_session(pxd,
>   (struct pipex_session_req *)addr);
> @@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>   (struct pipex_session_close_req *)addr);
>   break;
>  
> - case PIPEXCSESSION:
> - error = pppx_config_session(pxd,
> - (struct pipex_session_config_req *)addr);
> - break;
> -
> - case PIPEXGSTAT:
> - error = pppx_get_stat(pxd,
> - (struct pipex_session_stat_req *)addr);
> - break;
> -
> - case PIPEXGCLOSED:
> - error = pppx_get_closed(pxd,
> - (struct pipex_session_list

Re: sppp: add size to free() calls

2020-08-22 Thread Vitaliy Makkoveev
Yeah, we override both 'auth->name' and 'auth->secret'.

Since this is the only difference from your previous diff and
the only place where you touch them, I have no objections.

> On 22 Aug 2020, at 18:00, Klemens Nanni  wrote:
> 
> On Sat, Aug 22, 2020 at 02:32:17PM +0200, Klemens Nanni wrote:
>> Another round, this time obvious sizes which are in immediate scope of
>> the free() call, e.g. right below the malloc() call.
>> 
>> This leaves only a few selected free() calls with size zero in
>> if_spppsubr.c due to the fact that there is currently no variable to
>> keep track of username and password string lengths.
>> 
>> Feedback? OK?
> Sorry, here's the correct version of the diff omitting sizes for the
> very string buffers mentioned above.
> 
> 
> Index: if_spppsubr.c
> ===
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.185
> diff -u -p -r1.185 if_spppsubr.c
> --- if_spppsubr.c 14 Aug 2020 12:17:34 -  1.185
> +++ if_spppsubr.c 22 Aug 2020 14:55:49 -
> @@ -1737,7 +1737,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
> 
>   len -= 4;
>   origlen = len;
> - buf = r = malloc (len, M_TEMP, M_NOWAIT);
> + buf = r = malloc (origlen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -1749,7 +1749,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (-1);
>   }
>   if (debug)
> @@ -1926,7 +1926,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (rlen == 0);
> }
> 
> @@ -2312,7 +2312,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   u_int32_t hisaddr, desiredaddr;
> 
>   len -= 4;
> @@ -2321,7 +2321,8 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2332,7 +2333,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2476,7 +2477,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -2773,7 +2774,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   struct in6_addr myaddr, desiredaddr, suggestaddr;
>   int ifidcount;
>   int type;
> @@ -2786,7 +2787,8 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2799,7 +2801,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) {
>   /* Sanity check option length */
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2933,7 +2935,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   }
> 
> end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -4475,10 +4477,10 @@ sppp_get_params(struct sppp *sp, struct 
>   spr->phase = sp->pp_phase;
> 
>   if (copyout(spr, (caddr_t)ifr->ifr_data, sizeof(*spr)) != 0) {
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   return EFAULT;
>   }
> - free(spr, M_DEVBUF, 0);
> + free(spr, 

Re: sppp: add size to free() calls

2020-08-22 Thread Vitaliy Makkoveev
ok mvs@

> On 22 Aug 2020, at 15:32, Klemens Nanni  wrote:
> 
> Another round, this time obvious sizes which are in immediate scope of
> the free() call, e.g. right below the malloc() call.
> 
> This leaves only a few selected free() calls with size zero in
> if_spppsubr.c due to the fact that there is currently no variable to
> keep track of username and password string lengths.
> 
> Feedback? OK?
> 
> 
> Index: if_spppsubr.c
> ===
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.185
> diff -u -p -r1.185 if_spppsubr.c
> --- if_spppsubr.c 14 Aug 2020 12:17:34 -  1.185
> +++ if_spppsubr.c 22 Aug 2020 12:25:37 -
> @@ -1737,7 +1737,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
> 
>   len -= 4;
>   origlen = len;
> - buf = r = malloc (len, M_TEMP, M_NOWAIT);
> + buf = r = malloc (origlen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -1749,7 +1749,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (-1);
>   }
>   if (debug)
> @@ -1926,7 +1926,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (rlen == 0);
> }
> 
> @@ -2312,7 +2312,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   u_int32_t hisaddr, desiredaddr;
> 
>   len -= 4;
> @@ -2321,7 +2321,8 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2332,7 +2333,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2476,7 +2477,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -2773,7 +2774,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   struct in6_addr myaddr, desiredaddr, suggestaddr;
>   int ifidcount;
>   int type;
> @@ -2786,7 +2787,8 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2799,7 +2801,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) {
>   /* Sanity check option length */
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2933,7 +2935,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   }
> 
> end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -4475,10 +4477,10 @@ sppp_get_params(struct sppp *sp, struct 
>   spr->phase = sp->pp_phase;
> 
>   if (copyout(spr, (caddr_t)ifr->ifr_data, sizeof(*spr)) != 0) {
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   return EFAULT;
>   }
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   break;
>   }
>   case SPPPIOGMAUTH:
> @@ -4498,10 +4500,10 @@ sppp_get_params(struct sppp *sp, struct 
>   strlcpy(spa->name, auth->name, sizeof(spa->name));
> 
>   if (copyout(spa, (caddr_t)ifr->ifr_data, sizeof(*spa)) != 0) {
> - free(spa, M_DEVBUF, 0);
> +

Re: *_clone_create: leave default ifq_maxlen handling to ifq_init()

2020-08-21 Thread Vitaliy Makkoveev
On Fri, Aug 21, 2020 at 11:05:56PM +0200, Klemens Nanni wrote:
> Creating a cloned interface requires attaching it in the end, that's how
> it works.
> 
> All clonable interfaces start with a fresh softc structure that is all
> zeros after allocation due to malloc(9)'s M_ZERO flag.
> 
> After driver dependent setup, all drivers call if_attach() to present
> the new interface to the stack.
> 
> if_attach() starts with calling if_attach_common() which starts with
> preparing the interface queues and therefore calling ifq_init() on the
> send queue.
> 
> ifq_init() eventually checks the queue's maximum length and defaults to
> IFQ_MAXLEN if it is zero, which it always is during this create/attach
> path:
> 
>   if (ifq->ifq_maxlen == 0)
>   ifq_set_maxlen(ifq, IFQ_MAXLEN);
> 
> Now, most clonable interface drivers (except bridge, enc, loop, pppx,
> switch, trunk and vlan) initialise the send queue's length to IFQ_MAXLEN
> the same way, which seems entirely redundant to me.
> 
> The queue API does this in a central place already and it bothered me
> why not all drivers did the same in this regard, until I concluded this.
> 
> Is my analysis correct?
> If so, I'd like to remove the redundant init code and unify drivers a
> tiny bit.
> 
> Feedback? Objections? OK?
> 

I have no objections. Also fgsch@ already did the same in 2001 [1]:

Don't set up ifq_maxlen manually for drivers that uses IFQ_MAXLEN
(or ifqmaxlen); it's done in if_attach() now.
No future drivers needs to set up this anymore unless they want to
use something else.

ok mvs@

1. 
https://github.com/openbsd/src/commit/b59942f79e8c9a7102417b8713ad3ffe9adecf05

> 
> Index: if_aggr.c
> ===
> RCS file: /cvs/src/sys/net/if_aggr.c,v
> retrieving revision 1.33
> diff -u -p -r1.33 if_aggr.c
> --- if_aggr.c 22 Jul 2020 02:16:01 -  1.33
> +++ if_aggr.c 21 Aug 2020 20:33:36 -
> @@ -561,7 +561,6 @@ aggr_clone_create(struct if_clone *ifc, 
>   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
>   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
>   ifp->if_link_state = LINK_STATE_DOWN;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ether_fakeaddr(ifp);
>  
>   if_counters_alloc(ifp);
> Index: if_bpe.c
> ===
> RCS file: /cvs/src/sys/net/if_bpe.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 if_bpe.c
> --- if_bpe.c  22 Jul 2020 08:38:51 -  1.13
> +++ if_bpe.c  21 Aug 2020 20:33:36 -
> @@ -189,7 +189,6 @@ bpe_clone_create(struct if_clone *ifc, i
>   ifp->if_start = bpe_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ether_fakeaddr(ifp);
>  
>   if_counters_alloc(ifp);
> Index: if_etherip.c
> ===
> RCS file: /cvs/src/sys/net/if_etherip.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 if_etherip.c
> --- if_etherip.c  10 Jul 2020 13:26:41 -  1.46
> +++ if_etherip.c  21 Aug 2020 20:33:36 -
> @@ -150,7 +150,6 @@ etherip_clone_create(struct if_clone *if
>   ifp->if_start = etherip_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
>   ether_fakeaddr(ifp);
>  
> Index: if_gif.c
> ===
> RCS file: /cvs/src/sys/net/if_gif.c,v
> retrieving revision 1.130
> diff -u -p -r1.130 if_gif.c
> --- if_gif.c  10 Jul 2020 13:26:41 -  1.130
> +++ if_gif.c  21 Aug 2020 20:33:36 -
> @@ -170,7 +170,6 @@ gif_clone_create(struct if_clone *ifc, i
>   ifp->if_output = gif_output;
>   ifp->if_rtrequest = p2p_rtrequest;
>   ifp->if_type   = IFT_GIF;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_softc = sc;
>  
>   if_attach(ifp);
> Index: if_gre.c
> ===
> RCS file: /cvs/src/sys/net/if_gre.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 if_gre.c
> --- if_gre.c  10 Jul 2020 13:26:41 -  1.158
> +++ if_gre.c  21 Aug 2020 20:33:36 -
> @@ -715,7 +715,6 @@ egre_clone_create(struct if_clone *ifc, 
>   ifp->if_ioctl = egre_ioctl;
>   ifp->if_start = egre_start;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>   ether_fakeaddr(ifp);
>  
> @@ -777,7 +776,6 @@ nvgre_clone_create(struct if_clone *ifc,
>   ifp->if_ioctl = nvgre_ioctl;
>   ifp->if_start = nvgre_start;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>   

Re: Enable EVFILT_EXCEPT

2020-08-21 Thread Vitaliy Makkoveev
ok mvs@

> On 21 Aug 2020, at 10:32, Martin Pieuchot  wrote:
> 
> The kqueue-based poll(2) backend is still a WIP due to regressions in
> the kqueue layer.  In the meantime should we expose EVFILT_EXCEPT to
> userland?  The diff below should be enough to allow userland apps to
> use the new code paths. 
> 
> ok?
> 
> Index: sys/event.h
> ===
> RCS file: /cvs/src/sys/sys/event.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 event.h
> --- sys/event.h   22 Jun 2020 13:14:32 -  1.44
> +++ sys/event.h   21 Aug 2020 07:09:31 -
> @@ -41,7 +41,7 @@
> #define EVFILT_DEVICE (-8)/* devices */
> #define EVFILT_EXCEPT (-9)/* exceptional conditions */
> 
> -#define EVFILT_SYSCOUNT  8
> +#define EVFILT_SYSCOUNT  9
> 
> #define EV_SET(kevp, a, b, c, d, e, f) do {   \
>   struct kevent *__kevp = (kevp); \
> 



Re: pppoe: add sizes to free() calls

2020-08-20 Thread Vitaliy Makkoveev
ok mvs@

> On 20 Aug 2020, at 17:12, Klemens Nanni  wrote:
> 
> On Thu, Aug 20, 2020 at 03:33:17PM +0200, Klemens Nanni wrote:
>> These are straight forward as we either maintain a size variable all the
>> way or can reuse strlen() for free() just like it's done during malloc().
>> 
>> One exception is freeing the softc structure, which is fixed in size;
>> `ifconfig pppoe1 create; ifconfig pppoe1 destroy' exercises this code
>> path and does not blow up - as expected.
>> 
>> Running fine on a octeon vdsl2 router.
>> 
>> Feedback? OK?
> Sorry for the noise, Peter J. Philipp pointed out how I missed the +1
> byte to account for the NUL character which strlen(9) does not count.
> All corresponding malloc calls do strlen() + 1, so without it there's an
> off-by-one in the size.
> 
> In my setup I do not exercise the service and concentrator name paths,
> but playing with `ifconfig pppoe0 [[-]pppoeac foo] [[-]pppoesvc bar]'
> doesn't blow up on any of those diff versions, either.
> 
> Fixed diff.
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 if_pppoe.c
> --- if_pppoe.c28 Jul 2020 09:52:32 -  1.70
> +++ if_pppoe.c20 Aug 2020 13:50:07 -
> @@ -257,15 +267,17 @@ pppoe_clone_destroy(struct ifnet *ifp)
>   if_detach(ifp);
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name) + 1);
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name) + 1);
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
> 
> - free(sc, M_DEVBUF, 0);
> + free(sc, M_DEVBUF, sizeof(*sc));
> 
>   return (0);
> }
> @@ -547,7 +559,8 @@ breakbreak:
>   }
>   if (ac_cookie) {
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = malloc(ac_cookie_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_ac_cookie == NULL)
> @@ -557,7 +570,8 @@ breakbreak:
>   }
>   if (relay_sid) {
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF,
> + sc->sc_relay_sid_len);
>   sc->sc_relay_sid = malloc(relay_sid_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_relay_sid == NULL)
> @@ -610,11 +624,12 @@ breakbreak:
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
> @@ -817,7 +847,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name) + 1);
>   sc->sc_concentrator_name = NULL;
> 
>   len = strlen(parms->ac_name);
> @@ -830,7 +861,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name) + 1);
>   sc->sc_service_name = NULL;
> 
>   len = strlen(parms->service_name);
> @@ -1175,12 +1207,12 @@ pppoe_disconnect(struct pppoe_softc *sc)
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> -  

Re: pppoe: add sizes to free() calls

2020-08-20 Thread Vitaliy Makkoveev
> On 20 Aug 2020, at 16:33, Klemens Nanni  wrote:
> 
> These are straight forward as we either maintain a size variable all the
> way or can reuse strlen() for free() just like it's done during malloc().
> 
> One exception is freeing the softc structure, which is fixed in size;
> `ifconfig pppoe1 create; ifconfig pppoe1 destroy' exercises this code
> path and does not blow up - as expected.
> 
> Running fine on a octeon vdsl2 router.
> 
> Feedback? OK?

Hi.

You forgot about the terminating '\0' for `sc_concentrator_name' and `sc_service_name'.

> 
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 if_pppoe.c
> --- if_pppoe.c28 Jul 2020 09:52:32 -  1.70
> +++ if_pppoe.c19 Aug 2020 23:33:23 -
> @@ -257,15 +264,17 @@ pppoe_clone_destroy(struct ifnet *ifp)
>   if_detach(ifp);
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name));
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name));
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
> 
> - free(sc, M_DEVBUF, 0);
> + free(sc, M_DEVBUF, sizeof(*sc));
> 
>   return (0);
> }
> @@ -547,7 +556,8 @@ breakbreak:
>   }
>   if (ac_cookie) {
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = malloc(ac_cookie_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_ac_cookie == NULL)
> @@ -557,7 +567,8 @@ breakbreak:
>   }
>   if (relay_sid) {
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF,
> + sc->sc_relay_sid_len);
>   sc->sc_relay_sid = malloc(relay_sid_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_relay_sid == NULL)
> @@ -610,11 +621,12 @@ breakbreak:
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
> @@ -817,7 +835,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name));
>   sc->sc_concentrator_name = NULL;
> 
>   len = strlen(parms->ac_name);
> @@ -830,7 +849,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name));
>   sc->sc_service_name = NULL;
> 
>   len = strlen(parms->service_name);
> @@ -1175,12 +1195,12 @@ pppoe_disconnect(struct pppoe_softc *sc)
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_relay_sid_len = 0;
> 



Re: Remove unnecessary field from struct msgbuf

2020-08-18 Thread Vitaliy Makkoveev
ok mvs@

> On 16 Aug 2020, at 14:35, Visa Hankala  wrote:
> 
> The msg_bufl field of struct msgbuf is written but never read. The value
> was used by kernfs which is no longer present, so the code could be
> cleaned up a little by removing the field.
> 
> On some systems the message buffer data are preserved across a reboot.
> However, the preservation is best-effort only, and initmsgbuf() refuses
> to use the old data if the struct's size has changed.
> 
> Changing the struct affects at least dmesg(8). The program has to be
> recompiled.
> 
> OK?
> 
> Index: kern/subr_log.c
> ===
> RCS file: src/sys/kern/subr_log.c,v
> retrieving revision 1.66
> diff -u -p -r1.66 subr_log.c
> --- kern/subr_log.c   7 Apr 2020 13:27:51 -   1.66
> +++ kern/subr_log.c   16 Aug 2020 10:57:59 -
> @@ -151,7 +151,6 @@ msgbuf_putchar(struct msgbuf *mbp, const
> 
>   s = splhigh();
>   mbp->msg_bufc[mbp->msg_bufx++] = c;
> - mbp->msg_bufl = lmin(mbp->msg_bufl+1, mbp->msg_bufs);
>   if (mbp->msg_bufx < 0 || mbp->msg_bufx >= mbp->msg_bufs)
>   mbp->msg_bufx = 0;
>   /* If the buffer is full, keep the most recent data. */
> Index: sys/msgbuf.h
> ===
> RCS file: src/sys/sys/msgbuf.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 msgbuf.h
> --- sys/msgbuf.h  23 Jun 2016 13:15:21 -  1.11
> +++ sys/msgbuf.h  16 Aug 2020 10:57:59 -
> @@ -38,7 +38,6 @@ struct  msgbuf {
>   longmsg_bufx;   /* write pointer */
>   longmsg_bufr;   /* read pointer */
>   longmsg_bufs;   /* real msg_bufc size (bytes) */
> - longmsg_bufl;   /* # chars, <= msg_bufs */
>   longmsg_bufd;   /* number of dropped bytes */
>   charmsg_bufc[1];/* buffer */
> };
> 



Re: Remove unnecessary field from struct msgbuf

2020-08-16 Thread Vitaliy Makkoveev
The diff looks good to me. I'll recompile the system with your diff
tomorrow.

> On 16 Aug 2020, at 14:35, Visa Hankala  wrote:
> 
> The msg_bufl field of struct msgbuf is written but never read. The value
> was used by kernfs which is no longer present, so the code could be
> cleaned up a little by removing the field.
> 
> On some systems the message buffer data are preserved across a reboot.
> However, the preservation is best-effort only, and initmsgbuf() refuses
> to use the old data if the struct's size has changed.
> 
> Changing the struct affects at least dmesg(8). The program has to be
> recompiled.
> 
> OK?
> 
> Index: kern/subr_log.c
> ===
> RCS file: src/sys/kern/subr_log.c,v
> retrieving revision 1.66
> diff -u -p -r1.66 subr_log.c
> --- kern/subr_log.c   7 Apr 2020 13:27:51 -   1.66
> +++ kern/subr_log.c   16 Aug 2020 10:57:59 -
> @@ -151,7 +151,6 @@ msgbuf_putchar(struct msgbuf *mbp, const
> 
>   s = splhigh();
>   mbp->msg_bufc[mbp->msg_bufx++] = c;
> - mbp->msg_bufl = lmin(mbp->msg_bufl+1, mbp->msg_bufs);
>   if (mbp->msg_bufx < 0 || mbp->msg_bufx >= mbp->msg_bufs)
>   mbp->msg_bufx = 0;
>   /* If the buffer is full, keep the most recent data. */
> Index: sys/msgbuf.h
> ===
> RCS file: src/sys/sys/msgbuf.h,v
> retrieving revision 1.11
> diff -u -p -r1.11 msgbuf.h
> --- sys/msgbuf.h  23 Jun 2016 13:15:21 -  1.11
> +++ sys/msgbuf.h  16 Aug 2020 10:57:59 -
> @@ -38,7 +38,6 @@ struct  msgbuf {
>   longmsg_bufx;   /* write pointer */
>   longmsg_bufr;   /* read pointer */
>   longmsg_bufs;   /* real msg_bufc size (bytes) */
> - longmsg_bufl;   /* # chars, <= msg_bufs */
>   longmsg_bufd;   /* number of dropped bytes */
>   charmsg_bufc[1];/* buffer */
> };
> 



Re: pppoe: start without kernel lock

2020-08-16 Thread Vitaliy Makkoveev
On Sun, Aug 16, 2020 at 08:44:07PM +0200, Klemens Nanni wrote:
> On Sun, Aug 16, 2020 at 07:04:46PM +0200, Klemens Nanni wrote:
> > Make sppp(4)/pppoe(4) use the ifq API to send packets outside the big
> > lock.
> > 
> > As far as I understand, pppoe_output() does not require NET_LOCK() since
> > if_get(9)/if_put(9) guarantee the validity of the interface pointer and
> > no `struct ifnet' member is written to;  similar to how vlan(4) does it.
> > 
> > This is running on an EdgeRouter 4 behind a VDSL2 modem, i.e. pppoe0
> > over vlan7 over cnmac0.
> > 
> > Do I miss something?
> It seems I did:  Even though pppoe doesn't write the ifnet, locks must
> be grabbed for each member that is read from.
> 
> `if_flags' for example is documented as protected by the NET_LOCK();
> pppoe_output() checks if the interface is up and running, yet only some
> but not all call paths to pppoe_output() grab the NET_LOCK().
> 
> So it seems more work is needed here, I see if I can identify all code
> paths wrt. their locking situation.
> 

May I propose documenting which locks protect the pppoe(4)/sppp(4) data
structures as a first step? Yes, the current locking scheme will mostly be
"kernel lock", but at least it makes visible what is required to change
before unlocking output. Also it makes further reviews easier.



Re: pppac(4): destroy sessions the same way as pppx(4) does

2020-08-16 Thread Vitaliy Makkoveev
On Sat, Aug 15, 2020 at 02:01:52PM +0900, YASUOKA Masahiko wrote:
> On Wed, 12 Aug 2020 12:26:22 +0300
> Vitaliy Makkoveev  wrote:
> > We destroy pppx(4) related sessions while we performing PIPEXDSESSION
> > command. But with pppac(4) we set session's state to
> > PIPEX_STATE_CLOSE_WAIT2 and we wait garbage collector to do destruction.
> 
> pppac's PIPEXDSESSION set the states PIPEX_STATE_CLOSED.  It is to
> wait until pipex{in,out}q becomes empty.
> 

My fault. I looked at pipex_notify_close_session().

> > We removed `pipex{in,out}q'. So we can safe destroy session in any time.
> > I propose to make pppac(4) session destruction path the same as pppx(4)
> > does. Now we destroy them while performing PIPEXDSESSION commad too.
> 
> Yes.  I agree this point.
> 
> > Also there is no in-kernel garbage collector for pppac(4) sessions.
> > yasuoka@ pointed me that npppd(8) should kill expired sessions.
> > 
> > This not only makes pppac(4) closer to pppx(4) but simplify code and
> > allow us to make safe pppx(4) session processing by pipex_timer().
> > So this is preparation step to restore in-kernel timeout for pppx(4)
> > too.
> 
> Below, I am asking to keep the timeout behavior.  There is a bug for
> pppx(4) but it had been working for pppac(4) for long time.  If you
> really want to change the behavior please provide a reason.  I have
> not so strong opinion but I don't want to change the behavior without
> a reason.
>

The reason is to make the garbage collector's behavior identical for
pppac(4) and pppx(4). It's assumed that userland should destroy expired
sessions, so there is no reason to have differences here. Also it allows
us to avoid introducing differences in how pipex_timer() processes pppac(4)
and pppx(4) sessions. No "if (session->is_pppx)" checks will be
required at this point in the future. The real point of this diff is to make
pipex_timer() only process timeouts in both cases.

> > Index: sys/net/pipex.c
> > ===
> > RCS file: /cvs/src/sys/net/pipex.c,v
> > retrieving revision 1.124
> > diff -u -p -r1.124 pipex.c
> > --- sys/net/pipex.c 12 Aug 2020 08:41:39 -  1.124
> > +++ sys/net/pipex.c 12 Aug 2020 09:07:12 -
> > @@ -536,29 +536,6 @@ out:
> > return error;
> >  }
> >  
> > -int
> > -pipex_notify_close_session(struct pipex_session *session)
> > -{
> > -   NET_ASSERT_LOCKED();
> > -   session->state = PIPEX_STATE_CLOSE_WAIT;
> > -   session->stat.idle_time = 0;
> > -   LIST_INSERT_HEAD(_close_wait_list, session, state_list);
> > -
> > -   return (0);
> > -}
> > -
> 
> Unrelated but ok.
> 
> > -int
> > -pipex_notify_close_session_all(void)
> > -{
> > -   struct pipex_session *session;
> > -
> > -   NET_ASSERT_LOCKED();
> > -   LIST_FOREACH(session, _session_list, session_list)
> > -   if (session->state == PIPEX_STATE_OPENED)
> > -   pipex_notify_close_session(session);
> > -   return (0);
> > -}
> > -
> 
> Unrelated but ok.  Since it's not used.
> 
> >  Static int
> >  pipex_close_session(struct pipex_session_close_req *req,
> >  struct pipex_iface_context *iface)
> > @@ -573,13 +550,9 @@ pipex_close_session(struct pipex_session
> > if (session->pipex_iface != iface)
> > return (EINVAL);
> >  
> > -   /* remove from close_wait list */
> > -   if (session->state == PIPEX_STATE_CLOSE_WAIT)
> > -   LIST_REMOVE(session, state_list);
> > -
> 
> > This must be kept.  Userland may PIPEXDSESSION before PIPEXGCLOSED for
> this session.
> 

pipex_destroy_session() calls pipex_unlink_session() which checks
`state' and removes session from `state_list' if required.

> > /* get statistics before destroy the session */
> > req->pcr_stat = session->stat;
> > -   session->state = PIPEX_STATE_CLOSED;
> > +   pipex_destroy_session(session);
> >  
> > return (0);
> >  }
> 
> ok
> 
> > @@ -739,47 +712,25 @@ pipex_timer_stop(void)
> >  Static void
> >  pipex_timer(void *ignored_arg)
> >  {
> > -   struct pipex_session *session, *session_tmp;
> > +   struct pipex_session *session;
> >  
> > timeout_add_sec(_timer_ch, pipex_prune);
> >  
> > NET_LOCK();
> > /* walk through */
> > -   LIST_FOREACH_SAFE(session, _session_list, session_list,
> > -   session_tmp) {
> > -   switch (session->state) {
> > -   case PIPEX_STATE_OPENED:
> > -  

Re: Make pipex more common for pppac and pppx

2020-08-16 Thread Vitaliy Makkoveev
On Sat, Aug 15, 2020 at 05:42:06PM +0900, YASUOKA Masahiko wrote:
> Let me update the diff.  A bug found by the test.
>

Hello Yasuoka.

I like your idea to kill `pipex_iface_context'. I had been trying to keep it
myself and this was the wrong way. Could you rework your diff to be
against the recent sources?

Also I have a few comments below.

> diff --git a/sys/net/if_pppx.c b/sys/net/if_pppx.c
> index 62b85bc34af..6d3de6973bd 100644
> --- a/sys/net/if_pppx.c
> +++ b/sys/net/if_pppx.c
> @@ -163,7 +163,6 @@ struct pppx_if {
>   struct ifnetpxi_if;
>   struct pppx_dev *pxi_dev;   /* [I] */
>   struct pipex_session*pxi_session;   /* [I] */
> - struct pipex_iface_context  pxi_ifcontext;  /* [N] */
>  };
>  
>  static inline int
> @@ -181,12 +180,6 @@ int  pppx_add_session(struct pppx_dev *,
>   struct pipex_session_req *);
>  int  pppx_del_session(struct pppx_dev *,
>   struct pipex_session_close_req *);
> -int  pppx_config_session(struct pppx_dev *,
> - struct pipex_session_config_req *);
> -int  pppx_get_stat(struct pppx_dev *,
> - struct pipex_session_stat_req *);
> -int  pppx_get_closed(struct pppx_dev *,
> - struct pipex_session_list_req *);
>  int  pppx_set_session_descr(struct pppx_dev *,
>   struct pipex_session_descr_req *);
>  
> @@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>  
>   NET_LOCK();
>   switch (cmd) {
> - case PIPEXSMODE:
> - /*
> -  * npppd always enables on open, and only disables before
> -  * closing. we cheat and let open and close do that, so lie
> -  * to npppd.
> -  */
> - break;
> - case PIPEXGMODE:
> - *(int *)addr = 1;
> - break;
> -
>   case PIPEXASESSION:
>   error = pppx_add_session(pxd,
>   (struct pipex_session_req *)addr);
> @@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> flags, struct proc *p)
>   (struct pipex_session_close_req *)addr);
>   break;
>  
> - case PIPEXCSESSION:
> - error = pppx_config_session(pxd,
> - (struct pipex_session_config_req *)addr);
> - break;
> -
> - case PIPEXGSTAT:
> - error = pppx_get_stat(pxd,
> - (struct pipex_session_stat_req *)addr);
> - break;
> -
> - case PIPEXGCLOSED:
> - error = pppx_get_closed(pxd,
> - (struct pipex_session_list_req *)addr);
> - break;
> -
>   case PIPEXSIFDESCR:
>   error = pppx_set_session_descr(pxd,
>   (struct pipex_session_descr_req *)addr);
> @@ -472,7 +439,7 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, 
> struct proc *p)
>   break;
>  
>   default:
> - error = ENOTTY;
> + error = pipex_ioctl(pxd, cmd, addr);
>   break;
>   }
>   NET_UNLOCK();
> @@ -742,11 +709,7 @@ pppx_add_session(struct pppx_dev *pxd, struct 
> pipex_session_req *req)
>   if_addrhooks_run(ifp);
>   }
>  
> - /* fake a pipex interface context */
> - pxi->pxi_ifcontext.ifindex = ifp->if_index;
> - pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
> -
> - error = pipex_link_session(session, >pxi_ifcontext);
> + error = pipex_link_session(session, ifp, pxd);
>   if (error)
>   goto detach;
>  
> @@ -786,40 +749,6 @@ pppx_del_session(struct pppx_dev *pxd, struct 
> pipex_session_close_req *req)
>   return (0);
>  }
>  
> -int
> -pppx_config_session(struct pppx_dev *pxd,
> -struct pipex_session_config_req *req)
> -{
> - struct pppx_if *pxi;
> -
> - pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
> - if (pxi == NULL)
> - return (EINVAL);
> -
> - return pipex_config_session(req, >pxi_ifcontext);
> -}
> -
> -int
> -pppx_get_stat(struct pppx_dev *pxd, struct pipex_session_stat_req *req)
> -{
> - struct pppx_if *pxi;
> -
> - pxi = pppx_if_find(pxd, req->psr_session_id, req->psr_protocol);
> - if (pxi == NULL)
> - return (EINVAL);
> -
> - return pipex_get_stat(req, >pxi_ifcontext);
> -}
> -
> -int
> -pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> -{
> - /* XXX: Only opened sessions exist for pppx(4) */
> - memset(req, 0, sizeof(*req));
> -
> - return 0;
> -}
> -
>  int
>  pppx_set_session_descr(struct pppx_dev *pxd,
>  struct pipex_session_descr_req *req)
> @@ -1022,9 +951,8 @@ struct pppac_softc {
>   struct selinfo  sc_rsel;
>   struct mutexsc_wsel_mtx;
>   struct selinfo  sc_wsel;
> -
> - struct pipex_iface_context
> - sc_pipex_iface;
> + struct 

Re: pipex "idle-timeout" work with pppx(4).

2020-08-12 Thread Vitaliy Makkoveev
On Wed, Aug 12, 2020 at 09:07:15PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> On Wed, 12 Aug 2020 12:38:39 +0300
> Vitaliy Makkoveev  wrote:
> > We don't need to mark pppx(4) sessions because there is no special cases
> > for them. We just need to kill pppx(4) related "pr_timeout_sec != 0"
> > checks and call pipex_get_closed() by pppx_get_closed().
> 
> How do you implement that by calling pipex_get_closed() by
> pppx_get_closed()?
> 
> 
> PIPEXGCLOSED is to pick up expired sessions which is associated with
> the character device (/dev/{pppx,pppac}0).  In pppac(4) case, the
> character device is the same object of the interface pppac.  But
> pppx(4) is not the same.  pipex_session has no direct reference to the
> device.  This is why my diff was modifying pipex_get_closed().
> 

You are right. I have my own tree where I divided pppx(4) sessions and
iface_context. So in my tree I have one `pipex_iface_context' in `struct
pppx_dev' and the usage of most pppx(4) is identical to pppac(4). Sorry,
I forgot that it's not shared yet :(

You are right, pppx_get_closed() still needs to do its own
`pipex_close_wait_list' walkthrough.



Re: pppx(4): move ifnet out of KERNEL_LOCK()

2020-08-12 Thread Vitaliy Makkoveev
Updated to the recent source. The diff is OK'ed by yasuoka@. Also I did
what mpi@ requested. Should I still wait?


Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.100
diff -u -p -r1.100 if_pppx.c
--- sys/net/if_pppx.c   12 Aug 2020 08:41:39 -  1.100
+++ sys/net/if_pppx.c   12 Aug 2020 11:08:12 -
@@ -191,7 +191,7 @@ int pppx_set_session_descr(struct pppx_
struct pipex_session_descr_req *);
 
 void   pppx_if_destroy(struct pppx_dev *, struct pppx_if *);
-void   pppx_if_start(struct ifnet *);
+void   pppx_if_qstart(struct ifqueue *);
 intpppx_if_output(struct ifnet *, struct mbuf *,
struct sockaddr *, struct rtentry *);
 intpppx_if_ioctl(struct ifnet *, u_long, caddr_t);
@@ -683,13 +683,12 @@ pppx_add_session(struct pppx_dev *pxd, s
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
-   ifp->if_xflags = IFXF_CLONED;
-   ifp->if_start = pppx_if_start;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+   ifp->if_qstart = pppx_if_qstart;
ifp->if_output = pppx_if_output;
ifp->if_ioctl = pppx_if_ioctl;
ifp->if_rtrequest = p2p_rtrequest;
ifp->if_type = IFT_PPP;
-   ifq_set_maxlen(&ifp->if_snd, 1);
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
 
@@ -864,26 +863,21 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 }
 
 void
-pppx_if_start(struct ifnet *ifp)
+pppx_if_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppx_if *pxi = (struct pppx_if *)ifp->if_softc;
struct mbuf *m;
int proto;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
-   for (;;) {
-       m = ifq_dequeue(&ifp->if_snd);
-
-   if (m == NULL)
-   break;
-
+   NET_LOCK();
+   while ((m = ifq_dequeue(ifq)) != NULL) {
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
 
pipex_ppp_output(m, pxi->pxi_session, proto);
}
+   NET_UNLOCK();
 }
 
 int



Re: pipex "idle-timeout" work with pppx(4).

2020-08-12 Thread Vitaliy Makkoveev
On Wed, Aug 12, 2020 at 11:17:29AM +0900, YASUOKA Masahiko wrote:
> On Tue, 11 Aug 2020 23:06:45 +0300
> Vitaliy Makkoveev  wrote:
> > We removed `pipex{in,out}q'. So now we can destroy pppac(4) session just
> > like we do in pppx(4) case. Also there is no reason to allow
> > pipex_timer() to destroy sessions - userland will do this by
> > PIPEXDSESSION. This permit us to use existing pipex_get_closed() for
> > both pppac(4) and pppx(4) without any modifications.
> > 
> > So, I propose pipex_close_session() and pipex_timer() be like below.
> 
> It doesn't seem to fix "idle-timeout".
> 

You're right, it doesn't. It's a "pre-" step which makes the following fix easier.

We don't need to mark pppx(4) sessions because there are no special cases
for them. We just need to kill the pppx(4) related "pr_timeout_sec != 0"
checks and call pipex_get_closed() from pppx_get_closed().

> > We simplify pppac(4) session destruction. We unify behavior with pppx(4)
> > - we killing session just now. There is no reason to modify
> > pipex_get_closed() and pipex_link_session(). pppx(4) related sessions
> > can be processed by pipex_timer(). There is no performance impact.
> 
> We need to modify pppx_get_closed() to implement idle-timeout.
> 
> > Do you like this? We can do two diffs. The first to unify destruction
> > and the second to re-enable in-kernel timeout for pppx(4) and revert man
> > pages modifications.
> 
> I have no objection to your "unify destruction".
> 
> I'll rebase my diff after that work.

Thanks. I posted "unify destruction" here [1].

1. https://marc.info/?l=openbsd-tech=159722447900893=2



pppac(4): destroy sessions the same way as pppx(4) does

2020-08-12 Thread Vitaliy Makkoveev
We destroy pppx(4) related sessions while performing the PIPEXDSESSION
command. But with pppac(4) we set the session's state to
PIPEX_STATE_CLOSE_WAIT2 and wait for the garbage collector to do the destruction.

We removed `pipex{in,out}q'. So we can safely destroy a session at any time.
I propose to make the pppac(4) session destruction path the same as the
pppx(4) one. Now we destroy them while performing the PIPEXDSESSION command too.
Also there is no in-kernel garbage collector for pppac(4) sessions.
yasuoka@ pointed out to me that npppd(8) should kill expired sessions.

This not only makes pppac(4) closer to pppx(4) but also simplifies the code and
allows us to make pppx(4) session processing by pipex_timer() safe.
So this is a preparation step to restore the in-kernel timeout for pppx(4)
too.


Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.124
diff -u -p -r1.124 pipex.c
--- sys/net/pipex.c 12 Aug 2020 08:41:39 -  1.124
+++ sys/net/pipex.c 12 Aug 2020 09:07:12 -
@@ -536,29 +536,6 @@ out:
return error;
 }
 
-int
-pipex_notify_close_session(struct pipex_session *session)
-{
-   NET_ASSERT_LOCKED();
-   session->state = PIPEX_STATE_CLOSE_WAIT;
-   session->stat.idle_time = 0;
-   LIST_INSERT_HEAD(_close_wait_list, session, state_list);
-
-   return (0);
-}
-
-int
-pipex_notify_close_session_all(void)
-{
-   struct pipex_session *session;
-
-   NET_ASSERT_LOCKED();
-   LIST_FOREACH(session, _session_list, session_list)
-   if (session->state == PIPEX_STATE_OPENED)
-   pipex_notify_close_session(session);
-   return (0);
-}
-
 Static int
 pipex_close_session(struct pipex_session_close_req *req,
 struct pipex_iface_context *iface)
@@ -573,13 +550,9 @@ pipex_close_session(struct pipex_session
if (session->pipex_iface != iface)
return (EINVAL);
 
-   /* remove from close_wait list */
-   if (session->state == PIPEX_STATE_CLOSE_WAIT)
-   LIST_REMOVE(session, state_list);
-
/* get statistics before destroy the session */
req->pcr_stat = session->stat;
-   session->state = PIPEX_STATE_CLOSED;
+   pipex_destroy_session(session);
 
return (0);
 }
@@ -739,47 +712,25 @@ pipex_timer_stop(void)
 Static void
 pipex_timer(void *ignored_arg)
 {
-   struct pipex_session *session, *session_tmp;
+   struct pipex_session *session;
 
timeout_add_sec(_timer_ch, pipex_prune);
 
NET_LOCK();
/* walk through */
-   LIST_FOREACH_SAFE(session, _session_list, session_list,
-   session_tmp) {
-   switch (session->state) {
-   case PIPEX_STATE_OPENED:
-   if (session->timeout_sec == 0)
-   continue;
-
-   session->stat.idle_time++;
-   if (session->stat.idle_time < session->timeout_sec)
-   continue;
-
-   pipex_notify_close_session(session);
-   break;
-
-   case PIPEX_STATE_CLOSE_WAIT:
-   case PIPEX_STATE_CLOSE_WAIT2:
-   /* Wait PIPEXDSESSION from userland */
-   session->stat.idle_time++;
-   if (session->stat.idle_time < PIPEX_CLOSE_TIMEOUT)
-   continue;
-
-   if (session->state == PIPEX_STATE_CLOSE_WAIT)
-   LIST_REMOVE(session, state_list);
-   session->state = PIPEX_STATE_CLOSED;
-   /* FALLTHROUGH */
+   LIST_FOREACH(session, _session_list, session_list) {
+   if (session->state != PIPEX_STATE_OPENED)
+   continue;
+   if (session->timeout_sec == 0)
+   continue;
 
-   case PIPEX_STATE_CLOSED:
-   pipex_destroy_session(session);
-   break;
+   session->stat.idle_time++;
+   if (session->stat.idle_time < session->timeout_sec)
+   continue;
 
-   default:
-   break;
-   }
+   session->state = PIPEX_STATE_CLOSE_WAIT;
+   LIST_INSERT_HEAD(_close_wait_list, session, state_list);
}
-
NET_UNLOCK();
 }
 
Index: sys/net/pipex.h
===
RCS file: /cvs/src/sys/net/pipex.h,v
retrieving revision 1.27
diff -u -p -r1.27 pipex.h
--- sys/net/pipex.h 4 Aug 2020 09:32:05 -   1.27
+++ sys/net/pipex.h 12 Aug 2020 09:07:13 -
@@ -197,9 +197,6 @@ void  pipex_init (void);
 void  pipex_iface_init (struct pipex_iface_context *, u_int);
 void  pipex_iface_fini (struct pipex_iface_context *);
 
-int   

Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread Vitaliy Makkoveev
On Wed, Aug 12, 2020 at 01:36:38AM +0900, YASUOKA Masahiko wrote:
> 
> my diff is to make pppx(4) have the same "idle-timeout"
> functionality.  I strongly think pppx(4) must have the same
> functionalities of pppac(4) because I don't see any reason to have
> any difference between pppx(4) and pppac(4).
>

Yes, I want the same :)

> Your pseudo code is suggesting another thing.  You would like to
> change the existing behavior of pppac(4)?  Then, what is a problem you
> concern.  I'd like you to provide what is the relation of my diff or a
> background of the code.
>

We have differences in the in-kernel garbage collecting. I was
afraid we could have a case where expired pppx(4) sessions would not be
killed until npppd(8) shutdown. In your previous mail [1] you explained
npppd(8) garbage collecting. I hope you like the solution I
proposed in [2].

1. https://marc.info/?l=openbsd-tech&m=159716033115495&w=2
2. https://marc.info/?l=openbsd-tech&m=159717643020853&w=2



Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread Vitaliy Makkoveev
On Wed, Aug 12, 2020 at 12:37:13AM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> On Mon, 10 Aug 2020 16:30:27 +0300
> Vitaliy Makkoveev  wrote:
> > On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> >> On Sun, 9 Aug 2020 20:03:50 +0300
> >> Vitaliy Makkoveev  wrote:
> >> > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> >> >> You propose to unlink pppx(4) related session which reached timeout. I'm
> >> >> ok with this direction. But I see no reason to rework _get_closed()
> >> >> routines.
> >> >> 
> >> >> in pppac(4) case it's assumed what if session is not yet destroyed by
> >> >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> >> >> command. We can make pppx(4) behavior the same and I propose to
> >> >> pppx_get_closed() be like below. 
> >> >> 
> >> >> Also, nothing requires to modify pipex_get_closed(). 
> >> >> 
> >> >>  cut begin 
> >> > 
> >> > Sorry, I mean
> >> > 
> >> > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> >> > {
> >> >  struct pppx_if  *pxi;
> >> > 
> >> >  memset(req, 0, sizeof(*req));
> >> > 
> >> >  while ((pxi = LIST_FIRST(>pxd_pxis))) {
> >> >  if (pxi->pxi_session->state == session->state =
> >> >  PIPEX_STATE_CLOSED) {
> >> >  req->plr_ppp_id[req->plr_ppp_id_count++] =
> >> >  pxi->pxi_session->ppp_id;
> >> >  pppx_if_destroy(pxi);
> >> >  }
> >> >  }
> >> > 
> >> >  return 0;
> >> > }
> >> 
> >> Yes, the diff doesn't seem to be completed but this way also will work.
> >> 
> >> Usually there is few CLOSED session even if there is a lot of session.
> >> Also there is no CLOSED session if idle-timeout is not configured.  I
> >> avoided that way because I think checking all sessions' state to find
> >> such the few sessions is too expensive.
> >> 
> >> A way I am suggesting:
> >> 
> >> @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
> >>  
> >>  Static int
> >>  pipex_get_closed(struct pipex_session_list_req *req,
> >> -struct pipex_iface_context *iface)
> >> +int (*isowner)(void *, struct pipex_session *), void *ctx)
> >>  {
> >>struct pipex_session *session, *session_tmp;
> >>  
> >> @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
> >>bzero(req, sizeof(*req));
> >>LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
> >>session_tmp) {
> >> -  if (session->pipex_iface != iface)
> >> +  if (!isowner(ctx, session))
> >>continue;
> >>req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
> >>LIST_REMOVE(session, state_list);
> >> 
> >> uses pipex_close_wait_list which contains only sessions which is timed
> >> out.
> > 
> > You are right. pipex_get_closed() walks through `pipex_close_wait_list'
> > which contains only CLOSE_WAIT sessions.
> > 
> > According to npppd(8) code we do PIPEXGCLOSED related walkthrough once
> > per NPPPD_TIMER_TICK_IVAL seconds, which is defined as 4. Is this such
> > performance impact?
> 
> It might be not so expensive for you.  But why do you intend to use
> that extra CPU when you have a cheaper way?

Please don't interpret my objections like this. Like you, I want to keep
pppac(4) and pppx(4) as close as possible.

Let me explain the reason for my objection to your diff.

We have 2 different cases for destroying a pipex(4) session:

1. pppx(4). We just destroy the session with the PIPEXDSESSION command. We
can't permit this session to be killed by pipex_timer().

2. pppac(4). While performing the PIPEXDSESSION command we mark the session
as PIPEX_STATE_CLOSE_WAIT2 and assume that pipex_timer() will kill it.
Also, pipex_timer() will always kill expired sessions.

Your diff keeps the pppac(4) behavior but introduces a new case for pppx(4):
expired sessions will still exist in an unlinked state. Userland should do
the garbage collecting. I was afraid these sessions would not be killed until
npppd(8) shutdown.

I looked at the npppd(8) code, but it was not obvious to me that userland
should do the garbage collecting. My fault, I should have asked before. But
now you explained, it's assumed

ppp{ac,x}(4): interface statistics fix.

2020-08-11 Thread Vitaliy Makkoveev
We count outgoing packets twice (the traffic below is nothing but ICMP echo request/reply):

obsd-test# netstat -I pppac0
NameMtu   Network Address Ipkts IfailOpkts Ofail Colls
pppac0  65532 878 0 1756 0 0
pppac0  65532 10.0.0.1/32 10.0.0.1  878 0 1756 0 0
obsd-test# netstat -I pppx0
NameMtu   Network Address Ipkts IfailOpkts Ofail Colls
pppx0   1360   24 0   48 0 0
pppx0   1360  10.0.0.1/32 10.0.0.1   24 0   48 0 0

The diff below removes the increments of `if_opackets' and `if_obytes' from
pppx_if_start() and pipex_ip_output(). We shouldn't count them there because
`if_snd' already does this.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.99
diff -u -p -r1.99 if_pppx.c
--- sys/net/if_pppx.c   10 Aug 2020 10:55:43 -  1.99
+++ sys/net/if_pppx.c   11 Aug 2020 13:10:23 -
@@ -882,9 +882,6 @@ pppx_if_start(struct ifnet *ifp)
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
 
-   ifp->if_obytes += m->m_pkthdr.len;
-   ifp->if_opackets++;
-
pipex_ppp_output(m, pxi->pxi_session, proto);
}
 }
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.123
diff -u -p -r1.123 pipex.c
--- sys/net/pipex.c 4 Aug 2020 09:32:05 -   1.123
+++ sys/net/pipex.c 11 Aug 2020 13:10:23 -
@@ -839,14 +839,6 @@ Static void
 pipex_ip_output(struct mbuf *m0, struct pipex_session *session)
 {
int is_idle;
-   struct ifnet *ifp;
-
-   if ((ifp = if_get(session->ifindex)) != NULL) {
-   /* output succeed here as a interface */
-   ifp->if_opackets++;
-   ifp->if_obytes+=m0->m_pkthdr.len;
-   }
-   if_put(ifp);
 
if (session->is_multicast == 0) {
/*



Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread Vitaliy Makkoveev



> On 10 Aug 2020, at 19:53, Vitaliy Makkoveev  wrote:
> 
> We are doing all wrong :)
> 
> We can just unlink pppx(4) related session from `pipex_session_list' if
> it's time expired. But since this unlinked session is still exists in
> pppx(4) layer we can access through pppx_get_closed() without any
> search. We should only add flag to session which identifies it as
> pppx(4) related.
> 
> I hope you like this idea.
> 
>  cut begin 
> Static void
> pipex_timer(void *ignored_arg)
> {
>struct pipex_session *session, *session_tmp;
> 
>timeout_add_sec(_timer_ch, pipex_prune);
> 
>NET_LOCK();
>/* walk through */
>LIST_FOREACH_SAFE(session, _session_list, session_list,
>session_tmp) {
>switch (session->state) {
>case PIPEX_STATE_OPENED:
>if (session->timeout_sec == 0)
>continue;
> 
>session->stat.idle_time++;
>if (session->stat.idle_time < session->timeout_sec)
>continue;
> 
>   if (session->pppx_session)
>   pipex_unlink_session(session);
>   else
>   pipex_notify_close_session(session);
>break;
>   /* ... */
> }
> 
> pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> {
>   struct pppx_if *pxi;
> 
>   pxi = pppx_if_find(pxd, req->pdr_session_id, req->pdr_protocol);
>   if (pxi == NULL)
>   return (EINVAL);
> 
>   memset(req, 0, sizeof(*req));
>   if (session->state == PIPEX_STATE_CLOSED) {
>   req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
>   pppx_if_destroy(pxi);   
>   }
> 
>   return 0;
> }

Sorry for the noise. I should avoid writing pseudo code.



Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread Vitaliy Makkoveev
We are doing it all wrong :)

We can just unlink a pppx(4) related session from `pipex_session_list' if
its time has expired. But since this unlinked session still exists in the
pppx(4) layer we can access it through pppx_get_closed() without any
search. We only need to add a flag to the session which identifies it as
pppx(4) related.

I hope you like this idea.

 cut begin 
Static void
pipex_timer(void *ignored_arg)
{
struct pipex_session *session, *session_tmp;

timeout_add_sec(_timer_ch, pipex_prune);

NET_LOCK();
/* walk through */
LIST_FOREACH_SAFE(session, _session_list, session_list,
session_tmp) {
switch (session->state) {
case PIPEX_STATE_OPENED:
if (session->timeout_sec == 0)
continue;

session->stat.idle_time++;
if (session->stat.idle_time < session->timeout_sec)
continue;

if (session->pppx_session)
pipex_unlink_session(session);
else
pipex_notify_close_session(session);
break;
/* ... */
}

pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
{
struct pppx_if *pxi;

pxi = pppx_if_find(pxd, req->pdr_session_id, req->pdr_protocol);
if (pxi == NULL)
return (EINVAL);

memset(req, 0, sizeof(*req));
if (session->state == PIPEX_STATE_CLOSED) {
req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
pppx_if_destroy(pxi);   
}

return 0;
}

 cut end 


On Mon, Aug 10, 2020 at 04:30:27PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> > Hi,
> > 
> > Thank you for your review.
> > 
> > On Sun, 9 Aug 2020 20:03:50 +0300
> > Vitaliy Makkoveev  wrote:
> > > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> > >> You propose to unlink pppx(4) related session which reached timeout. I'm
> > >> ok with this direction. But I see no reason to rework _get_closed()
> > >> routines.
> > >> 
> > >> in pppac(4) case it's assumed what if session is not yet destroyed by
> > >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> > >> command. We can make pppx(4) behavior the same and I propose to
> > >> pppx_get_closed() be like below. 
> > >> 
> > >> Also, nothing requires to modify pipex_get_closed(). 
> > >> 
> > >>  cut begin 
> > > 
> > > Sorry, I mean
> > > 
> > > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > > {
> > >   struct pppx_if  *pxi;
> > > 
> > >   memset(req, 0, sizeof(*req));
> > > 
> > >   while ((pxi = LIST_FIRST(>pxd_pxis))) {
> > >   if (pxi->pxi_session->state == session->state =
> > >   PIPEX_STATE_CLOSED) {
> > >   req->plr_ppp_id[req->plr_ppp_id_count++] =
> > >   pxi->pxi_session->ppp_id;
> > >   pppx_if_destroy(pxi);
> > >   }
> > >   }
> > > 
> > >   return 0;
> > > }
> > 
> > Yes, the diff doesn't seem to be completed but this way also will work.
> > 
> > Usually there is few CLOSED session even if there is a lot of session.
> > Also there is no CLOSED session if idle-timeout is not configured.  I
> > avoided that way because I think checking all sessions' state to find
> > such the few sessions is too expensive.
> > 
> > A way I am suggesting:
> > 
> > @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
> >  
> >  Static int
> >  pipex_get_closed(struct pipex_session_list_req *req,
> > -struct pipex_iface_context *iface)
> > +int (*isowner)(void *, struct pipex_session *), void *ctx)
> >  {
> > struct pipex_session *session, *session_tmp;
> >  
> > @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
> > bzero(req, sizeof(*req));
> > LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
> > session_tmp) {
> > -   if (session->pipex_iface != iface)
> > +   if (!isowner(ctx, session))
> > continue;
> > req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
> > LIST_REMOVE(session, state

Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread Vitaliy Makkoveev
On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> Thank you for your review.
> 
> On Sun, 9 Aug 2020 20:03:50 +0300
> Vitaliy Makkoveev  wrote:
> > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> >> You propose to unlink pppx(4) related session which reached timeout. I'm
> >> ok with this direction. But I see no reason to rework _get_closed()
> >> routines.
> >> 
> >> in pppac(4) case it's assumed what if session is not yet destroyed by
> >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> >> command. We can make pppx(4) behavior the same and I propose to
> >> pppx_get_closed() be like below. 
> >> 
> >> Also, nothing requires to modify pipex_get_closed(). 
> >> 
> >>  cut begin 
> > 
> > Sorry, I mean
> > 
> > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > {
> > struct pppx_if  *pxi;
> > 
> > memset(req, 0, sizeof(*req));
> > 
> > while ((pxi = LIST_FIRST(>pxd_pxis))) {
> > if (pxi->pxi_session->state == session->state =
> > PIPEX_STATE_CLOSED) {
> > req->plr_ppp_id[req->plr_ppp_id_count++] =
> > pxi->pxi_session->ppp_id;
> > pppx_if_destroy(pxi);
> > }
> > }
> > 
> > return 0;
> > }
> 
> Yes, the diff doesn't seem to be completed but this way also will work.
> 
> Usually there is few CLOSED session even if there is a lot of session.
> Also there is no CLOSED session if idle-timeout is not configured.  I
> avoided that way because I think checking all sessions' state to find
> such the few sessions is too expensive.
> 
> A way I am suggesting:
> 
> @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
>  
>  Static int
>  pipex_get_closed(struct pipex_session_list_req *req,
> -struct pipex_iface_context *iface)
> +int (*isowner)(void *, struct pipex_session *), void *ctx)
>  {
>   struct pipex_session *session, *session_tmp;
>  
> @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
>   bzero(req, sizeof(*req));
>   LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
>   session_tmp) {
> - if (session->pipex_iface != iface)
> + if (!isowner(ctx, session))
>   continue;
>   req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
>   LIST_REMOVE(session, state_list);
> 
> uses pipex_close_wait_list which contains only sessions which is timed
> out.

You are right. pipex_get_closed() walks through `pipex_close_wait_list'
which contains only CLOSE_WAIT sessions.

According to the npppd(8) code we do the PIPEXGCLOSED related walkthrough
once per NPPPD_TIMER_TICK_IVAL seconds, which is defined as 4. Is this really
such a performance impact?

Also, who should destroy these sessions? Is it assumed that npppd(8) will
destroy them via l2tp_ctrl_timeout() and pptp_ctrl_timeout()? Excuse me
if I'm wrong, but who will destroy sessions in the pppoe case?

> 
> >> Also I have one inlined comment within your diff. 
> 
> >> > @@ -430,6 +425,7 @@ pipex_link_session(struct pipex_session 
> >> >  struct pipex_iface_context *iface)
> >> >  {
> >> >  struct pipex_hash_head *chain;
> >> > +struct ifnet *ifp;
> >> >  
> >> >  NET_ASSERT_LOCKED();
> >> >  
> >> > @@ -442,6 +438,11 @@ pipex_link_session(struct pipex_session 
> >> >  session->pipex_iface = iface;
> >> >  session->ifindex = iface->ifindex;
> >> >  
> >> > +ifp = if_get(iface->ifindex);
> >> > +if (ifp != NULL && ifp->if_flags & IFF_POINTOPOINT)
> >> > +session->is_p2p = 1;
> >> > +if_put(ifp);
> >> > +
> >> 
> >> I guess NULL `ifp' here exposes us a bug. I like to have assertion here.
> 
> ok, I agree here.
> 
> 
> The diff is updated.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 if_pppx.c
> --- sys/net/if_pppx.c 28 Jul 2020 09:53:36 -  1.98
> +++ sys/net/if_pppx.c 10 Aug 2020 06:09:52 -
> @@ -185,6 +185,7 @@ int   pppx_config_session(struct pppx_dev
>   struct pipex_session_config_req *);
>

Re: pfsync: start without kernel lock

2020-08-09 Thread Vitaliy Makkoveev
On Sun, Aug 09, 2020 at 08:53:04PM +0200, Klemens Nanni wrote:
> On Sun, Aug 09, 2020 at 06:42:07PM +0300, Vitaliy Makkoveev wrote:
> > Does `IFXF_MPSAFE' bit assume that pfsyncioctl() should not rely to
> > kernel lock and pfsync(4) related data structures already have their own
> > protection?
> I say it does not.
> 
> There's PF_LOCK(), but it a) has to be enabled manually and b) is not
> specific to pfsync(4) alone.
> 
> IFXF_MPSAFE is about the driver's start routing alone, it does not
> concern the ioctl(2) path.
> 
> Does that answer your questions?
> 

Yes, thanks, `IFXF_MPSAFE' has a comment saying this.

I have no objections. OK mvs@. 



Re: pipex "idle-timeout" work with pppx(4).

2020-08-09 Thread Vitaliy Makkoveev
On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> Hello Yasuoka.
> 
> You propose to unlink pppx(4) related session which reached timeout. I'm
> ok with this direction. But I see no reason to rework _get_closed()
> routines.
> 
> in pppac(4) case it's assumed what if session is not yet destroyed by
> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> command. We can make pppx(4) behavior the same and I propose to
> pppx_get_closed() be like below. 
> 
> Also, nothing requires to modify pipex_get_closed(). 
> 
>  cut begin 
> 
> pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> {
>   struct pppx_if *pxi;
> 
>   pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
>   if (pxi == NULL)
>   return (EINVAL);
> 
>   memset(req, 0, sizeof(*req));
>   req->plr_ppp_id[req->plr_ppp_id_count++] = pxi->pxi_session->ppp_id;
>   pppx_if_destroy(pxi);
> 
> return 0;
> }
> 
>  cut end 
>

Sorry, I mean

pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
{
struct pppx_if  *pxi;

memset(req, 0, sizeof(*req));

while ((pxi = LIST_FIRST(>pxd_pxis))) {
if (pxi->pxi_session->state == session->state =
PIPEX_STATE_CLOSED) {
req->plr_ppp_id[req->plr_ppp_id_count++] =
pxi->pxi_session->ppp_id;
pppx_if_destroy(pxi);
}
}

return 0;
}


> Also I have one inlined comment within your diff. 
> 
> On Sun, Aug 09, 2020 at 05:14:13PM +0900, YASUOKA Masahiko wrote:
> > This diff makes pipex "idle-timeout" work with pppx(4).
> > 
> > ok?
> > 
> > Index: sys/net/if_pppx.c
> > ===
> > RCS file: /disk/cvs/openbsd/src/sys/net/if_pppx.c,v
> > retrieving revision 1.98
> > diff -u -p -r1.98 if_pppx.c
> > --- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
> > +++ sys/net/if_pppx.c   9 Aug 2020 08:05:16 -
> > @@ -185,6 +185,7 @@ int pppx_config_session(struct pppx_dev
> > struct pipex_session_config_req *);
> >  intpppx_get_stat(struct pppx_dev *,
> > struct pipex_session_stat_req *);
> > +intpppx_is_owner(void *, struct pipex_session *);
> >  intpppx_get_closed(struct pppx_dev *,
> > struct pipex_session_list_req *);
> >  intpppx_set_session_descr(struct pppx_dev *,
> > @@ -645,14 +646,6 @@ pppx_add_session(struct pppx_dev *pxd, s
> > struct in_ifaddr *ia;
> > struct sockaddr_in ifaddr;
> >  
> > -   /*
> > -* XXX: As long as `session' is allocated as part of a `pxi'
> > -*  it isn't possible to free it separately.  So disallow
> > -*  the timeout feature until this is fixed.
> > -*/
> > -   if (req->pr_timeout_sec != 0)
> > -   return (EINVAL);
> > -
> > error = pipex_init_session(, req);
> > if (error)
> > return (error);
> > @@ -812,12 +805,22 @@ pppx_get_stat(struct pppx_dev *pxd, stru
> >  }
> >  
> >  int
> > -pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > +pppx_is_owner(void *ctx, struct pipex_session *session)
> >  {
> > -   /* XXX: Only opened sessions exist for pppx(4) */
> > -   memset(req, 0, sizeof(*req));
> > +   struct pppx_dev *pxd = ctx;
> > +   struct pppx_if *pxi;
> >  
> > -   return 0;
> > +   pxi = pppx_if_find(pxd, session->session_id, session->protocol);
> > +   if (pxi != NULL)
> > +   return (1);
> > +
> > +   return (0);
> > +}
> > +
> > +int
> > +pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > +{
> > +   return (pipex_get_closed(req, pppx_is_owner, pxd));
> >  }
> >  
> >  int
> > @@ -1059,6 +1062,7 @@ static intpppac_ioctl(struct ifnet *, u
> >  static int pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
> > struct rtentry *);
> >  static voidpppac_start(struct ifnet *);
> > +static int pppac_is_owner(void *, struct pipex_session *);
> >  
> >  static inline struct pppac_softc *
> >  pppac_lookup(dev_t dev)
> > @@ -1251,6 +1255,16 @@ pppacwrite(dev_t dev, struct uio *uio, i
> >  }
> >  
> >  int
> > +pppac

Re: pfsync: start without kernel lock

2020-08-09 Thread Vitaliy Makkoveev
On Sun, Aug 09, 2020 at 02:33:01PM +0200, Klemens Nanni wrote:
> mvs's vnet(4) diff reminded me of pfsync(4).
> 
> This works on my my pair of amd64 firewalls.
> 
> Feedback? OK?
> 

Does the `IFXF_MPSAFE' bit imply that pfsyncioctl() should not rely on the
kernel lock and that pfsync(4) related data structures already have their own
protection?

> 
> Index: if_pfsync.c
> ===
> RCS file: /cvs/src/sys/net/if_pfsync.c,v
> retrieving revision 1.275
> diff -u -p -r1.275 if_pfsync.c
> --- if_pfsync.c   29 Jul 2020 12:08:15 -  1.275
> +++ if_pfsync.c   9 Aug 2020 00:52:41 -
> @@ -253,7 +253,7 @@ void  pfsync_update_net_tdb(struct pfsync
>  int  pfsyncoutput(struct ifnet *, struct mbuf *, struct sockaddr *,
>   struct rtentry *);
>  int  pfsyncioctl(struct ifnet *, u_long, caddr_t);
> -void pfsyncstart(struct ifnet *);
> +void pfsyncstart(struct ifqueue *);
>  void pfsync_syncdev_state(void *);
>  void pfsync_ifdetach(void *);
>  
> @@ -339,12 +339,12 @@ pfsync_clone_create(struct if_clone *ifc
>   ifp->if_softc = sc;
>   ifp->if_ioctl = pfsyncioctl;
>   ifp->if_output = pfsyncoutput;
> - ifp->if_start = pfsyncstart;
> + ifp->if_qstart = pfsyncstart;
>   ifp->if_type = IFT_PFSYNC;
>   ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_hdrlen = sizeof(struct pfsync_header);
>   ifp->if_mtu = ETHERMTU;
> - ifp->if_xflags = IFXF_CLONED;
> + ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
>   timeout_set_proc(>sc_tmo, pfsync_timeout, NULL);
>   timeout_set_proc(>sc_bulk_tmo, pfsync_bulk_update, NULL);
>   timeout_set_proc(>sc_bulkfail_tmo, pfsync_bulk_fail, NULL);
> @@ -418,9 +418,9 @@ pfsync_clone_destroy(struct ifnet *ifp)
>   * Start output on the pfsync interface.
>   */
>  void
> -pfsyncstart(struct ifnet *ifp)
> +pfsyncstart(struct ifqueue *ifq)
>  {
> - ifq_purge(>if_snd);
> + ifq_purge(ifq);
>  }
>  
>  void
> 



Re: pipex "idle-timeout" work with pppx(4).

2020-08-09 Thread Vitaliy Makkoveev
Hello Yasuoka.

You propose to unlink a pppx(4) related session which has reached its timeout.
I'm ok with this direction. But I see no reason to rework the _get_closed()
routines.

In the pppac(4) case it's assumed that if a session is not yet destroyed by
the garbage collector, it will be destroyed while performing the PIPEXGCLOSED
command. We can make the pppx(4) behavior the same, and I propose that
pppx_get_closed() be like below.

Also, nothing requires us to modify pipex_get_closed().

 cut begin 

pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
{
struct pppx_if *pxi;

pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
if (pxi == NULL)
return (EINVAL);

memset(req, 0, sizeof(*req));
req->plr_ppp_id[req->plr_ppp_id_count++] = pxi->pxi_session->ppp_id;
pppx_if_destroy(pxi);

return 0;
}

 cut end 

Also I have one inlined comment within your diff. 

On Sun, Aug 09, 2020 at 05:14:13PM +0900, YASUOKA Masahiko wrote:
> This diff makes pipex "idle-timeout" work with pppx(4).
> 
> ok?
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /disk/cvs/openbsd/src/sys/net/if_pppx.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 if_pppx.c
> --- sys/net/if_pppx.c 28 Jul 2020 09:53:36 -  1.98
> +++ sys/net/if_pppx.c 9 Aug 2020 08:05:16 -
> @@ -185,6 +185,7 @@ int   pppx_config_session(struct pppx_dev
>   struct pipex_session_config_req *);
>  int  pppx_get_stat(struct pppx_dev *,
>   struct pipex_session_stat_req *);
> +int  pppx_is_owner(void *, struct pipex_session *);
>  int  pppx_get_closed(struct pppx_dev *,
>   struct pipex_session_list_req *);
>  int  pppx_set_session_descr(struct pppx_dev *,
> @@ -645,14 +646,6 @@ pppx_add_session(struct pppx_dev *pxd, s
>   struct in_ifaddr *ia;
>   struct sockaddr_in ifaddr;
>  
> - /*
> -  * XXX: As long as `session' is allocated as part of a `pxi'
> -  *  it isn't possible to free it separately.  So disallow
> -  *  the timeout feature until this is fixed.
> -  */
> - if (req->pr_timeout_sec != 0)
> - return (EINVAL);
> -
>   error = pipex_init_session(, req);
>   if (error)
>   return (error);
> @@ -812,12 +805,22 @@ pppx_get_stat(struct pppx_dev *pxd, stru
>  }
>  
>  int
> -pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> +pppx_is_owner(void *ctx, struct pipex_session *session)
>  {
> - /* XXX: Only opened sessions exist for pppx(4) */
> - memset(req, 0, sizeof(*req));
> + struct pppx_dev *pxd = ctx;
> + struct pppx_if *pxi;
>  
> - return 0;
> + pxi = pppx_if_find(pxd, session->session_id, session->protocol);
> + if (pxi != NULL)
> + return (1);
> +
> + return (0);
> +}
> +
> +int
> +pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> +{
> + return (pipex_get_closed(req, pppx_is_owner, pxd));
>  }
>  
>  int
> @@ -1059,6 +1062,7 @@ static int  pppac_ioctl(struct ifnet *, u
>  static int   pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
>   struct rtentry *);
>  static void  pppac_start(struct ifnet *);
> +static int   pppac_is_owner(void *, struct pipex_session *);
>  
>  static inline struct pppac_softc *
>  pppac_lookup(dev_t dev)
> @@ -1251,6 +1255,16 @@ pppacwrite(dev_t dev, struct uio *uio, i
>  }
>  
>  int
> +pppac_is_owner(void *ctx, struct pipex_session *session)
> +{
> + struct pppac_softc *sc = ctx;
> +
> + if (session->ifindex == sc->sc_if.if_index)
> + return (1);
> + return (0);
> +}
> +
> +int
>  pppacioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p)
>  {
>   struct pppac_softc *sc = pppac_lookup(dev);
> @@ -1264,6 +1278,13 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
>   break;
>   case FIONREAD:
>   *(int *)data = mq_hdatalen(>sc_mq);
> + break;
> +
> + case PIPEXGCLOSED:
> + NET_LOCK();
> + error = pipex_get_closed((struct pipex_session_list_req *)data,
> + pppac_is_owner, sc);
> + NET_UNLOCK();
>   break;
>  
>   default:
> Index: sys/net/pipex.c
> ===
> RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
> retrieving revision 1.123
> diff -u -p -r1.123 pipex.c
> --- sys/net/pipex.c   4 Aug 2020 09:32:05 -   1.123
> +++ sys/net/pipex.c   9 Aug 2020 08:05:16 -
> @@ -240,11 +240,6 @@ pipex_ioctl(struct pipex_iface_context *
>   pipex_iface);
>   break;
>  
> - case PIPEXGCLOSED:
> - ret = pipex_get_closed((struct pipex_session_list_req *)data,
> - pipex_iface);
> - break;
> -
>   default:

vether(4): move `ifnet' out of KERNEL_LOCK()

2020-08-08 Thread Vitaliy Makkoveev
vether(4) is pretty much a dummy interface. Nothing prevents it from being `IFXF_MPSAFE'.

Index: sys/net/if_vether.c
===
RCS file: /cvs/src/sys/net/if_vether.c,v
retrieving revision 1.33
diff -u -p -r1.33 if_vether.c
--- sys/net/if_vether.c 28 Jul 2020 09:52:32 -  1.33
+++ sys/net/if_vether.c 9 Aug 2020 00:13:17 -
@@ -36,7 +36,7 @@
 
 void   vetherattach(int);
 intvetherioctl(struct ifnet *, u_long, caddr_t);
-void   vetherstart(struct ifnet *);
+void   vetherqstart(struct ifqueue *);
 intvether_clone_create(struct if_clone *, int);
 intvether_clone_destroy(struct ifnet *);
 intvether_media_change(struct ifnet *);
@@ -83,12 +83,12 @@ vether_clone_create(struct if_clone *ifc
 
ifp->if_softc = sc;
ifp->if_ioctl = vetherioctl;
-   ifp->if_start = vetherstart;
+   ifp->if_qstart = vetherqstart;
ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
 
ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
ifp->if_capabilities = IFCAP_VLAN_MTU;
-   ifp->if_xflags = IFXF_CLONED;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
 
ifmedia_init(>sc_media, 0, vether_media_change,
vether_media_status);
@@ -117,15 +117,12 @@ vether_clone_destroy(struct ifnet *ifp)
  * and we only need to discard the packets.
  */
 void
-vetherstart(struct ifnet *ifp)
+vetherqstart(struct ifqueue *ifq)
 {
+   struct ifnet*ifp = ifq->ifq_if;
struct mbuf *m;
 
-   for (;;) {
-   m = ifq_dequeue(>if_snd);
-   if (m == NULL)
-   return;
-
+   while ((m = ifq_dequeue(ifq)) != NULL) {
 #if NBPFILTER > 0
if (ifp->if_bpf)
bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_OUT);



Re: describe 'idle-timeout' exception in npppd.conf man page

2020-08-08 Thread Vitaliy Makkoveev
I audited the handling of the "idle-timeout" option.

On Sat, Aug 08, 2020 at 08:49:24PM +0900, YASUOKA Masahiko wrote:
> On Fri, 7 Aug 2020 22:19:05 +0300
> Vitaliy Makkoveev  wrote:
> > Some times ago we disabled in-kernel timeout for pppx(4) related
> > pipex(4) sessions. We did this for prevent use after free issue caused
> > by pipex_timer [1]. By default "idle-timeout" is not set in
> > npppd.conf(5) and I guess this is reason for we forgot to describe this
> > exception in npppd.conf(5).
> > 
> > But looks like one user caught this [2]. So I propose to describe this
> > in BUGS section of npppd.conf(5).
> > 
> > Also current "idle-timeout" description looks incorrect. If this option
> > is missing, there is not in-kernel timeout for this session, but
> > npppd(8) uses it's own timeout for. And we can't configure this value.
> >

I was a little wrong with this. This is a different timeout timer.
In my case `l2tp_ctrl_timeout' kills idle sessions. It's totally
npppd(8) related.

The case for "idle-timeout" is described below.

> > YASUOKA, what do you think? May be we can kill in-kernel timeout feature
> > for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
> > option?
> 
> I think we should mention this to the man page until we fix it.
> So I'd like you to update the man page first.
> 
> I'll try to review the problem.
>

We get this option from npppd.conf(5) and store it as `idle_timeout'
within `struct tunnconf'. While we set up the npppd(8) related session
context we set `timeout_sec' of `npppd_ppp' at npppd/ppp.c:169 to this
value. We also initialize the timeout timer at npppd/ppp.c:172. We have
the ppp_reset_idle_timeout() routine, which stops this timer and restarts
it if `timeout_sec > 0'.

 cut begin 

125 ppp_init(npppd *pppd, npppd_ppp *_this)
126 {
...
167 
168 /* load the idle timer configuration */
169 _this->timeout_sec = conf->idle_timeout;
170 
171 if (!evtimer_initialized(&_this->idle_event))
172 evtimer_set(&_this->idle_event, ppp_idle_timeout, _this);
173 


632 ppp_reset_idle_timeout(npppd_ppp *_this)
633 {
...
636 evtimer_del(&_this->idle_event);
637 if (_this->timeout_sec > 0) {
638 tv.tv_usec = 0;
639 tv.tv_sec = _this->timeout_sec;
640 
641 evtimer_add(&_this->idle_event, &tv);

 cut end 

When we create a pipex(4) session, we initialize the request and pass
this timeout value to the kernel as `req->pr_timeout_sec = ppp->timeout_sec'
at npppd/npppd.c:1013.

If the ioctl() at npppd/npppd.c:1153 was successful and the in-kernel
session was created, we check `timeout_sec' and disable the npppd(8)
related timer at npppd/npppd.c:1178. But this timer was never started.

 cut begin 

986 pipex_setup_common(npppd_ppp *ppp, struct pipex_session_req *req)
987 {
...
1013 req->pr_timeout_sec = ppp->timeout_sec;


1040 npppd_ppp_pipex_enable(npppd *_this, npppd_ppp *ppp)
1041 {
...
1059 pipex_setup_common(ppp, );
...
1153 if ((error = ioctl(_this->iface[ppp->ifidx].devf...
...
1175 if (ppp->timeout_sec > 0) {
1176 /* Stop the npppd's idle-timer.  We use
pipex's idle-timer  */
1177 ppp->timeout_sec = 0;
1178 ppp_reset_idle_timeout(ppp);
1179 }

 cut end 

So we have two cases:

1. "idle-timeout" is null or not set in npppd.conf(5)

npppd(8) related timer is initialized, but not started, in-kernel
timeout disabled.

2. "idle-timeout" is not null in npppd.conf(5)

npppd(8) related timer is initialized, but not started, in-kernel
timeout enabled for pppac(4) sessions.

So in either case we never enable the npppd(8) related timer.

We have some trouble with pppx(4) sessions: they have two parts, the
pipex(4) session and the pppx(4) related context. The session is a part
of this context. With the in-kernel timer we destroy the session within
the pipex(4) layer and we can't destroy the pppx(4) related part. That's
the reason we disabled this feature for pppx(4).

I propose to kill the in-kernel timeout. This simplifies the code and
makes pppac(4) and pppx(4) session usage more alike. Also, it's easy to
start using the npppd(8) related timer.

Do you have objections?



Re: pppac(4) move ifnet out of KERNEL_LOCK()

2020-08-08 Thread Vitaliy Makkoveev
Another update.
The whole "while ((m = ifq_dequeue(ifq)) != NULL)" loop is now wrapped by
the netlock, as was done for pppx(4). This avoids a per-packet
lock/unlock in the output path.


Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   8 Aug 2020 13:28:04 -
@@ -1058,7 +1058,7 @@ static intpppac_ioctl(struct ifnet *, u
 
 static int pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static voidpppac_start(struct ifnet *);
+static voidpppac_qstart(struct ifqueue *);
 
 static inline struct pppac_softc *
 pppac_lookup(dev_t dev)
@@ -1107,13 +1107,11 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_hdrlen = sizeof(uint32_t); /* for BPF */;
ifp->if_mtu = MAXMCLBYTES - sizeof(uint32_t);
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
-   ifp->if_xflags = IFXF_CLONED;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
ifp->if_rtrequest = p2p_rtrequest; /* XXX */
ifp->if_output = pppac_output;
-   ifp->if_start = pppac_start;
+   ifp->if_qstart = pppac_qstart;
ifp->if_ioctl = pppac_ioctl;
-   /* XXXSMP: be sure pppac_start() called under NET_LOCK() */
-   ifq_set_maxlen(>if_snd, 1);
 
if_counters_alloc(ifp);
if_attach(ifp);
@@ -1382,10 +1380,10 @@ pppacclose(dev_t dev, int flags, int mod
klist_invalidate(>sc_wsel.si_note);
splx(s);
 
-   pipex_iface_fini(>sc_pipex_iface);
-
if_detach(ifp);
 
+   pipex_iface_fini(>sc_pipex_iface);
+
LIST_REMOVE(sc, sc_entry);
free(sc, M_DEVBUF, sizeof(*sc));
 
@@ -1459,15 +1457,14 @@ drop:
 }
 
 static void
-pppac_start(struct ifnet *ifp)
+pppac_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppac_softc *sc = ifp->if_softc;
struct mbuf *m;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
-   while ((m = ifq_dequeue(>if_snd)) != NULL) {
+   NET_LOCK();
+   while ((m = ifq_dequeue(ifq)) != NULL) {
 #if NBPFILTER > 0
if (ifp->if_bpf) {
bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, m,
@@ -1489,9 +1486,9 @@ pppac_start(struct ifnet *ifp)
 
mq_enqueue(>sc_mq, m); /* qdrop */
}
+   NET_UNLOCK();
 
if (!mq_empty(>sc_mq)) {
-   KERNEL_ASSERT_LOCKED();
wakeup(sc);
selwakeup(>sc_rsel);
}



Re: describe 'idle-timeout' exception in npppd.conf man page

2020-08-08 Thread Vitaliy Makkoveev
On Sat, Aug 08, 2020 at 08:49:24PM +0900, YASUOKA Masahiko wrote:
> On Fri, 7 Aug 2020 22:19:05 +0300
> Vitaliy Makkoveev  wrote:
> > Some times ago we disabled in-kernel timeout for pppx(4) related
> > pipex(4) sessions. We did this for prevent use after free issue caused
> > by pipex_timer [1]. By default "idle-timeout" is not set in
> > npppd.conf(5) and I guess this is reason for we forgot to describe this
> > exception in npppd.conf(5).
> > 
> > But looks like one user caught this [2]. So I propose to describe this
> > in BUGS section of npppd.conf(5).
> > 
> > Also current "idle-timeout" description looks incorrect. If this option
> > is missing, there is not in-kernel timeout for this session, but
> > npppd(8) uses it's own timeout for. And we can't configure this value.
> > 
> > YASUOKA, what do you think? May be we can kill in-kernel timeout feature
> > for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
> > option?
> 
> I think we should mention this to the man page until we fix it.
> So I'd like you to update the man page first.
> 
> I'll try to review the problem.
> 

Thanks. I updated my diff with the changes proposed by jmc@. Do you agree
with them?

> > 1. 
> > https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78=text/x-cvsweb-markup
> > 2. https://marc.info/?l=openbsd-misc=159655468504864=2 
> > 
> > 
> > Index: usr.sbin/npppd/npppd/npppd.conf.5
> > ===
> > RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
> > retrieving revision 1.27
> > diff -u -p -r1.27 npppd.conf.5
> > --- usr.sbin/npppd/npppd/npppd.conf.5   23 Apr 2020 21:10:54 -  
> > 1.27
> > +++ usr.sbin/npppd/npppd/npppd.conf.5   7 Aug 2020 19:17:00 -
> > @@ -699,3 +699,9 @@ The current version of
> >  .Xr npppd 8
> >  does not support adding or removing tunnel settings or changing listener
> >  settings (listen address, port and l2tp-ipsec-require).
> > +.Pp
> > +This time
> > +.Xr pppx 4
> > +does not allow to create sessions with non null
> > +.Ic idle-timeout
> > +option. 
> 



Re: describe 'idle-timeout' exception in npppd.conf man page

2020-08-07 Thread Vitaliy Makkoveev
On Fri, Aug 07, 2020 at 09:29:13PM +0100, Jason McIntyre wrote:
> On Fri, Aug 07, 2020 at 10:19:05PM +0300, Vitaliy Makkoveev wrote:
> > Some times ago we disabled in-kernel timeout for pppx(4) related
> > pipex(4) sessions. We did this for prevent use after free issue caused
> > by pipex_timer [1]. By default "idle-timeout" is not set in
> > npppd.conf(5) and I guess this is reason for we forgot to describe this
> > exception in npppd.conf(5).
> > 
> > But looks like one user caught this [2]. So I propose to describe this
> > in BUGS section of npppd.conf(5).
> > 
> > Also current "idle-timeout" description looks incorrect. If this option
> > is missing, there is not in-kernel timeout for this session, but
> > npppd(8) uses it's own timeout for. And we can't configure this value.
> > 
> > YASUOKA, what do you think? May be we can kill in-kernel timeout feature
> > for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
> > option?
> > 
> > 1. 
> > https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78=text/x-cvsweb-markup
> > 2. https://marc.info/?l=openbsd-misc=159655468504864=2 
> > 
> > 
> > Index: usr.sbin/npppd/npppd/npppd.conf.5
> > ===
> > RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
> > retrieving revision 1.27
> > diff -u -p -r1.27 npppd.conf.5
> > --- usr.sbin/npppd/npppd/npppd.conf.5   23 Apr 2020 21:10:54 -  
> > 1.27
> > +++ usr.sbin/npppd/npppd/npppd.conf.5   7 Aug 2020 19:17:00 -
> > @@ -699,3 +699,9 @@ The current version of
> >  .Xr npppd 8
> >  does not support adding or removing tunnel settings or changing listener
> >  settings (listen address, port and l2tp-ipsec-require).
> > +.Pp
> > +This time
> > +.Xr pppx 4
> > +does not allow to create sessions with non null
> > +.Ic idle-timeout
> > +option. 
> > 
> 

Thanks for your feedback. My English is bad, so thanks for fixing.

> is this an actual bug? i'm just asking - it might be that the
> idle-timeout text is the best place to warn users, and not BUGS.

It is a pppx(4) related bug. Unfortunately it wasn't solved and we just
disabled this feature to avoid panics. Maybe the pipex(4) man page is the
best place to describe this issue, in its BUGS section.

> 
> regarding your text:
> 
> - "this time" is better written as "At this time" or "currently".
> - "allow to create" is not good sentence structure
> 
> i think the text would read better something like:
> 
>   .Xr pppx 4
>   does not allow sessions with
>   .Ic idle-timeout
>   set to any value other than 0.
> 

I added this to pipex(4) BUGS section.

> if the text was better placed in the idle-timeout section:
> 
>   This value must be 0 for
>   .Xr pppx 4
>   sessions.

And this to npppd.conf(5) idle-timeout section.


Index: share/man/man4/pipex.4
===
RCS file: /cvs/src/share/man/man4/pipex.4,v
retrieving revision 1.12
diff -u -p -r1.12 pipex.4
--- share/man/man4/pipex.4  3 Apr 2020 07:46:04 -   1.12
+++ share/man/man4/pipex.4  7 Aug 2020 20:54:32 -
@@ -288,3 +288,8 @@ The
 .Nm
 was written by
 .An Internet Initiative Japan Inc .
+.Sh BUGS
+.Xr pppx 4
+does not allow sessions with
+.Ic pr_timeout_sec
+set to any value other than 0.
Index: usr.sbin/npppd/npppd/npppd.conf.5
===
RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
retrieving revision 1.27
diff -u -p -r1.27 npppd.conf.5
--- usr.sbin/npppd/npppd/npppd.conf.5   23 Apr 2020 21:10:54 -  1.27
+++ usr.sbin/npppd/npppd/npppd.conf.5   7 Aug 2020 20:54:32 -
@@ -325,6 +325,9 @@ The link is disconnected if there are no
 for more than the amount of the
 .Ar idle-timeout .
 The default is 0, which disables the idle timer.
+This value must be 0 for
+.Xr pppx 4
+sessions.
 .It Ic tcp-mss-adjust Ar yes | no
 If
 .Dq yes



describe 'idle-timeout' exception in npppd.conf man page

2020-08-07 Thread Vitaliy Makkoveev
Some time ago we disabled the in-kernel timeout for pppx(4) related
pipex(4) sessions. We did this to prevent a use-after-free issue caused
by pipex_timer [1]. By default "idle-timeout" is not set in
npppd.conf(5), and I guess this is the reason we forgot to describe this
exception in npppd.conf(5).

But looks like one user caught this [2]. So I propose to describe this
in BUGS section of npppd.conf(5).

Also, the current "idle-timeout" description looks incorrect. If this
option is missing, there is no in-kernel timeout for this session, but
npppd(8) uses its own timeout for it, and we can't configure this value.

YASUOKA, what do you think? Maybe we can kill the in-kernel timeout
feature for pipex(4), and make npppd(8)'s idle timeout configurable by
this option?

1. 
https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78=text/x-cvsweb-markup
2. https://marc.info/?l=openbsd-misc=159655468504864=2 


Index: usr.sbin/npppd/npppd/npppd.conf.5
===
RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
retrieving revision 1.27
diff -u -p -r1.27 npppd.conf.5
--- usr.sbin/npppd/npppd/npppd.conf.5   23 Apr 2020 21:10:54 -  1.27
+++ usr.sbin/npppd/npppd/npppd.conf.5   7 Aug 2020 19:17:00 -
@@ -699,3 +699,9 @@ The current version of
 .Xr npppd 8
 does not support adding or removing tunnel settings or changing listener
 settings (listen address, port and l2tp-ipsec-require).
+.Pp
+This time
+.Xr pppx 4
+does not allow to create sessions with non null
+.Ic idle-timeout
+option. 



Re: pppx(4): move ifnet out of KERNEL_LOCK()

2020-08-06 Thread Vitaliy Makkoveev
On Thu, Aug 06, 2020 at 01:25:14PM +0200, Martin Pieuchot wrote:
> On 05/08/20(Wed) 12:50, Vitaliy Makkoveev wrote:
> > pipex(4) and pppx(4) are ready to became a little bit more MP capable.
> > Diff below moves pppx(4) related `ifnet' out of KERNEL_LOCK().
> 
> Nice, one comment below.
> 
> > Index: sys/net/if_pppx.c
> >
> > [skip]
> >
> > +   NET_LOCK();
> > pipex_ppp_output(m, pxi->pxi_session, proto);
> > +   NET_UNLOCK();
>
> This means the lock is taken and released for every packet.  It would be
> better to grab it outside the loop.

Ok, fixed.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   6 Aug 2020 11:54:44 -
@@ -191,7 +191,7 @@ int pppx_set_session_descr(struct pppx_
struct pipex_session_descr_req *);
 
 void   pppx_if_destroy(struct pppx_dev *, struct pppx_if *);
-void   pppx_if_start(struct ifnet *);
+void   pppx_if_qstart(struct ifqueue *);
 intpppx_if_output(struct ifnet *, struct mbuf *,
struct sockaddr *, struct rtentry *);
 intpppx_if_ioctl(struct ifnet *, u_long, caddr_t);
@@ -683,13 +683,12 @@ pppx_add_session(struct pppx_dev *pxd, s
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
-   ifp->if_xflags = IFXF_CLONED;
-   ifp->if_start = pppx_if_start;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+   ifp->if_qstart = pppx_if_qstart;
ifp->if_output = pppx_if_output;
ifp->if_ioctl = pppx_if_ioctl;
ifp->if_rtrequest = p2p_rtrequest;
ifp->if_type = IFT_PPP;
-   ifq_set_maxlen(>if_snd, 1);
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
 
@@ -864,21 +863,15 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 }
 
 void
-pppx_if_start(struct ifnet *ifp)
+pppx_if_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppx_if *pxi = (struct pppx_if *)ifp->if_softc;
struct mbuf *m;
int proto;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
-   for (;;) {
-   m = ifq_dequeue(>if_snd);
-
-   if (m == NULL)
-   break;
-
+   NET_LOCK();
+   while ((m = ifq_dequeue(ifq)) != NULL) {
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
 
@@ -887,6 +880,7 @@ pppx_if_start(struct ifnet *ifp)
 
pipex_ppp_output(m, pxi->pxi_session, proto);
}
+   NET_UNLOCK();
 }
 
 int



Remove unused netisr defines

2020-08-05 Thread Vitaliy Makkoveev
Remove defines for netisr bits which are not used anymore. 

Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.52
diff -u -p -r1.52 netisr.h
--- sys/net/netisr.h4 Aug 2020 09:32:05 -   1.52
+++ sys/net/netisr.h5 Aug 2020 21:07:41 -
@@ -41,12 +41,8 @@
  * interrupt used for scheduling the network code to calls
  * on the lowest level routine of each protocol.
  */
-#defineNETISR_IP   2   /* same as AF_INET */
-#defineNETISR_TX   3   /* for if_snd processing */
 #defineNETISR_PFSYNC   5   /* for pfsync "immediate" tx */
 #defineNETISR_ARP  18  /* same as AF_LINK */
-#defineNETISR_IPV6 24  /* same as AF_INET6 */
-#defineNETISR_ISDN 26  /* same as AF_E164 */
 #defineNETISR_PPP  28  /* for PPP processing */
 #defineNETISR_BRIDGE   29  /* for bridge processing */
 #defineNETISR_PPPOE30  /* for pppoe processing */



Re: Don't check pointers against 0

2020-08-05 Thread Vitaliy Makkoveev
ok mvs

> On 5 Aug 2020, at 23:49, Marcus Glocker  wrote:
> 
> Reported by Peter J. Philipp.
> 
> OK?
> 
> 
> Index: sys/netinet/udp_usrreq.c
> ===
> RCS file: /cvs/src/sys/netinet/udp_usrreq.c,v
> retrieving revision 1.260
> diff -u -p -u -p -r1.260 udp_usrreq.c
> --- sys/netinet/udp_usrreq.c  1 Aug 2020 23:41:56 -   1.260
> +++ sys/netinet/udp_usrreq.c  5 Aug 2020 20:49:02 -
> @@ -486,7 +486,7 @@ udp_input(struct mbuf **mp, int *offp, i
>   inp = in_pcbhashlookup(, ip->ip_src, uh->uh_sport,
>   ip->ip_dst, uh->uh_dport, m->m_pkthdr.ph_rtableid);
>   }
> - if (inp == 0) {
> + if (inp == NULL) {
>   udpstat_inc(udps_pcbhashmiss);
> #ifdef INET6
>   if (ip6) {
> @@ -519,7 +519,7 @@ udp_input(struct mbuf **mp, int *offp, i
>   }
> #endif /*IPSEC */
> 
> - if (inp == 0) {
> + if (inp == NULL) {
>   udpstat_inc(udps_noport);
>   if (m->m_flags & (M_BCAST | M_MCAST)) {
>   udpstat_inc(udps_noportbcast);
> 



Re: pppac(4) move ifnet out of KERNEL_LOCK()

2020-08-05 Thread Vitaliy Makkoveev
A little update.

I use the `ifq' passed to pppac_qstart() instead of `ifp->if_snd' for
consistency.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   5 Aug 2020 20:09:19 -
@@ -1058,7 +1058,7 @@ static intpppac_ioctl(struct ifnet *, u
 
 static int pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static voidpppac_start(struct ifnet *);
+static voidpppac_qstart(struct ifqueue *);
 
 static inline struct pppac_softc *
 pppac_lookup(dev_t dev)
@@ -1107,13 +1107,11 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_hdrlen = sizeof(uint32_t); /* for BPF */;
ifp->if_mtu = MAXMCLBYTES - sizeof(uint32_t);
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
-   ifp->if_xflags = IFXF_CLONED;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
ifp->if_rtrequest = p2p_rtrequest; /* XXX */
ifp->if_output = pppac_output;
-   ifp->if_start = pppac_start;
+   ifp->if_qstart = pppac_qstart;
ifp->if_ioctl = pppac_ioctl;
-   /* XXXSMP: be sure pppac_start() called under NET_LOCK() */
-   ifq_set_maxlen(>if_snd, 1);
 
if_counters_alloc(ifp);
if_attach(ifp);
@@ -1382,10 +1380,10 @@ pppacclose(dev_t dev, int flags, int mod
klist_invalidate(>sc_wsel.si_note);
splx(s);
 
-   pipex_iface_fini(>sc_pipex_iface);
-
if_detach(ifp);
 
+   pipex_iface_fini(>sc_pipex_iface);
+
LIST_REMOVE(sc, sc_entry);
free(sc, M_DEVBUF, sizeof(*sc));
 
@@ -1459,15 +1457,13 @@ drop:
 }
 
 static void
-pppac_start(struct ifnet *ifp)
+pppac_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppac_softc *sc = ifp->if_softc;
struct mbuf *m;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
-   while ((m = ifq_dequeue(>if_snd)) != NULL) {
+   while ((m = ifq_dequeue(ifq)) != NULL) {
 #if NBPFILTER > 0
if (ifp->if_bpf) {
bpf_mtap_af(ifp->if_bpf, m->m_pkthdr.ph_family, m,
@@ -1475,8 +1471,10 @@ pppac_start(struct ifnet *ifp)
}
 #endif
 
+   NET_LOCK();
m = pipex_output(m, m->m_pkthdr.ph_family, 0,
>sc_pipex_iface);
+   NET_UNLOCK();
if (m == NULL)
continue;
 
@@ -1491,7 +1489,6 @@ pppac_start(struct ifnet *ifp)
}
 
if (!mq_empty(>sc_mq)) {
-   KERNEL_ASSERT_LOCKED();
wakeup(sc);
selwakeup(>sc_rsel);
}



Re: bugs in bridge ( netlock ? )

2020-08-05 Thread Vitaliy Makkoveev
I guess it's a known issue caused by ifioctl() races.


> On 5 Aug 2020, at 19:06, Sven F.  wrote:
> 
> On Wed, Aug 5, 2020 at 9:14 AM Sven F.  wrote:
>> 
>> Never seen before crash ( 6. 7 stable )
>> 
>> My devices run a lot of things in, load is easily 4
>> which is good for breaking lock code ?
>> 
>> uvm_fault(0xfd820a916cc0, 0x8, 0, 1) -> e
>> kernel: page fault trap, code=0
>> Stopped at  bridge_brlconf+0x24:movq0x8(%rdi),%rax
>> ddb{1}> bridge_brlconf(0,800022e482b0) at bridge_brlconf+0x24
>> bridge_ioctl(80524000,c030694f,800022e482b0) at 
>> bridge_ioctl+0x34f
>> ifioctl(fd8118f33c98,c030694f,800022e482b0,800022895510)
>> at ifioctl+0xa03
>> soo_ioctl(fd814c9cf2e8,c030694f,800022e482b0,800022895510)
>> at soo_ioctl+0x171
>> sys_ioctl(800022895510,800022e483c0,800022e48420) at 
>> sys_ioctl+0x2df
>> syscall(800022e48490) at syscall+0x389
>> Xsyscall() at Xsyscall+0x128
>> end of kernel
>> end trace frame: 0x7f7d2c30, count: -7
>> ddb{1}>PID TID   PPIDUID  S   FLAGS  WAIT  COMMAND
>> 93583  108149190  0  3 0x2  smrbarifconfig
>> 91803  290614  22775  0  2   0perl
>> 10793  382902  22775  0  2   0perl
>> 83906   60674  57936  0  2 0x2ifconfig
>> *80202  204044  63585  0  7 0x2ifconfig
>> 635857414  22775  0  30x80  piperdperl
>> 57936  439878  22775  0  30x80  piperdperl
>> 70874   88156  22775  0  7   0perl
>> 72682  513697  1  0  30x80  poll  openvpn
>> 84518  279046  22775  0  2   0perl
>> 55262   52512  22775  0  2   0perl
>> 36158  256298  22775  0  2   0perl
>> 84969  398264  1  0  30x80  poll  openvpn
>> 31484  239479  1  0  30x80  poll  openvpn
>>  7902   84087  60669  0  30x82  netio sshd
>> 25285  366282  1  0  30x80  poll  openvpn
>> 96838  424361  1  0  30x80  poll  openvpn
>> 42763  368876  1  0  30x80  poll  openvpn
>> 92032  243887  1  0  30x80  poll  openvpn
>> 22775  200805  21119  0  2 0x2perl
>> 21119  407468  75737  0  30x10008a  pause sh
>> ddb{1}> rebooting...
>> OpenBSD 6.7-stable (GENERIC.MP) #43: Tue Jul 28 21:46:24 EDT 2020
>>root@builder:/sys/arch/amd64/compile/GENERIC.MP
>> real mem = 8371683328 (7983MB)
>> avail mem = 8105316352 (7729MB)
>> mpath0 at root
>> scsibus0 at mpath0: 256 targets
>> mainbus0 at root
>> bios0 at mainbus0: SMBIOS rev. 2.8 @ 0xf6830 (11 entries)
>> bios0: vendor SeaBIOS version "2:1.10.2-58953eb7" date 04/01/2014
>> bios0: OpenStack Foundation OpenStack Nova
>> acpi0 at bios0: ACPI 1.0
>> acpi0: sleep states S3 S4 S5
>> acpi0: tables DSDT FACP APIC
>> acpi0: wakeup devices
>> acpitimer0 at acpi0: 3579545 Hz, 24 bits
>> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
>> cpu0 at mainbus0: apid 0 (boot processor)
>> cpu0: Intel Core Processor (Haswell, no TSX), 2394.80 MHz, 06-3c-01
>> cpu0: 
>> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,RDTSCP,LONG,LAHF,ABM,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MD_CLEAR,ARAT,XSAVEOPT,MELTDOWN
>> cpu0: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB
>> 64b/line 16-way L2 cache
>> cpu0: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped
>> cpu0: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped
>> cpu0: smt 0, core 0, package 0
>> mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
>> cpu0: apic clock running at 1000MHz
>> cpu1 at mainbus0: apid 1 (application processor)
>> cpu1: Intel Core Processor (Haswell, no TSX), 2394.56 MHz, 06-3c-01
>> cpu1: 
>> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,RDTSCP,LONG,LAHF,ABM,FSGSBASE,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MD_CLEAR,ARAT,XSAVEOPT,MELTDOWN
>> cpu1: 64KB 64b/line 2-way I-cache, 64KB 64b/line 2-way D-cache, 512KB
>> 64b/line 16-way L2 cache
>> cpu1: ITLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped
>> cpu1: DTLB 255 4KB entries direct-mapped, 255 4MB entries direct-mapped
>> cpu1: smt 0, core 0, package 1
>> ioapic0 at mainbus0: apid 0 pa 0xfec0, version 11, 24 pins
>> acpiprt0 at acpi0: bus 0 (PCI0)
>> acpicpu0 at acpi0: C1(@1 halt!)
>> acpicpu1 at acpi0: C1(@1 halt!)
>> "ACPI0006" at acpi0 not configured
>> acpipci0 at acpi0 PCI0: _OSC failed
>> 

pppac(4) move ifnet out of KERNEL_LOCK()

2020-08-05 Thread Vitaliy Makkoveev
The same as for pppx(4).

pipex(4) and pppac(4) are ready to become a little bit more MP capable.
Diff below moves pppac(4) related `ifnet' out of KERNEL_LOCK().

wakeup(9) and selwakeup() do not require KERNEL_LOCK(), so this
assertion was wrong and can be dropped. Also we detach `ifnet' before
pipex_iface_fini().

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   5 Aug 2020 13:53:33 -
@@ -1058,7 +1058,7 @@ static intpppac_ioctl(struct ifnet *, u
 
 static int pppac_output(struct ifnet *, struct mbuf *, struct sockaddr *,
struct rtentry *);
-static voidpppac_start(struct ifnet *);
+static voidpppac_qstart(struct ifqueue *);
 
 static inline struct pppac_softc *
 pppac_lookup(dev_t dev)
@@ -1107,13 +1107,11 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_hdrlen = sizeof(uint32_t); /* for BPF */;
ifp->if_mtu = MAXMCLBYTES - sizeof(uint32_t);
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST;
-   ifp->if_xflags = IFXF_CLONED;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
ifp->if_rtrequest = p2p_rtrequest; /* XXX */
ifp->if_output = pppac_output;
-   ifp->if_start = pppac_start;
+   ifp->if_qstart = pppac_qstart;
ifp->if_ioctl = pppac_ioctl;
-   /* XXXSMP: be sure pppac_start() called under NET_LOCK() */
-   ifq_set_maxlen(>if_snd, 1);
 
if_counters_alloc(ifp);
if_attach(ifp);
@@ -1382,10 +1380,10 @@ pppacclose(dev_t dev, int flags, int mod
klist_invalidate(>sc_wsel.si_note);
splx(s);
 
-   pipex_iface_fini(>sc_pipex_iface);
-
if_detach(ifp);
 
+   pipex_iface_fini(>sc_pipex_iface);
+
LIST_REMOVE(sc, sc_entry);
free(sc, M_DEVBUF, sizeof(*sc));
 
@@ -1459,14 +1457,12 @@ drop:
 }
 
 static void
-pppac_start(struct ifnet *ifp)
+pppac_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppac_softc *sc = ifp->if_softc;
struct mbuf *m;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
while ((m = ifq_dequeue(>if_snd)) != NULL) {
 #if NBPFILTER > 0
if (ifp->if_bpf) {
@@ -1475,8 +1471,10 @@ pppac_start(struct ifnet *ifp)
}
 #endif
 
+   NET_LOCK();
m = pipex_output(m, m->m_pkthdr.ph_family, 0,
>sc_pipex_iface);
+   NET_UNLOCK();
if (m == NULL)
continue;
 
@@ -1491,7 +1489,6 @@ pppac_start(struct ifnet *ifp)
}
 
if (!mq_empty(>sc_mq)) {
-   KERNEL_ASSERT_LOCKED();
wakeup(sc);
selwakeup(>sc_rsel);
}



pppx(4): move ifnet out of KERNEL_LOCK()

2020-08-05 Thread Vitaliy Makkoveev
pipex(4) and pppx(4) are ready to become a little bit more MP capable.
Diff below moves pppx(4) related `ifnet' out of KERNEL_LOCK().

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.98
diff -u -p -r1.98 if_pppx.c
--- sys/net/if_pppx.c   28 Jul 2020 09:53:36 -  1.98
+++ sys/net/if_pppx.c   5 Aug 2020 09:34:50 -
@@ -191,7 +191,7 @@ int pppx_set_session_descr(struct pppx_
struct pipex_session_descr_req *);
 
 void   pppx_if_destroy(struct pppx_dev *, struct pppx_if *);
-void   pppx_if_start(struct ifnet *);
+void   pppx_if_qstart(struct ifqueue *);
 intpppx_if_output(struct ifnet *, struct mbuf *,
struct sockaddr *, struct rtentry *);
 intpppx_if_ioctl(struct ifnet *, u_long, caddr_t);
@@ -683,13 +683,12 @@ pppx_add_session(struct pppx_dev *pxd, s
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
-   ifp->if_xflags = IFXF_CLONED;
-   ifp->if_start = pppx_if_start;
+   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+   ifp->if_qstart = pppx_if_qstart;
ifp->if_output = pppx_if_output;
ifp->if_ioctl = pppx_if_ioctl;
ifp->if_rtrequest = p2p_rtrequest;
ifp->if_type = IFT_PPP;
-   ifq_set_maxlen(>if_snd, 1);
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
 
@@ -864,28 +863,23 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 }
 
 void
-pppx_if_start(struct ifnet *ifp)
+pppx_if_qstart(struct ifqueue *ifq)
 {
+   struct ifnet *ifp = ifq->ifq_if;
struct pppx_if *pxi = (struct pppx_if *)ifp->if_softc;
struct mbuf *m;
int proto;
 
-   if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
-
-   for (;;) {
-   m = ifq_dequeue(&ifp->if_snd);
-
-   if (m == NULL)
-   break;
-
+   while ((m = ifq_dequeue(ifq)) != NULL) {
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
 
ifp->if_obytes += m->m_pkthdr.len;
ifp->if_opackets++;
 
+   NET_LOCK();
pipex_ppp_output(m, pxi->pxi_session, proto);
+   NET_UNLOCK();
}
 }
 



Re: pipex(4): kill pipexintr()

2020-08-04 Thread Vitaliy Makkoveev
On Tue, Aug 04, 2020 at 01:53:55PM +0900, YASUOKA Masahiko wrote:
> On Mon, 3 Aug 2020 23:36:09 +0300
> Vitaliy Makkoveev  wrote:
> > On Tue, Aug 04, 2020 at 01:26:14AM +0900, YASUOKA Masahiko wrote:
> >> Comments?
> > 
> > You introduce `cookie' as 
> > 
> > cookie = session->protocol << 16 | session->session_id;
> > 
> > also multicast sessions initialized as 
> > 
> > session->protocol = PIPEX_PROTO_NONE;
> > session->session_id = ifindex;
> > 
> > > `protocol' and `session_id' come from userland, so I would like to have
> > > checks like the ones below. They allow us to avoid a broken `cookie' when
> > > `pr_session_id' exceeds a 16-bit integer. Also, userland should not pass
> > > PIPEX_PROTO_NONE as `pr_protocol' because we shouldn't have multicast
> > > and non-multicast sessions with the same `cookie'.
> > 
> >  cut begin 
> > 
> > pipex_init_session(struct pipex_session **rsession,
> > struct pipex_session_req *req)
> > {
> > if (req->pr_protocol == PIPEX_PROTO_NONE)
> > return (EINVAL);
> 
> pipex_init_session() has the same check already.

My fault. Sorry.

> 
>  287 int
>  288 pipex_init_session(struct pipex_session **rsession,
>  289 struct pipex_session_req *req)
>  290 {
>  (snip)
>  297 switch (req->pr_protocol) {
>  298 #ifdef PIPEX_PPPOE
>  299 case PIPEX_PROTO_PPPOE:
>  (snip)
>  333 default:
>  334 return (EPROTONOSUPPORT);
>  335 }
> 
> > 
> > > if (req->pr_session_id > 0xffff)
> > return (EINVAL);
> > 
> >  cut end 
> 
> req->pr_session_id can't be > 0xffff since it's uint16_t.
> 
> > Also cookies introduce invalidation problem. Yes, it has low
> > probability, but we can have operation order like below:
> > 
> > 1. enqueue session with `protocol' = 0xaa and `session_id' = 0xbb, and
> > `cookie' = 0xaabb
> > 2. kill this session
> > 3. create new session `protocol' = 0xaa and `session_id' = 0xbb
> > 4. this newly created session will be used by pipexintr()
> > 
> > As I saw while playing with refcounters, a session can be enqueued
> > more than 10 times...
> 
> The diff makes the problem worse, but it could happen already if the
> session-id is reused.
> 
> > Also, it's not obvious that the interface index will never exceed a 16-bit
> > counter. It's an unsigned int, and maybe the underlying counter's resolution
> > will be expanded in the future. So I would like to have at least a
> > corresponding assertion in pipex_iface_init().
> 
> Right.  This is fixable with another unique number.
> 
> > So maybe my first solution is the best here. And, as mpi@ pointed out,
> > ipsec(4) should be reworked to allow parallelism.
> 
> Does first mean killing the pipexintr?

Yes.

> 
> What I explained was wrong.  I'm sorry about this.
> 
> On Fri, 31 Jul 2020 09:36:32 +0900 (JST)
> YASUOKA Masahiko  wrote:
> > A packet of L2TP/IPsec (encapsulated IP/PPP/L2TP/UDP/ESP/UDP/IP) is
> > processed like:
> > 
> >ipv4_input
> >  ...
> >udp_input
> >  ipsec_common_input
> >  esp_input
> >crypto_dispatch
> >  => crypto_taskq_mp_safe
> > 
> >kthread "crynlk"
> >  crypto_invoke
> >... (*1)
> >  crypto_done
> >  esp_input_cb
> >ipsec_common_input_cb
> >  ip_deliver
> >udp_input
> >  pipex_l2tp_input
> >pipex_common_input
> >  (*2)
> >  pipex_ppp_input
> >pipex_mppe_input (*3)
> >  pipex_ppp_input
> >pipex_ip_input
> >  ipv4_input
> >...
> 
> This should be
> 
>kthread "crynlk"
>  crypto_invoke
>... (*1)
>  crypto_done
>kthread "crypto" < another thread
>  ipsec_input_cb < this is missed
>esp_input_cb
>  ipsec_common_input_cb
>ip_deliver
>  udp_input
>pipex_l2tp_input
>  pipex_common_input
>(*2)
>pipex_ppp_input
>  pipex_mppe_input (*3)
>pipex_ppp_input
>  pipex_ip_input
>ipv4_input
>  ...
> 
> > At *2 there was a queue.  "crynlk" is a busy thread, since it is doing
> > decryption at *1.  I think it's better that pipex input be done by a
> > thread other than crypto, since it also has decryption at *3.
> 
> This is false.  *3 is done by another thread.
> It is the same if crypto driver is not CRYPTOCAP_F_MPSAFE.
> (crypto_invoke() is done by the caller's thread and the callback
>  (ipsec_input_cb) is called by the "crypto" thread.)
> 
> So I have no actual reason to keep the queues.
> 
> ok yasuoka for the diff which kills pipexintr.
>

Thanks for explanation and reviews.



Re: pipex(4): kill pipexintr()

2020-08-03 Thread Vitaliy Makkoveev
On Tue, Aug 04, 2020 at 01:26:14AM +0900, YASUOKA Masahiko wrote:
> On Sat, 1 Aug 2020 18:52:27 +0300
> Vitaliy Makkoveev  wrote:
> > On Sat, Aug 01, 2020 at 07:44:17PM +0900, YASUOKA Masahiko wrote:
> >> I'm not sure when it is broken, in old versions, it was assumed the
> >> pipex queues are empty when pipex_iface_stop() is called.  The problem
> >> mvs@ found is the assumption is not true any more.
> >> 
> >> pipex has a mechanism that delete a session when the queues are empty.
> >> 
> >> 819 Static void
> >> 820 pipex_timer(void *ignored_arg)
> >> 821 {
> >> (snip)
> >> 854 case PIPEX_STATE_CLOSED:
> >> 855 /*
> >> 856  * mbuf queued in pipexinq or pipexoutq 
> >> may have a
> >> 857  * refererce to this session.
> >> 858  */
> >> 859 if (!mq_empty(&pipexinq) || 
> >> !mq_empty(&pipexoutq))
> >> 860 continue;
> >> 861 
> >> 862 pipex_destroy_session(session);
> >> 863 break;
> >> 
> >> I think using this is better.
> >> 
> >> How about this?
> > 
> > Unfortunately your diff is incorrect. It introduces memory leaks and
> > breaks pppx(4). Also it is incomplete.
> 
> Thank you for your feedbacks.
> 
> > We have multiple ways to kill pipex(4) sessions:
> > 
> > 1. pppx(4)
> > 
> > We have `struct pppx_if' which has pointer to corresponding session and
> > this session is accessed directly within pppx(4) layer. Since we can't
> > destroy `pppx_if' in pipex(4) layer we can't destroy these sessions by
> > pipex_timer(). The only way to destroy them is pppx_if_destroy() which:
> > 
> > 1. unlink session by pipex_unlink_session()
> > 2. detach corresponding `ifnet' by if_detach()
> > 3. release session by pipex_rele_session() 
> > 
> > It's unsafe because mbuf queues can have references to this session.
> 
> Yes.
> 
> > 2. pppac(4)
> > 
> > We have no direct access to corresponding sessions within pppac(4)
> > layer. Also there are multiple ways to do this:
> > 
> > 1. pipex_ioctl() with `PIPEXSMODE' command. Underlay pipex_iface_stop()
> > walks through `pipex_session_list' and destroy sessions by
> > pipex_destroy_session() call. It's unsafe because we don't check queues.
> > 
> > 2. pipex_ioctl() with `PIPEXDSESSION'. pipex_close_session() will change
> > session's  state and pipex_timer() will kill this sessions later. This
> > is the only safe way.
> > 
> > 3. pipex_iface_fini(). The same as `PIPEXSMODE', pipex_iface_stop()
> > kills sessions, Which is also unsafe. Also we have another use after
> > free issue:
> > 
> >  cut begin 
> > 
> > pipex_iface_fini(struct pipex_iface_context *pipex_iface)
> > {
> > pool_put(&pipex_session_pool, pipex_iface->multicast_session);
> > NET_LOCK();
> > pipex_iface_stop(pipex_iface);
> > NET_UNLOCK();
> > }
> > 
> >  cut end 
> > 
> > `multicast_session' should be protected too. It also can be pushed to
> > `pipexoutq'.
> 
> Yes, I missed this point.
> 
> > Also, since at this time pipexintr() and pipex_iface_fini() are both
> > serialized by KERNEL_LOCK() too, we can't destroy a `multicast_session'
> > which is in use by pipexintr(). But when we drop KERNEL_LOCK()
> > around pipexintr() we can hit a use-after-free issue here. I already made
> > a diff to move this pool_put() under NET_LOCK(), but it was rejected by
> > mpi@ because:
> > 
> >  cut begin 
> > pipex_iface_fini() should be called on the last reference of the
> > 
> > descriptor.  So this shouldn't be necessary.  If there's an issue   
> > 
> > with the current order of the operations, we should certainly fix   
> > 
> > it differently.   
> >  cut end 
> 
> Yes, I understand what mpi@ is saying.  But this is a separate story.
> 
> > So I repeat it again: npppd(8) can be killed in every moment by SIGKILL
> > or by SIGSEGV and pppacclose() will be called and it will call
> > pipex_iface_fini(). `multicast_session' can be used in this moment by
> > pipexintr().
> > 
> > And no locks protect `multicast_session' itself.
> > 
> > The two diffs I proposed in this thr

Re: pipex(4): kill pipexintr()

2020-08-01 Thread Vitaliy Makkoveev
On Sat, Aug 01, 2020 at 07:44:17PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> I'm not sure when it is broken, in old versions, it was assumed the
> pipex queues are empty when pipex_iface_stop() is called.  The problem
> mvs@ found is the assumption is not true any more.
> 
> pipex has a mechanism that delete a session when the queues are empty.
> 
> 819 Static void
> 820 pipex_timer(void *ignored_arg)
> 821 {
> (snip)
> 854 case PIPEX_STATE_CLOSED:
> 855 /*
> 856  * mbuf queued in pipexinq or pipexoutq may 
> have a
> 857  * refererce to this session.
> 858  */
> 859 if (!mq_empty(&pipexinq) || 
> !mq_empty(&pipexoutq))
> 860 continue;
> 861 
> 862 pipex_destroy_session(session);
> 863 break;
> 
> I think using this is better.
> 
> How about this?
> 

Unfortunately your diff is incorrect. It introduces memory leaks and
breaks pppx(4). Also it is incomplete.

We have multiple ways to kill pipex(4) sessions:

1. pppx(4)

We have `struct pppx_if' which has pointer to corresponding session and
this session is accessed directly within pppx(4) layer. Since we can't
destroy `pppx_if' in pipex(4) layer we can't destroy these sessions by
pipex_timer(). The only way to destroy them is pppx_if_destroy() which:

1. unlink session by pipex_unlink_session()
2. detach corresponding `ifnet' by if_detach()
3. release session by pipex_rele_session() 

It's unsafe because mbuf queues can have references to this session.

2. pppac(4)

We have no direct access to corresponding sessions within pppac(4)
layer. Also there are multiple ways to do this:

1. pipex_ioctl() with `PIPEXSMODE' command. Underlay pipex_iface_stop()
walks through `pipex_session_list' and destroy sessions by
pipex_destroy_session() call. It's unsafe because we don't check queues.

2. pipex_ioctl() with `PIPEXDSESSION'. pipex_close_session() will change
the session's state and pipex_timer() will kill this session later. This
is the only safe way.

3. pipex_iface_fini(). The same as `PIPEXSMODE': pipex_iface_stop()
kills sessions, which is also unsafe. Also we have another use-after-free
issue:

 cut begin 

pipex_iface_fini(struct pipex_iface_context *pipex_iface)
{
pool_put(&pipex_session_pool, pipex_iface->multicast_session);
NET_LOCK();
pipex_iface_stop(pipex_iface);
NET_UNLOCK();
}

 cut end 

`multicast_session' should be protected too. It can also be pushed to
`pipexoutq'. Also, since at this time pipexintr() and pipex_iface_fini() are
both serialized by KERNEL_LOCK() too, we can't destroy a `multicast_session'
which is in use by pipexintr(). But when we drop KERNEL_LOCK()
around pipexintr() we can hit a use-after-free issue here. I already made
a diff to move this pool_put() under NET_LOCK(), but it was rejected by
mpi@ because:

 cut begin 
pipex_iface_fini() should be called on the last reference of the
descriptor.  So this shouldn't be necessary.  If there's an issue   
with the current order of the operations, we should certainly fix   
it differently.   
 cut end 

So I repeat it again: npppd(8) can be killed in every moment by SIGKILL
or by SIGSEGV and pppacclose() will be called and it will call
pipex_iface_fini(). `multicast_session' can be used in this moment by
pipexintr().

And no locks protect `multicast_session' itself.

The two diffs I proposed in this thread solve problems caused by
pipexintr().

I'm not sure about the bottleneck in the crypto thread, because we have no
simultaneous execution with pipexintr(), but the second diff, which
introduces refcounters, handles session destruction well, including
`multicast_session'. I found a small issue with the second diff, which stops
processing multicast packets, so I included an updated version below.

> diff --git a/sys/net/pipex.c b/sys/net/pipex.c
> index 2ad7757fee9..6fe14c400bf 100644
> --- a/sys/net/pipex.c
> +++ b/sys/net/pipex.c
> @@ -190,7 +190,7 @@ pipex_iface_stop(struct pipex_iface_context *pipex_iface)
>   LIST_FOREACH_SAFE(session, &pipex_session_list, session_list,
>   session_tmp) {
>   if (session->pipex_iface == pipex_iface)
> - pipex_destroy_session(session);
> + pipex_unlink_session(session);

Did you assume pipex_timer() is the only place where sessions can be released?
pipex_unlink_session() will detach the session from all lists, including
`pipex_session_list'. This session is leaked.

Also what to do with multicast sessions?

>   }
>  }
>  
> @@ -470,9 +470,16 @@ pipex_link_session(struct pipex_session *session,
>  void
>  pipex_unlink_session(struct pipex_session *session)
>  {
> + struct radix_node *rn;
> +
>   session->ifindex = 0;
>  
>   NET_ASSERT_LOCKED();
> + if 

Re: pipex(4): kill pipexintr()

2020-07-31 Thread Vitaliy Makkoveev
On Fri, Jul 31, 2020 at 10:25:48PM +0200, Martin Pieuchot wrote:
> On 31/07/20(Fri) 21:58, Vitaliy Makkoveev wrote:
> > [...] 
> > What prevents us from moving pipex(4) under its own lock?
> 
> Such a question won't lead us anywhere.  It assumes it makes sense to move
> pipex under its own lock.  This assumption has many drawbacks which clearly
> haven't been studied and more importantly it doesn't explain what for.
> 
> What is your goal?  What are you trying to achieve?  Improve latency?
> Improve performances?  Of which subsystem?  Where is the bottleneck?
> What is the architecture of the system?
> 

If I understood Yasuoka correctly, kthread "crynlk" is the bottleneck
and he wishes MPPE to be offloaded to another cpu. Since there is no
simultaneous execution of the crypto thread and pipexintr(), which does the
MPPE stuff, the easiest way to achieve this is to move pipex(4) under another
lock. I don't mean "stop everything and implement now", I mean we can do
it in the future.

If there is no bottleneck in crypto thread, I see no reason to not
remove pipexintr(). 

If the bottleneck is the crypto thread, I propose to implement
refcounters to fix issue.

And finish this thread.

> IMHO the KERNEL_LOCK() should be removed and anything else postponed at
> least until one has a clear understanding of the whole subsystem under
> the NET_LOCK().
>

The whole subsystem is under NET_LOCK() now. As soon as we fix the
use-after-free issue we can start to move pipex(4) out of KERNEL_LOCK().



Re: pipex(4): kill pipexintr()

2020-07-31 Thread Vitaliy Makkoveev
Well, since pipexintr() killing was rejected, I propose to implement
reference counters to protect pipex(4) session itself. Diff below does
this.

Index: sys/net/if_ethersubr.c
===
RCS file: /cvs/src/sys/net/if_ethersubr.c,v
retrieving revision 1.266
diff -u -p -r1.266 if_ethersubr.c
--- sys/net/if_ethersubr.c  22 Jul 2020 02:16:01 -  1.266
+++ sys/net/if_ethersubr.c  31 Jul 2020 13:56:31 -
@@ -527,6 +527,7 @@ ether_input(struct ifnet *ifp, struct mb
 
if ((session = pipex_pppoe_lookup_session(m)) != NULL) {
pipex_pppoe_input(m, session);
+   pipex_rele_session(session);
return;
}
}
Index: sys/net/if_gre.c
===
RCS file: /cvs/src/sys/net/if_gre.c,v
retrieving revision 1.158
diff -u -p -r1.158 if_gre.c
--- sys/net/if_gre.c10 Jul 2020 13:26:41 -  1.158
+++ sys/net/if_gre.c31 Jul 2020 13:56:31 -
@@ -984,9 +984,15 @@ gre_input_1(struct gre_tunnel *key, stru
struct pipex_session *session;
 
session = pipex_pptp_lookup_session(m);
-   if (session != NULL &&
-   pipex_pptp_input(m, session) == NULL)
-   return (NULL);
+   if (session != NULL) {
+   struct mbuf *m0;
+
+   m0 = pipex_pptp_input(m, session);
+   pipex_rele_session(session);
+
+   if (m0 == NULL)
+   return NULL;
+   }
}
 #endif
break;
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.122
diff -u -p -r1.122 pipex.c
--- sys/net/pipex.c 29 Jul 2020 12:09:31 -  1.122
+++ sys/net/pipex.c 31 Jul 2020 13:56:32 -
@@ -165,6 +165,7 @@ pipex_iface_init(struct pipex_iface_cont
 
/* virtual pipex_session entry for multicast */
session = pool_get(&pipex_session_pool, PR_WAITOK | PR_ZERO);
+   session->refs = 1;
session->is_multicast = 1;
session->pipex_iface = pipex_iface;
session->ifindex = ifindex;
@@ -197,9 +198,9 @@ pipex_iface_stop(struct pipex_iface_cont
 void
 pipex_iface_fini(struct pipex_iface_context *pipex_iface)
 {
-   pool_put(&pipex_session_pool, pipex_iface->multicast_session);
NET_LOCK();
pipex_iface_stop(pipex_iface);
+   pipex_rele_session(pipex_iface->multicast_session);
NET_UNLOCK();
 }
 
@@ -329,6 +330,7 @@ pipex_init_session(struct pipex_session 
 
/* prepare a new session */
session = pool_get(&pipex_session_pool, PR_WAITOK | PR_ZERO);
+   session->refs = 1;
session->state = PIPEX_STATE_INITIAL;
session->protocol = req->pr_protocol;
session->session_id = req->pr_session_id;
@@ -421,9 +423,20 @@ pipex_init_session(struct pipex_session 
return 0;
 }
 
+void pipex_ref_session(struct pipex_session *session)
+{
+   atomic_inc_int_nv(&session->refs);
+   KASSERT(session->refs != 0);
+}
+
 void
 pipex_rele_session(struct pipex_session *session)
 {
+   KASSERT(session->refs > 0);
+
+   if (atomic_dec_int_nv(&session->refs) > 0)
+   return;
+
if (session->mppe_recv.old_session_keys)
pool_put(&mppe_key_pool, session->mppe_recv.old_session_keys);
pool_put(&pipex_session_pool, session);
@@ -439,10 +452,12 @@ pipex_link_session(struct pipex_session 
 
if (!iface->pipexmode)
return (ENXIO);
-   if (pipex_lookup_by_session_id(session->protocol,
+   if (pipex_lookup_by_session_id_nonref(session->protocol,
session->session_id))
return (EEXIST);
 
+   pipex_ref_session(session);
+
session->pipex_iface = iface;
session->ifindex = iface->ifindex;
 
@@ -490,6 +505,8 @@ pipex_unlink_session(struct pipex_sessio
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(&pipex_session_list))
pipex_timer_stop();
+
+   pipex_rele_session(session);
 }
 
 Static int
@@ -505,8 +522,8 @@ pipex_add_session(struct pipex_session_r
 
/* commit the session */
if (!in_nullhost(session->ip_address.sin_addr)) {
-   if (pipex_lookup_by_ip_address(session->ip_address.sin_addr)
-   != NULL) {
+   if (pipex_lookup_by_ip_address_nonref(
+   session->ip_address.sin_addr) != NULL) {
error = EADDRINUSE;
goto free;
}
@@ -568,6 +585,7 @@ pipex_close_session(struct pipex_session
 struct pipex_iface_context *iface)
 {
struct 

Re: pipex(4): kill pipexintr()

2020-07-31 Thread Vitaliy Makkoveev
On Fri, Jul 31, 2020 at 08:26:22PM +0200, Martin Pieuchot wrote:
> On 31/07/20(Fri) 12:15, Vitaliy Makkoveev wrote:
> > On Fri, Jul 31, 2020 at 09:36:32AM +0900, YASUOKA Masahiko wrote:
> > > On Thu, 30 Jul 2020 22:43:10 +0300
> > > Vitaliy Makkoveev  wrote:
> > > > On Thu, Jul 30, 2020 at 10:05:13PM +0900, YASUOKA Masahiko wrote:
> > > >> On Thu, 30 Jul 2020 15:34:09 +0300
> > > >> Vitaliy Makkoveev  wrote:
> > > >> > On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
> > > >> >> Hi,
> > > >> >> 
> > > >> >> sys/net/if_ethersubr.c:
> > > >> >> 372 void
> > > >> >> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
> > > >> >> (snip)
> > > >> >> 519 #if NPPPOE > 0 || defined(PIPEX)
> > > >> >> 520 case ETHERTYPE_PPPOEDISC:
> > > >> >> 521 case ETHERTYPE_PPPOE:
> > > >> >> 522 if (m->m_flags & (M_MCAST | M_BCAST))
> > > >> >> 523 goto dropanyway;
> > > >> >> 524 #ifdef PIPEX
> > > >> >> 525 if (pipex_enable) {
> > > >> >> 526 struct pipex_session *session;
> > > >> >> 527 
> > > >> >> 528 if ((session = 
> > > >> >> pipex_pppoe_lookup_session(m)) != NULL) {
> > > >> >> 529 pipex_pppoe_input(m, session);
> > > >> >> 530 return;
> > > >> >> 531 }
> > > >> >> 532 }
> > > >> >> 533 #endif
> > > >> >> 
> > > >> >> previously a packet which branches to #529 is enqueued.
> > > >> >> 
> > > >> >> If the diff removes the queue, then the pipex input routine is
> > > >> >> executed by the NIC's interrupt handler.
> > > >> >> 
> > > >> >> The queues had been made to avoid that kind of situations.
> > > >> > 
> > > >> > It's not enqueued in pppoe case. According pipex_pppoe_input() code 
> > > >> > we
> > > >> > call pipex_common_input() with `useq' argument set to '0', so we 
> > > >> > don't
> > > >> > enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it 
> > > >> > to
> > > >> > ipv{4,6}_input().
> > > >> 
> > > >> You are right.  Sorry, I forgot about this which I did that by myself.
> > > >> 
> > > > 
> > > > I'm interested in the reason why you did that.
> > > > 
> > > >> >> Also I don't see a relation of the use-after-free problem and 
> > > >> >> killing
> > > >> >> queues.  Can't we fix the problem unless we kill the queues?
> > > >> > 
> > > >> > Yes we can. Reference counters allow us to keep orphan sessions in 
> > > >> > these
> > > >> > queues without use after free issue.
> > > >> > 
> > > >> > I will wait your commentaries current enqueuing before to do 
> > > >> > something.
> > > >> 
> > > >> I have another concern.
> > > >> 
> > > >> You might know, when L2TP/IPsec is used heavily, the crypto thread
> > > >> uses 100% of 1 CPU core.  In that case, that thread becomes like
> > > >> below:
> > > >> 
> > > >>   crypto thread -> udp_userreq -> pipex_l2tp_input
> > > >> 
> > > >> some clients are using MPPE(RC4 encryption) on CCP.  It's not so
> > > >> light.
> > > >> 
> > > >> How do we offload this for CPUs?  I am thinking that "pipex" can have
> > > >> a dedicated thread.  Do we have another scenario?
> > > >>
> > > > 
> > > > I suppose you mean udp_input(). What is you call "crypto thread"? I did
> > > > a little backtrace but I didn't find this thread.
> > > > 
> > > > ether_resolve
> > > >   if_input_local
> > > > ipv4_input
> > > >   ip_input_if
> > > > ip_ours
> > > > 

Re: pipex(4): kill pipexintr()

2020-07-31 Thread Vitaliy Makkoveev
On Fri, Jul 31, 2020 at 09:36:32AM +0900, YASUOKA Masahiko wrote:
> On Thu, 30 Jul 2020 22:43:10 +0300
> Vitaliy Makkoveev  wrote:
> > On Thu, Jul 30, 2020 at 10:05:13PM +0900, YASUOKA Masahiko wrote:
> >> On Thu, 30 Jul 2020 15:34:09 +0300
> >> Vitaliy Makkoveev  wrote:
> >> > On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
> >> >> Hi,
> >> >> 
> >> >> sys/net/if_ethersubr.c:
> >> >> 372 void
> >> >> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
> >> >> (snip)
> >> >> 519 #if NPPPOE > 0 || defined(PIPEX)
> >> >> 520 case ETHERTYPE_PPPOEDISC:
> >> >> 521 case ETHERTYPE_PPPOE:
> >> >> 522 if (m->m_flags & (M_MCAST | M_BCAST))
> >> >> 523 goto dropanyway;
> >> >> 524 #ifdef PIPEX
> >> >> 525 if (pipex_enable) {
> >> >> 526 struct pipex_session *session;
> >> >> 527 
> >> >> 528 if ((session = 
> >> >> pipex_pppoe_lookup_session(m)) != NULL) {
> >> >> 529 pipex_pppoe_input(m, session);
> >> >> 530 return;
> >> >> 531 }
> >> >> 532 }
> >> >> 533 #endif
> >> >> 
> >> >> previously a packet which branches to #529 is enqueued.
> >> >> 
> >> >> If the diff removes the queue, then the pipex input routine is
> >> >> executed by the NIC's interrupt handler.
> >> >> 
> >> >> The queues had been made to avoid that kind of situations.
> >> > 
> >> > It's not enqueued in pppoe case. According pipex_pppoe_input() code we
> >> > call pipex_common_input() with `useq' argument set to '0', so we don't
> >> > enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it to
> >> > ipv{4,6}_input().
> >> 
> >> You are right.  Sorry, I forgot about this which I did that by myself.
> >> 
> > 
> > I'm interested in the reason why you did that.
> > 
> >> >> Also I don't see a relation of the use-after-free problem and killing
> >> >> queues.  Can't we fix the problem unless we kill the queues?
> >> > 
> >> > Yes we can. Reference counters allow us to keep orphan sessions in these
> >> > queues without use after free issue.
> >> > 
> >> > I will wait your commentaries current enqueuing before to do something.
> >> 
> >> I have another concern.
> >> 
> >> You might know, when L2TP/IPsec is used heavily, the crypto thread
> >> uses 100% of 1 CPU core.  In that case, that thread becomes like
> >> below:
> >> 
> >>   crypto thread -> udp_userreq -> pipex_l2tp_input
> >> 
> >> some clients are using MPPE(RC4 encryption) on CCP.  It's not so
> >> light.
> >> 
> >> How do we offload this for CPUs?  I am thinking that "pipex" can have
> >> a dedicated thread.  Do we have another scenario?
> >>
> > 
> > I suppose you mean udp_input(). What is you call "crypto thread"? I did
> > a little backtrace but I didn't find this thread.
> > 
> > ether_resolve
> >   if_input_local
> > ipv4_input
> >   ip_input_if
> > ip_ours
> >   ip_deliver
> > udp_input (through pr_input)
> >   pipex_l2tp_input
> > 
> > ipi{,6}_mloopback
> >   if_input_local
> > ipv4_input
> >   ...
> > udp_input (through pr_input)
> >   pipex_l2tp_input
> > 
> > loinput
> >   if_input_local
> > ipv4_input
> >   ...
> > udp_input (through pr_input)
> >   pipex_l2tp_input
> > 
> > Also various pseudo drivers call ipv{4,6}_input() and the underlying
> > udp_input() too.
> > 
> > Except nfs, we call udp_usrreq() through socket layer only. Do you mean
> > userland as "crypto thread"?
> 
> Sorry, udp_usrreq() should be usr_input() and crypto thread meant a
> kthread for crypto_taskq_mp_safe, whose name is "crynlk" (see
> crypto_init()).
> 
> A packet of L2TP/IPsec (encapsulated IP/PPP/L2TP/UDP/ESP/UDP/IP) is
> processed like:
> 
>ipv4_input
>  ...
>udp_in

Re: pipex(4): kill pipexintr()

2020-07-30 Thread Vitaliy Makkoveev
On Thu, Jul 30, 2020 at 10:05:13PM +0900, YASUOKA Masahiko wrote:
> On Thu, 30 Jul 2020 15:34:09 +0300
> Vitaliy Makkoveev  wrote:
> > On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
> >> Hi,
> >> 
> >> sys/net/if_ethersubr.c:
> >> 372 void
> >> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
> >> (snip)
> >> 519 #if NPPPOE > 0 || defined(PIPEX)
> >> 520 case ETHERTYPE_PPPOEDISC:
> >> 521 case ETHERTYPE_PPPOE:
> >> 522 if (m->m_flags & (M_MCAST | M_BCAST))
> >> 523 goto dropanyway;
> >> 524 #ifdef PIPEX
> >> 525 if (pipex_enable) {
> >> 526 struct pipex_session *session;
> >> 527 
> >> 528 if ((session = pipex_pppoe_lookup_session(m)) 
> >> != NULL) {
> >> 529 pipex_pppoe_input(m, session);
> >> 530 return;
> >> 531 }
> >> 532 }
> >> 533 #endif
> >> 
> >> previously a packet which branches to #529 is enqueued.
> >> 
> >> If the diff removes the queue, then the pipex input routine is
> >> executed by the NIC's interrupt handler.
> >> 
> >> The queues had been made to avoid that kind of situations.
> > 
> > It's not enqueued in pppoe case. According pipex_pppoe_input() code we
> > call pipex_common_input() with `useq' argument set to '0', so we don't
> > enqueue mbuf(9) but pass it to pipex_ppp_input() which will pass it to
> > ipv{4,6}_input().
> 
> You are right.  Sorry, I forgot about this which I did that by myself.
> 

I'm interested in the reason why you did that.

> >> Also I don't see a relation of the use-after-free problem and killing
> >> queues.  Can't we fix the problem unless we kill the queues?
> > 
> > Yes we can. Reference counters allow us to keep orphan sessions in these
> > queues without use after free issue.
> > 
> > I will wait your commentaries current enqueuing before to do something.
> 
> I have another concern.
> 
> You might know, when L2TP/IPsec is used heavily, the crypto thread
> uses 100% of 1 CPU core.  In that case, that thread becomes like
> below:
> 
>   crypto thread -> udp_userreq -> pipex_l2tp_input
> 
> some clients are using MPPE(RC4 encryption) on CCP.  It's not so
> light.
> 
> How do we offload this for CPUs?  I am thinking that "pipex" can have
> a dedicated thread.  Do we have another scenario?
>

I suppose you mean udp_input(). What do you call the "crypto thread"? I did
a little backtrace but I didn't find this thread.

ether_resolve
  if_input_local
ipv4_input
  ip_input_if
ip_ours
  ip_deliver
udp_input (through pr_input)
  pipex_l2tp_input

ipi{,6}_mloopback
  if_input_local
ipv4_input
  ...
udp_input (through pr_input)
  pipex_l2tp_input

loinput
  if_input_local
ipv4_input
  ...
udp_input (through pr_input)
  pipex_l2tp_input

Also various pseudo drivers call ipv{4,6}_input() and the underlying
udp_input() too.

Except nfs, we call udp_usrreq() through socket layer only. Do you mean
userland as "crypto thread"?

But udp_input(), udp_usrreq() and pipexintr() are serialized by
NET_LOCK(). We should move pipex(4) under its own rwlock to allow their
simultaneous execution. Also, should we move outgoing pppx(4) mbufs to
a queue too?



Re: pipex(4): kill pipexintr()

2020-07-30 Thread Vitaliy Makkoveev
On Thu, Jul 30, 2020 at 09:13:46PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> sys/net/if_ethersubr.c:
> 372 void
> 373 ether_input(struct ifnet *ifp, struct mbuf *m)
> (snip)
> 519 #if NPPPOE > 0 || defined(PIPEX)
> 520 case ETHERTYPE_PPPOEDISC:
> 521 case ETHERTYPE_PPPOE:
> 522 if (m->m_flags & (M_MCAST | M_BCAST))
> 523 goto dropanyway;
> 524 #ifdef PIPEX
> 525 if (pipex_enable) {
> 526 struct pipex_session *session;
> 527 
> 528 if ((session = pipex_pppoe_lookup_session(m)) != 
> NULL) {
> 529 pipex_pppoe_input(m, session);
> 530 return;
> 531 }
> 532 }
> 533 #endif
> 
> previously a packet which branches to #529 is enqueued.
> 
> If the diff removes the queue, then the pipex input routine is
> executed by the NIC's interrupt handler.
> 
> The queues had been made to avoid that kind of situations.

It's not enqueued in the pppoe case. According to the pipex_pppoe_input()
code, we call pipex_common_input() with the `useq' argument set to '0', so
we don't enqueue the mbuf(9) but pass it to pipex_ppp_input(), which will
pass it to ipv{4,6}_input().

 cut begin 

pipex_pppoe_input(struct mbuf *m0, struct pipex_session *session)
{
int hlen;
struct pipex_pppoe_header pppoe;

NET_ASSERT_LOCKED();
/* already checked at pipex_pppoe_lookup_session */
KASSERT(m0->m_pkthdr.len >= (sizeof(struct ether_header) +
sizeof(pppoe)));

m_copydata(m0, sizeof(struct ether_header),
sizeof(struct pipex_pppoe_header), (caddr_t)&pppoe);

hlen = sizeof(struct ether_header) + sizeof(struct pipex_pppoe_header);
if ((m0 = pipex_common_input(session, m0, hlen, ntohs(pppoe.length), 0))
== NULL)
return (NULL);
m_freem(m0); 
session->stat.ierrors++;
return (NULL);
}

pipex_common_input(struct pipex_session *session, struct mbuf *m0, int hlen,
int plen, int useq)
{
/* skip */

if (!useq) {
pipex_ppp_input(m0, session, 0);
return (NULL);
}

/* input ppp packets to kernel session */
if (pipex_ppp_enqueue(m0, session, &pipexinq) != 0)
goto dropped;
else
return (NULL);


 cut end 

We enqueue pppac(4) related mbufs, except incoming pppoe. We enqueue
pppx(4) related incoming mbufs except pppoe. We don't enqueue pppx(4)
outgoing mbufs, we don't enqueue pppoe incoming mbufs.

> 
> Also I don't see a relation of the use-after-free problem and killing
> queues.  Can't we fix the problem unless we kill the queues?
>

Yes we can. Reference counters allow us to keep orphan sessions in these
queues without the use-after-free issue.

I will wait for your comments on the current enqueuing before doing anything.



pipex(4): kill pipexintr()

2020-07-29 Thread Vitaliy Makkoveev
Now pipex(4) is fully covered by NET_LOCK() and this is documented. But
we still have an issue with pipex(4) session itself and I guess it's
time to fix it.

We have `pipexinq' and `pipexoutq' mbuf(9) queues to store mbufs. Each
mbuf(9) passed to these queues stores the pointer to corresponding
session referenced as `m_pkthdr.ph_cookie'. We enqueue incoming mbufs for
pppx(4) and incoming and outgoing mbufs for pppac(4). But we don't
enqueue pppoe related mbufs. After packet was enqueued to corresponding
queue we call schednetisr() which just schedules netisr() to run:

 cut begin 

780 pipex_ppp_enqueue(struct mbuf *m0, struct pipex_session *session,
781 struct mbuf_queue *mq)
782 {
783 m0->m_pkthdr.ph_cookie = session;
784 /* XXX need to support other protocols */
785 m0->m_pkthdr.ph_ppp_proto = PPP_IP;
786 
787 if (mq_enqueue(mq, m0) != 0)
788 return (1);
789 
790 schednetisr(NETISR_PIPEX);
791 
792 return (0);
793 }

 cut end 

Also we have pipex_timer() which should destroy session in safe way, but
it does this only for pppac(4) and only for sessions closed by
`PIPEXDSESSION' command:

 cut begin 

812 pipex_timer(void *ignored_arg)
813 {
/* skip */
846 case PIPEX_STATE_CLOSED:
847 /*
848  * mbuf queued in pipexinq or pipexoutq may have a
849  * refererce to this session.
850  */
851 if (!mq_empty(&pipexinq) || !mq_empty(&pipexoutq))
852 continue;
853 
854 pipex_destroy_session(session);
855 break;

 cut end 

While we destroy sessions through pipex_rele_session() or through
pipex_iface_fini() or through `PIPEXSMODE' command we don't check
`pipexinq' and `pipexoutq' state. This means we can break them.

It's not guaranteed that netisr() will start just after the schednetisr()
call. This means we can destroy a session while a corresponding mbuf(9) is
still stored within `pipexinq' or `pipexoutq'. Its `m_pkthdr.ph_cookie' still
stores a pointer to the destroyed session, and we have a use-after-free
issue. I wonder why we haven't caught a panic yet.

I propose to kill `pipexinq', `pipexoutq' and pipexintr(). There is
absolutely no reason for them to exist. This should not only fix the issue
described above but also simplify the code.

Other ways are to implement reference counters for session or walk
through mbuf(9) queues and kill corresponding mbufs. It doesn't make
sense to go these ways.

Index: lib/libc/sys/sysctl.2
===
RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
retrieving revision 1.40
diff -u -p -r1.40 sysctl.2
--- lib/libc/sys/sysctl.2   17 May 2020 05:48:39 -  1.40
+++ lib/libc/sys/sysctl.2   29 Jul 2020 13:47:40 -
@@ -2033,35 +2033,11 @@ The currently defined variable names are
 .Bl -column "Third level name" "integer" "Changeable" -offset indent
 .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
 .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
-.It Dv PIPEXCTL_INQ Ta node Ta not applicable
-.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
 .El
 .Bl -tag -width "123456"
 .It Dv PIPEXCTL_ENABLE
 If set to 1, enable PIPEX processing.
 The default is 0.
-.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about the PIPEX packet input queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
-.It Dv PIPEXCTL_OUTQ Pq Va net.pipex.outq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about PIPEX packet output queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
 .El
 .El
 .Ss CTL_VFS
Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.616
diff -u -p -r1.616 if.c
--- sys/net/if.c24 Jul 2020 18:17:14 -  1.616
+++ sys/net/if.c29 Jul 2020 13:47:44 -
@@ -909,13 +909,6 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
 #endif
-#ifdef PIPEX
-   if (n & (1 << NETISR_PIPEX)) {
-   KERNEL_LOCK();
-   pipexintr();
-   KERNEL_UNLOCK();
-   }
-#endif
t |= n;
}
 
Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.51
diff -u -p -r1.51 netisr.h
--- sys/net/netisr.h6 Aug 2019 22:57:54 -   1.51
+++ sys/net/netisr.h29 Jul 2020 13:47:44 -
@@ -48,7 +48,6 @@
 #defineNETISR_IPV6 24  /* 

Re: NET_LOCK and trunk detach

2020-07-28 Thread Vitaliy Makkoveev



> On 29 Jul 2020, at 00:09, sven falempin  wrote:
> 
> On Tue, Jul 28, 2020 at 4:42 PM Vitaliy Makkoveev  wrote:
>> 
>> On Tue, Jul 28, 2020 at 04:10:01PM -0400, sven falempin wrote:
>>> Hello,
>>> 
>>> I am running some trunk interfaces in a multi core environment,
>>> it's a slightly modified version, i have a few NET_ASSERT_LOCKED();
>>> suspecting some multi core shenanigans, which i guess was confirmed:
>>> (unsure the have X meaning, but i ' m pretty sure 256 is very wrong)
>>> the if_trunk.c locking is completely unmodified
>>> The code is 6.7-stable
>>> 
>>> splassert: lacp_detach: want 2 have 0
>>> splassert: lacp_detach: want 2 have 0
>>> splassert: lacp_detach: want 2 have 256
>>> 
>>> I noticed : trunk_clone_destroy ,call
>>> 
>>>if (tr->tr_proto != TRUNK_PROTO_NONE)
>>>tr->tr_detach(tr);
>>> 
>>> outside the lock, and that trunk_ioctl call it
>>> 
>>>if (tr->tr_proto != TRUNK_PROTO_NONE)
>>>error = tr->tr_detach(tr);
>>> 
>>> but ioctl is as far as i understand locked.
>>> I'm unsure if the difficult and amazing unlocking work
>>> did an oopsies or if ioctl is already assumed unlocked.
>>> 
>>> Kindly inform me.
>>> Best regards, thank you for reading.
>>> 
>> 
>> lacp_detach() touches nothing which requires NET_LOCK(). What is the
>> reason you placed assertion to lacp_detach()?
> 
> <>
> 
> the lacp_detach is not yours, i put them here because i have a NULL pointer
> popping in other 'driver' callback.
> 
> I'm tracking this and trying to understand  how this memory is 'nullified'
> mid function.

I have no telepathy skills. Sorry. If you have panics, please send the
dmesg(8) output and instructions to reproduce.

> 
> I do not think putting NET_ASSERT_LOCKED can be harmful in any way.
> If so please tell me.
> 

What is the data you think needs to be protected by netlock?

> I am just tracking  a bug  and noticed these detach locking strangeness.
> 



Re: NET_LOCK and trunk detach

2020-07-28 Thread Vitaliy Makkoveev
On Tue, Jul 28, 2020 at 04:10:01PM -0400, sven falempin wrote:
> Hello,
> 
> I am running some trunk interfaces in a multi core environment,
> it's a slightly modified version, i have a few NET_ASSERT_LOCKED();
> suspecting some multi core shenanigans, which i guess was confirmed:
> (unsure the have X meaning, but i ' m pretty sure 256 is very wrong)
> the if_trunk.c locking is completely unmodified
> The code is 6.7-stable
> 
> splassert: lacp_detach: want 2 have 0
> splassert: lacp_detach: want 2 have 0
> splassert: lacp_detach: want 2 have 256
> 
> I noticed : trunk_clone_destroy ,call
> 
> if (tr->tr_proto != TRUNK_PROTO_NONE)
> tr->tr_detach(tr);
> 
> outside the lock, and that trunk_ioctl call it
> 
> if (tr->tr_proto != TRUNK_PROTO_NONE)
> error = tr->tr_detach(tr);
> 
> but ioctl is as far as i understand locked.
> I'm unsure if the difficult and amazing unlocking work
> did an oopsies or if ioctl is already assumed unlocked.
> 
> Kindly inform me.
> Best regards, thank you for reading.
> 

lacp_detach() touches nothing which requires NET_LOCK(). What is the
reason you placed the assertion in lacp_detach()?



Re: pipex(4): document global data locks

2020-07-28 Thread Vitaliy Makkoveev
On Tue, Jul 28, 2020 at 10:26:53AM +0200, Martin Pieuchot wrote:
> On 17/07/20(Fri) 17:04, Vitaliy Makkoveev wrote:
> > Subj. Also add NET_ASSERT_LOCKED() to pipex_{link,unlink,rele}_session()
> > to be sure they called under NET_LOCK().
> 
> pipex_rele_session() is freeing memory.  When this function is called
> those chunks of memory shouldn't be referenced by any other CPU or piece
> of descriptor in the network stack.  So the NET_LOCK() shouldn't be
> required.

Fixed.

> 
> Rest of the diff is fine.  I'd suggest you put the assertions just above
> the LIST_INSERT or LIST_REMOVE like it is done in other parts of the stack.
>

We should not add a session to the lists if `iface->pipexmode' is null. So
the check and the modification should be done while the same lock is held.
That's why the assertion in pipex_link_session() is in the right place, just
before the `iface->pipexmode' check.

I moved assertion just before LIST_REMOVE() within
pipex_unlink_session().

Updated diff below.

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.120
diff -u -p -r1.120 pipex.c
--- sys/net/pipex.c 17 Jul 2020 08:57:27 -  1.120
+++ sys/net/pipex.c 28 Jul 2020 09:47:51 -
@@ -83,19 +83,24 @@ struct pool pipex_session_pool;
 struct pool mppe_key_pool;
 
 /*
- * static/global variables
+ * Global data
+ * Locks used to protect global data
+ *   A   atomic operation
+ *   I   immutable after creation
+ *   N   net lock
  */
-intpipex_enable = 0;
+
+intpipex_enable = 0;   /* [N] */
 struct pipex_hash_head
-pipex_session_list,/* master session list 
*/
-pipex_close_wait_list, /* expired session list */
-pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* peer's address hash 
*/
-pipex_id_hashtable[PIPEX_HASH_SIZE];   /* peer id hash */
+pipex_session_list,/* [N] master session 
list */
+pipex_close_wait_list, /* [N] expired session list */
+pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* [N] peer's address 
hash */
+pipex_id_hashtable[PIPEX_HASH_SIZE];   /* [N] peer id hash */
 
-struct radix_node_head *pipex_rd_head4 = NULL;
-struct radix_node_head *pipex_rd_head6 = NULL;
+struct radix_node_head *pipex_rd_head4 = NULL; /* [N] */
+struct radix_node_head *pipex_rd_head6 = NULL; /* [N] */
 struct timeout pipex_timer_ch; /* callout timer context */
-int pipex_prune = 1;   /* walk list every seconds */
+int pipex_prune = 1;   /* [I] walk list every seconds */
 
 /* pipex traffic queue */
 struct mbuf_queue pipexinq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
@@ -105,7 +110,7 @@ struct mbuf_queue pipexoutq = MBUF_QUEUE
 #define ph_ppp_proto ether_vtag
 
 #ifdef PIPEX_DEBUG
-int pipex_debug = 0;   /* systcl net.inet.ip.pipex_debug */
+int pipex_debug = 0;   /* [A] systcl net.inet.ip.pipex_debug */
 #endif
 
 /* PPP compression == MPPE is assumed, so don't answer CCP Reset-Request. */
@@ -430,6 +435,8 @@ pipex_link_session(struct pipex_session 
 {
struct pipex_hash_head *chain;
 
+   NET_ASSERT_LOCKED();
+
if (!iface->pipexmode)
return (ENXIO);
if (pipex_lookup_by_session_id(session->protocol,
@@ -465,6 +472,7 @@ pipex_unlink_session(struct pipex_sessio
 {
session->ifindex = 0;
 
+   NET_ASSERT_LOCKED();
LIST_REMOVE(session, id_chain);
 #if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {



Re: pipex_iface_fini() release multicast session under NET_LOCK()

2020-07-28 Thread Vitaliy Makkoveev
On Tue, Jul 28, 2020 at 10:23:08AM +0200, Martin Pieuchot wrote:
> On 17/07/20(Fri) 16:29, Vitaliy Makkoveev wrote:
> > We are going to lock the whole pipex(4) by NET_LOCK(). So move
> > `multicast_session' freeing undet NET_LOCK() too.
> 
> pipex_iface_fini() should be called on the last reference of the
> descriptor.  So this shouldn't be necessary.  If there's an issue
> with the current order of the operations, we should certainly fix
> it differently.
> 

`multicast_session' can be processed by pipexintr() while we do
acclose(). There is no wrong order; npppd(8) or another userland
program can be killed at any time.

> > Index: sys/net/pipex.c
> > ===
> > RCS file: /cvs/src/sys/net/pipex.c,v
> > retrieving revision 1.120
> > diff -u -p -r1.120 pipex.c
> > --- sys/net/pipex.c 17 Jul 2020 08:57:27 -  1.120
> > +++ sys/net/pipex.c 17 Jul 2020 13:23:16 -
> > @@ -192,8 +192,8 @@ pipex_iface_stop(struct pipex_iface_cont
> >  void
> >  pipex_iface_fini(struct pipex_iface_context *pipex_iface)
> >  {
> > -   pool_put(_session_pool, pipex_iface->multicast_session);
> > NET_LOCK();
> > +   pool_put(_session_pool, pipex_iface->multicast_session);
> > pipex_iface_stop(pipex_iface);
> > NET_UNLOCK();
> >  }
> > 
> 



pipex(4): document global data locks

2020-07-17 Thread Vitaliy Makkoveev
Subj. Also add NET_ASSERT_LOCKED() to pipex_{link,unlink,rele}_session()
to be sure they are called under NET_LOCK().

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.120
diff -u -p -r1.120 pipex.c
--- sys/net/pipex.c 17 Jul 2020 08:57:27 -  1.120
+++ sys/net/pipex.c 17 Jul 2020 14:01:10 -
@@ -83,19 +83,24 @@ struct pool pipex_session_pool;
 struct pool mppe_key_pool;
 
 /*
- * static/global variables
+ * Global data
+ * Locks used to protect global data
+ *   A   atomic operation
+ *   I   immutable after creation
+ *   N   net lock
  */
-intpipex_enable = 0;
+
+intpipex_enable = 0;   /* [N] */
 struct pipex_hash_head
-pipex_session_list,/* master session list 
*/
-pipex_close_wait_list, /* expired session list */
-pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* peer's address hash 
*/
-pipex_id_hashtable[PIPEX_HASH_SIZE];   /* peer id hash */
+pipex_session_list,/* [N] master session 
list */
+pipex_close_wait_list, /* [N] expired session list */
+pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* [N] peer's address 
hash */
+pipex_id_hashtable[PIPEX_HASH_SIZE];   /* [N] peer id hash */
 
-struct radix_node_head *pipex_rd_head4 = NULL;
-struct radix_node_head *pipex_rd_head6 = NULL;
+struct radix_node_head *pipex_rd_head4 = NULL; /* [N] */
+struct radix_node_head *pipex_rd_head6 = NULL; /* [N] */
 struct timeout pipex_timer_ch; /* callout timer context */
-int pipex_prune = 1;   /* walk list every seconds */
+int pipex_prune = 1;   /* [I] walk list every seconds */
 
 /* pipex traffic queue */
 struct mbuf_queue pipexinq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
@@ -105,7 +110,7 @@ struct mbuf_queue pipexoutq = MBUF_QUEUE
 #define ph_ppp_proto ether_vtag
 
 #ifdef PIPEX_DEBUG
-int pipex_debug = 0;   /* systcl net.inet.ip.pipex_debug */
+int pipex_debug = 0;   /* [A] systcl net.inet.ip.pipex_debug */
 #endif
 
 /* PPP compression == MPPE is assumed, so don't answer CCP Reset-Request. */
@@ -419,6 +424,8 @@ pipex_init_session(struct pipex_session 
 void
 pipex_rele_session(struct pipex_session *session)
 {
+   NET_ASSERT_LOCKED();
+
if (session->mppe_recv.old_session_keys)
pool_put(_key_pool, session->mppe_recv.old_session_keys);
pool_put(_session_pool, session);
@@ -430,6 +437,8 @@ pipex_link_session(struct pipex_session 
 {
struct pipex_hash_head *chain;
 
+   NET_ASSERT_LOCKED();
+
if (!iface->pipexmode)
return (ENXIO);
if (pipex_lookup_by_session_id(session->protocol,
@@ -463,6 +472,8 @@ pipex_link_session(struct pipex_session 
 void
 pipex_unlink_session(struct pipex_session *session)
 {
+   NET_ASSERT_LOCKED();
+
session->ifindex = 0;
 
LIST_REMOVE(session, id_chain);



ppp{ac,x}(4): document locks

2020-07-17 Thread Vitaliy Makkoveev
Subj.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.97
diff -u -p -r1.97 if_pppx.c
--- sys/net/if_pppx.c   17 Jul 2020 08:57:27 -  1.97
+++ sys/net/if_pppx.c   17 Jul 2020 13:51:14 -
@@ -115,9 +115,16 @@ int pppxdebug = 0;
 
 struct pppx_if;
 
+/*
+ * Locks used to protect struct members and global data
+ *   I   immutable after creation
+ *   K   kernel lock
+ *   N   net lock
+ */
+
 struct pppx_dev {
-   LIST_ENTRY(pppx_dev)pxd_entry;
-   int pxd_unit;
+   LIST_ENTRY(pppx_dev)pxd_entry;  /* [K] */
+   int pxd_unit;   /* [I] */
 
/* kq shizz */
struct selinfo  pxd_rsel;
@@ -127,34 +134,36 @@ struct pppx_dev {
 
/* queue of packets for userland to service - protected by splnet */
struct mbuf_queue   pxd_svcq;
-   int pxd_waiting;
-   LIST_HEAD(,pppx_if) pxd_pxis;
+   int pxd_waiting;/* [N] */
+   LIST_HEAD(,pppx_if) pxd_pxis;   /* [N] */
 };
 
-LIST_HEAD(, pppx_dev)  pppx_devs = LIST_HEAD_INITIALIZER(pppx_devs);
+LIST_HEAD(, pppx_dev)  pppx_devs =
+   LIST_HEAD_INITIALIZER(pppx_devs); /* [K] */
 struct poolpppx_if_pl;
 
 struct pppx_dev*pppx_dev_lookup(dev_t);
 struct pppx_dev*pppx_dev2pxd(dev_t);
 
 struct pppx_if_key {
-   int pxik_session_id;
-   int pxik_protocol;
+   int pxik_session_id;/* [I] */
+   int pxik_protocol;  /* [I] */
 };
 
 struct pppx_if {
-   struct pppx_if_key  pxi_key; /* must be first in the struct */
+   struct pppx_if_key  pxi_key;/* [I] must be first
+   in the struct */
 
-   RBT_ENTRY(pppx_if)  pxi_entry;
-   LIST_ENTRY(pppx_if) pxi_list;
+   RBT_ENTRY(pppx_if)  pxi_entry;  /* [N] */
+   LIST_ENTRY(pppx_if) pxi_list;   /* [N] */
 
-   int pxi_ready;
+   int pxi_ready;  /* [N] */
 
-   int pxi_unit;
+   int pxi_unit;   /* [I] */
struct ifnetpxi_if;
-   struct pppx_dev *pxi_dev;
-   struct pipex_session*pxi_session;
-   struct pipex_iface_context  pxi_ifcontext;
+   struct pppx_dev *pxi_dev;   /* [I] */
+   struct pipex_session*pxi_session;   /* [I] */
+   struct pipex_iface_context  pxi_ifcontext;  /* [N] */
 };
 
 static inline int
@@ -163,7 +172,7 @@ pppx_if_cmp(const struct pppx_if *a, con
return memcmp(>pxi_key, >pxi_key, sizeof(a->pxi_key));
 }
 
-RBT_HEAD(pppx_ifs, pppx_if)pppx_ifs = RBT_INITIALIZER(_ifs);
+RBT_HEAD(pppx_ifs, pppx_if) pppx_ifs = RBT_INITIALIZER(_ifs); /* [N] */
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
 intpppx_if_next_unit(void);
@@ -995,12 +1004,19 @@ RBT_GENERATE(pppx_ifs, pppx_if, pxi_entr
 
 #include 
 
+/*
+ * Locks used to protect struct members and global data
+ *   I   immutable after creation
+ *   K   kernel lock
+ *   N   net lock
+ */
+
 struct pppac_softc {
struct ifnetsc_if;
-   unsigned intsc_dead;
-   dev_t   sc_dev;
+   unsigned intsc_dead;/* [N] */
+   dev_t   sc_dev; /* [I] */
LIST_ENTRY(pppac_softc)
-   sc_entry;
+   sc_entry;   /* [K] */
 
struct mutexsc_rsel_mtx;
struct selinfo  sc_rsel;
@@ -1014,7 +1030,7 @@ struct pppac_softc {
sc_mq;
 };
 
-LIST_HEAD(pppac_list, pppac_softc);
+LIST_HEAD(pppac_list, pppac_softc);/* [K] */
 
 static voidfilt_pppac_rdetach(struct knote *);
 static int filt_pppac_read(struct knote *, long);



pipex_iface_fini() release multicast session under NET_LOCK()

2020-07-17 Thread Vitaliy Makkoveev
We are going to lock the whole pipex(4) by NET_LOCK(). So move
`multicast_session' freeing under NET_LOCK() too.

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.120
diff -u -p -r1.120 pipex.c
--- sys/net/pipex.c 17 Jul 2020 08:57:27 -  1.120
+++ sys/net/pipex.c 17 Jul 2020 13:23:16 -
@@ -192,8 +192,8 @@ pipex_iface_stop(struct pipex_iface_cont
 void
 pipex_iface_fini(struct pipex_iface_context *pipex_iface)
 {
-   pool_put(_session_pool, pipex_iface->multicast_session);
NET_LOCK();
+   pool_put(_session_pool, pipex_iface->multicast_session);
pipex_iface_stop(pipex_iface);
NET_UNLOCK();
 }



Re: Add missing `IFXF_CLONED' to pseudo-interfaces

2020-07-17 Thread Vitaliy Makkoveev
ping?

> On 10 Jul 2020, at 14:59, Vitaliy Makkoveev  wrote:
> 
> Some pseudo interfaces have missing `IFXF_CLONED' flag. Diff below fixes
> this.
> 
> Index: sys/net/if_ppp.c
> ===
> RCS file: /cvs/src/sys/net/if_ppp.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_ppp.c
> --- sys/net/if_ppp.c  24 Jun 2020 22:03:42 -  1.114
> +++ sys/net/if_ppp.c  10 Jul 2020 11:57:39 -
> @@ -220,6 +220,7 @@ ppp_clone_create(struct if_clone *ifc, i
>   sc->sc_if.if_output = pppoutput;
>   sc->sc_if.if_start = ppp_ifstart;
>   sc->sc_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_if.if_xflags = IFXF_CLONED;
>   IFQ_SET_MAXLEN(>sc_if.if_snd, IFQ_MAXLEN);
>   mq_init(>sc_inq, IFQ_MAXLEN, IPL_NET);
>   ppp_pkt_list_init(>sc_rawq, IFQ_MAXLEN);
> Index: sys/net/if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 if_pppoe.c
> --- sys/net/if_pppoe.c16 Jun 2019 00:10:37 -  1.68
> +++ sys/net/if_pppoe.c10 Jul 2020 11:57:39 -
> @@ -210,6 +210,7 @@ pppoe_clone_create(struct if_clone *ifc,
>   sc->sc_sppp.pp_if.if_ioctl = pppoe_ioctl;
>   sc->sc_sppp.pp_if.if_start = pppoe_start;
>   sc->sc_sppp.pp_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_sppp.pp_if.if_xflags = IFXF_CLONED;
>   sc->sc_sppp.pp_tls = pppoe_tls;
>   sc->sc_sppp.pp_tlf = pppoe_tlf;
>   IFQ_SET_MAXLEN(>sc_sppp.pp_if.if_snd, IFQ_MAXLEN);
> Index: sys/net/if_switch.c
> ===
> RCS file: /cvs/src/sys/net/if_switch.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_switch.c
> --- sys/net/if_switch.c   6 Nov 2019 03:51:26 -   1.30
> +++ sys/net/if_switch.c   10 Jul 2020 11:57:39 -
> @@ -159,6 +159,7 @@ switch_clone_create(struct if_clone *ifc
>   ifp->if_start = NULL;
>   ifp->if_type = IFT_BRIDGE;
>   ifp->if_hdrlen = ETHER_HDR_LEN;
> + ifp->if_xflags = IFXF_CLONED;
>   TAILQ_INIT(>sc_swpo_list);
> 
>   sc->sc_unit = unit;
> Index: sys/net/if_trunk.c
> ===
> RCS file: /cvs/src/sys/net/if_trunk.c,v
> retrieving revision 1.146
> diff -u -p -r1.146 if_trunk.c
> --- sys/net/if_trunk.c17 Jun 2020 06:45:22 -  1.146
> +++ sys/net/if_trunk.c10 Jul 2020 11:57:39 -
> @@ -184,6 +184,7 @@ trunk_clone_create(struct if_clone *ifc,
>   ifp->if_ioctl = trunk_ioctl;
>   ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_capabilities = trunk_capabilities(tr);
> + ifp->if_xflags = IFXF_CLONED;
> 
>   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
>   ifc->ifc_name, unit);
> Index: sys/net/if_tun.c
> ===
> RCS file: /cvs/src/sys/net/if_tun.c,v
> retrieving revision 1.222
> diff -u -p -r1.222 if_tun.c
> --- sys/net/if_tun.c  13 May 2020 00:48:06 -  1.222
> +++ sys/net/if_tun.c  10 Jul 2020 11:57:39 -
> @@ -236,6 +236,7 @@ tun_create(struct if_clone *ifc, int uni
>   ifp->if_hardmtu = TUNMRU;
>   ifp->if_link_state = LINK_STATE_DOWN;
>   IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
> + ifp->if_xflags = IFXF_CLONED;
> 
>   if_counters_alloc(ifp);
> 
> Index: sys/net/if_vether.c
> ===
> RCS file: /cvs/src/sys/net/if_vether.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_vether.c
> --- sys/net/if_vether.c   9 Jan 2018 15:24:24 -   1.30
> +++ sys/net/if_vether.c   10 Jul 2020 11:57:39 -
> @@ -88,6 +88,7 @@ vether_clone_create(struct if_clone *ifc
> 
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
> 
>   ifmedia_init(>sc_media, 0, vether_media_change,
>   vether_media_status);
> Index: sys/net/if_vxlan.c
> ===
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 if_vxlan.c
> --- sys/net/if_vxlan.c12 Apr 2020 11:56:52 -  1.77
> +++ sys/net/if_vxlan.c10 Jul 2020 11:57:39 -
> @@ -155,6 +155,7 @@ vxlan_clone_create(struct if_clone *ifc,
> 
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
> 
>   ifmedia_init(>sc_media, 0, vxlan_media_change,
>   vxlan_media_status);



Re: Add missing `IFXF_CLONED' to pseudo-interfaces

2020-07-17 Thread Vitaliy Makkoveev
anyone?

On Fri, Jul 10, 2020 at 02:59:55PM +0300, Vitaliy Makkoveev wrote:
> Some pseudo interfaces have missing `IFXF_CLONED' flag. Diff below fixes
> this.
> 
> Index: sys/net/if_ppp.c
> ===
> RCS file: /cvs/src/sys/net/if_ppp.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_ppp.c
> --- sys/net/if_ppp.c  24 Jun 2020 22:03:42 -  1.114
> +++ sys/net/if_ppp.c  10 Jul 2020 11:57:39 -
> @@ -220,6 +220,7 @@ ppp_clone_create(struct if_clone *ifc, i
>   sc->sc_if.if_output = pppoutput;
>   sc->sc_if.if_start = ppp_ifstart;
>   sc->sc_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_if.if_xflags = IFXF_CLONED;
>   IFQ_SET_MAXLEN(>sc_if.if_snd, IFQ_MAXLEN);
>   mq_init(>sc_inq, IFQ_MAXLEN, IPL_NET);
>   ppp_pkt_list_init(>sc_rawq, IFQ_MAXLEN);
> Index: sys/net/if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 if_pppoe.c
> --- sys/net/if_pppoe.c16 Jun 2019 00:10:37 -  1.68
> +++ sys/net/if_pppoe.c10 Jul 2020 11:57:39 -
> @@ -210,6 +210,7 @@ pppoe_clone_create(struct if_clone *ifc,
>   sc->sc_sppp.pp_if.if_ioctl = pppoe_ioctl;
>   sc->sc_sppp.pp_if.if_start = pppoe_start;
>   sc->sc_sppp.pp_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_sppp.pp_if.if_xflags = IFXF_CLONED;
>   sc->sc_sppp.pp_tls = pppoe_tls;
>   sc->sc_sppp.pp_tlf = pppoe_tlf;
>   IFQ_SET_MAXLEN(>sc_sppp.pp_if.if_snd, IFQ_MAXLEN);
> Index: sys/net/if_switch.c
> ===
> RCS file: /cvs/src/sys/net/if_switch.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_switch.c
> --- sys/net/if_switch.c   6 Nov 2019 03:51:26 -   1.30
> +++ sys/net/if_switch.c   10 Jul 2020 11:57:39 -
> @@ -159,6 +159,7 @@ switch_clone_create(struct if_clone *ifc
>   ifp->if_start = NULL;
>   ifp->if_type = IFT_BRIDGE;
>   ifp->if_hdrlen = ETHER_HDR_LEN;
> + ifp->if_xflags = IFXF_CLONED;
>   TAILQ_INIT(>sc_swpo_list);
>  
>   sc->sc_unit = unit;
> Index: sys/net/if_trunk.c
> ===
> RCS file: /cvs/src/sys/net/if_trunk.c,v
> retrieving revision 1.146
> diff -u -p -r1.146 if_trunk.c
> --- sys/net/if_trunk.c17 Jun 2020 06:45:22 -  1.146
> +++ sys/net/if_trunk.c10 Jul 2020 11:57:39 -
> @@ -184,6 +184,7 @@ trunk_clone_create(struct if_clone *ifc,
>   ifp->if_ioctl = trunk_ioctl;
>   ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_capabilities = trunk_capabilities(tr);
> + ifp->if_xflags = IFXF_CLONED;
>  
>   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
>   ifc->ifc_name, unit);
> Index: sys/net/if_tun.c
> ===
> RCS file: /cvs/src/sys/net/if_tun.c,v
> retrieving revision 1.222
> diff -u -p -r1.222 if_tun.c
> --- sys/net/if_tun.c  13 May 2020 00:48:06 -  1.222
> +++ sys/net/if_tun.c  10 Jul 2020 11:57:39 -
> @@ -236,6 +236,7 @@ tun_create(struct if_clone *ifc, int uni
>   ifp->if_hardmtu = TUNMRU;
>   ifp->if_link_state = LINK_STATE_DOWN;
>   IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
> + ifp->if_xflags = IFXF_CLONED;
>  
>   if_counters_alloc(ifp);
>  
> Index: sys/net/if_vether.c
> ===
> RCS file: /cvs/src/sys/net/if_vether.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_vether.c
> --- sys/net/if_vether.c   9 Jan 2018 15:24:24 -   1.30
> +++ sys/net/if_vether.c   10 Jul 2020 11:57:39 -
> @@ -88,6 +88,7 @@ vether_clone_create(struct if_clone *ifc
>  
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
>  
>   ifmedia_init(>sc_media, 0, vether_media_change,
>   vether_media_status);
> Index: sys/net/if_vxlan.c
> ===
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 if_vxlan.c
> --- sys/net/if_vxlan.c12 Apr 2020 11:56:52 -  1.77
> +++ sys/net/if_vxlan.c10 Jul 2020 11:57:39 -
> @@ -155,6 +155,7 @@ vxlan_clone_create(struct if_clone *ifc,
>  
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
>  
>   ifmedia_init(>sc_media, 0, vxlan_media_change,
>   vxlan_media_status);



pipex(4): use interface indexes (if_get(9)) instead of pointers

2020-07-16 Thread Vitaliy Makkoveev
Interface index 0 is never associated with an interface descriptor. So
we can assign this value to the session's interface index before destroying
the corresponding `ifnet'. It's safe to use indexes instead of pointers to
`ifnet' in pipex(4).

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.96
diff -u -p -r1.96 if_pppx.c
--- sys/net/if_pppx.c   15 Jul 2020 13:02:44 -  1.96
+++ sys/net/if_pppx.c   16 Jul 2020 11:19:21 -
@@ -652,9 +652,6 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp = >pxi_if;
 
pxi->pxi_session = session;
-   /* fake a pipex interface context */
-   pxi->pxi_ifcontext.ifnet_this = ifp;
-   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
 
/* try to set the interface up */
unit = pppx_if_next_unit();
@@ -687,10 +684,6 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
 
-   error = pipex_link_session(session, >pxi_ifcontext);
-   if (error)
-   goto remove;
-
/* XXXSMP breaks atomicity */
NET_UNLOCK();
if_attach(ifp);
@@ -702,7 +695,6 @@ pppx_add_session(struct pppx_dev *pxd, s
 #if NBPFILTER > 0
bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
 #endif
-   SET(ifp->if_flags, IFF_RUNNING);
 
/* XXX ipv6 support?  how does the caller indicate it wants ipv6
 * instead of ipv4?
@@ -740,11 +732,26 @@ pppx_add_session(struct pppx_dev *pxd, s
} else {
if_addrhooks_run(ifp);
}
+
+   /* fake a pipex interface context */
+   pxi->pxi_ifcontext.ifindex = ifp->if_index;
+   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
+
+   error = pipex_link_session(session, >pxi_ifcontext);
+   if (error)
+   goto detach;
+
+   SET(ifp->if_flags, IFF_RUNNING);
pxi->pxi_ready = 1;
 
return (error);
 
-remove:
+detach:
+   /* XXXSMP breaks atomicity */
+   NET_UNLOCK();
+   if_detach(ifp);
+   NET_LOCK();
+
if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
panic("%s: inconsistent RB tree", __func__);
LIST_REMOVE(pxi, pxi_list);
@@ -1100,7 +1107,7 @@ pppacopen(dev_t dev, int flags, int mode
bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
 #endif
 
-   pipex_iface_init(>sc_pipex_iface, ifp);
+   pipex_iface_init(>sc_pipex_iface, ifp->if_index);
 
return (0);
 }
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.119
diff -u -p -r1.119 pipex.c
--- sys/net/pipex.c 6 Jul 2020 20:37:51 -   1.119
+++ sys/net/pipex.c 16 Jul 2020 11:19:21 -
@@ -140,12 +140,12 @@ pipex_init(void)
 }
 
 void
-pipex_iface_init(struct pipex_iface_context *pipex_iface, struct ifnet *ifp)
+pipex_iface_init(struct pipex_iface_context *pipex_iface, int ifindex)
 {
struct pipex_session *session;
 
pipex_iface->pipexmode = 0;
-   pipex_iface->ifnet_this = ifp;
+   pipex_iface->ifindex = ifindex;
 
if (pipex_rd_head4 == NULL) {
if (!rn_inithead((void **)_rd_head4,
@@ -162,6 +162,7 @@ pipex_iface_init(struct pipex_iface_cont
session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
session->is_multicast = 1;
session->pipex_iface = pipex_iface;
+   session->ifindex = ifindex;
pipex_iface->multicast_session = session;
 }
 
@@ -436,6 +437,7 @@ pipex_link_session(struct pipex_session 
return (EEXIST);
 
session->pipex_iface = iface;
+   session->ifindex = iface->ifindex;
 
LIST_INSERT_HEAD(_session_list, session, session_list);
chain = PIPEX_ID_HASHTABLE(session->session_id);
@@ -461,6 +463,8 @@ pipex_link_session(struct pipex_session 
 void
 pipex_unlink_session(struct pipex_session *session)
 {
+   session->ifindex = 0;
+
LIST_REMOVE(session, id_chain);
 #if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {
@@ -916,10 +920,12 @@ pipex_ip_output(struct mbuf *m0, struct 
int is_idle;
struct ifnet *ifp;
 
-   /* output succeed here as a interface */
-   ifp = session->pipex_iface->ifnet_this;
-   ifp->if_opackets++;
-   ifp->if_obytes+=m0->m_pkthdr.len;
+   if ((ifp = if_get(session->ifindex)) != NULL) {
+   /* output succeed here as a interface */
+   ifp->if_opackets++;
+   ifp->if_obytes+=m0->m_pkthdr.len;
+   }
+   if_put(ifp);
 
if (session->is_multicast == 0) {
/*
@@ -1038,9 +1044,13 @@ pipex_ppp_input(struct mbuf *m0, struct 
 
 #if NBPFILTER > 0
{
-   struct ifnet *ifp = session->pipex_iface->ifnet_this;
-   if (ifp->if_bpf && ifp->if_type == IFT_PPP)
-   

Re: pppac(4): fix races in pppacopen()

2020-07-13 Thread Vitaliy Makkoveev
Please forget about the previous diff.

Apart from ac_ioctl(), the only function which can race with
pppacclose() is pppacopen(), but since `sc' is still linked to the
`pppac_devs' list we can't reopen a dying `sc'. So the only race is
pppacopen() vs pppacopen().

We only need to malloc(9) before pppac_lookup() to fix this race.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.95
diff -u -p -r1.95 if_pppx.c
--- sys/net/if_pppx.c   10 Jul 2020 13:26:42 -  1.95
+++ sys/net/if_pppx.c   13 Jul 2020 23:35:20 -
@@ -1062,11 +1062,12 @@ pppacopen(dev_t dev, int flags, int mode
struct pppac_softc *sc;
struct ifnet *ifp;
 
-   sc = pppac_lookup(dev);
-   if (sc != NULL)
+   sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+   if (pppac_lookup(dev) != NULL) {
+   free(sc, M_DEVBUF, sizeof(*sc));
return (EBUSY);
+   }
 
-   sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
sc->sc_dev = dev;
 
mtx_init(>sc_rsel_mtx, IPL_SOFTNET);



Re: pppac(4): fix races in pppacopen()

2020-07-13 Thread Vitaliy Makkoveev
On Mon, Jul 13, 2020 at 09:39:38AM +0200, Martin Pieuchot wrote:
> On 11/07/20(Sat) 23:51, Vitaliy Makkoveev wrote:
> > [...] 
> > The way you suggest to go is to introduce rwlock and serialize
> > pppacopen() and pppacclose(). This is bad idea because we will sleep
> > while we are holding rwlock.
> 
> That's the whole point of a rwlock to be able to sleep while holding the
> lock.  The goal is to prevent any other thread coming from userland to
> enter any code path leading to the same data structure.
> 
> This is the same as what the KERNEL_LOCK() was supposed to do assuming
> there where no sleeping point in if_attach() and pipex_iface_init().
> 
> >  Also this is bad idea because you should
> > prevent access to `sc' which is being destroyed because you can grab it
> > by concurrent thread.
> 
> Which data structure is the other thread using to get a reference on `sc'?
> 
> If the data structure is protected by the rwlock, like I suggest for
> ifunit(), there's no problem.  If it is protected by the KERNEL_LOCK()
> then any sleeping point can lead to a race.  That's why we're doing such
> changes.
> 
> In case of a data structure protected by the KERNEL_LOCK() the easiest
> way to deal with sleeping point is to re-check when coming back to
> sleep.  This works well if the sleeping point is not deep into another
> layer. 
> 
> Another way is to have a per-driver lock or serialization mechanism.
> 
> >   You must serialize *all* access to this `sc'
> > elsewhere your "protection" is useless.
> 
> The question is not access to `sc' is access to which global data
> structure having a reference to this `sc'?  If the data structure is
> common to all the network stack, like ifunit()'s then we should look a
> for solution for all the network stack with all the usages of the list.
> That includes many driver's *open() functions, the cloning ioctls, etc. 
> 
> If the data structure is per-driver then the locking/serialization
> mechanism is per-driver.
> 
> > pppx(4) had no problems with unit protection. Also it had no problems
> > to access incomplete `pxi'. Now pppx(4) has fixed access to `pxi' which
> > is being destroyed. And this is the way to go in pppac(4) layer too.
> > 
> > We have pppx_dev2pxd() to obtain `pxd'. While we adding extra check to
> > pppx_dev2pxd() this is not system wide. Also pppac(4) already has
> > `sc_dead' to prevent concurrent pppac_ioctl() access to dying `sc'. You
> > suggest to serialize pppac_ioctl() too?
> 
> The way `sc_dead' is used in other drivers is a way to prevent
> per-driver ioctl(2) while a pseudo-device is being detached.  It assumes
> the NET_LOCK() is held around pseuo-device ioctl(2) so the flag is
> protected (but not documented by the NET_LOCK().  If you want to do the
> same go for it, but please do not add another meaning to the same mechanism.
> Having drivers that work similarly reduces maintains effort. 
>

Currently `sc_dead' is used only to prevent modification of the
`IFF_UP' and `IFF_RUNNING' bits.

 cut begin 
1372 pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1373 {
1374 struct pppac_softc *sc = ifp->if_softc;
1375 /* struct ifreq *ifr = (struct ifreq *)data; */
1376 int error = 0;
1377 
1378 if (sc->sc_dead)
1379 return (ENXIO);
1380 
1381 switch (cmd) {
1382 case SIOCSIFADDR:
1383 SET(ifp->if_flags, IFF_UP); /* XXX cry cry */
1384 /* FALLTHROUGH */
1385 case SIOCSIFFLAGS:
1386 if (ISSET(ifp->if_flags, IFF_UP))
1387 SET(ifp->if_flags, IFF_RUNNING);
1388 else
1389 CLR(ifp->if_flags, IFF_RUNNING);
1390 break;
1391 case SIOCSIFMTU:
1392 break;
 cut end 

And the only reason for it is the order in pppacclose(). Since we don't
destroy `ifp' by if_detach(), we can do the detachment as the first action.

 cut begin 
1345 pppacclose(dev_t dev, int flags, int mode, struct proc *p)
1346 {
1347 struct pppac_softc *sc = pppac_lookup(dev);
1348 struct ifnet *ifp = >sc_if;
1349 int s;
1350 
1351 NET_LOCK();
1352 sc->sc_dead = 1;
1353 CLR(ifp->if_flags, IFF_RUNNING);
1354 NET_UNLOCK();
1355 
1356 s = splhigh();
1357 klist_invalidate(>sc_rsel.si_note);
1358 klist_invalidate(>sc_wsel.si_note);
1359 splx(s);
1360 
1361 pipex_iface_fini(>sc_pipex_iface);
1362 
1363 if_detach(ifp);
1364 
1365 LIST_REMOVE(sc, sc_entry);
1366 free(sc, M_DEVBUF, sizeof(*sc));
1367 
1368 return (0);
1369 }
-

Re: fix races in if_clone_create()

2020-07-13 Thread Vitaliy Makkoveev
On Mon, Jul 13, 2020 at 12:52:15PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote:
> > On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote:
> > > > On 6 Jul 2020, at 12:17, Martin Pieuchot  wrote:
> > > > Assertions and documentation are more important than preventing races
> > > > because they allow to build awareness and elegant solutions instead of
> > > > hacking diffs until stuff work without knowing why.
> > > > 
> > > > There are two cases where `ifp' are inserted into `ifnet':
> > > > 1. by autoconf during boot or hotplug
> > > > 2. by cloning ioctl
> > > > 
> > > > In the second case it is always about pseudo-devices.  So the assertion
> > > > should be conditional like:
> > > > 
> > > > if (ISSET(ifp->if_xflags, IFXF_CLONED))
> > > > rw_assert_wrlock(_lock);
> > > > 
> > > > In other words this fixes serializes insertions/removal on the global
> > > > list `ifnet', the KERNEL_LOCK() being still required for reading it.
> > > > 
> > > > Is there any other data structure which ends up being protected by this
> > > > approach and could be documented?
> > > 
> > > We should be sure there is no multiple `ifnet’s in `if_list’ with the same
> > > `if_xname’.
> > 
> > That's a symptom of a bug.  Checking for a symptom won't prevent another
> > type of corruption, maybe next time it will be a corrupted pointer?
> 
> Absolutely no. You don't break the list do you understand this?
> 
> > 
> > > And the assertion you proposed looks not obvious here.
> > 
> > Why, is it because of the if() check?  That's required unless we change
> > put all if_attach() functions under the lock which would require changing
> > all driver in-tree.  However since drivers for physical devices are being
> > attached without having multiple CPUs running there's no possible race.
> 
> Because we should keep `if_list’ be linked by objects with unique
> `if_xname’. The modificatios of this list is not the problem. Problem is
> inconsistency caused by not unique `if_xname'. You are not fixing
> problem, just simptom.
> 
> > 
> > > Assertion like below looks more reasonable but introduces performance
> > > impact.
> > 
> > We should first aim for correctness then performance.  In this case,
> > performance is not even an issue because interfaces are not created
> > often compared to the rate of processing packets.
> >
> 
> Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here.

Updated diff below.
Now we have assertion in if_attach{,head}().

Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.612
diff -u -p -r1.612 if.c
--- sys/net/if.c10 Jul 2020 13:23:34 -  1.612
+++ sys/net/if.c13 Jul 2020 10:50:44 -
@@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t);
 void   if_linkstate(struct ifnet *);
 void   if_linkstate_task(void *);
 
+intif_clone_lock(struct if_clone *);
+void   if_clone_unlock(struct if_clone *);
 intif_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
 
@@ -524,6 +526,7 @@ if_attachhead(struct ifnet *ifp)
 {
if_attach_common(ifp);
NET_LOCK();
+   KASSERT(ifunit(ifp->if_xname) == NULL);
TAILQ_INSERT_HEAD(, ifp, if_list);
if_attachsetup(ifp);
NET_UNLOCK();
@@ -534,6 +537,7 @@ if_attach(struct ifnet *ifp)
 {
if_attach_common(ifp);
NET_LOCK();
+   KASSERT(ifunit(ifp->if_xname) == NULL);
TAILQ_INSERT_TAIL(, ifp, if_list);
if_attachsetup(ifp);
NET_UNLOCK();
@@ -1244,27 +1248,35 @@ if_clone_create(const char *name, int rd
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int unit, ret;
+   int unit, error;
 
ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
 
-   if (ifunit(name) != NULL)
-   return (EEXIST);
+   error = if_clone_lock(ifc);
+   if (error != 0)
+   return (error);
+
+   if (ifunit(name) != NULL) {
+   error = (EEXIST);
+   goto unlock;
+   }
 
-   ret = (*ifc->ifc_create)(ifc, unit);
+   error = (*ifc->ifc_create)(ifc, unit);
 
-   if (ret != 0 || (ifp = ifunit(name)) == NULL)
-   return (ret);
+   if (error != 0 || (ifp = ifunit(name)) == NULL)
+   goto unlock;
 
NET_LOCK();
if_addgroup(ifp, ifc->ifc_name);
 

Re: Add missing `IFXF_CLONED' to pseudo-interfaces

2020-07-13 Thread Vitaliy Makkoveev
ping?

On Fri, Jul 10, 2020 at 02:59:55PM +0300, Vitaliy Makkoveev wrote:
> Some pseudo interfaces have missing `IFXF_CLONED' flag. Diff below fixes
> this.
> 
> Index: sys/net/if_ppp.c
> ===
> RCS file: /cvs/src/sys/net/if_ppp.c,v
> retrieving revision 1.114
> diff -u -p -r1.114 if_ppp.c
> --- sys/net/if_ppp.c  24 Jun 2020 22:03:42 -  1.114
> +++ sys/net/if_ppp.c  10 Jul 2020 11:57:39 -
> @@ -220,6 +220,7 @@ ppp_clone_create(struct if_clone *ifc, i
>   sc->sc_if.if_output = pppoutput;
>   sc->sc_if.if_start = ppp_ifstart;
>   sc->sc_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_if.if_xflags = IFXF_CLONED;
>   IFQ_SET_MAXLEN(>sc_if.if_snd, IFQ_MAXLEN);
>   mq_init(>sc_inq, IFQ_MAXLEN, IPL_NET);
>   ppp_pkt_list_init(>sc_rawq, IFQ_MAXLEN);
> Index: sys/net/if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.68
> diff -u -p -r1.68 if_pppoe.c
> --- sys/net/if_pppoe.c16 Jun 2019 00:10:37 -  1.68
> +++ sys/net/if_pppoe.c10 Jul 2020 11:57:39 -
> @@ -210,6 +210,7 @@ pppoe_clone_create(struct if_clone *ifc,
>   sc->sc_sppp.pp_if.if_ioctl = pppoe_ioctl;
>   sc->sc_sppp.pp_if.if_start = pppoe_start;
>   sc->sc_sppp.pp_if.if_rtrequest = p2p_rtrequest;
> + sc->sc_sppp.pp_if.if_xflags = IFXF_CLONED;
>   sc->sc_sppp.pp_tls = pppoe_tls;
>   sc->sc_sppp.pp_tlf = pppoe_tlf;
>   IFQ_SET_MAXLEN(>sc_sppp.pp_if.if_snd, IFQ_MAXLEN);
> Index: sys/net/if_switch.c
> ===
> RCS file: /cvs/src/sys/net/if_switch.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_switch.c
> --- sys/net/if_switch.c   6 Nov 2019 03:51:26 -   1.30
> +++ sys/net/if_switch.c   10 Jul 2020 11:57:39 -
> @@ -159,6 +159,7 @@ switch_clone_create(struct if_clone *ifc
>   ifp->if_start = NULL;
>   ifp->if_type = IFT_BRIDGE;
>   ifp->if_hdrlen = ETHER_HDR_LEN;
> + ifp->if_xflags = IFXF_CLONED;
>   TAILQ_INIT(>sc_swpo_list);
>  
>   sc->sc_unit = unit;
> Index: sys/net/if_trunk.c
> ===
> RCS file: /cvs/src/sys/net/if_trunk.c,v
> retrieving revision 1.146
> diff -u -p -r1.146 if_trunk.c
> --- sys/net/if_trunk.c17 Jun 2020 06:45:22 -  1.146
> +++ sys/net/if_trunk.c10 Jul 2020 11:57:39 -
> @@ -184,6 +184,7 @@ trunk_clone_create(struct if_clone *ifc,
>   ifp->if_ioctl = trunk_ioctl;
>   ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_capabilities = trunk_capabilities(tr);
> + ifp->if_xflags = IFXF_CLONED;
>  
>   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
>   ifc->ifc_name, unit);
> Index: sys/net/if_tun.c
> ===
> RCS file: /cvs/src/sys/net/if_tun.c,v
> retrieving revision 1.222
> diff -u -p -r1.222 if_tun.c
> --- sys/net/if_tun.c  13 May 2020 00:48:06 -  1.222
> +++ sys/net/if_tun.c  10 Jul 2020 11:57:39 -
> @@ -236,6 +236,7 @@ tun_create(struct if_clone *ifc, int uni
>   ifp->if_hardmtu = TUNMRU;
>   ifp->if_link_state = LINK_STATE_DOWN;
>   IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
> + ifp->if_xflags = IFXF_CLONED;
>  
>   if_counters_alloc(ifp);
>  
> Index: sys/net/if_vether.c
> ===
> RCS file: /cvs/src/sys/net/if_vether.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 if_vether.c
> --- sys/net/if_vether.c   9 Jan 2018 15:24:24 -   1.30
> +++ sys/net/if_vether.c   10 Jul 2020 11:57:39 -
> @@ -88,6 +88,7 @@ vether_clone_create(struct if_clone *ifc
>  
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
>  
>   ifmedia_init(>sc_media, 0, vether_media_change,
>   vether_media_status);
> Index: sys/net/if_vxlan.c
> ===
> RCS file: /cvs/src/sys/net/if_vxlan.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 if_vxlan.c
> --- sys/net/if_vxlan.c12 Apr 2020 11:56:52 -  1.77
> +++ sys/net/if_vxlan.c10 Jul 2020 11:57:39 -
> @@ -155,6 +155,7 @@ vxlan_clone_create(struct if_clone *ifc,
>  
>   ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
> + ifp->if_xflags = IFXF_CLONED;
>  
>   ifmedia_init(>sc_media, 0, vxlan_media_change,
>   vxlan_media_status);



Re: fix races in if_clone_create()

2020-07-13 Thread Vitaliy Makkoveev
On Mon, Jul 13, 2020 at 09:53:44AM +0200, Martin Pieuchot wrote:
> On 06/07/20(Mon) 15:44, Vitaliy Makkoveev wrote:
> > > On 6 Jul 2020, at 12:17, Martin Pieuchot  wrote:
> > > Assertions and documentation are more important than preventing races
> > > because they allow to build awareness and elegant solutions instead of
> > > hacking diffs until stuff work without knowing why.
> > > 
> > > There are two cases where `ifp' are inserted into `ifnet':
> > > 1. by autoconf during boot or hotplug
> > > 2. by cloning ioctl
> > > 
> > > In the second case it is always about pseudo-devices.  So the assertion
> > > should be conditional like:
> > > 
> > >   if (ISSET(ifp->if_xflags, IFXF_CLONED))
> > >   rw_assert_wrlock(_lock);
> > > 
> > > In other words this fixes serializes insertions/removal on the global
> > > list `ifnet', the KERNEL_LOCK() being still required for reading it.
> > > 
> > > Is there any other data structure which ends up being protected by this
> > > approach and could be documented?
> > 
> > We should be sure there is no multiple `ifnet’s in `if_list’ with the same
> > `if_xname’.
> 
> That's a symptom of a bug.  Checking for a symptom won't prevent another
> type of corruption, maybe next time it will be a corrupted pointer?

Absolutely not. You don't break the list, do you understand this?

> 
> > And the assertion you proposed looks not obvious here.
> 
> Why, is it because of the if() check?  That's required unless we change
> put all if_attach() functions under the lock which would require changing
> all driver in-tree.  However since drivers for physical devices are being
> attached without having multiple CPUs running there's no possible race.

Because we should keep `if_list’ linked only by objects with unique
`if_xname’. The modification of this list is not the problem. The problem
is the inconsistency caused by a non-unique `if_xname'. You are not fixing
the problem, just the symptom.

> 
> > Assertion like below looks more reasonable but introduces performance
> > impact.
> 
> We should first aim for correctness then performance.  In this case,
> performance is not even an issue because interfaces are not created
> often compared to the rate of processing packets.
>

Well, so let's do "KASSERT(ifunit(ifp->if_xname) == NULL);" here.



Re: softraid_crypto: add size to free call

2020-07-12 Thread Vitaliy Makkoveev
ok mvs@

> On 13 Jul 2020, at 01:22, Klemens Nanni  wrote:
> 
> On Sun, Jul 12, 2020 at 10:31:49PM +0300, Vitaliy Makkoveev wrote:
>> I like to have "sizeof(*omi)" in corresponding malloc(9) too.
>> 
>>  cut begin 
>> 827 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
>> 828 M_WAITOK | M_ZERO);
>>  cut end 
> If you prefer to have malloc() and free() use the same idiom, I can
> commit the diff below, otherwise I'd refrain from changing existing code
> for this alone to avoid churn.
> 
> Feedback? OK?
> 
> Index: dev/softraid_crypto.c
> ===
> RCS file: /cvs/src/sys/dev/softraid_crypto.c,v
> retrieving revision 1.138
> diff -u -p -r1.138 softraid_crypto.c
> --- dev/softraid_crypto.c 4 Jul 2019 18:09:17 -   1.138
> +++ dev/softraid_crypto.c 12 Jul 2020 22:21:09 -
> @@ -882,7 +882,7 @@ done:
>   for (omi = SLIST_FIRST(); omi != NULL; omi = omi_next) {
>   omi_next = SLIST_NEXT(omi, omi_link);
>   free(omi->omi_som, M_DEVBUF, 0);
> - free(omi, M_DEVBUF, 0);
> + free(omi, M_DEVBUF, sizeof(struct sr_meta_opt_item));
>   }
> 
>   free(sm, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
> 



Re: softraid_crypto: add size to free call

2020-07-12 Thread Vitaliy Makkoveev
On Sun, Jul 12, 2020 at 08:51:08PM +0200, Klemens Nanni wrote:
> While omi->omi_som seems variable in size, omi is only ever allocated
> with one size and softraid.c uses the same size for free(9) as well.
> 
> Tested with cryto softraid and keydisk.
> 
> Feedback? OK?
> 
> 
> Index: dev/softraid_crypto.c
> ===
> RCS file: /cvs/src/sys/dev/softraid_crypto.c,v
> retrieving revision 1.138
> diff -u -p -r1.138 softraid_crypto.c
> --- dev/softraid_crypto.c 4 Jul 2019 18:09:17 -   1.138
> +++ dev/softraid_crypto.c 12 Jul 2020 18:00:29 -
> @@ -882,7 +882,7 @@ done:
>   for (omi = SLIST_FIRST(); omi != NULL; omi = omi_next) {
>   omi_next = SLIST_NEXT(omi, omi_link);
>   free(omi->omi_som, M_DEVBUF, 0);
> - free(omi, M_DEVBUF, 0);
> + free(omi, M_DEVBUF, sizeof(*omi));
>   }
>  
>   free(sm, M_DEVBUF, SR_META_SIZE * DEV_BSIZE);
> 

I'd like to have "sizeof(*omi)" in the corresponding malloc(9) too.

 cut begin 
827 omi = malloc(sizeof(struct sr_meta_opt_item), M_DEVBUF,
828 M_WAITOK | M_ZERO);
 cut end 



Re: wg: fix build without pf

2020-07-12 Thread Vitaliy Makkoveev
On Sun, Jul 12, 2020 at 07:44:47PM +0200, Klemens Nanni wrote:

OK mvs@

> Feedback? OK?
> 
> 
> Index: sys/net/if_wg.c
> ===
> RCS file: /cvs/src/sys/net/if_wg.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 if_wg.c
> --- sys/net/if_wg.c   10 Jul 2020 13:26:42 -  1.9
> +++ sys/net/if_wg.c   12 Jul 2020 16:31:03 -
> @@ -1666,7 +1666,9 @@ wg_decap(struct wg_softc *sc, struct mbu
>   m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
>   m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
>   m->m_flags &= ~(M_MCAST | M_BCAST);
> +#if NPF > 0
>   pf_pkt_addr_changed(m);
> +#endif /* NPF > 0 */
>  
>  done:
>   t->t_mbuf = m;
> 



Re: pppac(4): fix races in pppacopen()

2020-07-11 Thread Vitaliy Makkoveev
On Sat, Jul 11, 2020 at 10:11:03AM +0200, Martin Pieuchot wrote:
> On 10/07/20(Fri) 14:38, Vitaliy Makkoveev wrote:
> > On Fri, Jul 10, 2020 at 01:22:40PM +0200, Martin Pieuchot wrote:
> > > On 10/07/20(Fri) 14:07, Vitaliy Makkoveev wrote:
> > > > We have some races in pppac(4)
> > > > 1. malloc(9) can sleep so we must check `sc' presence after malloc(9)
> > > 
> > > Makes sense.
> > > 
> > > > 2. we can sleep between `sc' insertion to `sc_entry' list and 
> > > > `sc_pipex_iface' initialization. Concurrent pppacioctl() can touch
> > > > this incomplete `sc'.
> > > 
> > > Why not insert the descriptor at the end?  Shouldn't the order of
> > > operations be:
> > > 
> > >   pipex_iface_init();
> > >   if_attach();
> > >   LIST_INSERT_HEAD()
> > > 
> > > This way there's no need for a `ready' flag since the descriptor is only
> > > added to global data structures once it is completely initialized.
> > > 
> > > Using a `sc_ready' or `sc_dead' approach is something that require
> > > touching all drivers whereas serializing insertions to global data
> > > structures can be done at once for all the kernel.
> > 
> > No, because we introduce the races with if_attach(). The similar races
> > are in if_clone_attach(). We can do multiple `ifp' attachment with the
> > same name.
> 
> Yes that's the same problem.  It is also present in other parts of the
> userland/network stack boundary.  That's why I'm arguing that the best
> approach is to use a lock and document which data structures it
> protects.
> 
> We should concentrate on protecting access to data structures and not
> code paths.
> 

Let's look at what we should protect in the pppac(4) layer.

We allocate the software context in pppacopen() and destroy it in
pppacclose(). We should have only one `sc' allocated for each device
entry. We don't attach our `sc' to the device entry; we link it into a
list or tree and search for this `sc' when we access it through the
device entry. The search criterion is the device minor number, often
called the `unit'. So the entries in the list must have *unique* units,
otherwise our device list is inconsistent.

We protect *unit*, not the list. Also we use `pppac_devs' list to store
units which are already in use.

We insert the incomplete `sc' into the `pppac_devs' list to prevent double
allocation of the `sc' referenced by a unique unit. Also we should keep
this unit busy until we have fully destroyed this `sc'.

pppx(4) goes this way for units used by `pppx_if' and corresponding
`ifnet's.

We have pppac{open,close,else}() serialized by KERNEL_LOCK() but we
sleep within pppac{open,close,else}(). That means that in fact they are
*not* serialized. It also means that software context initialization
and destruction are *not* atomic.

The way you suggest is to introduce an rwlock and serialize
pppacopen() and pppacclose(). This is a bad idea because we will sleep
while holding the rwlock. It is also a bad idea because you must
prevent access to an `sc' which is being destroyed, since a concurrent
thread can grab it. You must serialize *all* access to this `sc',
otherwise your "protection" is useless.

pppx(4) had no problems with unit protection. Also it had no problems
to access incomplete `pxi'. Now pppx(4) has fixed access to `pxi' which
is being destroyed. And this is the way to go in pppac(4) layer too.

We have pppx_dev2pxd() to obtain `pxd'. Although we are adding an extra
check to pppx_dev2pxd(), this is not system wide. Also pppac(4) already
has `sc_dead' to prevent concurrent pppac_ioctl() access to a dying `sc'.
Do you suggest serializing pppac_ioctl() too?



Re: pipex(4): kill pipexintr()

2020-07-11 Thread Vitaliy Makkoveev
On Fri, Jul 10, 2020 at 10:54:44AM +0200, Martin Pieuchot wrote:
> On 07/07/20(Tue) 01:01, Vitaliy Makkoveev wrote:
> > On Mon, Jul 06, 2020 at 08:47:23PM +0200, Martin Pieuchot wrote:
> > > On 06/07/20(Mon) 19:23, Vitaliy Makkoveev wrote:
> > > > > On 6 Jul 2020, at 17:36, Martin Pieuchot  wrote:
> > > > [...] 
> > > > Unfortunately you can’t be sure about NET_LOCK() status while you are
> > > > in pppac_start(). It was described at this thread [1].
> > > > 
> > > > We have two cases:
> > > > 1. pppac_start() called from pppac_output(). NET_LOCK() was inherited.
> > > 
> > > Such recursions should be avoided.  if_enqueue() should take care of
> > > that.
> > 
> > I suggest to finish the route to if_get(9) before. Updated diff which
> > removes pipexintr() below. Just against the most resent source tree.
> 
> The tasks are not orthogonal.  Making sure the NET_LOCK() is taken
> inside the pipex boundaries help for this task as well.
> 
> That said the current code is not ready for the proposed diff.  At
> least `pppx_devs', `pipex_rd_head4' and `pipex_rd_head6' must be
> protected/annotated. 
> 
> What about all the lists/hashtables?  They aren't annotated, are they
> all protected by the NET_LOCK()?
> 
> What about `pppx_ifs' is it only used under the KERNEL_LOCK()?

Ok, let's document pipex(4) globals first.

There is no reason to add extra protection for the ppp{ac,x}(4) globals.
We *always* destroy the pipex(4) session *before* the corresponding
ppp{ac,x}(4) context. And the ppp{ac,x}(4) layer is the only place where
we create and destroy sessions. KERNEL_LOCK() protects this layer.

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.119
diff -u -p -r1.119 pipex.c
--- sys/net/pipex.c 6 Jul 2020 20:37:51 -   1.119
+++ sys/net/pipex.c 11 Jul 2020 13:09:40 -
@@ -83,19 +83,24 @@ struct pool pipex_session_pool;
 struct pool mppe_key_pool;
 
 /*
- * static/global variables
+ * Global data
+ *
+ * Locks used to protect global data:
+ *   I   immutable after creation
+ *   A   atomic operation
+ *   N   net lock
  */
-intpipex_enable = 0;
+intpipex_enable = 0;   /* [N] */
 struct pipex_hash_head
-pipex_session_list,/* master session list 
*/
-pipex_close_wait_list, /* expired session list */
-pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* peer's address hash 
*/
-pipex_id_hashtable[PIPEX_HASH_SIZE];   /* peer id hash */
+pipex_session_list,/* [N] master session 
list */
+pipex_close_wait_list, /* [N] expired session list */
+pipex_peer_addr_hashtable[PIPEX_HASH_SIZE],/* [N] peer's address 
hash */
+pipex_id_hashtable[PIPEX_HASH_SIZE];   /* [N] peer id hash */
 
-struct radix_node_head *pipex_rd_head4 = NULL;
-struct radix_node_head *pipex_rd_head6 = NULL;
+struct radix_node_head *pipex_rd_head4 = NULL; /* [N] */
+struct radix_node_head *pipex_rd_head6 = NULL; /* [N] */
 struct timeout pipex_timer_ch; /* callout timer context */
-int pipex_prune = 1;   /* walk list every seconds */
+int pipex_prune = 1;   /* [I] walk list every seconds */
 
 /* pipex traffic queue */
 struct mbuf_queue pipexinq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
@@ -105,7 +110,7 @@ struct mbuf_queue pipexoutq = MBUF_QUEUE
 #define ph_ppp_proto ether_vtag
 
 #ifdef PIPEX_DEBUG
-int pipex_debug = 0;   /* systcl net.inet.ip.pipex_debug */
+int pipex_debug = 0;   /* [A] systcl net.inet.ip.pipex_debug */
 #endif
 
 /* PPP compression == MPPE is assumed, so don't answer CCP Reset-Request. */



Add missing `IFXF_CLONED' to pseudo-interfaces

2020-07-10 Thread Vitaliy Makkoveev
Some pseudo-interfaces are missing the `IFXF_CLONED' flag. The diff below
fixes this.

Index: sys/net/if_ppp.c
===
RCS file: /cvs/src/sys/net/if_ppp.c,v
retrieving revision 1.114
diff -u -p -r1.114 if_ppp.c
--- sys/net/if_ppp.c24 Jun 2020 22:03:42 -  1.114
+++ sys/net/if_ppp.c10 Jul 2020 11:57:39 -
@@ -220,6 +220,7 @@ ppp_clone_create(struct if_clone *ifc, i
sc->sc_if.if_output = pppoutput;
sc->sc_if.if_start = ppp_ifstart;
sc->sc_if.if_rtrequest = p2p_rtrequest;
+   sc->sc_if.if_xflags = IFXF_CLONED;
IFQ_SET_MAXLEN(>sc_if.if_snd, IFQ_MAXLEN);
mq_init(>sc_inq, IFQ_MAXLEN, IPL_NET);
ppp_pkt_list_init(>sc_rawq, IFQ_MAXLEN);
Index: sys/net/if_pppoe.c
===
RCS file: /cvs/src/sys/net/if_pppoe.c,v
retrieving revision 1.68
diff -u -p -r1.68 if_pppoe.c
--- sys/net/if_pppoe.c  16 Jun 2019 00:10:37 -  1.68
+++ sys/net/if_pppoe.c  10 Jul 2020 11:57:39 -
@@ -210,6 +210,7 @@ pppoe_clone_create(struct if_clone *ifc,
sc->sc_sppp.pp_if.if_ioctl = pppoe_ioctl;
sc->sc_sppp.pp_if.if_start = pppoe_start;
sc->sc_sppp.pp_if.if_rtrequest = p2p_rtrequest;
+   sc->sc_sppp.pp_if.if_xflags = IFXF_CLONED;
sc->sc_sppp.pp_tls = pppoe_tls;
sc->sc_sppp.pp_tlf = pppoe_tlf;
IFQ_SET_MAXLEN(>sc_sppp.pp_if.if_snd, IFQ_MAXLEN);
Index: sys/net/if_switch.c
===
RCS file: /cvs/src/sys/net/if_switch.c,v
retrieving revision 1.30
diff -u -p -r1.30 if_switch.c
--- sys/net/if_switch.c 6 Nov 2019 03:51:26 -   1.30
+++ sys/net/if_switch.c 10 Jul 2020 11:57:39 -
@@ -159,6 +159,7 @@ switch_clone_create(struct if_clone *ifc
ifp->if_start = NULL;
ifp->if_type = IFT_BRIDGE;
ifp->if_hdrlen = ETHER_HDR_LEN;
+   ifp->if_xflags = IFXF_CLONED;
TAILQ_INIT(>sc_swpo_list);
 
sc->sc_unit = unit;
Index: sys/net/if_trunk.c
===
RCS file: /cvs/src/sys/net/if_trunk.c,v
retrieving revision 1.146
diff -u -p -r1.146 if_trunk.c
--- sys/net/if_trunk.c  17 Jun 2020 06:45:22 -  1.146
+++ sys/net/if_trunk.c  10 Jul 2020 11:57:39 -
@@ -184,6 +184,7 @@ trunk_clone_create(struct if_clone *ifc,
ifp->if_ioctl = trunk_ioctl;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
ifp->if_capabilities = trunk_capabilities(tr);
+   ifp->if_xflags = IFXF_CLONED;
 
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
ifc->ifc_name, unit);
Index: sys/net/if_tun.c
===
RCS file: /cvs/src/sys/net/if_tun.c,v
retrieving revision 1.222
diff -u -p -r1.222 if_tun.c
--- sys/net/if_tun.c13 May 2020 00:48:06 -  1.222
+++ sys/net/if_tun.c10 Jul 2020 11:57:39 -
@@ -236,6 +236,7 @@ tun_create(struct if_clone *ifc, int uni
ifp->if_hardmtu = TUNMRU;
ifp->if_link_state = LINK_STATE_DOWN;
IFQ_SET_MAXLEN(>if_snd, IFQ_MAXLEN);
+   ifp->if_xflags = IFXF_CLONED;
 
if_counters_alloc(ifp);
 
Index: sys/net/if_vether.c
===
RCS file: /cvs/src/sys/net/if_vether.c,v
retrieving revision 1.30
diff -u -p -r1.30 if_vether.c
--- sys/net/if_vether.c 9 Jan 2018 15:24:24 -   1.30
+++ sys/net/if_vether.c 10 Jul 2020 11:57:39 -
@@ -88,6 +88,7 @@ vether_clone_create(struct if_clone *ifc
 
ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
ifp->if_capabilities = IFCAP_VLAN_MTU;
+   ifp->if_xflags = IFXF_CLONED;
 
ifmedia_init(>sc_media, 0, vether_media_change,
vether_media_status);
Index: sys/net/if_vxlan.c
===
RCS file: /cvs/src/sys/net/if_vxlan.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_vxlan.c
--- sys/net/if_vxlan.c  12 Apr 2020 11:56:52 -  1.77
+++ sys/net/if_vxlan.c  10 Jul 2020 11:57:39 -
@@ -155,6 +155,7 @@ vxlan_clone_create(struct if_clone *ifc,
 
ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
ifp->if_capabilities = IFCAP_VLAN_MTU;
+   ifp->if_xflags = IFXF_CLONED;
 
ifmedia_init(>sc_media, 0, vxlan_media_change,
vxlan_media_status);



Re: pppac(4): fix races in pppacopen()

2020-07-10 Thread Vitaliy Makkoveev
On Fri, Jul 10, 2020 at 01:22:40PM +0200, Martin Pieuchot wrote:
> On 10/07/20(Fri) 14:07, Vitaliy Makkoveev wrote:
> > We have some races in pppac(4)
> > 1. malloc(9) can sleep so we must check `sc' presence after malloc(9)
> 
> Makes sense.
> 
> > 2. we can sleep between `sc' insertion to `sc_entry' list and 
> > `sc_pipex_iface' initialization. Concurrent pppacioctl() can touch
> > this incomplete `sc'.
> 
> Why not insert the descriptor at the end?  Shouldn't the order of
> operations be:
> 
>   pipex_iface_init();
>   if_attach();
>   LIST_INSERT_HEAD()
> 
> This way there's no need for a `ready' flag since the descriptor is only
> added to global data structures once it is completely initialized.
> 
> Using a `sc_ready' or `sc_dead' approach is something that require
> touching all drivers whereas serializing insertions to global data
> structures can be done at once for all the kernel.

No, because that would introduce races with if_attach(). Similar races
exist in if_clone_attach(). We could attach multiple `ifp's with the
same name.

 cut begin 
int
pppacopen(dev_t dev, int flags, int mode, struct proc *p)
{
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);

ifp = >sc_if;
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "pppac%u",
minor(dev));
pipex_iface_init(); /* XXX: can sleep */
if_attach(); /* XXX: can sleep */
LIST_INSERT_HEAD();
}
 cut end 

So we insert the incomplete `sc' with the used `dev' before any context
switch. Otherwise if_attach() would have to check the passed `ifp'.



pppac(4): fix races in pppacopen()

2020-07-10 Thread Vitaliy Makkoveev
We have some races in pppac(4):
1. malloc(9) can sleep, so we must check for the presence of `sc' after
malloc(9).
2. We can sleep between the insertion of `sc' into the `sc_entry' list and
the `sc_pipex_iface' initialization. A concurrent pppacioctl() can touch
this incomplete `sc'.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.91
diff -u -p -r1.91 if_pppx.c
--- sys/net/if_pppx.c   6 Jul 2020 20:37:51 -   1.91
+++ sys/net/if_pppx.c   10 Jul 2020 11:04:53 -
@@ -1019,7 +1019,7 @@ RBT_GENERATE(pppx_ifs, pppx_if, pxi_entr
 
 struct pppac_softc {
struct ifnetsc_if;
-   unsigned intsc_dead;
+   unsigned intsc_ready;
dev_t   sc_dev;
LIST_ENTRY(pppac_softc)
sc_entry;
@@ -1072,8 +1072,12 @@ pppac_lookup(dev_t dev)
struct pppac_softc *sc;
 
LIST_FOREACH(sc, _devs, sc_entry) {
-   if (sc->sc_dev == dev)
-   return (sc);
+   if (sc->sc_dev == dev) {
+   if (sc->sc_ready)
+   return (sc);
+   else
+   break;
+   }
}
 
return (NULL);
@@ -1088,22 +1092,25 @@ pppacattach(int n)
 int
 pppacopen(dev_t dev, int flags, int mode, struct proc *p)
 {
-   struct pppac_softc *sc;
+   struct pppac_softc *sc, *sc_tmp;
struct ifnet *ifp;
 
-   sc = pppac_lookup(dev);
-   if (sc != NULL)
-   return (EBUSY);
-
sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
+
+   LIST_FOREACH(sc_tmp, _devs, sc_entry) {
+   if (sc_tmp->sc_dev == dev) {
+   free(sc, M_DEVBUF, sizeof(*sc));
+   return (EBUSY);
+   }
+   }
+
sc->sc_dev = dev;
+   LIST_INSERT_HEAD(_devs, sc, sc_entry);
 
mtx_init(>sc_rsel_mtx, IPL_SOFTNET);
mtx_init(>sc_wsel_mtx, IPL_SOFTNET);
mq_init(>sc_mq, IFQ_MAXLEN, IPL_SOFTNET);
 
-   LIST_INSERT_HEAD(_devs, sc, sc_entry);
-
ifp = >sc_if;
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "pppac%u", minor(dev));
 
@@ -1129,6 +1136,7 @@ pppacopen(dev_t dev, int flags, int mode
 #endif
 
pipex_iface_init(>sc_pipex_iface, ifp);
+   sc->sc_ready = 1;
 
return (0);
 }
@@ -1136,12 +1144,14 @@ pppacopen(dev_t dev, int flags, int mode
 int
 pppacread(dev_t dev, struct uio *uio, int ioflag)
 {
-   struct pppac_softc *sc = pppac_lookup(dev);
+   struct pppac_softc *sc;
struct ifnet *ifp = >sc_if;
struct mbuf *m0, *m;
int error = 0;
size_t len;
 
+   if ((sc = pppac_lookup(dev)) == NULL)
+   return (EBADF);
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return (EHOSTDOWN);
 
@@ -1181,12 +1191,14 @@ pppacread(dev_t dev, struct uio *uio, in
 int
 pppacwrite(dev_t dev, struct uio *uio, int ioflag)
 {
-   struct pppac_softc *sc = pppac_lookup(dev);
+   struct pppac_softc *sc;
struct ifnet *ifp = >sc_if;
uint32_t proto;
int error;
struct mbuf *m;
 
+   if ((sc = pppac_lookup(dev)) == NULL)
+   return (EBADF);
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return (EHOSTDOWN);
 
@@ -1258,9 +1270,12 @@ pppacwrite(dev_t dev, struct uio *uio, i
 int
 pppacioctl(dev_t dev, u_long cmd, caddr_t data, int flags, struct proc *p)
 {
-   struct pppac_softc *sc = pppac_lookup(dev);
+   struct pppac_softc *sc;
int error = 0;
 
+   if ((sc = pppac_lookup(dev)) == NULL)
+   return (EBADF);
+
switch (cmd) {
case TUNSIFMODE: /* make npppd happy */
break;
@@ -1282,9 +1297,12 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
 int
 pppacpoll(dev_t dev, int events, struct proc *p)
 {
-   struct pppac_softc *sc = pppac_lookup(dev);
+   struct pppac_softc *sc;
int revents = 0;
 
+   if ((sc = pppac_lookup(dev)) == NULL)
+   goto out;
+
if (events & (POLLIN | POLLRDNORM)) {
if (!mq_empty(>sc_mq))
revents |= events & (POLLIN | POLLRDNORM);
@@ -1296,17 +1314,20 @@ pppacpoll(dev_t dev, int events, struct 
if (events & (POLLIN | POLLRDNORM))
selrecord(p, >sc_rsel);
}
-
+out:
return (revents);
 }
 
 int
 pppackqfilter(dev_t dev, struct knote *kn)
 {
-   struct pppac_softc *sc = pppac_lookup(dev);
+   struct pppac_softc *sc;
struct mutex *mtx;
struct klist *klist;
 
+   if ((sc = pppac_lookup(dev)) == NULL)
+   return (EBADF);
+
switch (kn->kn_filter) {
case EVFILT_READ:
mtx = >sc_rsel_mtx;
@@ -1373,12 +1394,15 @@ filt_pppac_write(struct knote *kn, long 
 int
 pppacclose(dev_t dev, int flags, int mode, struct proc *p)
 {
-   struct 

Re: pppx_if_output() don't lock `pppx_devs_lk'

2020-07-10 Thread Vitaliy Makkoveev
On Fri, Jul 10, 2020 at 10:45:54AM +0200, Martin Pieuchot wrote:
> On 08/07/20(Wed) 12:05, Vitaliy Makkoveev wrote:
> > `pppx_devs_lk' used to protect `pxd_entry' list. We lock `pppx_devs_lk'
> > in pppx_if_output() to be sure `pxd' is not destroyed by concurrent
> > pppxclose() but it's useless. We destroy all corresponding `pxi' before
> > `pxd' and `ifnet's are already detached.
> 
> This lock seems to only prevent races if malloc(9) sleeps inside
> pppxopen().  Could you address that and remove the lock altogether?
> 
> What is really protecting the data structure and lifetime of its
> elements is the KERNEL_LOCK() currently.

Updated diff which removes `pppx_devs_lk'.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.91
diff -u -p -r1.91 if_pppx.c
--- sys/net/if_pppx.c   6 Jul 2020 20:37:51 -   1.91
+++ sys/net/if_pppx.c   10 Jul 2020 09:49:11 -
@@ -50,7 +50,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -132,9 +131,8 @@ struct pppx_dev {
LIST_HEAD(,pppx_if) pxd_pxis;
 };
 
-struct rwlock  pppx_devs_lk = RWLOCK_INITIALIZER("pppxdevs");
 LIST_HEAD(, pppx_dev)  pppx_devs = LIST_HEAD_INITIALIZER(pppx_devs);
-struct pool*pppx_if_pl;
+struct poolpppx_if_pl;
 
 struct pppx_dev*pppx_dev_lookup(dev_t);
 struct pppx_dev*pppx_dev2pxd(dev_t);
@@ -218,8 +216,6 @@ pppx_dev_lookup(dev_t dev)
struct pppx_dev *pxd;
int unit = minor(dev);
 
-   /* must hold pppx_devs_lk */
-
LIST_FOREACH(pxd, _devs, pxd_entry) {
if (pxd->pxd_unit == unit)
return (pxd);
@@ -233,9 +229,7 @@ pppx_dev2pxd(dev_t dev)
 {
struct pppx_dev *pxd;
 
-   rw_enter_read(_devs_lk);
pxd = pppx_dev_lookup(dev);
-   rw_exit_read(_devs_lk);
 
return (pxd);
 }
@@ -243,6 +237,8 @@ pppx_dev2pxd(dev_t dev)
 void
 pppxattach(int n)
 {
+   pool_init(_if_pl, sizeof(struct pppx_if), 0, IPL_NONE,
+   PR_WAITOK, "pppxif", NULL);
pipex_init();
 }
 
@@ -250,25 +246,12 @@ int
 pppxopen(dev_t dev, int flags, int mode, struct proc *p)
 {
struct pppx_dev *pxd;
-   int rv = 0;
-
-   rv = rw_enter(_devs_lk, RW_WRITE | RW_INTR);
-   if (rv != 0)
-   return (rv);
-
-   pxd = pppx_dev_lookup(dev);
-   if (pxd != NULL) {
-   rv = EBUSY;
-   goto out;
-   }
-
-   if (LIST_EMPTY(_devs) && pppx_if_pl == NULL) {
-   pppx_if_pl = malloc(sizeof(*pppx_if_pl), M_DEVBUF, M_WAITOK);
-   pool_init(pppx_if_pl, sizeof(struct pppx_if), 0, IPL_NONE,
-   PR_WAITOK, "pppxif", NULL);
-   }
 
pxd = malloc(sizeof(*pxd), M_DEVBUF, M_WAITOK | M_ZERO);
+   if (pppx_dev_lookup(dev) != NULL) {
+   free(pxd, M_DEVBUF, sizeof(*pxd));
+   return (EBUSY);
+   }
 
pxd->pxd_unit = minor(dev);
mtx_init(>pxd_rsel_mtx, IPL_NET);
@@ -278,9 +261,7 @@ pppxopen(dev_t dev, int flags, int mode,
mq_init(>pxd_svcq, 128, IPL_NET);
LIST_INSERT_HEAD(_devs, pxd, pxd_entry);
 
-out:
-   rw_exit(_devs_lk);
-   return (rv);
+   return 0;
 }
 
 int
@@ -587,8 +568,6 @@ pppxclose(dev_t dev, int flags, int mode
struct pppx_dev *pxd;
struct pppx_if  *pxi;
 
-   rw_enter_write(_devs_lk);
-
pxd = pppx_dev_lookup(dev);
 
/* XXX */
@@ -603,13 +582,6 @@ pppxclose(dev_t dev, int flags, int mode
 
free(pxd, M_DEVBUF, sizeof(*pxd));
 
-   if (LIST_EMPTY(_devs)) {
-   pool_destroy(pppx_if_pl);
-   free(pppx_if_pl, M_DEVBUF, sizeof(*pppx_if_pl));
-   pppx_if_pl = NULL;
-   }
-
-   rw_exit_write(_devs_lk);
return (0);
 }
 
@@ -676,7 +648,7 @@ pppx_add_session(struct pppx_dev *pxd, s
if (error)
return (error);
 
-   pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
+   pxi = pool_get(_if_pl, PR_WAITOK | PR_ZERO);
ifp = >pxi_if;
 
pxi->pxi_session = session;
@@ -776,7 +748,7 @@ remove:
panic("%s: inconsistent RB tree", __func__);
LIST_REMOVE(pxi, pxi_list);
 out:
-   pool_put(pppx_if_pl, pxi);
+   pool_put(_if_pl, pxi);
pipex_rele_session(session);
 
return (error);
@@ -871,7 +843,7 @@ pppx_if_destroy(struct pppx_dev *pxd, st
panic("%s: inconsistent RB tree", __func__);
LIST_REMOVE(pxi, pxi_list);
 
-   pool_put(pppx_if_pl, pxi);
+   pool_put(_if_pl, pxi);
 }
 
 void
@@ -957,7 +929,6 @@ pppx_if_output(struct ifnet *ifp, struct
th = mtod(m, struct pppx_hdr *);
th->pppx_

Re: remove compat macros IFQ_ENQUEUE, IFQ_DEQUEUE and IFQ_LEN

2020-07-10 Thread Vitaliy Makkoveev



> On 10 Jul 2020, at 12:13, Patrick Wildt  wrote:
> 
> Hi,
> 
> this is a rather mechanical diff, done using vim and some regex,
> to remove and replace IFQ_ENQUEUE, IFQ_DEQUEUE and IFQ_LEN.
> 
> There are more, but I didn't want the diff to get too big.  I'll
> do that after this one is committed.
> 
> ok?
> 
> Patrick

I guess ifq_enqueue(9) should also have “.Dt ifq_enqueue 9” instead
of “.Dt IFQ_ENQUEUE 9”.

> 
> diff --git a/sys/arch/armv7/omap/if_cpsw.c b/sys/arch/armv7/omap/if_cpsw.c
> index 3b380457e3f..7aee2730989 100644
> --- a/sys/arch/armv7/omap/if_cpsw.c
> +++ b/sys/arch/armv7/omap/if_cpsw.c
> @@ -534,7 +534,7 @@ cpsw_start(struct ifnet *ifp)
>   break;
>   }
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   break;
> 
> diff --git a/sys/arch/macppc/dev/if_bm.c b/sys/arch/macppc/dev/if_bm.c
> index 348d97400cc..d65dcb879a1 100644
> --- a/sys/arch/macppc/dev/if_bm.c
> +++ b/sys/arch/macppc/dev/if_bm.c
> @@ -615,7 +615,7 @@ bmac_start(struct ifnet *ifp)
>   if (ifq_is_oactive(>if_snd))
>   return;
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   break;
> #if NBPFILTER > 0
> diff --git a/sys/arch/macppc/dev/if_mc.c b/sys/arch/macppc/dev/if_mc.c
> index 4fd36fb5748..00b2cb0aa88 100644
> --- a/sys/arch/macppc/dev/if_mc.c
> +++ b/sys/arch/macppc/dev/if_mc.c
> @@ -552,7 +552,7 @@ mc_start(struct ifnet *ifp)
>   if (ifq_is_oactive(>if_snd))
>   return;
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   return;
> 
> diff --git a/sys/arch/sparc64/dev/vnet.c b/sys/arch/sparc64/dev/vnet.c
> index 147caf1f2ec..a30b45dd2b6 100644
> --- a/sys/arch/sparc64/dev/vnet.c
> +++ b/sys/arch/sparc64/dev/vnet.c
> @@ -1132,7 +1132,7 @@ vnet_start(struct ifnet *ifp)
>   break;
>   }
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL) {
>   pool_put(>sc_pool, buf);
>   break;
> @@ -1209,7 +1209,7 @@ vnet_start_desc(struct ifnet *ifp)
>   return;
>   }
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL) {
>   pool_put(>sc_pool, buf);
>   return;
> diff --git a/sys/dev/ic/acx.c b/sys/dev/ic/acx.c
> index d758d837a0b..8643dddee4e 100644
> --- a/sys/dev/ic/acx.c
> +++ b/sys/dev/ic/acx.c
> @@ -950,7 +950,7 @@ acx_start(struct ifnet *ifp)
>   ni = m->m_pkthdr.ph_cookie;
>   goto encapped;
>   } else {
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   break;
>   }
> diff --git a/sys/dev/ic/am7990.c b/sys/dev/ic/am7990.c
> index ce092ebf5ac..08c10ada528 100644
> --- a/sys/dev/ic/am7990.c
> +++ b/sys/dev/ic/am7990.c
> @@ -483,7 +483,7 @@ am7990_start(struct ifnet *ifp)
>   sc->sc_no_td, sc->sc_last_td);
>   }
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   break;
> 
> diff --git a/sys/dev/ic/am79900.c b/sys/dev/ic/am79900.c
> index 79e1c9e28cc..9af592e2f73 100644
> --- a/sys/dev/ic/am79900.c
> +++ b/sys/dev/ic/am79900.c
> @@ -506,7 +506,7 @@ am79900_start(struct ifnet *ifp)
>   sc->sc_no_td, sc->sc_last_td);
>   }
> 
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL)
>   break;
> 
> diff --git a/sys/dev/ic/ath.c b/sys/dev/ic/ath.c
> index aae0b2a87ff..c469269eb4e 100644
> --- a/sys/dev/ic/ath.c
> +++ b/sys/dev/ic/ath.c
> @@ -845,7 +845,7 @@ ath_start(struct ifnet *ifp)
>   splx(s);
>   break;
>   }
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   if (m == NULL) {
>   s = splnet();
>   TAILQ_INSERT_TAIL(>sc_txbuf, bf, bf_list);
> diff --git a/sys/dev/ic/athn.c b/sys/dev/ic/athn.c
> index 5f84db0b1ea..8384f583934 100644
> --- a/sys/dev/ic/athn.c
> +++ b/sys/dev/ic/athn.c
> @@ -2873,7 +2873,7 @@ athn_start(struct ifnet *ifp)
>   break;
> 
>   /* Encapsulate and send data frames. */
> - IFQ_DEQUEUE(>if_snd, m);
> + m = ifq_dequeue(>if_snd);
>   

pppx_if_output() don't lock `pppx_devs_lk'

2020-07-08 Thread Vitaliy Makkoveev
`pppx_devs_lk' used to protect `pxd_entry' list. We lock `pppx_devs_lk'
in pppx_if_output() to be sure `pxd' is not destroyed by concurrent
pppxclose() but it's useless. We destroy all corresponding `pxi' before
`pxd' and `ifnet's are already detached.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.91
diff -u -p -r1.91 if_pppx.c
--- sys/net/if_pppx.c   6 Jul 2020 20:37:51 -   1.91
+++ sys/net/if_pppx.c   8 Jul 2020 09:04:31 -
@@ -957,7 +957,6 @@ pppx_if_output(struct ifnet *ifp, struct
th = mtod(m, struct pppx_hdr *);
th->pppx_proto = 0; /* not used */
th->pppx_id = pxi->pxi_session->ppp_id;
-   rw_enter_read(_devs_lk);
error = mq_enqueue(>pxi_dev->pxd_svcq, m);
if (error == 0) {
if (pxi->pxi_dev->pxd_waiting) {
@@ -966,7 +965,6 @@ pppx_if_output(struct ifnet *ifp, struct
}
selwakeup(>pxi_dev->pxd_rsel);
}
-   rw_exit_read(_devs_lk);
}
 
 out:



pppx(4): do not collect entropy?

2020-07-07 Thread Vitaliy Makkoveev
pppac(4) related `ifnet' has `IFXF_CLONED' set. I guess this was done
because we don't collect entropy from pseudo interfaces:

 cut begin 
void
if_input_process(struct ifnet *ifp, struct mbuf_list *ml)
{
struct mbuf *m;

if (ml_empty(ml))
return;

if (!ISSET(ifp->if_xflags, IFXF_CLONED))
enqueue_randomness(ml_len(ml) ^ (uintptr_t)MBUF_LIST_FIRST(ml));

/* skip */
}
 cut end 

Should we do the same for pppx(4)?

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.91
diff -u -p -r1.91 if_pppx.c
--- sys/net/if_pppx.c   6 Jul 2020 20:37:51 -   1.91
+++ sys/net/if_pppx.c   7 Jul 2020 11:37:04 -
@@ -705,6 +705,7 @@ pppx_add_session(struct pppx_dev *pxd, s
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
+   ifp->if_xflags = IFXF_CLONED;
ifp->if_start = pppx_if_start;
ifp->if_output = pppx_if_output;
ifp->if_ioctl = pppx_if_ioctl;



Re: userland clock_gettime proof of concept

2020-07-06 Thread Vitaliy Makkoveev
Sorry for late reaction. At least VirtualBox based virtual machines
started to panic with the recent kernel. I reverted your diff and panics
stopped.
Screenshot attached.


Re: userland clock_gettime proof of concept

2020-07-06 Thread Vitaliy Makkoveev


> On 5 Jul 2020, at 20:31, Paul Irofti  wrote:
> 
> On Fri, Jul 03, 2020 at 06:36:39PM +0300, Paul Irofti wrote:
>> 
>> 
>> În 3 iulie 2020 17:55:25 EEST, Mark Kettenis  a 
>> scris:
 Date: Fri, 3 Jul 2020 15:13:22 +0200
 From: Robert Nagy 
 
 On 02/07/20 00:31 +0100, Stuart Henderson wrote:
> running on 38 of these, btw.
 
 been running with this on all my workstations and laptops and on 3
>>> build
 servers as well
>>> 
>>> Are the issue that naddy@ saw solved?
>>> 
>>> Did anybody do a *proper* test on anything besides amd64?  Especially
>>> on architectures where the optimized clock_gettime is *not* available?
>> 
>> Yes and yes. 
> 
> So, can we go ahead with this?
> 

Sorry for late reaction. At least VirtualBox based virtual machines started to
panic with the recent kernel. I reverted your diff and panics stopped.

Screenshot attached.



Re: pipex(4): kill pipexintr()

2020-07-06 Thread Vitaliy Makkoveev
On Mon, Jul 06, 2020 at 08:47:23PM +0200, Martin Pieuchot wrote:
> On 06/07/20(Mon) 19:23, Vitaliy Makkoveev wrote:
> > > On 6 Jul 2020, at 17:36, Martin Pieuchot  wrote:
> > [...] 
> > Unfortunately you can’t be sure about NET_LOCK() status while you are
> > in pppac_start(). It was described at this thread [1].
> > 
> > We have two cases:
> > 1. pppac_start() called from pppac_output(). NET_LOCK() was inherited.
> 
> Such recursions should be avoided.  if_enqueue() should take care of
> that.

I suggest to finish the route to if_get(9) before. Updated diff which
removes pipexintr() below. Just against the most recent source tree.

Index: lib/libc/sys/sysctl.2
===
RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
retrieving revision 1.40
diff -u -p -r1.40 sysctl.2
--- lib/libc/sys/sysctl.2   17 May 2020 05:48:39 -  1.40
+++ lib/libc/sys/sysctl.2   6 Jul 2020 21:55:16 -
@@ -2033,35 +2033,11 @@ The currently defined variable names are
 .Bl -column "Third level name" "integer" "Changeable" -offset indent
 .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
 .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
-.It Dv PIPEXCTL_INQ Ta node Ta not applicable
-.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
 .El
 .Bl -tag -width "123456"
 .It Dv PIPEXCTL_ENABLE
 If set to 1, enable PIPEX processing.
 The default is 0.
-.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about the PIPEX packet input queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
-.It Dv PIPEXCTL_OUTQ Pq Va net.pipex.outq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about PIPEX packet output queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
 .El
 .El
 .Ss CTL_VFS
Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.611
diff -u -p -r1.611 if.c
--- sys/net/if.c30 Jun 2020 09:31:38 -  1.611
+++ sys/net/if.c6 Jul 2020 21:55:17 -
@@ -1012,13 +1012,6 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
 #endif
-#ifdef PIPEX
-   if (n & (1 << NETISR_PIPEX)) {
-   KERNEL_LOCK();
-   pipexintr();
-   KERNEL_UNLOCK();
-   }
-#endif
t |= n;
}
 
Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.51
diff -u -p -r1.51 netisr.h
--- sys/net/netisr.h6 Aug 2019 22:57:54 -   1.51
+++ sys/net/netisr.h6 Jul 2020 21:55:17 -
@@ -48,7 +48,6 @@
 #defineNETISR_IPV6 24  /* same as AF_INET6 */
 #defineNETISR_ISDN 26  /* same as AF_E164 */
 #defineNETISR_PPP  28  /* for PPP processing */
-#defineNETISR_PIPEX27  /* for pipex processing */
 #defineNETISR_BRIDGE   29  /* for bridge processing */
 #defineNETISR_PPPOE30  /* for pppoe processing */
 #defineNETISR_SWITCH   31  /* for switch dataplane */
@@ -68,7 +67,6 @@ void  bridgeintr(void);
 void   pppoeintr(void);
 void   switchintr(void);
 void   pfsyncintr(void);
-void   pipexintr(void);
 
 #defineschednetisr(anisr)  
\
 do {   \
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.119
diff -u -p -r1.119 pipex.c
--- sys/net/pipex.c 6 Jul 2020 20:37:51 -   1.119
+++ sys/net/pipex.c 6 Jul 2020 21:55:17 -
@@ -97,10 +97,6 @@ struct radix_node_head   *pipex_rd_head6 =
 struct timeout pipex_timer_ch; /* callout timer context */
 int pipex_prune = 1;   /* walk list every seconds */
 
-/* pipex traffic queue */
-struct mbuf_queue pipexinq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
-struct mbuf_queue pipexoutq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
-
 /* borrow an mbuf pkthdr field */
 #define ph_ppp_proto ether_vtag
 
@@ -713,82 +709,6 @@ pipex_lookup_by_session_id(int protocol,
 }
 
 /***
- * Queue and Software Interrupt Handler
- ***/
-void
-pipexintr(void)
-{
-  

Re: pipex(4): kill pipexintr()

2020-07-06 Thread Vitaliy Makkoveev
> On 6 Jul 2020, at 17:36, Martin Pieuchot  wrote:
> 
> On 06/07/20(Mon) 16:42, Vitaliy Makkoveev wrote:
>> [...] 
>> pipex(4) is simultaneously locked by NET_LOCK() and KERNEL_LOCK() but
>> with two exceptions:
>> 
>> 1. As you pointed pipex_pppoe_input() called without KERNEL_LOCK() held.
>> 2. pppac_start() called without NET_LOCK() held. Or with NET_LOCK()
>>   held. It depends on `if_snd' usage.
>> 
>> Diff below enforces pppac_start() to be called with NET_LOCK() held.
>> Also all externally called pipex(4) input and output routines have
>> NET_ASSERT_LOCKED() assertion.
>> 
>> Now pipex(4) is fully protected by NET_LOCK() so description of struct
>> members changed too.
>> 
>> Index: sys/net/if_pppx.c
>> ===
>> RCS file: /cvs/src/sys/net/if_pppx.c,v
>> retrieving revision 1.90
>> diff -u -p -r1.90 if_pppx.c
>> --- sys/net/if_pppx.c24 Jun 2020 08:52:53 -  1.90
>> +++ sys/net/if_pppx.c6 Jul 2020 11:10:17 -
>> @@ -1117,6 +1117,8 @@ pppacopen(dev_t dev, int flags, int mode
>>  ifp->if_output = pppac_output;
>>  ifp->if_start = pppac_start;
>>  ifp->if_ioctl = pppac_ioctl;
>> +/* XXXSMP: be sure pppac_start() called under NET_LOCK() */
>> +IFQ_SET_MAXLEN(>if_snd, 1);
> 
> Is it possible to grab the NET_LOCK() inside pppac_start() instead of
> grabbing it outside?  This should allow *start() routine to be called
> from any context.

Unfortunately you can’t be sure about NET_LOCK() status while you are
in pppac_start(). It was described at this thread [1].

We have two cases:
1. pppac_start() called from pppac_output(). NET_LOCK() was inherited.
2. pppac_start() called from `systq’. There is no NET_LOCK() held.

NET_LOCK() is not required for pipex_{,ppp}_output() because underlay
routines were switched to ip{,6}_send().

I intentionally want to make all of pipex(4) locked by one lock and be
sure its locking is consistent. I hope to start implementing
fine-grained locks just after pipex(4) is switched to if_get(9).

> 
> It might be interesting to see that as a difference between the NET_LOCK()
> used to protect the network stack internals and the NET_LOCK() used to
> protect pipex(4) internals.  Such distinction might help to convert the
> latter into a different lock or primitive.
> 
>> Index: sys/net/pipex.c
>> ===
>> RCS file: /cvs/src/sys/net/pipex.c,v
>> retrieving revision 1.117
>> diff -u -p -r1.117 pipex.c
>> --- sys/net/pipex.c  30 Jun 2020 14:05:13 -  1.117
>> +++ sys/net/pipex.c  6 Jul 2020 11:10:17 -
>> @@ -869,6 +869,7 @@ pipex_output(struct mbuf *m0, int af, in
>>  struct ip ip;
>>  struct mbuf *mret;
>> 
>> +NET_ASSERT_LOCKED();
> 
> This function doesn't touch any shared data structure, we'd better move
> the NET_ASSERT_LOCKED() above rn_lookup() in pipex_lookup_by_ip_address().
> 
> Note that `pipex_rd_head4' and `pipex_rd_head6' are, with this diff,
> also protected by the NET_LOCK() and should be annotated as such.
> 
>>  session = NULL;
>>  mret = NULL;
>>  switch (af) {
>> @@ -962,6 +963,8 @@ pipex_ppp_output(struct mbuf *m0, struct
>> {
>>  u_char *cp, hdr[16];
>> 
>> +NET_ASSERT_LOCKED();
> 
> Same here, it seems that the only reason the NET_LOCK() is necessary in
> the output path is to prevent corruption of the `session' descriptor being
> used.  So we'd rather put the assertion above the LIST_FOREACH(). 

They touch `session->stat’ :) We can switch pipex(4) to percpu counters and
output will be lockless. But I guess this should be done later.

> 
> Anyway all of those can be addressed later, your diff is ok mpi@
> 

Thanks. I will commit it if no one has objections.

1. https://marc.info/?t=15899861152=1=2



pipex(4) prevent `old_session_keys' memory leak

2020-07-06 Thread Vitaliy Makkoveev
Before session freeing pipex_rele_session() will check and release
`old_session_keys' if necessary. So use it instead of pool_put(9) within
pipex_destroy_session().

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.117
diff -u -p -r1.117 pipex.c
--- sys/net/pipex.c 30 Jun 2020 14:05:13 -  1.117
+++ sys/net/pipex.c 6 Jul 2020 13:23:25 -
@@ -652,7 +652,7 @@ pipex_destroy_session(struct pipex_sessi
}
 
pipex_unlink_session(session);
-   pool_put(_session_pool, session);
+   pipex_rele_session(session);
 
return (0);
 }



Re: pipex(4): kill pipexintr()

2020-07-06 Thread Vitaliy Makkoveev
On Mon, Jul 06, 2020 at 10:59:17AM +0200, Martin Pieuchot wrote:
> On 01/07/20(Wed) 22:42, Vitaliy Makkoveev wrote:
> > pipex(4) has 2 mbuf queues: `pipexinq' and `pipexoutq'. When mbuf passed
> > to pipex it goes to one of these queues and pipexintr() will be
> > scheduled to process them. pipexintr() called from `netisr' context.
> > 
> > It's true for pppac(4) but for pppx(4) only incoming mbufs go to
> > `pipexinq. Outgoing mbufs go directly to stack. pppx(4) enabled in
> > npppd.conf(5) by default so I guess it's the common case of pipex(4)
> > usage.
> > 
> > The code looks like there is no requirements to this delayed mbufs
> > processing, we can pass it directly to stack as we do for pppx(4)
> > outgoing traffic.
> > 
> > Also we have some troubles with pipexintr() as it was described in [1].
> > It's protection of `ph_cookie'. We don't have this protection at this time and
> > we can't because we would break if_get(9) logic.
> > 
> > Diff below removes pipexintr(). Now all mbufs passed directly without
> > enqueueing within pipex(4). We also can destroy sessions safe in all
> > cases. We also can use if_get(9) instead using unreferenced pointers to
> > `ifnet' within pipex(4). We also avoided context switch while we
> > processing mbufs within pipex(4). We decreased latency.
> > 
> > I'm seeding debian torrents with this diff and all goes well.
> 
> With this diff the content of pipexintr() is no longer executed with the
> KERNEL_LOCK() held.  This can be seen by following the code starting in
> ether_input().
> 
> Grabbing the KERNEL_LOCK() there is not a way forward.  The whole idea
> of if_input_process() is to be free of KERNEL_LOCK() to not introduce
> latency delay.
> 
> So this change implies that `pipex_session_list' and possibly other
> global data structures as well as the elements linked in those are all
> protected by the NET_LOCK().  I believe this is the easiest way forward.
> 
> That said I would be comfortable with this diff going in if an audit of
> the data structures accessed in the code path starting by pipex_pppoe_input()
> has been done.  That implies annotating/documenting which data structures
> are now protected by the NET_LOCK() and adding the necessary
> NET_ASSERT_LOCK().  Such audit might lead to consider some ioctl code
> path changes to now serialize on the NET_LOCK() instead of the
> KERNEL_LOCK().
> 

pipex(4) is simultaneously locked by NET_LOCK() and KERNEL_LOCK() but
with two exceptions:

1. As you pointed out, pipex_pppoe_input() is called without KERNEL_LOCK() held.
2. pppac_start() called without NET_LOCK() held. Or with NET_LOCK()
   held. It depends on `if_snd' usage.

Diff below enforces pppac_start() to be called with NET_LOCK() held.
Also all externally called pipex(4) input and output routines have
NET_ASSERT_LOCKED() assertion.

Now pipex(4) is fully protected by NET_LOCK() so description of struct
members changed too.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.90
diff -u -p -r1.90 if_pppx.c
--- sys/net/if_pppx.c   24 Jun 2020 08:52:53 -  1.90
+++ sys/net/if_pppx.c   6 Jul 2020 11:10:17 -
@@ -1117,6 +1117,8 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_output = pppac_output;
ifp->if_start = pppac_start;
ifp->if_ioctl = pppac_ioctl;
+   /* XXXSMP: be sure pppac_start() called under NET_LOCK() */
+   IFQ_SET_MAXLEN(>if_snd, 1);
 
if_counters_alloc(ifp);
if_attach(ifp);
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.117
diff -u -p -r1.117 pipex.c
--- sys/net/pipex.c 30 Jun 2020 14:05:13 -  1.117
+++ sys/net/pipex.c 6 Jul 2020 11:10:17 -
@@ -869,6 +869,7 @@ pipex_output(struct mbuf *m0, int af, in
struct ip ip;
struct mbuf *mret;
 
+   NET_ASSERT_LOCKED();
session = NULL;
mret = NULL;
switch (af) {
@@ -962,6 +963,8 @@ pipex_ppp_output(struct mbuf *m0, struct
 {
u_char *cp, hdr[16];
 
+   NET_ASSERT_LOCKED();
+
 #ifdef PIPEX_MPPE
if (pipex_session_is_mppe_enabled(session)) {
if (proto == PPP_IP) {
@@ -1355,6 +1358,7 @@ pipex_pppoe_input(struct mbuf *m0, struc
int hlen;
struct pipex_pppoe_header pppoe;
 
+   NET_ASSERT_LOCKED();
/* already checked at pipex_pppoe_lookup_session */
KASSERT(m0->m_pkthdr.len >= (sizeof(struct ether_header) +
sizeof(pppoe)));
@@ -1586,6 +1590,7 @@ pipex_pptp_input(struct mbuf *m0, struct
struct pipex_pptp_session *pptp_session;
int rewind = 0;
 
+   NET_ASSERT_LO

Re: fix races in if_clone_create()

2020-07-06 Thread Vitaliy Makkoveev



> On 6 Jul 2020, at 12:17, Martin Pieuchot  wrote:
> 
> On 01/07/20(Wed) 00:02, Vitaliy Makkoveev wrote:
>> On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote:
>>> On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote:
>>>> On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote:
>>>>> [...] 
>>>>> I reworked tool for reproduce. Now I avoided fork()/exec() route and it
>>>>> takes couple of minutes to take panic on 4 cores. Also some screenshots
>>>>> attached.
>>>> 
>>>> Setting kern.pool_debug=2 makes the race reproducible in seconds.
>> 
>> Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304.
>> malloc() will call yield() while we are holding NET_LOCK(). I attached
>> screenshot with splassertion to this mail.
> 
> With kern.splassert < 3 it is fine. 
> 
>>>> Could you turn this test into something committable in regress/?  We can
>>>> link it to the build once a fix is committed.
>>>> 
>>> 
>>> We have 3 races with cloned interfaces:
>>> 1. if_clone_create() vs if_clone_create()
>>> 2. if_clone_destroy() vs if_clone_destroy()
>>> 3. if_clone_destroy() vs the rest of stack
>>> 
>>> It makes sences to commit unified test to regress/, so I suggest to wait
>>> a little.
>> 
>> The another solution.
>> 
>> Diff below introduces per-`ifc' serialization for if_clone_create() and
>> if_clone_destroy(). There is no index bitmap anymore.
> 
> I like the simplification.  More comments below:
> 
>> +/*
>> + * Lock a clone network interface.
>> + */
>> +int
>> +if_clone_lock(struct if_clone *ifc)
>> +{
>> +int error;
>> +
>> +rw_enter_write(>ifc_lock);
>> +
>> +while (ifc->ifc_flags & IFC_CREATE_LOCKED) {
>> +ifc->ifc_flags |= IFC_CREATE_LOCKWAIT;
>> +error = rwsleep_nsec(>ifc_flags, >ifc_lock,
>> +PWAIT|PCATCH, "ifclk", INFSLP);
>> +if(error != 0) {
>> +ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT;
>> +rw_exit_write(>ifc_lock);
>> +return error;
>> +}
>> +}
>> +ifc->ifc_flags |= IFC_CREATE_LOCKED;
>> +ifc->ifc_flags &= ~IFC_CREATE_LOCKWAIT;
>> +
>> +rw_exit_write(>ifc_lock);
>> +
>> +return 0;
>> +}
> 
> This is like re-implementing a rwlock but loosing the debugging ability of
> WITNESS.

The reason to do this is to avoid calling `ifc_create’ with the rwlock held.
We have unique sleep points for each underlying routine for `ifc_create’,
so this "rwlock reimplementation" looks better. Also this lock is used in
one place only, so the impact of losing the debugging ability is not that big.

> 
> I also don't see any reason for having a per-ifc lock.  If, at least one
> of the problems, is a double insert in `ifnet' then we should be able to
> assert that a lock is held when doing such assertion.

This race breaks ifunit(), not `if_list’. I mean the LIST_*() operations are
not broken because `le_{prev,next}’ are valid, but the list is inconsistent,
of course.

Since only "ifconfig clonerA0 create& ifconfig clonerA0 create” will
break, I see no reason to deny simultaneous execution of
“ifconfig clonerA0 create& ifconfig clonerB0 create”.

Let this lock be per `ifc’ not global.

> 
> Assertions and documentation are more important than preventing races
> because they allow to build awareness and elegant solutions instead of
> hacking diffs until stuff work without knowing why.
> 
> There are two cases where `ifp' are inserted into `ifnet':
> 1. by autoconf during boot or hotplug
> 2. by cloning ioctl
> 
> In the second case it is always about pseudo-devices.  So the assertion
> should be conditional like:
> 
>   if (ISSET(ifp->if_xflags, IFXF_CLONED))
>   rw_assert_wrlock(_lock);
> 
> In other words this fixes serializes insertions/removal on the global
> list `ifnet', the KERNEL_LOCK() being still required for reading it.
> 
> Is there any other data structure which ends up being protected by this
> approach and could be documented?

We should be sure there are no multiple `ifnet’s in `if_list’ with the same
`if_xname’. And the assertion you proposed does not look obvious here.
An assertion like the one below looks more reasonable but introduces a
performance impact.

 cut begin 
void
if_attach(struct ifnet *ifp)
{
if_attach_common(ifp);
NET_LOCK();
KASSERT(ifunit(ifp->if_xname) == NULL);
TAILQ_INSERT_TAIL(, ifp, if_list);
if_attachsetup(ifp);
NET_UNLOCK();
}
 cut end 

I guess the commentary within if_clone_create() is the best solution.
Something like this: “Deny simultaneous execution to prevent multiple
creation of interfaces with the same name”.


Re: pipex(4): kill pipexintr()

2020-07-03 Thread Vitaliy Makkoveev
ping?

> On 1 Jul 2020, at 22:42, Vitaliy Makkoveev  wrote:
> 
> pipex(4) has 2 mbuf queues: `pipexinq' and `pipexoutq'. When mbuf passed
> to pipex it goes to one of these queues and pipexintr() will be
> scheduled to process them. pipexintr() called from `netisr' context.
> 
> It's true for pppac(4) but for pppx(4) only incoming mbufs go to
> `pipexinq. Outgoing mbufs go directly to stack. pppx(4) enabled in
> npppd.conf(5) by default so I guess it's the common case of pipex(4)
> usage.
> 
> The code looks like there is no requirements to this delayed mbufs
> processing, we can pass it directly to stack as we do for pppx(4)
> outgoing traffic.
> 
> Also we have some troubles with pipexintr() as it was described in [1].
> It's protection of `ph_cookie'. We don't this protection this time and
> we can't because we should brake if_get(9) logic.
> 
> Diff below removes pipexintr(). Now all mbufs passed directly without
> enqueueing within pipex(4). We also can destroy sessions safe in all
> cases. We also can use if_get(9) instead using unreferenced pointers to
> `ifnet' within pipex(4). We also avoided context switch while we
> processing mbufs within pipex(4). We decreased latency.
> 
> I'm seeding debian torrents with this diff an all goes well.
> 
> 1. https://marc.info/?t=15930080902=1=2
> 
> Index: lib/libc/sys/sysctl.2
> ===
> RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
> retrieving revision 1.40
> diff -u -p -r1.40 sysctl.2
> --- lib/libc/sys/sysctl.2 17 May 2020 05:48:39 -  1.40
> +++ lib/libc/sys/sysctl.2 1 Jul 2020 19:20:22 -
> @@ -2033,35 +2033,11 @@ The currently defined variable names are
> .Bl -column "Third level name" "integer" "Changeable" -offset indent
> .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
> .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
> -.It Dv PIPEXCTL_INQ Ta node Ta not applicable
> -.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
> .El
> .Bl -tag -width "123456"
> .It Dv PIPEXCTL_ENABLE
> If set to 1, enable PIPEX processing.
> The default is 0.
> -.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
> -Fourth level comprises an array of
> -.Vt struct ifqueue
> -structures containing information about the PIPEX packet input queue.
> -The forth level names for the elements of
> -.Vt struct ifqueue
> -are the same as described in
> -.Li ip.arpq
> -in the
> -.Dv PF_INET
> -section.
> -.It Dv PIPEXCTL_OUTQ Pq Va net.pipex.outq
> -Fourth level comprises an array of
> -.Vt struct ifqueue
> -structures containing information about PIPEX packet output queue.
> -The forth level names for the elements of
> -.Vt struct ifqueue
> -are the same as described in
> -.Li ip.arpq
> -in the
> -.Dv PF_INET
> -section.
> .El
> .El
> .Ss CTL_VFS
> Index: sys/net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.611
> diff -u -p -r1.611 if.c
> --- sys/net/if.c  30 Jun 2020 09:31:38 -  1.611
> +++ sys/net/if.c  1 Jul 2020 19:20:27 -
> @@ -1012,13 +1012,6 @@ if_netisr(void *unused)
>   KERNEL_UNLOCK();
>   }
> #endif
> -#ifdef PIPEX
> - if (n & (1 << NETISR_PIPEX)) {
> - KERNEL_LOCK();
> - pipexintr();
> - KERNEL_UNLOCK();
> - }
> -#endif
>   t |= n;
>   }
> 
> Index: sys/net/netisr.h
> ===
> RCS file: /cvs/src/sys/net/netisr.h,v
> retrieving revision 1.51
> diff -u -p -r1.51 netisr.h
> --- sys/net/netisr.h  6 Aug 2019 22:57:54 -   1.51
> +++ sys/net/netisr.h  1 Jul 2020 19:20:27 -
> @@ -48,7 +48,6 @@
> #define   NETISR_IPV6 24  /* same as AF_INET6 */
> #define   NETISR_ISDN 26  /* same as AF_E164 */
> #define   NETISR_PPP  28  /* for PPP processing */
> -#define  NETISR_PIPEX27  /* for pipex processing */
> #define   NETISR_BRIDGE   29  /* for bridge processing */
> #define   NETISR_PPPOE30  /* for pppoe processing */
> #define   NETISR_SWITCH   31  /* for switch dataplane */
> @@ -68,7 +67,6 @@ voidbridgeintr(void);
> void  pppoeintr(void);
> void  switchintr(void);
> void  pfsyncintr(void);
> -void pipexintr(void);
> 
> #define   schednetisr(anisr)  
> \
> do { 

Re: fix races in if_clone_create()

2020-07-03 Thread Vitaliy Makkoveev
ping?

> On 1 Jul 2020, at 00:02, Vitaliy Makkoveev  wrote:
> 
> On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote:
>> On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote:
>>> On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote:
>>>> [...] 
>>>> I reworked tool for reproduce. Now I avoided fork()/exec() route and it
>>>> takes couple of minutes to take panic on 4 cores. Also some screenshots
>>>> attached.
>>> 
>>> Setting kern.pool_debug=2 makes the race reproducible in seconds.
> 
> Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304.
> malloc() will call yield() while we are holding NET_LOCK(). I attached
> screenshot with splassertion to this mail.
> 
>>> 
>>> Could you turn this test into something committable in regress/?  We can
>>> link it to the build once a fix is committed.
>>> 
>> 
>> We have 3 races with cloned interfaces:
>> 1. if_clone_create() vs if_clone_create()
>> 2. if_clone_destroy() vs if_clone_destroy()
>> 3. if_clone_destroy() vs the rest of stack
>> 
>> It makes sences to commit unified test to regress/, so I suggest to wait
>> a little.
> 
> The another solution.
> 
> Diff below introduces per-`ifc' serialization for if_clone_create() and
> if_clone_destroy(). There is no index bitmap anymore.
> 
> Diff fixes the following races:
> 1. if_clone_create() vs if_clone_create()
> 2. if_clone_destroy() vs if_clone_destroy()
> 
> `ifc_create' will go the same lock path for each cloner, and
> `ifc_destroy' will go this path but in reverse order. It seems
> reasonable to allow simultaneous create/destroy for different cloners
> but since different instances of one cloner will block each other it's
> no reason have parallelism here.
> 
> Updated test tool
>  cut begin 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> static struct ifreq ifr;
> 
> static void *clone_create(void *arg)
> {
>   int s;
> 
>   if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
>   err(1, "socket()");
>   while(1){
>   if(ioctl(s, SIOCIFCREATE, )<0)
>   if(errno==EINVAL)
>   exit(1);
>   }
> 
>   return NULL;
> }
> 
> static void *clone_destroy(void *arg)
> {
>   int s;
> 
>   if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
>   err(1, "socket()");
>   while(1){
>   if(ioctl(s, SIOCIFDESTROY, )<0)
>   if(errno==EINVAL)
>   exit(1);
>   }
> 
>   return NULL;
> }
> 
> int main(int argc, char *argv[])
> {
>   pthread_t thr;
>   int i;
> 
>   if(argc!=2){
>   fprintf(stderr, "usage: %s ifname\n", getprogname());
>   return 1;
>   }
> 
>   if(getuid()!=0){
>   fprintf(stderr, "should be root\n");
>   return 1;
>   }
> 
>   memset(, 0, sizeof(ifr));
>   strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));
> 
>   for(i=0; i<8*4; ++i){
>   if(pthread_create(, NULL, clone_create, NULL)!=0)
>   errx(1, "pthread_create(clone_create)");
>   if(pthread_create(, NULL, clone_destroy, NULL)!=0)
>   errx(1, "pthread_create(clone_destroy)");
>   }
> 
>   select(0, NULL, NULL, NULL, NULL);
> 
>   return 0;
> }
>  cut end 
> 
> 
> 
> Index: sys/net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.611
> diff -u -p -r1.611 if.c
> --- sys/net/if.c  30 Jun 2020 09:31:38 -  1.611
> +++ sys/net/if.c  30 Jun 2020 20:41:50 -
> @@ -155,6 +155,8 @@ int   if_getgrouplist(caddr_t);
> void  if_linkstate(struct ifnet *);
> void  if_linkstate_task(void *);
> 
> +int  if_clone_lock(struct if_clone *);
> +void if_clone_unlock(struct if_clone *);
> int   if_clone_list(struct if_clonereq *);
> struct if_clone   *if_clone_lookup(const char *, int *);
> 
> @@ -1244,27 +1246,35 @@ if_clone_create(const char *name, int rd
> {
>   struct if_clone *ifc;
>   struct ifnet *ifp;
> - int unit, ret;
> + int unit, error;
> 
>   ifc = if_clone_lookup(name, );
>   if (ifc == NULL)
>   return (EINVAL);
> 
> - if (ifunit(name) 

pipex(4): kill pipexintr()

2020-07-01 Thread Vitaliy Makkoveev
pipex(4) has 2 mbuf queues: `pipexinq' and `pipexoutq'. When mbuf passed
to pipex it goes to one of these queues and pipexintr() will be
scheduled to process them. pipexintr() called from `netisr' context.

It's true for pppac(4) but for pppx(4) only incoming mbufs go to
`pipexinq'. Outgoing mbufs go directly to stack. pppx(4) enabled in
npppd.conf(5) by default so I guess it's the common case of pipex(4)
usage.

The code looks like there is no requirements to this delayed mbufs
processing, we can pass it directly to stack as we do for pppx(4)
outgoing traffic.

Also we have some troubles with pipexintr() as it was described in [1].
It's protection of `ph_cookie'. We don't have this protection at this time,
and we can't add it because we would have to break the if_get(9) logic.

Diff below removes pipexintr(). Now all mbufs passed directly without
enqueueing within pipex(4). We also can destroy sessions safe in all
cases. We also can use if_get(9) instead using unreferenced pointers to
`ifnet' within pipex(4). We also avoided context switch while we
processing mbufs within pipex(4). We decreased latency.

I'm seeding debian torrents with this diff and all goes well.

1. https://marc.info/?t=15930080902=1=2

Index: lib/libc/sys/sysctl.2
===
RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
retrieving revision 1.40
diff -u -p -r1.40 sysctl.2
--- lib/libc/sys/sysctl.2   17 May 2020 05:48:39 -  1.40
+++ lib/libc/sys/sysctl.2   1 Jul 2020 19:20:22 -
@@ -2033,35 +2033,11 @@ The currently defined variable names are
 .Bl -column "Third level name" "integer" "Changeable" -offset indent
 .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
 .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
-.It Dv PIPEXCTL_INQ Ta node Ta not applicable
-.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
 .El
 .Bl -tag -width "123456"
 .It Dv PIPEXCTL_ENABLE
 If set to 1, enable PIPEX processing.
 The default is 0.
-.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about the PIPEX packet input queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
-.It Dv PIPEXCTL_OUTQ Pq Va net.pipex.outq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about PIPEX packet output queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
 .El
 .El
 .Ss CTL_VFS
Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.611
diff -u -p -r1.611 if.c
--- sys/net/if.c30 Jun 2020 09:31:38 -  1.611
+++ sys/net/if.c1 Jul 2020 19:20:27 -
@@ -1012,13 +1012,6 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
 #endif
-#ifdef PIPEX
-   if (n & (1 << NETISR_PIPEX)) {
-   KERNEL_LOCK();
-   pipexintr();
-   KERNEL_UNLOCK();
-   }
-#endif
t |= n;
}
 
Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.51
diff -u -p -r1.51 netisr.h
--- sys/net/netisr.h6 Aug 2019 22:57:54 -   1.51
+++ sys/net/netisr.h1 Jul 2020 19:20:27 -
@@ -48,7 +48,6 @@
 #defineNETISR_IPV6 24  /* same as AF_INET6 */
 #defineNETISR_ISDN 26  /* same as AF_E164 */
 #defineNETISR_PPP  28  /* for PPP processing */
-#defineNETISR_PIPEX27  /* for pipex processing */
 #defineNETISR_BRIDGE   29  /* for bridge processing */
 #defineNETISR_PPPOE30  /* for pppoe processing */
 #defineNETISR_SWITCH   31  /* for switch dataplane */
@@ -68,7 +67,6 @@ void  bridgeintr(void);
 void   pppoeintr(void);
 void   switchintr(void);
 void   pfsyncintr(void);
-void   pipexintr(void);
 
 #defineschednetisr(anisr)  
\
 do {   \
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.117
diff -u -p -r1.117 pipex.c
--- sys/net/pipex.c 30 Jun 2020 14:05:13 -  1.117
+++ sys/net/pipex.c 1 Jul 2020 19:20:28 -
@@ -97,10 +97,6 @@ struct radix_node_head   *pipex_rd_head6 =
 struct timeout pipex_timer_ch; /* callout timer context */
 int pipex_prune = 1;   /* walk list every seconds */
 
-/* pipex traffic queue */
-struct mbuf_queue pipexinq = MBUF_QUEUE_INITIALIZER(IFQ_MAXLEN, IPL_NET);
-struct mbuf_queue pipexoutq = 

Re: fix races in if_clone_create()

2020-06-30 Thread Vitaliy Makkoveev
On Tue, Jun 30, 2020 at 03:48:22PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote:
> > On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote:
> > > [...] 
> > > I reworked tool for reproduce. Now I avoided fork()/exec() route and it
> > > takes couple of minutes to take panic on 4 cores. Also some screenshots
> > > attached.
> > 
> > Setting kern.pool_debug=2 makes the race reproducible in seconds.

Unfortunately you will catch splassert() caused by kern/sched_bsd.c:304.
malloc() will call yield() while we are holding NET_LOCK(). I attached
screenshot with splassertion to this mail.

> > 
> > Could you turn this test into something committable in regress/?  We can
> > link it to the build once a fix is committed.
> > 
> 
> We have 3 races with cloned interfaces:
> 1. if_clone_create() vs if_clone_create()
> 2. if_clone_destroy() vs if_clone_destroy()
> 3. if_clone_destroy() vs the rest of stack
> 
> It makes sences to commit unified test to regress/, so I suggest to wait
> a little.

Another solution.

Diff below introduces per-`ifc' serialization for if_clone_create() and
if_clone_destroy(). There is no index bitmap anymore.

Diff fixes the following races:
1. if_clone_create() vs if_clone_create()
2. if_clone_destroy() vs if_clone_destroy()

`ifc_create' will go the same lock path for each cloner, and
`ifc_destroy' will go this path but in reverse order. It seems
reasonable to allow simultaneous create/destroy for different cloners,
but since different instances of one cloner will block each other there
is no reason to have parallelism here.

Updated test tool
 cut begin 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct ifreq ifr;

static void *clone_create(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFCREATE, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

static void *clone_destroy(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFDESTROY, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t thr;
int i;

if(argc!=2){
fprintf(stderr, "usage: %s ifname\n", getprogname());
return 1;
}

if(getuid()!=0){
fprintf(stderr, "should be root\n");
return 1;
}

memset(, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

for(i=0; i<8*4; ++i){
if(pthread_create(, NULL, clone_create, NULL)!=0)
errx(1, "pthread_create(clone_create)");
if(pthread_create(, NULL, clone_destroy, NULL)!=0)
errx(1, "pthread_create(clone_destroy)");
}

select(0, NULL, NULL, NULL, NULL);

return 0;
}
 cut end 



Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.611
diff -u -p -r1.611 if.c
--- sys/net/if.c30 Jun 2020 09:31:38 -  1.611
+++ sys/net/if.c30 Jun 2020 20:41:50 -
@@ -155,6 +155,8 @@ int if_getgrouplist(caddr_t);
 void   if_linkstate(struct ifnet *);
 void   if_linkstate_task(void *);
 
+intif_clone_lock(struct if_clone *);
+void   if_clone_unlock(struct if_clone *);
 intif_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
 
@@ -1244,27 +1246,35 @@ if_clone_create(const char *name, int rd
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int unit, ret;
+   int unit, error;
 
ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
 
-   if (ifunit(name) != NULL)
-   return (EEXIST);
+   error = if_clone_lock(ifc);
+   if (error != 0)
+   return (error);
+
+   if (ifunit(name) != NULL) {
+   error = (EEXIST);
+   goto unlock;
+   }
 
-   ret = (*ifc->ifc_create)(ifc, unit);
+   error = (*ifc->ifc_create)(ifc, unit);
 
-   if (ret != 0 || (ifp = ifunit(name)) == NULL)
-   return (ret);
+   if (error != 0 || (ifp = ifunit(name)) == NULL)
+   goto unlock;
 
NET_LOCK();
if_addgroup(ifp, ifc->ifc_name);
if (rdomain != 0)
if_setrdomain(ifp, rdomain);
NET_UNLOCK(

pipex(4): kill unused declaration

2020-06-30 Thread Vitaliy Makkoveev
`udpcksum' declared but not used in net/pipex.c, so kill it

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.116
diff -u -p -r1.116 pipex.c
--- sys/net/pipex.c 22 Jun 2020 09:38:15 -  1.116
+++ sys/net/pipex.c 30 Jun 2020 13:28:48 -
@@ -104,9 +104,6 @@ struct mbuf_queue pipexoutq = MBUF_QUEUE
 /* borrow an mbuf pkthdr field */
 #define ph_ppp_proto ether_vtag
 
-/* from udp_usrreq.c */
-extern int udpcksum;
-
 #ifdef PIPEX_DEBUG
 int pipex_debug = 0;   /* systcl net.inet.ip.pipex_debug */
 #endif



Re: fix races in if_clone_create()

2020-06-30 Thread Vitaliy Makkoveev
On Tue, Jun 30, 2020 at 12:08:03PM +0200, Martin Pieuchot wrote:
> On 29/06/20(Mon) 11:59, Vitaliy Makkoveev wrote:
> > [...] 
> > I reworked tool for reproduce. Now I avoided fork()/exec() route and it
> > takes couple of minutes to take panic on 4 cores. Also some screenshots
> > attached.
> 
> Setting kern.pool_debug=2 makes the race reproducible in seconds.
> 
> Could you turn this test into something committable in regress/?  We can
> link it to the build once a fix is committed.
> 

We have 3 races with cloned interfaces:
1. if_clone_create() vs if_clone_create()
2. if_clone_destroy() vs if_clone_destroy()
3. if_clone_destroy() vs the rest of stack

It makes sense to commit a unified test to regress/, so I suggest we wait
a little.

> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > 
> > static struct ifreq ifr;
> > 
> > static void *clone_create(void *arg)
> > {
> > int s;
> > 
> > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
> > err(1, "socket()");
> > while(1){
> > if(ioctl(s, SIOCIFCREATE, )<0)
> > if(errno==EINVAL)
> > exit(1);
> > }
> > 
> > return NULL;
> > }
> > 
> > static void *clone_destroy(void *arg)
> > {
> > int s;
> > 
> > if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
> > err(1, "socket()");
> > while(1){
> > if(ioctl(s, SIOCIFDESTROY, )<0)
> > if(errno==EINVAL)
> > exit(1);
> > }
> > 
> > return NULL;
> > }
> > 
> > int main(int argc, char *argv[])
> > {
> > pthread_t thr;
> > int i;
> > 
> > if(argc!=2){
> > fprintf(stderr, "usage: %s ifname\n", getprogname());
> > return 1;
> > }
> > 
> > if(getuid()!=0){
> > fprintf(stderr, "should be root\n");
> > return 1;
> > }
> > 
> > memset(, 0, sizeof(ifr));
> > strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));
> > 
> > for(i=0; i<8*4; ++i){
> > if(pthread_create(, NULL, clone_create, NULL)!=0)
> > errx(1, "pthread_create(clone_create)");
> > }
> > 
> > clone_destroy(NULL);
> > 
> > return 0;
> > }
> > 
> >  cut end 
> 
> 
> 
> 
> 



Re: if_delgroup(): Add size to free(9) call

2020-06-30 Thread Vitaliy Makkoveev
On Tue, Jun 30, 2020 at 04:11:59AM +0200, Klemens Nanni wrote:
> Interface groups are allocated as follows:
> 
>   struct ifg_group *
>   if_creategroup(const char *groupname)
>   {
>   struct ifg_group*ifg;
> 
>   if ((ifg = malloc(sizeof(*ifg), M_TEMP, M_NOWAIT)) == NULL)
>   return (NULL);
> 
>   ...
>   }
> 
> Since this allocation per group does not change, we can use the same
> size when freeing it in if_delgroup() accordingly.
> 
> Tested on sparc64.
> 
> Feedback? OK?
> 

OK mvs



Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Mon, Jun 29, 2020 at 04:27:50PM +0200, Hrvoje Popovski wrote:
> On 29.6.2020. 10:59, Vitaliy Makkoveev wrote:
> > I reworked tool for reproduce. Now I avoided fork()/exec() route and it
> > takes couple of minutes to take panic on 4 cores. Also some screenshots
> > attached.
> > 
> > I hope anyone else will try it.
> 
> Hi,
> 
> i'm getting panic quite fast :)
> i will leave box in ddb if more information is needed
>

Thanks. Right now it takes seconds to catch panic at least with switch(4),
bridge(4), pflog(4), vether(4) and etherip(4). So you can leave ddb(4).

I would like someone to try my solution for this issue. And reviews are
welcome :)

The latest diff is below. If the `unit' was obtained, it's guaranteed that
there is no pseudo interface with `name' in the system. ifunit() is now
useless here and can be dropped.


Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.610
diff -u -p -r1.610 if.c
--- sys/net/if.c22 Jun 2020 09:45:13 -  1.610
+++ sys/net/if.c29 Jun 2020 13:54:29 -
@@ -157,6 +157,8 @@ voidif_linkstate_task(void *);
 
 intif_clone_list(struct if_clonereq *);
 struct if_clone*if_clone_lookup(const char *, int *);
+intif_clone_alloc_unit(struct if_clone *, int);
+void   if_clone_rele_unit(struct if_clone *, int);
 
 intif_group_egress_build(void);
 
@@ -1244,19 +1246,18 @@ if_clone_create(const char *name, int rd
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int unit, ret;
+   int unit, error;
 
ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
+   error = if_clone_alloc_unit(ifc, unit);
+   if (error != 0)
+   return (error);
 
-   if (ifunit(name) != NULL)
-   return (EEXIST);
-
-   ret = (*ifc->ifc_create)(ifc, unit);
-
-   if (ret != 0 || (ifp = ifunit(name)) == NULL)
-   return (ret);
+   error = (*ifc->ifc_create)(ifc, unit);
+   if (error != 0 || (ifp = ifunit(name)) == NULL)
+   return (error);
 
NET_LOCK();
if_addgroup(ifp, ifc->ifc_name);
@@ -1264,7 +1265,7 @@ if_clone_create(const char *name, int rd
if_setrdomain(ifp, rdomain);
NET_UNLOCK();
 
-   return (ret);
+   return (0);
 }
 
 /*
@@ -1275,9 +1276,9 @@ if_clone_destroy(const char *name)
 {
struct if_clone *ifc;
struct ifnet *ifp;
-   int ret;
+   int ret, unit;
 
-   ifc = if_clone_lookup(name, NULL);
+   ifc = if_clone_lookup(name, );
if (ifc == NULL)
return (EINVAL);
 
@@ -1297,6 +1298,7 @@ if_clone_destroy(const char *name)
}
NET_UNLOCK();
ret = (*ifc->ifc_destroy)(ifp);
+   if_clone_rele_unit(ifc, unit);
 
return (ret);
 }
@@ -1342,12 +1344,95 @@ if_clone_lookup(const char *name, int *u
unit = (unit * 10) + (*cp++ - '0');
}
 
-   if (unitp != NULL)
-   *unitp = unit;
+   *unitp = unit;
return (ifc);
 }
 
 /*
+ * Allocate unit for cloned network interface.
+ */
+int if_clone_alloc_unit(struct if_clone *ifc, int unit)
+{
+   int word, bit, ret;
+
+   word = unit / (sizeof(*ifc->ifc_map) * 8);
+   bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+   rw_enter_write(>ifc_lock);
+
+   if(word >= ifc->ifc_map_size) {
+   u_long *map;
+   int size;
+
+   size = word + 1;
+   map = mallocarray(size, sizeof(*map), M_TEMP, M_WAITOK |
+   M_ZERO);
+
+   if (ifc->ifc_map != NULL) {
+   memcpy(map, ifc->ifc_map, ifc->ifc_map_size);
+   free(ifc->ifc_map, M_TEMP,
+   ifc->ifc_map_size * sizeof(*map));
+   }
+
+   ifc->ifc_map = map;
+   ifc->ifc_map_size = size;
+   }
+
+   if (ifc->ifc_map[word] & (1UL << bit))
+   ret = EEXIST;
+   else {
+   ifc->ifc_map[word] |= (1UL << bit);
+   ret = 0;
+   }
+
+   rw_exit_write(>ifc_lock);
+
+   return ret;
+}
+
+/*
+ * Release allocated unit for cloned network interface.
+ */
+void if_clone_rele_unit(struct if_clone *ifc, int unit)
+{
+   int word, bit;
+
+   word = unit / (sizeof(*ifc->ifc_map) * 8);
+   bit = unit % (sizeof(*ifc->ifc_map) * 8);
+
+   rw_enter_write(>ifc_lock);
+   KASSERT(word < ifc->ifc_map_size);
+
+   ifc->ifc_map[word] &= ~(1UL << bit);
+
+   if (ifc->ifc_map[word] == 0) {
+   u_long *map;
+   int size;
+
+   size = ifc->ifc_map_size - 2;
+   while (size>=0) {
+   if (ifc->ifc_map

Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote:
> On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote:
> > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote:
> > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote:
> > > > if_clone_create() has the races caused by context switch.
> > > 
> > > Can you share a backtrace of such race?  Where does the kernel panic?
> > >
> > 
> > This diff was inspired by thread [1]. As I explained [2] here is 3
> > issues that cause panics produced by command below:
> > 
> >  cut begin 
> > for i in 1 2 3; do while true; do ifconfig bridge0 create& \
> > ifconfig bridge0 destroy& done& done
> >  cut end 
> 
> Thanks, I couldn't reproduce it on any of the machines I tried.  Did you
> managed to reproduce it with other pseudo-devices or just with bridge0?
> 
> > My system was stable with the last diff I did for thread [1]. But since
> > this final diff [3] which include fixes for tun(4) is quick and dirty
> > and not for commit I decided to make the diff to fix the races caused by
> > if_clone_create() at first.
> > 
> > I included screenshot with panic.
> 
> Thanks, interesting that the corruption happens on a list that should be
> initialized.  Does that mean the context switch on Thread 1 is happening
> before if_attach_common() is called?
> 
> You said your previous email that there's a context switch.  Do you know
> when it happens?  You could see that in ddb by looking at the backtrace
> of the other CPU.
> 
> Is the context switch leading to the race common to all pseudo-drivers
> or is it in the bridge(4) driver?
> 
> Regarding your solution, do I understand correctly that the goal is to
> serialize all if_clone_create()?  Is it really needed to remember which
> unit is being currently created or can't we just serialize all of them?
> 
> The fact that a lock is not held over the cloning operation is imho
> positive.
> 

I reworked tool for reproduce. Now I avoided fork()/exec() route and it
takes couple of minutes to take panic on 4 cores. Also some screenshots
attached.

I hope anyone else will try it.

 cut begin 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct ifreq ifr;

static void *clone_create(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFCREATE, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

static void *clone_destroy(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFDESTROY, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t thr;
int i;

if(argc!=2){
fprintf(stderr, "usage: %s ifname\n", getprogname());
return 1;
}

if(getuid()!=0){
fprintf(stderr, "should be root\n");
return 1;
}

memset(, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

for(i=0; i<8*4; ++i){
if(pthread_create(, NULL, clone_create, NULL)!=0)
errx(1, "pthread_create(clone_create)");
}

clone_destroy(NULL);

return 0;
}

 cut end 


Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
screenshot


Re: fix races in if_clone_create()

2020-06-29 Thread Vitaliy Makkoveev
On Sat, Jun 27, 2020 at 12:10:24PM +0200, Martin Pieuchot wrote:
> On 27/06/20(Sat) 00:35, Vitaliy Makkoveev wrote:
> > On Fri, Jun 26, 2020 at 09:12:16PM +0200, Martin Pieuchot wrote:
> > > On 26/06/20(Fri) 16:56, Vitaliy Makkoveev wrote:
> > > > if_clone_create() has the races caused by context switch.
> > > 
> > > Can you share a backtrace of such race?  Where does the kernel panic?
> > >
> > 
> > This diff was inspired by thread [1]. As I explained [2] here is 3
> > issues that cause panics produced by command below:
> > 
> >  cut begin 
> > for i in 1 2 3; do while true; do ifconfig bridge0 create& \
> > ifconfig bridge0 destroy& done& done
> >  cut end 
> 
> Thanks, I couldn't reproduce it on any of the machines I tried.  Did you
> managed to reproduce it with other pseudo-devices or just with bridge0?
> 
> > My system was stable with the last diff I did for thread [1]. But since
> > this final diff [3] which include fixes for tun(4) is quick and dirty
> > and not for commit I decided to make the diff to fix the races caused by
> > if_clone_create() at first.
> > 
> > I included screenshot with panic.
> 
> Thanks, interesting that the corruption happens on a list that should be
> initialized.  Does that mean the context switch on Thread 1 is happening
> before if_attach_common() is called?
> 
> You said your previous email that there's a context switch.  Do you know
> when it happens?  You could see that in ddb by looking at the backtrace
> of the other CPU.
> 
> Is the context switch leading to the race common to all pseudo-drivers
> or is it in the bridge(4) driver?
> 
> Regarding your solution, do I understand correctly that the goal is to
> serialize all if_clone_create()?  Is it really needed to remember which
> unit is being currently created or can't we just serialize all of them?
> 
> The fact that a lock is not held over the cloning operation is imho
> positive.
> 

I reworked tool for reproduce. Now I avoided fork()/exec() route and it
takes couple of minutes to take panic on 4 cores. I attached some
screenshots with panics caused by various pseudo-interfaces but my
previous mail was banned. I will try to attach them with separate mails.

I hope someone else will try it. Now switch(4) is the best way to
reproduce.

 cut begin 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

static struct ifreq ifr;

static void *clone_create(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFCREATE, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

static void *clone_destroy(void *arg)
{
int s;

if((s=socket(AF_INET, SOCK_DGRAM, 0))<0)
err(1, "socket()");
while(1){
if(ioctl(s, SIOCIFDESTROY, )<0)
if(errno==EINVAL)
exit(1);
}

return NULL;
}

int main(int argc, char *argv[])
{
pthread_t thr;
int i;

if(argc!=2){
fprintf(stderr, "usage: %s ifname\n", getprogname());
return 1;
}

if(getuid()!=0){
fprintf(stderr, "should be root\n");
return 1;
}

memset(, 0, sizeof(ifr));
strlcpy(ifr.ifr_name, argv[1], sizeof(ifr.ifr_name));

for(i=0; i<8*4; ++i){
if(pthread_create(, NULL, clone_create, NULL)!=0)
errx(1, "pthread_create(clone_create)");
}

clone_destroy(NULL);

return 0;
}

 cut end 



  1   2   3   >