from:"Vitaliy Makkoveev"

getsock() api modification

2015-04-21 Thread Vitaliy Makkoveev

Now fd_getfile() function returns unacquired struct file instance and the
typical usage is:

struct file *fp;

if ((fp = fd_getfile(fpd, fd)) == NULL)
return ENOENT;
if (/* obtained fp is unacceptable */)
return EERROR;
FREF(fp);
/* Do something with fp */
FRELE(fp, p);

It is unacceptable on multiprocessor machine because the instance referenced
by fp can be destroyed between fd_getfile() and FREF() calls. So I want
fd_getfile() returns acquired fp. And the usage would be like this:

struct file *fp;

if ((fp = fd_getfile(fpd, fd)) == NULL)
return ENOENT;
/*
 * fp is acquired here and can't be destroyed
 */
if (/* obtained fp is unacceptable */) {
error = EEROR;
goto out;
}
/* Do something with fp */
out:
FRELE(fp, p);

After this modification, FRELE() call would be required after each successful
fd_getfile() call. FRELE() requires pointer to struct proc instance.
Now we have some wrappers around fd_getfile() which return pointer to
acquired struct file instance. The internals of this wrappers are:
1. get unacquired instance by fd_getfile() call
2. if instance in unacceptable return error
3. FREF() call for acquire it
4. return acquired instance

After fd_getfile() modification the internals would be:
1. get acquired instance by fd_getfile() call
2. if instance in unacceptable release it by FRELE() call and return error
3. return acquired instance

So, additional modifications required for fd_getfile() modification. Pointer
to struct proc instance should be passed to wrapper function too. Struct
proc has field “struct filedesc *p_fd” so “struct filedesc *” arg can be 
replaced
by “struct proc *” We have 2 wrappers now: getsock() and getvnode(). I
suggest starting with getsock().

Index: share/man/man9/file.9
===
RCS file: /cvs/src/share/man/man9/file.9,v
retrieving revision 1.12
diff -u -p -r1.12 file.9
--- share/man/man9/file.9   4 Jun 2013 19:27:06 -   1.12
+++ share/man/man9/file.9   21 Apr 2015 13:27:08 -
@@ -42,7 +42,7 @@
.Ft struct file *
.Fn fd_getfile struct filedesc *fdp int fd
.Ft int
-.Fn getsock struct filedesc *fdp int fd struct file **fpp
+.Fn getsock struct proc *p int fd struct file **fpp
.In sys/file.h
.In sys/filedesc.h
.In sys/vnode.h
@@ -74,21 +74,28 @@ recommended to make complicated kernel A
.Pp
The files are extracted from the file descriptor table using the
functions
-.Fn fd_getfile ,
-.Fn getvnode
+.Fn fd_getfile
and
-.Fn getsock .
+.Fn getvnode .
.Fn fd_getfile
performs all necessary checks to see if the file descriptor number is
within the range of file descriptor table, and if the descriptor is
valid.
-.Fn getsock
-and
.Fn getvnode
-are special cases that besides doing
+is special case that besides doing
+.Fn fd_getfile
+also checks if the descriptor is a vnode, returns the proper
+errno on error and increases the use count with
+.Fn FREF .
+.Pp
+The files are extracted from the process context using the
+function
+.Fn getsock .
+.Fn getsock
+is special case that besides doing
.Fn fd_getfile
-also check if the descriptor is a vnode or socket, return the proper
-errno on error and increase the use count with
+also checks if the descriptor is a socket, returns the proper
+errno on error and increases the use count with
.Fn FREF .
.Sh CONCURRENT ACCESS
Since multiple processes can share the same file descriptor table,
Index: sys/compat/linux/linux_socket.c
===
RCS file: /cvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.60
diff -u -p -r1.60 linux_socket.c
--- sys/compat/linux/linux_socket.c 30 Jan 2015 23:38:49 -  1.60
+++ sys/compat/linux/linux_socket.c 21 Apr 2015 13:28:23 -
@@ -937,7 +937,7 @@ linux_setsockopt(p, v, retval)
if ((error = copyin((caddr_t) uap, (caddr_t) lsa, sizeof lsa)))
return error;

-   if ((error = getsock(p-p_fd, lsa.s, fp)) != 0)
+   if ((error = getsock(p, lsa.s, fp)) != 0)
return error;

level = linux_to_bsd_sopt_level(lsa.level);
Index: sys/kern/subr_log.c
===
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.29
diff -u -p -r1.29 subr_log.c
--- sys/kern/subr_log.c 14 Mar 2015 03:38:50 -  1.29
+++ sys/kern/subr_log.c 21 Apr 2015 13:28:42 -
@@ -334,7 +334,7 @@ logioctl(dev_t dev, u_long com, caddr_t 
case LIOCSFD:
if ((error = suser(p, 0)) != 0)
return (error);
-   if ((error = getsock(p-p_fd, *(int *)data, fp)) != 0)
+   if ((error = getsock(p, *(int *)data, fp)) != 0)
return (error);
if (syslogf)

Re: getsock() api modification

2015-04-30 Thread Vitaliy Makkoveev


On 30 Apr 2015, at 12:49, Martin Pieuchot m...@openbsd.org wrote:

 Your diff makes sense but sadly it is broken, could you send a diff that
 can be applied?

Index: share/man/man9/file.9
===
RCS file: /cvs/src/share/man/man9/file.9,v
retrieving revision 1.12
diff -u -p -r1.12 file.9
--- share/man/man9/file.9   4 Jun 2013 19:27:06 -   1.12
+++ share/man/man9/file.9   30 Apr 2015 13:37:35 -
@@ -42,7 +42,7 @@
 .Ft struct file *
 .Fn fd_getfile struct filedesc *fdp int fd
 .Ft int
-.Fn getsock struct filedesc *fdp int fd struct file **fpp
+.Fn getsock struct proc *p int fd struct file **fpp
 .In sys/file.h
 .In sys/filedesc.h
 .In sys/vnode.h
@@ -74,21 +74,28 @@ recommended to make complicated kernel A
 .Pp
 The files are extracted from the file descriptor table using the
 functions
-.Fn fd_getfile ,
-.Fn getvnode
+.Fn fd_getfile
 and
-.Fn getsock .
+.Fn getvnode .
 .Fn fd_getfile
 performs all necessary checks to see if the file descriptor number is
 within the range of file descriptor table, and if the descriptor is
 valid.
-.Fn getsock
-and
 .Fn getvnode
-are special cases that besides doing
+is special case that besides doing
+.Fn fd_getfile
+also checks if the descriptor is a vnode or socket, returns the proper
+errno on error and increases the use count with
+.Fn FREF .
+.Pp
+The files are extracted from the process context using the
+function
+.Fn getsock .
+.Fn getsock
+is special case that besides doing
 .Fn fd_getfile
-also check if the descriptor is a vnode or socket, return the proper
-errno on error and increase the use count with
+also checks if the descriptor is a socket, returns the proper
+errno on error and increases the use count with
 .Fn FREF .
 .Sh CONCURRENT ACCESS
 Since multiple processes can share the same file descriptor table,
Index: sys/compat/linux/linux_socket.c
===
RCS file: /cvs/src/sys/compat/linux/linux_socket.c,v
retrieving revision 1.60
diff -u -p -r1.60 linux_socket.c
--- sys/compat/linux/linux_socket.c 30 Jan 2015 23:38:49 -  1.60
+++ sys/compat/linux/linux_socket.c 30 Apr 2015 13:37:35 -
@@ -937,7 +937,7 @@ linux_setsockopt(p, v, retval)
if ((error = copyin((caddr_t) uap, (caddr_t) lsa, sizeof lsa)))
return error;
 
-   if ((error = getsock(p-p_fd, lsa.s, fp)) != 0)
+   if ((error = getsock(p, lsa.s, fp)) != 0)
return error;

level = linux_to_bsd_sopt_level(lsa.level);
Index: sys/kern/subr_log.c
===
RCS file: /cvs/src/sys/kern/subr_log.c,v
retrieving revision 1.29
diff -u -p -r1.29 subr_log.c
--- sys/kern/subr_log.c 14 Mar 2015 03:38:50 -  1.29
+++ sys/kern/subr_log.c 30 Apr 2015 13:37:36 -
@@ -334,7 +334,7 @@ logioctl(dev_t dev, u_long com, caddr_t 
case LIOCSFD:
if ((error = suser(p, 0)) != 0)
return (error);
-   if ((error = getsock(p-p_fd, *(int *)data, fp)) != 0)
+   if ((error = getsock(p, *(int *)data, fp)) != 0)
return (error);
if (syslogf)
FRELE(syslogf, p);
Index: sys/kern/uipc_socket.c
===
RCS file: /cvs/src/sys/kern/uipc_socket.c,v
retrieving revision 1.137
diff -u -p -r1.137 uipc_socket.c
--- sys/kern/uipc_socket.c  14 Mar 2015 03:38:51 -  1.137
+++ sys/kern/uipc_socket.c  30 Apr 2015 13:37:36 -
@@ -1076,7 +1076,7 @@ sosplice(struct socket *so, int fd, off_
return (EINVAL);
 
/* Find sosp, the drain socket where data will be spliced into. */
-   if ((error = getsock(curproc-p_fd, fd, fp)) != 0)
+   if ((error = getsock(curproc, fd, fp)) != 0)
return (error);
sosp = fp-f_data;
if (sosp-so_sp == NULL)
Index: sys/kern/uipc_syscalls.c
===
RCS file: /cvs/src/sys/kern/uipc_syscalls.c,v
retrieving revision 1.100
diff -u -p -r1.100 uipc_syscalls.c
--- sys/kern/uipc_syscalls.c14 Mar 2015 03:38:51 -  1.100
+++ sys/kern/uipc_syscalls.c30 Apr 2015 13:37:36 -
@@ -120,7 +120,7 @@ sys_bind(struct proc *p, void *v, regist
struct mbuf *nam;
int error;
 
-   if ((error = getsock(p-p_fd, SCARG(uap, s), fp)) != 0)
+   if ((error = getsock(p, SCARG(uap, s), fp)) != 0)
return (error);
error = sockargs(nam, SCARG(uap, name), SCARG(uap, namelen),
MT_SONAME);
@@ -147,7 +147,7 @@ sys_listen(struct proc *p, void *v, regi
struct file *fp;
int error;
 
-   if ((error = getsock(p-p_fd, SCARG(uap, s), fp)) != 0)
+   if ((error = getsock(p, SCARG(uap, s), fp)) != 0)
return (error);
error = solisten(fp-f_data,

Re: getsock() api modification

2015-04-30 Thread Vitaliy Makkoveev

Mail.app removes spaces from line start :(

File reference count anomaly research and fixup

2015-05-06 Thread Vitaliy Makkoveev

Struct file has reference count f_count field. When f_count  0 file instance
exists, when f_count becomes 0 file instance should be removed. It's simple
but some strange magic exists. When file was created by falloc() it has
f_count == 2. One additional FREF() call was done inside falloc(). After
file was setupped, FILE_SET_MATURE() call do FRELE() and f_count becomes == 1.
Also closef() function exists. It wants f_count be = 2 otherwise it panics.
closef() caller bumps f_count before closef() call just for prevents this
panic. I found this behaviour strange, and I described this before as Strange
magic with f_count Ok, this explanation sucks :) so I dug into cvs
history.

Before sys/sys/file.h rev 1.15 struct file has only one reference counter
f_count. It was used sometimes by direct fp-f_count++ or --fp-f_count
operations. In sys/sys/file.h rev 1.15 another reference counter f_usecount
wa added to struct file. Also new macro FILE_USE() and FILE_UNUSE() were
added for increase and decrease file reference count and they used
f_usecount. Now, those macroses known as FREF() and FRELE(). So, after
sys/sys/file.h rev 1.15 struct file has two reference counters and they are
both used up to sys/sys/file.h rev 1.30 where f_usecount was removed, but
not completely. Within FREF() and FRELE() f_usecount was just replaced by
f_count and existed old fp-f_count increments and decrements haven't be
removed. So after sys/sys/file.h rev 1.30 there are some places where f_count
is bumped twice and decremented twice and other related code has hacks for this
condition.

Now, the legacy of f_usecount coexists with f_count and I think it
should be fixed. I did the diff for this. I did system rebuild on diskless
NFS machine without panics, and kern.nfiles before rebuild is equal to
kern.nfiles after rebuild. So looks like there is no f_count corruption
and file instance leaks. In this diff falloc() returns file instance with
f_count == 1 so FRELE() call removed from FILE_SET_MATURE(). struct proc *
arg removed from FILE_SET_MATURE() because it is unnecessary. closef() wants
f_count = 1 and closef() callers don't do additional bump on f_count.

Really, I think this diff doesn't fix anything :) The code has some places
with direct fp-f_count++ and --fp-f_count operations, the code has some
places with unreferenced fp instances and the code has some places where
FREF() calls on fp instance were made wrong. I think the whole f_count
related stuff should be refactored but even the solution described above
should not be the first step. I think, the right file instance acquisition
(f_count increment) and direct f_count modification cleanups should be done
before.

Index: sys/dev/systrace.c
===
RCS file: /home/cvsync/openbsd-cvs/src/sys/dev/systrace.c,v
retrieving revision 1.75
diff -u -p -r1.75 systrace.c
--- sys/dev/systrace.c  14 Mar 2015 03:38:46 -  1.75
+++ sys/dev/systrace.c  5 May 2015 22:37:31 -
@@ -518,7 +518,7 @@ systraceioctl(dev_t dev, u_long cmd, cad
f-f_ops = systracefops;
f-f_data = (caddr_t) fst;
*(int *)data = fd;
-   FILE_SET_MATURE(f, p);
+   FILE_SET_MATURE(f);
break;
default:
error = EINVAL;
Index: sys/kern/exec_script.c
===
RCS file: /home/cvsync/openbsd-cvs/src/sys/kern/exec_script.c,v
retrieving revision 1.35
diff -u -p -r1.35 exec_script.c
--- sys/kern/exec_script.c  14 Mar 2015 03:38:50 -  1.35
+++ sys/kern/exec_script.c  5 May 2015 22:37:31 -
@@ -185,7 +185,7 @@ check_shell:
fp-f_ops = vnops;
fp-f_data = (caddr_t) scriptvp;
fp-f_flag = FREAD;
-   FILE_SET_MATURE(fp, p);
+   FILE_SET_MATURE(fp);
}
 
/* set up the parameters for the recursive check_exec() call */
Index: sys/kern/kern_descrip.c
===
RCS file: /home/cvsync/openbsd-cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.119
diff -u -p -r1.119 kern_descrip.c
--- sys/kern/kern_descrip.c 30 Apr 2015 21:18:45 -  1.119
+++ sys/kern/kern_descrip.c 5 May 2015 22:37:31 -
@@ -576,8 +576,6 @@ finishdup(struct proc *p, struct file *f
 * closef can deal with that.
 */
oldfp = fdp-fd_ofiles[new];
-   if (oldfp != NULL)
-   FREF(oldfp);
 
fdp-fd_ofiles[new] = fp;
fdp-fd_ofileflags[new] = fdp-fd_ofileflags[old]  ~UF_EXCLOSE;
@@ -620,7 +618,6 @@ fdrelease(struct proc *p, int fd)
fp = *fpp;
if (fp == NULL)
return (EBADF);
-   FREF(fp);
*fpp = NULL;
fd_unused(fdp, fd);
if (fd  fdp-fd_knlistsize)
@@ -897,7 +894,6 @@ restart:
*resultfp = fp;
if (resultfd)
*resultfd = i;
-

dupfdopen() api modification

2015-05-07 Thread Vitaliy Makkoveev

In the not far future fd_getfile() will return the referenced file
instance, so dupfdopen() should be modified for the same reasons
that getsock() and getvnode().

The modified dupfdopen() receives a thread pointer instead of it's
file descriptor table and of it's file descriptor for duplication.
The thread pointer, passed to dupfdopen() and current thread pointer
used within dupfdopen() are the same, so current was replaced by
passed thread pointer. Thread struct has p_dupfd field which is
descriptor for duplication, so it is used instead of removed dfd arg.

Index: sys/kern/kern_descrip.c
===
RCS file: /cvs/src/sys/kern/kern_descrip.c,v
retrieving revision 1.119
diff -u -p -r1.119 kern_descrip.c
--- sys/kern/kern_descrip.c 30 Apr 2015 21:18:45 -  1.119
+++ sys/kern/kern_descrip.c 7 May 2015 23:22:23 -
@@ -1225,8 +1225,9 @@ filedescopen(dev_t dev, int mode, int ty
  * Duplicate the specified descriptor to a free descriptor.
  */
 int
-dupfdopen(struct filedesc *fdp, int indx, int dfd, int mode)
+dupfdopen(struct proc *p, int indx, int mode)
 {
+   struct filedesc *fdp = p-p_fd;
struct file *wfp;
 
fdpassertlocked(fdp);
@@ -1235,10 +1236,10 @@ dupfdopen(struct filedesc *fdp, int indx
 * Assume that the filename was user-specified; applications do
 * not tend to open /dev/fd/# when they can just call dup()
 */
-   if ((curproc-p_p-ps_flags  (PS_SUGIDEXEC | PS_SUGID))) {
-   if (curproc-p_descfd == 255)
+   if ((p-p_p-ps_flags  (PS_SUGIDEXEC | PS_SUGID))) {
+   if (p-p_descfd == 255)
return (EPERM);
-   if (curproc-p_descfd != curproc-p_dupfd)
+   if (p-p_descfd != p-p_dupfd)
return (EPERM);
}
 
@@ -1249,7 +1250,7 @@ dupfdopen(struct filedesc *fdp, int indx
 * because fd_getfile will return NULL if the file at indx is
 * newly created by falloc (FIF_LARVAL).
 */
-   if ((wfp = fd_getfile(fdp, dfd)) == NULL)
+   if ((wfp = fd_getfile(fdp, p-p_dupfd)) == NULL)
return (EBADF);
 
/*
@@ -1263,7 +1264,7 @@ dupfdopen(struct filedesc *fdp, int indx
 
fdp-fd_ofiles[indx] = wfp;
fdp-fd_ofileflags[indx] = (fdp-fd_ofileflags[indx]  UF_EXCLOSE) |
-   (fdp-fd_ofileflags[dfd]  ~UF_EXCLOSE);
+   (fdp-fd_ofileflags[p-p_dupfd]  ~UF_EXCLOSE);
wfp-f_count++;
fd_used(fdp, indx);
return (0);
Index: sys/kern/vfs_syscalls.c
===
RCS file: /cvs/src/sys/kern/vfs_syscalls.c,v
retrieving revision 1.220
diff -u -p -r1.220 vfs_syscalls.c
--- sys/kern/vfs_syscalls.c 7 May 2015 08:53:33 -   1.220
+++ sys/kern/vfs_syscalls.c 7 May 2015 23:22:23 -
@@ -862,8 +862,7 @@ doopenat(struct proc *p, int fd, const c
if ((error = vn_open(nd, flags, cmode)) != 0) {
if (error == ENODEV 
p-p_dupfd = 0   /* XXX from fdopen */
-   (error =
-   dupfdopen(fdp, indx, p-p_dupfd, flags)) == 0) {
+   (error = dupfdopen(p, indx, flags)) == 0) {
closef(fp, p);
*retval = indx;
goto out;
Index: sys/sys/filedesc.h
===
RCS file: /cvs/src/sys/sys/filedesc.h,v
retrieving revision 1.30
diff -u -p -r1.30 filedesc.h
--- sys/sys/filedesc.h  6 May 2015 08:52:17 -   1.30
+++ sys/sys/filedesc.h  7 May 2015 23:22:23 -
@@ -121,7 +121,7 @@ struct filedesc0 {
  * Kernel global variables and routines.
  */
 void   filedesc_init(void);
-intdupfdopen(struct filedesc *, int, int, int);
+intdupfdopen(struct proc *, int, int);
 intfdalloc(struct proc *p, int want, int *result);
 void   fdexpand(struct proc *);
 intfalloc(struct proc *p, struct file **resultfp, int *resultfd);

Re: Make sysctl_file more smp friendly

2015-05-08 Thread Vitaliy Makkoveev

Unfortunately, in 2 cases this diff will increase needed variable
for non existing files too. This is the fixed version.

Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.284
diff -u -p -r1.284 kern_sysctl.c
--- sys/kern/kern_sysctl.c  28 Mar 2015 23:50:55 -  1.284
+++ sys/kern/kern_sysctl.c  8 May 2015 17:08:24 -
@@ -1288,11 +1288,32 @@ sysctl_file(int *name, u_int namelen, ch
if (pr-ps_tracevp)
FILLIT(NULL, NULL, KERN_FILE_TRACE, 
pr-ps_tracevp, pr);
for (i = 0; i  fdp-fd_nfiles; i++) {
-   if ((fp = fdp-fd_ofiles[i]) == NULL)
-   continue;
-   if (!FILE_IS_USABLE(fp))
-   continue;
-   FILLIT(fp, fdp, i, NULL, pr);
+   if (buflen = elem_size  elem_count  0) {
+   fdplock(fdp);
+   fp = fd_getfile(fdp, i);
+   if (fp != NULL ) {
+   fdpunlock(fdp);
+   continue;
+   }
+   fill_file(kf, fp, fdp, i, NULL, NULL,
+   p, show_pointers);
+   fdpunlock(fdp);
+   
+   error = copyout(kf, dp, outsize);
+   if (error)
+   break;
+   dp += elem_size;
+   buflen -= elem_size;
+   elem_count--;
+   } else {
+   fdplock(fdp);
+   fp = fd_getfile(fdp, i);
+   fdpunlock(fdp);
+
+   if (fp == NULL)
+   continue;
+   }
+   needed += elem_size;
}
}
break;
@@ -1316,11 +1337,32 @@ sysctl_file(int *name, u_int namelen, ch
if (pr-ps_tracevp)
FILLIT(NULL, NULL, KERN_FILE_TRACE, 
pr-ps_tracevp, pr);
for (i = 0; i  fdp-fd_nfiles; i++) {
-   if ((fp = fdp-fd_ofiles[i]) == NULL)
-   continue;
-   if (!FILE_IS_USABLE(fp))
-   continue;
-   FILLIT(fp, fdp, i, NULL, pr);
+   if (buflen = elem_size  elem_count  0) {
+   fdplock(fdp);
+   fp = fd_getfile(fdp, i);
+   if (fp != NULL ) {
+   fdpunlock(fdp);
+   continue;
+   }
+   fill_file(kf, fp, fdp, i, NULL, NULL,
+   p, show_pointers);
+   fdpunlock(fdp);
+   
+   error = copyout(kf, dp, outsize);
+   if (error)
+   break;
+   dp += elem_size;
+   buflen -= elem_size;
+   elem_count--;
+   } else {
+   fdplock(fdp);
+   fp = fd_getfile(fdp, i);
+   fdpunlock(fdp);
+
+   if (fp == NULL)
+   continue;
+   }
+   needed += elem_size;
}
}
break;

Re: Make sysctl_file more smp friendly

2015-05-08 Thread Vitaliy Makkoveev

I have questions about sysctl_file():
1. Looks like sysctl_file() should do the same things for KERN_FILE_BYPID and
KERN_FILE_BYUID cases. But pr-ps_textvp (vnode of executable) will be copied
for KERN_FILE_BYPID case only. Is this copying missing for KERN_FILE_BYUID?

2. (arg  -1) check looks be missing for KERN_FILE_BYUID case kvm_getfiles(3)
said that arg can -1 for all users and -1 for processes. So arg  -1 is EINVAL
error for KERN_FILE_BYUID too.

3. In original code FILLIT() does copyout() call, but userland will be noticed
about copyout() error only from the last copyout() call. All errors from the
non-last copyout() calls will be silently dropped. It is expected behaviour?

Re: [patch] config(8) and KARL usage

2020-02-21 Thread Vitaliy Makkoveev



> On 19 Feb 2020, at 20:51, Martin Pieuchot  wrote:
> 
> On 19/02/20(Wed) 14:13, Vitaliy Makkoveev wrote:
>> On Wed, Jan 17, 2018 at 11:40:24AM +0100, Martin Pieuchot wrote:
>>> Hello Sebastien,
>>> 
>>> On 17/01/18(Wed) 10:19, Sebastien Marie wrote:
>>>> [...] 
>>>> kernel modification is desirable in some cases, at least for disabling
>>>> ulpt(4) when using cups with USB printer.
>>> 
>>> Sorry to hijack your thread, but if somebody wants to fix this ulpt(4)
>>> problem permanently here's the plan:
>>> 
>>> - Add the USBD_EXCLUSIVE_USE to usbd_open_pipe() in ulptopen().
>>>   Actually this flag should be the default everywhere.  This should
>>>   prevent open(2) on /dev/ulpt? to work if a userland driver is using
>>>   your printer.
>>> 
>>> - Do some plumbing between libusb/ugen(4)/usb(4) to make it possible
>>>   to submit bulk transfer via /dev/usb?.  The logic in ugenopen()
>>>   should also have the USBD_EXCLUSIVE_USE flag such that it will fail
>>>   if the corresponding /dev/ultp? has already been opened.
>>> 
>>> That should be enough to have CUPS work with GENERIC kernels without
>>> having to disable anything.  I'm here to help/review diffs but since
>>> I don't have a printer myself, I can't do the work.
>>> 
>> 
>> If driver exists for USB device, generic driver for this device will
>> never be attached. So, if ulpt(4) is enabled and attached, corresponding
>> ugen(4) is missing and CUPS/libusb can't work with this printer. I
>> suppose, coexisting with ugen(4) should be allowed.
> 
> Attaching two drivers to the same piece of hardware might confuse the
> stack and the hardware below it.  I don't see anything in your diff
> taking care of the ownership of, at least, interfaces.
> 
> I'd also be afraid that if userland tries to change the configuration of
> the device via the ugen(4) interface that might crash the kernel.  Current
> drivers (or the stack) is written for that.

Original ugen(4) coexist with other drivers in composite USB device case. It 
attached to the first
unsupported interface and it can’t change configuration or interface setting on 
claimed interface.
> 
>> Index: sys/dev/usb/ugen.c
>> ===
>> RCS file: /cvs/src/sys/dev/usb/ugen.c,v
>> retrieving revision 1.101
>> diff -u -p -r1.101 ugen.c
>> --- sys/dev/usb/ugen.c   4 Jan 2020 11:37:33 -   1.101
>> +++ sys/dev/usb/ugen.c   19 Feb 2020 11:07:01 -
>> @@ -222,7 +222,8 @@ ugen_set_config(struct ugen_softc *sc, i
>>  memset(sc->sc_endpoints, 0, sizeof sc->sc_endpoints);
>>  for (ifaceno = 0; ifaceno < cdesc->bNumInterface; ifaceno++) {
>>  DPRINTFN(1,("ugen_set_config: ifaceno %d\n", ifaceno));
>> -if (usbd_iface_claimed(sc->sc_udev, ifaceno)) {
>> +if (!sc->sc_secondary &&
>> +usbd_iface_claimed(sc->sc_udev, ifaceno)) {
>>  DPRINTF(("%s: iface %d not available\n", __func__,
>>  ifaceno));
>>  continue;
> 
> This allows two pieces of code trying to use this interface at the same
> time, what can go wrong?
With my patch, some drivers can destroy endpoints stuff allocated for shared 
interface and crash
kernel. Sorry :( But I suppose, coexisting is possible if ugen(4) will become 
something like simple
wrapper around usbd_open_pipe() calls.

Re: [patch] config(8) and KARL usage

2020-02-19 Thread Vitaliy Makkoveev

On Wed, Jan 17, 2018 at 11:40:24AM +0100, Martin Pieuchot wrote:
> Hello Sebastien,
> 
> On 17/01/18(Wed) 10:19, Sebastien Marie wrote:
> > [...] 
> > kernel modification is desirable in some cases, at least for disabling
> > ulpt(4) when using cups with USB printer.
> 
> Sorry to hijack your thread, but if somebody wants to fix this ulpt(4)
> problem permanently here's the plan:
> 
>  - Add the USBD_EXCLUSIVE_USE to usbd_open_pipe() in ulptopen().
>Actually this flag should be the default everywhere.  This should
>prevent open(2) on /dev/ulpt? to work if a userland driver is using
>your printer.
> 
>  - Do some plumbing between libusb/ugen(4)/usb(4) to make it possible
>to submit bulk transfer via /dev/usb?.  The logic in ugenopen()
>should also have the USBD_EXCLUSIVE_USE flag such that it will fail
>if the corresponding /dev/ultp? has already been opened.
> 
> That should be enough to have CUPS work with GENERIC kernels without
> having to disable anything.  I'm here to help/review diffs but since
> I don't have a printer myself, I can't do the work.
>

If driver exists for USB device, generic driver for this device will
never be attached. So, if ulpt(4) is enabled and attached, corresponding
ugen(4) is missing and CUPS/libusb can't work with this printer. I
suppose, coexisting with ugen(4) should be allowed.

Index: sys/dev/usb/ugen.c
===
RCS file: /cvs/src/sys/dev/usb/ugen.c,v
retrieving revision 1.101
diff -u -p -r1.101 ugen.c
--- sys/dev/usb/ugen.c  4 Jan 2020 11:37:33 -   1.101
+++ sys/dev/usb/ugen.c  19 Feb 2020 11:07:01 -
@@ -222,7 +222,8 @@ ugen_set_config(struct ugen_softc *sc, i
memset(sc->sc_endpoints, 0, sizeof sc->sc_endpoints);
for (ifaceno = 0; ifaceno < cdesc->bNumInterface; ifaceno++) {
DPRINTFN(1,("ugen_set_config: ifaceno %d\n", ifaceno));
-   if (usbd_iface_claimed(sc->sc_udev, ifaceno)) {
+   if (!sc->sc_secondary &&
+   usbd_iface_claimed(sc->sc_udev, ifaceno)) {
DPRINTF(("%s: iface %d not available\n", __func__,
ifaceno));
continue;
Index: sys/dev/usb/usb_subr.c
===
RCS file: /cvs/src/sys/dev/usb/usb_subr.c,v
retrieving revision 1.150
diff -u -p -r1.150 usb_subr.c
--- sys/dev/usb/usb_subr.c  6 Oct 2019 17:11:51 -   1.150
+++ sys/dev/usb/usb_subr.c  19 Feb 2020 11:07:01 -
@@ -885,16 +885,17 @@ usbd_probe_and_attach(struct device *par
DPRINTF(("usbd_probe_and_attach trying device specific drivers\n"));
dv = config_found(parent, , usbd_print);
if (dv) {
-   dev->subdevs = mallocarray(2, sizeof dv, M_USB, M_NOWAIT);
+   /* add 1 for possible ugen and 1 for NULL terminator */
+   dev->subdevs = mallocarray(3, sizeof dv, M_USB, M_NOWAIT);
if (dev->subdevs == NULL) {
err = USBD_NOMEM;
goto fail;
}
-   dev->nsubdev = 2;
+   dev->nsubdev = 3;
dev->subdevs[dev->ndevs++] = dv;
dev->subdevs[dev->ndevs] = 0;
err = USBD_NORMAL_COMPLETION;
-   goto fail;
+   goto generic;
}
 
DPRINTF(("%s: no device specific driver found\n", __func__));
@@ -958,10 +959,7 @@ usbd_probe_and_attach(struct device *par
if (!usbd_iface_claimed(dev, i))
break;
}
-   if (i < nifaces)
-   goto generic;
-else
-   goto fail;
+   goto generic;
}
 
free(dev->subdevs, M_USB, dev->nsubdev * sizeof(*dev->subdevs));
@@ -985,7 +983,8 @@ generic:
dv = config_found(parent, , usbd_print);
if (dv != NULL) {
if (dev->ndevs == 0) {
-   dev->subdevs = mallocarray(2, sizeof dv, M_USB, 
M_NOWAIT);
+   dev->subdevs = mallocarray(2, sizeof dv, M_USB,
+   M_NOWAIT);
if (dev->subdevs == NULL) {
err = USBD_NOMEM;
goto fail;

pppx(4): kill useless rwlocks

2020-04-10 Thread Vitaliy Makkoveev

Subj.


Index: net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.83
diff -u -p -r1.83 if_pppx.c
--- net/if_pppx.c   10 Apr 2020 07:36:52 -  1.83
+++ net/if_pppx.c   10 Apr 2020 11:16:53 -
@@ -132,12 +132,11 @@ struct pppx_dev {
LIST_HEAD(,pppx_if) pxd_pxis;
 };
 
-struct rwlock  pppx_devs_lk = RWLOCK_INITIALIZER("pppxdevs");
 LIST_HEAD(, pppx_dev)  pppx_devs = LIST_HEAD_INITIALIZER(pppx_devs);
 struct pool*pppx_if_pl;
 
 struct pppx_dev*pppx_dev_lookup(dev_t);
-struct pppx_dev*pppx_dev2pxd(dev_t);
+static inline struct pppx_dev  *pppx_dev2pxd(dev_t);
 
 struct pppx_if_key {
int pxik_session_id;
@@ -165,7 +164,6 @@ pppx_if_cmp(const struct pppx_if *a, con
return memcmp(>pxi_key, >pxi_key, sizeof(a->pxi_key));
 }
 
-struct rwlock  pppx_ifs_lk = RWLOCK_INITIALIZER("pppxifs");
 RBT_HEAD(pppx_ifs, pppx_if)pppx_ifs = RBT_INITIALIZER(_ifs);
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
@@ -219,8 +217,6 @@ pppx_dev_lookup(dev_t dev)
struct pppx_dev *pxd;
int unit = minor(dev);
 
-   /* must hold pppx_devs_lk */
-
LIST_FOREACH(pxd, _devs, pxd_entry) {
if (pxd->pxd_unit == unit)
return (pxd);
@@ -229,16 +225,10 @@ pppx_dev_lookup(dev_t dev)
return (NULL);
 }
 
-struct pppx_dev *
+static inline struct pppx_dev *
 pppx_dev2pxd(dev_t dev)
 {
-   struct pppx_dev *pxd;
-
-   rw_enter_read(_devs_lk);
-   pxd = pppx_dev_lookup(dev);
-   rw_exit_read(_devs_lk);
-
-   return (pxd);
+   return pppx_dev_lookup(dev);
 }
 
 void
@@ -251,17 +241,10 @@ int
 pppxopen(dev_t dev, int flags, int mode, struct proc *p)
 {
struct pppx_dev *pxd;
-   int rv = 0;
-
-   rv = rw_enter(_devs_lk, RW_WRITE | RW_INTR);
-   if (rv != 0)
-   return (rv);
 
pxd = pppx_dev_lookup(dev);
-   if (pxd != NULL) {
-   rv = EBUSY;
-   goto out;
-   }
+   if (pxd != NULL)
+   return EBUSY;
 
if (LIST_EMPTY(_devs) && pppx_if_pl == NULL) {
pppx_if_pl = malloc(sizeof(*pppx_if_pl), M_DEVBUF, M_WAITOK);
@@ -279,9 +262,7 @@ pppxopen(dev_t dev, int flags, int mode,
mq_init(>pxd_svcq, 128, IPL_NET);
LIST_INSERT_HEAD(_devs, pxd, pxd_entry);
 
-out:
-   rw_exit(_devs_lk);
-   return (rv);
+   return 0;
 }
 
 int
@@ -588,8 +569,6 @@ pppxclose(dev_t dev, int flags, int mode
struct pppx_dev *pxd;
struct pppx_if  *pxi;
 
-   rw_enter_write(_devs_lk);
-
pxd = pppx_dev_lookup(dev);
 
/* XXX */
@@ -610,7 +589,6 @@ pppxclose(dev_t dev, int flags, int mode
pppx_if_pl = NULL;
}
 
-   rw_exit_write(_devs_lk);
return (0);
 }
 
@@ -620,8 +598,6 @@ pppx_if_next_unit(void)
struct pppx_if *pxi;
int unit = 0;
 
-   rw_assert_wrlock(_ifs_lk);
-
/* this is safe without splnet since we're not modifying it */
do {
int found = 0;
@@ -650,11 +626,9 @@ pppx_if_find(struct pppx_dev *pxd, int s
key.pxik_session_id = session_id;
key.pxik_protocol = protocol;
 
-   rw_enter_read(_ifs_lk);
pxi = RBT_FIND(pppx_ifs, _ifs, (struct pppx_if *));
if (pxi && pxi->pxi_ready == 0)
pxi = NULL;
-   rw_exit_read(_ifs_lk);
 
return pxi;
 }
@@ -828,12 +802,10 @@ pppx_add_session(struct pppx_dev *pxd, s
 #endif
 
/* try to set the interface up */
-   rw_enter_write(_ifs_lk);
unit = pppx_if_next_unit();
if (unit < 0) {
pool_put(pppx_if_pl, pxi);
error = ENOMEM;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
@@ -846,14 +818,12 @@ pppx_add_session(struct pppx_dev *pxd, s
if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
pool_put(pppx_if_pl, pxi);
error = EADDRINUSE;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
panic("%s: pppx_ifs modified while lock was held", __func__);
LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
@@ -935,9 +905,8 @@ pppx_add_session(struct pppx_dev *pxd, s
} else {
if_addrhooks_run(ifp);
}
-   rw_enter_write(_ifs_lk);
+
pxi->pxi_ready = 1;
-   rw_exit_write(_ifs_lk);
 
 out:
return (error);
@@ -1038,11 +1007,9 @@ pppx_if_destroy(struct pppx_dev *pxd, st
if_detach(ifp);
NET_LOCK();
 
-   rw_enter_write(_ifs_lk);
if

Re: pppx(4): kill useless rwlocks

2020-04-11 Thread Vitaliy Makkoveev

Drop this diff please.

pppx(4): kill pppx_ifs_lk

2020-04-11 Thread Vitaliy Makkoveev

It protects nothing.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.83
diff -u -p -r1.83 if_pppx.c
--- sys/net/if_pppx.c   10 Apr 2020 07:36:52 -  1.83
+++ sys/net/if_pppx.c   11 Apr 2020 10:55:00 -
@@ -165,7 +165,6 @@ pppx_if_cmp(const struct pppx_if *a, con
return memcmp(>pxi_key, >pxi_key, sizeof(a->pxi_key));
 }
 
-struct rwlock  pppx_ifs_lk = RWLOCK_INITIALIZER("pppxifs");
 RBT_HEAD(pppx_ifs, pppx_if)pppx_ifs = RBT_INITIALIZER(_ifs);
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
@@ -620,8 +619,6 @@ pppx_if_next_unit(void)
struct pppx_if *pxi;
int unit = 0;
 
-   rw_assert_wrlock(_ifs_lk);
-
/* this is safe without splnet since we're not modifying it */
do {
int found = 0;
@@ -650,11 +647,9 @@ pppx_if_find(struct pppx_dev *pxd, int s
key.pxik_session_id = session_id;
key.pxik_protocol = protocol;
 
-   rw_enter_read(_ifs_lk);
pxi = RBT_FIND(pppx_ifs, _ifs, (struct pppx_if *));
if (pxi && pxi->pxi_ready == 0)
pxi = NULL;
-   rw_exit_read(_ifs_lk);
 
return pxi;
 }
@@ -828,12 +823,10 @@ pppx_add_session(struct pppx_dev *pxd, s
 #endif
 
/* try to set the interface up */
-   rw_enter_write(_ifs_lk);
unit = pppx_if_next_unit();
if (unit < 0) {
pool_put(pppx_if_pl, pxi);
error = ENOMEM;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
@@ -846,14 +839,12 @@ pppx_add_session(struct pppx_dev *pxd, s
if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
pool_put(pppx_if_pl, pxi);
error = EADDRINUSE;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
panic("%s: pppx_ifs modified while lock was held", __func__);
LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
@@ -935,9 +926,8 @@ pppx_add_session(struct pppx_dev *pxd, s
} else {
if_addrhooks_run(ifp);
}
-   rw_enter_write(_ifs_lk);
+
pxi->pxi_ready = 1;
-   rw_exit_write(_ifs_lk);
 
 out:
return (error);
@@ -1038,11 +1028,9 @@ pppx_if_destroy(struct pppx_dev *pxd, st
if_detach(ifp);
NET_LOCK();
 
-   rw_enter_write(_ifs_lk);
if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
panic("%s: pppx_ifs modified while lock was held", __func__);
LIST_REMOVE(pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
pool_put(pppx_if_pl, pxi);
 }

Re: pppx(4): kill pppx_ifs_lk

2020-04-11 Thread Vitaliy Makkoveev

I missed explanation.

All the code protected by this rwlock is KERNEL_LOCK()’ed. The context
switches which can be caused by memory allocation or NET_LOCK() dances
within pppx_add_session() and pppx_if_destroy() are not under acquired
pppx_if_lk. So there is no concurrent access to structures it’s protects.


> On 11 Apr 2020, at 16:59, Vitaliy Makkoveev  wrote:
> 
> It protects nothing.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.83
> diff -u -p -r1.83 if_pppx.c
> --- sys/net/if_pppx.c 10 Apr 2020 07:36:52 -  1.83
> +++ sys/net/if_pppx.c 11 Apr 2020 10:55:00 -
> @@ -165,7 +165,6 @@ pppx_if_cmp(const struct pppx_if *a, con
>   return memcmp(>pxi_key, >pxi_key, sizeof(a->pxi_key));
> }
> 
> -struct rwlockpppx_ifs_lk = 
> RWLOCK_INITIALIZER("pppxifs");
> RBT_HEAD(pppx_ifs, pppx_if)   pppx_ifs = RBT_INITIALIZER(_ifs);
> RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
> 
> @@ -620,8 +619,6 @@ pppx_if_next_unit(void)
>   struct pppx_if *pxi;
>   int unit = 0;
> 
> - rw_assert_wrlock(_ifs_lk);
> -
>   /* this is safe without splnet since we're not modifying it */
>   do {
>   int found = 0;
> @@ -650,11 +647,9 @@ pppx_if_find(struct pppx_dev *pxd, int s
>   key.pxik_session_id = session_id;
>   key.pxik_protocol = protocol;
> 
> - rw_enter_read(_ifs_lk);
>   pxi = RBT_FIND(pppx_ifs, _ifs, (struct pppx_if *));
>   if (pxi && pxi->pxi_ready == 0)
>   pxi = NULL;
> - rw_exit_read(_ifs_lk);
> 
>   return pxi;
> }
> @@ -828,12 +823,10 @@ pppx_add_session(struct pppx_dev *pxd, s
> #endif
> 
>   /* try to set the interface up */
> - rw_enter_write(_ifs_lk);
>   unit = pppx_if_next_unit();
>   if (unit < 0) {
>   pool_put(pppx_if_pl, pxi);
>   error = ENOMEM;
> - rw_exit_write(_ifs_lk);
>   goto out;
>   }
> 
> @@ -846,14 +839,12 @@ pppx_add_session(struct pppx_dev *pxd, s
>   if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
>   pool_put(pppx_if_pl, pxi);
>   error = EADDRINUSE;
> - rw_exit_write(_ifs_lk);
>   goto out;
>   }
> 
>   if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
>   panic("%s: pppx_ifs modified while lock was held", __func__);
>   LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
> - rw_exit_write(_ifs_lk);
> 
>   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
>   ifp->if_mtu = req->pr_peer_mru; /* XXX */
> @@ -935,9 +926,8 @@ pppx_add_session(struct pppx_dev *pxd, s
>   } else {
>   if_addrhooks_run(ifp);
>   }
> - rw_enter_write(_ifs_lk);
> +
>   pxi->pxi_ready = 1;
> - rw_exit_write(_ifs_lk);
> 
> out:
>   return (error);
> @@ -1038,11 +1028,9 @@ pppx_if_destroy(struct pppx_dev *pxd, st
>   if_detach(ifp);
>   NET_LOCK();
> 
> - rw_enter_write(_ifs_lk);
>   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
>   panic("%s: pppx_ifs modified while lock was held", __func__);
>   LIST_REMOVE(pxi, pxi_list);
> - rw_exit_write(_ifs_lk);
> 
>   pool_put(pppx_if_pl, pxi);
> }

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-07 Thread Vitaliy Makkoveev

On Tue, Apr 07, 2020 at 04:43:55PM +0200, Martin Pieuchot wrote:
> On 07/04/20(Tue) 17:14, Vitaliy Makkoveev wrote:
> > As Claudio Jeker noticed, NET_LOCK() can release KERNEL_LOCK(). pppx(4)
> > code has some NET_LOCK() dances which make it unsafe. [...]
> 
> The easiest way to fix that is to move if_detach() out of pppx_if_destroy().

Yes, but we are going to move pppx(4) code out from locking. My diff
prevents concurent access to pxi_if by pppx_del_session(). Yes
pppx_del_session() can grab pxi_if owned by other threads (in the
future, after KERNEL_LOCK() will gone), but after refcounters this will
be fine and we can push NET_LOCK() deeper.

> It generally makes sense to call if_detach() first then free/close the
> descriptor of a driver.  However some drivers have callbacks and in that
> case you might want to teardown those first then call if_detach().
>

All the code from pppx_if_destroy() and pppx_add_session() has the wrong
ordered netif usage.

> if_detach() will require the NET_LOCK() for some time.  However
> pseudo-driver should start protecting their own data structure with
> different locks.
>

Re: pppx(4): kill forgotten splx(9) stuff

2020-04-07 Thread Vitaliy Makkoveev

On Tue, Apr 07, 2020 at 01:51:45PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Apr 07, 2020 at 11:54:01AM +0200, Claudio Jeker wrote:
> > Unsure about this one here. I would prefer if the panic remained for now
> > (mainly because of the XXXSMP NET_UNLOCK() dance just above). I wonder if 
> > the
> > order of this could not be modified so that the NET_LOCK is released after
> > the RBT_REMOVE.
> Well, let's clenup these dances before. Diff below just breaks giant
> NET_LOCK() in pppxioctl() to many small.
Will be required in the distant future, forget please :)
> 
> Index: net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.81
> diff -u -p -r1.81 if_pppx.c
> --- net/if_pppx.c 7 Apr 2020 07:11:22 -   1.81
> +++ net/if_pppx.c 7 Apr 2020 10:46:34 -
> @@ -433,7 +433,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>   struct pppx_dev *pxd = pppx_dev2pxd(dev);
>   int error = 0;
>  
> - NET_LOCK();
>   switch (cmd) {
>   case PIPEXSMODE:
>   /*
> @@ -447,46 +446,59 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>   break;
>  
>   case PIPEXASESSION:
> + NET_LOCK();
>   error = pppx_add_session(pxd,
>   (struct pipex_session_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case PIPEXDSESSION:
> + NET_LOCK();
>   error = pppx_del_session(pxd,
>   (struct pipex_session_close_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case PIPEXCSESSION:
> + NET_LOCK();
>   error = pppx_config_session(pxd,
>   (struct pipex_session_config_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case PIPEXGSTAT:
> + NET_LOCK();
>   error = pppx_get_stat(pxd,
>   (struct pipex_session_stat_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case PIPEXGCLOSED:
> + NET_LOCK();
>   error = pppx_get_closed(pxd,
>   (struct pipex_session_list_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case PIPEXSIFDESCR:
> + NET_LOCK();
>   error = pppx_set_session_descr(pxd,
>   (struct pipex_session_descr_req *)addr);
> + NET_UNLOCK();
>   break;
>  
>   case FIONBIO:
>   break;
>   case FIONREAD:
> + NET_LOCK();
>   *(int *)addr = mq_hdatalen(>pxd_svcq);
> + NET_UNLOCK();
>   break;
>  
>   default:
>   error = ENOTTY;
>   break;
>   }
> - NET_UNLOCK();
>  
>   return (error);
>  }

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-07 Thread Vitaliy Makkoveev

On Tue, Apr 07, 2020 at 06:38:11PM +0300, Vitaliy Makkoveev wrote:
> On Tue, Apr 07, 2020 at 04:43:55PM +0200, Martin Pieuchot wrote:
> > On 07/04/20(Tue) 17:14, Vitaliy Makkoveev wrote:
> > > As Claudio Jeker noticed, NET_LOCK() can release KERNEL_LOCK(). pppx(4)
> > > code has some NET_LOCK() dances which make it unsafe. [...]
> > 
> > The easiest way to fix that is to move if_detach() out of pppx_if_destroy().
> 
> Yes, but we are going to move pppx(4) code out from locking. My diff
> prevents concurent access to pxi_if by pppx_del_session(). Yes
> pppx_del_session() can grab pxi_if owned by other threads (in the
> future, after KERNEL_LOCK() will gone), but after refcounters this will
> be fine and we can push NET_LOCK() deeper.
>

In other words my diff kills the unlocked gap between pppx_if_find() and
pppx_if_destroy(). Concurent pppx_del_session() can grab pppx_if (not
for now) in this gap. Also I wish to avoid dances with twice bumped
refcounter here in future.

> > It generally makes sense to call if_detach() first then free/close the
> > descriptor of a driver.  However some drivers have callbacks and in that
> > case you might want to teardown those first then call if_detach().
> >
> 
> All the code from pppx_if_destroy() and pppx_add_session() has the wrong
> ordered netif usage.
> 
> > if_detach() will require the NET_LOCK() for some time.  However
> > pseudo-driver should start protecting their own data structure with
> > different locks.
> >

pppx(4): prevent concurency with pppx_if_destroy()

2020-04-07 Thread Vitaliy Makkoveev

As Claudio Jeker noticed, NET_LOCK() can release KERNEL_LOCK(). pppx(4)
code has some NET_LOCK() dances which make it unsafe. Concurent thread
can receive CPU and enter to pppx_if_destroy() while we dance with
NET_LOCK(). The idea is to deny access to pxi at destruction stage.
If pxi_if is removed from pppx_ifs tree under pppx_ifs_lk nobody will
access to it after pppx_ifs_lk release. Already used pppx_if can't be
accessed by pppx_del_session() because all this code is under
NET_LOCK().

I don't want to do all pxi_if destruction under pppx_ifs_lk. So new
function pppx_if_find_locked() introduced. It does the same what
pppx_if_find() does but pppx_ifs_lk should be held.

So grab the lock, find and remove pxi_if from tree and list, realease
lock, kill pppx_if.

I wish to add refconters to pppx_if in future to protect it the tun(4)
style. This diff will be the first step in this direction.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_pppx.c
--- sys/net/if_pppx.c   7 Apr 2020 07:11:22 -   1.81
+++ sys/net/if_pppx.c   7 Apr 2020 13:41:07 -
@@ -170,7 +170,8 @@ RBT_HEAD(pppx_ifs, pppx_if) pppx_ifs = R
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
 intpppx_if_next_unit(void);
-struct pppx_if *pppx_if_find(struct pppx_dev *, int, int);
+struct pppx_if *pppx_if_find_locked(struct pppx_dev *, int, int);
+static inline struct pppx_if *pppx_if_find(struct pppx_dev *, int, int);
 intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
@@ -594,8 +595,19 @@ pppxclose(dev_t dev, int flags, int mode
 
/* XXX */
NET_LOCK();
-   while ((pxi = LIST_FIRST(>pxd_pxis)))
+   rw_enter_write(_ifs_lk);
+   while ((pxi = LIST_FIRST(>pxd_pxis))) {
+   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
+   panic("%s: pppx_ifs modified while lock was held",
+   __func__);
+   LIST_REMOVE(pxi, pxi_list);
+   rw_exit_write(_ifs_lk);
+
pppx_if_destroy(pxd, pxi);
+
+   rw_enter_write(_ifs_lk);
+   }
+   rw_exit_write(_ifs_lk);
NET_UNLOCK();
 
LIST_REMOVE(pxd, pxd_entry);
@@ -641,24 +653,37 @@ pppx_if_next_unit(void)
 }
 
 struct pppx_if *
-pppx_if_find(struct pppx_dev *pxd, int session_id, int protocol)
+pppx_if_find_locked(struct pppx_dev *pxd, int session_id, int protocol)
 {
struct pppx_if *s, *p;
+
+   rw_assert_anylock(_ifs_lk);
+
s = malloc(sizeof(*s), M_DEVBUF, M_WAITOK | M_ZERO);
 
s->pxi_key.pxik_session_id = session_id;
s->pxi_key.pxik_protocol = protocol;
 
-   rw_enter_read(_ifs_lk);
p = RBT_FIND(pppx_ifs, _ifs, s);
if (p && p->pxi_ready == 0)
p = NULL;
-   rw_exit_read(_ifs_lk);
 
free(s, M_DEVBUF, sizeof(*s));
return (p);
 }
 
+static inline struct pppx_if *
+pppx_if_find(struct pppx_dev *pxd, int session_id, int protocol)
+{
+   struct pppx_if *pxi;
+
+   rw_enter_read(_ifs_lk);
+   pxi = pppx_if_find_locked(pxd, session_id, protocol);
+   rw_exit_read(_ifs_lk);
+
+   return pxi;
+}
+
 int
 pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
 {
@@ -948,14 +973,22 @@ pppx_del_session(struct pppx_dev *pxd, s
 {
struct pppx_if *pxi;
 
-   pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
+   rw_enter_write(_ifs_lk);
+   pxi = pppx_if_find_locked(pxd, req->pcr_session_id,
+   req->pcr_protocol);
if (pxi == NULL)
return (EINVAL);
 
+   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
+   panic("%s: pppx_ifs modified while lock was held", __func__);
+   LIST_REMOVE(pxi, pxi_list);
+   rw_exit_write(_ifs_lk);
+
req->pcr_stat = pxi->pxi_session.stat;
 
pppx_if_destroy(pxd, pxi);
-   return (0);
+
+   return 0;
 }
 
 int
@@ -1037,12 +1070,6 @@ pppx_if_destroy(struct pppx_dev *pxd, st
NET_UNLOCK();
if_detach(ifp);
NET_LOCK();
-
-   rw_enter_write(_ifs_lk);
-   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
-   panic("%s: pppx_ifs modified while lock was held", __func__);
-   LIST_REMOVE(pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
pool_put(pppx_if_pl, pxi);
 }

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-07 Thread Vitaliy Makkoveev

Forgot to release lock in pppx_del_session() error case...

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_pppx.c
--- sys/net/if_pppx.c   7 Apr 2020 07:11:22 -   1.81
+++ sys/net/if_pppx.c   7 Apr 2020 14:39:28 -
@@ -170,7 +170,8 @@ RBT_HEAD(pppx_ifs, pppx_if) pppx_ifs = R
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
 intpppx_if_next_unit(void);
-struct pppx_if *pppx_if_find(struct pppx_dev *, int, int);
+struct pppx_if *pppx_if_find_locked(struct pppx_dev *, int, int);
+static inline struct pppx_if *pppx_if_find(struct pppx_dev *, int, int);
 intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
@@ -594,8 +595,19 @@ pppxclose(dev_t dev, int flags, int mode
 
/* XXX */
NET_LOCK();
-   while ((pxi = LIST_FIRST(>pxd_pxis)))
+   rw_enter_write(_ifs_lk);
+   while ((pxi = LIST_FIRST(>pxd_pxis))) {
+   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
+   panic("%s: pppx_ifs modified while lock was held",
+   __func__);
+   LIST_REMOVE(pxi, pxi_list);
+   rw_exit_write(_ifs_lk);
+   
pppx_if_destroy(pxd, pxi);
+   
+   rw_enter_write(_ifs_lk);
+   }
+   rw_exit_write(_ifs_lk);
NET_UNLOCK();
 
LIST_REMOVE(pxd, pxd_entry);
@@ -641,24 +653,37 @@ pppx_if_next_unit(void)
 }
 
 struct pppx_if *
-pppx_if_find(struct pppx_dev *pxd, int session_id, int protocol)
+pppx_if_find_locked(struct pppx_dev *pxd, int session_id, int protocol)
 {
struct pppx_if *s, *p;
+
+   rw_assert_anylock(_ifs_lk);
+
s = malloc(sizeof(*s), M_DEVBUF, M_WAITOK | M_ZERO);
 
s->pxi_key.pxik_session_id = session_id;
s->pxi_key.pxik_protocol = protocol;
 
-   rw_enter_read(_ifs_lk);
p = RBT_FIND(pppx_ifs, _ifs, s);
if (p && p->pxi_ready == 0)
p = NULL;
-   rw_exit_read(_ifs_lk);
 
free(s, M_DEVBUF, sizeof(*s));
return (p);
 }
 
+static inline struct pppx_if *
+pppx_if_find(struct pppx_dev *pxd, int session_id, int protocol)
+{
+   struct pppx_if *pxi;
+
+   rw_enter_read(_ifs_lk);
+   pxi = pppx_if_find_locked(pxd, session_id, protocol);
+   rw_exit_read(_ifs_lk);
+
+   return pxi;
+}
+
 int
 pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
 {
@@ -948,14 +973,24 @@ pppx_del_session(struct pppx_dev *pxd, s
 {
struct pppx_if *pxi;
 
-   pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
-   if (pxi == NULL)
+   rw_enter_write(_ifs_lk);
+   pxi = pppx_if_find_locked(pxd, req->pcr_session_id,
+   req->pcr_protocol);
+   if (pxi == NULL) {
+   rw_exit_write(_ifs_lk);
return (EINVAL);
+   }
+
+   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
+   panic("%s: pppx_ifs modified while lock was held", __func__);
+   LIST_REMOVE(pxi, pxi_list);
+   rw_exit_write(_ifs_lk);
 
req->pcr_stat = pxi->pxi_session.stat;
 
pppx_if_destroy(pxd, pxi);
-   return (0);
+
+   return 0;
 }
 
 int
@@ -1037,12 +1072,6 @@ pppx_if_destroy(struct pppx_dev *pxd, st
NET_UNLOCK();
if_detach(ifp);
NET_LOCK();
-
-   rw_enter_write(_ifs_lk);
-   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
-   panic("%s: pppx_ifs modified while lock was held", __func__);
-   LIST_REMOVE(pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
pool_put(pppx_if_pl, pxi);
 }

pppx(4): kill forgotten splx(9) stuff

2020-04-07 Thread Vitaliy Makkoveev

pppx_if containing tree and per pppx_dev list are protected by rwlock so
these splx(9) related dances and commentaries are not actual.
Also pxd_svcq protected by NET_LOCK().

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_pppx.c
--- sys/net/if_pppx.c   7 Apr 2020 07:11:22 -   1.81
+++ sys/net/if_pppx.c   7 Apr 2020 09:06:35 -
@@ -126,7 +126,7 @@ struct pppx_dev {
struct selinfo  pxd_wsel;
struct mutexpxd_wsel_mtx;
 
-   /* queue of packets for userland to service - protected by splnet */
+   /* queue of packets for userland to service - protected by NET_LOCK() */
struct mbuf_queue   pxd_svcq;
int pxd_waiting;
LIST_HEAD(,pppx_if) pxd_pxis;
@@ -622,7 +622,6 @@ pppx_if_next_unit(void)
 
rw_assert_wrlock(_ifs_lk);
 
-   /* this is safe without splnet since we're not modifying it */
do {
int found = 0;
RBT_FOREACH(pxi, pppx_ifs, _ifs) {
@@ -842,7 +841,6 @@ pppx_add_session(struct pppx_dev *pxd, s
pxi->pxi_key.pxik_protocol = req->pr_protocol;
pxi->pxi_dev = pxd;
 
-   /* this is safe without splnet since we're not modifying it */
if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
pool_put(pppx_if_pl, pxi);
error = EADDRINUSE;
@@ -850,8 +848,7 @@ pppx_add_session(struct pppx_dev *pxd, s
goto out;
}
 
-   if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
-   panic("%s: pppx_ifs modified while lock was held", __func__);
+   RBT_INSERT(pppx_ifs, _ifs, pxi);
LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
rw_exit_write(_ifs_lk);
 
@@ -1039,8 +1036,7 @@ pppx_if_destroy(struct pppx_dev *pxd, st
NET_LOCK();
 
rw_enter_write(_ifs_lk);
-   if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
-   panic("%s: pppx_ifs modified while lock was held", __func__);
+   RBT_REMOVE(pppx_ifs, _ifs, pxi);
LIST_REMOVE(pxi, pxi_list);
rw_exit_write(_ifs_lk);

Re: pppx(4): kill forgotten splx(9) stuff

2020-04-07 Thread Vitaliy Makkoveev

On Tue, Apr 07, 2020 at 11:54:01AM +0200, Claudio Jeker wrote:
> Unsure about this one here. I would prefer if the panic remained for now
> (mainly because of the XXXSMP NET_UNLOCK() dance just above). I wonder if the
> order of this could not be modified so that the NET_LOCK is released after
> the RBT_REMOVE.
Well, let's clenup these dances before. Diff below just breaks giant
NET_LOCK() in pppxioctl() to many small.

Index: net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.81
diff -u -p -r1.81 if_pppx.c
--- net/if_pppx.c   7 Apr 2020 07:11:22 -   1.81
+++ net/if_pppx.c   7 Apr 2020 10:46:34 -
@@ -433,7 +433,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
struct pppx_dev *pxd = pppx_dev2pxd(dev);
int error = 0;
 
-   NET_LOCK();
switch (cmd) {
case PIPEXSMODE:
/*
@@ -447,46 +446,59 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
break;
 
case PIPEXASESSION:
+   NET_LOCK();
error = pppx_add_session(pxd,
(struct pipex_session_req *)addr);
+   NET_UNLOCK();
break;
 
case PIPEXDSESSION:
+   NET_LOCK();
error = pppx_del_session(pxd,
(struct pipex_session_close_req *)addr);
+   NET_UNLOCK();
break;
 
case PIPEXCSESSION:
+   NET_LOCK();
error = pppx_config_session(pxd,
(struct pipex_session_config_req *)addr);
+   NET_UNLOCK();
break;
 
case PIPEXGSTAT:
+   NET_LOCK();
error = pppx_get_stat(pxd,
(struct pipex_session_stat_req *)addr);
+   NET_UNLOCK();
break;
 
case PIPEXGCLOSED:
+   NET_LOCK();
error = pppx_get_closed(pxd,
(struct pipex_session_list_req *)addr);
+   NET_UNLOCK();
break;
 
case PIPEXSIFDESCR:
+   NET_LOCK();
error = pppx_set_session_descr(pxd,
(struct pipex_session_descr_req *)addr);
+   NET_UNLOCK();
break;
 
case FIONBIO:
break;
case FIONREAD:
+   NET_LOCK();
*(int *)addr = mq_hdatalen(>pxd_svcq);
+   NET_UNLOCK();
break;
 
default:
error = ENOTTY;
break;
}
-   NET_UNLOCK();
 
return (error);
 }

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-07 Thread Vitaliy Makkoveev




> On 7 Apr 2020, at 17:43, Martin Pieuchot  wrote:
> 
> On 07/04/20(Tue) 17:14, Vitaliy Makkoveev wrote:
>> As Claudio Jeker noticed, NET_LOCK() can release KERNEL_LOCK(). pppx(4)
>> code has some NET_LOCK() dances which make it unsafe. [...]
> 
> The easiest way to fix that is to move if_detach() out of pppx_if_destroy().

pppxioctl() is under KERNEL_LOCK(). pppxioctl() holds NET_LOCK() it’s whole
runtime. So your suggestion is (pseudo code):

KERNEL_LOCK();

pppx_ioctl()
{
NET_LOCK();

switch() {
case PIPEXDSESSION:
NET_UNLOCK();
/*
 * KERNEL_LOCK() can be released here and concurrent
 * thread with PIPEXDSESSION case will enter here
 */
if_detach();
NET_LOCK();

pppx_if_destroy();
break;
}

NET_UNLOCK();
}

KERNEL_UNLOCK();

I understood well?
> 
> It generally makes sense to call if_detach() first then free/close the
> descriptor of a driver.  However some drivers have callbacks and in that
> case you might want to teardown those first then call if_detach().
> 
> if_detach() will require the NET_LOCK() for some time.  However
> pseudo-driver should start protecting their own data structure with
> different locks.
>

pipex(4) fix: check session existence before creation

2020-04-06 Thread Vitaliy Makkoveev

Deny to create pipex_session which is already exist. Newly created
session will be placed to list head so the caller of
pipex_*_lookup_session() will receive wrong session.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.79
diff -u -p -r1.79 if_pppx.c
--- sys/net/if_pppx.c   6 Apr 2020 12:31:30 -   1.79
+++ sys/net/if_pppx.c   6 Apr 2020 13:47:26 -
@@ -719,6 +719,11 @@ pppx_add_session(struct pppx_dev *pxd, s
return (EPROTONOSUPPORT);
}
 
+   session = pipex_lookup_by_session_id(req->pr_protocol,
+   req->pr_session_id);
+   if (session)
+   return (EEXIST);
+
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
if (pxi == NULL)
return (ENOMEM);
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.112
diff -u -p -r1.112 pipex.c
--- sys/net/pipex.c 6 Apr 2020 13:14:04 -   1.112
+++ sys/net/pipex.c 6 Apr 2020 13:47:33 -
@@ -312,6 +312,11 @@ pipex_add_session(struct pipex_session_r
return (EPROTONOSUPPORT);
}
 
+   session = pipex_lookup_by_session_id(req->pr_protocol,
+   req->pr_session_id);
+   if (session)
+   return (EEXIST);
+
/* prepare a new session */
session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
session->state = PIPEX_STATE_OPENED;

Re: pipex(4) fix: check session existence before creation

2020-04-06 Thread Vitaliy Makkoveev




> On 6 Apr 2020, at 17:37, Claudio Jeker  wrote:
> 
> On Mon, Apr 06, 2020 at 07:54:20PM +0300, Vitaliy Makkoveev wrote:
>> Deny to create pipex_session which is already exist. Newly created
>> session will be placed to list head so the caller of
>> pipex_*_lookup_session() will receive wrong session.
> 
> I think in the pppx(4) case the code is already doing this check in the
> RBT_FIND() on line 835. Still I think this is a good thing to add.
> OK claudio@
> 

In pppx(4) layer not in pipex(4). Without this check pppx(4) can override 
pppac(4) owned session.

> 
>> Index: sys/net/if_pppx.c
>> ===
>> RCS file: /cvs/src/sys/net/if_pppx.c,v
>> retrieving revision 1.79
>> diff -u -p -r1.79 if_pppx.c
>> --- sys/net/if_pppx.c6 Apr 2020 12:31:30 -   1.79
>> +++ sys/net/if_pppx.c6 Apr 2020 13:47:26 -
>> @@ -719,6 +719,11 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  return (EPROTONOSUPPORT);
>>  }
>> 
>> +session = pipex_lookup_by_session_id(req->pr_protocol,
>> +req->pr_session_id);
>> +if (session)
>> +return (EEXIST);
>> +
>>  pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
>>  if (pxi == NULL)
>>  return (ENOMEM);
>> Index: sys/net/pipex.c
>> ===
>> RCS file: /cvs/src/sys/net/pipex.c,v
>> retrieving revision 1.112
>> diff -u -p -r1.112 pipex.c
>> --- sys/net/pipex.c  6 Apr 2020 13:14:04 -   1.112
>> +++ sys/net/pipex.c  6 Apr 2020 13:47:33 -
>> @@ -312,6 +312,11 @@ pipex_add_session(struct pipex_session_r
>>  return (EPROTONOSUPPORT);
>>  }
>> 
>> +session = pipex_lookup_by_session_id(req->pr_protocol,
>> +req->pr_session_id);
>> +if (session)
>> +return (EEXIST);
>> +
>>  /* prepare a new session */
>>  session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
>>  session->state = PIPEX_STATE_OPENED;
>> 
> 
> -- 
> :wq Claudio

pppx(4): kill needless check

2020-04-06 Thread Vitaliy Makkoveev

Just quick cleanup.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.79
diff -u -p -r1.79 if_pppx.c
--- sys/net/if_pppx.c   6 Apr 2020 12:31:30 -   1.79
+++ sys/net/if_pppx.c   6 Apr 2020 19:22:13 -
@@ -720,8 +720,6 @@ pppx_add_session(struct pppx_dev *pxd, s
}
 
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
-   if (pxi == NULL)
-   return (ENOMEM);
 
session = >pxi_session;
ifp = >pxi_if;

Re: Simplify NET_LOCK() variations

2020-04-12 Thread Vitaliy Makkoveev




> On 12 Apr 2020, at 16:26, Martin Pieuchot  wrote:
> 
> The existing variations of the NET_LOCK() macros are confusing.  We're
> now all aware of this fact.  So let's keep them simple to prevent future
> mistakes :)
> 
> The diff below reduces the current set of methods to the following:
> 
>   NET_LOCK()/NET_UNLOCK()
>   NET_ASSERT_LOCKED()
>   NET_ASSERT_UNLOCKED()
> 
> On top of those, two extra methods are provided for a *very specific*
> purpose:
> 
>   NET_RLOCK_IN_IOCTL()
>   NET_RUNLOCK_IN_IOCTL()
> 
> "IN_IOCTL()" mean they should only be used in ioctl(2) context ;)  The
> purpose is to keep previous behavior where read-only ioctl(2) dot not
> wait for packet processing to finish. 
> 
> To achieve this NET_LOCK() and NET_UNLOCK() now contain some magic to
> ensure the only packet processing thread grabbing a read-lock is the
> softnet thread.  In other words, one doesn't need to be aware of all
> this magic, just imagine that the Network Stack is big-locked and use
> NET_LOCK()/NET_UNLOCK() everywhere.
> 
> If you think this is an improvement.  I'd like to know if there is a
> better way to implement the magic in NET_LOCK().  String comparison is
> not great ;)

What about new special P_ flag? Like P_SOFTDEP used by syncerproc.

> 
> Comments?  What do you think about this?  I'm interested to hear all of
> you, 'case I'd like to know how does everyone perceive this complexity
> related to locks.
> 
> Thanks for reading.
> 
> PS: macros have been converted to functions to avoid pulling more
> headers in , the magic requires .
> 
> Index: net/if.c
> ===
> RCS file: /cvs/src/sys/net/if.c,v
> retrieving revision 1.603
> diff -u -p -r1.603 if.c
> --- net/if.c  12 Apr 2020 07:04:03 -  1.603
> +++ net/if.c  12 Apr 2020 12:58:10 -
> @@ -250,6 +250,60 @@ struct task if_input_task_locked = TASK_
> struct rwlock netlock = RWLOCK_INITIALIZER("netlock");
> 
> /*
> + * Network stack data structures are, unless stated otherwise, protected
> + * by the NET_LOCK().  It's a single non-recursive lock for the whole
> + * subsystem.
> + */
> +void
> +NET_LOCK(void)
> +{
> + struct proc *p = curproc;
> +
> + /*
> +  * The "softnet" thread should be the only thread processing
> +  * packets without holding an exclusive lock.  This is done
> +  * to allow read-only ioctl(2) to not block.
> +  */
> + if ((p->p_flag & P_SYSTEM) &&
> + (strncmp(p->p_p->ps_comm, "softnet", 7) == 0))
> + rw_enter_read();
> + else
> + rw_enter_write();
> +}
> +
> +void
> +NET_UNLOCK(void)
> +{
> + struct proc *p = curproc;
> +
> + if ((p->p_flag & P_SYSTEM) &&
> + (strncmp(p->p_p->ps_comm, "softnet", 7) == 0))
> + rw_exit_read();
> + else
> + rw_exit_write();
> +}
> +
> +/*
> + * Reader version of NET_LOCK() to be used in ioctl(2) path only.
> + *
> + * Can be grabbed instead of the exclusive version when no field
> + * protected by the NET_LOCK() is modified by the ioctl(2).
> + */
> +void
> +NET_RLOCK_IN_IOCTL(void)
> +{
> + KASSERT((curproc->p_flag & P_SYSTEM) == 0);
> + rw_enter_read();
> +}
> +
> +void
> +NET_RUNLOCK_IN_IOCTL(void)
> +{
> + KASSERT((curproc->p_flag & P_SYSTEM) == 0);
> + rw_exit_read();
> +}
> +
> +/*
>  * Network interface utility routines.
>  */
> void
> @@ -936,7 +990,7 @@ if_input_process(struct ifnet *ifp, stru
>*
>* Since we have a NET_LOCK() we also use it to serialize access
>* to PF globals, pipex globals, unicast and multicast addresses
> -  * lists.
> +  * lists and the socket layer.
>*/
>   NET_LOCK();
>   while ((m = ml_dequeue(ml)) != NULL)
> @@ -2338,27 +2392,27 @@ ifioctl_get(u_long cmd, caddr_t data)
> 
>   switch(cmd) {
>   case SIOCGIFCONF:
> - NET_RLOCK();
> + NET_RLOCK_IN_IOCTL();
>   error = ifconf(data);
> - NET_RUNLOCK();
> + NET_RUNLOCK_IN_IOCTL();
>   return (error);
>   case SIOCIFGCLONERS:
>   error = if_clone_list((struct if_clonereq *)data);
>   return (error);
>   case SIOCGIFGMEMB:
> - NET_RLOCK();
> + NET_RLOCK_IN_IOCTL();
>   error = if_getgroupmembers(data);
> - NET_RUNLOCK();
> + NET_RUNLOCK_IN_IOCTL();
>   return (error);
>   case SIOCGIFGATTR:
> - NET_RLOCK();
> + NET_RLOCK_IN_IOCTL();
>   error = if_getgroupattribs(data);
> - NET_RUNLOCK();
> + NET_RUNLOCK_IN_IOCTL();
>   return (error);
>   case SIOCGIFGLIST:
> - NET_RLOCK();
> + NET_RLOCK_IN_IOCTL();
>   error = if_getgrouplist(data);
> - NET_RUNLOCK();
> + NET_RUNLOCK_IN_IOCTL();
>   return (error);
>   }
> 
> @@ -2366,7 +2420,7

Experiment with unlocking PF_UNIX sockets

2020-04-19 Thread Vitaliy Makkoveev

Diff below is _not_ for commit. It's just an experiment I wish to share.

Socket layer is already protected by solock(). Inet sockets are
protected by NET_LOCK() which is grabbed by solock(). All other sockets
are still under KERNEL_LOCK().

My suggestion is to implement another lock, identical to NET_LOCK() and
grab it within solock() for PF_UNIX sockets. I suggest solock() is
already placed everythere it's required :) struct file is already
mp-safe.

Diff below implements UNIXDOMAIN_LOCK(). It's just rwlock
"unixdomainlock" like NET_LOCK()'s "netlock".

solock(), sounlock() and soassertlocked() deal with UNIXDOMAIN_LOCK() in
PF_UNIX case. sosleep_nsec() uses unixdomainlock directly in PF_UNIX case.

Garbage collector unp_gc() placed to systqmp instead of systq. unp_gc()
grabs UNIXDOMAIN_LOCK().

Vnode and process credentials are still protected by KERNEL_LOCK().
These are filesystem sockets "connect", "bind" and "disconnect" cases.

pledge(2) checks are protected by KERNEL_LOCK() too. These are "sendfd"
and "recvfd" cases.

All other socket operations protected by solock() and underelaying
UNIXDOMAIN_LOCK().

I wrote and run simultaneously a lot of test programs doing various
socket related things. System looks stable.

May be after 6.7 release PF_UNIX sockets can be direction to go.

Index: sys/kern/uipc_socket2.c
===
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.104
diff -u -p -r1.104 uipc_socket2.c
--- sys/kern/uipc_socket2.c 11 Apr 2020 14:07:06 -  1.104
+++ sys/kern/uipc_socket2.c 19 Apr 2020 11:07:10 -
@@ -282,6 +282,8 @@ solock(struct socket *so)
NET_LOCK();
break;
case PF_UNIX:
+   UNIXDOMAIN_LOCK();
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -306,6 +308,8 @@ sounlock(struct socket *so, int s)
NET_UNLOCK();
break;
case PF_UNIX:
+   UNIXDOMAIN_UNLOCK();
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -323,6 +327,8 @@ soassertlocked(struct socket *so)
NET_ASSERT_LOCKED();
break;
case PF_UNIX:
+   UNIXDOMAIN_ASSERT_LOCKED();
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -335,12 +341,24 @@ int
 sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
 uint64_t nsecs)
 {
-   if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
-   (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
-   (so->so_proto->pr_domain->dom_family != PF_KEY)) {
-   return rwsleep_nsec(ident, , prio, wmesg, nsecs);
-   } else
-   return tsleep_nsec(ident, prio, wmesg, nsecs);
+   int ret;
+
+   switch (so->so_proto->pr_domain->dom_family) {
+   case PF_INET:
+   case PF_INET6:
+   ret = rwsleep_nsec(ident, , prio, wmesg, nsecs);
+   break;
+   case PF_UNIX:
+   ret = rwsleep_nsec(ident, , prio, wmesg, nsecs);
+   break;
+   case PF_ROUTE:
+   case PF_KEY:
+   default:
+   ret = tsleep_nsec(ident, prio, wmesg, nsecs);
+   break;
+   }
+
+   return ret;
 }
 
 /*
Index: sys/kern/uipc_usrreq.c
===
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.142
diff -u -p -r1.142 uipc_usrreq.c
--- sys/kern/uipc_usrreq.c  16 Jul 2019 21:41:37 -  1.142
+++ sys/kern/uipc_usrreq.c  19 Apr 2020 11:07:10 -
@@ -51,6 +51,9 @@
 #include 
 #include 
 #include 
+#include 
+
+struct rwlock unixdomainlock = RWLOCK_INITIALIZER("unixdomainlock");
 
 void   uipc_setaddr(const struct unpcb *, struct mbuf *);
 
@@ -152,13 +155,17 @@ uipc_usrreq(struct socket *so, int req, 
case PRU_CONNECT2:
error = unp_connect2(so, (struct socket *)nam);
if (!error) {
+   KERNEL_LOCK();
unp->unp_connid.uid = p->p_ucred->cr_uid;
unp->unp_connid.gid = p->p_ucred->cr_gid;
+   KERNEL_UNLOCK();
unp->unp_connid.pid = p->p_p->ps_pid;
unp->unp_flags |= UNP_FEIDS;
unp2 = sotounpcb((struct socket *)nam);
+   KERNEL_LOCK();
unp2->unp_connid.uid = p->p_ucred->cr_uid;
unp2->unp_connid.gid = p->p_ucred->cr_gid;
+   KERNEL_UNLOCK();
unp2->unp_connid.pid = p->p_p->ps_pid;
unp2->unp_flags |= UNP_FEIDS;
}
@@ -411,10 +418,12 @@ unp_detach(struct unpcb *unp)
 
LIST_REMOVE(unp, unp_link);
if (unp->unp_vnode) {
+   KERNEL_LOCK();

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-08 Thread Vitaliy Makkoveev

On Wed, Apr 08, 2020 at 09:51:45AM +0200, Martin Pieuchot wrote:
> On 07/04/20(Tue) 19:58, Vitaliy Makkoveev wrote:
> > 
> > 
> > > On 7 Apr 2020, at 17:43, Martin Pieuchot  wrote:
> > > 
> > > On 07/04/20(Tue) 17:14, Vitaliy Makkoveev wrote:
> > >> As Claudio Jeker noticed, NET_LOCK() can release KERNEL_LOCK(). pppx(4)
> > >> code has some NET_LOCK() dances which make it unsafe. [...]
> > > 
> > > The easiest way to fix that is to move if_detach() out of 
> > > pppx_if_destroy().
> > 
> > pppxioctl() is under KERNEL_LOCK(). pppxioctl() holds NET_LOCK() it’s whole
> > runtime. So your suggestion is (pseudo code):
> > 
> > KERNEL_LOCK();
> > 
> > pppx_ioctl()
> > {
> > NET_LOCK();
> > 
> > switch() {
> > case PIPEXDSESSION:
> > NET_UNLOCK();
> > /*
> >  * KERNEL_LOCK() can be released here and concurrent
> >  * thread with PIPEXDSESSION case will enter here
> >  */
> > if_detach();
> > NET_LOCK();
> > 
> > pppx_if_destroy();
> > break;
> > }
> > 
> > NET_UNLOCK();
> > }
> > 
> > KERNEL_UNLOCK();
> > 
> > I understood well?
> 
> That or the other way around.  The question is who can grab a reference
> on `ifp'?  What is currently serializing this?  Is the NET_LOCK() at all
> necessary in pppx_ioctl() what is it protecting?  All the pipex(4) code
> seems to be running under KERNEL_LOCK() anyway.
> 
> I'm wondering if it isn't simpler to answer those questions and fix the
> current code once pppx(4) is using pipex_add_session().

As I see (pseudo code):

KERNEL_LOCK();

pppx_ioctl()
{
NET_LOCK();

switch() {
case PIPEXASESSION:
/* pppx_add_session() */
pxi->pxi_ready = 0;

rw_enter_write(_ifs_lk);
RBT_INSERT(pxi)
rw_exit_write(_ifs_lk);

NET_UNLOCK();
/* (1) release KERNEL_LOCK() */
if_attach();
NET_LOCK();

rw_enter_write(_ifs_lk);
pxi->pxi_ready = 1;
rw_exit_write(_ifs_lk);

break;
case PIPEXDSESSION:
/* pppx_del_session() */
/*
 * rw_enter_read(_ifs_lk);
 * find and return pxi with 'pxi_ready != 0'
 * rw_exit_read(_ifs_lk);
 */
pxi = pppx_if_find();

/* (3) unlocked pxi gap. pxi can be grabbed */

NET_UNLOCK();
/* (2) release KERNEL_LOCK() */
if_detach();
NET_LOCK();

/* kill pxi */

break;
case PIPEXCOMMAND:
/*
 * pppx_config_session()
 * pppx_get_stat()
 * pppx_get_closed()
 * pppx_set_session_descr()
 */

/*
 * rw_enter_read(_ifs_lk);
 * find and return pxi with 'pxi_ready != 0'
 * rw_exit_read(_ifs_lk);
 */
pxi = pppx_if_find();

break;
}

NET_UNLOCK();
}

KERNEL_UNLOCK();

1. Since pppx_if_find() can't return half initialized pppx_if there is no
concurency with pppx_add_session() and others.

2. Except pppx_del_session(), various pppx commands don't play with
NET_LOCK() but the can receive cpu if someone is in (1) or (2). They
can't grab half-initilaized pppx_if, but can grab half destroyed
pppx_if.

3. pppx_del_session() can grab half destroyed pppx_if

4. Half destroyed pppx_if is still in pppx_if three and queue, so it can
be grabbed (3)

my diff prevents (3) (pseudo code):

KERNEL_LOCK();

pppx_ioctl()
{
NET_LOCK();

switch() {
case PIPEXDSESSION:
/* pppx_del_session() */

rw_enter_write(_ifs_lk);
/*
 * find pxi with 'pxi_ready != 0'
 * remove it from tree and list
 */

rw_exit_write(_ifs_lk);

/*
 * nobody can grab this pxi here we are the only holder
 * unlocked gap (3) gone
 */

NET_UNLOCK();
/* (2) release KERNEL_LOCK() */
/* nobody enters here with our pxi */
if_detach();
NET_LOCK();

/* kill pxi */

break;
case PIPEXCOMMAND:
/*
 * pppx_config_session()
 * pppx_get_stat()
 *

Re: pppx(4): prevent concurency with pppx_if_destroy()

2020-04-08 Thread Vitaliy Makkoveev

On Wed, Apr 08, 2020 at 11:35:06AM +0200, Martin Pieuchot wrote:
> On 08/04/20(Wed) 12:11, Vitaliy Makkoveev wrote:
> > On Wed, Apr 08, 2020 at 09:51:45AM +0200, Martin Pieuchot wrote:
> > [...] 
> > As I see (pseudo code):
> > [...] 
> > So, I fixed this issue :)
> 
> We fix what we see.  Bugs are always in the unseen :)  This mess is
> clearly complicated.  Mixing 3 different locks in different orders
> with different pieces of code is complex.
They are not in different orders. The order always is:

KERNEL_LOCK();
NET_LOCK();
lock(pppx_ifs_lk);
unlock(pppx_ifs_lk);
NET_UNLOCK();
KERNEL_UNLOCK();

we are not holding pppx_ifs_lk while we releasing NET_LOCK()

and while we dance with NET_LOCK in pppx_add_session() and
pppx_del_session() order is:

KERNEL_LOCK();
NET_LOCK();
ppp_xxx_session()
{
lock(pppx_ifs_lk);
unlock(pppx_ifs_lk);

NET_UNLOCK();
KERNEL_UNLOCK() /* probably (1) */
/* play with netif */
KERNEL_LOCK() /* only if (1) */
NET_LOCK();

lock(pppx_ifs_lk);
unlock(pppx_ifs_lk);
}
NET_UNLOCK();
KERNEL_UNLOCK();

concurency as I see is:

we don't release

thread athread b
KERNEL_LOCK();  try to grab KERNEL_LOCK();
NET_LOCK();

lock(pppx_ifs_lk)
unlock(pppx_ifs_lk)
NET_UNLOCK();
/* play with netif */
NET_LOCK();

NET_UNLOCK();
KERNEL_UNLOCK();
KERNEL_LOCK();

if we release once

thread athread b
KERNEL_LOCK();  try to grab KERNEL_LOCK();
NET_LOCK();

lock(pppx_ifs_lk)
unlock(pppx_ifs_lk)
NET_UNLOCK();
KERNEL_UNLOCK();
/* play with netif */   KERNEL_LOCK();
try to grab KERNEL_LOCK();  NET_LOCK();
lock(pppx_ifs_lk);
unlock(pppx_ifs_lk);
NET_UNLOCK();
/* play with netif */
NET_LOCK();
KERNEL_UNLOCK();
KERNEL_LOCK();
NET_LOCK();

NET_UNLOCK();
KERNEL_UNLOCK();

or if we release twice

thread athread b
KERNEL_LOCK();  try to grab KERNEL_LOCK();
NET_LOCK();

lock(pppx_ifs_lk)
unlock(pppx_ifs_lk)
NET_UNLOCK();
KERNEL_UNLOCK();
/* play with netif */   KERNEL_LOCK();
try to grab KERNEL_LOCK();  NET_LOCK();
lock(pppx_ifs_lk);
unlock(pppx_ifs_lk);
NET_UNLOCK();
KERNEL_UNLOCK();
KERNEL_LOCK()   /* play with netif */
NET_LOCK()  try to grab KERNEL_LOCK();


lock(pppx_ifs_lk)
unlock(pppx_ifs_lk)

NET_UNLOCK();
KERNEL_UNLOCK();KERNEL_LOCK()
NET_LOCK()


> 
> So let's ask this question again: what is the NET_LOCK() protecting
> in this code?  Is it needed?  What is the `pppx_ifs_lk' protecting?
> From whom?  Are they needed or do they introduce more problem than they
> are solving?  We see the problem of context switch breaking atomicity
> of code, the simplest way to get rid of this problem is to get rid of
> the locks, or at least reduce their number.
NET_LOCK() should be held around pipex(4) access. It's not required to
hold it for pppx(4) related things. Really, since pipexintr() is under
KERNEL_LOCK() too, we can kill NET_LOCK() from in all pipex(4)/pppx(4)
code except, iirc, ipv[46]_input() and direct 'ifp->if_ipackets++' and
similar. 

> 
> So does one see how useless is `pppx_ifs_lk' since all the code it
> "protects" is executed under KERNEL_LOCK() and doesn't contain any
> context switch?
it's useless :) someone added it for future, but I dont see any reason
to kill it. It's fine.

> 
> Historically the NET_LOCK() has been "pushed" under all pseudo-device
> ioctl(2).  This was a mechanical change until somebody figures out if
> it is needed or not.  That's the real question.  So let's ask it :o)
>
It's paifully to touch this pre-MP era code :) Big NET_LOCK() here is
not needed.

ugen(4): wait everyone to finish i/o in ugen_detach()

2020-04-09 Thread Vitaliy Makkoveev

usb_detach_wait() will simply wait usb_detach_wakeup() for 60 sec. So
ugen_detach() will continue to destroy device context before threads
finish their io.

Index: sys/dev/usb/ugen.c
===
RCS file: /cvs/src/sys/dev/usb/ugen.c,v
retrieving revision 1.105
diff -u -p -r1.105 ugen.c
--- sys/dev/usb/ugen.c  7 Apr 2020 13:27:51 -   1.105
+++ sys/dev/usb/ugen.c  9 Apr 2020 14:08:20 -
@@ -764,7 +764,8 @@ ugen_detach(struct device *self, int fla
}
 
s = splusb();
-   if (--sc->sc_refcnt >= 0) {
+   --sc->sc_refcnt;
+   while (sc->sc_refcnt >= 0) {
/* Wake everyone */
for (i = 0; i < USB_MAX_ENDPOINTS; i++)
wakeup(>sc_endpoints[i][IN]);

pppx(4) remove context switch from pppx_if_find()

2020-04-08 Thread Vitaliy Makkoveev

As mpi@ noticed malloc() with M_WAITOK can cause context switch too. I
wish to drop concurency while RBT pppx_ifs is accessed. So, remove
malloc() from pppx_if_find(). Looks like original code should do search
in the same way, but it didn't.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.82
diff -u -p -r1.82 if_pppx.c
--- sys/net/if_pppx.c   7 Apr 2020 13:27:52 -   1.82
+++ sys/net/if_pppx.c   8 Apr 2020 13:18:07 -
@@ -643,20 +643,20 @@ pppx_if_next_unit(void)
 struct pppx_if *
 pppx_if_find(struct pppx_dev *pxd, int session_id, int protocol)
 {
-   struct pppx_if *s, *p;
-   s = malloc(sizeof(*s), M_DEVBUF, M_WAITOK | M_ZERO);
-
-   s->pxi_key.pxik_session_id = session_id;
-   s->pxi_key.pxik_protocol = protocol;
+   struct pppx_if_key key;
+   struct pppx_if *pxi;
 
+   memset(, 0, sizeof(key));
+   key.pxik_session_id = session_id;
+   key.pxik_protocol = protocol;
+   
rw_enter_read(_ifs_lk);
-   p = RBT_FIND(pppx_ifs, _ifs, s);
-   if (p && p->pxi_ready == 0)
-   p = NULL;
+   pxi = RBT_FIND(pppx_ifs, _ifs, (struct pppx_if *));
+   if (pxi && pxi->pxi_ready == 0)
+   pxi = NULL;
rw_exit_read(_ifs_lk);
 
-   free(s, M_DEVBUF, sizeof(*s));
-   return (p);
+   return pxi;
 }
 
 int

Re: pipex(4) man page fix

2020-04-02 Thread Vitaliy Makkoveev

On Thu, Apr 02, 2020 at 09:07:23AM +0200, Martin Pieuchot wrote:
> On 29/03/20(Sun) 00:16, Vitaliy Makkoveev wrote:
> > pipex not used with tun(4)
> 
> It seems an oversight from the addition of the new pppac(4) driver.  See
> net/if_tun.c commits from January this year.
> 
> So the right fix would be to replace tun(4) with pppac(4) and add a pppac.4
> manpage.  Do you feel like tackling that?

Sure. Also PIPEXDSESSION describes pipex_session_stat_req instead of
pipex_session_close_req. I fixed it too.

Index: share/man/man4/pipex.4
===
RCS file: /cvs/src/share/man/man4/pipex.4,v
retrieving revision 1.11
diff -u -p -r1.11 pipex.4
--- share/man/man4/pipex.4  18 Apr 2017 03:21:48 -  1.11
+++ share/man/man4/pipex.4  2 Apr 2020 11:45:40 -
@@ -32,7 +32,7 @@
 .Sh DESCRIPTION
 .Nm
 is used with
-.Xr tun 4
+.Xr pppac 4
 and
 .Xr pppx 4 ,
 and handles PPP frames and forwards IP packets in-kernel.
@@ -51,7 +51,7 @@ using
 adds some extensions to the
 .Xr ioctl 2
 requests to
-.Xr tun 4
+.Xr pppac 4
 or
 .Xr pppx 4
 devices.
@@ -104,8 +104,7 @@ struct pipex_session_req {
 uint16_tpr_peer_mru; /* peer's mru */
 uint16_tpr_timeout_sec;  /* idle timer */
 
-struct in_addr  pr_ip_srcaddr;/* local IP address.
-not used by tun(4) */
+struct in_addr  pr_ip_srcaddr;/* local IP address */
 struct in_addr  pr_ip_address;/* framed IP address */
 struct in_addr  pr_ip_netmask;/* framed IP netmask */
 struct sockaddr_in6 pr_ip6_address;   /* framed IPv6 address */
@@ -171,10 +170,10 @@ struct pipex_mppe_req {
 .It Dv PIPEXDSESSION Fa "struct pipex_session_close_req *"
 Delete the specified session from the kernel.
 Specify the session using a
-.Vt pipex_session_stat_req
+.Vt pipex_session_close_req
 structure, which has the following definition:
 .Bd -literal
-struct pipex_session_stat_req {
+struct pipex_session_close_req {
 int  psr_protocol;   /* tunnel protocol */
 uint16_t psr_session_id; /* session-id */
 struct pipex_statistics  psr_stat;   /* statistics */
@@ -265,7 +264,7 @@ Set the
 .Xr pppx 4
 interface's description of the session.
 This command doesn't work on
-.Xr tun 4
+.Xr pppac 4
 devices.
 Specify the session and its description using a
 .Vt pipex_session_descr_req
@@ -280,8 +279,8 @@ struct pipex_session_descr_req {
 .El
 .Sh SEE ALSO
 .Xr ioctl 2 ,
+.Xr pppax 4 ,
 .Xr pppx 4 ,
-.Xr tun 4 ,
 .Xr npppd 8 ,
 .Xr sysctl 8
 .Sh AUTHORS

Fix pipex(4) pipex_ioctl() access to not owned sessions (kernel crash too)

2020-04-02 Thread Vitaliy Makkoveev

pipex(4) has pipex_ioctl() interface for pipex_session related routines.
pipex_ioctl() calls should be done with pipex_iface_contex, so any
operations should be done with pipex_sessions owned by passed
pipex_iface_contex. pipex_session check ownership is missing within
pipex_ioctl() so anybody can do pipex_ioctl() commands PIPEXDSESSION,
PIPEXCSESSION, PIPEXGSTAT and PIPEXGCLOSED on any pipex_session.
PIPEXDSESSION on foreign pppx(4) owned pipex_session will crash kernel.
Code to reproduce and screenshot attached below. Diffs below add
pipes_session ownrship check to pipex_ioctl() internals.

 cut begin

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 
#include 
#include 

#define PIPEX_CLOSE_TIMEOUT 30

int main(void)
{
int fd_pppx, fd_pppac;
struct pipex_session_req reqa;
struct pipex_session_close_req reqd;

if((fd_pppx=open("/dev/pppx0", O_RDWR))<0){
err(1, "open(pppx0)");
}

if((fd_pppac=open("/dev/pppac0", O_RDWR))<0){
err(1, "open(pppac0)");
}

memset(, 0, sizeof(reqa));
reqa.pr_protocol=PIPEX_PROTO_L2TP;
reqa.pr_peer_mru=1024;
reqa.pr_local_address.ss_family=AF_INET;
reqa.pr_local_address.ss_len=sizeof(struct sockaddr_in);
reqa.pr_peer_address.ss_family=AF_INET;
reqa.pr_peer_address.ss_len=sizeof(struct sockaddr_in);
inet_aton("10.0.0.1", _ip_srcaddr);
inet_aton("10.0.0.2", _ip_address);
inet_aton("255.255.255.255", _ip_netmask);

if(ioctl(fd_pppx, PIPEXASESSION, )<0){
err(1, "ioctl(fd_pppx, PIPEXASESSION)");
}

memset(, 0, sizeof(reqd));
reqd.pcr_protocol=PIPEX_PROTO_L2TP;

if(ioctl(fd_pppac, PIPEXDSESSION, )<0){
err(1, "ioctl(fd_ppaac, PIPEDSESSION)");
}

sleep(PIPEX_CLOSE_TIMEOUT+1);

return 0;
}

 cut end

This diff fix pipex_ioctl(PIPEXDSESSION) crash issue

 cut begin
diff --git sys/net/pipex.c sys/net/pipex.c
index 3da8ed8..22edce3 100644
--- sys/net/pipex.c
+++ sys/net/pipex.c
@@ -230,7 +230,7 @@ pipex_ioctl(struct pipex_iface_context *pipex_iface, u_long 
cmd, caddr_t data)
 
case PIPEXDSESSION:
ret = pipex_close_session(
-   (struct pipex_session_close_req *)data);
+   (struct pipex_session_close_req *)data, pipex_iface);
break;
 
case PIPEXCSESSION:
@@ -489,7 +489,8 @@ pipex_notify_close_session_all(void)
 }
 
 Static int
-pipex_close_session(struct pipex_session_close_req *req)
+pipex_close_session(struct pipex_session_close_req *req,
+struct pipex_iface_context *iface)
 {
struct pipex_session *session;
 
@@ -498,6 +499,8 @@ pipex_close_session(struct pipex_session_close_req *req)
req->pcr_session_id);
if (session == NULL)
return (EINVAL);
+   if (session->pipex_iface != iface)
+   return (EINVAL);
 
/* remove from close_wait list */
if (session->state == PIPEX_STATE_CLOSE_WAIT)
diff --git sys/net/pipex_local.h sys/net/pipex_local.h
index cf02c8e..ad3c3d3 100644
--- sys/net/pipex_local.h
+++ sys/net/pipex_local.h
@@ -369,7 +369,8 @@ extern struct pipex_hash_head   pipex_id_hashtable[];
 void  pipex_iface_start (struct pipex_iface_context *);
 void  pipex_iface_stop (struct pipex_iface_context *);
 int   pipex_add_session (struct pipex_session_req *, struct 
pipex_iface_context *);
-int   pipex_close_session (struct pipex_session_close_req *);
+int   pipex_close_session (struct pipex_session_close_req *,
+  struct pipex_iface_context *);
 int   pipex_config_session (struct pipex_session_config_req *);
 int   pipex_get_stat (struct pipex_session_stat_req *);
 int   pipex_get_closed (struct pipex_session_list_req *);
 cut end

This diff add ownership checks to the rest pipex_ioctl() commands. A few
words about pppx_get_closed(): since in-kernel timeout feature was
disabled for pppx(4) related pipex_sessions, closed pipex_sessions can't
exist in system, so this function is dummy. I have an idea how to
reenable this disabled timeout, but some reafactoring requited, and fair
pipex_ioctl(PIPEXGCLOSED) call will be restored.

 cut begin
diff --git sys/net/if_pppx.c sys/net/if_pppx.c
index 37a6af0..6c4977d 100644
--- sys/net/if_pppx.c
+++ sys/net/if_pppx.c
@@ -175,6 +175,12 @@ intpppx_add_session(struct pppx_dev *,
struct pipex_session_req *);
 intpppx_del_session(struct pppx_dev *,
struct pipex_session_close_req *);
+intpppx_config_session(struct pppx_dev *,
+   struct pipex_session_config_req *);
+intpppx_get_stat(struct pppx_dev *,
+

Re: Fix pipex(4) pipex_ioctl() access to not owned sessions (kernel crash too)

2020-04-02 Thread Vitaliy Makkoveev

Sorry, screenshot was from pached kernel. Screenshot from clean kernel
included.

Re: Fix pipex(4) pipex_ioctl() access to not owned sessions (kernel crash too)

2020-04-04 Thread Vitaliy Makkoveev

On Sat, Apr 04, 2020 at 06:52:49PM +0200, Martin Pieuchot wrote:
> On 02/04/20(Thu) 13:44, Vitaliy Makkoveev wrote:
> > This diff add ownership checks to the rest pipex_ioctl() commands. A few
> > words about pppx_get_closed(): since in-kernel timeout feature was
> > disabled for pppx(4) related pipex_sessions, closed pipex_sessions can't
> > exist in system, so this function is dummy. I have an idea how to
> > reenable this disabled timeout, but some reafactoring requited, and fair
> > pipex_ioctl(PIPEXGCLOSED) call will be restored.
> 
> The ownership looks good to me.
> 
> I'd suggest we return an error for PIPEXGCLOSED documenting why this
> ioctl(2) is no longer supported.  That could have been part of the
> previous fix that disabled the timeout.  No need for a separate
> function.

We will restore timeout later. And after some additional steps
PIPEXGCLOSED will return fair list of PIPEX_STATE_CLOSE_WAIT stated
sessions associated with pppx_dev.

> See there's a conceptual problem with pipex_get_closed().  This function
> never returns an error, so when npppd(8) will issue the ioctl, even if
> an error occurs it will proceed with empty data.

npppd(8) does this check. See usr.sbin/npppd/npppd/npppd.c:1327

> 
> Would it make sense to have two separated diffs?

I split it because only PEXDSESSION will crash. But since these diff add
the same checks they can be combined to one.

> 
> >  cut begin
> > diff --git sys/net/if_pppx.c sys/net/if_pppx.c
> > index 37a6af0..6c4977d 100644
> > --- sys/net/if_pppx.c
> > +++ sys/net/if_pppx.c
> > @@ -175,6 +175,12 @@ intpppx_add_session(struct pppx_dev *,
> > struct pipex_session_req *);
> >  intpppx_del_session(struct pppx_dev *,
> > struct pipex_session_close_req *);
> > +intpppx_config_session(struct pppx_dev *,
> > +   struct pipex_session_config_req *);
> > +intpppx_get_stat(struct pppx_dev *,
> > +   struct pipex_session_stat_req *);
> > +intpppx_get_closed(struct pppx_dev *,
> > +   struct pipex_session_list_req *);
> >  intpppx_set_session_descr(struct pppx_dev *,
> > struct pipex_session_descr_req *);
> >  
> > @@ -451,16 +457,18 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t addr, int 
> > flags, struct proc *p)
> > break;
> >  
> > case PIPEXCSESSION:
> > -   error = pipex_config_session(
> > +   error = pppx_config_session(pxd,
> > (struct pipex_session_config_req *)addr);
> > break;
> >  
> > case PIPEXGSTAT:
> > -   error = pipex_get_stat((struct pipex_session_stat_req *)addr);
> > +   error = pppx_get_stat(pxd,
> > +   (struct pipex_session_stat_req *)addr);
> > break;
> >  
> > case PIPEXGCLOSED:
> > -   error = pipex_get_closed((struct pipex_session_list_req *)addr);
> > +   error = pppx_get_closed(pxd,
> > +   (struct pipex_session_list_req *)addr);
> > break;
> >  
> > case PIPEXSIFDESCR:
> > @@ -947,6 +955,40 @@ pppx_del_session(struct pppx_dev *pxd, struct 
> > pipex_session_close_req *req)
> > return (0);
> >  }
> >  
> > +int
> > +pppx_config_session(struct pppx_dev *pxd,
> > +struct pipex_session_config_req *req)
> > +{
> > +   struct pppx_if *pxi;
> > +
> > +   pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
> > +   if (pxi == NULL)
> > +   return (EINVAL);
> > +
> > +   return pipex_config_session(req, >pxi_ifcontext);
> > +}
> > +
> > +int
> > +pppx_get_stat(struct pppx_dev *pxd, struct pipex_session_stat_req *req)
> > +{
> > +   struct pppx_if *pxi;
> > +
> > +   pxi = pppx_if_find(pxd, req->psr_session_id, req->psr_protocol);
> > +   if (pxi == NULL)
> > +   return (EINVAL);
> > +
> > +   return pipex_get_stat(req, >pxi_ifcontext);
> > +}
> > +
> > +int
> > +pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > +{
> > +   /* XXX: Only opened sessions exist for pppx(4) */
> > +   memset(req, 0, sizeof(*req));
> > +
> > +   return 0;
> > +}
> > +
> >  int
> >  pppx_set_session_descr(struct pppx_dev *pxd,
> >  struct pipex_session_descr_req *req)
> > diff --git sys/net/pipex.c sys/net/pipex.c
> > index 22edce3..219e18d 100644
>

Prevent memory corruption by pipex_timer()

2020-03-27 Thread Vitaliy Makkoveev

Each pipex_session has timeout_sec field and if it is not 0,
pipex_timer() can destroy pipex_session. Each pppx_if contents
pipex_session. If userland creates pppx_if and pipex_session has
timeout (for example, npppd.conf has idle-timeout option), pipex_timer()
can destroy this pipex_session and break it's pppx_if. Diff below is
quick and dirty fix for this case. At first look, creation of
pipex_timer()-like pppx_timer() is more better, but I am little confused
with pipexintr(). Looks like pipexoutq and pipexinq can contain mbufs
with holds pointer to already destructed pipex_session, because
pipex_destroy_session() and pppx_if_destroy() just frees pipex_session
and potential pppx_timer() is only anoter point to crash kernel.

I suggest, pipex_session and pppx_if should be refactored for good fix,
at least:
1. add reference counter for pipex_session
2. pppx_if should hold a reference to pipex_session
3. pipex_session should have handler to inform it's holder about
destruction

Default npppd.conf has no idle-timeout option, so npppd will not be very
confused with this diff.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- sys/net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ sys/net/if_pppx.c   27 Mar 2020 08:20:50 -
@@ -665,6 +665,10 @@ pppx_add_session(struct pppx_dev *pxd, s
struct ifnet *over_ifp = NULL;
 #endif
 
+   /* XXX: prevent pxi destruction by pipex_timer() */
+   if (req->pr_timeout_sec != 0)
+   return (EINVAL);
+
switch (req->pr_protocol) {
 #ifdef PIPEX_PPPOE
case PIPEX_PROTO_PPPOE:

Re: Prevent memory corruption by pipex_timer()

2020-03-27 Thread Vitaliy Makkoveev

On Fri, Mar 27, 2020 at 03:16:54PM +0300, Vitaliy Makkoveev wrote:
> On Fri, Mar 27, 2020 at 10:43:52AM +0100, Martin Pieuchot wrote:
> > Do you have a backtrace for the memory corruption?  Could you share it?
> Yes. Apply path below, compile and run code, and when you had see
> "pipex_session ... killed" kill this code. Screenshot attached.
> STABLE-6.[56] are affected too.
> 
>  cut begin 
> 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> #include 
> #include 
> #include 
> #include 
> 
> int main(void)
> {
>   int fd;
>   struct pipex_session_req req;
> 
>   if((fd=open("/dev/pppx0", O_RDWR))<0){
>   err(1, "open()");
>   }
> 
>   memset(, 0, sizeof(req));
> 
>   req.pr_timeout_sec=0;
Sorry, this line should be "req.pr_timeout_sec=1;"

> 
>   req.pr_protocol=PIPEX_PROTO_L2TP;
>   req.pr_local_address.ss_family=AF_INET;
>   req.pr_local_address.ss_len=sizeof(struct sockaddr_in);
>   req.pr_peer_address.ss_family=AF_INET;
>   req.pr_peer_address.ss_len=sizeof(struct sockaddr_in);
> 
>   if(ioctl(fd, PIPEXASESSION, )<0){
>   err(1, "ioctl()");
>   }
> 
>   select(0, NULL, NULL, NULL, NULL);
> 
>   return 0;
> }
> 
>  cut end 
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.77
> diff -u -p -r1.77 if_pppx.c
> --- sys/net/if_pppx.c 26 Mar 2020 16:50:46 -  1.77
> +++ sys/net/if_pppx.c 27 Mar 2020 12:02:33 -
> @@ -665,6 +665,12 @@ pppx_add_session(struct pppx_dev *pxd, s
>   struct ifnet *over_ifp = NULL;
>  #endif
>  
> +#if 0
> + /* XXX: prevent pxi destruction by pipex_timer() */
> + if (req->pr_timeout_sec != 0)
> + return (EINVAL);
> +#endif
> +
>   switch (req->pr_protocol) {
>  #ifdef PIPEX_PPPOE
>   case PIPEX_PROTO_PPPOE:
> @@ -706,6 +712,11 @@ pppx_add_session(struct pppx_dev *pxd, s
>   pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
>   if (pxi == NULL)
>   return (ENOMEM);
> + 
> +#if 1
> + printf("%s: new pppx_if pipex_session %p timeout %u\n",
> + __func__, >pxi_session, req->pr_timeout_sec);
> +#endif
>  
>   session = >pxi_session;
>   ifp = >pxi_if;
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.109
> diff -u -p -r1.109 pipex.c
> --- sys/net/pipex.c   26 Mar 2020 16:50:46 -  1.109
> +++ sys/net/pipex.c   27 Mar 2020 12:02:34 -
> @@ -767,6 +767,10 @@ pipex_timer(void *ignored_arg)
>   session->stat.idle_time++;
>   if (session->stat.idle_time < session->timeout_sec)
>   continue;
> +#if 1
> + printf("%s: pipex_session %p timeout\n",
> + __func__, session);
> +#endif
>  
>   pipex_notify_close_session(session);
>   break;
> @@ -792,6 +796,10 @@ pipex_timer(void *ignored_arg)
>   continue;
>  
>   pipex_destroy_session(session);
> +#if 1
> + printf("%s: pipex_session %p killed\n",
> + __func__, session);
> +#endif
>   break;
>  
>   default:

Re: Prevent memory corruption by pipex_timer()

2020-03-27 Thread Vitaliy Makkoveev

On Fri, Mar 27, 2020 at 10:43:52AM +0100, Martin Pieuchot wrote:
> Do you have a backtrace for the memory corruption?  Could you share it?
Yes. Apply path below, compile and run code, and when you had see
"pipex_session ... killed" kill this code. Screenshot attached.
STABLE-6.[56] are affected too.

 cut begin 

#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 
#include 

#include 
#include 
#include 
#include 

int main(void)
{
int fd;
struct pipex_session_req req;

if((fd=open("/dev/pppx0", O_RDWR))<0){
err(1, "open()");
}

memset(, 0, sizeof(req));

req.pr_timeout_sec=0;

req.pr_protocol=PIPEX_PROTO_L2TP;
req.pr_local_address.ss_family=AF_INET;
req.pr_local_address.ss_len=sizeof(struct sockaddr_in);
req.pr_peer_address.ss_family=AF_INET;
req.pr_peer_address.ss_len=sizeof(struct sockaddr_in);

if(ioctl(fd, PIPEXASESSION, )<0){
err(1, "ioctl()");
}

select(0, NULL, NULL, NULL, NULL);

return 0;
}

 cut end 

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- sys/net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ sys/net/if_pppx.c   27 Mar 2020 12:02:33 -
@@ -665,6 +665,12 @@ pppx_add_session(struct pppx_dev *pxd, s
struct ifnet *over_ifp = NULL;
 #endif
 
+#if 0
+   /* XXX: prevent pxi destruction by pipex_timer() */
+   if (req->pr_timeout_sec != 0)
+   return (EINVAL);
+#endif
+
switch (req->pr_protocol) {
 #ifdef PIPEX_PPPOE
case PIPEX_PROTO_PPPOE:
@@ -706,6 +712,11 @@ pppx_add_session(struct pppx_dev *pxd, s
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
if (pxi == NULL)
return (ENOMEM);
+   
+#if 1
+   printf("%s: new pppx_if pipex_session %p timeout %u\n",
+   __func__, >pxi_session, req->pr_timeout_sec);
+#endif
 
session = >pxi_session;
ifp = >pxi_if;
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.109
diff -u -p -r1.109 pipex.c
--- sys/net/pipex.c 26 Mar 2020 16:50:46 -  1.109
+++ sys/net/pipex.c 27 Mar 2020 12:02:34 -
@@ -767,6 +767,10 @@ pipex_timer(void *ignored_arg)
session->stat.idle_time++;
if (session->stat.idle_time < session->timeout_sec)
continue;
+#if 1
+   printf("%s: pipex_session %p timeout\n",
+   __func__, session);
+#endif
 
pipex_notify_close_session(session);
break;
@@ -792,6 +796,10 @@ pipex_timer(void *ignored_arg)
continue;
 
pipex_destroy_session(session);
+#if 1
+   printf("%s: pipex_session %p killed\n",
+   __func__, session);
+#endif
break;
 
default:

Re: Dedulpicate pipex(4) and pppx(4) code

2020-04-01 Thread Vitaliy Makkoveev

Updated diff. The idea is to use already existing pipex API for
pipex_session creation and destruction. pppx_if now holds a reference
to pipex_session.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.78
diff -u -p -r1.78 if_pppx.c
--- sys/net/if_pppx.c   1 Apr 2020 07:15:59 -   1.78
+++ sys/net/if_pppx.c   1 Apr 2020 09:50:19 -
@@ -155,7 +155,7 @@ struct pppx_if {
int pxi_unit;
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;
-   struct pipex_sessionpxi_session;
+   struct pipex_session*pxi_session;
struct pipex_iface_context  pxi_ifcontext;
 };
 
@@ -655,15 +655,10 @@ int
 pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
 {
struct pppx_if *pxi;
-   struct pipex_session *session;
-   struct pipex_hash_head *chain;
struct ifnet *ifp;
int unit, error = 0;
struct in_ifaddr *ia;
struct sockaddr_in ifaddr;
-#ifdef PIPEX_PPPOE
-   struct ifnet *over_ifp = NULL;
-#endif
 
/*
 * XXX: As long as `session' is allocated as part of a `pxi'
@@ -673,157 +668,16 @@ pppx_add_session(struct pppx_dev *pxd, s
if (req->pr_timeout_sec != 0)
return (EINVAL);
 
-   switch (req->pr_protocol) {
-#ifdef PIPEX_PPPOE
-   case PIPEX_PROTO_PPPOE:
-   over_ifp = ifunit(req->pr_proto.pppoe.over_ifname);
-   if (over_ifp == NULL)
-   return (EINVAL);
-   if (req->pr_peer_address.ss_family != AF_UNSPEC)
-   return (EINVAL);
-   break;
-#endif
-#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
-   case PIPEX_PROTO_PPTP:
-   case PIPEX_PROTO_L2TP:
-   switch (req->pr_peer_address.ss_family) {
-   case AF_INET:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in))
-   return (EINVAL);
-   break;
-#ifdef INET6
-   case AF_INET6:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in6))
-   return (EINVAL);
-   break;
-#endif
-   default:
-   return (EPROTONOSUPPORT);
-   }
-   if (req->pr_peer_address.ss_family !=
-   req->pr_local_address.ss_family ||
-   req->pr_peer_address.ss_len !=
-   req->pr_local_address.ss_len)
-   return (EINVAL);
-   break;
-#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
-   default:
-   return (EPROTONOSUPPORT);
-   }
-
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
-   if (pxi == NULL)
-   return (ENOMEM);
-
-   session = >pxi_session;
ifp = >pxi_if;
 
-   /* fake a pipex interface context */
-   session->pipex_iface = >pxi_ifcontext;
-   session->pipex_iface->ifnet_this = ifp;
-   session->pipex_iface->pipexmode = PIPEX_ENABLED;
-
-   /* setup session */
-   session->state = PIPEX_STATE_OPENED;
-   session->protocol = req->pr_protocol;
-   session->session_id = req->pr_session_id;
-   session->peer_session_id = req->pr_peer_session_id;
-   session->peer_mru = req->pr_peer_mru;
-   session->timeout_sec = req->pr_timeout_sec;
-   session->ppp_flags = req->pr_ppp_flags;
-   session->ppp_id = req->pr_ppp_id;
-
-   session->ip_forward = 1;
-
-   session->ip_address.sin_family = AF_INET;
-   session->ip_address.sin_len = sizeof(struct sockaddr_in);
-   session->ip_address.sin_addr = req->pr_ip_address;
-
-   session->ip_netmask.sin_family = AF_INET;
-   session->ip_netmask.sin_len = sizeof(struct sockaddr_in);
-   session->ip_netmask.sin_addr = req->pr_ip_netmask;
-
-   if (session->ip_netmask.sin_addr.s_addr == 0L)
-   session->ip_netmask.sin_addr.s_addr = 0xL;
-   session->ip_address.sin_addr.s_addr &=
-   session->ip_netmask.sin_addr.s_addr;
-
-   if (req->pr_peer_address.ss_len > 0)
-   memcpy(>peer, >pr_peer_address,
-   MIN(req->pr_peer_address.ss_len, sizeof(session->peer)));
-   if (req->pr_local_address.ss_len > 0)
-   memcpy(>local, >pr_local_address,
-   MIN(req->pr_local_address.ss_len, sizeof(session->local)));
-#ifdef PIPEX_PPPOE
-   if (req->pr_protocol == PIPEX_PROTO_PPPOE)
-   session->proto.pppoe.over_ifidx = over_ifp->if_index;
-#endif
-#ifdef PIPEX_PPTP
-   if (req->pr_protocol == PIPEX_PROTO_PPTP) {
-   struct pipex_pptp_session *sess_pptp = >proto.pptp;
-
-   sess_pptp->snd_gap = 0;
-   sess_pptp->rcv_gap = 0;
-   sess_pptp->snd_una =

Re: pipex(4) man page fix

2020-04-03 Thread Vitaliy Makkoveev

On Thu, Apr 02, 2020 at 05:06:30PM +0100, Iain R. Learmonth wrote:
> Hi,
> 
> On 02/04/2020 12:47, Vitaliy Makkoveev wrote:
> > +.Xr pppax 4 ,
> 
> I guess you meant pppac here.
> 
> Thanks,
> Iain.
>
Thanks!

Index: share/man/man4/pipex.4
===
RCS file: /cvs/src/share/man/man4/pipex.4,v
retrieving revision 1.11
diff -u -p -r1.11 pipex.4
--- share/man/man4/pipex.4  18 Apr 2017 03:21:48 -  1.11
+++ share/man/man4/pipex.4  3 Apr 2020 08:20:01 -
@@ -32,7 +32,7 @@
 .Sh DESCRIPTION
 .Nm
 is used with
-.Xr tun 4
+.Xr pppac 4
 and
 .Xr pppx 4 ,
 and handles PPP frames and forwards IP packets in-kernel.
@@ -51,7 +51,7 @@ using
 adds some extensions to the
 .Xr ioctl 2
 requests to
-.Xr tun 4
+.Xr pppac 4
 or
 .Xr pppx 4
 devices.
@@ -104,8 +104,7 @@ struct pipex_session_req {
 uint16_tpr_peer_mru; /* peer's mru */
 uint16_tpr_timeout_sec;  /* idle timer */
 
-struct in_addr  pr_ip_srcaddr;/* local IP address.
-not used by tun(4) */
+struct in_addr  pr_ip_srcaddr;/* local IP address */
 struct in_addr  pr_ip_address;/* framed IP address */
 struct in_addr  pr_ip_netmask;/* framed IP netmask */
 struct sockaddr_in6 pr_ip6_address;   /* framed IPv6 address */
@@ -171,10 +170,10 @@ struct pipex_mppe_req {
 .It Dv PIPEXDSESSION Fa "struct pipex_session_close_req *"
 Delete the specified session from the kernel.
 Specify the session using a
-.Vt pipex_session_stat_req
+.Vt pipex_session_close_req
 structure, which has the following definition:
 .Bd -literal
-struct pipex_session_stat_req {
+struct pipex_session_close_req {
 int  psr_protocol;   /* tunnel protocol */
 uint16_t psr_session_id; /* session-id */
 struct pipex_statistics  psr_stat;   /* statistics */
@@ -265,7 +264,7 @@ Set the
 .Xr pppx 4
 interface's description of the session.
 This command doesn't work on
-.Xr tun 4
+.Xr pppac 4
 devices.
 Specify the session and its description using a
 .Vt pipex_session_descr_req
@@ -280,8 +279,8 @@ struct pipex_session_descr_req {
 .El
 .Sh SEE ALSO
 .Xr ioctl 2 ,
+.Xr pppac 4 ,
 .Xr pppx 4 ,
-.Xr tun 4 ,
 .Xr npppd 8 ,
 .Xr sysctl 8
 .Sh AUTHORS

Re: Fix pipex(4) pipex_ioctl() access to not owned sessions (kernel crash too)

2020-04-03 Thread Vitaliy Makkoveev

On Fri, Apr 03, 2020 at 11:07:48AM +0200, Martin Pieuchot wrote:
> On 02/04/20(Thu) 13:44, Vitaliy Makkoveev wrote:
> > pipex(4) has pipex_ioctl() interface for pipex_session related routines.
> > pipex_ioctl() calls should be done with pipex_iface_contex, so any
> > operations should be done with pipex_sessions owned by passed
> > pipex_iface_contex. pipex_session check ownership is missing within
> > pipex_ioctl() so anybody can do pipex_ioctl() commands PIPEXDSESSION,
> > PIPEXCSESSION, PIPEXGSTAT and PIPEXGCLOSED on any pipex_session.
> > PIPEXDSESSION on foreign pppx(4) owned pipex_session will crash kernel.
> > Code to reproduce and screenshot attached below. Diffs below add
> > pipes_session ownrship check to pipex_ioctl() internals.
> 
> Awesome bug report!  Fix makes sense to me.
> 
> To avoid check duplication would it make sense to have pppxioctl() call
> pipex_ioctl() for a subset of ioctl(2) values? 

It makes sense, but not now. pppxioctl() should find pipex_iface_context
associated with pppx_if and only then call pipex_ioctl(). For
PIPEXASESSION command pipex_iface_context should be created and for
PIPEXDSESSION it should be deleted outside pipex(4). Current pipex(4)
design is: one pipex_iface_context, one ifnet and many pipex_sessions
associated with this pipex_iface_context use this one ifnet. This is ok
for pppac(4), but for pppx(4) it requres to have one pipex_iface_context
per each pppx_if that's overkilling. I have an idea to move ifnet out from
pipex_iface_context and put it directly (really with some wrapper) to
pipex_session. So pipex_sessions will have individual ifnet (for pppx(4))
or share it (for pppac(4)). pppx_dev will have only one
pipex_iface_context for all its pppx_ifs. After this step PIPEXCSESSION,
PIPEXGSTAT and PIPEXGCLOSED can be executed by pipex_ioctl() directly.

The next step I suggest is to destroy pppx_if context within ifnet's
detach hook. pipex_session will call ifnet_detach() by itself and clear
pppx_if. After this step PIPEXDSESSION calls will be allowed directly on
pipex_iface_context and in-kernel timeout feature will be restored.

Also we have PIPEXSIFDESCR command which is not supported by
pipex_ioctl() :)

But at first, I suggest to add to pppx_add_session() and to
pipex_add_session() check: is the session with given req->pr_protocol and
req->pr_session_id already exist.

At second, return to https://marc.info/?t=15854078841=1=2 and
finish cleanups and deduplication.
> 
> >  cut begin
> > 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > #include 
> > 
> > #include 
> > #include 
> > #include 
> > #include 
> > 
> > #define PIPEX_CLOSE_TIMEOUT 30
> > 
> > int main(void)
> > {
> > int fd_pppx, fd_pppac;
> > struct pipex_session_req reqa;
> > struct pipex_session_close_req reqd;
> > 
> > if((fd_pppx=open("/dev/pppx0", O_RDWR))<0){
> > err(1, "open(pppx0)");
> > }
> > 
> > if((fd_pppac=open("/dev/pppac0", O_RDWR))<0){
> > err(1, "open(pppac0)");
> > }
> > 
> > memset(, 0, sizeof(reqa));
> > reqa.pr_protocol=PIPEX_PROTO_L2TP;
> > reqa.pr_peer_mru=1024;
> > reqa.pr_local_address.ss_family=AF_INET;
> > reqa.pr_local_address.ss_len=sizeof(struct sockaddr_in);
> > reqa.pr_peer_address.ss_family=AF_INET;
> > reqa.pr_peer_address.ss_len=sizeof(struct sockaddr_in);
> > inet_aton("10.0.0.1", _ip_srcaddr);
> > inet_aton("10.0.0.2", _ip_address);
> > inet_aton("255.255.255.255", _ip_netmask);
> > 
> > if(ioctl(fd_pppx, PIPEXASESSION, )<0){
> > err(1, "ioctl(fd_pppx, PIPEXASESSION)");
> > }
> > 
> > memset(, 0, sizeof(reqd));
> > reqd.pcr_protocol=PIPEX_PROTO_L2TP;
> > 
> > if(ioctl(fd_pppac, PIPEXDSESSION, )<0){
> > err(1, "ioctl(fd_ppaac, PIPEDSESSION)");
> > }
> > 
> > sleep(PIPEX_CLOSE_TIMEOUT+1);
> > 
> > return 0;
> > }
> > 
> >  cut end
> > 
> > This diff fix pipex_ioctl(PIPEXDSESSION) crash issue
> > 
> >  cut begin
> > diff --git sys/net/pipex.c sys/net/pipex.c
> > index 3da8ed8..22edce3 100644
> > --- sys/net/pipex.c
> > +++ sys/net/pipex.c
> > @@ -230,7 +230,7 @@ pipex_ioctl(struct pipex_iface_context *pipex_iface, 
> > u_long cmd, caddr_t data)
> >  
> > case PIPEXDSESSION:
> > ret = pipex_cl

Dedulpicate pipex(4) and pppx(4) code

2020-03-28 Thread Vitaliy Makkoveev

pppx(4) has code copypasted from pipex(4). Patch below deduplicates it.
Introduded pipex_session_setup() and pipex_session_destroy() functions.
Original pipex_destroy_session() renamed to pipex_del_session() to be
consistent with PIPEXDSESSION (Delete the specified session from the
kernel).

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- sys/net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ sys/net/if_pppx.c   28 Mar 2020 14:45:08 -
@@ -655,167 +655,26 @@ int
 pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
 {
struct pppx_if *pxi;
-   struct pipex_session *session;
-   struct pipex_hash_head *chain;
+   struct pipex_iface_context *iface;
struct ifnet *ifp;
-   int unit, error = 0;
+   int unit, error;
struct in_ifaddr *ia;
struct sockaddr_in ifaddr;
-#ifdef PIPEX_PPPOE
-   struct ifnet *over_ifp = NULL;
-#endif
 
-   switch (req->pr_protocol) {
-#ifdef PIPEX_PPPOE
-   case PIPEX_PROTO_PPPOE:
-   over_ifp = ifunit(req->pr_proto.pppoe.over_ifname);
-   if (over_ifp == NULL)
-   return (EINVAL);
-   if (req->pr_peer_address.ss_family != AF_UNSPEC)
-   return (EINVAL);
-   break;
-#endif
-#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
-   case PIPEX_PROTO_PPTP:
-   case PIPEX_PROTO_L2TP:
-   switch (req->pr_peer_address.ss_family) {
-   case AF_INET:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in))
-   return (EINVAL);
-   break;
-#ifdef INET6
-   case AF_INET6:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in6))
-   return (EINVAL);
-   break;
-#endif
-   default:
-   return (EPROTONOSUPPORT);
-   }
-   if (req->pr_peer_address.ss_family !=
-   req->pr_local_address.ss_family ||
-   req->pr_peer_address.ss_len !=
-   req->pr_local_address.ss_len)
-   return (EINVAL);
-   break;
-#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
-   default:
-   return (EPROTONOSUPPORT);
-   }
+   NET_ASSERT_LOCKED();
 
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
-   if (pxi == NULL)
-   return (ENOMEM);
 
-   session = >pxi_session;
ifp = >pxi_if;
+   iface = >pxi_ifcontext;
 
-   /* fake a pipex interface context */
-   session->pipex_iface = >pxi_ifcontext;
-   session->pipex_iface->ifnet_this = ifp;
-   session->pipex_iface->pipexmode = PIPEX_ENABLED;
-
-   /* setup session */
-   session->state = PIPEX_STATE_OPENED;
-   session->protocol = req->pr_protocol;
-   session->session_id = req->pr_session_id;
-   session->peer_session_id = req->pr_peer_session_id;
-   session->peer_mru = req->pr_peer_mru;
-   session->timeout_sec = req->pr_timeout_sec;
-   session->ppp_flags = req->pr_ppp_flags;
-   session->ppp_id = req->pr_ppp_id;
-
-   session->ip_forward = 1;
-
-   session->ip_address.sin_family = AF_INET;
-   session->ip_address.sin_len = sizeof(struct sockaddr_in);
-   session->ip_address.sin_addr = req->pr_ip_address;
-
-   session->ip_netmask.sin_family = AF_INET;
-   session->ip_netmask.sin_len = sizeof(struct sockaddr_in);
-   session->ip_netmask.sin_addr = req->pr_ip_netmask;
-
-   if (session->ip_netmask.sin_addr.s_addr == 0L)
-   session->ip_netmask.sin_addr.s_addr = 0xL;
-   session->ip_address.sin_addr.s_addr &=
-   session->ip_netmask.sin_addr.s_addr;
-
-   if (req->pr_peer_address.ss_len > 0)
-   memcpy(>peer, >pr_peer_address,
-   MIN(req->pr_peer_address.ss_len, sizeof(session->peer)));
-   if (req->pr_local_address.ss_len > 0)
-   memcpy(>local, >pr_local_address,
-   MIN(req->pr_local_address.ss_len, sizeof(session->local)));
-#ifdef PIPEX_PPPOE
-   if (req->pr_protocol == PIPEX_PROTO_PPPOE)
-   session->proto.pppoe.over_ifidx = over_ifp->if_index;
-#endif
-#ifdef PIPEX_PPTP
-   if (req->pr_protocol == PIPEX_PROTO_PPTP) {
-   struct pipex_pptp_session *sess_pptp = >proto.pptp;
-
-   sess_pptp->snd_gap = 0;
-   sess_pptp->rcv_gap = 0;
-   sess_pptp->snd_una = req->pr_proto.pptp.snd_una;
-   sess_pptp->snd_nxt = req->pr_proto.pptp.snd_nxt;
-   sess_pptp->rcv_nxt = req->pr_proto.pptp.rcv_nxt;
-   sess_pptp->rcv_acked = req->pr_proto.pptp.rcv_acked;
-
-   sess_pptp->winsz =

pipex(4) man page fix

2020-03-28 Thread Vitaliy Makkoveev

pipex not used with tun(4)

Index: share/man/man4/pipex.4
===
RCS file: /cvs/src/share/man/man4/pipex.4,v
retrieving revision 1.11
diff -u -p -r1.11 pipex.4
--- share/man/man4/pipex.4  18 Apr 2017 03:21:48 -  1.11
+++ share/man/man4/pipex.4  28 Mar 2020 21:13:57 -
@@ -32,9 +32,7 @@
 .Sh DESCRIPTION
 .Nm
 is used with
-.Xr tun 4
-and
-.Xr pppx 4 ,
+.Xr pppx 4
 and handles PPP frames and forwards IP packets in-kernel.
 It accelerates the performance of packet forwarding, because it reduces
 copying of packets between kernel and userland.
@@ -51,10 +49,8 @@ using
 adds some extensions to the
 .Xr ioctl 2
 requests to
-.Xr tun 4
-or
 .Xr pppx 4
-devices.
+device.
 The added requests are as follows:
 .Bl -tag -width Ds
 .It Dv PIPEXGMODEFa "int *"
@@ -104,8 +100,7 @@ struct pipex_session_req {
 uint16_tpr_peer_mru; /* peer's mru */
 uint16_tpr_timeout_sec;  /* idle timer */
 
-struct in_addr  pr_ip_srcaddr;/* local IP address.
-not used by tun(4) */
+struct in_addr  pr_ip_srcaddr;/* local IP address */
 struct in_addr  pr_ip_address;/* framed IP address */
 struct in_addr  pr_ip_netmask;/* framed IP netmask */
 struct sockaddr_in6 pr_ip6_address;   /* framed IPv6 address */
@@ -264,9 +259,6 @@ struct pipex_session_list_req {
 Set the
 .Xr pppx 4
 interface's description of the session.
-This command doesn't work on
-.Xr tun 4
-devices.
 Specify the session and its description using a
 .Vt pipex_session_descr_req
 structure, which has the following definition:
@@ -281,7 +273,6 @@ struct pipex_session_descr_req {
 .Sh SEE ALSO
 .Xr ioctl 2 ,
 .Xr pppx 4 ,
-.Xr tun 4 ,
 .Xr npppd 8 ,
 .Xr sysctl 8
 .Sh AUTHORS

Re: Prevent memory corruption by pipex_timer()

2020-03-27 Thread Vitaliy Makkoveev

On Fri, Mar 27, 2020 at 03:13:01PM +0100, Martin Pieuchot wrote:
> On 27/03/20(Fri) 15:16, Vitaliy Makkoveev wrote:
> > On Fri, Mar 27, 2020 at 10:43:52AM +0100, Martin Pieuchot wrote:
> > > Do you have a backtrace for the memory corruption?  Could you share it?
> > Yes. Apply path below, compile and run code, and when you had see
> > "pipex_session ... killed" kill this code. Screenshot attached.
> > STABLE-6.[56] are affected too.
> 
> Thanks for the screenshot.  The backtraces it contains is the most
> important piece of informations when reporting such bug.
> 
> So we're faced with a double-free.  After setting a timeout to a
> non-NULL value pipex_timer() will free the descriptor in 
> pipex_destroy_session(), later when the program terminates and the
> pseudo-device is closed the same descriptor is being freed by
> pppx_if_destroy().
> 
> But more importantly, pipex_destroy_session() puts a pointer back to a
> pool from which it hasn't been allocated (see line 597 of net/pipex.c).
> 
> To fix this particular double free the question is: do you want to use
> this timer feature with pppx(4)?  Does it even make sense?  If not I
> guess your fix is the way to go.
I don't, but npppd(8) can. With my fix if npppd.conf has lines

tunnel L2TP protocol l2tp {
  listen on 0.0.0.0
  idle-timeout 1 # non-NULL value
}

npppd(8) can't create pppx interface. Without my fix npppd(8) can crash
kernel with this config. Does it make sense? npppd.conf(5) says:
"The default is 0, which disables the idle timer." Does anybody use
non-default value? I don't know, but there is no crash reports before.

> 
> Due to the amount of code duplicated (copy-pasted) between net/pipex.c
> and net/if_pppx.c it is not unlikely that more of such bugs exist.
> 
> So it would be nice to introduce some helper function like session_free()
> and session_setup() to make sure the same code, including safety checks,
> is run everywhere.
> 
I suggest, use-after-free bug exists in pipexintr(). See lines 787-795
of net/pipex.c. Destruction of pipex_session denied if mbuf queues are
not empty. But pipex_destroy_session() call within pipex_iface_stop()
hasn't this check. Destruction by pppx_if_destroy() also hasn't this
check. But mbufs with m_pkthdr.ph_cookie pointed to freed session can
exist in those queues (lines 671 and 708 of net/pipex.c). I suggest it
should be fixed first.

Re: Prevent memory corruption by pipex_timer()

2020-03-31 Thread Vitaliy Makkoveev

I refactored pppx(4). The idea is to use pipex(4) as it was designed.
No one holds pipex_session outside pipex(4) so pipex_timer() calls are
safe. Unfortunately, this way gives some performance impact, because we
need to call pipex_lookup_by_session_id() in some places. This impact
will gone after pipex_session becames safely shared between multiple
holders and this is my next goal.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- sys/net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ sys/net/if_pppx.c   31 Mar 2020 13:28:54 -
@@ -155,8 +155,8 @@ struct pppx_if {
int pxi_unit;
struct ifnetpxi_if;
struct pppx_dev *pxi_dev;
-   struct pipex_sessionpxi_session;
struct pipex_iface_context  pxi_ifcontext;
+   int pxi_ppp_id;
 };
 
 static inline int
@@ -178,7 +178,8 @@ int pppx_del_session(struct pppx_dev *,
 intpppx_set_session_descr(struct pppx_dev *,
struct pipex_session_descr_req *);
 
-void   pppx_if_destroy(struct pppx_dev *, struct pppx_if *);
+intpppx_if_destroy(struct pppx_dev *, struct pppx_if *,
+   struct pipex_session_close_req *);
 void   pppx_if_start(struct ifnet *);
 intpppx_if_output(struct ifnet *, struct mbuf *,
struct sockaddr *, struct rtentry *);
@@ -587,7 +588,7 @@ pppxclose(dev_t dev, int flags, int mode
/* XXX */
NET_LOCK();
while ((pxi = LIST_FIRST(>pxd_pxis)))
-   pppx_if_destroy(pxd, pxi);
+   pppx_if_destroy(pxd, pxi, NULL);
NET_UNLOCK();
 
LIST_REMOVE(pxd, pxd_entry);
@@ -655,160 +656,14 @@ int
 pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
 {
struct pppx_if *pxi;
-   struct pipex_session *session;
-   struct pipex_hash_head *chain;
struct ifnet *ifp;
int unit, error = 0;
struct in_ifaddr *ia;
struct sockaddr_in ifaddr;
-#ifdef PIPEX_PPPOE
-   struct ifnet *over_ifp = NULL;
-#endif
-
-   switch (req->pr_protocol) {
-#ifdef PIPEX_PPPOE
-   case PIPEX_PROTO_PPPOE:
-   over_ifp = ifunit(req->pr_proto.pppoe.over_ifname);
-   if (over_ifp == NULL)
-   return (EINVAL);
-   if (req->pr_peer_address.ss_family != AF_UNSPEC)
-   return (EINVAL);
-   break;
-#endif
-#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
-   case PIPEX_PROTO_PPTP:
-   case PIPEX_PROTO_L2TP:
-   switch (req->pr_peer_address.ss_family) {
-   case AF_INET:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in))
-   return (EINVAL);
-   break;
-#ifdef INET6
-   case AF_INET6:
-   if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in6))
-   return (EINVAL);
-   break;
-#endif
-   default:
-   return (EPROTONOSUPPORT);
-   }
-   if (req->pr_peer_address.ss_family !=
-   req->pr_local_address.ss_family ||
-   req->pr_peer_address.ss_len !=
-   req->pr_local_address.ss_len)
-   return (EINVAL);
-   break;
-#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
-   default:
-   return (EPROTONOSUPPORT);
-   }
 
pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
-   if (pxi == NULL)
-   return (ENOMEM);
-
-   session = >pxi_session;
ifp = >pxi_if;
 
-   /* fake a pipex interface context */
-   session->pipex_iface = >pxi_ifcontext;
-   session->pipex_iface->ifnet_this = ifp;
-   session->pipex_iface->pipexmode = PIPEX_ENABLED;
-
-   /* setup session */
-   session->state = PIPEX_STATE_OPENED;
-   session->protocol = req->pr_protocol;
-   session->session_id = req->pr_session_id;
-   session->peer_session_id = req->pr_peer_session_id;
-   session->peer_mru = req->pr_peer_mru;
-   session->timeout_sec = req->pr_timeout_sec;
-   session->ppp_flags = req->pr_ppp_flags;
-   session->ppp_id = req->pr_ppp_id;
-
-   session->ip_forward = 1;
-
-   session->ip_address.sin_family = AF_INET;
-   session->ip_address.sin_len = sizeof(struct sockaddr_in);
-   session->ip_address.sin_addr = req->pr_ip_address;
-
-   session->ip_netmask.sin_family = AF_INET;
-   session->ip_netmask.sin_len = sizeof(struct sockaddr_in);
-   session->ip_netmask.sin_addr = req->pr_ip_netmask;
-
-   if (session->ip_netmask.sin_addr.s_addr == 0L)
-   session->ip_netmask.sin_addr.s_addr =

Re: Prevent memory corruption by pipex_timer()

2020-03-31 Thread Vitaliy Makkoveev

On Tue, Mar 31, 2020 at 05:23:46PM +0200, Martin Pieuchot wrote:
> On 31/03/20(Tue) 16:48, Vitaliy Makkoveev wrote:
> > I refactored pppx(4). The idea is to use pipex(4) as it was designed.
> > No one holds pipex_session outside pipex(4) so pipex_timer() calls are
> > safe. Unfortunately, this way gives some performance impact, because we
> > need to call pipex_lookup_by_session_id() in some places. This impact
> > will gone after pipex_session becames safely shared between multiple
> > holders and this is my next goal.
> 
> I'd be more confident if we could go with the one-line change that you
> submitted in the first mail of this thread without the debugging
> messages.
> 
> Mixing bug-fixes (or features) and refactoring is not a great
> development practise as it might hide the point of the change and
> introduce or expose new bugs. 
>
I changed my original patch. Since npppd(8) ignores ioctl() errors and
client will be connected without pppx(4) interface creation I decide to
lie npppd(8).

Index: net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ net/if_pppx.c   31 Mar 2020 15:48:26 -
@@ -665,6 +665,13 @@ pppx_add_session(struct pppx_dev *pxd, s
struct ifnet *over_ifp = NULL;
 #endif
 
+   /*
+* XXX: prevent pxi destruction by pipex_timer()
+* npppd(8) ignores pppx_add_session() errors so fake it
+*/
+   if (req->pr_timeout_sec != 0)
+   req->pr_timeout_sec = 0;
+
switch (req->pr_protocol) {
 #ifdef PIPEX_PPPOE
case PIPEX_PROTO_PPPOE:

Re: Prevent memory corruption by pipex_timer()

2020-03-31 Thread Vitaliy Makkoveev

On Tue, Mar 31, 2020 at 06:15:46PM +0200, Martin Pieuchot wrote:
> On 31/03/20(Tue) 18:58, Vitaliy Makkoveev wrote:
> > On Tue, Mar 31, 2020 at 05:23:46PM +0200, Martin Pieuchot wrote:
> > > On 31/03/20(Tue) 16:48, Vitaliy Makkoveev wrote:
> > > > I refactored pppx(4). The idea is to use pipex(4) as it was designed.
> > > > No one holds pipex_session outside pipex(4) so pipex_timer() calls are
> > > > safe. Unfortunately, this way gives some performance impact, because we
> > > > need to call pipex_lookup_by_session_id() in some places. This impact
> > > > will gone after pipex_session becames safely shared between multiple
> > > > holders and this is my next goal.
> > > 
> > > I'd be more confident if we could go with the one-line change that you
> > > submitted in the first mail of this thread without the debugging
> > > messages.
> > > 
> > > Mixing bug-fixes (or features) and refactoring is not a great
> > > development practise as it might hide the point of the change and
> > > introduce or expose new bugs. 
> > >
> > I changed my original patch. Since npppd(8) ignores ioctl() errors and
> > client will be connected without pppx(4) interface creation I decide to
> > lie npppd(8).
> 
> Well better fix npppd(8), no?  Not crashing the kernel is priority 1.
I made patch for npppd(8) too. I include it to this email below, without
starting new thread, ok? If ioctl(PIPEXASESSION) failed and error !=
ENXIO it means that pipex is enabled and session creation failed so down
this connection.

> Then if the daemon has a bug, should the kernel work around it? 
In most common cases should :(

 cut begin

Index: usr.sbin/npppd/npppd/ppp.c
===
RCS file: /cvs/src/usr.sbin/npppd/npppd/ppp.c,v
retrieving revision 1.28
diff -u -p -r1.28 ppp.c
--- usr.sbin/npppd/npppd/ppp.c  27 Feb 2019 04:52:19 -  1.28
+++ usr.sbin/npppd/npppd/ppp.c  31 Mar 2020 19:59:21 -
@@ -1134,8 +1134,12 @@ ppp_on_network_pipex(npppd_ppp *_this)
(!MPPE_MUST_NEGO(_this) || _this->ccp.fsm.state == OPENED ||
_this->ccp.fsm.state == STOPPED)) {
/* IPCP is opened and MPPE is not required or MPPE is opened */
-   if (npppd_ppp_pipex_enable(_this->pppd, _this) != 0)
+   if (npppd_ppp_pipex_enable(_this->pppd, _this) != 0) {
ppp_log(_this, LOG_WARNING, "failed enable pipex: %m");
+   /* failed to create pipex session */
+   ppp_phy_downed(_this);
+   return;
+   }
ppp_log(_this, LOG_NOTICE, "Using pipex=%s",
(_this->pipex_enabled != 0)? "yes" : "no");
_this->pipex_started = 1;

 cut end

And patch for pppx()

 cut begin

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.77
diff -u -p -r1.77 if_pppx.c
--- sys/net/if_pppx.c   26 Mar 2020 16:50:46 -  1.77
+++ sys/net/if_pppx.c   31 Mar 2020 20:00:33 -
@@ -665,6 +665,12 @@ pppx_add_session(struct pppx_dev *pxd, s
struct ifnet *over_ifp = NULL;
 #endif
 
+   /*
+* XXX: prevent pxi destruction by pipex_timer()
+*/
+   if (req->pr_timeout_sec != 0)
+   return (EINVAL);
+
switch (req->pr_protocol) {
 #ifdef PIPEX_PPPOE
case PIPEX_PROTO_PPPOE:

 cut end

Re: Dedulpicate pipex(4) and pppx(4) code

2020-03-30 Thread Vitaliy Makkoveev

ping

Re: Dedulpicate pipex(4) and pppx(4) code

2020-04-02 Thread Vitaliy Makkoveev

On Thu, Apr 02, 2020 at 09:26:23AM +0200, Martin Pieuchot wrote:
> Hello Vitaliy,
> 
> On 01/04/20(Wed) 12:59, Vitaliy Makkoveev wrote:
> > Updated diff. The idea is to use already existing pipex API for
> > pipex_session creation and destruction. pppx_if now holds a reference
> > to pipex_session.
> 
> This is great!
> 
> There are many things in this diff which makes it complicated to review,
> at least to me.  Note that I'm not really familiar with this code.
> 
> For example the panic() after the RBT_REMOVE() is now questionable since
> the lock is released then retaken.  Which brings a question for later:
> what is the lock really protecting?  That can be documented in the header
> like it is done for other data structures.
> 
> So the changes includes:
> 
> - pool_get() with PR_WAITOK should never fail.  This could be the first
>   step and also move the allocation at the beginning to make a pattern
>   appear.
> 
> - Allocation of `pxi_session' separately from `pxi', this creates quite
>   some noise because of the line changing indirection.
> 
> - Change in the error paths, which aren't correct and IMHO should be
>   left out for the moment.
> 
> - Use of pipex_add_session(), isn't that independent from the allocation
>   of `session'?
> 
> Could you help me review this by submitting smaller diffs?  Thanks a
> lot!
Let's take a break, another pipex(4) issue found. I'll send diff for it
it today later with new thread.

> 
> > 
> > Index: sys/net/if_pppx.c
> > ===
> > RCS file: /cvs/src/sys/net/if_pppx.c,v
> > retrieving revision 1.78
> > diff -u -p -r1.78 if_pppx.c
> > --- sys/net/if_pppx.c   1 Apr 2020 07:15:59 -   1.78
> > +++ sys/net/if_pppx.c   1 Apr 2020 09:50:19 -
> > @@ -155,7 +155,7 @@ struct pppx_if {
> > int pxi_unit;
> > struct ifnetpxi_if;
> > struct pppx_dev *pxi_dev;
> > -   struct pipex_sessionpxi_session;
> > +   struct pipex_session*pxi_session;
> > struct pipex_iface_context  pxi_ifcontext;
> >  };
> >  
> > @@ -655,15 +655,10 @@ int
> >  pppx_add_session(struct pppx_dev *pxd, struct pipex_session_req *req)
> >  {
> > struct pppx_if *pxi;
> > -   struct pipex_session *session;
> > -   struct pipex_hash_head *chain;
> > struct ifnet *ifp;
> > int unit, error = 0;
> > struct in_ifaddr *ia;
> > struct sockaddr_in ifaddr;
> > -#ifdef PIPEX_PPPOE
> > -   struct ifnet *over_ifp = NULL;
> > -#endif
> >  
> > /*
> >  * XXX: As long as `session' is allocated as part of a `pxi'
> > @@ -673,157 +668,16 @@ pppx_add_session(struct pppx_dev *pxd, s
> > if (req->pr_timeout_sec != 0)
> > return (EINVAL);
> >  
> > -   switch (req->pr_protocol) {
> > -#ifdef PIPEX_PPPOE
> > -   case PIPEX_PROTO_PPPOE:
> > -   over_ifp = ifunit(req->pr_proto.pppoe.over_ifname);
> > -   if (over_ifp == NULL)
> > -   return (EINVAL);
> > -   if (req->pr_peer_address.ss_family != AF_UNSPEC)
> > -   return (EINVAL);
> > -   break;
> > -#endif
> > -#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
> > -   case PIPEX_PROTO_PPTP:
> > -   case PIPEX_PROTO_L2TP:
> > -   switch (req->pr_peer_address.ss_family) {
> > -   case AF_INET:
> > -   if (req->pr_peer_address.ss_len != sizeof(struct 
> > sockaddr_in))
> > -   return (EINVAL);
> > -   break;
> > -#ifdef INET6
> > -   case AF_INET6:
> > -   if (req->pr_peer_address.ss_len != sizeof(struct 
> > sockaddr_in6))
> > -   return (EINVAL);
> > -   break;
> > -#endif
> > -   default:
> > -   return (EPROTONOSUPPORT);
> > -   }
> > -   if (req->pr_peer_address.ss_family !=
> > -   req->pr_local_address.ss_family ||
> > -   req->pr_peer_address.ss_len !=
> > -   req->pr_local_address.ss_len)
> > -   return (EINVAL);
> > -   break;
> > -#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
> > -   default:
> > -   return (EPROTONOSUPPORT);
> > -   }
> > -
> > pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
> > -   if (pxi == NULL)
> > -   return (ENOMEM);
&g

Re: Add missing #ifdefs to pppx_if_destroy()

2020-03-26 Thread Vitaliy Makkoveev

On Thu, Mar 26, 2020 at 11:56:27AM +0100, Martin Pieuchot wrote:
> On 26/03/20(Thu) 13:34, Vitaliy Makkoveev wrote:
> > Add missing #ifdefs to pppx_if_destroy() as it done in
> > pipex_destroy_session(). Also remove unnecessary cast.
> 
> What's the point of such #ifdef?  I understand the current code is not
> coherent, but does this reduce the binary size?  For a case in a switch?
> Because it clearly complicates the understanding of the code.
Code coherency is the goal.
> So maybe having #if defined(PIPEX_PPTP) || defined(PIPEX_L2TP) there as
> well as in pppx_add_session() and pipex_destroy_session() is the way to
> go.
> 
> But the underlying question would it make sense to have pppx_if_destroy() 
> and pipex_destroy_session() call the same function to clear sessions?
> 
> The same could be add for pipex_add_session() and pppx_add_session().
My next goal.

I updated this diff with '#if defined()...' and for
pipex_destroy_session() too.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.76
diff -u -p -r1.76 if_pppx.c
--- sys/net/if_pppx.c   20 Feb 2020 16:56:52 -  1.76
+++ sys/net/if_pppx.c   26 Mar 2020 11:27:07 -
@@ -967,13 +967,18 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 
LIST_REMOVE(session, id_chain);
LIST_REMOVE(session, session_list);
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {
+#ifdef PIPEX_PPTP
case PIPEX_PROTO_PPTP:
+#endif
+#ifdef PIPEX_L2TP
case PIPEX_PROTO_L2TP:
-   LIST_REMOVE((struct pipex_session *)session,
-   peer_addr_chain);
+#endif
+   LIST_REMOVE(session, peer_addr_chain);
break;
}
+#endif
 
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(_session_list))
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.108
diff -u -p -r1.108 pipex.c
--- sys/net/pipex.c 25 Mar 2020 11:39:58 -  1.108
+++ sys/net/pipex.c 26 Mar 2020 11:27:08 -
@@ -581,16 +581,19 @@ pipex_destroy_session(struct pipex_sessi
 
LIST_REMOVE(session, id_chain);
LIST_REMOVE(session, session_list);
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
+   switch (session->protocol) {
 #ifdef PIPEX_PPTP
-   if (session->protocol == PIPEX_PROTO_PPTP) {
-   LIST_REMOVE(session, peer_addr_chain);
-   }
+   case PIPEX_PROTO_PPTP:
 #endif
 #ifdef PIPEX_L2TP
-   if (session->protocol == PIPEX_PROTO_L2TP) {
+   case PIPEX_PROTO_L2TP:
+#endif
LIST_REMOVE(session, peer_addr_chain);
+   break;
}
 #endif
+
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(_session_list))
pipex_timer_stop();

Add missing #ifdefs to pppx_if_destroy()

2020-03-26 Thread Vitaliy Makkoveev

Add missing #ifdefs to pppx_if_destroy() as it done in
pipex_destroy_session(). Also remove unnecessary cast.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.76
diff -u -p -r1.76 if_pppx.c
--- sys/net/if_pppx.c   20 Feb 2020 16:56:52 -  1.76
+++ sys/net/if_pppx.c   26 Mar 2020 10:07:26 -
@@ -967,13 +967,14 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 
LIST_REMOVE(session, id_chain);
LIST_REMOVE(session, session_list);
-   switch (session->protocol) {
-   case PIPEX_PROTO_PPTP:
-   case PIPEX_PROTO_L2TP:
-   LIST_REMOVE((struct pipex_session *)session,
-   peer_addr_chain);
-   break;
-   }
+#ifdef PIPEX_PPTP
+   if (session->protocol == PIPEX_PROTO_PPTP)
+   LIST_REMOVE(session, peer_addr_chain);
+#endif
+#ifdef PIPEX_L2TP
+   if (session->protocol == PIPEX_PROTO_L2TP)
+   LIST_REMOVE(session, peer_addr_chain);
+#endif
 
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(_session_list))

Fix pipex crash

2020-03-25 Thread Vitaliy Makkoveev

pipex_destroy_session should be called under NET_LOCK but if it called
by this sequence: pppacclose -> pipex_iface_fini -> pipex_iface_stop
-> pipex_destroy_session, NET_LOCK is missing and kernel crashes.
pipex_iface_stop calls are protected by NET_LOCK, so it should be also
protected within pipex_iface_fini. This problem also desribed at
https://marc.info/?l=openbsd-misc=158496654715242=2

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.107
diff -u -p -r1.107 pipex.c
--- sys/net/pipex.c 31 Jan 2019 18:01:14 -  1.107
+++ sys/net/pipex.c 25 Mar 2020 10:02:40 -
@@ -197,7 +197,9 @@ void
 pipex_iface_fini(struct pipex_iface_context *pipex_iface)
 {
pool_put(_session_pool, pipex_iface->multicast_session);
+   NET_LOCK();
pipex_iface_stop(pipex_iface);
+   NET_UNLOCK();
 }
 
 int

Re: Add missing #ifdefs to pppx_if_destroy()

2020-03-26 Thread Vitaliy Makkoveev

On Thu, Mar 26, 2020 at 01:46:29PM +0100, Martin Pieuchot wrote:
> Does the diff below works for you?  Are you ok with the direction?  Any
> comment?

Diff works for me, Except you missed switch in the and of
pppx_add_session() and pipex_add_session().

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.76
diff -u -p -r1.76 if_pppx.c
--- sys/net/if_pppx.c   20 Feb 2020 16:56:52 -  1.76
+++ sys/net/if_pppx.c   26 Mar 2020 13:21:32 -
@@ -675,12 +675,9 @@ pppx_add_session(struct pppx_dev *pxd, s
return (EINVAL);
break;
 #endif
-#ifdef PIPEX_PPTP
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
case PIPEX_PROTO_PPTP:
-#endif
-#ifdef PIPEX_L2TP
case PIPEX_PROTO_L2TP:
-#endif
switch (req->pr_peer_address.ss_family) {
case AF_INET:
if (req->pr_peer_address.ss_len != sizeof(struct 
sockaddr_in))
@@ -701,6 +698,7 @@ pppx_add_session(struct pppx_dev *pxd, s
req->pr_local_address.ss_len)
return (EINVAL);
break;
+#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
default:
return (EPROTONOSUPPORT);
}
@@ -854,6 +852,7 @@ pppx_add_session(struct pppx_dev *pxd, s
chain = PIPEX_ID_HASHTABLE(session->session_id);
LIST_INSERT_HEAD(chain, session, id_chain);
LIST_INSERT_HEAD(_session_list, session, session_list);
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (req->pr_protocol) {
case PIPEX_PROTO_PPTP:
case PIPEX_PROTO_L2TP:
@@ -862,6 +861,7 @@ pppx_add_session(struct pppx_dev *pxd, s
LIST_INSERT_HEAD(chain, session, peer_addr_chain);
break;
}
+#endif
 
/* if first session is added, start timer */
if (LIST_NEXT(session, session_list) == NULL)
@@ -967,13 +967,14 @@ pppx_if_destroy(struct pppx_dev *pxd, st
 
LIST_REMOVE(session, id_chain);
LIST_REMOVE(session, session_list);
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (session->protocol) {
case PIPEX_PROTO_PPTP:
case PIPEX_PROTO_L2TP:
-   LIST_REMOVE((struct pipex_session *)session,
-   peer_addr_chain);
+   LIST_REMOVE(session, peer_addr_chain);
break;
}
+#endif
 
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(_session_list))
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.108
diff -u -p -r1.108 pipex.c
--- sys/net/pipex.c 25 Mar 2020 11:39:58 -  1.108
+++ sys/net/pipex.c 26 Mar 2020 13:21:33 -
@@ -283,12 +283,8 @@ pipex_add_session(struct pipex_session_r
break;
 #endif
 #if defined(PIPEX_L2TP) || defined(PIPEX_PPTP)
-#ifdef PIPEX_PPTP
case PIPEX_PROTO_PPTP:
-#endif
-#ifdef PIPEX_L2TP
case PIPEX_PROTO_L2TP:
-#endif
switch (req->pr_peer_address.ss_family) {
case AF_INET:
if (req->pr_peer_address.ss_len !=
@@ -311,7 +307,7 @@ pipex_add_session(struct pipex_session_r
req->pr_local_address.ss_len)
return (EINVAL);
break;
-#endif
+#endif /* defined(PIPEX_PPTP) || defined(PIPEX_L2TP) */
default:
return (EPROTONOSUPPORT);
}
@@ -450,6 +446,7 @@ pipex_add_session(struct pipex_session_r
chain = PIPEX_ID_HASHTABLE(session->session_id);
LIST_INSERT_HEAD(chain, session, id_chain);
LIST_INSERT_HEAD(_session_list, session, session_list);
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
switch (req->pr_protocol) {
case PIPEX_PROTO_PPTP:
case PIPEX_PROTO_L2TP:
@@ -457,6 +454,7 @@ pipex_add_session(struct pipex_session_r
pipex_sockaddr_hash_key(>peer.sa));
LIST_INSERT_HEAD(chain, session, peer_addr_chain);
}
+#endif
 
/* if first session is added, start timer */
if (LIST_NEXT(session, session_list) == NULL)
@@ -581,16 +579,15 @@ pipex_destroy_session(struct pipex_sessi
 
LIST_REMOVE(session, id_chain);
LIST_REMOVE(session, session_list);
-#ifdef PIPEX_PPTP
-   if (session->protocol == PIPEX_PROTO_PPTP) {
-   LIST_REMOVE(session, peer_addr_chain);
-   }
-#endif
-#ifdef PIPEX_L2TP
-   if (session->protocol == PIPEX_PROTO_L2TP) {
+#if defined(PIPEX_PPTP) || defined(PIPEX_L2TP)
+   switch (session->protocol) {
+   case PIPEX_PROTO_PPTP:
+   case PIPEX_PROTO_L2TP:
LIST_REMOVE(session, peer_addr_chain);
+   break;
}
 #endif
+
/* if final session is destroyed, stop timer */
if (LIST_EMPTY(_session_list))

pipex(4) cleanup: use LIST_FOERACH_SAFE() instead of manual rolling

2020-04-06 Thread Vitaliy Makkoveev

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.111
diff -u -p -r1.111 pipex.c
--- sys/net/pipex.c 6 Apr 2020 12:31:30 -   1.111
+++ sys/net/pipex.c 6 Apr 2020 12:58:31 -
@@ -177,17 +177,15 @@ pipex_iface_start(struct pipex_iface_con
 Static void
 pipex_iface_stop(struct pipex_iface_context *pipex_iface)
 {
-   struct pipex_session *session;
-   struct pipex_session *session_next;
+   struct pipex_session *session, *session_tmp;
 
pipex_iface->pipexmode = 0;
/*
 * traversal all pipex sessions.
 * it will become heavy if the number of pppac devices bocomes large.
 */
-   for (session = LIST_FIRST(_session_list);
-   session; session = session_next) {
-   session_next = LIST_NEXT(session, session_list);
+   LIST_FOREACH_SAFE(session, _session_list, session_list,
+   session_tmp) {
if (session->pipex_iface == pipex_iface)
pipex_destroy_session(session);
}
@@ -555,13 +553,12 @@ Static int
 pipex_get_closed(struct pipex_session_list_req *req,
 struct pipex_iface_context *iface)
 {
-   struct pipex_session *session, *session_next;
+   struct pipex_session *session, *session_tmp;
 
NET_ASSERT_LOCKED();
bzero(req, sizeof(*req));
-   for (session = LIST_FIRST(_close_wait_list);
-   session; session = session_next) {
-   session_next = LIST_NEXT(session, state_list);
+   LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
+   session_tmp) {
if (session->pipex_iface != iface)
continue;
req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
@@ -763,16 +760,14 @@ pipex_timer_stop(void)
 Static void
 pipex_timer(void *ignored_arg)
 {
-   struct pipex_session *session;
-   struct pipex_session *session_next;
+   struct pipex_session *session, *session_tmp;
 
timeout_add_sec(_timer_ch, pipex_prune);
 
NET_LOCK();
/* walk through */
-   for (session = LIST_FIRST(_session_list); session;
-   session = session_next) {
-   session_next = LIST_NEXT(session, session_list);
+   LIST_FOREACH_SAFE(session, _session_list, session_list,
+   session_tmp) {
switch (session->state) {
case PIPEX_STATE_OPENED:
if (session->timeout_sec == 0)

proc.h: fix comment for proc's cached credentials

2020-04-24 Thread Vitaliy Makkoveev

Per thread cached credentials are accessed only by curproc. Curproc
doesn't modify it's 'p_ucred' directly. It allocates new copy, then
modyfies newcopy and replaces the old. So 'p_ucred' is owned by curproc.

Index: sys/sys/proc.h
===
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.294
diff -u -p -r1.294 proc.h
--- sys/sys/proc.h  6 Apr 2020 07:52:12 -   1.294
+++ sys/sys/proc.h  24 Apr 2020 10:12:56 -
@@ -392,7 +392,7 @@ struct proc {
u_int   p_estcpu;   /* [s] Time averaged val of p_cpticks */
int p_pledge_syscall;   /* Cache of current syscall */
 
-   struct  ucred *p_ucred; /* cached credentials */
+   struct  ucred *p_ucred; /* [o] cached credentials */
struct  sigaltstack p_sigstk;   /* sp & on stack state variable */
 
u_long  p_prof_addr;/* tmp storage for profiling addr until AST */

Re: [PATCH] pipex(4): rework PPP input

2020-05-19 Thread Vitaliy Makkoveev

Hello Sergey.

I am not the developer, but I works in pipex(4) layer too. Also mpi@
wants I did rewiev for your diffs.

On Mon, May 04, 2020 at 10:03:40PM +0300, Sergey Ryazanov wrote:
> Split checks from frame accepting with header removing in the common
> PPP input function. This should fix packet capture on a PPP interfaces.

Can you describe the problem you fix? As mpi@ pointed to me, reviewers
are stupid and have no telepathy skills :)

Also your diff does two different things: (1) split frames checks and
input, and (2) modify frames passing to bpf(4). I guess you should split
your diff by two different.

I did some inlined comments below.

> 
> Also forbid IP/IPv6 frames (without PPP header) passing to BPF on
> PPP interfaces to avoid mess.
> 
> Initialy this change was made as a part of pipex(4) and ppp(4)
> integration work. But, since this change make the core a bit more clear
> I would like to publish it now.
> 
> Ok?
> 
> ---
>  sys/net/pipex.c | 95 -
>  1 file changed, 54 insertions(+), 41 deletions(-)
> 
> diff --git sys/net/pipex.c sys/net/pipex.c
> index c433e4beaa6..e0066a61598 100644
> --- sys/net/pipex.c
> +++ sys/net/pipex.c
> @@ -970,41 +970,68 @@ drop:
>  Static void
>  pipex_ppp_input(struct mbuf *m0, struct pipex_session *session, int 
> decrypted)
>  {
> - int proto, hlen = 0;
> + int proto, hlen = 0, align = 0;
>   struct mbuf *n;
>  
>   KASSERT(m0->m_pkthdr.len >= PIPEX_PPPMINLEN);
>   proto = pipex_ppp_proto(m0, session, 0, );
> + switch (proto) {
> + case PPP_IP:
> + if (session->ip_forward == 0)
> + goto drop;
> + if (!decrypted && pipex_session_is_mppe_required(session))
> + /*
> +  * if ip packet received when mppe
> +  * is required, discard it.
> +  */
> + goto drop;
> + align = 1;
> + break;
> +#ifdef INET6
> + case PPP_IPV6:
> + if (session->ip6_forward == 0)
> + goto drop;
> + if (!decrypted && pipex_session_is_mppe_required(session))
> + /*
> +  * if ip packet received when mppe
> +  * is required, discard it.
> +  */
> + goto drop;
> + align = 1;
> + break;
> +#endif
>  #ifdef PIPEX_MPPE
> - if (proto == PPP_COMP) {
> + case PPP_COMP:
>   if (decrypted)
>   goto drop;
>  
>   /* checked this on ppp_common_input() already. */
>   KASSERT(pipex_session_is_mppe_accepted(session));
> -
> - m_adj(m0, hlen);
> - pipex_mppe_input(m0, session);
> - return;
> - }
> - if (proto == PPP_CCP) {
> + break;
> + case PPP_CCP:
>   if (decrypted)
>   goto drop;
> + break;
> +#endif
> + default:
> + if (decrypted)
> + goto drop;
> + /* protocol must be checked on pipex_common_input() already */
> + KASSERT(0);
> + goto drop;
> + }
>  
>  #if NBPFILTER > 0
> - {
> + {
>   struct ifnet *ifp = session->pipex_iface->ifnet_this;
> +
>   if (ifp->if_bpf && ifp->if_type == IFT_PPP)
>   bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_IN);
> - }
> -#endif
> - m_adj(m0, hlen);
> - pipex_ccp_input(m0, session);
> - return;
>   }
>  #endif

For INET[46] cases you can align frame after it's passed to bpf(4). Should
this frame be aligned before passing to bpf(4)?

> +
>   m_adj(m0, hlen);
> - if (!ALIGNED_POINTER(mtod(m0, caddr_t), uint32_t)) {
> + if (align && !ALIGNED_POINTER(mtod(m0, caddr_t), uint32_t)) {
>   n = m_dup_pkt(m0, 0, M_NOWAIT);
>   if (n == NULL)
>   goto drop;
> @@ -1014,35 +1041,21 @@ pipex_ppp_input(struct mbuf *m0, struct pipex_session 
> *session, int decrypted)
>  
>   switch (proto) {
>   case PPP_IP:
> - if (session->ip_forward == 0)
> - goto drop;
> - if (!decrypted && pipex_session_is_mppe_required(session))
> - /*
> -  * if ip packet received when mppe
> -  * is required, discard it.
> -  */
> - goto drop;
>   pipex_ip_input(m0, session);
> - return;
> + break;
>  #ifdef INET6
>   case PPP_IPV6:
> - if (session->ip6_forward == 0)
> - goto drop;
> - if (!decrypted && pipex_session_is_mppe_required(session))
> - /*
> -  * if ip packet received when mppe
> -  * is required, discard it.
>

Re: [PATCH] ppp(4): use common bpf filter hook

2020-05-19 Thread Vitaliy Makkoveev

I am OK with this diff. Also all pseudo interfaces except switch(4) do
the same.

On Mon, May 04, 2020 at 10:02:53PM +0300, Sergey Ryazanov wrote:
> Use bpf filter hook from the common interface structure. This simplifies
> the code by unifying it and prepare ppp(4) for pipex(4) support.
> 
> Ok?
> 
> ---
>  sys/net/if_ppp.c| 16 
>  sys/net/if_pppvar.h |  1 -
>  2 files changed, 8 insertions(+), 9 deletions(-)
> 
> diff --git sys/net/if_ppp.c sys/net/if_ppp.c
> index 192ec7c91e0..4cba9a8778c 100644
> --- sys/net/if_ppp.c
> +++ sys/net/if_ppp.c
> @@ -204,9 +204,11 @@ int
>  ppp_clone_create(struct if_clone *ifc, int unit)
>  {
>   struct ppp_softc *sc;
> + struct ifnet *ifp;
>  
>   sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
>   sc->sc_unit = unit;
> + ifp = >sc_if;
>   snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
>   ifc->ifc_name, unit);
>   sc->sc_if.if_softc = sc;
> @@ -224,7 +226,7 @@ ppp_clone_create(struct if_clone *ifc, int unit)
>   if_attach(>sc_if);
>   if_alloc_sadl(>sc_if);
>  #if NBPFILTER > 0
> - bpfattach(>sc_bpf, >sc_if, DLT_PPP, PPP_HDRLEN);
> + bpfattach(>if_bpf, ifp, DLT_PPP, PPP_HDRLEN);
>  #endif
>   NET_LOCK();
>   LIST_INSERT_HEAD(_softc_list, sc, sc_list);
> @@ -754,11 +756,9 @@ pppoutput(struct ifnet *ifp, struct mbuf *m0, struct 
> sockaddr *dst,
>   }
>  
>  #if NBPFILTER > 0
> - /*
> -  * See if bpf wants to look at the packet.
> -  */
> - if (sc->sc_bpf)
> - bpf_mtap(sc->sc_bpf, m0, BPF_DIRECTION_OUT);
> + /* See if bpf wants to look at the packet. */
> + if (ifp->if_bpf)
> + bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_OUT);
>  #endif
>  
>   /*
> @@ -1369,8 +1369,8 @@ ppp_inproc(struct ppp_softc *sc, struct mbuf *m)
>  
>  #if NBPFILTER > 0
>   /* See if bpf wants to look at the packet. */
> - if (sc->sc_bpf)
> - bpf_mtap(sc->sc_bpf, m, BPF_DIRECTION_IN);
> + if (ifp->if_bpf)
> + bpf_mtap(ifp->if_bpf, m, BPF_DIRECTION_IN);
>  #endif
>  
>   rv = 0;
> diff --git sys/net/if_pppvar.h sys/net/if_pppvar.h
> index 87f7d1798bb..9dc774a0515 100644
> --- sys/net/if_pppvar.h
> +++ sys/net/if_pppvar.h
> @@ -113,7 +113,6 @@ struct ppp_softc {
>   struct  mbuf *sc_togo;  /* output packet ready to go */
>   struct  mbuf_list sc_npqueue;   /* output packets not to be sent yet */
>   struct  pppstat sc_stats;   /* count of bytes/pkts sent/rcvd */
> - caddr_t sc_bpf; /* hook for BPF */
>   enumNPmode sc_npmode[NUM_NP]; /* what to do with each NP */
>   struct  compressor *sc_xcomp;   /* transmit compressor */
>   void*sc_xc_state;   /* transmit compressor state */
> -- 
> 2.26.0
>

pppx(4): avoid direct usage of pxi owned session.

2020-05-14 Thread Vitaliy Makkoveev

Each `struct pppx_if' holds it's own `pipex_session' and this session is
used directly within ifnet's related handlers pppx_if_start() and
pppx_if_output().

pppx_if_destroy() at first destroys `pipex_session' and calls
if_deatch() which can cause context switch. Hypothetically,
pppx_if_start() or pppx_if_output() can receive cpu at this moment and
start to operate with already destroyed session.

I guess the order of `pppx_if' destruction in pppx_if_destroy() is
right. If we call if_detach() before `pipex_session' destruction, after
context switch caused by this if_detach() this session can be accessed
from network stack, but it's own `ifnet' is already destoyed by
if_detach().

Also pppx_add_session() has the same problem: newly created
`pipex_session' can start to operate with half initilaized ifnet, so
ifnet should be initialized before session creation.

Ifnet should be already exist outside corresponding `pipex_session'
life time. And within ifnet's handlers we should be sure that session is
valid.

Diff below drops direct access of this `pipex_session' in ifnet's
handlers.

In pppx_if_start() we obtain corresponding session by
pipex_lookup_by_session_id(). If pppx_if_start() was called in half of
pppx_if_destroy(), pipex_lookup_by_session_id() will return NULL.
Context switch can't be occured in pppx_if_start().

In pppx_if_output() the only ppp_id is requred.

In pppx_add_session() if_attach() moved before `pipex_session' can be
accessed by pipex(4) internals.

Also this will be useful for future. We are going to move out pipex(4)
related code from pppx(4) and after this diff it will be possible just
remove all duplicating code from pppx_if_destroy() and
pppx_add_session().

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_pppx.c
--- sys/net/if_pppx.c   18 Apr 2020 04:03:56 -  1.84
+++ sys/net/if_pppx.c   14 May 2020 12:47:35 -
@@ -157,6 +157,9 @@ struct pppx_if {
struct pppx_dev *pxi_dev;
struct pipex_sessionpxi_session;
struct pipex_iface_context  pxi_ifcontext;
+   int pxi_protocol;
+   int pxi_session_id;
+   int pxi_ppp_id;
 };
 
 static inline int
@@ -841,6 +844,9 @@ pppx_add_session(struct pppx_dev *pxd, s
pxi->pxi_key.pxik_session_id = req->pr_session_id;
pxi->pxi_key.pxik_protocol = req->pr_protocol;
pxi->pxi_dev = pxd;
+   pxi->pxi_protocol = req->pr_protocol;
+   pxi->pxi_session_id = req->pr_session_id;
+   pxi->pxi_ppp_id = req->pr_ppp_id;
 
/* this is safe without splnet since we're not modifying it */
if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
@@ -867,6 +873,11 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
 
+   /* XXXSMP breaks atomicity */
+   NET_UNLOCK();
+   if_attach(ifp);
+   NET_LOCK();
+
/* hook up pipex context */
chain = PIPEX_ID_HASHTABLE(session->session_id);
LIST_INSERT_HEAD(chain, session, id_chain);
@@ -886,11 +897,6 @@ pppx_add_session(struct pppx_dev *pxd, s
if (LIST_NEXT(session, session_list) == NULL)
pipex_timer_start();
 
-   /* XXXSMP breaks atomicity */
-   NET_UNLOCK();
-   if_attach(ifp);
-   NET_LOCK();
-
if_addgroup(ifp, "pppx");
if_alloc_sadl(ifp);
 
@@ -1051,25 +1057,34 @@ void
 pppx_if_start(struct ifnet *ifp)
 {
struct pppx_if *pxi = (struct pppx_if *)ifp->if_softc;
+   struct pipex_session *session;
struct mbuf *m;
int proto;
 
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return;
 
+   session = pipex_lookup_by_session_id(pxi->pxi_protocol,
+   pxi->pxi_session_id);
+
for (;;) {
IFQ_DEQUEUE(>if_snd, m);
 
if (m == NULL)
break;
 
+   if (session == NULL) {
+   m_freem(m);
+   continue;
+   }
+
proto = *mtod(m, int *);
m_adj(m, sizeof(proto));
 
ifp->if_obytes += m->m_pkthdr.len;
ifp->if_opackets++;
 
-   pipex_ppp_output(m, >pxi_session, proto);
+   pipex_ppp_output(m, session, proto);
}
 }
 
@@ -1129,7 +1144,7 @@ pppx_if_output(struct ifnet *ifp, struct
}
th = mtod(m, struct pppx_hdr *);
th->pppx_proto = 0; /* not used */
-   th->pppx_id = pxi->pxi_session.ppp_id;
+   th->pppx_id = pxi->pxi_ppp_id;
rw_enter_read(_devs_lk);
error = mq_enqueue(>pxi_dev->pxd_svcq, m);
if (error == 0) {

fix pppx(4) with net/ifq.c rev 1.38

2020-05-20 Thread Vitaliy Makkoveev

I got splassert with pppx(4) and net/ifq.c rev 1.38 raised by
NET_ASSERT_LOCKED() in netinet/ip_output.c:113 and underlaying routines.

net/ifq.c rev 1.38 is not in snapshot yet so you need to checkout and
build kernel to reproduce.

 dmesg begin 

splassert: ip_output: want 2 have 0
Starting stack trace...
ip_output(fd801f8c8800,0,0,0,0,0) at ip_output+0x8f
pipex_l2tp_output(fd801f8c8800,8e787808) at
pipex_l2tp_output+0x21d
pipex_ppp_output(fd801f8c8800,8e787808,21) at
pipex_ppp_output+0xda
pppx_if_start(8e787268) at pppx_if_start+0x83
if_qstart_compat(8e7874e0) at if_qstart_compat+0x2e
ifq_serialize(8e7874e0,8e7875b0) at ifq_serialize+0x103
taskq_thread(81f3df30) at taskq_thread+0x4d
end trace frame: 0x0, count: 250
End of stack trace.
splassert: ipsp_spd_lookup: want 2 have 0
Starting stack trace...
ipsp_spd_lookup(fd801f8c8800,2,14,8e726c9c,2,0) at
ipsp_spd_lookup+0x80
ip_output_ipsec_lookup(fd801f8c8800,14,8e726c9c,0,1) at
ip_output_ipsec_lookup+0x4d
ip_output(fd801f8c8800,0,0,0,0,0) at ip_output+0x4fa
pipex_l2tp_output(fd801f8c8800,8e787808) at
pipex_l2tp_output+0x21d
pipex_ppp_output(fd801f8c8800,8e787808,21) at
pipex_ppp_output+0xda
pppx_if_start(8e787268) at pppx_if_start+0x83
if_qstart_compat(8e7874e0) at if_qstart_compat+0x2e
ifq_serialize(8e7874e0,8e7875b0) at ifq_serialize+0x103
taskq_thread(81f3df30) at taskq_thread+0x4d
end trace frame: 0x0, count: 248
End of stack trace.
splassert: spd_table_get: want 2 have 0

 dmesg end 

1. `pxi_if' owned by struct pppx_if has IFXF_MPSAFE flag unset
  1.1 pppx(4) sets IFQ_SET_MAXLEN(>if_snd, 1) at net/if_pppx.c:866
2. pppx_if_output() is called under NET_LOCK()
3. pppx_if_output() calls if_enqueue() at net/if_pppx.c:1123
4. pppx(4) doesn't set `ifp->if_enqueue' so if_enqueue() calls
if_enqueue_ifq() at net/if.c:709 (which is set in net/if.c:639)
5. if_enqueue_ifq() calls ifq_start() at net/if.c:734
6. ifq_start() we a still under NET_LOCK() here

6.a. in net/ifq.c rev 1.37 ifq_start() checks "ifq_len(ifq) >=
min(ifp->if_txmit, ifq->ifq_maxlen)" and this was always true because
(1.1) so we always call ifq_run_start() which calls ifq_serialize().

ifq_serialize() will call if_qstart_compat() which calls
pppx_if_start() which calls pipex_ppp_output() etc while we still
holding NET_LOCK() so the assertions I reported above are not raised.

6.b. net/ifq.c rev 1.38 introduce checks of IFXF_MPSAFE flag. so we are
always going to net/ifq.c:132 where we adding out task to `systq'
referenced by `ifq_softnet' (ifq_softnet set to `systq' at
net/ifq.c:199).

taskq_thread() doesn't grab NET_LOCK() so after net/ifq.c rev 1.38
ifq_serialize() and underlaying pppx_if_start() call without NET_LOCK()
and corresponding asserts raised.

The problem I pointed is not in net/ifq.c rev 1.38 but in pppx(4).
`if_start' routines should grab NET_LOCK() by themself if it is required
but pppx_if_start() and pppac_start() did't do that. pppac_start() has
no underlaying NET_ASSERT_LOCKED() so the pppx(4) is the only case were
the problem is shown.

Since NET_LOCK() is required by pipex(4), diff below adds it to
pppx_if_start() and pppac_start().

After net/ifq.c rev 1.38 pppx_if_start() will newer be called from
pppx_if_output() but from `systq' only so I don't add lock/unlock
dances around if_enqueue() at net/if_pppx.c:1123.

Diff tested for both pppx(4) and pppac(4) cases.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_pppx.c
--- sys/net/if_pppx.c   18 Apr 2020 04:03:56 -  1.84
+++ sys/net/if_pppx.c   20 May 2020 14:17:20 -
@@ -1054,8 +1054,9 @@ pppx_if_start(struct ifnet *ifp)
struct mbuf *m;
int proto;
 
+   NET_LOCK();
if (!ISSET(ifp->if_flags, IFF_RUNNING))
-   return;
+   goto unlock;
 
for (;;) {
IFQ_DEQUEUE(>if_snd, m);
@@ -1071,6 +1072,8 @@ pppx_if_start(struct ifnet *ifp)
 
pipex_ppp_output(m, >pxi_session, proto);
}
+unlock:
+   NET_UNLOCK();
 }
 
 int
@@ -1640,6 +1643,8 @@ pppac_start(struct ifnet *ifp)
struct pppac_softc *sc = ifp->if_softc;
struct mbuf *m;
 
+   NET_LOCK();
+
while ((m = ifq_dequeue(>if_snd)) != NULL) {
 #if NBPFILTER > 0
if (ifp->if_bpf) {
@@ -1668,4 +1673,6 @@ pppac_start(struct ifnet *ifp)
wakeup(sc);
selwakeup(>sc_rsel);
}
+
+   NET_UNLOCK();
 }

Re: [PATCH] pipex(4): rework PPP input

2020-05-20 Thread Vitaliy Makkoveev

On Wed, May 20, 2020 at 04:08:01AM +0300, Sergey Ryazanov wrote:
> 2 Hi Vitaliy,
> 
> On Tue, May 19, 2020 at 12:11 PM Vitaliy Makkoveev
>  wrote:
> > On Mon, May 04, 2020 at 10:03:40PM +0300, Sergey Ryazanov wrote:
> > > Split checks from frame accepting with header removing in the common
> > > PPP input function. This should fix packet capture on a PPP interfaces.
> >
> > Can you describe the problem you fix? As mpi@ pointed to me, reviewers
> > are stupid and have no telepathy skills :)
> 
> When I tried to capture packets on a ppp (4) interface (with pipex
> activated), I noticed that all the PPP CCP frames were ok, but all the
> ingress PPP IP frames were mangled, and they did not contain the PPP
> header at all.

This time only pppx(4) and pppac(4) have pipex(4) support. I don't see
packet capture problems on them. Can you catch and share how to
reproduce this problem with pppx(4) or pppac(4)?

Also did you test your diff with pppx(4) and pppac(4)? I got this

 tcpdump output begin 

[otto@obsd-test ~]$ doas tcpdump -n -i pppx0
doas (otto@obsd-test.local) password:
tcpdump: listening on pppx0, link-type LOOP
21:49:12.138973 
21:49:12.138988 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:13.144026 
21:49:13.144044 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:14.145388 
21:49:14.145403 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:15.148127 
21:49:15.148144 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:16.150694 
21:49:16.150710 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:17.153204 
21:49:17.153222 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:49:18.155404 
21:49:18.155422 10.0.0.1 > 10.0.0.247: icmp: echo reply
21:53:53.609206 
21:53:53.609555 
21:53:53.615620 192.168.0.1.53 > 10.0.0.247.61412: 56381 2/0/0 A
173.194.222.109, A 173.194.222.108(63)

 tcpdump output end 

with your diff applied.

> 
> [Skip]
> 
> Diff is just unable to express such change in a human friendly way :(
> It is better to look at the code before diff applying and after to see
> the change.

Of course I applied and run your diff before answer to you :)

>
> [Skip]
>
> > Also your diff does two different things: (1) split frames checks and
> > input, and (2) modify frames passing to bpf(4). I guess you should split
> > your diff by two different.
> 
> The bpf call modifications just prevent frame duplication in the dump.
> So I think Its better to keep them here.
> 
> Buf if someone is against such wide modifications, then I am Ok to
> send two patches: one for mangled packets dumping prevention and one
> for PPP input rework.

Yes. I guess "one change - one diff" is the best way.

>
> [Skip]
> 
> > For INET[46] cases you can align frame after it's passed to bpf(4). Should
> > this frame be aligned before passing to bpf(4)?
> 
> We definitely need to align IP packets after the PPP header stripping
> to facilitate work of other networking code. But aligning of the PPP
> frame could cause a double alignment: before and after the PPP header
> stripping, since a PPP header is not always have a proper length. So I
> think that aligning before the PPP header stripping could be too
> expensive and we do not required to do it.

No. Look at net/pipex.c:1023-1041 with your diff applied:

 cut begin 

#if NBPFILTER > 0
{
struct ifnet *ifp = session->pipex_iface->ifnet_this;

if (ifp->if_bpf && ifp->if_type == IFT_PPP)
bpf_mtap(ifp->if_bpf, m0, BPF_DIRECTION_IN);
}
#endif

m_adj(m0, hlen);
if (align && !ALIGNED_POINTER(mtod(m0, caddr_t), uint32_t)) {
n = m_dup_pkt(m0, 0, M_NOWAIT);
if (n == NULL)
goto drop;
m_freem(m0);
m0 = n;
}

 cut end 

You 1. pass frame to bpf_mtap() 2. align this frame if it is requred. Are
you shure this is the right order?

> 
> [skipped]
> 
> > > - default:
> > > - if (decrypted)
> > > - goto drop;
> > > - /* protocol must be checked on pipex_common_input() already 
> > > */
> > > - KASSERT(0);
> > > - goto drop;
> >
> > Hipotethically, someone inncaurate can introduce memory leak here in
> > future. I like to keep default case with assertion or with "goto drop".
> 
> This code was moved to the beginning of the function. So it is still
> will us. Or did you mean that we should handle unknown frame types in
> the second switch too?

I like to keep default case in second switch too.

Re: [PATCH] pipex(4): rework PPP input

2020-05-23 Thread Vitaliy Makkoveev




> On 23 May 2020, at 13:11, Sergey Ryazanov  wrote:
> 
> Hello,
> 
> On Wed, May 20, 2020 at 10:13 PM Vitaliy Makkoveev
>  wrote:
>> On Wed, May 20, 2020 at 04:08:01AM +0300, Sergey Ryazanov wrote:
>>> On Tue, May 19, 2020 at 12:11 PM Vitaliy Makkoveev
>>>  wrote:
>>>> On Mon, May 04, 2020 at 10:03:40PM +0300, Sergey Ryazanov wrote:
>>>>> Split checks from frame accepting with header removing in the common
>>>>> PPP input function. This should fix packet capture on a PPP interfaces.
>>>> 
>>>> Can you describe the problem you fix? As mpi@ pointed to me, reviewers
>>>> are stupid and have no telepathy skills :)
>>> 
>>> When I tried to capture packets on a ppp (4) interface (with pipex
>>> activated), I noticed that all the PPP CCP frames were ok, but all the
>>> ingress PPP IP frames were mangled, and they did not contain the PPP
>>> header at all.
>> 
>> This time only pppx(4) and pppac(4) have pipex(4) support.
> 
> Yes, and as I wrote in the first mail, now I am working on ppp(4) &
> pipex(4) integration to speed up client side of L2TP.

May be you can share you work? Not for commit, but for feedback.

For example, each pipex session should have unique pair of `protocol’ and 
`session_id’. These values are passed from userland. While the only
instance of npppd(8) uses pipex(4) this is not the problem. But you
introduce the case while pipex(4) will be used by multiple independent
userland programs. At least, I have interest how you handle this.

> 
>> I don't see
>> packet capture problems on them. Can you catch and share how to
>> reproduce this problem with pppx(4) or pppac(4)?
>> 
>> Also did you test your diff with pppx(4) and pppac(4)?
> 
> I am entirely missed the fact that pppx(4) also have IFT_PPP type.
> Thank you for pointing me.
> 
> I will recheck pppx(4) work and return as soon as I will have a better 
> solution.

Not only. Since you modify pipex(4) you should check pppac(4) too.

Re: fix pppx(4) with net/ifq.c rev 1.38

2020-05-23 Thread Vitaliy Makkoveev



> On 23 May 2020, at 12:54, Martin Pieuchot  wrote:
> 
> On 22/05/20(Fri) 13:25, Vitaliy Makkoveev wrote:
>> On Fri, May 22, 2020 at 07:57:13AM +1000, David Gwynne wrote:
>>> [...] 
>>> can you try the following diff?
>>> 
>> 
>> I tested this diff and it works for me. But the problem I pointed is
>> about pipex(4) locking.
>> 
>> pipex(4) requires NET_LOCK() be grabbed not only for underlaying
>> ip{,6}_output() but for itself too. But since pppac_start() has
>> unpredictable behavior I suggested to make it predictable [1].
> 
> What needs the NET_LOCK() in their?  We're talking about
> pipex_ppp_output(), right?  Does it really need the NET_LOCK() or
> the KERNEL_LOCK() is what protects those data structures?

Yes, about pipex_ppp_output() and pipex_output(). Except
ip{,6}_output() nothing requires NET_LOCK(). As David Gwynne pointed,
they can be replaced by ip{,6}_send().

> 
>> With net/ifq.c rev 1.37 and rev 1.39 pppx_if_start() routine will be
>> allways called under NET_LOCK(), but pppac_start() will not. It depends
>> on packet count stored in `if_snd' (look at net/ifq.c:125).
> 
> Ideally the *_start() routine should not require the NET_LOCK().
> Ideally the pseudo-drivers should not require the NET_LOCK().  That's
> what we should aim for.

NET_LOCK() is not required. It’s locked while corresponding start routines
were called directly from pppx_if_output() and pppac_output(). In case of
pppac_start() you can't predict NET_LOCK() status.


> 
> In case of pipex(4) is isn't clear that the NET_LOCK() is necessary.

I guess, pipex(4) was wrapped by NET_LOCK() to protect it while it’s
accessed through `pr_input’. Is NET_LOCK() required for this case?

Re: fix pppx(4) with net/ifq.c rev 1.38

2020-05-22 Thread Vitaliy Makkoveev

On Fri, May 22, 2020 at 07:57:13AM +1000, David Gwynne wrote:
> On Wed, May 20, 2020 at 05:42:35PM +0300, Vitaliy Makkoveev wrote:
> > I got splassert with pppx(4) and net/ifq.c rev 1.38 raised by
> > NET_ASSERT_LOCKED() in netinet/ip_output.c:113 and underlaying routines.
> > 
> > net/ifq.c rev 1.38 is not in snapshot yet so you need to checkout and
> > build kernel to reproduce.
> > 
> >  dmesg begin 
> > 
> > splassert: ip_output: want 2 have 0
> > Starting stack trace...
> > ip_output(fd801f8c8800,0,0,0,0,0) at ip_output+0x8f
> > pipex_l2tp_output(fd801f8c8800,8e787808) at
> > pipex_l2tp_output+0x21d
> > pipex_ppp_output(fd801f8c8800,8e787808,21) at
> > pipex_ppp_output+0xda
> > pppx_if_start(8e787268) at pppx_if_start+0x83
> > if_qstart_compat(8e7874e0) at if_qstart_compat+0x2e
> > ifq_serialize(8e7874e0,8e7875b0) at ifq_serialize+0x103
> > taskq_thread(81f3df30) at taskq_thread+0x4d
> > end trace frame: 0x0, count: 250
> > End of stack trace.
> > splassert: ipsp_spd_lookup: want 2 have 0
> > Starting stack trace...
> > ipsp_spd_lookup(fd801f8c8800,2,14,8e726c9c,2,0) at
> > ipsp_spd_lookup+0x80
> > ip_output_ipsec_lookup(fd801f8c8800,14,8e726c9c,0,1) at
> > ip_output_ipsec_lookup+0x4d
> > ip_output(fd801f8c8800,0,0,0,0,0) at ip_output+0x4fa
> > pipex_l2tp_output(fd801f8c8800,8e787808) at
> > pipex_l2tp_output+0x21d
> > pipex_ppp_output(fd801f8c8800,8e787808,21) at
> > pipex_ppp_output+0xda
> > pppx_if_start(8e787268) at pppx_if_start+0x83
> > if_qstart_compat(8e7874e0) at if_qstart_compat+0x2e
> > ifq_serialize(8e7874e0,8e7875b0) at ifq_serialize+0x103
> > taskq_thread(81f3df30) at taskq_thread+0x4d
> > end trace frame: 0x0, count: 248
> > End of stack trace.
> > splassert: spd_table_get: want 2 have 0
> > 
> >  dmesg end 
> > 
> > 1. `pxi_if' owned by struct pppx_if has IFXF_MPSAFE flag unset
> >   1.1 pppx(4) sets IFQ_SET_MAXLEN(>if_snd, 1) at net/if_pppx.c:866
> > 2. pppx_if_output() is called under NET_LOCK()
> > 3. pppx_if_output() calls if_enqueue() at net/if_pppx.c:1123
> > 4. pppx(4) doesn't set `ifp->if_enqueue' so if_enqueue() calls
> > if_enqueue_ifq() at net/if.c:709 (which is set in net/if.c:639)
> > 5. if_enqueue_ifq() calls ifq_start() at net/if.c:734
> > 6. ifq_start() we a still under NET_LOCK() here
> > 
> > 6.a. in net/ifq.c rev 1.37 ifq_start() checks "ifq_len(ifq) >=
> > min(ifp->if_txmit, ifq->ifq_maxlen)" and this was always true because
> > (1.1) so we always call ifq_run_start() which calls ifq_serialize().
> > 
> > ifq_serialize() will call if_qstart_compat() which calls
> > pppx_if_start() which calls pipex_ppp_output() etc while we still
> > holding NET_LOCK() so the assertions I reported above are not raised.
> > 
> > 6.b. net/ifq.c rev 1.38 introduce checks of IFXF_MPSAFE flag. so we are
> > always going to net/ifq.c:132 where we adding out task to `systq'
> > referenced by `ifq_softnet' (ifq_softnet set to `systq' at
> > net/ifq.c:199).
> > 
> > taskq_thread() doesn't grab NET_LOCK() so after net/ifq.c rev 1.38
> > ifq_serialize() and underlaying pppx_if_start() call without NET_LOCK()
> > and corresponding asserts raised.
> > 
> > The problem I pointed is not in net/ifq.c rev 1.38 but in pppx(4).
> > `if_start' routines should grab NET_LOCK() by themself if it is required
> > but pppx_if_start() and pppac_start() did't do that. pppac_start() has
> > no underlaying NET_ASSERT_LOCKED() so the pppx(4) is the only case were
> > the problem is shown.
> > 
> > Since NET_LOCK() is required by pipex(4), diff below adds it to
> > pppx_if_start() and pppac_start().
> > 
> > After net/ifq.c rev 1.38 pppx_if_start() will newer be called from
> > pppx_if_output() but from `systq' only so I don't add lock/unlock
> > dances around if_enqueue() at net/if_pppx.c:1123.
> > 
> > Diff tested for both pppx(4) and pppac(4) cases.
> 
> thanks for the detailed analysis. i wondered how the ifq change
> triggered this exactly, and your mail makes it clear.
> 
> however, pppx/pppac/pipex are not the first or only drivers in the tree
> that encapsulate a packet in IP from their if_start routine and send it
> out with the network stack. the way this has been solved in every other
> driver has been to call ip{,6}_send to transmit the packet instead
> of ip{,6}_output.
>

Re: fix pppx(4) with net/ifq.c rev 1.38

2020-05-21 Thread Vitaliy Makkoveev

After net/ifq.c rev 1.38 was reverted pppac(4) still has this problem.

pppac_output() called under NET_LOCK(). pppac_output() calls if_enqueue()
which calls ifq_start(). But now ifq_start() can run pppac_input()
directly under NET_LOCK() and also is can enqueue work and pppac_input()
will be called without NET_LOCK(). pppac_input() calls pipex_output() which
requires NET_LOCK().

We can be sure what pppx_if_input() will be always called under
NET_LOCK() because it's `if_snd' queue size is 1. I guess to make this
restriction to pppac(4) just to be sure pppac_input() is always called under
NET_LOCK() as temporary solution. I guess it's better then unlock/lock
dances around if_enqueue().

Also 6.7 release has this problem. And I like to know, how to fix it too.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_pppx.c
--- sys/net/if_pppx.c   18 Apr 2020 04:03:56 -  1.84
+++ sys/net/if_pppx.c   21 May 2020 12:31:49 -
@@ -1054,6 +1054,8 @@ pppx_if_start(struct ifnet *ifp)
struct mbuf *m;
int proto;
 
+   NET_ASSERT_LOCKED();
+
if (!ISSET(ifp->if_flags, IFF_RUNNING))
return;
 
@@ -1290,6 +1292,8 @@ pppacopen(dev_t dev, int flags, int mode
ifp->if_output = pppac_output;
ifp->if_start = pppac_start;
ifp->if_ioctl = pppac_ioctl;
+   /* XXX: To be sure pppac_input() is called under NET_LOCK() */
+   IFQ_SET_MAXLEN(>if_snd, 1);
 
if_counters_alloc(ifp);
if_attach(ifp);
@@ -1609,6 +1613,8 @@ pppac_output(struct ifnet *ifp, struct m
 {
int error;
 
+   NET_ASSERT_LOCKED();
+
if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
error = EHOSTDOWN;
goto drop;
@@ -1639,6 +1645,8 @@ pppac_start(struct ifnet *ifp)
 {
struct pppac_softc *sc = ifp->if_softc;
struct mbuf *m;
+
+   NET_ASSERT_LOCKED();
 
while ((m = ifq_dequeue(>if_snd)) != NULL) {
 #if NBPFILTER > 0

Re: incorrect result from getppid for ptraced processes

2020-09-05 Thread Vitaliy Makkoveev




> On 5 Sep 2020, at 03:22, Philip Guenther  wrote:
> 
> On Fri, Sep 4, 2020 at 2:59 PM Mateusz Guzik  wrote:
> 
>> On 9/5/20, Philip Guenther  wrote:
>>> On Fri, Sep 4, 2020 at 1:06 PM Mateusz Guzik  wrote:
>>> 
>>>> On 9/4/20, Vitaliy Makkoveev  wrote:
>>>>> On Fri, Sep 04, 2020 at 05:24:42PM +0200, Mateusz Guzik wrote:
>>>>>> getppid blindly follows the parent pointer and reads the pid.
>>>>>> 
>>>>>> The problem is that ptrace reparents the traced process, so in
>>>>>> particular if you gdb -p $something, the target proc will start
>> seeing
>>>>>> gdb instead of its actual parent.
>>>>>> 
>>>>>> There is a lot to say about the entire reparenting business or
>> storing
>>>>>> the original pid in ps_oppid (instead of some form of a reference to
>>>>>> the process).
>>>>>> 
>>>>>> However, I think the most feasible fix for now is the same thing
>>>>>> FreeBSD did: *always* store the actual parent pid in ps_oppid. This
>>>>>> means all repareting will keep updating it (most notably when
>>>>>> abandoning children on exit), while ptrace will skip that part.
>>>>>> 
>>>>>> Side effect of such a change be that getppid will stop requiring the
>>>>>> kernel lock.
>>>>>> 
>>>>> 
>>>>> Thanks for report. But we are in beta stage now so such modification
>> is
>>>>> impossible until next iteration.
>>>>> 
>>>>> Since original parent identifier is stored as `ps_oppid' while process
>>>>> is traced we just return it to userland for this case. This is the way
>>>>> I
>>>>> propose to fix this bug for now.
>>>>> 
>>>>> Comments? OKs?
>>>>> 
>>>>> Index: sys/kern/kern_prot.c
>>>>> ===
>>>>> RCS file: /cvs/src/sys/kern/kern_prot.c,v
>>>>> retrieving revision 1.76
>>>>> diff -u -p -r1.76 kern_prot.c
>>>>> --- sys/kern/kern_prot.c  9 Jul 2019 12:23:25 -   1.76
>>>>> +++ sys/kern/kern_prot.c  4 Sep 2020 21:12:15 -
>>>>> @@ -84,7 +84,11 @@ int
>>>>> sys_getppid(struct proc *p, void *v, register_t *retval)
>>>>> {
>>>>> 
>>>>> - *retval = p->p_p->ps_pptr->ps_pid;
>>>>> + if (p->p_p->ps_flags & PS_TRACED)
>>>>> + *retval = p->p_p->ps_oppid;
>>>>> + else
>>>>> + *retval = p->p_p->ps_pptr->ps_pid;
>>>>> +
>>>>>  return (0);
>>>>> }
>>>> 
>>>> This is definitely a bare minimum fix, but it does the job.
>>>> 
>>> 
>>> ptrace() has behaved like this for the life of OpenBSD and an indefinite
>>> number of years previous in the BSD releases.  What has happened that a
>>> definitely incomplete fix is needed Right Now?
>> 
>> I don't see how this reads as a demand this is fixed Right Now.
>> 
> 
> I didn't call it a demand, but the point stands: what has changed?
> 
> 
> I don't see how the fix is incomplete either. It can be done better
>> with more effort, but AFAICS the above results in correct behavior.
>> 
> 
> There are at least 2 other uses of ps_pptr->ps_pid that should also change,
> unless you like coredumps and ps disagreeing with getppid(), and someone
> needs to think how it affects doas.
> 

Thanks for pointing. I missed these two places. However, doas(1) was not
affected because this diff doesn’t modify tty(4) behaviour:
TIOC{GET,SET}VERAUTH still use ps_pptr->ps_pid.

I checked other BSD’s. NetBSD, DragonflyBSD and OSX have the same
behaviour of getppid(2). And this behaviour don’t contradict POSIX.1 [1]. So
I guess Philip is right, there is no reason to follow this way.

1. https://pubs.opengroup.org/onlinepubs/9699919799/functions/getppid.html

Re: pppoe: move softc list out of NET_LOCK() into new pppoe lock

2020-09-13 Thread Vitaliy Makkoveev

Hello Klemens.

pppoe(4) input path and pppoe(4) config path (I mean clone/destroy)
is always different context. Your diff introduces the new lock which
protects `pppoe_softc_list’ list but what should protect `sc’ you got
from this list after you released `pppoe_lock’?

I mean this dereference is not safe because concurrent thread can
destroy this `sc’ (at least in theory).

> @@ -460,8 +463,10 @@ static void pppoe_dispatch_disc_pkt(stru
>   err_msg = "TAG HUNIQUE ERROR";
>   break;
>   }
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_hunique(mtod(n, caddr_t) + 
> noff,
>   len, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc != NULL)
>   devname = sc->sc_sppp.pp_if.if_xname;

This time we have multiple locks around input and config paths so I
guess this is the real reason you didn’t caught use after free issue.

Also witness allow us to debug lock order, but not concurrent access.


> On 13 Sep 2020, at 16:12, Klemens Nanni  wrote:
> 
> This is my first try trading global locks for interface specific ones.
> 
> pppoe(4) keeps a list of all its interfaces which is then obviously
> traversed during create and destroy.
> 
> Currently, the net lock is grabbed for this, but there seems to be no
> justification other than reusing^Wabusing an existing lock.
> 
> I run this diff with WITNESS and kern.witness=2 on my edgerouter 4
> providing my home uplink via pppoe0:  the kernel runs stable, there's
> not witness log showing up and creating and destroying hundreds of
> additional pppoe(4) devices works without disruption.
> 
> Is this the right direction?
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 if_pppoe.c
> --- if_pppoe.c13 Sep 2020 11:00:40 -  1.73
> +++ if_pppoe.c13 Sep 2020 11:31:12 -
> @@ -114,15 +114,18 @@ struct pppoetag {
> #define   PPPOE_DISC_MAXPADI  4   /* retry PADI four times 
> (quickly) */
> #define   PPPOE_DISC_MAXPADR  2   /* retry PADR twice */
> 
> +struct rwlock pppoe_lock = RWLOCK_INITIALIZER("pppoe");
> +
> /*
>  * Locks used to protect struct members and global data
>  *   I   immutable after creation
>  *   N   net lock
> + *   p   pppoe lock
>  */
> 
> struct pppoe_softc {
>   struct sppp sc_sppp;/* contains a struct ifnet as first 
> element */
> - LIST_ENTRY(pppoe_softc) sc_list;/* [N] */
> + LIST_ENTRY(pppoe_softc) sc_list;/* [p] */
>   unsigned int sc_eth_ifidx;  /* [N] */
> 
>   int sc_state;   /* [N] discovery phase or session 
> connected */
> @@ -233,7 +236,7 @@ pppoe_clone_create(struct if_clone *ifc,
>   bpfattach(>sc_sppp.pp_if.if_bpf, >sc_sppp.pp_if, DLT_PPP_ETHER, 
> 0);
> #endif
> 
> - NET_LOCK();
> + rw_enter_write(_lock);
> retry:
>   unique = arc4random();
>   LIST_FOREACH(tmpsc, _softc_list, sc_list)
> @@ -241,7 +244,7 @@ retry:
>   goto retry;
>   sc->sc_unique = unique;
>   LIST_INSERT_HEAD(_softc_list, sc, sc_list);
> - NET_UNLOCK();
> + rw_exit_write(_lock);
> 
>   return (0);
> }
> @@ -252,9 +255,9 @@ pppoe_clone_destroy(struct ifnet *ifp)
> {
>   struct pppoe_softc *sc = ifp->if_softc;
> 
> - NET_LOCK();
> + rw_enter_write(_lock);
>   LIST_REMOVE(sc, sc_list);
> - NET_UNLOCK();
> + rw_exit_write(_lock);
> 
>   timeout_del(>sc_timeout);
> 
> @@ -460,8 +463,10 @@ static void pppoe_dispatch_disc_pkt(stru
>   err_msg = "TAG HUNIQUE ERROR";
>   break;
>   }
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_hunique(mtod(n, caddr_t) + 
> noff,
>   len, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc != NULL)
>   devname = sc->sc_sppp.pp_if.if_xname;
>   break;
> @@ -668,8 +673,12 @@ pppoe_data_input(struct mbuf *m)
> #ifdef PPPOE_TERM_UNKNOWN_SESSIONS
>   u_int8_t shost[ETHER_ADDR_LEN];
> #endif
> - if (LIST_EMPTY(_softc_list))
> + rw_enter_read(_lock);
> + if (LIST_EMPTY(_softc_list)) {
> + rw_exit_read(_lock);
>   goto drop;
> + }
> + rw_exit_read(_lock);
> 
>   KASSERT(m->m_flags & M_PKTHDR);
> 
> @@ -699,7 +708,9 @@ pppoe_data_input(struct mbuf *m)
>   goto drop;
> 
>   session = ntohs(ph->session);
> + rw_enter_read(_lock);
>   sc = pppoe_find_softc_by_session(session, m->m_pkthdr.ph_ifidx);
> + rw_exit_read(_lock);
>   if (sc == NULL) {
> #ifdef

Re: incorrect result from getppid for ptraced processes

2020-09-04 Thread Vitaliy Makkoveev

On Fri, Sep 04, 2020 at 05:24:42PM +0200, Mateusz Guzik wrote:
> getppid blindly follows the parent pointer and reads the pid.
> 
> The problem is that ptrace reparents the traced process, so in
> particular if you gdb -p $something, the target proc will start seeing
> gdb instead of its actual parent.
> 
> There is a lot to say about the entire reparenting business or storing
> the original pid in ps_oppid (instead of some form of a reference to
> the process).
> 
> However, I think the most feasible fix for now is the same thing
> FreeBSD did: *always* store the actual parent pid in ps_oppid. This
> means all repareting will keep updating it (most notably when
> abandoning children on exit), while ptrace will skip that part.
> 
> Side effect of such a change be that getppid will stop requiring the
> kernel lock.
> 

Thanks for report. But we are in beta stage now so such modification is
impossible until next iteration.

Since original parent identifier is stored as `ps_oppid' while process
is traced we just return it to userland for this case. This is the way I
propose to fix this bug for now.

Comments? OKs?

Index: sys/kern/kern_prot.c
===
RCS file: /cvs/src/sys/kern/kern_prot.c,v
retrieving revision 1.76
diff -u -p -r1.76 kern_prot.c
--- sys/kern/kern_prot.c9 Jul 2019 12:23:25 -   1.76
+++ sys/kern/kern_prot.c4 Sep 2020 21:12:15 -
@@ -84,7 +84,11 @@ int
 sys_getppid(struct proc *p, void *v, register_t *retval)
 {
 
-   *retval = p->p_p->ps_pptr->ps_pid;
+   if (p->p_p->ps_flags & PS_TRACED)
+   *retval = p->p_p->ps_oppid;
+   else
+   *retval = p->p_p->ps_pptr->ps_pid;
+
return (0);
 }

pppx(4)/pipex(4): use per cpu counters with ifnet

2020-08-28 Thread Vitaliy Makkoveev

pppac(4) uses per cpu counters for collect `ifnet' statistics, but in
pipex(4) layer this `ifnet' still uses `if_data'. Also pppx(4) doesn't
use per cpu counters but `if_data'. I propose to use per cpu counters
for pppx(4) and pipex(4) to avoid interface statistics collecting mix.

Also this will be the step to remove netlock from pppx(4) output.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.102
diff -u -p -r1.102 if_pppx.c
--- sys/net/if_pppx.c   27 Aug 2020 10:47:52 -  1.102
+++ sys/net/if_pppx.c   28 Aug 2020 07:48:08 -
@@ -658,6 +658,7 @@ pppx_add_session(struct pppx_dev *pxd, s
ifp->if_type = IFT_PPP;
ifp->if_softc = pxi;
/* ifp->if_rdomain = req->pr_rdomain; */
+   if_counters_alloc(ifp);
 
/* XXXSMP breaks atomicity */
NET_UNLOCK();
@@ -878,7 +879,7 @@ pppx_if_output(struct ifnet *ifp, struct
 
 out:
if (error)
-   ifp->if_oerrors++;
+   counters_inc(ifp->if_counters, ifc_oerrors);
return (error);
 }
 
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.125
diff -u -p -r1.125 pipex.c
--- sys/net/pipex.c 27 Aug 2020 10:47:52 -  1.125
+++ sys/net/pipex.c 28 Aug 2020 07:48:08 -
@@ -912,8 +912,7 @@ pipex_ip_input(struct mbuf *m0, struct p
bpf_mtap_af(ifp->if_bpf, AF_INET, m0, BPF_DIRECTION_IN);
 #endif
 
-   ifp->if_ipackets++;
-   ifp->if_ibytes += len;
+   counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes, len);
session->stat.ipackets++;
session->stat.ibytes += len;
ipv4_input(ifp, m0);
@@ -962,8 +961,7 @@ pipex_ip6_input(struct mbuf *m0, struct 
bpf_mtap_af(ifp->if_bpf, AF_INET6, m0, BPF_DIRECTION_IN);
 #endif
 
-   ifp->if_ipackets++;
-   ifp->if_ibytes += len;
+   counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes, len);
session->stat.ipackets++;
session->stat.ibytes += len;
ipv6_input(ifp, m0);

pipex(4)/ppp{ac,x}(4): don't include "net/netisr.h"

2020-08-28 Thread Vitaliy Makkoveev

It's not needed here.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.102
diff -u -p -r1.102 if_pppx.c
--- sys/net/if_pppx.c   27 Aug 2020 10:47:52 -  1.102
+++ sys/net/if_pppx.c   28 Aug 2020 08:18:08 -
@@ -62,7 +62,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.125
diff -u -p -r1.125 pipex.c
--- sys/net/pipex.c 27 Aug 2020 10:47:52 -  1.125
+++ sys/net/pipex.c 28 Aug 2020 08:18:08 -
@@ -48,7 +48,6 @@
 
 #include 
 #include 
-#include 
 #include 
 #include

Re: *_clone_create: leave default ifq_maxlen handling to ifq_init()

2020-08-21 Thread Vitaliy Makkoveev

On Fri, Aug 21, 2020 at 11:05:56PM +0200, Klemens Nanni wrote:
> Creating a cloned interface requires attaching it in the end, that's how
> it works.
> 
> All clonable interfaces start with a fresh softc structure that all
> zeros after allocation due to malloc(9)'s M_ZERO flag.
> 
> After driver dependent setup, all drivers call if_attach() to present
> the new interface to the stack.
> 
> if_attach() starts with calling if_attach_common() which starts with
> preparing the interface queues and therefore calling ifq_init() on the
> send queue.
> 
> ifq_init() eventually checks the queue's maximum length and defaults to
> IFQ_MAXLEN if it is zero, which it always is during this create/attach
> path:
> 
>   if (ifq->ifq_maxlen == 0)
>   ifq_set_maxlen(ifq, IFQ_MAXLEN);
> 
> Now, most clonable interface drivers (except bridge, enc, loop, pppx,
> switch, trunk and vlan) initialise the send queue's length to IFQ_MAXLEN
> the same way, which seems entirely redundant to me.
> 
> The queue API does this in a central place already and it bothered me
> why not all drivers did the same in this regard, until I concluded this.
> 
> Is my analysis correct?
> If so, I'd like to remove the redundant init code and unify drivers a
> tiny bit.
> 
> Feedback? Objections? OK?
> 

I have no objections. Also fgsch@ already did the same in 2001 [1]:

Don't set up ifq_maxlen manually for drivers that uses IFQ_MAXLEN
(or ifqmaxlen); it's done in if_attach() now.
No future drivers needs to set up this anymore unless they want to
use something else.

ok mvs@

1. 
https://github.com/openbsd/src/commit/b59942f79e8c9a7102417b8713ad3ffe9adecf05

> 
> Index: if_aggr.c
> ===
> RCS file: /cvs/src/sys/net/if_aggr.c,v
> retrieving revision 1.33
> diff -u -p -r1.33 if_aggr.c
> --- if_aggr.c 22 Jul 2020 02:16:01 -  1.33
> +++ if_aggr.c 21 Aug 2020 20:33:36 -
> @@ -561,7 +561,6 @@ aggr_clone_create(struct if_clone *ifc, 
>   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST | IFF_SIMPLEX;
>   ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
>   ifp->if_link_state = LINK_STATE_DOWN;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ether_fakeaddr(ifp);
>  
>   if_counters_alloc(ifp);
> Index: if_bpe.c
> ===
> RCS file: /cvs/src/sys/net/if_bpe.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 if_bpe.c
> --- if_bpe.c  22 Jul 2020 08:38:51 -  1.13
> +++ if_bpe.c  21 Aug 2020 20:33:36 -
> @@ -189,7 +189,6 @@ bpe_clone_create(struct if_clone *ifc, i
>   ifp->if_start = bpe_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_MULTICAST;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ether_fakeaddr(ifp);
>  
>   if_counters_alloc(ifp);
> Index: if_etherip.c
> ===
> RCS file: /cvs/src/sys/net/if_etherip.c,v
> retrieving revision 1.46
> diff -u -p -r1.46 if_etherip.c
> --- if_etherip.c  10 Jul 2020 13:26:41 -  1.46
> +++ if_etherip.c  21 Aug 2020 20:33:36 -
> @@ -150,7 +150,6 @@ etherip_clone_create(struct if_clone *if
>   ifp->if_start = etherip_start;
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_capabilities = IFCAP_VLAN_MTU;
>   ether_fakeaddr(ifp);
>  
> Index: if_gif.c
> ===
> RCS file: /cvs/src/sys/net/if_gif.c,v
> retrieving revision 1.130
> diff -u -p -r1.130 if_gif.c
> --- if_gif.c  10 Jul 2020 13:26:41 -  1.130
> +++ if_gif.c  21 Aug 2020 20:33:36 -
> @@ -170,7 +170,6 @@ gif_clone_create(struct if_clone *ifc, i
>   ifp->if_output = gif_output;
>   ifp->if_rtrequest = p2p_rtrequest;
>   ifp->if_type   = IFT_GIF;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_softc = sc;
>  
>   if_attach(ifp);
> Index: if_gre.c
> ===
> RCS file: /cvs/src/sys/net/if_gre.c,v
> retrieving revision 1.158
> diff -u -p -r1.158 if_gre.c
> --- if_gre.c  10 Jul 2020 13:26:41 -  1.158
> +++ if_gre.c  21 Aug 2020 20:33:36 -
> @@ -715,7 +715,6 @@ egre_clone_create(struct if_clone *ifc, 
>   ifp->if_ioctl = egre_ioctl;
>   ifp->if_start = egre_start;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>   ether_fakeaddr(ifp);
>  
> @@ -777,7 +776,6 @@ nvgre_clone_create(struct if_clone *ifc,
>   ifp->if_ioctl = nvgre_ioctl;
>   ifp->if_start = nvgre_start;
>   ifp->if_xflags = IFXF_CLONED;
> - ifq_set_maxlen(>if_snd, IFQ_MAXLEN);
>   ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
>

Re: sppp: add size to free() calls

2020-08-22 Thread Vitaliy Makkoveev

ok mvs@

> On 22 Aug 2020, at 15:32, Klemens Nanni  wrote:
> 
> Another round, this time obvious sizes which are in immediate scope of
> the free() call, e.g. right below the malloc() call.
> 
> This leaves only a few selected free() calls with size zero in
> if_spppsubr.c due to the fact that there is currently no variable to
> keep track of username and password string lengths.
> 
> Feedback? OK?
> 
> 
> Index: if_spppsubr.c
> ===
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.185
> diff -u -p -r1.185 if_spppsubr.c
> --- if_spppsubr.c 14 Aug 2020 12:17:34 -  1.185
> +++ if_spppsubr.c 22 Aug 2020 12:25:37 -
> @@ -1737,7 +1737,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
> 
>   len -= 4;
>   origlen = len;
> - buf = r = malloc (len, M_TEMP, M_NOWAIT);
> + buf = r = malloc (origlen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -1749,7 +1749,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (-1);
>   }
>   if (debug)
> @@ -1926,7 +1926,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (rlen == 0);
> }
> 
> @@ -2312,7 +2312,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   u_int32_t hisaddr, desiredaddr;
> 
>   len -= 4;
> @@ -2321,7 +2321,8 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2332,7 +2333,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2476,7 +2477,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -2773,7 +2774,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   struct in6_addr myaddr, desiredaddr, suggestaddr;
>   int ifidcount;
>   int type;
> @@ -2786,7 +2787,8 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2799,7 +2801,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) {
>   /* Sanity check option length */
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2933,7 +2935,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   }
> 
> end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -4475,10 +4477,10 @@ sppp_get_params(struct sppp *sp, struct 
>   spr->phase = sp->pp_phase;
> 
>   if (copyout(spr, (caddr_t)ifr->ifr_data, sizeof(*spr)) != 0) {
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   return EFAULT;
>   }
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   break;
>   }
>   case SPPPIOGMAUTH:
> @@ -4498,10 +4500,10 @@ sppp_get_params(struct sppp *sp, struct 
>   strlcpy(spa->name, auth->name, sizeof(spa->name));
> 
>   if (copyout(spa, (caddr_t)ifr->ifr_data, sizeof(*spa)) != 0) {
> - free(spa, M_DEVBUF, 0);
> +

Re: sppp: add size to free() calls

2020-08-22 Thread Vitaliy Makkoveev

Yeah, we override both of 'auth->name' and 'auth->secret’.

Since there is the only difference against your previous diff and
the only place where you touch them I have no objections.

> On 22 Aug 2020, at 18:00, Klemens Nanni  wrote:
> 
> On Sat, Aug 22, 2020 at 02:32:17PM +0200, Klemens Nanni wrote:
>> Another round, this time obvious sizes which are in immediate scope of
>> the free() call, e.g. right below the malloc() call.
>> 
>> This leaves only a few selected free() calls with size zero in
>> if_spppsubr.c due to the fact that there is currently no variable to
>> keep track of username and password string lengths.
>> 
>> Feedback? OK?
> Sorry, here's the correct version of the diff omitting sizes for the
> very string buffers mentioned above.
> 
> 
> Index: if_spppsubr.c
> ===
> RCS file: /cvs/src/sys/net/if_spppsubr.c,v
> retrieving revision 1.185
> diff -u -p -r1.185 if_spppsubr.c
> --- if_spppsubr.c 14 Aug 2020 12:17:34 -  1.185
> +++ if_spppsubr.c 22 Aug 2020 14:55:49 -
> @@ -1737,7 +1737,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
> 
>   len -= 4;
>   origlen = len;
> - buf = r = malloc (len, M_TEMP, M_NOWAIT);
> + buf = r = malloc (origlen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -1749,7 +1749,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (-1);
>   }
>   if (debug)
> @@ -1926,7 +1926,7 @@ sppp_lcp_RCR(struct sppp *sp, struct lcp
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, origlen);
>   return (rlen == 0);
> }
> 
> @@ -2312,7 +2312,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   u_int32_t hisaddr, desiredaddr;
> 
>   len -= 4;
> @@ -2321,7 +2321,8 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2332,7 +2333,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   p = (void*) (h+1);
>   for (rlen = 0; len > 1; len -= p[1], p += p[1]) {
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2476,7 +2477,7 @@ sppp_ipcp_RCR(struct sppp *sp, struct lc
>   }
> 
>  end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -2773,7 +2774,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
> {
>   u_char *buf, *r, *p;
>   struct ifnet *ifp = >pp_if;
> - int rlen, origlen, debug = ifp->if_flags & IFF_DEBUG;
> + int rlen, origlen, buflen, debug = ifp->if_flags & IFF_DEBUG;
>   struct in6_addr myaddr, desiredaddr, suggestaddr;
>   int ifidcount;
>   int type;
> @@ -2786,7 +2787,8 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>* Make sure to allocate a buf that can at least hold a
>* conf-nak with an `address' option.  We might need it below.
>*/
> - buf = r = malloc ((len < 6? 6: len), M_TEMP, M_NOWAIT);
> + buflen = len < 6? 6: len;
> + buf = r = malloc (buflen, M_TEMP, M_NOWAIT);
>   if (! buf)
>   return (0);
> 
> @@ -2799,7 +2801,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   for (rlen=0; len>1 && p[1]; len-=p[1], p+=p[1]) {
>   /* Sanity check option length */
>   if (p[1] < 2 || p[1] > len) {
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (-1);
>   }
>   if (debug)
> @@ -2933,7 +2935,7 @@ sppp_ipv6cp_RCR(struct sppp *sp, struct 
>   }
> 
> end:
> - free(buf, M_TEMP, 0);
> + free(buf, M_TEMP, buflen);
>   return (rlen == 0);
> }
> 
> @@ -4475,10 +4477,10 @@ sppp_get_params(struct sppp *sp, struct 
>   spr->phase = sp->pp_phase;
> 
>   if (copyout(spr, (caddr_t)ifr->ifr_data, sizeof(*spr)) != 0) {
> - free(spr, M_DEVBUF, 0);
> + free(spr, M_DEVBUF, sizeof(*spr));
>   return EFAULT;
>   }
> - free(spr, M_DEVBUF, 0);
> + free(spr,

Re: pppoe: add sizes to free() calls

2020-08-20 Thread Vitaliy Makkoveev

> On 20 Aug 2020, at 16:33, Klemens Nanni  wrote:
> 
> These are straight forward as we either maintain a size variable all the
> way or can reuse strlen() for free() just like it's done during malloc().
> 
> One exception is freeing the softc structure, which is fixed in size;
> `ifconfig pppoe1 create; ifconfig pppoe1 destroy' exercises this code
> path and does not blow up - as expected.
> 
> Running fine on a octeon vdsl2 router.
> 
> Feedback? OK?

Hi.

You forgot about ‘\0’ for `sc_concentrator_name’ and `sc_service_name'.

> 
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 if_pppoe.c
> --- if_pppoe.c28 Jul 2020 09:52:32 -  1.70
> +++ if_pppoe.c19 Aug 2020 23:33:23 -
> @@ -257,15 +264,17 @@ pppoe_clone_destroy(struct ifnet *ifp)
>   if_detach(ifp);
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name));
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name));
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
> 
> - free(sc, M_DEVBUF, 0);
> + free(sc, M_DEVBUF, sizeof(*sc));
> 
>   return (0);
> }
> @@ -547,7 +556,8 @@ breakbreak:
>   }
>   if (ac_cookie) {
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = malloc(ac_cookie_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_ac_cookie == NULL)
> @@ -557,7 +567,8 @@ breakbreak:
>   }
>   if (relay_sid) {
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF,
> + sc->sc_relay_sid_len);
>   sc->sc_relay_sid = malloc(relay_sid_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_relay_sid == NULL)
> @@ -610,11 +621,12 @@ breakbreak:
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
> @@ -817,7 +835,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name));
>   sc->sc_concentrator_name = NULL;
> 
>   len = strlen(parms->ac_name);
> @@ -830,7 +849,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name));
>   sc->sc_service_name = NULL;
> 
>   len = strlen(parms->service_name);
> @@ -1175,12 +1195,12 @@ pppoe_disconnect(struct pppoe_softc *sc)
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_relay_sid_len = 0;
>

Re: pppoe: add sizes to free() calls

2020-08-20 Thread Vitaliy Makkoveev

ok mvs@

> On 20 Aug 2020, at 17:12, Klemens Nanni  wrote:
> 
> On Thu, Aug 20, 2020 at 03:33:17PM +0200, Klemens Nanni wrote:
>> These are straight forward as we either maintain a size variable all the
>> way or can reuse strlen() for free() just like it's done during malloc().
>> 
>> One exception is freeing the softc structure, which is fixed in size;
>> `ifconfig pppoe1 create; ifconfig pppoe1 destroy' exercises this code
>> path and does not blow up - as expected.
>> 
>> Running fine on a octeon vdsl2 router.
>> 
>> Feedback? OK?
> Sorry for the noise, Peter J. Philipp pointed out how I missed the +1
> byte to account for the NUL character which strlen(9) does not count.
> All corresponding malloc calls do strlen() + 1, so without it there's an
> off-by-one in the size.
> 
> In my setup I do not exercise the service and concentrator name paths,
> but playing with `ifconfig pppoe0 [[-]pppoeac foo] [[-]pppoesvc bar]'
> doesn't blow up on any of those diff versions, either.
> 
> Fixed diff.
> 
> Index: if_pppoe.c
> ===
> RCS file: /cvs/src/sys/net/if_pppoe.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 if_pppoe.c
> --- if_pppoe.c28 Jul 2020 09:52:32 -  1.70
> +++ if_pppoe.c20 Aug 2020 13:50:07 -
> @@ -257,15 +267,17 @@ pppoe_clone_destroy(struct ifnet *ifp)
>   if_detach(ifp);
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name) + 1);
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name) + 1);
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF, sc->sc_ac_cookie_len);
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
> 
> - free(sc, M_DEVBUF, 0);
> + free(sc, M_DEVBUF, sizeof(*sc));
> 
>   return (0);
> }
> @@ -547,7 +559,8 @@ breakbreak:
>   }
>   if (ac_cookie) {
>   if (sc->sc_ac_cookie)
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = malloc(ac_cookie_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_ac_cookie == NULL)
> @@ -557,7 +570,8 @@ breakbreak:
>   }
>   if (relay_sid) {
>   if (sc->sc_relay_sid)
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF,
> + sc->sc_relay_sid_len);
>   sc->sc_relay_sid = malloc(relay_sid_len, M_DEVBUF,
>   M_DONTWAIT);
>   if (sc->sc_relay_sid == NULL)
> @@ -610,11 +624,12 @@ breakbreak:
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> - free(sc->sc_ac_cookie, M_DEVBUF, 0);
> + free(sc->sc_ac_cookie, M_DEVBUF,
> + sc->sc_ac_cookie_len);
>   sc->sc_ac_cookie = NULL;
>   }
>   if (sc->sc_relay_sid) {
> - free(sc->sc_relay_sid, M_DEVBUF, 0);
> + free(sc->sc_relay_sid, M_DEVBUF, sc->sc_relay_sid_len);
>   sc->sc_relay_sid = NULL;
>   }
>   sc->sc_ac_cookie_len = 0;
> @@ -817,7 +847,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_concentrator_name)
> - free(sc->sc_concentrator_name, M_DEVBUF, 0);
> + free(sc->sc_concentrator_name, M_DEVBUF,
> + strlen(sc->sc_concentrator_name) + 1);
>   sc->sc_concentrator_name = NULL;
> 
>   len = strlen(parms->ac_name);
> @@ -830,7 +861,8 @@ pppoe_ioctl(struct ifnet *ifp, unsigned 
>   }
> 
>   if (sc->sc_service_name)
> - free(sc->sc_service_name, M_DEVBUF, 0);
> + free(sc->sc_service_name, M_DEVBUF,
> + strlen(sc->sc_service_name) + 1);
>   sc->sc_service_name = NULL;
> 
>   len = strlen(parms->service_name);
> @@ -1175,12 +1207,12 @@ pppoe_disconnect(struct pppoe_softc *sc)
>   sc->sc_state = PPPOE_STATE_INITIAL;
>   memcpy(>sc_dest, etherbroadcastaddr, sizeof(sc->sc_dest));
>   if (sc->sc_ac_cookie) {
> -

Re: Enable EVFILT_EXCEPT

2020-08-21 Thread Vitaliy Makkoveev

ok mvs@

> On 21 Aug 2020, at 10:32, Martin Pieuchot  wrote:
> 
> The kqueue-based poll(2) backend is still a WIP due to regressions in
> the kqueue layer.  In the meantime should we expose EVFILT_EXCEPT to
> userland?  The diff below should be enough to allow userland apps to
> use the new code paths. 
> 
> ok?
> 
> Index: sys/event.h
> ===
> RCS file: /cvs/src/sys/sys/event.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 event.h
> --- sys/event.h   22 Jun 2020 13:14:32 -  1.44
> +++ sys/event.h   21 Aug 2020 07:09:31 -
> @@ -41,7 +41,7 @@
> #define EVFILT_DEVICE (-8)/* devices */
> #define EVFILT_EXCEPT (-9)/* exceptional conditions */
> 
> -#define EVFILT_SYSCOUNT  8
> +#define EVFILT_SYSCOUNT  9
> 
> #define EV_SET(kevp, a, b, c, d, e, f) do {   \
>   struct kevent *__kevp = (kevp); \
>

Re: Make pipex more common for pppac and pppx

2020-08-24 Thread Vitaliy Makkoveev

On Thu, Aug 20, 2020 at 02:32:57PM +0900, YASUOKA Masahiko wrote:

Hello.

I pointed some comments inline.

> Hi,
> 
> Thank you for your comments.
> 
> On Mon, 17 Aug 2020 00:15:08 +0300
> Vitaliy Makkoveev  wrote:
> > I like your idea to kill `pipex_iface_context'. I had trying to keep it
> > by myself and this was wrong way. Could you rework your diff to be
> > against the recent sources?
> 
> I'm sorry the diff was for the old version.
> 
> >> @@ -1122,8 +1051,11 @@ pppacopen(dev_t dev, int flags, int mode, struct 
> >> proc *p)
> >>  #if NBPFILTER > 0
> >>bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
> >>  #endif
> >> -
> >> -  pipex_iface_init(>sc_pipex_iface, ifp->if_index);
> >> +  /* virtual pipex_session entry for multicast */
> >> +  session = pool_get(_session_pool, PR_WAITOK | PR_ZERO);
> >> +  session->is_multicast = 1;
> >> +  session->ifindex = ifp->if_index;
> >> +  sc->sc_multicast_session = session;
> >>  
> > Interface index is not required for multicast session, because it's
> > never used. Also I like to alloc `sc_multicast_session' before
> > if_attach().
> 
> The diff was to use `ifindex' to select all sessions associated the
> same pppac(4).  But the latest diff uses `ownersc' instead for the
> same purpose.  Also the allocation was moved to earlier part of the
> function.
> 
> >> @@ -1382,7 +1340,10 @@ pppacclose(dev_t dev, int flags, int mode, struct 
> >> proc *p)
> >>klist_invalidate(>sc_wsel.si_note);
> >>splx(s);
> >>  
> >> -  pipex_iface_fini(>sc_pipex_iface);
> >> +  pool_put(_session_pool, sc->sc_multicast_session);
> >> +  NET_LOCK();
> >> +  pipex_destroy_all_sessions(sc);
> >> +  NET_UNLOCK();
> >>  
> >>if_detach(ifp);
> > 
> > The recent sources has pppac(4) with unlocked start routine. I like you
> > detach `ifp' before destroy `sc_multicast_session'.
> 
> The lines were moved after if_detach().
> 
> I'll test this more on this weekend, then I'll ask ok for this.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.101
> diff -u -p -r1.101 if_pppx.c
> --- sys/net/if_pppx.c 14 Aug 2020 11:05:38 -  1.101
> +++ sys/net/if_pppx.c 20 Aug 2020 05:19:55 -
> @@ -163,7 +163,6 @@ struct pppx_if {
>   struct ifnetpxi_if;
>   struct pppx_dev *pxi_dev;   /* [I] */
>   struct pipex_session*pxi_session;   /* [I] */
> - struct pipex_iface_context  pxi_ifcontext;  /* [N] */
>  };
>  
>  static inline int
> @@ -181,12 +180,6 @@ int  pppx_add_session(struct pppx_dev *,
>   struct pipex_session_req *);
>  int  pppx_del_session(struct pppx_dev *,
>   struct pipex_session_close_req *);
> -int  pppx_config_session(struct pppx_dev *,
> - struct pipex_session_config_req *);
> -int  pppx_get_stat(struct pppx_dev *,
> - struct pipex_session_stat_req *);
> -int  pppx_get_closed(struct pppx_dev *,
> - struct pipex_session_list_req *);
>  int  pppx_set_session_descr(struct pppx_dev *,
>   struct pipex_session_descr_req *);
>  
> @@ -424,17 +417,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>  
>   NET_LOCK();
>   switch (cmd) {
> - case PIPEXSMODE:
> - /*
> -  * npppd always enables on open, and only disables before
> -  * closing. we cheat and let open and close do that, so lie
> -  * to npppd.
> -  */
> - break;
> - case PIPEXGMODE:
> - *(int *)addr = 1;
> - break;
> -
>   case PIPEXASESSION:
>   error = pppx_add_session(pxd,
>   (struct pipex_session_req *)addr);
> @@ -445,21 +427,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>   (struct pipex_session_close_req *)addr);
>   break;
>  
> - case PIPEXCSESSION:
> - error = pppx_config_session(pxd,
> - (struct pipex_session_config_req *)addr);
> - break;
> -
> - case PIPEXGSTAT:
> - error = pppx_get_stat(pxd,
> - (struct pipex_session_stat_req *)addr);
> - break;
> -
> - case PIPEXGCLOSED:
> - error = pppx_get_closed(pxd,
> - (struct pipex_session_list

Re: uvm: __inline -> inline

2020-09-22 Thread Vitaliy Makkoveev

ok mvs

> On 22 Sep 2020, at 10:15, Martin Pieuchot  wrote:
> 
> Spell inline correctly, also reduce the diff with NetBSD for uvm_amap.c
> and uvm_fault.c.
> 
> ok?
> 
> Index: uvm/uvm_addr.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_addr.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 uvm_addr.c
> --- uvm/uvm_addr.c13 Sep 2020 10:05:25 -  1.28
> +++ uvm/uvm_addr.c22 Sep 2020 07:12:10 -
> @@ -186,7 +186,7 @@ uvm_addr_entrybyspace(struct uaddr_free_
> }
> #endif /* !SMALL_KERNEL */
> 
> -static __inline vaddr_t
> +static inline vaddr_t
> uvm_addr_align_forward(vaddr_t addr, vaddr_t align, vaddr_t offset)
> {
>   vaddr_t adjusted;
> @@ -201,7 +201,7 @@ uvm_addr_align_forward(vaddr_t addr, vad
>   return (adjusted < addr ? adjusted + align : adjusted);
> }
> 
> -static __inline vaddr_t
> +static inline vaddr_t
> uvm_addr_align_backward(vaddr_t addr, vaddr_t align, vaddr_t offset)
> {
>   vaddr_t adjusted;
> Index: uvm/uvm_amap.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_amap.c,v
> retrieving revision 1.82
> diff -u -p -r1.82 uvm_amap.c
> --- uvm/uvm_amap.c4 Jan 2020 16:17:29 -   1.82
> +++ uvm/uvm_amap.c22 Sep 2020 07:07:45 -
> @@ -63,20 +63,20 @@ static char amap_small_pool_names[UVM_AM
>  */
> 
> static struct vm_amap *amap_alloc1(int, int, int);
> -static __inline void amap_list_insert(struct vm_amap *);
> -static __inline void amap_list_remove(struct vm_amap *);   
> +static inline void amap_list_insert(struct vm_amap *);
> +static inline void amap_list_remove(struct vm_amap *);   
> 
> struct vm_amap_chunk *amap_chunk_get(struct vm_amap *, int, int, int);
> void amap_chunk_free(struct vm_amap *, struct vm_amap_chunk *);
> void amap_wiperange_chunk(struct vm_amap *, struct vm_amap_chunk *, int, int);
> 
> -static __inline void
> +static inline void
> amap_list_insert(struct vm_amap *amap)
> {
>   LIST_INSERT_HEAD(_list, amap, am_list);
> }
> 
> -static __inline void
> +static inline void
> amap_list_remove(struct vm_amap *amap)
> { 
>   LIST_REMOVE(amap, am_list);
> @@ -190,13 +190,10 @@ amap_chunk_free(struct vm_amap *amap, st
>  * here are some in-line functions to help us.
>  */
> 
> -static __inline void pp_getreflen(int *, int, int *, int *);
> -static __inline void pp_setreflen(int *, int, int, int);
> -
> /*
>  * pp_getreflen: get the reference and length for a specific offset
>  */
> -static __inline void
> +static inline void
> pp_getreflen(int *ppref, int offset, int *refp, int *lenp)
> {
> 
> @@ -212,7 +209,7 @@ pp_getreflen(int *ppref, int offset, int
> /*
>  * pp_setreflen: set the reference and length for a specific offset
>  */
> -static __inline void
> +static inline void
> pp_setreflen(int *ppref, int offset, int ref, int len)
> {
>   if (len == 1) {
> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.86
> diff -u -p -r1.86 uvm_aobj.c
> --- uvm/uvm_aobj.c18 Jul 2019 23:47:33 -  1.86
> +++ uvm/uvm_aobj.c22 Sep 2020 07:11:50 -
> @@ -256,7 +256,7 @@ uao_find_swhash_elt(struct uvm_aobj *aob
> /*
>  * uao_find_swslot: find the swap slot number for an aobj/pageidx
>  */
> -__inline static int
> +inline static int
> uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
> {
> 
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 uvm_fault.c
> --- uvm/uvm_fault.c   12 Sep 2020 17:08:49 -  1.98
> +++ uvm/uvm_fault.c   22 Sep 2020 07:07:59 -
> @@ -159,7 +159,7 @@ static struct uvm_advice uvmadvice[MADV_
>  * private prototypes
>  */
> static void uvmfault_amapcopy(struct uvm_faultinfo *);
> -static __inline void uvmfault_anonflush(struct vm_anon **, int);
> +static inline void uvmfault_anonflush(struct vm_anon **, int);
> void  uvmfault_unlockmaps(struct uvm_faultinfo *, boolean_t);
> void  uvmfault_update_stats(struct uvm_faultinfo *);
> 
> @@ -171,7 +171,7 @@ void  uvmfault_update_stats(struct uvm_fa
>  *
>  * => does not have to deactivate page if it is busy
>  */
> -static __inline void
> +static inline void
> uvmfault_anonflush(struct vm_anon **anons, int n)
> {
>   int lcv;
> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.267
> diff -u -p -r1.267 uvm_map.c
> --- uvm/uvm_map.c 14 Sep 2020 20:31:09 -  1.267
> +++ uvm/uvm_map.c 22 Sep 2020 07:11:47 -
> @@ -167,7 +167,7 @@ boolean_t  uvm_map_inentry_fix(struct p
>  * Tree management functions.
>  */
> 
> -static __inline void  uvm_mapent_copy(struct vm_map_entry*,
> +static inline voiduvm_mapent_copy(struct vm_map_entry*,
>

pipex(4): kill NET_LOCK() in pipex_ioctl()

2020-05-27 Thread Vitaliy Makkoveev

pipex(4) is simultaneously protected by KERNEL_LOCK() and NET_LOCK() and
the last is not required. I guess to start remove NET_LOCK(). Diff below
drops NET_LOCK() in pipex_ioctl() and underlaying routines. At least
this helps to kill unlock/lock mess in pppx_add_session() and
pppx_if_destroy().

Index: sys/net/pipex.c
===
RCS file: /cvs/src/sys/net/pipex.c,v
retrieving revision 1.113
diff -u -p -r1.113 pipex.c
--- sys/net/pipex.c 7 Apr 2020 07:11:22 -   1.113
+++ sys/net/pipex.c 27 May 2020 08:46:32 -
@@ -205,7 +205,8 @@ pipex_ioctl(struct pipex_iface_context *
 {
int pipexmode, ret = 0;
 
-   NET_LOCK();
+   KERNEL_ASSERT_LOCKED();
+
switch (cmd) {
case PIPEXSMODE:
pipexmode = *(int *)data;
@@ -250,7 +251,6 @@ pipex_ioctl(struct pipex_iface_context *
ret = ENOTTY;
break;
}
-   NET_UNLOCK();
 
return (ret);
 }
@@ -269,6 +269,8 @@ pipex_add_session(struct pipex_session_r
struct ifnet *over_ifp = NULL;
 #endif
 
+   KERNEL_ASSERT_LOCKED();
+
/* Checks requeted parameters.  */
if (!iface->pipexmode)
return (ENXIO);
@@ -423,7 +425,6 @@ pipex_add_session(struct pipex_session_r
}
 #endif
 
-   NET_ASSERT_LOCKED();
/* commit the session */
if (!in_nullhost(session->ip_address.sin_addr)) {
if (pipex_lookup_by_ip_address(session->ip_address.sin_addr)
@@ -473,7 +474,7 @@ pipex_add_session(struct pipex_session_r
 int
 pipex_notify_close_session(struct pipex_session *session)
 {
-   NET_ASSERT_LOCKED();
+   KERNEL_ASSERT_LOCKED();
session->state = PIPEX_STATE_CLOSE_WAIT;
session->stat.idle_time = 0;

Re: [RFC] pppd: add pipex(4) L2TP control support

2020-05-26 Thread Vitaliy Makkoveev



> On 26 May 2020, at 11:31, Claudio Jeker  wrote:
> 
> [skip]
> 
> Is pppd(8) still using K function declarations? Can we please add new
> functions with ANSI declarations instead and convert the rest as well. 
> Also it looks like something strange is going on with indentation (just
> look at the last function above, it seems lines start with 4 spaces
> instead of 1 tab). Again this could be bad pppd code.

pppd(8) is not KNF compliant.

Re: [PATCH] pipex(4): rework PPP input

2020-05-26 Thread Vitaliy Makkoveev



> On 25 May 2020, at 22:04, Sergey Ryazanov  wrote:
> 
> Hello Vitaliy,
> 
> On Sat, May 23, 2020 at 3:07 PM Vitaliy Makkoveev
>  wrote:
>>> On 23 May 2020, at 13:11, Sergey Ryazanov  wrote:
>>> On Wed, May 20, 2020 at 10:13 PM Vitaliy Makkoveev
>>>  wrote:
>>>> On Wed, May 20, 2020 at 04:08:01AM +0300, Sergey Ryazanov wrote:
>>>>> On Tue, May 19, 2020 at 12:11 PM Vitaliy Makkoveev
>>>>>  wrote:
>>>>>> On Mon, May 04, 2020 at 10:03:40PM +0300, Sergey Ryazanov wrote:
>>>>>>> Split checks from frame accepting with header removing in the common
>>>>>>> PPP input function. This should fix packet capture on a PPP interfaces.
>>>>>> 
>>>>>> Can you describe the problem you fix? As mpi@ pointed to me, reviewers
>>>>>> are stupid and have no telepathy skills :)
>>>>> 
>>>>> When I tried to capture packets on a ppp (4) interface (with pipex
>>>>> activated), I noticed that all the PPP CCP frames were ok, but all the
>>>>> ingress PPP IP frames were mangled, and they did not contain the PPP
>>>>> header at all.
>>>> 
>>>> This time only pppx(4) and pppac(4) have pipex(4) support.
>>> 
>>> Yes, and as I wrote in the first mail, now I am working on ppp(4) &
>>> pipex(4) integration to speed up client side of L2TP.
>> 
>> May be you can share you work? Not for commit, but for feedback.
>> 
> 
> I send a couple of diffs in separate mails. First change is for ppp(4)
> to support pipex(4) acceleration in DL path. Second diff adds a new
> option for pppd to control pipex activation.
> 
> We will need also to update xl2tpd package to teach it how to
> configure pppd for pipex usage. This is quite simple change, but it
> still require some cleanup so I do not publish it yet.
> 
>> For example, each pipex session should have unique pair of `protocol’ and
>> `session_id’. These values are passed from userland. While the only
>> instance of npppd(8) uses pipex(4) this is not the problem. But you
>> introduce the case while pipex(4) will be used by multiple independent
>> userland programs. At least, I have interest how you handle this.
> 
> This should not be a problem here. npppd(8) support server mode only.
> While my work is to implement acceleration for client side of L2TP
> connection.

I guess they can coexist. Also you can have multiple connections to
ppp servers simultaneously.

Re: pppx(4): avoid direct usage of pxi owned session.

2020-05-24 Thread Vitaliy Makkoveev

Sorry about delay.

> On 20 May 2020, at 10:54, Martin Pieuchot  wrote:
> 
> On 14/05/20(Thu) 15:53, Vitaliy Makkoveev wrote:
>> Each `struct pppx_if' holds it's own `pipex_session' and this session is
>> used directly within ifnet's related handlers pppx_if_start() and
>> pppx_if_output().
> 
> I don't see a problem with keeping a reference on a pipex_session inside
> the softc.  Is there one?  If so we might want to get rid of the descriptor
> completely and always use a lookup function.

This diff was for my private question about if_detach(). I was wrong about
if_detach(), so we can drop this diff :)

Since we are going to move pipex(4) internals back, I tried to go in pppac(4)
way. Pseudo-device logic, you pointed, is very similar.

Also as pipex(4) was designed, session should be destroyed by pipex_ioctl()
command PIPEXDSESSION. Corresponding function pipex_close_session() doesn’t
kill session, but places it to queue, and this session will be deleted later
if `pipexinq’ and `pipexoutq’ are empty. This was done to be sure there is
no mbufs in queues with reference to this session. Real pipex(4) destruction
is done by pipex_destroy_session(). pppac(4) hasn’t references to it’s
sessions, so since I tried pppac(4) way did a step to remove reference in
pppx(4) too.

But in fact, pipex_iface_stop() uses direct pipex_destroy_session(). Also
pppx_if_destroy() uses code copypasted from this function. It’s not clean
for me why this is safe.

Also we have disabled in-kernel timer for pppx(4) which wants to destroy
session within pipex(4).

> 
> Always doing lookups might adds an overhead, so it should be understood.

pppac_start() does one search per mbuf within pipex_output(). In my diff
x_if_start() does one search per queue processing. pipex_output()
uses radix tree, pipex_lookup_by_session_id() uses hash.

> 
>> pppx_if_destroy() at first destroys `pipex_session' and calls
>> if_deatch() which can cause context switch. Hypothetically,
>> pppx_if_start() or pppx_if_output() can receive cpu at this moment and
>> start to operate with already destroyed session.
> 
> What do you mean with 'destroyed session'?  If if_detach() sleeps the
> session still exists, it is just no longer linked.  I don't see what bug
> exist in the scenario you're describing.

In fact this code doesn't destroy session, but since it was copypasted
from pipex_destroy_session() (without pool_put()) the meaning of this code
is session destruction.

May be the first step to deduplicate should be to move this code to new
function pipex_session_unlink() and call it within pipex_destroy_session()
and pppx_if_destroy(). Also we can introduce pipex_session_link() for
pipex session creation.

> 
>> I guess the order of `pppx_if' destruction in pppx_if_destroy() is
>> right. If we call if_detach() before `pipex_session' destruction, after
>> context switch caused by this if_detach() this session can be accessed
>> from network stack, but it's own `ifnet' is already destoyed by
>> if_detach().
> 
> if_detach() doesn't destroy anything.  `ifp' is still valid, just no
> longer referenced by the global data structures of the network stack.
> 
> Concerning the order of operations I'd suggest being coherent with
> other pseudo-devices.  So look at vlan_clone_destroy(),
> gre_clone_destroy(), etc for example.
> 
>> Also pppx_add_session() has the same problem: newly created
>> `pipex_session' can start to operate with half initilaized ifnet, so
>> ifnet should be initialized before session creation.
> 
> Currently the KERNEL_LOCK() is protecting all of that, but yes getting
> the order of operations right helps.
> 
>> Ifnet should be already exist outside corresponding `pipex_session'
>> life time. And within ifnet's handlers we should be sure that session is
>> valid.
> 
> If we follow other pseudo-device logic we have indeed first a 'clone'
> operation that creates and `ifp' then an ioctl like SIOCSVNETID which
> links the descriptor to a global list.
> 
> Your diff goes in this direction but misses some of the bits related to
> the creation of interface like if_addgroup(), bpfattach(), etc.  I'd
> suggest you look at net/if_vlan.c :)
> 
>> Diff below drops direct access of this `pipex_session' in ifnet's
>> handlers.
> 
> I don't see the point of this :/
> 
>> In pppx_if_start() we obtain corresponding session by
>> pipex_lookup_by_session_id(). If pppx_if_start() was called in half of
>> pppx_if_destroy(), pipex_lookup_by_session_id() will return NULL.
>> Context switch can't be occured in pppx_if_start().
> 
> The IFF_RUNNING flag should be used to prevent this scenario.  I'd
> suggest imitating the logic in vlan_clone_destroy() in this case and
> bring the interface down as

pppx(4): kill useless rwlock `pppx_ifs_lk'

2020-05-25 Thread Vitaliy Makkoveev

`pppx_ifs_lk' used to protect `pppx_ifs' tree, But this tree is accessed
under KERNL_LOCK() and there is no concurency. Also we don't sleep while
holding this lock. So it's useless, kill it for simplification.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_pppx.c
--- sys/net/if_pppx.c   18 Apr 2020 04:03:56 -  1.84
+++ sys/net/if_pppx.c   25 May 2020 08:31:20 -
@@ -165,7 +165,6 @@ pppx_if_cmp(const struct pppx_if *a, con
return memcmp(>pxi_key, >pxi_key, sizeof(a->pxi_key));
 }
 
-struct rwlock  pppx_ifs_lk = RWLOCK_INITIALIZER("pppxifs");
 RBT_HEAD(pppx_ifs, pppx_if)pppx_ifs = RBT_INITIALIZER(_ifs);
 RBT_PROTOTYPE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
 
@@ -620,8 +619,6 @@ pppx_if_next_unit(void)
struct pppx_if *pxi;
int unit = 0;
 
-   rw_assert_wrlock(_ifs_lk);
-
/* this is safe without splnet since we're not modifying it */
do {
int found = 0;
@@ -650,11 +647,9 @@ pppx_if_find(struct pppx_dev *pxd, int s
key.pxik_session_id = session_id;
key.pxik_protocol = protocol;
 
-   rw_enter_read(_ifs_lk);
pxi = RBT_FIND(pppx_ifs, _ifs, (struct pppx_if *));
if (pxi && pxi->pxi_ready == 0)
pxi = NULL;
-   rw_exit_read(_ifs_lk);
 
return pxi;
 }
@@ -828,12 +823,10 @@ pppx_add_session(struct pppx_dev *pxd, s
 #endif
 
/* try to set the interface up */
-   rw_enter_write(_ifs_lk);
unit = pppx_if_next_unit();
if (unit < 0) {
pool_put(pppx_if_pl, pxi);
error = ENOMEM;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
@@ -846,14 +839,12 @@ pppx_add_session(struct pppx_dev *pxd, s
if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
pool_put(pppx_if_pl, pxi);
error = EADDRINUSE;
-   rw_exit_write(_ifs_lk);
goto out;
}
 
if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
panic("%s: pppx_ifs modified while lock was held", __func__);
LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
ifp->if_mtu = req->pr_peer_mru; /* XXX */
@@ -935,9 +926,7 @@ pppx_add_session(struct pppx_dev *pxd, s
} else {
if_addrhooks_run(ifp);
}
-   rw_enter_write(_ifs_lk);
pxi->pxi_ready = 1;
-   rw_exit_write(_ifs_lk);
 
 out:
return (error);
@@ -1038,11 +1027,9 @@ pppx_if_destroy(struct pppx_dev *pxd, st
if_detach(ifp);
NET_LOCK();
 
-   rw_enter_write(_ifs_lk);
if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
panic("%s: pppx_ifs modified while lock was held", __func__);
LIST_REMOVE(pxi, pxi_list);
-   rw_exit_write(_ifs_lk);
 
pool_put(pppx_if_pl, pxi);
 }

pipex(4): document struct members locking

2020-05-25 Thread Vitaliy Makkoveev

This time it's not clean which lock is really required to protect
pipex(4) data structures. In fact KERNL_LOCK() and NET_LOCK() are
used simultaneously. It's time to document it.

Only [k] used do mark mutable members due to NET_LOCK() looks be
unnecesary and can be killed in future.

Index: sys/net/pipex.h
===
RCS file: /cvs/src/sys/net/pipex.h,v
retrieving revision 1.21
diff -u -p -r1.21 pipex.h
--- sys/net/pipex.h 24 Jan 2017 10:08:30 -  1.21
+++ sys/net/pipex.h 25 May 2020 10:12:33 -
@@ -183,11 +183,16 @@ extern intpipex_enable;
 
 struct pipex_session;
 
-/* pipex context for a interface. */
+/* pipex context for a interface
+ *
+ * Locks used to protect struct members:   
+ *  I   immutable after creation   
+ *  k   kernel lock  
+ */
 struct pipex_iface_context {
-   struct  ifnet *ifnet_this;  /* outer interface */
-   u_int   pipexmode;  /* pipex mode */
-   /* virtual pipex_session entry for multicast routing */
+   struct  ifnet *ifnet_this;  /* [I] outer interface */
+   u_int   pipexmode;  /* [k] pipex mode */
+   /* [I] virtual pipex_session entry for multicast routing */
struct pipex_session *multicast_session;
 };
 
Index: sys/net/pipex_local.h
===
RCS file: /cvs/src/sys/net/pipex_local.h,v
retrieving revision 1.33
diff -u -p -r1.33 pipex_local.h
--- sys/net/pipex_local.h   6 Apr 2020 12:31:30 -   1.33
+++ sys/net/pipex_local.h   25 May 2020 10:12:33 -
@@ -53,43 +53,50 @@
 #define PIPEX_MPPE_NOLDKEY 64 /* should be power of two */
 #define PIPEX_MPPE_OLDKEYMASK  (PIPEX_MPPE_NOLDKEY - 1)
 
+/*
+ * Locks used to protect struct members:   
+ *  I   immutable after creation   
+ *  k   kernel lock  
+ */
+
 #ifdef PIPEX_MPPE
 /* mppe rc4 key */
 struct pipex_mppe {
-   int16_t stateless:1,/* key change mode */
-   resetreq:1,
+   int16_t stateless:1,/* [I] key change mode */
+   resetreq:1, /* [k] */
reserved:14;
-   int16_t keylenbits; /* key length */
-   int16_t keylen;
-   uint16_t coher_cnt; /* cohency counter */
-   struct  rc4_ctx rc4ctx;
-   u_char master_key[PIPEX_MPPE_KEYLEN];   /* master key of MPPE */
-   u_char session_key[PIPEX_MPPE_KEYLEN];  /* session key of MPPE */
-   u_char (*old_session_keys)[PIPEX_MPPE_KEYLEN];  /* old session keys */
+   int16_t keylenbits; /* [I] key length */
+   int16_t keylen; /* [I] */
+   uint16_t coher_cnt; /* [k] cohency counter */
+   struct  rc4_ctx rc4ctx; /* [k] */
+   u_char master_key[PIPEX_MPPE_KEYLEN];   /* [k] master key of MPPE */
+   u_char session_key[PIPEX_MPPE_KEYLEN];  /* [k] session key of MPPE */
+   u_char (*old_session_keys)[PIPEX_MPPE_KEYLEN];
+   /* [k] old session keys */
 };
 #endif /* PIPEX_MPPE */
 
 #ifdef PIPEX_PPPOE
 struct pipex_pppoe_session {
-   u_intover_ifidx; /* ether interface */
+   u_intover_ifidx;/* [I] ether interface */
 };
 #endif /* PIPEX_PPPOE */
 
 #ifdef PIPEX_PPTP
 struct pipex_pptp_session {
/* sequence number gap between pipex and userland */
-   int32_t snd_gap;/* gap of our sequence */
-   int32_t rcv_gap;/* gap of peer's sequence */
-   int32_t ul_snd_una; /* userland send acked seq */
-
-   uint32_t snd_nxt;   /* send next */
-   uint32_t rcv_nxt;   /* receive next */
-   uint32_t snd_una;   /* send acked sequence */
-   uint32_t rcv_acked; /* recv acked sequence */
-
-   int winsz;  /* windows size */
-   int maxwinsz;   /* max windows size */
-   int peer_maxwinsz;  /* peer's max windows size */
+   int32_t snd_gap;/* [k] gap of our sequence */
+   int32_t rcv_gap;/* [k] gap of peer's sequence */
+   int32_t ul_snd_una; /* [k] userland send acked seq */
+
+   uint32_t snd_nxt;   /* [k] send next */
+   uint32_t rcv_nxt;   /* [k] receive next */
+   uint32_t snd_una;   /* [k] send acked sequence */
+   uint32_t rcv_acked; /* [k] recv acked sequence */
+
+   int winsz;  /* [I] windows size

Re: pipex(4): kill NET_LOCK() in pipex_ioctl()

2020-05-28 Thread Vitaliy Makkoveev

On Thu, May 28, 2020 at 12:26:39PM +0200, Martin Pieuchot wrote:
> On 27/05/20(Wed) 11:54, Vitaliy Makkoveev wrote:
> > pipex(4) is simultaneously protected by KERNEL_LOCK() and NET_LOCK() and
> > the last is not required. I guess to start remove NET_LOCK(). Diff below
> > drops NET_LOCK() in pipex_ioctl() and underlaying routines. At least
> > this helps to kill unlock/lock mess in pppx_add_session() and
> > pppx_if_destroy().
> 
> Getting rid of the NET_LOCK() might indeed help to untangle the current
> locking mess.  However we should aim towards removing the KERNEL_LOCK()
> from, at least, the packet processing path starting with pipexintr().

I guess we can made `pipexoutq' processing NET_LOCK() free. Also
`pipexinq' processing requires a little amount of places where NET_LOCK()
is required. So we can implement special locks for pipex(4).

> 
> ok mpi@
> 
> > Index: sys/net/pipex.c
> > ===
> > RCS file: /cvs/src/sys/net/pipex.c,v
> > retrieving revision 1.113
> > diff -u -p -r1.113 pipex.c
> > --- sys/net/pipex.c 7 Apr 2020 07:11:22 -   1.113
> > +++ sys/net/pipex.c 27 May 2020 08:46:32 -
> > @@ -205,7 +205,8 @@ pipex_ioctl(struct pipex_iface_context *
> >  {
> > int pipexmode, ret = 0;
> >  
> > -   NET_LOCK();
> > +   KERNEL_ASSERT_LOCKED();
> > +
> > switch (cmd) {
> > case PIPEXSMODE:
> > pipexmode = *(int *)data;
> > @@ -250,7 +251,6 @@ pipex_ioctl(struct pipex_iface_context *
> > ret = ENOTTY;
> > break;
> > }
> > -   NET_UNLOCK();
> >  
> > return (ret);
> >  }
> > @@ -269,6 +269,8 @@ pipex_add_session(struct pipex_session_r
> > struct ifnet *over_ifp = NULL;
> >  #endif
> >  
> > +   KERNEL_ASSERT_LOCKED();
> > +
> > /* Checks requeted parameters.  */
> > if (!iface->pipexmode)
> > return (ENXIO);
> > @@ -423,7 +425,6 @@ pipex_add_session(struct pipex_session_r
> > }
> >  #endif
> >  
> > -   NET_ASSERT_LOCKED();
> > /* commit the session */
> > if (!in_nullhost(session->ip_address.sin_addr)) {
> > if (pipex_lookup_by_ip_address(session->ip_address.sin_addr)
> > @@ -473,7 +474,7 @@ pipex_add_session(struct pipex_session_r
> >  int
> >  pipex_notify_close_session(struct pipex_session *session)
> >  {
> > -   NET_ASSERT_LOCKED();
> > +   KERNEL_ASSERT_LOCKED();
> > session->state = PIPEX_STATE_CLOSE_WAIT;
> > session->stat.idle_time = 0;
> > 
>

unlock PF_UNIX sockets

2020-05-28 Thread Vitaliy Makkoveev

socket(2) layer is already protected by solock(). It grabs NET_LOCK()
for inet{,6}(4) sockets, but all other sockets are still under
KERNEL_LOCK().

I guess solock is already placed everythere it's required. Also `struct
file' is already mp-safe. 

Diff below introduces rwlock `unp_lock'. It's like NET_LOCK()'s
`netlock' but for PF_UNIX sockets. This lock protects the whole PF_UNIX
layer. Vnode layer accessed in "connect" "bind" and "desconnect" cases
is still under KERNEL_LOCK().

I'am experimenting with his diff since 19.04.2020 and my test systems,
include Gnome workstaion are stable, without any issue.


Index: sys/kern/uipc_socket2.c
===
RCS file: /cvs/src/sys/kern/uipc_socket2.c,v
retrieving revision 1.104
diff -u -p -r1.104 uipc_socket2.c
--- sys/kern/uipc_socket2.c 11 Apr 2020 14:07:06 -  1.104
+++ sys/kern/uipc_socket2.c 1 May 2020 13:47:11 -
@@ -53,6 +53,8 @@ u_longsb_max = SB_MAX;/* patchable */
 extern struct pool mclpools[];
 extern struct pool mbpool;
 
+extern struct rwlock unp_lock;
+
 /*
  * Procedures to manipulate state flags of socket
  * and do appropriate wakeups.  Normal sequence from the
@@ -282,6 +284,8 @@ solock(struct socket *so)
NET_LOCK();
break;
case PF_UNIX:
+   rw_enter_write(_lock);
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -306,6 +310,8 @@ sounlock(struct socket *so, int s)
NET_UNLOCK();
break;
case PF_UNIX:
+   rw_exit_write(_lock);
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -323,6 +329,8 @@ soassertlocked(struct socket *so)
NET_ASSERT_LOCKED();
break;
case PF_UNIX:
+   rw_assert_wrlock(_lock);
+   break;
case PF_ROUTE:
case PF_KEY:
default:
@@ -335,12 +343,24 @@ int
 sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg,
 uint64_t nsecs)
 {
-   if ((so->so_proto->pr_domain->dom_family != PF_UNIX) &&
-   (so->so_proto->pr_domain->dom_family != PF_ROUTE) &&
-   (so->so_proto->pr_domain->dom_family != PF_KEY)) {
-   return rwsleep_nsec(ident, , prio, wmesg, nsecs);
-   } else
-   return tsleep_nsec(ident, prio, wmesg, nsecs);
+   int ret;
+
+   switch (so->so_proto->pr_domain->dom_family) {
+   case PF_INET:
+   case PF_INET6:
+   ret = rwsleep_nsec(ident, , prio, wmesg, nsecs);
+   break;
+   case PF_UNIX:
+   ret = rwsleep_nsec(ident, _lock, prio, wmesg, nsecs);
+   break;
+   case PF_ROUTE:
+   case PF_KEY:
+   default:
+   ret = tsleep_nsec(ident, prio, wmesg, nsecs);
+   break;
+   }
+
+   return ret;
 }
 
 /*
Index: sys/kern/uipc_usrreq.c
===
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.142
diff -u -p -r1.142 uipc_usrreq.c
--- sys/kern/uipc_usrreq.c  16 Jul 2019 21:41:37 -  1.142
+++ sys/kern/uipc_usrreq.c  1 May 2020 13:47:11 -
@@ -51,10 +51,17 @@
 #include 
 #include 
 #include 
+#include 
 
+/*
+ * Locks used to protect global data and struct members:
+ *  I   immutable after creation
+ *  U   unp_lock
+ */
+struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock");
 
 void   uipc_setaddr(const struct unpcb *, struct mbuf *);
 
-/* list of all UNIX domain sockets, for unp_gc() */
+/* [U] list of all UNIX domain sockets, for unp_gc() */
 LIST_HEAD(unp_head, unpcb) unp_head = LIST_HEAD_INITIALIZER(unp_head);
 
 /*
@@ -62,10 +69,10 @@ LIST_HEAD(unp_head, unpcb) unp_head = LI
  * not received and need to be closed.
  */
 struct unp_deferral {
-   SLIST_ENTRY(unp_deferral)   ud_link;
-   int ud_n;
+   SLIST_ENTRY(unp_deferral)   ud_link;/* [U] */
+   int ud_n;   /* [I] */
/* followed by ud_n struct fdpass */
-   struct fdpass ud_fp[];
+   struct fdpass   ud_fp[];/* [I] */
 };
 
 void   unp_discard(struct fdpass *, int);
@@ -74,7 +81,7 @@ void  unp_scan(struct mbuf *, void (*)(st
 intunp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
 
 struct pool unpcb_pool;
-/* list of sets of files that were sent over sockets that are now closed */
+/* [U] list of sets of files that were sent over sockets that are now closed */
 SLIST_HEAD(,unp_deferral) unp_deferred = SLIST_HEAD_INITIALIZER(unp_deferred);
 
 struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL);
@@ -89,7 +96,7 @@ struct task unp_gc_task = TASK_INITIALIZ
  * need a proper out-of-band
  */
 struct sockaddr sun_noname = {

Re: pppx(4): prevent access to `pxi' being destroyed

2020-05-28 Thread Vitaliy Makkoveev

On Thu, May 28, 2020 at 12:14:43PM +0200, Martin Pieuchot wrote:
> On 26/05/20(Tue) 16:12, Vitaliy Makkoveev wrote:
> > `pppx_if' has `pxi_ready' field used to prevent access to incomplete
> > `pxi'. But we don't prevent access to `pxi' which we destroy.
> > pppx_if_destroy() can sleep so we can grab `pxi' which we already
> > destroying by concurrent thread and cause use-after-free issue. I guess
> > to use `pxi_ready' to prevent access to `pxi' at destroy stage too.
> 
> What about setting this field as first step in pppx_if_destroy()?

This time it's makes sences.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.86
diff -u -p -r1.86 if_pppx.c
--- sys/net/if_pppx.c   26 May 2020 08:02:54 -  1.86
+++ sys/net/if_pppx.c   28 May 2020 12:06:41 -
@@ -1004,6 +1004,7 @@ pppx_if_destroy(struct pppx_dev *pxd, st
struct pipex_session *session;
 
NET_ASSERT_LOCKED();
+   pxi->pxi_ready = 0;
session = >pxi_session;
ifp = >pxi_if;

pppx(4): rework to be clese to pseudo-interfaces

2020-05-29 Thread Vitaliy Makkoveev

This time pppx_add_session() has mixed initialisation order. It starts
to initialise pipex(4) session, then initialises `ifnet', then links
pipex(4) session, then continue to initialize `ifnet'.
pppx_add_session() can sleep and pppx_if_start() can start to work with
unlinked pipex(4) session.

Also pppx_if_destroy() can sleep and pppx_if_start() can access to this
`pxi' with unlinked session. pppx_if_start() has checking of
`IFF_RUNNING' flag but it's usesless.

Diff below does pppx_add_session() reordering. At first we initilize
and attach `ifnet', at second we initialize and link pipex(4) session
and at last we set `IFF_RUNNING' flag on `ifnet.

Also we cleaning `IFF_RUNNING' flag before pppx(4) session destruction
in pppx_if_destroy().

Since we made `ifnet' and pipex(4) session initialization separately, we
are more close to remove duplicated code.


Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.87
diff -u -p -r1.87 if_pppx.c
--- sys/net/if_pppx.c   29 May 2020 06:51:52 -  1.87
+++ sys/net/if_pppx.c   29 May 2020 09:27:47 -
@@ -674,16 +674,111 @@ pppx_add_session(struct pppx_dev *pxd, s
 *  the timeout feature until this is fixed.
 */
if (req->pr_timeout_sec != 0)
-   return (EINVAL);
+   return EINVAL;
+
+   pxi = pool_get(pppx_if_pl, PR_WAITOK | PR_ZERO);
+
+   /* try to set the interface up */
+   unit = pppx_if_next_unit();
+   if (unit < 0) {
+   error = ENOMEM;
+   goto out;
+   }
+
+   pxi->pxi_unit = unit;
+   pxi->pxi_key.pxik_session_id = req->pr_session_id;
+   pxi->pxi_key.pxik_protocol = req->pr_protocol;
+   pxi->pxi_dev = pxd;
+
+   /* this is safe without splnet since we're not modifying it */
+   if (RBT_FIND(pppx_ifs, _ifs, pxi) != NULL) {
+   error = EADDRINUSE;
+   goto out;
+   }
+
+   if (RBT_INSERT(pppx_ifs, _ifs, pxi) != NULL)
+   panic("%s: pppx_ifs modified while lock was held", __func__);
+   LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
+
+   ifp = >pxi_if;
+   snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
+   ifp->if_mtu = req->pr_peer_mru; /* XXX */
+   ifp->if_flags = IFF_POINTOPOINT | IFF_MULTICAST | IFF_UP;
+   ifp->if_start = pppx_if_start;
+   ifp->if_output = pppx_if_output;
+   ifp->if_ioctl = pppx_if_ioctl;
+   ifp->if_rtrequest = p2p_rtrequest;
+   ifp->if_type = IFT_PPP;
+   IFQ_SET_MAXLEN(>if_snd, 1);
+   ifp->if_softc = pxi;
+   /* ifp->if_rdomain = req->pr_rdomain; */
+
+   /* XXXSMP breaks atomicity */
+   NET_UNLOCK();
+   if_attach(ifp);
+   NET_LOCK();
+
+   if_addgroup(ifp, "pppx");
+   if_alloc_sadl(ifp);
+
+#if NBPFILTER > 0
+   bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
+#endif
+
+   /* XXX ipv6 support?  how does the caller indicate it wants ipv6
+* instead of ipv4?
+*/
+   memset(, 0, sizeof(ifaddr));
+   ifaddr.sin_family = AF_INET;
+   ifaddr.sin_len = sizeof(ifaddr);
+   ifaddr.sin_addr = req->pr_ip_srcaddr;
+
+   ia = malloc(sizeof (*ia), M_IFADDR, M_WAITOK | M_ZERO);
+
+   ia->ia_addr.sin_family = AF_INET;
+   ia->ia_addr.sin_len = sizeof(struct sockaddr_in);
+   ia->ia_addr.sin_addr = req->pr_ip_srcaddr;
+
+   ia->ia_dstaddr.sin_family = AF_INET;
+   ia->ia_dstaddr.sin_len = sizeof(struct sockaddr_in);
+   ia->ia_dstaddr.sin_addr = req->pr_ip_address;
+
+   ia->ia_sockmask.sin_family = AF_INET;
+   ia->ia_sockmask.sin_len = sizeof(struct sockaddr_in);
+   ia->ia_sockmask.sin_addr = req->pr_ip_netmask;
+
+   ia->ia_ifa.ifa_addr = sintosa(>ia_addr);
+   ia->ia_ifa.ifa_dstaddr = sintosa(>ia_dstaddr);
+   ia->ia_ifa.ifa_netmask = sintosa(>ia_sockmask);
+   ia->ia_ifa.ifa_ifp = ifp;
+
+   ia->ia_netmask = ia->ia_sockmask.sin_addr.s_addr;
+
+   error = in_ifinit(ifp, ia, , 1);
+   if (error) {
+   printf("pppx: unable to set addresses for %s, error=%d\n",
+   ifp->if_xname, error);
+   goto out_detach;
+   }
+
+   if_addrhooks_run(ifp);
+
+   /* fake a pipex interface context */
+   pxi->pxi_ifcontext.ifnet_this = ifp;
+   pxi->pxi_ifcontext.pipexmode = PIPEX_ENABLED;
 
switch (req->pr_protocol) {
 #ifdef PIPEX_PPPOE
case PIPEX_PROTO_PPPOE:
over_ifp = ifunit(req->pr_proto.pppoe.over_ifname);
-   if (over_ifp == NULL)
-   return (EINVAL);
-   if (req->pr_peer_address.ss_family != AF_UNSPEC)
-   return (EINVAL);
+   if (over_ifp == NULL) {
+   error = EINVAL;
+   goto out_detach;
+   }
+   if (req->pr_peer_address.ss_family !=

Re: pipex(4): kill NET_LOCK() in pipex_ioctl()

2020-05-29 Thread Vitaliy Makkoveev

On Fri, May 29, 2020 at 09:09:22AM +0200, Martin Pieuchot wrote:
> On 28/05/20(Thu) 15:27, Vitaliy Makkoveev wrote:
> > On Thu, May 28, 2020 at 12:26:39PM +0200, Martin Pieuchot wrote:
> > > On 27/05/20(Wed) 11:54, Vitaliy Makkoveev wrote:
> > > > pipex(4) is simultaneously protected by KERNEL_LOCK() and NET_LOCK() and
> > > > the last is not required. I guess to start remove NET_LOCK(). Diff below
> > > > drops NET_LOCK() in pipex_ioctl() and underlaying routines. At least
> > > > this helps to kill unlock/lock mess in pppx_add_session() and
> > > > pppx_if_destroy().
> > > 
> > > Getting rid of the NET_LOCK() might indeed help to untangle the current
> > > locking mess.  However we should aim towards removing the KERNEL_LOCK()
> > > from, at least, the packet processing path starting with pipexintr().
> > 
> > I guess we can made `pipexoutq' processing NET_LOCK() free. Also
> > `pipexinq' processing requires a little amount of places where NET_LOCK()
> > is required. So we can implement special locks for pipex(4).
> 
> After second though, it seems that the easiest way forward is to protect
> `pipex_session_list' by the NET_LOCK().
> 
> The rational is that this global list is dereferenced in pipexintr() and
> its members are compared to the content of `ph_cookie'.
> 
> There's currently no mechanism to ensure the reference saved in `ph_cookie'
> is still valid.  That means what ensures the pointer is kind-of correct
> is the NET_LOCK().  I'm saying "kind-of" because comparing pointers is
> questionable, especially if sessions can be destroyed without purging
> `pipexoutq' or `pipexinq'. 
> 
> Since the KERNEL_LOCK() is not always held when the network stack runs,
> `ph_cookie' can be modified by a CPU without holding it.  So what
> actually protects the data structures here is the NET_LOCK().
> 
> Does that make sense?
> 

This time nothing protects `ph_cookie'. I already pointed this issue.

pipex_ppp_enqueue() (net/pipex.c:737) is the only place where we set
`ph_cookie'. But between pipex_ppp_enqueue() and pipexintr() calls we do
context switch where we can kill session.

There are some cases how we can do it:

1. by pipex_ioctl() with PIPEXDSESSION command.
We call pipex_close_session(), it sets sessions state to
PIPEX_STATE_CLOSED and it's all. pipex_timer() will delete this session
by pipex_destroy_session() but only if `pipexinq' and `pipexoutq' are
empty. This check is required to be sure there is no `mbuf' with
`ph_cookie' pointed to destroyed session. This is the only safe way to
destroy session and the only pppac(4) do this but not always.

2. by pipex_iface_stop().
We travel through all sessions and call pipex_destroy_session() with
our sessions. We don't check `pipexinq' and `pipexoutq' state. So,
hypothetically, we can have `mbuf' with `ph_cookie' pointed to session
we killing and this is potencial use-after-free. pppac(4) do this by
pppacclose() which calls pipex_iface_fini() which calls
pipex_iface_stop(). Hypothetically, npppd(8) can be killed while it
has a lot of active connections and some of them can be referenced by
`pipexinq' or `pipexoutq'.

I guess in normal shutdown npppd(8) does pipex_ioctl() with
PIPEXDSESSION command as described in (1) before close correspondind
/dev/pppac and there is no sessions while we do pppacclose().

3. by pppx_if_destroy().
pppx_if has reference to it's own pipex(4) session and this reference
will be killed by pppx_if_destroy() without `pipexinq' and `pipexoutq'
check. Also potential use-after-free. And this is the only way how
pppx(4) can destroy session.

We always setting (and destroying) and procesing `ph_cookie' in different
threads. So we will always drop lock which protects the whole pipex(4)
layer.

I guess the only way to be sure the data referenced by `ph_cookie' is
still valid is to implement reference counters to pipex(4) session and
rework pipex(4) session destruction.

Re: unlock PF_UNIX sockets

2020-05-29 Thread Vitaliy Makkoveev

On Fri, May 29, 2020 at 08:48:14AM +0200, Martin Pieuchot wrote:
> On 28/05/20(Thu) 14:59, Vitaliy Makkoveev wrote:
> > socket(2) layer is already protected by solock(). It grabs NET_LOCK()
> > for inet{,6}(4) sockets, but all other sockets are still under
> > KERNEL_LOCK().
> > 
> > I guess solock is already placed everythere it's required. Also `struct
> > file' is already mp-safe. 
> > 
> > Diff below introduces rwlock `unp_lock'. It's like NET_LOCK()'s
> > `netlock' but for PF_UNIX sockets. This lock protects the whole PF_UNIX
> > layer. Vnode layer accessed in "connect" "bind" and "desconnect" cases
> > is still under KERNEL_LOCK().
> > 
> > I'am experimenting with his diff since 19.04.2020 and my test systems,
> > include Gnome workstaion are stable, without any issue.
> 
> Looks great, more tests are required :)
> 
> Your diff has many trailing spaces, could you remove them?  Commonly
> used editors or diff programs have a way to highlight them :)

Ok I will fix it.

> 
> One comment:
> 
> > Index: sys/kern/uipc_usrreq.c
> > ===
> > RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> > retrieving revision 1.142
> > diff -u -p -r1.142 uipc_usrreq.c
> > --- sys/kern/uipc_usrreq.c  16 Jul 2019 21:41:37 -  1.142
> > +++ sys/kern/uipc_usrreq.c  1 May 2020 13:47:11 -
> > @@ -409,12 +418,17 @@ unp_detach(struct unpcb *unp)
> >  {
> > struct vnode *vp;
> >  
> > +   rw_assert_wrlock(_lock);
> > +
> > LIST_REMOVE(unp, unp_link);
> > if (unp->unp_vnode) {
> > +   /* this is an unlocked cleaning of `v_socket', but it's safe */
> > unp->unp_vnode->v_socket = NULL;
> > vp = unp->unp_vnode;
> > unp->unp_vnode = NULL;
> > +   KERNEL_LOCK();
> > vrele(vp);
> > +   KERNEL_UNLOCK();
> 
> Why is it safe?  That's what the comment should explain :)  If the code
> doesn't take the lock that implies it is not required.  Why it is not
> required is not obvious.
>

The only place where `v_socket' is accessed outside PF_UNIX layer is
kern/kern_sysctl.c:1143. This is just pointer to integer cast. Pointer
assignment is atomic.

Also we are accessing this `unp' in unp_detach(), unp_bind() and
unp_connect() but they are serialized by `unp_lock'.

This is the last reference to this `unp' and also there is the only
reference in socket layer to `unp_vnode'. Since `unpcb' has no reference
counter we can't add assertion to be sure this is the last reference to
`unp'. We can check reference count of vnode. But since sys_open() and
unp_detach() are not serialized by KERNEL_LOCK(), vn_open() called by
doopenat() will raise `v_usecount' before `v_type' check
(kern/vfs_vnops.c:112). And `vn->v_usecount' in this unp_detach() call
can be "2".

In fact, only vrele(9) should be called under KERNEL_LOCK() to protect
vnode. If we want to put "KASSERT(vn->v_usecount==1)" we should move it
under KERNEL_LOCK() before `v_socket' cleaning to be sure there is no
concurency with sys_open().

Something like this:

if (unp->unp_vnode) {
KERNEL_LOCK();
KASSERT(unp->unp_vnode->v_usecount == 1);
unp->unp_vnode->v_socket = NULL;
vp = unp->unp_vnode;
unp->unp_vnode = NULL;
vrele(vp);
KERNEL_UNLOCK();
}

I will update diff if this is required.

Re: fix pppx(4) with net/ifq.c rev 1.38

2020-05-30 Thread Vitaliy Makkoveev



> On 30 May 2020, at 09:40, David Gwynne  wrote:
> 
> On Mon, May 25, 2020 at 09:44:22AM +0200, Martin Pieuchot wrote:
>> On 23/05/20(Sat) 15:38, Vitaliy Makkoveev wrote:
>>>> On 23 May 2020, at 12:54, Martin Pieuchot  wrote:
>>>> On 22/05/20(Fri) 13:25, Vitaliy Makkoveev wrote:
>>>>> On Fri, May 22, 2020 at 07:57:13AM +1000, David Gwynne wrote:
>>>>>> [...] 
>>>>>> can you try the following diff?
>>>>>> 
>>>>> 
>>>>> I tested this diff and it works for me. But the problem I pointed is
>>>>> about pipex(4) locking.
>>>>> 
>>>>> pipex(4) requires NET_LOCK() be grabbed not only for underlaying
>>>>> ip{,6}_output() but for itself too. But since pppac_start() has
>>>>> unpredictable behavior I suggested to make it predictable [1].
>>>> 
>>>> What needs the NET_LOCK() in their?  We're talking about
>>>> pipex_ppp_output(), right?  Does it really need the NET_LOCK() or
>>>> the KERNEL_LOCK() is what protects those data structures?
>>> 
>>> Yes, about pipex_ppp_output() and pipex_output(). Except
>>> ip{,6}_output() nothing requires NET_LOCK(). As David Gwynne pointed,
>>> they can be replaced by ip{,6}_send().
>> 
>> Locks protect data structures, you're talking about functions, which
>> data structures are serialized by this lock?  I'm questioning whether
>> there is one.
>> 
>>> [...]
>>>> In case of pipex(4) is isn't clear that the NET_LOCK() is necessary.
>>> 
>>> I guess, pipex(4) was wrapped by NET_LOCK() to protect it while it???s
>>> accessed through `pr_input???. Is NET_LOCK() required for this case?
>> 
>> pipex(4) like all the network stack has been wrapped in the NET_LOCK()
>> because it was easy to do.  That means it isn't a concious decision or
>> design.  The fact that pipex(4) code runs under the NET_LOCK() is a side
>> effect of how the rest of the stack evolved.  I'm questioning whether
>> this lock is required there.  In theory it shouldn't.  What is the
>> reality?
> 
> pipex and pppx pre-date the NET_LOCK, which means you can assume
> that any implicit locking was and is done by the KERNEL_LOCK. mpi is
> asking the right questions here.
> 
> As for the ifq maxlen difference between pppx and pppac, that's more
> about when and how quickly they were written more than anything else.
> The IFQ_SET_MAXLEN(>if_snd, 1) in pppx is because that's a way to
> bypass transmit mitigation for pseudo/virtual interfaces. That was the
> only way to do it historically. It is not an elegant hack to keep
> hold of the NET_LOCK over a call to a start routine.
> 
> As a rule of thumb, network interface drivers should not (maybe
> cannot) rely on the NET_LOCK in their if_start handlers. To be
> clear, they should not rely on it being held by the network stack
> when if_start is called because sometimes the stack calls it without
> holding NET_LOCK, and they should not take it because they might
> be called by the stack when it is being held.
> 
> Also, be aware that the ifq machinery makes sure that the start
> routine is not called concurrently or recursively. You can queue
> packets for transmission on an ifq from anywhere in the kernel at
> any time, but only one cpu will run the start routine. Other cpus
> can queue packets while another one is running if_start, but the
> first one ends up responsible for trying to transmit it.
> 
> ifqs also take the KERNEL_LOCK before calling if_start if the interface
> is not marked as IFXF_MPSAFE.
> 
> The summary is that pppx and pppac are not marked as mpsafe so their
> start routines are called with KERNEL_LOCK held. Currently pppx
> accidentally gets NET_LOCK because of the IFQ_SET_MAXLEN, but shouldn't
> rely on it.
> 
> Cheers,
> dlg
> 

Thanks for explanation.
Will you commit diff you posted in this thread?

pppx(4): prevent access to `pxi' being destroyed

2020-05-26 Thread Vitaliy Makkoveev

`pppx_if' has `pxi_ready' field used to prevent access to incomplete
`pxi'. But we don't prevent access to `pxi' which we destroy.
pppx_if_destroy() can sleep so we can grab `pxi' which we already
destroying by concurrent thread and cause use-after-free issue. I guess
to use `pxi_ready' to prevent access to `pxi' at destroy stage too.

Index: sys/net/if_pppx.c
===
RCS file: /cvs/src/sys/net/if_pppx.c,v
retrieving revision 1.84
diff -u -p -r1.84 if_pppx.c
--- sys/net/if_pppx.c   18 Apr 2020 04:03:56 -  1.84
+++ sys/net/if_pppx.c   26 May 2020 12:59:56 -
@@ -594,8 +594,10 @@ pppxclose(dev_t dev, int flags, int mode
 
/* XXX */
NET_LOCK();
-   while ((pxi = LIST_FIRST(>pxd_pxis)))
+   while ((pxi = LIST_FIRST(>pxd_pxis))) {
+   pxi->pxi_ready = 0;
pppx_if_destroy(pxd, pxi);
+   }
NET_UNLOCK();
 
LIST_REMOVE(pxd, pxd_entry);
@@ -951,6 +953,7 @@ pppx_del_session(struct pppx_dev *pxd, s
pxi = pppx_if_find(pxd, req->pcr_session_id, req->pcr_protocol);
if (pxi == NULL)
return (EINVAL);
+   pxi->pxi_ready = 0;
 
req->pcr_stat = pxi->pxi_session.stat;

Re: [PATCH] pipex(4): rework PPP input

2020-05-26 Thread Vitaliy Makkoveev




> On 27 May 2020, at 01:29, Sergey Ryazanov  wrote:
> 
> On Tue, May 26, 2020 at 12:07 PM Vitaliy Makkoveev
>  wrote:
>>> On 25 May 2020, at 22:04, Sergey Ryazanov  wrote:
>>> On Sat, May 23, 2020 at 3:07 PM Vitaliy Makkoveev
>>>  wrote:
>>>> For example, each pipex session should have unique pair of `protocol’ and
>>>> `session_id’. These values are passed from userland. While the only
>>>> instance of npppd(8) uses pipex(4) this is not the problem. But you
>>>> introduce the case while pipex(4) will be used by multiple independent
>>>> userland programs. At least, I have interest how you handle this.
>>> 
>>> This should not be a problem here. npppd(8) support server mode only.
>>> While my work is to implement acceleration for client side of L2TP
>>> connection.
>> 
>> I guess they can coexist. Also you can have multiple connections to
>> ppp servers simultaneously.
> 
> With 16 bits long session id field, according to birthday problem to
> reach 0.9 collision probability I need 549 simultaneous sessions.
> Should I still be worried or I have a time to complete integration
> work and then update UDP  filter for love of the game?

Why do you ask me? Do what you want.

> 
> -- 
> Sergey
>

pipex(4): kill pipexintr()

2020-07-29 Thread Vitaliy Makkoveev

Now pipex(4) is fully covered by NET_LOCK() and this is documented. But
we still have an issue with pipex(4) session itself and I guess it's
time to fix it.

We have `pipexinq' and `pipexoutq' mbuf(9) queues to store mbufs. Each
mbuf(9) passed to these queues stores the pointer to corresponding
session referenced as `m_pkthdr.ph_cookie'. We enqueue incoming mbufs for
pppx(4) and incoming and outgoing mbufs for pppac(4). But we don't
enqueue pppoe related mbufs. After packet was enqueued to corresponding
queue we call schednetisr() which just schedules netisr() to run:

 cut begin 

780 pipex_ppp_enqueue(struct mbuf *m0, struct pipex_session *session,
781 struct mbuf_queue *mq)
782 {
783 m0->m_pkthdr.ph_cookie = session;
784 /* XXX need to support other protocols */
785 m0->m_pkthdr.ph_ppp_proto = PPP_IP;
786 
787 if (mq_enqueue(mq, m0) != 0)
788 return (1);
789 
790 schednetisr(NETISR_PIPEX);
791 
792 return (0);
793 }

 cut end 

Also we have pipex_timer() which should destroy session in safe way, but
it does this only for pppac(4) and only for sessions closed by
`PIPEXDSESSION' command:

 cut begin 

812 pipex_timer(void *ignored_arg)
813 {
/* skip */
846 case PIPEX_STATE_CLOSED:
847 /*
848  * mbuf queued in pipexinq or pipexoutq may have a
849  * refererce to this session.
850  */
851 if (!mq_empty() || !mq_empty())
852 continue;
853 
854 pipex_destroy_session(session);
855 break;

 cut end 

While we destroy sessions through pipex_rele_session() or through
pipex_iface_fini() or through `PIPEXSMODE' command we don't check
`pipexinq' and `pipexoutq' state. This means we can break them.

It's not guaranteed that netisr() will start just after schednetisr()
call. This means we can destroy session, but corresponding mbuf(9) is
stored within `pipexinq' or `pipexoutq'. It's `m_pkthdr.ph_cookie' still
stores pointer to destroyed session and we have use after free issue. I
wonder why we didn't caught panic yet.

I propose to kill `pipexinq', `pipexoutq' and pipexintr(). There is
absolutely no reason them to exist. This should not only fix issue
described above but simplifies code too.

Other ways are to implement reference counters for session or walk
through mbuf(9) queues and kill corresponding mbufs. It doesn't make
sense to go these ways.

Index: lib/libc/sys/sysctl.2
===
RCS file: /cvs/src/lib/libc/sys/sysctl.2,v
retrieving revision 1.40
diff -u -p -r1.40 sysctl.2
--- lib/libc/sys/sysctl.2   17 May 2020 05:48:39 -  1.40
+++ lib/libc/sys/sysctl.2   29 Jul 2020 13:47:40 -
@@ -2033,35 +2033,11 @@ The currently defined variable names are
 .Bl -column "Third level name" "integer" "Changeable" -offset indent
 .It Sy "Third level name" Ta Sy "Type" Ta Sy "Changeable"
 .It Dv PIPEXCTL_ENABLE Ta integer Ta yes
-.It Dv PIPEXCTL_INQ Ta node Ta not applicable
-.It Dv PIPEXCTL_OUTQ Ta node Ta not applicable
 .El
 .Bl -tag -width "123456"
 .It Dv PIPEXCTL_ENABLE
 If set to 1, enable PIPEX processing.
 The default is 0.
-.It Dv PIPEXCTL_INQ Pq Va net.pipex.inq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about the PIPEX packet input queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
-.It Dv PIPEXCTL_OUTQ Pq Va net.pipex.outq
-Fourth level comprises an array of
-.Vt struct ifqueue
-structures containing information about PIPEX packet output queue.
-The forth level names for the elements of
-.Vt struct ifqueue
-are the same as described in
-.Li ip.arpq
-in the
-.Dv PF_INET
-section.
 .El
 .El
 .Ss CTL_VFS
Index: sys/net/if.c
===
RCS file: /cvs/src/sys/net/if.c,v
retrieving revision 1.616
diff -u -p -r1.616 if.c
--- sys/net/if.c24 Jul 2020 18:17:14 -  1.616
+++ sys/net/if.c29 Jul 2020 13:47:44 -
@@ -909,13 +909,6 @@ if_netisr(void *unused)
KERNEL_UNLOCK();
}
 #endif
-#ifdef PIPEX
-   if (n & (1 << NETISR_PIPEX)) {
-   KERNEL_LOCK();
-   pipexintr();
-   KERNEL_UNLOCK();
-   }
-#endif
t |= n;
}
 
Index: sys/net/netisr.h
===
RCS file: /cvs/src/sys/net/netisr.h,v
retrieving revision 1.51
diff -u -p -r1.51 netisr.h
--- sys/net/netisr.h6 Aug 2019 22:57:54 -   1.51
+++ sys/net/netisr.h29 Jul 2020 13:47:44 -
@@ -48,7 +48,6 @@
 #defineNETISR_IPV6 24  /*

Re: pipex(4): kill pipexintr()

2020-08-01 Thread Vitaliy Makkoveev

On Sat, Aug 01, 2020 at 07:44:17PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> I'm not sure when it is broken, in old versions, it was assumed the
> pipex queues are empty when pipex_iface_stop() is called.  The problem
> mvs@ found is the assumption is not true any more.
> 
> pipex has a mechanism that delete a session when the queues are empty.
> 
> 819 Static void
> 820 pipex_timer(void *ignored_arg)
> 821 {
> (snip)
> 854 case PIPEX_STATE_CLOSED:
> 855 /*
> 856  * mbuf queued in pipexinq or pipexoutq may 
> have a
> 857  * refererce to this session.
> 858  */
> 859 if (!mq_empty() || 
> !mq_empty())
> 860 continue;
> 861 
> 862 pipex_destroy_session(session);
> 863 break;
> 
> I think using this is better.
> 
> How about this?
> 

Unfortunately your diff is incorrect. It introduces memory leaks and
breaks pppx(4). Also it is incomplete.

We have multiple ways to kill pipex(sessions):

1. pppx(4)

We have `struct pppx_if' which has pointer to corresponding session and
this session is accessed directly within pppx(4) layer. Since we can't
destroy `ppp_if' in pipex(4) layer we can't destroy these sessions by
pipex_timer(). The only way to destroy them is pppx_if_destroy() which:

1. unlink session by pipex_unlink_session()
2. detach corresponding `ifnet' by if_detach()
3. release session by pipex_rele_session() 

It's unsafe because mbuf queues can have references to this session.

2. pppac(4)

We have no direct access to corresponding sessions within pppac(4)
layer. Also there are multiple ways to do this:

1. pipex_ioctl() with `PIPEXSMODE' command. Underlay pipex_iface_stop()
walks through `pipex_session_list' and destroy sessions by
pipex_destroy_session() call. It's unsafe because we don't check queues.

2. pipex_ioctl() with `PIPEXDSESSION'. pipex_close_session() will change
session's  state and pipex_timer() will kill this sessions later. This
is the only safe way.

3. pipex_iface_fini(). The same as `PIPEXSMODE', pipex_iface_stop()
kills sessions, Which is also unsafe. Also we have another use after
free issue:

 cut begin 

pipex_iface_fini(struct pipex_iface_context *pipex_iface)
{
pool_put(_session_pool, pipex_iface->multicast_session);
NET_LOCK();
pipex_iface_stop(pipex_iface);
NET_UNLOCK();
}

 cut end 

`multicast_session' should be protected too. It also can be pushed to
`pipexoutq'. Also since this time pipexintr() and pipex_iface_fini() are
both serialized by KERNEL_LOCK() too we can't destroy `multicast_session'
which is in use by pipexintr(). But when we will drop KERNEL_LOCK()
around pipexintr() we can catch use after free issue here. I already did
diff for move this pool_put() under NET_LOCK(), but it was rejectedi by
mpi@ because:

 cut begin 
pipex_iface_fini() should be called on the last reference of the
descriptor.  So this shouldn't be necessary.  If there's an issue   
with the current order of the operations, we should certainly fix   
it differently.   
 cut end 

So I repeat it again: npppd(8) can be killed in every moment by SIGKILL
or by SIGSEGV and pppacclose() will be called and it will call
pipex_iface_fini(). `multicast_session' can be used in this moment by
pipexintr().

And no locks protect `multicast_session' itself.

The two diffs I proposed in this thread solve problems caused by
pipexintr().

I'm not sure about bottleneck in crypto thread, because we have no
simultaneous execution with pipexintr(), but the second diff which
introduces refcounters does session destruction well, include
`multicast_session'. I found a little issue with second diff which stops
processing multicast packets, so I included updated version below.

> diff --git a/sys/net/pipex.c b/sys/net/pipex.c
> index 2ad7757fee9..6fe14c400bf 100644
> --- a/sys/net/pipex.c
> +++ b/sys/net/pipex.c
> @@ -190,7 +190,7 @@ pipex_iface_stop(struct pipex_iface_context *pipex_iface)
>   LIST_FOREACH_SAFE(session, _session_list, session_list,
>   session_tmp) {
>   if (session->pipex_iface == pipex_iface)
> - pipex_destroy_session(session);
> + pipex_unlink_session(session);

You assumed pipex_timer() the only place where sessions can be released?
pipex_unlink_session() will detach session from all lists, include
`pipex_session_list'. This session is leaked.

Also what to do with multicast sessions?

>   }
>  }
>  
> @@ -470,9 +470,16 @@ pipex_link_session(struct pipex_session *session,
>  void
>  pipex_unlink_session(struct pipex_session *session)
>  {
> + struct radix_node *rn;
> +
>   session->ifindex = 0;
>  
>   NET_ASSERT_LOCKED();
> + if

Re: pipex(4): kill pipexintr()

2020-08-03 Thread Vitaliy Makkoveev

On Tue, Aug 04, 2020 at 01:26:14AM +0900, YASUOKA Masahiko wrote:
> On Sat, 1 Aug 2020 18:52:27 +0300
> Vitaliy Makkoveev  wrote:
> > On Sat, Aug 01, 2020 at 07:44:17PM +0900, YASUOKA Masahiko wrote:
> >> I'm not sure when it is broken, in old versions, it was assumed the
> >> pipex queues are empty when pipex_iface_stop() is called.  The problem
> >> mvs@ found is the assumption is not true any more.
> >> 
> >> pipex has a mechanism that delete a session when the queues are empty.
> >> 
> >> 819 Static void
> >> 820 pipex_timer(void *ignored_arg)
> >> 821 {
> >> (snip)
> >> 854 case PIPEX_STATE_CLOSED:
> >> 855 /*
> >> 856  * mbuf queued in pipexinq or pipexoutq 
> >> may have a
> >> 857  * refererce to this session.
> >> 858  */
> >> 859 if (!mq_empty() || 
> >> !mq_empty())
> >> 860 continue;
> >> 861 
> >> 862 pipex_destroy_session(session);
> >> 863 break;
> >> 
> >> I think using this is better.
> >> 
> >> How about this?
> > 
> > Unfortunately your diff is incorrect. It introduces memory leaks and
> > breaks pppx(4). Also it is incomplete.
> 
> Thank you for your feedbacks.
> 
> > We have multiple ways to kill pipex(sessions):
> > 
> > 1. pppx(4)
> > 
> > We have `struct pppx_if' which has pointer to corresponding session and
> > this session is accessed directly within pppx(4) layer. Since we can't
> > destroy `ppp_if' in pipex(4) layer we can't destroy these sessions by
> > pipex_timer(). The only way to destroy them is pppx_if_destroy() which:
> > 
> > 1. unlink session by pipex_unlink_session()
> > 2. detach corresponding `ifnet' by if_detach()
> > 3. release session by pipex_rele_session() 
> > 
> > It's unsafe because mbuf queues can have references to this session.
> 
> Yes.
> 
> > 2. pppac(4)
> > 
> > We have no direct access to corresponding sessions within pppac(4)
> > layer. Also there are multiple ways to do this:
> > 
> > 1. pipex_ioctl() with `PIPEXSMODE' command. Underlay pipex_iface_stop()
> > walks through `pipex_session_list' and destroy sessions by
> > pipex_destroy_session() call. It's unsafe because we don't check queues.
> > 
> > 2. pipex_ioctl() with `PIPEXDSESSION'. pipex_close_session() will change
> > session's  state and pipex_timer() will kill this sessions later. This
> > is the only safe way.
> > 
> > 3. pipex_iface_fini(). The same as `PIPEXSMODE', pipex_iface_stop()
> > kills sessions, Which is also unsafe. Also we have another use after
> > free issue:
> > 
> >  cut begin 
> > 
> > pipex_iface_fini(struct pipex_iface_context *pipex_iface)
> > {
> > pool_put(_session_pool, pipex_iface->multicast_session);
> > NET_LOCK();
> > pipex_iface_stop(pipex_iface);
> > NET_UNLOCK();
> > }
> > 
> >  cut end 
> > 
> > `multicast_session' should be protected too. It also can be pushed to
> > `pipexoutq'.
> 
> Yes, I missed this point.
> 
> > Also since this time pipexintr() and pipex_iface_fini() are
> > both serialized by KERNEL_LOCK() too we can't destroy `multicast_session'
> > which is in use by pipexintr(). But when we will drop KERNEL_LOCK()
> > around pipexintr() we can catch use after free issue here. I already did
> > diff for move this pool_put() under NET_LOCK(), but it was rejectedi by
> > mpi@ because:
> > 
> >  cut begin 
> > pipex_iface_fini() should be called on the last reference of the
> > 
> > descriptor.  So this shouldn't be necessary.  If there's an issue   
> > 
> > with the current order of the operations, we should certainly fix   
> > 
> > it differently.   
> >  cut end 
> 
> Yes, I understand what mpi@ is saying.  But this is a separate story.
> 
> > So I repeat it again: npppd(8) can be killed in every moment by SIGKILL
> > or by SIGSEGV and pppacclose() will be called and it will call
> > pipex_iface_fini(). `multicast_session' can be used in this moment by
> > pipexintr().
> > 
> > And no locks protect `multicast_session' itself.
> > 
> > The two diffs I proposed in this thr

Re: pipex(4): kill pipexintr()

2020-08-04 Thread Vitaliy Makkoveev

On Tue, Aug 04, 2020 at 01:53:55PM +0900, YASUOKA Masahiko wrote:
> On Mon, 3 Aug 2020 23:36:09 +0300
> Vitaliy Makkoveev  wrote:
> > On Tue, Aug 04, 2020 at 01:26:14AM +0900, YASUOKA Masahiko wrote:
> >> Comments?
> > 
> > You introduce `cookie' as 
> > 
> > cookie = session->protocol << 16 | session->session_id;
> > 
> > also multicast sessions initialized as 
> > 
> > session->protocol = PIPEX_PROTO_NONE;
> > session->session_id = ifindex;
> > 
> > `protocol' and `session_id' come from userland, so I like to have checks
> > like below. It's allow us to avoid `cookie' be broken while
> > `pr_session_id' exceeds 16 bit integer. Also userland should not pass
> > PIPEX_PROTO_NONE as `pr_protocol' because we shouldn't have multicast
> > and not multicast sessions with the same `cookie'.
> > 
> >  cut begin 
> > 
> > pipex_init_session(struct pipex_session **rsession,
> > struct pipex_session_req *req)
> > {
> > if (req->pr_protocol == PIPEX_PROTO_NONE)
> > return (EINVAL);
> 
> pipex_init_session() has the same check already.

My fault. Sorry.

> 
>  287 int
>  288 pipex_init_session(struct pipex_session **rsession,
>  289 struct pipex_session_req *req)
>  290 {
>  (snip)
>  297 switch (req->pr_protocol) {
>  298 #ifdef PIPEX_PPPOE
>  299 case PIPEX_PROTO_PPPOE:
>  (snip)
>  333 default:
>  334 return (EPROTONOSUPPORT);
>  335 }
> 
> > 
> > if (req->pr_session_id > 0x)
> > return (EINVAL);
> > 
> >  cut end 
> 
> req->pr_session_id can't be > 0x since it's uint16_t.
> 
> > Also cookies introduce invalidation problem. Yes, it has low
> > probability, but we can have operation order like below:
> > 
> > 1. enqueue session with `protocol' = 0xaa and `session_id' = 0xbb, and
> > `cookie' = 0xaabb
> > 2. kill this session
> > 3. create new session `protocol' = 0xaa and `session_id' = 0xbb
> > 4. this newly created session will be used by pipexintr()
> > 
> > As I have seen while played with refcounters, session can be enqueued
> > more than 10 times...
> 
> The diff makes the problem worse, but it could happen already if the
> session-id is reused.
> 
> > Also It's not obvious that interface index will never exceed 16 bit
> > counter. It's unsigned int and may be underlay counter's resolution
> > will be expanded in future. So I like to have at least corresponding
> > assertion in pipex_iface_init().
> 
> Right.  This is fixable with another unique number.
> 
> > So, may be my first solution is the best here. And, as mpi@ pointed,
> > ipsec(4) should be reworked to allow parallelism.
> 
> Does first mean killing the pipexintr?

Yes.

> 
> What I explained was wrong.  I'm sorry about this.
> 
> On Fri, 31 Jul 2020 09:36:32 +0900 (JST)
> YASUOKA Masahiko  wrote:
> > A packet of L2TP/IPsec (encapsulated IP/PPP/L2TP/UDP/ESP/UDP/IP) is
> > processed like:
> > 
> >ipv4_input
> >  ...
> >udp_input
> >  ipsec_common_input
> >  esp_input
> >crypto_dispatch
> >  => crypto_taskq_mp_safe
> > 
> >kthread "crynlk"
> >  crypto_invoke
> >... (*1)
> >  crypto_done
> >  esp_input_cb
> >ipsec_common_input_cb
> >  ip_deliver
> >udp_input
> >  pipex_l2tp_input
> >pipex_common_input
> >  (*2)
> >  pipex_ppp_input
> >pipex_mppe_input (*3)
> >  pipex_ppp_input
> >pipex_ip_input
> >  ipv4_input
> >...
> 
> This should be
> 
>kthread "crynlk"
>  crypto_invoke
>... (*1)
>  crypto_done
>kthread "crypto" < another thread
>  ipsec_input_cb < this is missed
>esp_input_cb
>  ipsec_common_input_cb
>ip_deliver
>  udp_input
>pipex_l2tp_input
>  pipex_common_input
>(*2)
>pipex_ppp_input
>  pipex_mppe_input (*3)
>pipex_ppp_input
>  pipex_ip_input
>ipv4_input
>  ...
> 
> > At *2 there was a queue.  "crynlk" is a busy thread, since it is doing
> > decryption at *1.  I think it's better pipex input is be done by
> > another thread than crypto since it also has decryption at *3.
> 
> This is false.  *3 is done by another thread.
> It is the same if crypto driver is not CRYPTOCAP_F_MPSAFE.
> (crypto_invoke() is done by the caller's thread and the callback
>  (ipsec_input_cb) is called by"crypto" thread.)
> 
> So I have no actual reason to keep the queues.
> 
> ok yasuoka for the diff which kills pipexintr.
>

Thanks for explanation and reviews.

describe 'idle-timeout' exception in npppd.conf man page

2020-08-07 Thread Vitaliy Makkoveev

Some times ago we disabled in-kernel timeout for pppx(4) related
pipex(4) sessions. We did this for prevent use after free issue caused
by pipex_timer [1]. By default "idle-timeout" is not set in
npppd.conf(5) and I guess this is reason for we forgot to describe this
exception in npppd.conf(5).

But looks like one user caught this [2]. So I propose to describe this
in BUGS section of npppd.conf(5).

Also current "idle-timeout" description looks incorrect. If this option
is missing, there is not in-kernel timeout for this session, but
npppd(8) uses it's own timeout for. And we can't configure this value.

YASUOKA, what do you think? May be we can kill in-kernel timeout feature
for pipex(4)?, and make npppd(8)'s idle timeout configurable by this
option?

1. 
https://cvsweb.openbsd.org/src/sys/net/if_pppx.c?rev=1.78=text/x-cvsweb-markup
2. https://marc.info/?l=openbsd-misc=159655468504864=2 


Index: usr.sbin/npppd/npppd/npppd.conf.5
===
RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
retrieving revision 1.27
diff -u -p -r1.27 npppd.conf.5
--- usr.sbin/npppd/npppd/npppd.conf.5   23 Apr 2020 21:10:54 -  1.27
+++ usr.sbin/npppd/npppd/npppd.conf.5   7 Aug 2020 19:17:00 -
@@ -699,3 +699,9 @@ The current version of
 .Xr npppd 8
 does not support adding or removing tunnel settings or changing listener
 settings (listen address, port and l2tp-ipsec-require).
+.Pp
+This time
+.Xr pppx 4
+does not allow to create sessions with non null
+.Ic idle-timeout
+option.

Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread Vitaliy Makkoveev

On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> Thank you for your review.
> 
> On Sun, 9 Aug 2020 20:03:50 +0300
> Vitaliy Makkoveev  wrote:
> > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> >> You propose to unlink pppx(4) related session which reached timeout. I'm
> >> ok with this direction. But I see no reason to rework _get_closed()
> >> routines.
> >> 
> >> in pppac(4) case it's assumed what if session is not yet destroyed by
> >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> >> command. We can make pppx(4) behavior the same and I propose to
> >> pppx_get_closed() be like below. 
> >> 
> >> Also, nothing requires to modify pipex_get_closed(). 
> >> 
> >>  cut begin 
> > 
> > Sorry, I mean
> > 
> > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > {
> > struct pppx_if  *pxi;
> > 
> > memset(req, 0, sizeof(*req));
> > 
> > while ((pxi = LIST_FIRST(>pxd_pxis))) {
> > if (pxi->pxi_session->state == session->state =
> > PIPEX_STATE_CLOSED) {
> > req->plr_ppp_id[req->plr_ppp_id_count++] =
> > pxi->pxi_session->ppp_id;
> > pppx_if_destroy(pxi);
> > }
> > }
> > 
> > return 0;
> > }
> 
> Yes, the diff doesn't seem to be completed but this way also will work.
> 
> Usually there is few CLOSED session even if there is a lot of session.
> Also there is no CLOSED session if idle-timeout is not configured.  I
> avoided that way because I think checking all sessions' state to find
> such the few sessions is too expensive.
> 
> A way I am suggesting:
> 
> @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
>  
>  Static int
>  pipex_get_closed(struct pipex_session_list_req *req,
> -struct pipex_iface_context *iface)
> +int (*isowner)(void *, struct pipex_session *), void *ctx)
>  {
>   struct pipex_session *session, *session_tmp;
>  
> @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
>   bzero(req, sizeof(*req));
>   LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
>   session_tmp) {
> - if (session->pipex_iface != iface)
> + if (!isowner(ctx, session))
>   continue;
>   req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
>   LIST_REMOVE(session, state_list);
> 
> uses pipex_close_wait_list which contains only sessions which is timed
> out.

You are right. pipex_get_closed() walks through `pipex_close_wait_list'
which contains only CLOSE_WAIT sessions.

According to npppd(8) code we do PIPEXGCLOSED related walkthrough once
per NPPPD_TIMER_TICK_IVAL seconds, which is defined as 4. Is this such
performance impact?

Also who should destroy these sessions? It's assumed npppd(8) will
destroy them by l2tp_ctrl_timeout() and pptp_ctrl_timeout()? Excuse me
if I'm wrong, but who will destroy sessions in pppoe case?

> 
> >> Also I have one inlined comment within your diff. 
> 
> >> > @@ -430,6 +425,7 @@ pipex_link_session(struct pipex_session 
> >> >  struct pipex_iface_context *iface)
> >> >  {
> >> >  struct pipex_hash_head *chain;
> >> > +struct ifnet *ifp;
> >> >  
> >> >  NET_ASSERT_LOCKED();
> >> >  
> >> > @@ -442,6 +438,11 @@ pipex_link_session(struct pipex_session 
> >> >  session->pipex_iface = iface;
> >> >  session->ifindex = iface->ifindex;
> >> >  
> >> > +ifp = if_get(iface->ifindex);
> >> > +if (ifp != NULL && ifp->if_flags & IFF_POINTOPOINT)
> >> > +session->is_p2p = 1;
> >> > +if_put(ifp);
> >> > +
> >> 
> >> I guess NULL `ifp' here exposes us a bug. I like to have assertion here.
> 
> ok, I agree here.
> 
> 
> The diff is updated.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 if_pppx.c
> --- sys/net/if_pppx.c 28 Jul 2020 09:53:36 -  1.98
> +++ sys/net/if_pppx.c 10 Aug 2020 06:09:52 -
> @@ -185,6 +185,7 @@ int   pppx_config_session(struct pppx_dev
>   struct pipex_session_config_req *);
>

Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread Vitaliy Makkoveev

On Wed, Aug 12, 2020 at 12:37:13AM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> On Mon, 10 Aug 2020 16:30:27 +0300
> Vitaliy Makkoveev  wrote:
> > On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> >> On Sun, 9 Aug 2020 20:03:50 +0300
> >> Vitaliy Makkoveev  wrote:
> >> > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> >> >> You propose to unlink pppx(4) related session which reached timeout. I'm
> >> >> ok with this direction. But I see no reason to rework _get_closed()
> >> >> routines.
> >> >> 
> >> >> in pppac(4) case it's assumed what if session is not yet destroyed by
> >> >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> >> >> command. We can make pppx(4) behavior the same and I propose to
> >> >> pppx_get_closed() be like below. 
> >> >> 
> >> >> Also, nothing requires to modify pipex_get_closed(). 
> >> >> 
> >> >>  cut begin 
> >> > 
> >> > Sorry, I mean
> >> > 
> >> > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> >> > {
> >> >  struct pppx_if  *pxi;
> >> > 
> >> >  memset(req, 0, sizeof(*req));
> >> > 
> >> >  while ((pxi = LIST_FIRST(>pxd_pxis))) {
> >> >  if (pxi->pxi_session->state == session->state =
> >> >  PIPEX_STATE_CLOSED) {
> >> >  req->plr_ppp_id[req->plr_ppp_id_count++] =
> >> >  pxi->pxi_session->ppp_id;
> >> >  pppx_if_destroy(pxi);
> >> >  }
> >> >  }
> >> > 
> >> >  return 0;
> >> > }
> >> 
> >> Yes, the diff doesn't seem to be completed but this way also will work.
> >> 
> >> Usually there is few CLOSED session even if there is a lot of session.
> >> Also there is no CLOSED session if idle-timeout is not configured.  I
> >> avoided that way because I think checking all sessions' state to find
> >> such the few sessions is too expensive.
> >> 
> >> A way I am suggesting:
> >> 
> >> @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
> >>  
> >>  Static int
> >>  pipex_get_closed(struct pipex_session_list_req *req,
> >> -struct pipex_iface_context *iface)
> >> +int (*isowner)(void *, struct pipex_session *), void *ctx)
> >>  {
> >>struct pipex_session *session, *session_tmp;
> >>  
> >> @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
> >>bzero(req, sizeof(*req));
> >>LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
> >>session_tmp) {
> >> -  if (session->pipex_iface != iface)
> >> +  if (!isowner(ctx, session))
> >>continue;
> >>req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
> >>LIST_REMOVE(session, state_list);
> >> 
> >> uses pipex_close_wait_list which contains only sessions which is timed
> >> out.
> > 
> > You are right. pipex_get_closed() walks through `pipex_close_wait_list'
> > which contains only CLOSE_WAIT sessions.
> > 
> > According to npppd(8) code we do PIPEXGCLOSED related walkthrough once
> > per NPPPD_TIMER_TICK_IVAL seconds, which is defined as 4. Is this such
> > performance impact?
> 
> It might be not so expensive for you.  But why do you intend to use
> that extra CPU when you have a cheaper way?

Please don't assume my objections like this. Like you, I want to keep
pppac(4) and pppx(4) close as possible.

Let me explain the reason of my objection versus your diff.

We have 2 different cases to destroy pipex(4) session:

1. pppx(4). We just destroy session by PIPEXDSESSION command. We can't
permit this session to be killed by pipex_timer().

2. pppac(4). While we performing PIPEXDSESSION command we mark session
as PIPEX_STATE_CLOSE_WAIT2 and assume that pipex_timer() will kill it.
Also pipex_timer() will always kill expired sessions.

Your diff kept pppac(4) behavior a but introduce new case for pppx(4):
expired sessions will still exist in unlinked state. Userland should do
garbage collecting. I was afraid these sessions will be not killed until
npppd(8) shutdown.

I looked to npppd(8) code, but it was no obvious to me that userland
should do garbage collecting. My fault, I had to be asked before. But
now you explained, it's assumed

Re: pipex "idle-timeout" work with pppx(4).

2020-08-11 Thread Vitaliy Makkoveev

On Wed, Aug 12, 2020 at 01:36:38AM +0900, YASUOKA Masahiko wrote:
> 
> my diff is to make pppx(4) have the same "idle-timeout"
> functionality.  I strongly think pppx(4) must have the same
> functionalities of pppac(4) because I don't see any reason to have
> any difference between pppx(4) and pppac(4).
>

Yes, I want the same :)

> Your pseudo code is suggesting another thing.  You would like to
> change the existing behavior of pppac(4)?  Then, what is a problem you
> concern.  I'd like you to provide what is the relation of my diff or a
> background of the code.
>

We have the differences with in-kernel garbage collecting. I was
afraid we could have the case while expired pppx(4) sessions will not be
killed until npppd(8) shutdown. In your previous mail [1] you explained
about npppd(8) garbage collecting. I hope you like my solution I
proposed in [2]

1. https://marc.info/?l=openbsd-tech=159716033115495=2
2. https://marc.info/?l=openbsd-tech=159717643020853=2

Re: pipex "idle-timeout" work with pppx(4).

2020-08-10 Thread Vitaliy Makkoveev

We are doing all wrong :)

We can just unlink pppx(4) related session from `pipex_session_list' if
it's time expired. But since this unlinked session is still exists in
pppx(4) layer we can access through pppx_get_closed() without any
search. We should only add flag to session which identifies it as
pppx(4) related.

I hope you like this idea.

 cut begin 
Static void
pipex_timer(void *ignored_arg)
{
struct pipex_session *session, *session_tmp;

timeout_add_sec(_timer_ch, pipex_prune);

NET_LOCK();
/* walk through */
LIST_FOREACH_SAFE(session, _session_list, session_list,
session_tmp) {
switch (session->state) {
case PIPEX_STATE_OPENED:
if (session->timeout_sec == 0)
continue;

session->stat.idle_time++;
if (session->stat.idle_time < session->timeout_sec)
continue;

if (session->pppx_session)
pipex_unlink_session(session);
else
pipex_notify_close_session(session);
break;
/* ... */
}

pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
{
struct pppx_if *pxi;

pxi = pppx_if_find(pxd, req->pdr_session_id, req->pdr_protocol);
if (pxi == NULL)
return (EINVAL);

memset(req, 0, sizeof(*req));
if (session->state == PIPEX_STATE_CLOSED) {
req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
pppx_if_destroy(pxi);   
}

return 0;
}

 cut end 


On Mon, Aug 10, 2020 at 04:30:27PM +0300, Vitaliy Makkoveev wrote:
> On Mon, Aug 10, 2020 at 03:12:02PM +0900, YASUOKA Masahiko wrote:
> > Hi,
> > 
> > Thank you for your review.
> > 
> > On Sun, 9 Aug 2020 20:03:50 +0300
> > Vitaliy Makkoveev  wrote:
> > > On Sun, Aug 09, 2020 at 06:20:13PM +0300, Vitaliy Makkoveev wrote:
> > >> You propose to unlink pppx(4) related session which reached timeout. I'm
> > >> ok with this direction. But I see no reason to rework _get_closed()
> > >> routines.
> > >> 
> > >> in pppac(4) case it's assumed what if session is not yet destroyed by
> > >> garbage collector, it will be destroyed while we performing PIPEXGCLOSED
> > >> command. We can make pppx(4) behavior the same and I propose to
> > >> pppx_get_closed() be like below. 
> > >> 
> > >> Also, nothing requires to modify pipex_get_closed(). 
> > >> 
> > >>  cut begin 
> > > 
> > > Sorry, I mean
> > > 
> > > pppx_get_closed(struct pppx_dev *pxd, struct pipex_session_list_req *req)
> > > {
> > >   struct pppx_if  *pxi;
> > > 
> > >   memset(req, 0, sizeof(*req));
> > > 
> > >   while ((pxi = LIST_FIRST(>pxd_pxis))) {
> > >   if (pxi->pxi_session->state == session->state =
> > >   PIPEX_STATE_CLOSED) {
> > >   req->plr_ppp_id[req->plr_ppp_id_count++] =
> > >   pxi->pxi_session->ppp_id;
> > >   pppx_if_destroy(pxi);
> > >   }
> > >   }
> > > 
> > >   return 0;
> > > }
> > 
> > Yes, the diff doesn't seem to be completed but this way also will work.
> > 
> > Usually there is few CLOSED session even if there is a lot of session.
> > Also there is no CLOSED session if idle-timeout is not configured.  I
> > avoided that way because I think checking all sessions' state to find
> > such the few sessions is too expensive.
> > 
> > A way I am suggesting:
> > 
> > @@ -622,7 +625,7 @@ pipex_get_stat(struct pipex_session_stat
> >  
> >  Static int
> >  pipex_get_closed(struct pipex_session_list_req *req,
> > -struct pipex_iface_context *iface)
> > +int (*isowner)(void *, struct pipex_session *), void *ctx)
> >  {
> > struct pipex_session *session, *session_tmp;
> >  
> > @@ -630,7 +633,7 @@ pipex_get_closed(struct pipex_session_li
> > bzero(req, sizeof(*req));
> > LIST_FOREACH_SAFE(session, _close_wait_list, state_list,
> > session_tmp) {
> > -   if (session->pipex_iface != iface)
> > +   if (!isowner(ctx, session))
> > continue;
> > req->plr_ppp_id[req->plr_ppp_id_count++] = session->ppp_id;
> > LIST_REMOVE(session, state

1 2 3 4 5 6 7 8 >

1 - 100 of 774 matches

Mail list logo