Re: bgpd pftable change

2020-11-14 Thread David Gwynne
I've been using this for a week or so now and it's been very boring, which is 
an improvement in my experience.

I has my ok if that has any value.

dlg

> On 9 Nov 2020, at 8:16 pm, Claudio Jeker  wrote:
> 
> Hi bgpd and esp. bgpd-spamd users,
> 
> Currently the pftable code does not keep track how often a prefix was
> added to a pftable. Because of this using the same pftable for multiple
> neighbor tables does not work well. If one neighbor withdraws a route the
> pftable entry is removed from the table no matter if the prefix is still
> available from the other neighbor.
> 
> This diff changes this behaviour and introduces proper reference counting.
> A pftable entry will now remain in the table until the last neighbor
> withdraws the prefix. This makes much more sense and should not break
> working setups. It will fix configs where more than one neighbor feeds
> into the bgpd pftable.
> 
> As a side-effect bgpd will no commit pftable updates on a more regular
> basis and not wait until some operations (full table walk) finished. This
> should result in better responsiveness of updates.
> 
> Please test :)
> -- 
> :wq Claudio
> 
> Index: rde.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.506
> diff -u -p -r1.506 rde.c
> --- rde.c 5 Nov 2020 14:44:59 -   1.506
> +++ rde.c 9 Nov 2020 10:06:51 -
> @@ -69,6 +69,7 @@ void rde_dump_ctx_terminate(pid_t);
> void   rde_dump_mrt_new(struct mrt *, pid_t, int);
> 
> intrde_l3vpn_import(struct rde_community *, struct l3vpn *);
> +static void   rde_commit_pftable(void);
> void   rde_reload_done(void);
> static voidrde_softreconfig_in_done(void *, u_int8_t);
> static voidrde_softreconfig_out_done(void *, u_int8_t);
> @@ -296,6 +297,8 @@ rde_main(int debug, int verbose)
>   for (aid = AID_INET6; aid < AID_MAX; aid++)
>   rde_update6_queue_runner(aid);
>   }
> + /* commit pftable once per poll loop */
> + rde_commit_pftable();
>   }
> 
>   /* do not clean up on shutdown on production, it takes ages. */
> @@ -497,8 +500,6 @@ badnetdel:
>   RDE_RUNNER_ROUNDS, peerself, network_flush_upcall,
>   NULL, NULL) == -1)
>   log_warn("rde_dispatch: IMSG_NETWORK_FLUSH");
> - /* Deletions were performed in network_flush_upcall */
> - rde_send_pftable_commit();
>   break;
>   case IMSG_FILTER_SET:
>   if (imsg.hdr.len - IMSG_HEADER_SIZE !=
> @@ -1389,7 +1390,6 @@ rde_update_dispatch(struct rde_peer *pee
> 
> done:
>   rde_filterstate_clean();
> - rde_send_pftable_commit();
> }
> 
> int
> @@ -2950,10 +2950,31 @@ rde_update6_queue_runner(u_int8_t aid)
> /*
>  * pf table specific functions
>  */
> +struct rde_pftable_node {
> + RB_ENTRY(rde_pftable_node)   entry;
> + struct pt_entry *prefix;
> + int  refcnt;
> + u_int16_tid;
> +};
> +RB_HEAD(rde_pftable_tree, rde_pftable_node);
> +
> +static inline int
> +rde_pftable_cmp(struct rde_pftable_node *a, struct rde_pftable_node *b)
> +{
> + if (a->prefix > b->prefix)
> + return 1;
> + if (a->prefix < b->prefix)
> + return -1;
> + return (a->id - b->id);
> +}
> +
> +RB_GENERATE_STATIC(rde_pftable_tree, rde_pftable_node, entry, 
> rde_pftable_cmp);
> +
> +struct rde_pftable_tree pftable_tree = RB_INITIALIZER(_tree);
> int need_commit;
> -void
> -rde_send_pftable(u_int16_t id, struct bgpd_addr *addr,
> -u_int8_t len, int del)
> +
> +static void
> +rde_pftable_send(u_int16_t id, struct pt_entry *pt, int del)
> {
>   struct pftable_msg pfm;
> 
> @@ -2966,8 +2987,8 @@ rde_send_pftable(u_int16_t id, struct bg
> 
>   bzero(, sizeof(pfm));
>   strlcpy(pfm.pftable, pftable_id2name(id), sizeof(pfm.pftable));
> - memcpy(, addr, sizeof(pfm.addr));
> - pfm.len = len;
> + pt_getaddr(pt, );
> + pfm.len = pt->prefixlen;
> 
>   if (imsg_compose(ibuf_main,
>   del ? IMSG_PFTABLE_REMOVE : IMSG_PFTABLE_ADD,
> @@ -2978,7 +2999,55 @@ rde_send_pftable(u_int16_t id, struct bg
> }
> 
> void
> -rde_send_pftable_commit(void)
> +rde_pftable_add(u_int16_t id, struct prefix *p)
> +{
> + struct rde_pftable_node *pfn, node;
> +
> + memset(, 0, sizeof(node));
> + node.prefix = p->pt;
> + node.id = id;
> +
> + pfn = RB_FIND(rde_pftable_tree, _tree, );
> + if (pfn == NULL) {
> + if ((pfn = calloc(1, sizeof(*pfn))) == NULL)
> + fatal("%s", __func__);
> + pfn->prefix = pt_ref(p->pt);
> + pfn->id = id;
> +
> + if (RB_INSERT(rde_pftable_tree, _tree, pfn) != NULL)
> + fatalx("%s: 

Re: Convert sysctl_sysvsem to sysctl_bounded_args

2020-11-14 Thread Greg Steuck
Greg Steuck  writes:

> I went a tiny bit beyond pure textual conversion and moved a bit of
> code.

Testing log for giggles:

% doas sysctl -w kern.seminfo.semmnu=30
kern.seminfo.semmnu: 30 -> 30
% doas sysctl -w kern.seminfo.semmnu=29
sysctl: kern.seminfo.semmnu: Invalid argument
% doas sysctl -w kern.seminfo.semmnu=30
kern.seminfo.semmnu: 30 -> 30
% doas sysctl -w kern.seminfo.semmnu=31
kern.seminfo.semmnu: 30 -> 31
% doas sysctl -w kern.seminfo.semmnu=30
sysctl: kern.seminfo.semmnu: Invalid argument
% doas sysctl kern.seminfo
kern.seminfo.semmni=10
kern.seminfo.semmns=60
kern.seminfo.semmnu=31
kern.seminfo.semmsl=60
kern.seminfo.semopm=100
kern.seminfo.semume=10
kern.seminfo.semusz=112
kern.seminfo.semvmx=32767
kern.seminfo.semaem=16384
% doas sysctl kern.seminfo.semmni=10
kern.seminfo.semmni: 10 -> 10
% doas sysctl kern.seminfo.semmni=9
sysctl: kern.seminfo.semmni: Invalid argument
% doas sysctl kern.seminfo.semmni=11
kern.seminfo.semmni: 10 -> 11
% doas sysctl kern.seminfo.semmni=10
sysctl: kern.seminfo.semmni: Invalid argument
% doas sysctl kern.seminfo
kern.seminfo.semmni=11
kern.seminfo.semmns=60
kern.seminfo.semmnu=31
kern.seminfo.semmsl=60
kern.seminfo.semopm=100
kern.seminfo.semume=10
kern.seminfo.semusz=112
kern.seminfo.semvmx=32767
kern.seminfo.semaem=16384
% doas sysctl -w kern.seminfo.semusz=112
sysctl: kern.seminfo.semusz: Operation not permitted
% doas sysctl -w kern.seminfo.semmns=65536
sysctl: kern.seminfo.semmns: Invalid argument
% doas sysctl -w kern.seminfo.semmns=65535
kern.seminfo.semmns: 60 -> 65535



Convert sysctl_sysvsem to sysctl_bounded_args

2020-11-14 Thread Greg Steuck
I went a tiny bit beyond pure textual conversion and moved a bit of code.

OK?

>From d8ace6480e76e948631a40bcb8fa812366b82384 Mon Sep 17 00:00:00 2001
From: Greg Steuck 
Date: Sat, 14 Nov 2020 20:55:14 -0800
Subject: [PATCH 3/3] Convert sysctl_sysvsem to sysctl_bounded_args

Used sysctl_int_bounded in many places to shrink code.
Extracted a new function to make the case tidy.
Removed some superflous fluff.
---
 sys/kern/sysv_sem.c | 127 
 1 file changed, 47 insertions(+), 80 deletions(-)

diff --git sys/kern/sysv_sem.c sys/kern/sysv_sem.c
index 8425888ccea..feddabe9550 100644
--- sys/kern/sysv_sem.c
+++ sys/kern/sysv_sem.c
@@ -838,6 +838,35 @@ semexit(struct process *pr)
semutot--;
 }
 
+/* Expand semsegs and semseqs arrays */
+void
+sema_reallocate(int val)
+{
+   struct semid_ds **sema_new;
+   unsigned short *newseqs;
+   sema_new = mallocarray(val, sizeof(struct semid_ds *),
+   M_SEM, M_WAITOK|M_ZERO);
+   memcpy(sema_new, sema,
+   seminfo.semmni * sizeof(struct semid_ds *));
+   newseqs = mallocarray(val, sizeof(unsigned short), M_SEM,
+   M_WAITOK|M_ZERO);
+   memcpy(newseqs, semseqs,
+   seminfo.semmni * sizeof(unsigned short));
+   free(sema, M_SEM, seminfo.semmni * sizeof(struct semid_ds *));
+   free(semseqs, M_SEM, seminfo.semmni * sizeof(unsigned short));
+   sema = sema_new;
+   semseqs = newseqs;
+   seminfo.semmni = val;
+}
+
+const struct sysctl_bounded_args sysvsem_vars[] = {
+   { KERN_SEMINFO_SEMUME, , 1, 0 },
+   { KERN_SEMINFO_SEMUSZ, , 1, 0 },
+   { KERN_SEMINFO_SEMVMX, , 1, 0 },
+   { KERN_SEMINFO_SEMAEM, , 1, 0 },
+   { KERN_SEMINFO_SEMOPM, , 1, INT_MAX },
+};
+
 /*
  * Userland access to struct seminfo.
  */
@@ -846,97 +875,35 @@ sysctl_sysvsem(int *name, u_int namelen, void *oldp, 
size_t *oldlenp,
void *newp, size_t newlen)
 {
int error, val;
-   struct semid_ds **sema_new;
-   unsigned short *newseqs;
 
-   if (namelen != 2) {
-   switch (name[0]) {
-   case KERN_SEMINFO_SEMMNI:
-   case KERN_SEMINFO_SEMMNS:
-   case KERN_SEMINFO_SEMMNU:
-   case KERN_SEMINFO_SEMMSL:
-   case KERN_SEMINFO_SEMOPM:
-   case KERN_SEMINFO_SEMUME:
-   case KERN_SEMINFO_SEMUSZ:
-   case KERN_SEMINFO_SEMVMX:
-   case KERN_SEMINFO_SEMAEM:
-   break;
-   default:
-return (ENOTDIR);   /* overloaded */
-}
-}
+   if (namelen != 1)
+return (ENOTDIR);   /* leaf-only */
 
switch (name[0]) {
case KERN_SEMINFO_SEMMNI:
val = seminfo.semmni;
-   if ((error = sysctl_int(oldp, oldlenp, newp, newlen, )) ||
-   val == seminfo.semmni)
+   error = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+   , val, 0x);
+   /* returns success and skips reallocation if val is unchanged */
+   if (error || val == seminfo.semmni)
return (error);
-
-   if (val < seminfo.semmni || val > 0x)
-   return (EINVAL);
-
-   /* Expand semsegs and semseqs arrays */
-   sema_new = mallocarray(val, sizeof(struct semid_ds *),
-   M_SEM, M_WAITOK|M_ZERO);
-   memcpy(sema_new, sema,
-   seminfo.semmni * sizeof(struct semid_ds *));
-   newseqs = mallocarray(val, sizeof(unsigned short), M_SEM,
-   M_WAITOK|M_ZERO);
-   memcpy(newseqs, semseqs,
-   seminfo.semmni * sizeof(unsigned short));
-   free(sema, M_SEM, seminfo.semmni * sizeof(struct semid_ds *));
-   free(semseqs, M_SEM, seminfo.semmni * sizeof(unsigned short));
-   sema = sema_new;
-   semseqs = newseqs;
-   seminfo.semmni = val;
+   sema_reallocate(val);
return (0);
case KERN_SEMINFO_SEMMNS:
-   val = seminfo.semmns;
-   if ((error = sysctl_int(oldp, oldlenp, newp, newlen, )) ||
-   val == seminfo.semmns)
-   return (error);
-   if (val < seminfo.semmns || val > 0x)
-   return (EINVAL);/* can't decrease semmns */
-   seminfo.semmns = val;
-   return (0);
+   /* can't decrease semmns or go over 2^16 */
+   return (sysctl_int_bounded(oldp, oldlenp, newp, newlen,
+   , seminfo.semmns, 0x));
case KERN_SEMINFO_SEMMNU:
-   val = seminfo.semmnu;
-   if ((error = sysctl_int(oldp, oldlenp, newp, newlen, )) ||
-   val == seminfo.semmnu)
-   return (error);
-

Remove the cases folded into sysctl_bounded_args but left behind

2020-11-14 Thread Greg Steuck
This an "oops" moment... The code remained correct, but I forgot to
remove junk after converting it.

>From 68a12efd641c4fc5f75ac693bf7ed7487948e0dd Mon Sep 17 00:00:00 2001
From: Greg Steuck 
Date: Sat, 14 Nov 2020 16:12:50 -0800
Subject: [PATCH 2/3] Remove the cases folded into sysctl_bounded_args but left
 behind

divert_sysctl and divert6_sysctl get a tiny bit slimmer.
---
 sys/netinet/ip_divert.c   | 12 
 sys/netinet6/ip6_divert.c | 12 
 2 files changed, 24 deletions(-)

diff --git sys/netinet/ip_divert.c sys/netinet/ip_divert.c
index 47716da5887..f1174fae2c7 100644
--- sys/netinet/ip_divert.c
+++ sys/netinet/ip_divert.c
@@ -379,18 +379,6 @@ divert_sysctl(int *name, u_int namelen, void *oldp, size_t 
*oldlenp, void *newp,
return (ENOTDIR);
 
switch (name[0]) {
-   case DIVERTCTL_SENDSPACE:
-   NET_LOCK();
-   error = sysctl_int(oldp, oldlenp, newp, newlen,
-   _sendspace);
-   NET_UNLOCK();
-   return (error);
-   case DIVERTCTL_RECVSPACE:
-   NET_LOCK();
-   error = sysctl_int(oldp, oldlenp, newp, newlen,
-   _recvspace);
-   NET_UNLOCK();
-   return (error);
case DIVERTCTL_STATS:
return (divert_sysctl_divstat(oldp, oldlenp, newp));
default:
diff --git sys/netinet6/ip6_divert.c sys/netinet6/ip6_divert.c
index 5d071a9a764..fa02dc1b1ec 100644
--- sys/netinet6/ip6_divert.c
+++ sys/netinet6/ip6_divert.c
@@ -385,18 +385,6 @@ divert6_sysctl(int *name, u_int namelen, void *oldp, 
size_t *oldlenp,
return (ENOTDIR);
 
switch (name[0]) {
-   case DIVERT6CTL_SENDSPACE:
-   NET_LOCK();
-   error = sysctl_int(oldp, oldlenp, newp, newlen,
-   _sendspace);
-   NET_UNLOCK();
-   return (error);
-   case DIVERT6CTL_RECVSPACE:
-   NET_LOCK();
-   error = sysctl_int(oldp, oldlenp, newp, newlen,
-   _recvspace);
-   NET_UNLOCK();
-   return (error);
case DIVERT6CTL_STATS:
return (divert6_sysctl_div6stat(oldp, oldlenp, newp));
default:
-- 
2.29.2



Convert fusefs_sysctl to sysctl_bounded_args

2020-11-14 Thread Greg Steuck
This is trivial more-of-the-same. If somebody spots a bug, do speak
up. I feel pretty good about committing this promptly.

>From f34f5853d7b8f58c677850766353fc72cb9ae5b8 Mon Sep 17 00:00:00 2001
From: Greg Steuck 
Date: Sat, 14 Nov 2020 16:00:53 -0800
Subject: [PATCH 1/3] Convert fusefs_sysctl to sysctl_bounded_args

---
 sys/miscfs/fuse/fuse_vfsops.c | 31 +++
 1 file changed, 11 insertions(+), 20 deletions(-)

diff --git sys/miscfs/fuse/fuse_vfsops.c sys/miscfs/fuse/fuse_vfsops.c
index 561c77f9aa4..5e3becfc02b 100644
--- sys/miscfs/fuse/fuse_vfsops.c
+++ sys/miscfs/fuse/fuse_vfsops.c
@@ -353,30 +353,21 @@ fusefs_init(struct vfsconf *vfc)
return (0);
 }
 
+extern int stat_fbufs_in, stat_fbufs_wait, stat_opened_fusedev;
+
+const struct sysctl_bounded_args fusefs_vars[] = {
+   { FUSEFS_OPENDEVS, _opened_fusedev, 1, 0 },
+   { FUSEFS_INFBUFS, _fbufs_in, 1, 0 },
+   { FUSEFS_WAITFBUFS, _fbufs_wait, 1, 0 },
+   { FUSEFS_POOL_NBPAGES, _fbuf_pool.pr_npages, 1, 0 },
+};
+
 int
 fusefs_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp,
 void *newp, size_t newlen, struct proc *p)
 {
-   extern int stat_fbufs_in, stat_fbufs_wait, stat_opened_fusedev;
-
-   /* all sysctl names at this level are terminal */
-   if (namelen != 1)
-   return (ENOTDIR);   /* overloaded */
-
-   switch (name[0]) {
-   case FUSEFS_OPENDEVS:
-   return (sysctl_rdint(oldp, oldlenp, newp,
-   stat_opened_fusedev));
-   case FUSEFS_INFBUFS:
-   return (sysctl_rdint(oldp, oldlenp, newp, stat_fbufs_in));
-   case FUSEFS_WAITFBUFS:
-   return (sysctl_rdint(oldp, oldlenp, newp, stat_fbufs_wait));
-   case FUSEFS_POOL_NBPAGES:
-   return (sysctl_rdint(oldp, oldlenp, newp,
-   fusefs_fbuf_pool.pr_npages));
-   default:
-   return (EOPNOTSUPP);
-   }
+   return sysctl_bounded_arr(fusefs_vars, nitems(fusefs_vars), name,
+   namelen, oldp, oldlenp, newp, newlen);
 }
 
 int
-- 
2.29.2



Re: [PATCH] remove variable names from prototypes in sys_pipe.c

2020-11-14 Thread Sebastian Benoit
henkjan gersen(h.ger...@gmail.com) on 2020.11.14 19:00:15 +:
> OK, no problem it just looked strange when reading the code as most
> definitions in that file don't have them.
> 
> btw) I'm by now way more puzzled why this file has a function to create a pipe
> 
> struct pipe_pair *pipe_pair_create(void);
> 
> but lacks a corresponding destroy function. This makes it look like
> the pipes created as a pipe_pair don't get properly destroyed, but my C
> is rusty.

pipe(2) explains

  The pipe itself persists until all its associated descriptors are closed.



Re: [PATCH] remove variable names from prototypes in sys_pipe.c

2020-11-14 Thread henkjan gersen
OK, no problem it just looked strange when reading the code as most
definitions in that file don't have them.

btw) I'm by now way more puzzled why this file has a function to create a pipe

struct pipe_pair *pipe_pair_create(void);

but lacks a corresponding destroy function. This makes it look like
the pipes created as a pipe_pair don't get properly destroyed, but my C
is rusty.

On Sat, 14 Nov 2020 at 18:21, Theo de Raadt  wrote:
>
> I don't see the point of this.
>
> Yes, style(9) says don't do that.  It is because of cpp issues.
> It definately matters for userland-exposed definitions.
>
> But the kernel is largely immune to this concern, even in .h files
> Sice this is on one file, it matters even less since nothing in
> any .h file will define kn, fp, p, or ub.
>
> In the meantime, this provides a stronger understanding for the
> programmer working *in this file*
>
> Standing back further, how did this happen?  Someone felt more
> comfortable leaving the names there when cloning the definitions
> from the actual function start.  I don't think that is dangerous.
>
> In summary, I think the style(9) advice has't place, and that
> place isn't everywhere, or C itself would have made naming of
> prototype arguments illegal to cover for the risk of cpp confusion.
>
>
>
> henkjan gersen  wrote:
>
> > Patch below removes a couple of variable names from function prototypes
> > in sys_pipe.c to match style(9)
> >
> > ---
> > diff --git sys/kern/sys_pipe.c sys/kern/sys_pipe.c
> > index 04ec907d21f..85e69af7741 100644
> > --- sys/kern/sys_pipe.c
> > +++ sys/kern/sys_pipe.c
> > @@ -62,9 +62,9 @@ int   pipe_read(struct file *, struct uio *, int);
> >  intpipe_write(struct file *, struct uio *, int);
> >  intpipe_close(struct file *, struct proc *);
> >  intpipe_poll(struct file *, int events, struct proc *);
> > -intpipe_kqfilter(struct file *fp, struct knote *kn);
> > +intpipe_kqfilter(struct file *, struct knote *);
> >  intpipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
> > -intpipe_stat(struct file *fp, struct stat *ub, struct proc *p);
> > +intpipe_stat(struct file *, struct stat *, struct proc *);
> >
> >  static const struct fileops pipeops = {
> > .fo_read= pipe_read,
> > @@ -76,9 +76,9 @@ static const struct fileops pipeops = {
> > .fo_close   = pipe_close
> >  };
> >
> > -void   filt_pipedetach(struct knote *kn);
> > -intfilt_piperead(struct knote *kn, long hint);
> > -intfilt_pipewrite(struct knote *kn, long hint);
> > +void   filt_pipedetach(struct knote *);
> > +intfilt_piperead(struct knote *, long);
> > +intfilt_pipewrite(struct knote *, long);
> >
> >  const struct filterops pipe_rfiltops = {
> > .f_flags= FILTEROP_ISFD,
> >



Re: [PATCH] remove variable names from prototypes in sys_pipe.c

2020-11-14 Thread Theo de Raadt
I don't see the point of this.

Yes, style(9) says don't do that.  It is because of cpp issues.
It definately matters for userland-exposed definitions.

But the kernel is largely immune to this concern, even in .h files
Sice this is on one file, it matters even less since nothing in
any .h file will define kn, fp, p, or ub.

In the meantime, this provides a stronger understanding for the
programmer working *in this file*

Standing back further, how did this happen?  Someone felt more
comfortable leaving the names there when cloning the definitions
from the actual function start.  I don't think that is dangerous.

In summary, I think the style(9) advice has't place, and that
place isn't everywhere, or C itself would have made naming of
prototype arguments illegal to cover for the risk of cpp confusion.



henkjan gersen  wrote:

> Patch below removes a couple of variable names from function prototypes
> in sys_pipe.c to match style(9)
> 
> ---
> diff --git sys/kern/sys_pipe.c sys/kern/sys_pipe.c
> index 04ec907d21f..85e69af7741 100644
> --- sys/kern/sys_pipe.c
> +++ sys/kern/sys_pipe.c
> @@ -62,9 +62,9 @@ int   pipe_read(struct file *, struct uio *, int);
>  intpipe_write(struct file *, struct uio *, int);
>  intpipe_close(struct file *, struct proc *);
>  intpipe_poll(struct file *, int events, struct proc *);
> -intpipe_kqfilter(struct file *fp, struct knote *kn);
> +intpipe_kqfilter(struct file *, struct knote *);
>  intpipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
> -intpipe_stat(struct file *fp, struct stat *ub, struct proc *p);
> +intpipe_stat(struct file *, struct stat *, struct proc *);
> 
>  static const struct fileops pipeops = {
> .fo_read= pipe_read,
> @@ -76,9 +76,9 @@ static const struct fileops pipeops = {
> .fo_close   = pipe_close
>  };
> 
> -void   filt_pipedetach(struct knote *kn);
> -intfilt_piperead(struct knote *kn, long hint);
> -intfilt_pipewrite(struct knote *kn, long hint);
> +void   filt_pipedetach(struct knote *);
> +intfilt_piperead(struct knote *, long);
> +intfilt_pipewrite(struct knote *, long);
> 
>  const struct filterops pipe_rfiltops = {
> .f_flags= FILTEROP_ISFD,
> 



[PATCH] remove variable names from prototypes in sys_pipe.c

2020-11-14 Thread henkjan gersen
Patch below removes a couple of variable names from function prototypes
in sys_pipe.c to match style(9)

---
diff --git sys/kern/sys_pipe.c sys/kern/sys_pipe.c
index 04ec907d21f..85e69af7741 100644
--- sys/kern/sys_pipe.c
+++ sys/kern/sys_pipe.c
@@ -62,9 +62,9 @@ int   pipe_read(struct file *, struct uio *, int);
 intpipe_write(struct file *, struct uio *, int);
 intpipe_close(struct file *, struct proc *);
 intpipe_poll(struct file *, int events, struct proc *);
-intpipe_kqfilter(struct file *fp, struct knote *kn);
+intpipe_kqfilter(struct file *, struct knote *);
 intpipe_ioctl(struct file *, u_long, caddr_t, struct proc *);
-intpipe_stat(struct file *fp, struct stat *ub, struct proc *p);
+intpipe_stat(struct file *, struct stat *, struct proc *);

 static const struct fileops pipeops = {
.fo_read= pipe_read,
@@ -76,9 +76,9 @@ static const struct fileops pipeops = {
.fo_close   = pipe_close
 };

-void   filt_pipedetach(struct knote *kn);
-intfilt_piperead(struct knote *kn, long hint);
-intfilt_pipewrite(struct knote *kn, long hint);
+void   filt_pipedetach(struct knote *);
+intfilt_piperead(struct knote *, long);
+intfilt_pipewrite(struct knote *, long);

 const struct filterops pipe_rfiltops = {
.f_flags= FILTEROP_ISFD,



[patch] remove if-clause that is always true from kern_time.c

2020-11-14 Thread henkjan gersen
kern_time.c contains an if-clause that also appears in the NetBSD source
that is always true and hence has no effect. The patch below cleans this
up assuming the wrap-around protection should remain in place (FreeBSD
just wraps).

The patch itself is purely cosmetic and shouldn't affect the compiled code
in any way.
---
diff --git sys/kern/kern_time.c sys/kern/kern_time.c
index d34329ee642..a508289f011 100644
--- sys/kern/kern_time.c
+++ sys/kern/kern_time.c
@@ -824,20 +824,9 @@ ppsratecheck(struct timeval *lasttime, int
*curpps, int maxpps)
else
rv = 0;

-#if 1 /*DIAGNOSTIC?*/
/* be careful about wrap-around */
if (*curpps + 1 > *curpps)
*curpps = *curpps + 1;
-#else
-   /*
-* assume that there's not too many calls to this function.
-* not sure if the assumption holds, as it depends on *caller's*
-* behavior, not the behavior of this function.
-* IMHO it is wrong to make assumption on the caller's behavior,
-* so the above #if is #if 1, not #ifdef DIAGNOSTIC.
-*/
-   *curpps = *curpps + 1;
-#endif

return (rv);
 }



dt: add kernel function boundary tracing provider

2020-11-14 Thread Tom Rollet

Hi,

Here is a diff for dynamic tracing of kernel's functions boundaries.
It's implemented as one of the dt's provider on i386 and amd64.
To activate it, DDBPROF and pseudo device dt must be activated
on GENERIC.

For now it's working like DTRACE fbt(function boundaries tracing).

We replace the prologue and epilogue of each function with a breakpoint.
The replaced instruction on amd64 and 386 are respectively
"push %rbp", "push %ebp" for prologue and "ret", "pop %ebp" for epilogue.
These instructions are emulated at the end of the breakpoint handler,
just after sending an event to userland (to be read by btrace).

For now the lookup to find the instruction to replace is hardcoded
and needs to find if there is a retguard before the prologue of the
function or if there are multiples int3 after the last ret.
It allows to find 31896 entry points and 19513 return points on amd64.

ddb has also  a similar way of tracing all prologue of function. It now
uses this new dt provider for doing tracing.

Perf wise, when all entry probes are enabled, the slow down is
quite massive but expected since breakpoints are slow.
On the kernel compilation with 10 threads on a linux qemu VM we go from:
real   2m38.280s
user   10m2.050s
sys    14m10.360s

to:
real   24m19.280s
user   9m44.110s
sys    220m26.980s

Any comments on the diff?

Tom

diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S
index dd2dfde3e3b..3a0a58ba3ac 100644
--- a/sys/arch/amd64/amd64/vector.S
+++ b/sys/arch/amd64/amd64/vector.S
@@ -188,10 +188,11 @@ INTRENTRY_LABEL(trap03):
 sti
 cld
 SMAP_CLAC
-    movq    %rsp, %rdi
-    call    _C_LABEL(db_prof_hook)
-    cmpl    $1, %eax
-    jne    .Lreal_kern_trap
+    leaq    _C_LABEL(dt_prov_kprobe), %rdi
+    movq    %rsp, %rsi
+    call    _C_LABEL(dt_prov_kprobe_hook)
+    cmpl    $0, %eax
+    je .Lreal_kern_trap

 cli
 movq    TF_RDI(%rsp),%rdi
@@ -210,6 +211,11 @@ INTRENTRY_LABEL(trap03):
 movq    TF_R11(%rsp),%r11
 /* %rax restored below, after being used to shift the stack */

+    cmpl    $2, %eax
+    je  .Lemulate_ret
+
+.Lemulate_push_rbp:
+
 /*
  * We are returning from a probe trap so we need to fix the
  * stack layout and emulate the patched instruction.
@@ -217,6 +223,9 @@ INTRENTRY_LABEL(trap03):
  */
 subq    $16, %rsp

+    movq    (TF_RAX + 16)(%rsp), %rax
+    movq    %rax, TF_RAX(%rsp)
+
 /* Shift hardware-saved registers. */
 movq    (TF_RIP + 16)(%rsp), %rax
 movq    %rax, TF_RIP(%rsp)
@@ -237,7 +246,20 @@ INTRENTRY_LABEL(trap03):

 /* Finally restore %rax */
 movq    (TF_RAX + 16)(%rsp),%rax
+    jmp .ret_int3
+
+.Lemulate_ret:
+
+    /* Store a new return address in %rip */
+    movq    TF_RSP(%rsp), %rax
+    movq    (%rax), %rax
+    movq    %rax, TF_RIP(%rsp)
+    addq    $8, TF_RSP(%rsp)
+
+    /* Finally restore %rax */
+    movq    (TF_RAX)(%rsp),%rax

+.ret_int3:
 addq    $TF_RIP,%rsp
 iretq
 #endif /* !defined(GPROF) && defined(DDBPROF) */
diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s
index 0c5607fe38a..e2d43b47eb3 100644
--- a/sys/arch/i386/i386/locore.s
+++ b/sys/arch/i386/i386/locore.s
@@ -205,7 +205,8 @@ INTRENTRY_LABEL(label):    /* from kernel */    ; \
 #define    INTRFASTEXIT \
 jmp    intr_fast_exit

-#define    INTR_FAKE_TRAP    0xbadabada
+#define    INTR_FAKE_TRAP_PUSH_RPB    0xbadabada
+#define    INTR_FAKE_TRAP_POP_RBP    0xbcbcbcbc

 /*
  * PTmap is recursive pagemap at top of virtual address space.
@@ -1259,17 +1260,32 @@ calltrap:
 jne    .Lreal_trap

 pushl    %esp
-    call    _C_LABEL(db_prof_hook)
-    addl    $4,%esp
-    cmpl    $1,%eax
-    jne    .Lreal_trap
+    subl    $4, %esp
+    pushl   %eax
+    leal    _C_LABEL(dt_prov_kprobe), %eax
+    movl    %eax, 4(%esp)
+    popl    %eax
+    call    _C_LABEL(dt_prov_kprobe_hook)
+    addl    $8,%esp
+    cmpl    $0,%eax
+    je    .Lreal_trap

 /*
  * Abuse the error field to indicate that INTRFASTEXIT needs
  * to emulate the patched instruction.
  */
-    movl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-    jz    .Lalltraps_check_asts
+    cmpl $1, %eax
+    je .Lset_emulate_push_rbp
+
+    cmpl $2, %eax
+    je .Lset_emulate_ret
+
+.Lset_emulate_push_rbp:
+    movl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+    jmp    .Lalltraps_check_asts
+.Lset_emulate_ret:
+    movl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+    jmp    .Lalltraps_check_asts
 .Lreal_trap:
 #endif /* !defined(GPROF) && defined(DDBPROF) */
 pushl    %esp
@@ -1298,8 +1314,10 @@ calltrap:
  * The code below does that by trashing %eax, so it MUST be
  * restored afterward.
  */
-    cmpl    $INTR_FAKE_TRAP, TF_ERR(%esp)
-    je    .Lprobe_fixup
+    cmpl    $INTR_FAKE_TRAP_PUSH_RPB, TF_ERR(%esp)
+    je    .Lprobe_fixup_push_rbp
+    cmpl    $INTR_FAKE_TRAP_POP_RBP, TF_ERR(%esp)
+    je    .Lprobe_fixup_pop_rbp
 #endif /* !defined(GPROF) && defined(DDBPROF) */
 #ifndef 

Re: [PATCH]: Clearer documentation when using EVFILT_EXCEPT

2020-11-14 Thread Jason McIntyre
On Fri, Nov 13, 2020 at 11:50:09AM +0100, Emil Engler wrote:
> Currently it isn't mentioned that a socket is required when using 
> EVFILT_EXCEPT with NOTE_OOB. To some experienced users it might be clear 
> that it must be a socket but I don't think an additional word would hurt 
> anyone.
> 

committed, thanks, with a tweak - visa points out that pseudo terminals
are also relevant.

jmc

> Index: lib/libc/sys/kqueue.2
> ===
> RCS file: /cvs/src/lib/libc/sys/kqueue.2,v
> retrieving revision 1.42
> diff -u -p -u -p -r1.42 kqueue.2
> --- lib/libc/sys/kqueue.2   22 Jun 2020 13:42:06 -  1.42
> +++ lib/libc/sys/kqueue.2   13 Nov 2020 10:46:44 -
> @@ -315,7 +315,8 @@ Takes a descriptor as the identifier, an
>   specified exceptional conditions has occurred on the descriptor.
>   Conditions are specified in
>   .Fa fflags .
> -Currently, a filter can monitor the reception of out-of-band data with
> +Currently, a filter can monitor the reception of out-of-band data on a
> +socket with
>   .Dv NOTE_OOB .
>   .It Dv EVFILT_WRITE
>   Takes a descriptor as the identifier, and returns whenever
>