Re: pdaemon locking tweak

2022-08-29 Thread Jonathan Gray
On Mon, Aug 29, 2022 at 01:46:20PM +0200, Martin Pieuchot wrote:
> Diff below refactors the pdaemon's locking by introducing a new *trylock()
> function for a given page.  This is shamelessly stolen from NetBSD.
> 
> This is part of my ongoing effort to untangle the locks used by the page
> daemon.
> 
> ok?

if (pmap_is_referenced(p)) {
uvm_pageactivate(p);

is no longer under held slock.  Which I believe is intended,
just not obvious looking at the diff.

The page queue is already locked on entry to uvmpd_scan_inactive()

> 
> Index: uvm//uvm_pdaemon.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uvm_pdaemon.c
> --- uvm//uvm_pdaemon.c22 Aug 2022 12:03:32 -  1.102
> +++ uvm//uvm_pdaemon.c29 Aug 2022 11:36:59 -
> @@ -101,6 +101,7 @@ extern void drmbackoff(long);
>   * local prototypes
>   */
>  
> +struct rwlock*uvmpd_trylockowner(struct vm_page *);
>  void uvmpd_scan(struct uvm_pmalloc *);
>  void uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
>  void uvmpd_tune(void);
> @@ -367,6 +368,34 @@ uvm_aiodone_daemon(void *arg)
>  }
>  
>  
> +/*
> + * uvmpd_trylockowner: trylock the page's owner.
> + *
> + * => return the locked rwlock on success.  otherwise, return NULL.
> + */
> +struct rwlock *
> +uvmpd_trylockowner(struct vm_page *pg)
> +{
> +
> + struct uvm_object *uobj = pg->uobject;
> + struct rwlock *slock;
> +
> + if (uobj != NULL) {
> + slock = uobj->vmobjlock;
> + } else {
> + struct vm_anon *anon = pg->uanon;
> +
> + KASSERT(anon != NULL);
> + slock = anon->an_lock;
> + }
> +
> + if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
> + return NULL;
> + }
> +
> + return slock;
> +}
> +
>  
>  /*
>   * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
> @@ -454,53 +483,44 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>   uvmexp.pdscans++;
>   nextpg = TAILQ_NEXT(p, pageq);
>  
> + /*
> +  * move referenced pages back to active queue
> +  * and skip to next page.
> +  */
> + if (pmap_is_referenced(p)) {
> + uvm_pageactivate(p);
> + uvmexp.pdreact++;
> + continue;
> + }
> +
>   anon = p->uanon;
>   uobj = p->uobject;
> - if (p->pg_flags & PQ_ANON) {
> +
> + /*
> +  * first we attempt to lock the object that this page
> +  * belongs to.  if our attempt fails we skip on to
> +  * the next page (no harm done).  it is important to
> +  * "try" locking the object as we are locking in the
> +  * wrong order (pageq -> object) and we don't want to
> +  * deadlock.
> +  */
> + slock = uvmpd_trylockowner(p);
> + if (slock == NULL) {
> + continue;
> + }
> +
> + if (p->pg_flags & PG_BUSY) {
> + rw_exit(slock);
> + uvmexp.pdbusy++;
> + continue;
> + }
> +
> + /* does the page belong to an object? */
> + if (uobj != NULL) {
> + uvmexp.pdobscan++;
> + } else {
>   KASSERT(anon != NULL);
> - slock = anon->an_lock;
> - if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
> - /* lock failed, skip this page */
> - continue;
> - }
> - /*
> -  * move referenced pages back to active queue
> -  * and skip to next page.
> -  */
> - if (pmap_is_referenced(p)) {
> - uvm_pageactivate(p);
> - rw_exit(slock);
> - uvmexp.pdreact++;
> - continue;
> - }
> - if (p->pg_flags & PG_BUSY) {
> - rw_exit(slock);
> - uvmexp.pdbusy++;
> - continue;
> - }
>   uvmexp.pdanscan++;
> - } else {
> - 

Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-29 Thread Scott Cheloha
> On Aug 29, 2022, at 22:54, Jonathan Gray  wrote:
> 
> On Mon, Aug 29, 2022 at 12:02:42PM -0500, Scott Cheloha wrote:
>> If hv_delay() never causes a vm exit, but tsc_delay() *might* cause a
>> vm exit, and both have microsecond or better resolution, then
>> hv_delay() is the preferable delay(9) implementation where it is
>> available because vm exits have ambiguous overhead.
> 
> with hv_delay() currently doing rdmsr I wouldn't say never
> 
>> 
>> If that seems sensible to you, I'll commit this switch.
> 
> There is a msr to allow cpuid to report invariant tsc (0x4118).
> Used by linux but not documented in the hyper-v tlfs.
> 
> Without that, tsc delay is never used on hyper-v.  So leave it
> as is until someone running hyper-v/azure would like it changed?

Works for me.



Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-29 Thread Jonathan Gray
On Mon, Aug 29, 2022 at 12:02:42PM -0500, Scott Cheloha wrote:
> If hv_delay() never causes a vm exit, but tsc_delay() *might* cause a
> vm exit, and both have microsecond or better resolution, then
> hv_delay() is the preferable delay(9) implementation where it is
> available because vm exits have ambiguous overhead.

with hv_delay() currently doing rdmsr I wouldn't say never

> 
> If that seems sensible to you, I'll commit this switch.

There is a msr to allow cpuid to report invariant tsc (0x4118).
Used by linux but not documented in the hyper-v tlfs.

Without that, tsc delay is never used on hyper-v.  So leave it
as is until someone running hyper-v/azure would like it changed?

> 
> Index: arch/amd64/amd64/tsc.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 tsc.c
> --- arch/amd64/amd64/tsc.c25 Aug 2022 17:38:16 -  1.26
> +++ arch/amd64/amd64/tsc.c29 Aug 2022 16:58:25 -
> @@ -109,7 +109,7 @@ tsc_identify(struct cpu_info *ci)
>  
>   tsc_frequency = tsc_freq_cpuid(ci);
>   if (tsc_frequency > 0)
> - delay_init(tsc_delay, 5000);
> + delay_init(tsc_delay, 4000);
>  }
>  
>  static inline int
> Index: dev/pv/pvbus.c
> ===
> RCS file: /cvs/src/sys/dev/pv/pvbus.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 pvbus.c
> --- dev/pv/pvbus.c25 Aug 2022 17:38:16 -  1.25
> +++ dev/pv/pvbus.c29 Aug 2022 16:58:26 -
> @@ -320,7 +320,7 @@ pvbus_hyperv(struct pvbus_hv *hv)
>  
>  #if NHYPERV > 0
>   if (hv->hv_features & CPUID_HV_MSR_TIME_REFCNT)
> - delay_init(hv_delay, 4000);
> + delay_init(hv_delay, 5000);
>  #endif
>  }
>  
> 
> 



Re: refactor pcb lookup

2022-08-29 Thread Vitaliy Makkoveev
Looks good by me.

> On 29 Aug 2022, at 14:15, Alexander Bluhm  wrote:
> 
> Anyone?
> 
> On Sat, Aug 20, 2022 at 03:24:28PM +0200, Alexander Bluhm wrote:
>> Hi,
>> 
>> Can we rename the function in_pcbhashlookup() to in_pcblookup()?
>> Then we have in_pcblookup() and in_pcblookup_listen() as public PCB
>> interface.  Using a hash table is only an implementation detail.
>> 
>> For internal use I would like to introduce in_pcbhash_insert() and
>> in_pcbhash_lookup() to avoid code duplication.
>> 
>> Routing domain is unsigned, change the type to u_int.
>> 
>> If the diff is too large for review, I can split these parts.
>> 
>> ok?
>> 
>> bluhm
>> 
>> Index: net/pf.c
>> ===
>> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
>> retrieving revision 1.1137
>> diff -u -p -r1.1137 pf.c
>> --- net/pf.c 8 Aug 2022 12:06:30 -   1.1137
>> +++ net/pf.c 19 Aug 2022 16:22:47 -
>> @@ -3348,7 +3348,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
>>   * Fails when rtable is changed while evaluating the ruleset
>>   * The socket looked up will not match the one hit in the end.
>>   */
>> -inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport,
>> +inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
>>  pd->rdomain);
>>  if (inp == NULL) {
>>  inp = in_pcblookup_listen(tb, daddr->v4, dport,
>> @@ -3359,7 +3359,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
>>  break;
>> #ifdef INET6
>>  case AF_INET6:
>> -inp = in6_pcbhashlookup(tb, >v6, sport, >v6,
>> +inp = in6_pcblookup(tb, >v6, sport, >v6,
>>  dport, pd->rdomain);
>>  if (inp == NULL) {
>>  inp = in6_pcblookup_listen(tb, >v6, dport,
>> Index: netinet/in_pcb.c
>> ===
>> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
>> retrieving revision 1.270
>> diff -u -p -r1.270 in_pcb.c
>> --- netinet/in_pcb.c 8 Aug 2022 12:06:30 -   1.270
>> +++ netinet/in_pcb.c 19 Aug 2022 20:41:23 -
>> @@ -120,14 +120,16 @@ struct baddynamicports baddynamicports;
>> struct baddynamicports rootonlyports;
>> struct pool inpcb_pool;
>> 
>> -voidin_pcbrehash_locked(struct inpcb *);
>> +voidin_pcbhash_insert(struct inpcb *);
>> +struct inpcb *in_pcbhash_lookup(struct inpcbtable *, u_int,
>> +const struct in_addr *, u_short, const struct in_addr *, u_short);
>> int  in_pcbresize(struct inpcbtable *, int);
>> 
>> #define  INPCBHASH_LOADFACTOR(_x)(((_x) * 3) / 4)
>> 
>> -struct inpcbhead *in_pcbhash(struct inpcbtable *, int,
>> +struct inpcbhead *in_pcbhash(struct inpcbtable *, u_int,
>> const struct in_addr *, u_short, const struct in_addr *, u_short);
>> -struct inpcbhead *in_pcblhash(struct inpcbtable *, int, u_short);
>> +struct inpcbhead *in_pcblhash(struct inpcbtable *, u_int, u_short);
>> 
>> /*
>>  * in_pcb is used for inet and inet6.  in6_pcb only contains special
>> @@ -141,12 +143,12 @@ in_init(void)
>> }
>> 
>> struct inpcbhead *
>> -in_pcbhash(struct inpcbtable *table, int rdom,
>> +in_pcbhash(struct inpcbtable *table, u_int rdomain,
>> const struct in_addr *faddr, u_short fport,
>> const struct in_addr *laddr, u_short lport)
>> {
>>  SIPHASH_CTX ctx;
>> -u_int32_t nrdom = htonl(rdom);
>> +u_int32_t nrdom = htonl(rdomain);
>> 
>>  SipHash24_Init(, >inpt_key);
>>  SipHash24_Update(, , sizeof(nrdom));
>> @@ -159,10 +161,10 @@ in_pcbhash(struct inpcbtable *table, int
>> }
>> 
>> struct inpcbhead *
>> -in_pcblhash(struct inpcbtable *table, int rdom, u_short lport)
>> +in_pcblhash(struct inpcbtable *table, u_int rdomain, u_short lport)
>> {
>>  SIPHASH_CTX ctx;
>> -u_int32_t nrdom = htonl(rdom);
>> +u_int32_t nrdom = htonl(rdomain);
>> 
>>  SipHash24_Init(, >inpt_lkey);
>>  SipHash24_Update(, , sizeof(nrdom));
>> @@ -226,9 +228,6 @@ int
>> in_pcballoc(struct socket *so, struct inpcbtable *table)
>> {
>>  struct inpcb *inp;
>> -struct inpcbhead *head;
>> -
>> -NET_ASSERT_LOCKED();
>> 
>>  inp = pool_get(_pool, PR_NOWAIT|PR_ZERO);
>>  if (inp == NULL)
>> @@ -257,19 +256,7 @@ in_pcballoc(struct socket *so, struct in
>>  if (table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_size))
>>  (void)in_pcbresize(table, table->inpt_size * 2);
>>  TAILQ_INSERT_HEAD(>inpt_queue, inp, inp_queue);
>> -head = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport);
>> -LIST_INSERT_HEAD(head, inp, inp_lhash);
>> -#ifdef INET6
>> -if (sotopf(so) == PF_INET6)
>> -head = in6_pcbhash(table, rtable_l2(inp->inp_rtableid),
>> ->inp_faddr6, inp->inp_fport,
>> ->inp_laddr6, inp->inp_lport);
>> -else
>> -#endif /* INET6 */
>> -head = in_pcbhash(table, 

Re: udp pcb mutex

2022-08-29 Thread Vitaliy Makkoveev
> On 29 Aug 2022, at 20:34, Alexander Bluhm  wrote:
> 
> Hi,
> 
> The diff below is needed to protect the receive socket buffer in
> UDP input with per PCB mutex.
> 
> With that, parallel UDP input and soreceive can be activated.  There
> are still issues with socket splicing and maybe pipex.  So I will
> not switch to shared lock in receive path soon.  But I want to get
> the mutex part committed so that it gets tested.
> 
> ok?

ok mvs@

> 
> mvs: Do you know whether pipex in udp_input() is MP safe?

Yes, the whole pipex(4) layer is MP safe. But be aware, ipsec(4)
could be not.

> 
> bluhm
> 
> Index: netinet/udp_usrreq.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> retrieving revision 1.294
> diff -u -p -r1.294 udp_usrreq.c
> --- netinet/udp_usrreq.c  29 Aug 2022 08:08:17 -  1.294
> +++ netinet/udp_usrreq.c  29 Aug 2022 17:12:08 -
> @@ -657,12 +657,17 @@ udp_sbappend(struct inpcb *inp, struct m
>   }
> #endif
>   m_adj(m, hlen);
> +
> + mtx_enter(>inp_mtx);
>   if (sbappendaddr(so, >so_rcv, srcaddr, m, opts) == 0) {
> + mtx_leave(>inp_mtx);
>   udpstat_inc(udps_fullsock);
>   m_freem(m);
>   m_freem(opts);
>   return;
>   }
> + mtx_leave(>inp_mtx);
> +
>   sorwakeup(so);
> }
> 
> 



Re: Race in disk_attach_callback?

2022-08-29 Thread Miod Vallat
> What's the status on this diff?

I'm waiting for review from at least one of the softraid suspects prior
to putting this in, in case there are further cinematics to address.



Re: all architectures: put clockframe definition in frame.h?

2022-08-29 Thread Scott Cheloha
On Mon, Aug 29, 2022 at 06:50:08PM +0200, Mark Kettenis wrote:
> > Date: Mon, 29 Aug 2022 11:33:19 -0500
> > From: Scott Cheloha 
> > 
> > On Fri, Aug 19, 2022 at 01:24:47PM +0200, Mark Kettenis wrote:
> > >
> > > This is one of those annoying corners where there is too much
> > > unnecessary MD variation. Currently travelling without a laptop, so I
> > > can't easily check the tree. But one note I wanted to make is that the
> > > definition of struct clockframe and the CLKF_XXX macros should stay
> > > together. 
> > 
> > Sure.  Here's a version that consolidates the CLKF macros into frame.h
> > alongside the clockframe definitions.
> > 
> > Notes by arch:
> > 
> > alpha, amd64, hppa, i386, m88k, mips64, powerpc64, sh, sparc64:
> > 
> > - clockframe is defined in cpu.h with CLKF macros.
> > 
> > - Move clockframe definition and CLKF macros from cpu.h to frame.h.
> > 
> > arm, powerpc:
> > 
> > - clockframe is defined in frame.h.
> > 
> > - CLKF macros are defined in cpu.h.
> > 
> > - Move CLKF macros from cpu.h to frame.h.
> > 
> > arm64, riscv64:
> > 
> > - clockframe is defined in cpu.h with CLKF macros.
> > 
> > - clockframe is *also* defined in frame.h.
> > 
> > - Delete clockframe definition from frame.h
> > 
> > - Move (other) clockframe definition and CLKF macros from cpu.h to frame.h.
> > 
> > sparc64 remains the only one that looks not-quite-right because
> > trapframe64 is defined in reg.h, not frame.h.
> 
> Yes, that is not going to work.
> 
> We really should be getting rid of the xxx32 stuff and rename the xxx64
> ones to xxx.  And move trapframe (and possibly rwindow) to frame.h.

So we would get rid of all the 32-bit compat stuff from arch/sparc64?

That's a pretty big change.

Index: fpu/fpu.c
===
RCS file: /cvs/src/sys/arch/sparc64/fpu/fpu.c,v
retrieving revision 1.21
diff -u -p -r1.21 fpu.c
--- fpu/fpu.c   19 Aug 2020 10:10:58 -  1.21
+++ fpu/fpu.c   29 Aug 2022 18:44:50 -
@@ -81,22 +81,22 @@
 #include 
 
 int fpu_regoffset(int, int);
-int fpu_insn_fmov(struct fpstate64 *, struct fpemu *, union instr);
-int fpu_insn_fabs(struct fpstate64 *, struct fpemu *, union instr);
-int fpu_insn_fneg(struct fpstate64 *, struct fpemu *, union instr);
+int fpu_insn_fmov(struct fpstate *, struct fpemu *, union instr);
+int fpu_insn_fabs(struct fpstate *, struct fpemu *, union instr);
+int fpu_insn_fneg(struct fpstate *, struct fpemu *, union instr);
 int fpu_insn_itof(struct fpemu *, union instr, int, int *,
 int *, u_int *);
 int fpu_insn_ftoi(struct fpemu *, union instr, int *, int, u_int *);
 int fpu_insn_ftof(struct fpemu *, union instr, int *, int *, u_int *);
 int fpu_insn_fsqrt(struct fpemu *, union instr, int *, int *, u_int *);
-int fpu_insn_fcmp(struct fpstate64 *, struct fpemu *, union instr, int);
+int fpu_insn_fcmp(struct fpstate *, struct fpemu *, union instr, int);
 int fpu_insn_fmul(struct fpemu *, union instr, int *, int *, u_int *);
 int fpu_insn_fmulx(struct fpemu *, union instr, int *, int *, u_int *);
 int fpu_insn_fdiv(struct fpemu *, union instr, int *, int *, u_int *);
 int fpu_insn_fadd(struct fpemu *, union instr, int *, int *, u_int *);
 int fpu_insn_fsub(struct fpemu *, union instr, int *, int *, u_int *);
-int fpu_insn_fmovcc(struct proc *, struct fpstate64 *, union instr);
-int fpu_insn_fmovr(struct proc *, struct fpstate64 *, union instr);
+int fpu_insn_fmovcc(struct proc *, struct fpstate *, union instr);
+int fpu_insn_fmovr(struct proc *, struct fpstate *, union instr);
 void fpu_fcopy(u_int *, u_int *, int);
 
 #ifdef DEBUG
@@ -115,7 +115,7 @@ fpu_dumpfpn(struct fpn *fp)
fp->fp_mant[2], fp->fp_mant[3], fp->fp_exp);
 }
 void
-fpu_dumpstate(struct fpstate64 *fs)
+fpu_dumpstate(struct fpstate *fs)
 {
int i;
 
@@ -189,7 +189,7 @@ fpu_fcopy(src, dst, type)
 void
 fpu_cleanup(p, fs)
register struct proc *p;
-   register struct fpstate64 *fs;
+   register struct fpstate *fs;
 {
register int i, fsr = fs->fs_fsr, error;
union instr instr;
@@ -455,7 +455,7 @@ fpu_execute(p, fe, instr)
  */
 int
 fpu_insn_fmov(fs, fe, instr)
-   struct fpstate64 *fs;
+   struct fpstate *fs;
struct fpemu *fe;
union instr instr;
 {
@@ -478,7 +478,7 @@ fpu_insn_fmov(fs, fe, instr)
  */
 int
 fpu_insn_fabs(fs, fe, instr)
-   struct fpstate64 *fs;
+   struct fpstate *fs;
struct fpemu *fe;
union instr instr;
 {
@@ -502,7 +502,7 @@ fpu_insn_fabs(fs, fe, instr)
  */
 int
 fpu_insn_fneg(fs, fe, instr)
-   struct fpstate64 *fs;
+   struct fpstate *fs;
struct fpemu *fe;
union instr instr;
 {
@@ -644,7 +644,7 @@ fpu_insn_fsqrt(fe, instr, rdp, rdtypep, 
  */
 int
 fpu_insn_fcmp(fs, fe, instr, cmpe)
-   struct fpstate64 *fs;
+   struct fpstate *fs;
struct fpemu *fe;
union instr instr;
int cmpe;
@@ -848,7 +848,7 @@ fpu_insn_fsub(fe, instr, rdp, rdtypep, s
 int
 fpu_insn_fmovcc(p, 

Re: Can't use hostname in hostctl command.

2022-08-29 Thread Masato Asou
> Hi!
> 
> I can use ip and can't use hostname in hostctl command as below:
> 
> $ hostctl guestinfo.ip
> 172.16.100.131
> $ hostctl guestinfo.hostname
> hostctl: ioctl: Invalid argument

I can read it after writing to hostname.

However, I do not know how to read this in HOSTMACHINE.
--
ASOU Masato

> man hostctl has the following description:
> 
> EXAMPLES
>  The vmt(4) driver provides access to the ``guestinfo''
>  information that
>  is available in VMware virtual machines:
> 
># hostctl guestinfo.hostname
>vm-111.example.com
># hostctl guestinfo.ip 192.168.100.111
> 
> Is hostname no longer available due to a change in VMware version?
> 
> 
> I use following system:
> 
> $ sysctl kern.version
> kern.version=OpenBSD 7.1 (GENERIC) #443: Mon Apr 11 17:55:15 MDT 2022
> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC
> 
> VMware Fusion 12.2.4 (20071091)
> 
> Thank you.
> --
> ASOU Masato
> 



Re: softraid raid1c keydisk cosmetic fix

2022-08-29 Thread Klemens Nanni
On Sat Aug 13, 2022 at 12:39 AM +04, Stefan Sperling wrote:
> Use raid1c-specific meta-data while looking for a key disk that
> belongs to a RAID 1C volume.
>
> By dumb luck this is only a cosmetic issue, because struct layout
> happens to put the field in the same place.

Good catch.  OK kn
One suggestion inline.

>  
> diff a87e94ce1617958c99b63ed0a056e46650a27375 
> ba39970f6365aeeb1b486101f6573c60b7f88573
> commit - a87e94ce1617958c99b63ed0a056e46650a27375
> commit + ba39970f6365aeeb1b486101f6573c60b7f88573
> blob - 762f6ee57d5e7902c097d71830fea6e63080e7b5
> blob + 737fafbaad517c55293c69f0a6ddcb8fbb720c83
> --- sys/dev/softraid.c
> +++ sys/dev/softraid.c
> @@ -2593,10 +2593,13 @@ sr_ioctl_vol(struct sr_softc *sc, struct bioc_vol *bv)
>   bv->bv_nodisk = sd->sd_meta->ssdi.ssd_chunk_no;
>  
>  #ifdef CRYPTO
> - if ((sd->sd_meta->ssdi.ssd_level == 'C' ||
> - sd->sd_meta->ssdi.ssd_level == 0x1C) &&
> + if (sd->sd_meta->ssdi.ssd_level == 'C' &&
>   sd->mds.mdd_crypto.key_disk != NULL)
>   bv->bv_nodisk++;
> +
> + if (sd->sd_meta->ssdi.ssd_level == 0x1C &&

I'd use either `else if' here or merge them into
if ((C && mdd_crypto.key_disk) ||
(1C && sr1c_crypto.key_disk))

> + sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL)
> + bv->bv_nodisk++;
>  #endif
>   if (bv->bv_status == BIOC_SVREBUILD)
>   bv->bv_percent = sr_rebuild_percent(sd);
> @@ -2650,10 +2653,13 @@ sr_ioctl_disk(struct sr_softc *sc, struct bioc_disk *b
>   src = sd->sd_vol.sv_chunks[bd->bd_diskid];
>  #ifdef CRYPTO
>   else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no &&
> - (sd->sd_meta->ssdi.ssd_level == 'C' ||
> - sd->sd_meta->ssdi.ssd_level == 0x1C) &&
> + sd->sd_meta->ssdi.ssd_level == 'C' &&
>   sd->mds.mdd_crypto.key_disk != NULL)
>   src = sd->mds.mdd_crypto.key_disk;
> + else if (bd->bd_diskid == sd->sd_meta->ssdi.ssd_chunk_no &&
> + sd->sd_meta->ssdi.ssd_level == 0x1C &&
> + sd->mds.mdd_raid1c.sr1c_crypto.key_disk != NULL)
> + src = sd->mds.mdd_crypto.key_disk;

Since this is the same plus a third condition, `else if' in both places
seems cleanest here.

>  #endif
>   else
>   break;



Continuous USB transfers

2022-08-29 Thread Marcus Glocker
xhci(4) and ehci(4) are scheduling continuous isoc transfers.  For that
we have implemented a kind of workaround, which checks whether
usbd_start_next() is called in between and tries to start a transfer
which is still in progress.

NetBSD has introduced a variable called 'serialise' for the pipe,
with which one can control whether to skip the usbd_start_next()
call at all, and therefore schedule continuous transfers without
the need for explicit checks whether the transfer is still in progress.

By default we set 'serialise' to 1 in pipe initialization, so that the
default behavior remains to call usbd_start_next() after a completed
transfer.  If we want to schedule continuous transfers, we overwrite
'serialise' with 0 in the HC driver.

I like the approach and I think it's more straight forward than what
we do today.  It will also allow to easily implement continuous
transfers in other HC drivers, i.e. in dwctwo(4).

Comments?  OKs?


Index: sys/dev/usb/ehci.c
===
RCS file: /cvs/src/sys/dev/usb/ehci.c,v
retrieving revision 1.219
diff -u -p -u -p -r1.219 ehci.c
--- sys/dev/usb/ehci.c  12 Apr 2022 19:41:11 -  1.219
+++ sys/dev/usb/ehci.c  29 Aug 2022 15:40:54 -
@@ -1505,6 +1505,7 @@ ehci_open(struct usbd_pipe *pipe)
case EHCI_QH_SPEED_HIGH:
case EHCI_QH_SPEED_FULL:
pipe->methods = _device_isoc_methods;
+   pipe->serialise = 0;
break;
case EHCI_QH_SPEED_LOW:
default:
@@ -3247,7 +3248,7 @@ ehci_device_isoc_transfer(struct usbd_xf
usbd_status err;
 
err = usb_insert_transfer(xfer);
-   if (err && err != USBD_IN_PROGRESS)
+   if (err)
return (err);
 
return (ehci_device_isoc_start(xfer));
@@ -3267,15 +3268,6 @@ ehci_device_isoc_start(struct usbd_xfer 
 
KASSERT(!(xfer->rqflags & URQ_REQUEST));
KASSERT(ival > 0 && ival <= 16);
-
-   /*
-* To allow continuous transfers, above we start all transfers
-* immediately. However, we're still going to get usbd_start_next call
-* this when another xfer completes. So, check if this is already
-* in progress or not
-*/
-   if (ex->itdstart != NULL)
-   return (USBD_IN_PROGRESS);
 
if (sc->sc_bus.dying)
return (USBD_IOERROR);
Index: sys/dev/usb/usb_subr.c
===
RCS file: /cvs/src/sys/dev/usb/usb_subr.c,v
retrieving revision 1.158
diff -u -p -u -p -r1.158 usb_subr.c
--- sys/dev/usb/usb_subr.c  16 Feb 2022 06:23:42 -  1.158
+++ sys/dev/usb/usb_subr.c  29 Aug 2022 15:40:54 -
@@ -793,6 +793,7 @@ usbd_setup_pipe(struct usbd_device *dev,
p->endpoint = ep;
ep->refcnt++;
p->interval = ival;
+   p->serialise = 1;
SIMPLEQ_INIT(>queue);
err = dev->bus->methods->open_pipe(p);
if (err) {
Index: sys/dev/usb/usbdi.c
===
RCS file: /cvs/src/sys/dev/usb/usbdi.c,v
retrieving revision 1.110
diff -u -p -u -p -r1.110 usbdi.c
--- sys/dev/usb/usbdi.c 3 Feb 2021 11:34:24 -   1.110
+++ sys/dev/usb/usbdi.c 29 Aug 2022 15:40:54 -
@@ -822,7 +822,7 @@ usb_transfer_complete(struct usbd_xfer *
 status == USBD_TIMEOUT) &&
pipe->iface != NULL)/* not control pipe */
pipe->running = 0;
-   else
+   else if (pipe->serialise)
usbd_start_next(pipe);
}
 }
@@ -845,7 +845,7 @@ usb_insert_transfer(struct usbd_xfer *xf
 #endif
s = splusb();
SIMPLEQ_INSERT_TAIL(>queue, xfer, next);
-   if (pipe->running)
+   if (pipe->running && pipe->serialise)
err = USBD_IN_PROGRESS;
else {
pipe->running = 1;
Index: sys/dev/usb/usbdivar.h
===
RCS file: /cvs/src/sys/dev/usb/usbdivar.h,v
retrieving revision 1.82
diff -u -p -u -p -r1.82 usbdivar.h
--- sys/dev/usb/usbdivar.h  12 Apr 2022 19:41:11 -  1.82
+++ sys/dev/usb/usbdivar.h  29 Aug 2022 15:40:55 -
@@ -186,6 +186,7 @@ struct usbd_pipe {
size_t  pipe_size;
charrunning;
charaborting;
+   charserialise;
SIMPLEQ_HEAD(, usbd_xfer) queue;
LIST_ENTRY(usbd_pipe)   next;
 
Index: sys/dev/usb/xhci.c
===
RCS file: /cvs/src/sys/dev/usb/xhci.c,v
retrieving revision 1.126
diff -u -p -u -p -r1.126 xhci.c
--- sys/dev/usb/xhci.c  15 Jul 2022 07:52:06 -  1.126
+++ sys/dev/usb/xhci.c  29 Aug 2022 15:40:55 -
@@ -1224,6 +1224,7 @@ xhci_pipe_open(struct usbd_pipe *pipe)
  

Re: bgpd remove aspath cache for more speed

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 07:44:33PM +0200, Theo Buehler wrote:
> On Mon, Aug 29, 2022 at 07:28:58PM +0200, Claudio Jeker wrote:
> > This diff removes the aspath cache. I tried replacing it with an RB tree
> > but it still consumes a lot of CPU cycles for little gain. So instead just
> > copy the aspath for all rde_aspath structs. It will use more memory but in
> > most cases the amount is in the 20-30% order.
> 
> ok
> 
> > This also includes a lot of fatal("%s", __func__) changes. I can strip
> > those out and commit them first.
> 
> I'd commit them separately. You missed a few fatalx() in rde_attr.c:
> 
> fatalx("attr_optadd: others_len overflow");
> fatalx("attr_diff: equal attributes encountered");
> fatalx("aspath_prepend: bad aspath length");
> fatalx("aspath_prepend: preposterous prepend");

I was going after the simple malloc() related fatals. I should look into
those and maybe reword them e.g. "preposterous aspath prepend" or refactor
the code.

-- 
:wq Claudio



snmpd(8): Minor logging cleanup

2022-08-29 Thread Martijn van Duren
Apparently I mistyped one AgentX as Agentx, and when moving sess_id to
uint32_t (in an early draft) I forgot to adjust the %d in two places.

OK?

martijn@

Index: application_agentx.c
===
RCS file: /cvs/src/usr.sbin/snmpd/application_agentx.c,v
retrieving revision 1.1
diff -u -p -r1.1 application_agentx.c
--- application_agentx.c23 Aug 2022 08:56:20 -  1.1
+++ application_agentx.c29 Aug 2022 17:50:50 -
@@ -145,7 +145,7 @@ appl_agentx_listen(struct agentx_master 
bind(master->axm_fd, (struct sockaddr *)&(master->axm_sun),
sizeof(master->axm_sun)) == -1 ||
listen(master->axm_fd, 5)) {
-   log_warn("Agentx: listen %s", master->axm_sun.sun_path);
+   log_warn("AgentX: listen %s", master->axm_sun.sun_path);
umask(mask);
return;
}
@@ -208,7 +208,7 @@ appl_agentx_accept(int masterfd, short e
appl_agentx_recv, conn);
event_add(&(conn->conn_rev), NULL);
event_set(&(conn->conn_wev), fd, EV_WRITE, appl_agentx_send, conn);
-   log_info("AgentX(%d): new connection", conn->conn_id);
+   log_info("AgentX(%"PRIu32"): new connection", conn->conn_id);
 
return;
  fail:
@@ -442,7 +442,7 @@ appl_agentx_open(struct appl_agentx_conn
if (asprintf(&(session->sess_backend.ab_name),
"AgentX(%"PRIu32"/%"PRIu32")",
conn->conn_id, session->sess_id) == -1) {
-   log_warn("AgentX(%d): asprintf: Open Failed",
+   log_warn("AgentX(%"PRIu32"): asprintf: Open Failed",
conn->conn_id);
goto fail;
}



Re: snmpd(8): make sure oidbuf is properly initialized on overlapping regions

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 07:32:25PM +0200, Martijn van Duren wrote:
> I think the subject speaks for itself.
> 
> Not a really big problem, since none of the available software that we
> currently have in base/ports have overlapping regions, but definitely
> worth fixing.
> 
> OK?

ok tb



Re: snmpd(8): Allow overlapping region from same backend

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 07:39:35PM +0200, Martijn van Duren wrote:
> Right now we don't allow overlapping regions when the subtree flag is
> set, . However I don't see a reason why a single backend can't
> make an overlapping region with itself.
> 
> I would also like to use this feature when moving mib.c code into an
> libagentx based backend.
> 
> OK?

Sure, makes sense.

ok



Re: [matth...@openbsd.org: Re: xlock don't take my password anymore]

2022-08-29 Thread Matthieu Herrb
On Mon, Aug 29, 2022 at 09:08:26AM +0200, Greg Steuck wrote:
> Greg Steuck  writes:
> 
> Matthieu> +   authok = priv_pw_check(user, style, pass);
> 
> I suspect your original patch may have swapped the arguments. The
> password should go before style.

Oops you're right thanks.


> 
> What do you think about this patch (tested locally, but I don't have
> style):

Works for me. I also cannot check style easily (otherwise I would
probably have caught the mistake, but I've checked with
and without : in the password).

> 
> diff --git a/app/xlockmore/xlock/passwd.c b/app/xlockmore/xlock/passwd.c
> index 914db414f..23ba9043e 100644
> --- a/app/xlockmore/xlock/passwd.c
> +++ b/app/xlockmore/xlock/passwd.c
> @@ -1278,17 +1278,15 @@ checkPasswd(char *buffer)
>  
>  #ifdef USE_PRIVSEP
>   char*pass;
> - char*style;
>  
>   /* buffer can be in the form style:pass */
>   if ((pass = strchr(buffer, ':')) != NULL) {
> - *pass++ = '\0';
> - style = buffer;
> - } else {
> - pass = buffer;
> - style = NULL;
> + *pass++ = '\0';
> + if (priv_pw_check(user, pass, buffer))
> + return True;
> + *--pass = ':';
>   }
> - return priv_pw_check(user, pass, style);
> + return priv_pw_check(user, buffer, NULL);
>  #elif defined(BSD_AUTH)
>   char   *pass;
>   char   *style;
> -- 
> 2.37.2
> 

-- 
Matthieu Herrb



Re: bgpd remove aspath cache for more speed

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 07:28:58PM +0200, Claudio Jeker wrote:
> This diff removes the aspath cache. I tried replacing it with an RB tree
> but it still consumes a lot of CPU cycles for little gain. So instead just
> copy the aspath for all rde_aspath structs. It will use more memory but in
> most cases the amount is in the 20-30% order.

ok

> This also includes a lot of fatal("%s", __func__) changes. I can strip
> those out and commit them first.

I'd commit them separately. You missed a few fatalx() in rde_attr.c:

fatalx("attr_optadd: others_len overflow");
fatalx("attr_diff: equal attributes encountered");
fatalx("aspath_prepend: bad aspath length");
fatalx("aspath_prepend: preposterous prepend");



Re: udp pcb mutex

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 07:34:14PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> The diff below is needed to protect the receive socket buffer in
> UDP input with per PCB mutex.
> 
> With that, parallel UDP input and soreceive can be activated.  There
> are still issues with socket splicing and maybe pipex.  So I will
> not switch to shared lock in receive path soon.  But I want to get
> the mutex part committed so that it gets tested.
> 
> ok?

OK claudio@

It is not great to use a PCB mutex to lock something that is only
in struct socket. While I understand that this allows you to move forward
it would be nice if the lock interaction between socket, pcb and
socketbuffers would be slowly streamlined instead of introducing more
complex locking (which depends on a lot of stuff).
 
> mvs: Do you know whether pipex in udp_input() is MP safe?
> 
> bluhm
> 
> Index: netinet/udp_usrreq.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
> retrieving revision 1.294
> diff -u -p -r1.294 udp_usrreq.c
> --- netinet/udp_usrreq.c  29 Aug 2022 08:08:17 -  1.294
> +++ netinet/udp_usrreq.c  29 Aug 2022 17:12:08 -
> @@ -657,12 +657,17 @@ udp_sbappend(struct inpcb *inp, struct m
>   }
>  #endif
>   m_adj(m, hlen);
> +
> + mtx_enter(>inp_mtx);
>   if (sbappendaddr(so, >so_rcv, srcaddr, m, opts) == 0) {
> + mtx_leave(>inp_mtx);
>   udpstat_inc(udps_fullsock);
>   m_freem(m);
>   m_freem(opts);
>   return;
>   }
> + mtx_leave(>inp_mtx);
> +
>   sorwakeup(so);
>  }
>  
> 

-- 
:wq Claudio



snmpd(8): Allow overlapping region from same backend

2022-08-29 Thread Martijn van Duren
Right now we don't allow overlapping regions when the subtree flag is
set, . However I don't see a reason why a single backend can't
make an overlapping region with itself.

I would also like to use this feature when moving mib.c code into an
libagentx based backend.

OK?

martijn@

Index: application.c
===
RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
retrieving revision 1.10
diff -u -p -r1.10 application.c
--- application.c   29 Aug 2022 13:25:18 -  1.10
+++ application.c   29 Aug 2022 17:39:15 -
@@ -223,12 +223,14 @@ appl_region(struct appl_context *ctx, ui
 * This allows us to keep control of certain regions like system.
 */
region = appl_region_find(ctx, oid);
-   if (region != NULL && region->ar_subtree)
+   if (region != NULL && region->ar_subtree &&
+   region->ar_backend != backend)
goto overlap;
 
search.ar_oid = *oid;
region = RB_NFIND(appl_regions, &(ctx->ac_regions), );
-   if (region != NULL && region->ar_subtree && (
+   if (region != NULL && region->ar_subtree && 
+   region->ar_backend != backend && (
appl_region_cmp(, region) == 0 ||
appl_region_cmp(, region) == -2))
goto overlap;



udp pcb mutex

2022-08-29 Thread Alexander Bluhm
Hi,

The diff below is needed to protect the receive socket buffer in
UDP input with per PCB mutex.

With that, parallel UDP input and soreceive can be activated.  There
are still issues with socket splicing and maybe pipex.  So I will
not switch to shared lock in receive path soon.  But I want to get
the mutex part committed so that it gets tested.

ok?

mvs: Do you know whether pipex in udp_input() is MP safe?

bluhm

Index: netinet/udp_usrreq.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.294
diff -u -p -r1.294 udp_usrreq.c
--- netinet/udp_usrreq.c29 Aug 2022 08:08:17 -  1.294
+++ netinet/udp_usrreq.c29 Aug 2022 17:12:08 -
@@ -657,12 +657,17 @@ udp_sbappend(struct inpcb *inp, struct m
}
 #endif
m_adj(m, hlen);
+
+   mtx_enter(>inp_mtx);
if (sbappendaddr(so, >so_rcv, srcaddr, m, opts) == 0) {
+   mtx_leave(>inp_mtx);
udpstat_inc(udps_fullsock);
m_freem(m);
m_freem(opts);
return;
}
+   mtx_leave(>inp_mtx);
+
sorwakeup(so);
 }
 



snmpd(8): make sure oidbuf is properly initialized on overlapping regions

2022-08-29 Thread Martijn van Duren
I think the subject speaks for itself.

Not a really big problem, since none of the available software that we
currently have in base/ports has overlapping regions, but definitely
worth fixing.

OK?

martijn@

Index: application.c
===
RCS file: /cvs/src/usr.sbin/snmpd/application.c,v
retrieving revision 1.10
diff -u -p -r1.10 application.c
--- application.c   29 Aug 2022 13:25:18 -  1.10
+++ application.c   29 Aug 2022 17:30:44 -
@@ -218,6 +218,16 @@ appl_region(struct appl_context *ctx, ui
char oidbuf[1024], regionbuf[1024], subidbuf[11];
size_t i;
 
+   /* Don't use smi_oid2string, because appl_register can't use it */
+   oidbuf[0] = '\0';
+   for (i = 0; i < oid->bo_n; i++) {
+   if (i != 0)
+   strlcat(oidbuf, ".", sizeof(oidbuf));
+   snprintf(subidbuf, sizeof(subidbuf), "%"PRIu32,
+   oid->bo_id[i]);
+   strlcat(oidbuf, subidbuf, sizeof(oidbuf));
+   }
+
/*
 * Don't allow overlap when subtree flag is set.
 * This allows us to keep control of certain regions like system.
@@ -233,15 +243,6 @@ appl_region(struct appl_context *ctx, ui
appl_region_cmp(, region) == -2))
goto overlap;
 
-   /* Don't use smi_oid2string, because appl_register can't use it */
-   oidbuf[0] = '\0';
-   for (i = 0; i < oid->bo_n; i++) {
-   if (i != 0)
-   strlcat(oidbuf, ".", sizeof(oidbuf));
-   snprintf(subidbuf, sizeof(subidbuf), "%"PRIu32,
-   oid->bo_id[i]);
-   strlcat(oidbuf, subidbuf, sizeof(oidbuf));
-   }
if ((nregion = malloc(sizeof(*nregion))) == NULL) {
log_warn("%s: Can't register %s: Processing error",
backend->ab_name, oidbuf);



bgpd remove aspath cache for more speed

2022-08-29 Thread Claudio Jeker
This diff removes the aspath cache. I tried replacing it with an RB tree
but it still consumes a lot of CPU cycles for little gain. So instead just
copy the aspath for all rde_aspath structs. It will use more memory, but in
most cases the increase is on the order of 20-30%.

This also includes a lot of fatal("%s", __func__) changes. I can strip
those out and commit them first.
-- 
:wq Claudio

Index: usr.sbin/bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.27
diff -u -p -r1.27 output.c
--- usr.sbin/bgpctl/output.c29 Aug 2022 14:58:15 -  1.27
+++ usr.sbin/bgpctl/output.c29 Aug 2022 17:22:10 -
@@ -1003,9 +1003,7 @@ show_rib_mem(struct rde_memstats *stats)
printf("\t   and holding %lld references\n",
stats->path_refs);
printf("%10lld BGP AS-PATH attribute entries using "
-   "%s of memory\n\t   and holding %lld references\n",
-   stats->aspath_cnt, fmt_mem(stats->aspath_size),
-   stats->aspath_refs);
+   "%s of memory\n", stats->aspath_cnt, fmt_mem(stats->aspath_size));
printf("%10lld entries for %lld BGP communities "
"using %s of memory\n", stats->comm_cnt, stats->comm_nmemb,
fmt_mem(stats->comm_cnt * sizeof(struct rde_community) +
Index: usr.sbin/bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.21
diff -u -p -r1.21 output_json.c
--- usr.sbin/bgpctl/output_json.c   29 Aug 2022 14:58:15 -  1.21
+++ usr.sbin/bgpctl/output_json.c   29 Aug 2022 17:22:10 -
@@ -936,7 +936,7 @@ json_rib_mem(struct rde_memstats *stats)
stats->path_cnt * sizeof(struct rde_aspath),
stats->path_refs);
json_rib_mem_element("aspath", stats->aspath_cnt,
-   stats->aspath_size, stats->aspath_refs);
+   stats->aspath_size, UINT64_MAX);
json_rib_mem_element("community_entries", stats->comm_cnt,
stats->comm_cnt * sizeof(struct rde_community), UINT64_MAX);
json_rib_mem_element("community", stats->comm_nmemb,
Index: usr.sbin/bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.450
diff -u -p -r1.450 bgpd.h
--- usr.sbin/bgpd/bgpd.h26 Aug 2022 14:10:52 -  1.450
+++ usr.sbin/bgpd/bgpd.h29 Aug 2022 12:07:01 -
@@ -1193,7 +1193,6 @@ struct rde_memstats {
long long   nexthop_cnt;
long long   aspath_cnt;
long long   aspath_size;
-   long long   aspath_refs;
long long   comm_cnt;
long long   comm_nmemb;
long long   comm_size;
Index: usr.sbin/bgpd/rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.568
diff -u -p -r1.568 rde.c
--- usr.sbin/bgpd/rde.c 29 Aug 2022 16:44:47 -  1.568
+++ usr.sbin/bgpd/rde.c 29 Aug 2022 16:46:09 -
@@ -197,7 +197,6 @@ rde_main(int debug, int verbose)
 
/* initialize the RIB structures */
pt_init();
-   aspath_init(pathhashsize);
attr_init(attrhashsize);
nexthop_init(nexthophashsize);
peer_init(peerhashsize);
@@ -630,9 +629,6 @@ badnetdel:
case IMSG_CTL_SHOW_RIB_MEM:
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
imsg.hdr.pid, -1, , sizeof(rdemem));
-   aspath_hash_stats();
-   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
-   imsg.hdr.pid, -1, , sizeof(rdehash));
attr_hash_stats();
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
@@ -1631,7 +1627,9 @@ pathid_assign(struct rde_peer *peer, uin
struct prefix *p = NULL;
uint32_t path_id_tx;
 
-   /* Assign a send side path_id to all paths */
+   /*
+* Assign a send side path_id to all paths.
+*/
re = rib_get(rib_byid(RIB_ADJ_IN), prefix, prefixlen);
if (re != NULL)
p = prefix_bypeer(re, peer, path_id);
@@ -4308,7 +4306,6 @@ rde_shutdown(void)
rib_shutdown();
nexthop_shutdown();
path_shutdown();
-   aspath_shutdown();
attr_shutdown();
pt_shutdown();
peer_shutdown();
Index: usr.sbin/bgpd/rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.265
diff -u -p -r1.265 rde.h
--- usr.sbin/bgpd/rde.h 29 Aug 2022 16:44:47 -  1.265
+++ usr.sbin/bgpd/rde.h 29 Aug 2022 16:45:09 -
@@ -71,7 +71,6 @@ struct rib {
  * Currently I assume that we can do that with the neighbor_ip...

Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-29 Thread Scott Cheloha
On Thu, Aug 25, 2022 at 03:57:48PM +1000, Jonathan Gray wrote:
> On Wed, Aug 24, 2022 at 11:05:30PM -0500, Scott Cheloha wrote:
> > On Wed, Aug 24, 2022 at 05:51:14PM +1000, Jonathan Gray wrote:
> > > On Tue, Aug 23, 2022 at 12:20:39PM -0500, Scott Cheloha wrote:
> > > > > Hyper-V generation 1 VMs are bios boot with emulation of the usual
> > > > > devices.  32-bit and 64-bit guests.
> > > > > 
> > > > > Hyper-V generation 2 VMs are 64-bit uefi with paravirtualised devices.
> > > > > 64-bit guests only.
> > > > > 
> > > > > There is no 8254 in generation 2.
> > > > > No HPET in either generation.
> > > > > 
> > > > > hv_delay uses the "Partition Reference Counter MSR" described in
> > > > > https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/tlfs/timers
> > > > > It seems it is available in both generations and could be used from 
> > > > > i386?
> > > > > 
> > > > > From reading that page hv_delay() should be preferred over 
> > > > > lapic_delay()
> > > > 
> > > > Alright, I have nudged hv_delay's quality up over lapic_delay's
> > > > quality.
> > > 
> > > Before these changes, tsc is probed before pvbus.  Do the tsc sanity
> > > checks result in it not being considered an option on hyper-v?  I think
> > > the tsc_delay and hv_delay numbers should be swapped in a later commit.
> > > It is unclear if that would change the final delay_func setting.
> > 
> > Why would we prefer hv_delay() to tsc_delay() if we had a
> > constant/invariant TSC available in our Hyper-V guest?
> > 
> > When patrick@ emailed me last year about issues with delay(9) on
> > Hyper-V, he started by saying that the root of the problem was that
> > the OpenBSD guest was not opting to use tsc_delay() because the host
> > wasn't reporting a constant/invariant TSC.  So the guest was trying to
> > use i8254_delay(), which was impossible because Hyper-V Gen2 guests
> > don't have an i8254.  Hence, hv_delay() was added to the tree.
> > 
> > So, my understanding is that the addition of hv_delay() does not mean
> > tsc_delay() is worse than hv_delay().  hv_delay() was added because
> > tsc_delay() isn't always an option and (to our surprise) neither is
> > i8254_delay().
> 
> I'm not clear on when rdtsc and rdmsr would cause a vm exit.
> Presumably the reference tsc page is provided to avoid that,
> but we don't use it.  rdtsc and rdmsr don't always cause an exit.
> 
> The wording of Microsoft's "Hypervisor Top Level Functional
> Specification" reads as the interface is only available when
> the underlying machine has a constant frequency tsc.  It also
> makes the point that the interface being in time not cycles avoids
> problems when the tsc frequency changes on live migration.
> 
> "12.3 Partition Reference Time Enlightenment
> 
> The partition reference time enlightenment presents a reference
> time source to a partition which does not require an intercept into
> the hypervisor. This enlightenment is available only when the
> underlying platform provides support of an invariant processor Time
> Stamp Counter (TSC), or iTSC. In such platforms, the processor TSC
> frequency remains constant irrespective of changes in the processor's
> clock frequency due to the use of power management states such as
> ACPI processor performance states, processor idle sleep states (ACPI
> C-states), etc.
> 
> The partition reference time enlightenment uses a virtual TSC value,
> an offset and a multiplier to enable a guest partition to compute
> the normalized reference time since partition creation, in 100nS
> units. The mechanism also allows a guest partition to atomically
> compute the reference time when the guest partition is migrated to
> a platform with a different TSC rate, and provides a fallback
> mechanism to support migration to platforms without the constant
> rate TSC feature.
> 
> This facility is not intended to be used as a source of wall clock
> time, since the reference time computed using this facility will
> appear to stop during the time that a guest partition is saved until
> the subsequent restore."

If hv_delay() never causes a vm exit, but tsc_delay() *might* cause a
vm exit, and both have microsecond or better resolution, then
hv_delay() is the preferable delay(9) implementation where it is
available because vm exits have ambiguous overhead.

If that seems sensible to you, I'll commit this switch.

Index: arch/amd64/amd64/tsc.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.26
diff -u -p -r1.26 tsc.c
--- arch/amd64/amd64/tsc.c  25 Aug 2022 17:38:16 -  1.26
+++ arch/amd64/amd64/tsc.c  29 Aug 2022 16:58:25 -
@@ -109,7 +109,7 @@ tsc_identify(struct cpu_info *ci)
 
tsc_frequency = tsc_freq_cpuid(ci);
if (tsc_frequency > 0)
-   delay_init(tsc_delay, 5000);
+   delay_init(tsc_delay, 4000);
 }
 
 static inline int
Index: dev/pv/pvbus.c

Re: all architectures: put clockframe definition in frame.h?

2022-08-29 Thread Mark Kettenis
> Date: Mon, 29 Aug 2022 11:33:19 -0500
> From: Scott Cheloha 
> 
> On Fri, Aug 19, 2022 at 01:24:47PM +0200, Mark Kettenis wrote:
> >
> > This is one of those annoying corners where there is too much
> > unnecessary MD variation. Currently travelling without a laptop, so I
> > can't easily check the tree. But one note I wanted to make is that the
> > definition of struct clockframe and the CLKF_XXX macros should stay
> > together. 
> 
> Sure.  Here's a version that consolidates the CLKF macros into frame.h
> alongside the clockframe definitions.
> 
> Notes by arch:
> 
> alpha, amd64, hppa, i386, m88k, mips64, powerpc64, sh, sparc64:
> 
> - clockframe is defined in cpu.h with CLKF macros.
> 
> - Move clockframe definition and CLKF macros from cpu.h to frame.h.
> 
> arm, powerpc:
> 
> - clockframe is defined in frame.h.
> 
> - CLKF macros are defined in cpu.h.
> 
> - Move CLKF macros from cpu.h to frame.h.
> 
> arm64, riscv64:
> 
> - clockframe is defined in cpu.h with CLKF macros.
> 
> - clockframe is *also* defined in frame.h.
> 
> - Delete clockframe definition from frame.h
> 
> - Move (other) clockframe definition and CLKF macros from cpu.h to frame.h.
> 
> sparc64 remains the only one that looks not-quite-right because
> trapframe64 is defined in reg.h, not frame.h.

Yes, that is not going to work.

We really should be getting rid of the xxx32 stuff and renaming the xxx64
ones to xxx.  And move trapframe (and possibly rwindow) to frame.h.

> Index: alpha/include/cpu.h
> ===
> RCS file: /cvs/src/sys/arch/alpha/include/cpu.h,v
> retrieving revision 1.66
> diff -u -p -r1.66 cpu.h
> --- alpha/include/cpu.h   10 Aug 2022 10:41:35 -  1.66
> +++ alpha/include/cpu.h   29 Aug 2022 16:28:56 -
> @@ -297,25 +297,6 @@ cpu_rnd_messybits(void)
>  }
>  
>  /*
> - * Arguments to hardclock and gatherstats encapsulate the previous
> - * machine state in an opaque clockframe.  On the Alpha, we use
> - * what we push on an interrupt (a trapframe).
> - */
> -struct clockframe {
> - struct trapframecf_tf;
> -};
> -#define  CLKF_USERMODE(framep)   
> \
> - (((framep)->cf_tf.tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) != 0)
> -#define  CLKF_PC(framep) ((framep)->cf_tf.tf_regs[FRAME_PC])
> -
> -/*
> - * This isn't perfect; if the clock interrupt comes in before the
> - * r/m/w cycle is complete, we won't be counted... but it's not
> - * like this statistic has to be extremely accurate.
> - */
> -#define  CLKF_INTR(framep)   (curcpu()->ci_intrdepth)
> -
> -/*
>   * This is used during profiling to integrate system time.
>   */
>  #define  PROC_PC(p)  ((p)->p_md.md_tf->tf_regs[FRAME_PC])
> Index: alpha/include/frame.h
> ===
> RCS file: /cvs/src/sys/arch/alpha/include/frame.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 frame.h
> --- alpha/include/frame.h 23 Mar 2011 16:54:34 -  1.4
> +++ alpha/include/frame.h 29 Aug 2022 16:28:56 -
> @@ -92,4 +92,23 @@ struct trapframe {
>   unsigned long   tf_regs[FRAME_SIZE];/* See above */
>  };
>  
> +/*
> + * Arguments to hardclock and gatherstats encapsulate the previous
> + * machine state in an opaque clockframe.  On the Alpha, we use
> + * what we push on an interrupt (a trapframe).
> + */
> +struct clockframe {
> + struct trapframecf_tf;
> +};
> +#define  CLKF_USERMODE(framep)   
> \
> + (((framep)->cf_tf.tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) != 0)
> +#define  CLKF_PC(framep) ((framep)->cf_tf.tf_regs[FRAME_PC])
> +
> +/*
> + * This isn't perfect; if the clock interrupt comes in before the
> + * r/m/w cycle is complete, we won't be counted... but it's not
> + * like this statistic has to be extremely accurate.
> + */
> +#define  CLKF_INTR(framep)   (curcpu()->ci_intrdepth)
> +
>  #endif /* _MACHINE_FRAME_H_ */
> Index: amd64/include/cpu.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
> retrieving revision 1.149
> diff -u -p -r1.149 cpu.h
> --- amd64/include/cpu.h   25 Aug 2022 17:25:25 -  1.149
> +++ amd64/include/cpu.h   29 Aug 2022 16:28:56 -
> @@ -336,17 +336,6 @@ cpu_rnd_messybits(void)
>  #define curpcb   curcpu()->ci_curpcb
>  
>  /*
> - * Arguments to hardclock, softclock and statclock
> - * encapsulate the previous machine state in an opaque
> - * clockframe; for now, use generic intrframe.
> - */
> -#define clockframe intrframe
> -
> -#define  CLKF_USERMODE(frame)USERMODE((frame)->if_cs, 
> (frame)->if_rflags)
> -#define CLKF_PC(frame)   ((frame)->if_rip)
> -#define CLKF_INTR(frame) (curcpu()->ci_idepth > 1)
> -
> -/*
>   * Give a profiling tick to the current process when the user profiling
>   * buffer pages are 

Re: all architectures: put clockframe definition in frame.h?

2022-08-29 Thread Scott Cheloha
On Fri, Aug 19, 2022 at 01:24:47PM +0200, Mark Kettenis wrote:
>
> This is one of those annoying corners where there is too much
> unnecessary MD variation. Currently travelling without a laptop, so I
> can't easily check the tree. But one note I wanted to make is that the
> definition of struct clockframe and the CLKF_XXX macros should stay
> together. 

Sure.  Here's a version that consolidates the CLKF macros into frame.h
alongside the clockframe definitions.

Notes by arch:

alpha, amd64, hppa, i386, m88k, mips64, powerpc64, sh, sparc64:

- clockframe is defined in cpu.h with CLKF macros.

- Move clockframe definition and CLKF macros from cpu.h to frame.h.

arm, powerpc:

- clockframe is defined in frame.h.

- CLKF macros are defined in cpu.h.

- Move CLKF macros from cpu.h to frame.h.

arm64, riscv64:

- clockframe is defined in cpu.h with CLKF macros.

- clockframe is *also* defined in frame.h.

- Delete clockframe definition from frame.h

- Move (other) clockframe definition and CLKF macros from cpu.h to frame.h.

sparc64 remains the only one that looks not-quite-right because
trapframe64 is defined in reg.h, not frame.h.

Index: alpha/include/cpu.h
===
RCS file: /cvs/src/sys/arch/alpha/include/cpu.h,v
retrieving revision 1.66
diff -u -p -r1.66 cpu.h
--- alpha/include/cpu.h 10 Aug 2022 10:41:35 -  1.66
+++ alpha/include/cpu.h 29 Aug 2022 16:28:56 -
@@ -297,25 +297,6 @@ cpu_rnd_messybits(void)
 }
 
 /*
- * Arguments to hardclock and gatherstats encapsulate the previous
- * machine state in an opaque clockframe.  On the Alpha, we use
- * what we push on an interrupt (a trapframe).
- */
-struct clockframe {
-   struct trapframecf_tf;
-};
-#defineCLKF_USERMODE(framep)   
\
-   (((framep)->cf_tf.tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) != 0)
-#defineCLKF_PC(framep) ((framep)->cf_tf.tf_regs[FRAME_PC])
-
-/*
- * This isn't perfect; if the clock interrupt comes in before the
- * r/m/w cycle is complete, we won't be counted... but it's not
- * like this statistic has to be extremely accurate.
- */
-#defineCLKF_INTR(framep)   (curcpu()->ci_intrdepth)
-
-/*
  * This is used during profiling to integrate system time.
  */
 #definePROC_PC(p)  ((p)->p_md.md_tf->tf_regs[FRAME_PC])
Index: alpha/include/frame.h
===
RCS file: /cvs/src/sys/arch/alpha/include/frame.h,v
retrieving revision 1.4
diff -u -p -r1.4 frame.h
--- alpha/include/frame.h   23 Mar 2011 16:54:34 -  1.4
+++ alpha/include/frame.h   29 Aug 2022 16:28:56 -
@@ -92,4 +92,23 @@ struct trapframe {
unsigned long   tf_regs[FRAME_SIZE];/* See above */
 };
 
+/*
+ * Arguments to hardclock and gatherstats encapsulate the previous
+ * machine state in an opaque clockframe.  On the Alpha, we use
+ * what we push on an interrupt (a trapframe).
+ */
+struct clockframe {
+   struct trapframecf_tf;
+};
+#defineCLKF_USERMODE(framep)   
\
+   (((framep)->cf_tf.tf_regs[FRAME_PS] & ALPHA_PSL_USERMODE) != 0)
+#defineCLKF_PC(framep) ((framep)->cf_tf.tf_regs[FRAME_PC])
+
+/*
+ * This isn't perfect; if the clock interrupt comes in before the
+ * r/m/w cycle is complete, we won't be counted... but it's not
+ * like this statistic has to be extremely accurate.
+ */
+#defineCLKF_INTR(framep)   (curcpu()->ci_intrdepth)
+
 #endif /* _MACHINE_FRAME_H_ */
Index: amd64/include/cpu.h
===
RCS file: /cvs/src/sys/arch/amd64/include/cpu.h,v
retrieving revision 1.149
diff -u -p -r1.149 cpu.h
--- amd64/include/cpu.h 25 Aug 2022 17:25:25 -  1.149
+++ amd64/include/cpu.h 29 Aug 2022 16:28:56 -
@@ -336,17 +336,6 @@ cpu_rnd_messybits(void)
 #define curpcb curcpu()->ci_curpcb
 
 /*
- * Arguments to hardclock, softclock and statclock
- * encapsulate the previous machine state in an opaque
- * clockframe; for now, use generic intrframe.
- */
-#define clockframe intrframe
-
-#defineCLKF_USERMODE(frame)USERMODE((frame)->if_cs, 
(frame)->if_rflags)
-#define CLKF_PC(frame) ((frame)->if_rip)
-#define CLKF_INTR(frame)   (curcpu()->ci_idepth > 1)
-
-/*
  * Give a profiling tick to the current process when the user profiling
  * buffer pages are invalid.  On the i386, request an ast to send us
  * through usertrap(), marking the proc as needing a profiling tick.
Index: amd64/include/frame.h
===
RCS file: /cvs/src/sys/arch/amd64/include/frame.h,v
retrieving revision 1.10
diff -u -p -r1.10 frame.h
--- amd64/include/frame.h   10 Jul 2018 08:57:44 -  1.10
+++ amd64/include/frame.h   29 Aug 2022 16:28:56 -
@@ -171,4 +171,14 @@ struct callframe {
long 

Re: unbound update

2022-08-29 Thread Stuart Henderson
On 2022/08/26 17:47, void wrote:
> On Wed, Aug 24, 2022 at 03:03:01PM +0100, Stuart Henderson wrote:
> > Anyone want to test this?
> > 
> > Any OKs?
> 
> Hello,
> 
> Seemed to patch OK and built OK with a -current made yesterday, on aarch64.
> 
> I'm a newbie at building/patching openbsd, so if there's anything you
> can suggest I test, I'll test. unbound is working.
> 
> unbound -V still reports Version 1.16.0 though.

Something went wrong with your patching/build if it doesn't show 1.16.2; I
confirmed that it was updated.

for reference for building parts of base which have a Makefile.bsd-wrapper
file, normally use this:

make -f Makefile.bsd-wrapper obj
make -f Makefile.bsd-wrapper
doas make -f Makefile.bsd-wrapper install



Re: bgpd switch rde_aspath to RB tree

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 05:49:06PM +0200, Theo Buehler wrote:
> On Mon, Aug 29, 2022 at 05:28:50PM +0200, Claudio Jeker wrote:
> > This is the 2nd RB tree transformation instead of the hash table.
> > 
> > The rde.c part will probably conflict with the change for communities.
> 
> Apart from an unchecked RB_INSERT() this also looks fine.

I just fatal in this case because this is a serious bug. 

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.566
diff -u -p -r1.566 rde.c
--- rde.c   29 Aug 2022 14:57:27 -  1.566
+++ rde.c   29 Aug 2022 15:11:57 -
@@ -197,7 +197,6 @@ rde_main(int debug, int verbose)
 
/* initialize the RIB structures */
pt_init();
-   path_init(pathhashsize);
aspath_init(pathhashsize);
communities_init(attrhashsize);
attr_init(attrhashsize);
@@ -632,9 +631,6 @@ badnetdel:
case IMSG_CTL_SHOW_RIB_MEM:
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
imsg.hdr.pid, -1, , sizeof(rdemem));
-   path_hash_stats();
-   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
-   imsg.hdr.pid, -1, , sizeof(rdehash));
aspath_hash_stats();
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.263
diff -u -p -r1.263 rde.h
--- rde.h   26 Aug 2022 14:10:52 -  1.263
+++ rde.h   29 Aug 2022 15:11:16 -
@@ -73,7 +73,6 @@ struct rib {
 LIST_HEAD(rde_peer_head, rde_peer);
 LIST_HEAD(aspath_list, aspath);
 LIST_HEAD(attr_list, attr);
-LIST_HEAD(aspath_head, rde_aspath);
 RB_HEAD(prefix_tree, prefix);
 RB_HEAD(prefix_index, prefix);
 struct iq;
@@ -219,20 +218,18 @@ struct rde_community {
 #define DEFAULT_LPREF  100
 
 struct rde_aspath {
-   LIST_ENTRY(rde_aspath)   path_l;
+   RB_ENTRY(rde_aspath) entry;
struct attr **others;
struct aspath   *aspath;
uint64_t hash;
int  refcnt;
uint32_t flags; /* internally used */
-#defineaspath_hashstartmed
uint32_t med;   /* multi exit disc */
uint32_t lpref; /* local pref */
uint32_t weight;/* low prio lpref */
uint16_t rtlabelid; /* route label id */
uint16_t pftableid; /* pf table id */
uint8_t  origin;
-#defineaspath_hashend  others_len
uint8_t  others_len;
 };
 
@@ -594,10 +591,7 @@ re_rib(struct rib_entry *re)
return rib_byid(re->rib_id);
 }
 
-voidpath_init(uint32_t);
 voidpath_shutdown(void);
-voidpath_hash_stats(struct rde_hashstats *);
-int path_compare(struct rde_aspath *, struct rde_aspath *);
 uint32_tpath_remove_stale(struct rde_aspath *, uint8_t, time_t);
 struct rde_aspath *path_copy(struct rde_aspath *, const struct rde_aspath *);
 struct rde_aspath *path_prep(struct rde_aspath *);
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.244
diff -u -p -r1.244 rde_rib.c
--- rde_rib.c   25 Aug 2022 08:10:25 -  1.244
+++ rde_rib.c   29 Aug 2022 15:56:59 -
@@ -547,99 +547,10 @@ rib_dump_new(uint16_t id, uint8_t aid, u
 /* path specific functions */
 
 static struct rde_aspath *path_lookup(struct rde_aspath *);
-static uint64_t path_hash(struct rde_aspath *);
 static void path_link(struct rde_aspath *);
 static void path_unlink(struct rde_aspath *);
 
-struct path_table {
-   struct aspath_head  *path_hashtbl;
-   uint64_t path_hashmask;
-} pathtable;
-
-SIPHASH_KEY pathtablekey;
-
-#definePATH_HASH(x)_hashtbl[x & 
pathtable.path_hashmask]
-
-static inline struct rde_aspath *
-path_ref(struct rde_aspath *asp)
-{
-   if ((asp->flags & F_ATTR_LINKED) == 0)
-   fatalx("%s: unlinked object", __func__);
-   asp->refcnt++;
-   rdemem.path_refs++;
-
-   return asp;
-}
-
-static inline void
-path_unref(struct rde_aspath *asp)
-{
-   if (asp == NULL)
-   return;
-   if ((asp->flags & F_ATTR_LINKED) == 0)
-   fatalx("%s: unlinked object", __func__);
-   asp->refcnt--;
-   rdemem.path_refs--;
-   if (asp->refcnt <= 0)
-   

Re: bgpd switch communities to RB tree

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 05:45:58PM +0200, Claudio Jeker wrote:
> On Mon, Aug 29, 2022 at 05:36:49PM +0200, Theo Buehler wrote:
> > On Mon, Aug 29, 2022 at 05:05:59PM +0200, Claudio Jeker wrote:
> > > The hash table for communities is not great. Instead of implementing
> > > dynamic hash resize use a RB tree. Also drop the hash calculation and
> > > just use memcmp() for now. My non scientific test seems to indicate that
> > > the overhead of SipHash is about the same as the memcmp().
> > 
> > Generally looks good. Two comments.
> > 
> > > +static inline int
> > > +communities_compare(struct rde_community *a, struct rde_community *b)
> > >  {
> > > - uint32_ths, i;
> > > + if (a->nentries != b->nentries)
> > > + return a->nentries - b->nentries;
> > 
> > I dislike comparison by subtraction, but I guess it's fine since nentries
> > should never be negative. Should this check and fatal?
> 
> I think I copied this from existing code but I can change it to the double
> if we use in other places.
>  
> > > + if (a->flags != b->flags)
> > > + return a->flags - b->flags;
> > 
> > [...]
> 
> Here as well
>  
> > >  struct rde_community *
> > >  communities_link(struct rde_community *comm)
> > >  {
> > >   struct rde_community *n;
> > > - struct commhead *head;
> > >  
> > >   if ((n = malloc(sizeof(*n))) == NULL)
> > >   fatal(__func__);
> > > -
> > >   communities_copy(n, comm);
> > >  
> > > - head = communities_hash(n);
> > > - LIST_INSERT_HEAD(head, n, entry);
> > > + RB_INSERT(comm_tree, , n);
> > 
> > I would prefer if this insertion was checked.
> 
> Since we did not check in the original code either I decided to YOLO it
> but maybe an extra check makes sense.
>  

Better like this?

-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.566
diff -u -p -r1.566 rde.c
--- rde.c   29 Aug 2022 14:57:27 -  1.566
+++ rde.c   29 Aug 2022 14:58:38 -
@@ -199,7 +199,6 @@ rde_main(int debug, int verbose)
pt_init();
path_init(pathhashsize);
aspath_init(pathhashsize);
-   communities_init(attrhashsize);
attr_init(attrhashsize);
nexthop_init(nexthophashsize);
peer_init(peerhashsize);
@@ -636,9 +635,6 @@ badnetdel:
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
aspath_hash_stats();
-   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
-   imsg.hdr.pid, -1, , sizeof(rdehash));
-   communities_hash_stats();
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
attr_hash_stats();
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.263
diff -u -p -r1.263 rde.h
--- rde.h   26 Aug 2022 14:10:52 -  1.263
+++ rde.h   29 Aug 2022 14:58:38 -
@@ -183,9 +183,9 @@ struct mpattr {
 };
 
 struct rde_community {
-   LIST_ENTRY(rde_community)   entry;
-   size_t  size;
-   size_t  nentries;
+   RB_ENTRY(rde_community) entry;
+   int size;
+   int nentries;
int flags;
int refcnt;
struct community*communities;
@@ -486,9 +486,7 @@ int community_large_write(struct rde_com
 intcommunity_ext_write(struct rde_community *, int, void *, uint16_t);
 intcommunity_writebuf(struct ibuf *, struct rde_community *);
 
-voidcommunities_init(uint32_t);
 voidcommunities_shutdown(void);
-voidcommunities_hash_stats(struct rde_hashstats *);
 struct rde_community   *communities_lookup(struct rde_community *);
 struct rde_community   *communities_link(struct rde_community *);
 voidcommunities_unlink(struct rde_community *);
Index: rde_community.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
retrieving revision 1.7
diff -u -p -r1.7 rde_community.c
--- rde_community.c 28 Jul 2022 13:11:51 -  1.7
+++ rde_community.c 29 Aug 2022 15:49:41 -
@@ -209,12 +209,12 @@ mask_match(struct community *a, struct c
 static void
 insert_community(struct rde_community *comm, struct community *c)
 {
-   size_t l;
+   int l;
int r;
 
if (comm->nentries + 1 > comm->size) {
struct community *new;
-   size_t newsize = comm->size + 8;
+   int newsize = comm->size + 8;
 
if ((new = 

Re: bgpd switch rde_aspath to RB tree

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 05:28:50PM +0200, Claudio Jeker wrote:
> This is the 2nd RB tree transformation instead of the hash table.
> 
> The rde.c part will probably conflict with the change for communities.

Apart from an unchecked RB_INSERT() this also looks fine.



Re: bgpd switch communities to RB tree

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 05:36:49PM +0200, Theo Buehler wrote:
> On Mon, Aug 29, 2022 at 05:05:59PM +0200, Claudio Jeker wrote:
> > The hash table for communities is not great. Instead of implementing
> > dynamic hash resize use a RB tree. Also drop the hash calculation and
> > just use memcmp() for now. My non scientific test seems to indicate that
> > the overhead of SipHash is about the same as the memcmp().
> 
> Generally looks good. Two comments.
> 
> > +static inline int
> > +communities_compare(struct rde_community *a, struct rde_community *b)
> >  {
> > -   uint32_ths, i;
> > +   if (a->nentries != b->nentries)
> > +   return a->nentries - b->nentries;
> 
> I dislike comparison by subtraction, but I guess it's fine since nentries
> should never be negative. Should this check and fatal?

I think I copied this from existing code, but I can change it to the
double-if comparison that we use in other places.
 
> > +   if (a->flags != b->flags)
> > +   return a->flags - b->flags;
> 
> [...]

Here as well
 
> >  struct rde_community *
> >  communities_link(struct rde_community *comm)
> >  {
> > struct rde_community *n;
> > -   struct commhead *head;
> >  
> > if ((n = malloc(sizeof(*n))) == NULL)
> > fatal(__func__);
> > -
> > communities_copy(n, comm);
> >  
> > -   head = communities_hash(n);
> > -   LIST_INSERT_HEAD(head, n, entry);
> > +   RB_INSERT(comm_tree, , n);
> 
> I would prefer if this insertion was checked.

Since we did not check in the original code either I decided to YOLO it
but maybe an extra check makes sense.
 
> > n->refcnt = 1;  /* initial reference by the cache */
> >  
> > rdemem.comm_size += n->size;
> > @@ -886,7 +817,7 @@ communities_unlink(struct rde_community 
> > if (comm->refcnt != 1)
> > fatalx("%s: unlinking still referenced communities", __func__);
> >  
> > -   LIST_REMOVE(comm, entry);
> > +   RB_REMOVE(comm_tree, , comm);
> >  
> > rdemem.comm_size -= comm->size;
> > rdemem.comm_nmemb -= comm->nentries;
> > 
> 

-- 
:wq Claudio



Re: bgpd switch communities to RB tree

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 05:05:59PM +0200, Claudio Jeker wrote:
> The hash table for communities is not great. Instead of implementing
> dynamic hash resize use a RB tree. Also drop the hash calculation and
> just use memcmp() for now. My non scientific test seems to indicate that
> the overhead of SipHash is about the same as the memcmp().

Generally looks good. Two comments.

> +static inline int
> +communities_compare(struct rde_community *a, struct rde_community *b)
>  {
> - uint32_ths, i;
> + if (a->nentries != b->nentries)
> + return a->nentries - b->nentries;

I dislike comparison by subtraction, but I guess it's fine since nentries
should never be negative. Should this check and fatal?

> + if (a->flags != b->flags)
> + return a->flags - b->flags;

[...]

>  struct rde_community *
>  communities_link(struct rde_community *comm)
>  {
>   struct rde_community *n;
> - struct commhead *head;
>  
>   if ((n = malloc(sizeof(*n))) == NULL)
>   fatal(__func__);
> -
>   communities_copy(n, comm);
>  
> - head = communities_hash(n);
> - LIST_INSERT_HEAD(head, n, entry);
> + RB_INSERT(comm_tree, , n);

I would prefer if this insertion was checked.

>   n->refcnt = 1;  /* initial reference by the cache */
>  
>   rdemem.comm_size += n->size;
> @@ -886,7 +817,7 @@ communities_unlink(struct rde_community 
>   if (comm->refcnt != 1)
>   fatalx("%s: unlinking still referenced communities", __func__);
>  
> - LIST_REMOVE(comm, entry);
> + RB_REMOVE(comm_tree, , comm);
>  
>   rdemem.comm_size -= comm->size;
>   rdemem.comm_nmemb -= comm->nentries;
> 



bgpd switch rde_aspath to RB tree

2022-08-29 Thread Claudio Jeker
This is the 2nd RB tree transformation instead of the hash table.

The rde.c part will probably conflict with the change for communities.
-- 
:wq Claudio

? obj
Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.566
diff -u -p -r1.566 rde.c
--- rde.c   29 Aug 2022 14:57:27 -  1.566
+++ rde.c   29 Aug 2022 15:20:43 -
@@ -197,7 +197,6 @@ rde_main(int debug, int verbose)
 
/* initialize the RIB structures */
pt_init();
-   path_init(pathhashsize);
aspath_init(pathhashsize);
communities_init(attrhashsize);
attr_init(attrhashsize);
@@ -632,9 +631,6 @@ badnetdel:
case IMSG_CTL_SHOW_RIB_MEM:
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_MEM, 0,
imsg.hdr.pid, -1, , sizeof(rdemem));
-   path_hash_stats();
-   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
-   imsg.hdr.pid, -1, , sizeof(rdehash));
aspath_hash_stats();
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.263
diff -u -p -r1.263 rde.h
--- rde.h   26 Aug 2022 14:10:52 -  1.263
+++ rde.h   29 Aug 2022 15:20:43 -
@@ -73,7 +73,6 @@ struct rib {
 LIST_HEAD(rde_peer_head, rde_peer);
 LIST_HEAD(aspath_list, aspath);
 LIST_HEAD(attr_list, attr);
-LIST_HEAD(aspath_head, rde_aspath);
 RB_HEAD(prefix_tree, prefix);
 RB_HEAD(prefix_index, prefix);
 struct iq;
@@ -219,20 +218,18 @@ struct rde_community {
 #define DEFAULT_LPREF  100
 
 struct rde_aspath {
-   LIST_ENTRY(rde_aspath)   path_l;
+   RB_ENTRY(rde_aspath) entry;
struct attr **others;
struct aspath   *aspath;
uint64_t hash;
int  refcnt;
uint32_t flags; /* internally used */
-#defineaspath_hashstartmed
uint32_t med;   /* multi exit disc */
uint32_t lpref; /* local pref */
uint32_t weight;/* low prio lpref */
uint16_t rtlabelid; /* route label id */
uint16_t pftableid; /* pf table id */
uint8_t  origin;
-#defineaspath_hashend  others_len
uint8_t  others_len;
 };
 
@@ -594,10 +591,7 @@ re_rib(struct rib_entry *re)
return rib_byid(re->rib_id);
 }
 
-voidpath_init(uint32_t);
 voidpath_shutdown(void);
-voidpath_hash_stats(struct rde_hashstats *);
-int path_compare(struct rde_aspath *, struct rde_aspath *);
 uint32_tpath_remove_stale(struct rde_aspath *, uint8_t, time_t);
 struct rde_aspath *path_copy(struct rde_aspath *, const struct rde_aspath *);
 struct rde_aspath *path_prep(struct rde_aspath *);
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.244
diff -u -p -r1.244 rde_rib.c
--- rde_rib.c   25 Aug 2022 08:10:25 -  1.244
+++ rde_rib.c   29 Aug 2022 15:20:43 -
@@ -547,99 +547,10 @@ rib_dump_new(uint16_t id, uint8_t aid, u
 /* path specific functions */
 
 static struct rde_aspath *path_lookup(struct rde_aspath *);
-static uint64_t path_hash(struct rde_aspath *);
 static void path_link(struct rde_aspath *);
 static void path_unlink(struct rde_aspath *);
 
-struct path_table {
-   struct aspath_head  *path_hashtbl;
-   uint64_t path_hashmask;
-} pathtable;
-
-SIPHASH_KEY pathtablekey;
-
-#definePATH_HASH(x)_hashtbl[x & 
pathtable.path_hashmask]
-
-static inline struct rde_aspath *
-path_ref(struct rde_aspath *asp)
-{
-   if ((asp->flags & F_ATTR_LINKED) == 0)
-   fatalx("%s: unlinked object", __func__);
-   asp->refcnt++;
-   rdemem.path_refs++;
-
-   return asp;
-}
-
-static inline void
-path_unref(struct rde_aspath *asp)
-{
-   if (asp == NULL)
-   return;
-   if ((asp->flags & F_ATTR_LINKED) == 0)
-   fatalx("%s: unlinked object", __func__);
-   asp->refcnt--;
-   rdemem.path_refs--;
-   if (asp->refcnt <= 0)
-   path_unlink(asp);
-}
-
-void
-path_init(uint32_t hashsize)
-{
-   uint32_ths, i;
-
-   for (hs = 1; hs < hashsize; hs <<= 1)
-   ;
-   pathtable.path_hashtbl = calloc(hs, sizeof(*pathtable.path_hashtbl));
-   if 

bgpd switch communities to RB tree

2022-08-29 Thread Claudio Jeker
The hash table for communities is not great. Instead of implementing
dynamic hash resize use a RB tree. Also drop the hash calculation and
just use memcmp() for now. My non scientific test seems to indicate that
the overhead of SipHash is about the same as the memcmp().

-- 
:wq Claudio

Index: bgpd/rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.566
diff -u -p -r1.566 rde.c
--- bgpd/rde.c  29 Aug 2022 14:57:27 -  1.566
+++ bgpd/rde.c  29 Aug 2022 14:58:52 -
@@ -199,7 +199,6 @@ rde_main(int debug, int verbose)
pt_init();
path_init(pathhashsize);
aspath_init(pathhashsize);
-   communities_init(attrhashsize);
attr_init(attrhashsize);
nexthop_init(nexthophashsize);
peer_init(peerhashsize);
@@ -636,9 +635,6 @@ badnetdel:
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
aspath_hash_stats();
-   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
-   imsg.hdr.pid, -1, , sizeof(rdehash));
-   communities_hash_stats();
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_RIB_HASH, 0,
imsg.hdr.pid, -1, , sizeof(rdehash));
attr_hash_stats();
Index: bgpd/rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.263
diff -u -p -r1.263 rde.h
--- bgpd/rde.h  26 Aug 2022 14:10:52 -  1.263
+++ bgpd/rde.h  29 Aug 2022 14:58:52 -
@@ -183,9 +183,9 @@ struct mpattr {
 };
 
 struct rde_community {
-   LIST_ENTRY(rde_community)   entry;
-   size_t  size;
-   size_t  nentries;
+   RB_ENTRY(rde_community) entry;
+   int size;
+   int nentries;
int flags;
int refcnt;
struct community*communities;
@@ -486,9 +486,7 @@ int community_large_write(struct rde_com
 intcommunity_ext_write(struct rde_community *, int, void *, uint16_t);
 intcommunity_writebuf(struct ibuf *, struct rde_community *);
 
-voidcommunities_init(uint32_t);
 voidcommunities_shutdown(void);
-voidcommunities_hash_stats(struct rde_hashstats *);
 struct rde_community   *communities_lookup(struct rde_community *);
 struct rde_community   *communities_link(struct rde_community *);
 voidcommunities_unlink(struct rde_community *);
Index: bgpd/rde_community.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_community.c,v
retrieving revision 1.7
diff -u -p -r1.7 rde_community.c
--- bgpd/rde_community.c28 Jul 2022 13:11:51 -  1.7
+++ bgpd/rde_community.c29 Aug 2022 14:58:52 -
@@ -209,12 +209,12 @@ mask_match(struct community *a, struct c
 static void
 insert_community(struct rde_community *comm, struct community *c)
 {
-   size_t l;
+   int l;
int r;
 
if (comm->nentries + 1 > comm->size) {
struct community *new;
-   size_t newsize = comm->size + 8;
+   int newsize = comm->size + 8;
 
if ((new = reallocarray(comm->communities, newsize,
sizeof(struct community))) == NULL)
@@ -261,7 +261,7 @@ community_match(struct rde_community *co
 struct rde_peer *peer)
 {
struct community test, mask;
-   size_t l;
+   int l;
 
if (fc->flags >> 8 == 0) {
/* fast path */
@@ -288,7 +288,7 @@ struct rde_peer *peer)
 int
 community_count(struct rde_community *comm, uint8_t type)
 {
-   size_t l;
+   int l;
int count = 0;
 
/* use the fact that the array is ordered by type */
@@ -351,7 +351,7 @@ struct rde_peer *peer)
 {
struct community test, mask;
struct community *match;
-   size_t l = 0;
+   int l = 0;
 
if (fc->flags >> 8 == 0) {
/* fast path */
@@ -501,8 +501,8 @@ community_write(struct rde_community *co
 {
uint8_t *b = buf;
uint16_t c;
-   size_t l, n = 0;
-   int r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
+   size_t n = 0;
+   int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
 
if (comm->flags & PARTIAL_COMMUNITIES)
flags |= ATTR_PARTIAL;
@@ -545,8 +545,8 @@ community_large_write(struct rde_communi
 {
uint8_t *b = buf;
uint32_t c;
-   size_t l, n = 0;
-   int r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
+   size_t n = 0;
+   int l, r, flags = ATTR_OPTIONAL | ATTR_TRANSITIVE;
 
if (comm->flags & 

Re: bgpd/bgpctl report number of pending updates/withdraws

2022-08-29 Thread Claudio Jeker
On Mon, Aug 29, 2022 at 04:47:51PM +0200, Theo Buehler wrote:
> On Mon, Aug 29, 2022 at 04:41:56PM +0200, Claudio Jeker wrote:
> > The RDE has a queue of pending updates and withdraws. Those are already
> > counted but not shown. On big setups it may be helpful to know about the
> > queue progress.
> 
> I'm not sure "withdraws" is correct English. Shouldn't that be
> "withdrawals"?

Correct English would be "withdrawals". But then all of the tree should be
fixed if we want to change it.
 
> Apart from that, ok
> 
> > 
> > -- 
> > :wq Claudio
> > 
> > Index: bgpctl/output.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
> > retrieving revision 1.26
> > diff -u -p -r1.26 output.c
> > --- bgpctl/output.c 10 Aug 2022 10:21:47 -  1.26
> > +++ bgpctl/output.c 29 Aug 2022 14:38:09 -
> > @@ -221,13 +221,16 @@ show_neighbor_msgstats(struct peer *p)
> > p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive +
> > p->stats.msg_rcvd_rrefresh);
> > printf("  Update statistics:\n");
> > -   printf("  %-15s %-10s %-10s\n", "", "Sent", "Received");
> > +   printf("  %-15s %-10s %-10s %-10s\n", "", "Sent", "Received",
> > +   "Pending");
> > printf("  %-15s %10u %10u\n", "Prefixes",
> > p->stats.prefix_out_cnt, p->stats.prefix_cnt);
> > -   printf("  %-15s %10llu %10llu\n", "Updates",
> > -   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update);
> > -   printf("  %-15s %10llu %10llu\n", "Withdraws",
> > -   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw);
> > +   printf("  %-15s %10llu %10llu %10u\n", "Updates",
> > +   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
> > +   p->stats.pending_update);
> > +   printf("  %-15s %10llu %10llu %10u\n", "Withdraws",
> > +   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw,
> > +   p->stats.pending_withdraw);
> > printf("  %-15s %10llu %10llu\n", "End-of-Rib",
> > p->stats.prefix_sent_eor, p->stats.prefix_rcvd_eor);
> > printf("  Route Refresh statistics:\n");
> > Index: bgpctl/output_json.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
> > retrieving revision 1.20
> > diff -u -p -r1.20 output_json.c
> > --- bgpctl/output_json.c28 Jul 2022 10:40:25 -  1.20
> > +++ bgpctl/output_json.c29 Aug 2022 14:38:09 -
> > @@ -190,6 +190,11 @@ json_neighbor_stats(struct peer *p)
> > json_do_uint("eor", p->stats.prefix_rcvd_eor);
> > json_do_end();
> >  
> > +   json_do_object("pending");
> > +   json_do_uint("updates", p->stats.pending_update);
> > +   json_do_uint("withdraws", p->stats.pending_withdraw);
> > +   json_do_end();
> > +
> > json_do_end();
> >  
> > json_do_object("route-refresh");
> > Index: bgpd/rde.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> > retrieving revision 1.565
> > diff -u -p -r1.565 rde.c
> > --- bgpd/rde.c  26 Aug 2022 14:10:52 -  1.565
> > +++ bgpd/rde.c  29 Aug 2022 14:39:17 -
> > @@ -623,6 +623,8 @@ badnetdel:
> > peer->prefix_sent_withdraw;
> > p.stats.prefix_sent_eor =
> > peer->prefix_sent_eor;
> > +   p.stats.pending_update = peer->up_nlricnt;
> > +   p.stats.pending_withdraw = peer->up_wcnt;
> > }
> > imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
> > imsg.hdr.pid, -1, , sizeof(struct peer));
> > Index: bgpd/session.h
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> > retrieving revision 1.157
> > diff -u -p -r1.157 session.h
> > --- bgpd/session.h  28 Jul 2022 13:11:51 -  1.157
> > +++ bgpd/session.h  29 Aug 2022 14:38:43 -
> > @@ -179,6 +179,8 @@ struct peer_stats {
> > time_t   last_write;
> > uint32_t prefix_cnt;
> > uint32_t prefix_out_cnt;
> > +   uint32_t pending_update;
> > +   uint32_t pending_withdraw;
> > uint8_t  last_sent_errcode;
> > uint8_t  last_sent_suberr;
> > uint8_t  last_rcvd_errcode;
> > 
> 

-- 
:wq Claudio



Re: bgpd/bgpctl report number of pending updates/withdraws

2022-08-29 Thread Theo Buehler
On Mon, Aug 29, 2022 at 04:41:56PM +0200, Claudio Jeker wrote:
> The RDE has a queue of pending updates and withdraws. Those are already
> counted but not shown. On big setups it may be helpful to know about the
> queue progress.

I'm not sure "withdraws" is correct English. Shouldn't that be
"withdrawals"?

Apart from that, ok

> 
> -- 
> :wq Claudio
> 
> Index: bgpctl/output.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 output.c
> --- bgpctl/output.c   10 Aug 2022 10:21:47 -  1.26
> +++ bgpctl/output.c   29 Aug 2022 14:38:09 -
> @@ -221,13 +221,16 @@ show_neighbor_msgstats(struct peer *p)
>   p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive +
>   p->stats.msg_rcvd_rrefresh);
>   printf("  Update statistics:\n");
> - printf("  %-15s %-10s %-10s\n", "", "Sent", "Received");
> + printf("  %-15s %-10s %-10s %-10s\n", "", "Sent", "Received",
> + "Pending");
>   printf("  %-15s %10u %10u\n", "Prefixes",
>   p->stats.prefix_out_cnt, p->stats.prefix_cnt);
> - printf("  %-15s %10llu %10llu\n", "Updates",
> - p->stats.prefix_sent_update, p->stats.prefix_rcvd_update);
> - printf("  %-15s %10llu %10llu\n", "Withdraws",
> - p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw);
> + printf("  %-15s %10llu %10llu %10u\n", "Updates",
> + p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
> + p->stats.pending_update);
> + printf("  %-15s %10llu %10llu %10u\n", "Withdraws",
> + p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw,
> + p->stats.pending_withdraw);
>   printf("  %-15s %10llu %10llu\n", "End-of-Rib",
>   p->stats.prefix_sent_eor, p->stats.prefix_rcvd_eor);
>   printf("  Route Refresh statistics:\n");
> Index: bgpctl/output_json.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
> retrieving revision 1.20
> diff -u -p -r1.20 output_json.c
> --- bgpctl/output_json.c  28 Jul 2022 10:40:25 -  1.20
> +++ bgpctl/output_json.c  29 Aug 2022 14:38:09 -
> @@ -190,6 +190,11 @@ json_neighbor_stats(struct peer *p)
>   json_do_uint("eor", p->stats.prefix_rcvd_eor);
>   json_do_end();
>  
> + json_do_object("pending");
> + json_do_uint("updates", p->stats.pending_update);
> + json_do_uint("withdraws", p->stats.pending_withdraw);
> + json_do_end();
> +
>   json_do_end();
>  
>   json_do_object("route-refresh");
> Index: bgpd/rde.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
> retrieving revision 1.565
> diff -u -p -r1.565 rde.c
> --- bgpd/rde.c26 Aug 2022 14:10:52 -  1.565
> +++ bgpd/rde.c29 Aug 2022 14:39:17 -
> @@ -623,6 +623,8 @@ badnetdel:
>   peer->prefix_sent_withdraw;
>   p.stats.prefix_sent_eor =
>   peer->prefix_sent_eor;
> + p.stats.pending_update = peer->up_nlricnt;
> + p.stats.pending_withdraw = peer->up_wcnt;
>   }
>   imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
>   imsg.hdr.pid, -1, , sizeof(struct peer));
> Index: bgpd/session.h
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
> retrieving revision 1.157
> diff -u -p -r1.157 session.h
> --- bgpd/session.h28 Jul 2022 13:11:51 -  1.157
> +++ bgpd/session.h29 Aug 2022 14:38:43 -
> @@ -179,6 +179,8 @@ struct peer_stats {
>   time_t   last_write;
>   uint32_t prefix_cnt;
>   uint32_t prefix_out_cnt;
> + uint32_t pending_update;
> + uint32_t pending_withdraw;
>   uint8_t  last_sent_errcode;
>   uint8_t  last_sent_suberr;
>   uint8_t  last_rcvd_errcode;
> 



bgpd/bgpctl report number of pending updates/withdraws

2022-08-29 Thread Claudio Jeker
The RDE has a queue of pending updates and withdraws. Those are already
counted but not shown. On big setups it may be helpful to know about the
queue progress.

-- 
:wq Claudio

Index: bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.26
diff -u -p -r1.26 output.c
--- bgpctl/output.c 10 Aug 2022 10:21:47 -  1.26
+++ bgpctl/output.c 29 Aug 2022 14:38:09 -
@@ -221,13 +221,16 @@ show_neighbor_msgstats(struct peer *p)
p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive +
p->stats.msg_rcvd_rrefresh);
printf("  Update statistics:\n");
-   printf("  %-15s %-10s %-10s\n", "", "Sent", "Received");
+   printf("  %-15s %-10s %-10s %-10s\n", "", "Sent", "Received",
+   "Pending");
printf("  %-15s %10u %10u\n", "Prefixes",
p->stats.prefix_out_cnt, p->stats.prefix_cnt);
-   printf("  %-15s %10llu %10llu\n", "Updates",
-   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update);
-   printf("  %-15s %10llu %10llu\n", "Withdraws",
-   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw);
+   printf("  %-15s %10llu %10llu %10u\n", "Updates",
+   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
+   p->stats.pending_update);
+   printf("  %-15s %10llu %10llu %10u\n", "Withdraws",
+   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw,
+   p->stats.pending_withdraw);
printf("  %-15s %10llu %10llu\n", "End-of-Rib",
p->stats.prefix_sent_eor, p->stats.prefix_rcvd_eor);
printf("  Route Refresh statistics:\n");
Index: bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.20
diff -u -p -r1.20 output_json.c
--- bgpctl/output_json.c28 Jul 2022 10:40:25 -  1.20
+++ bgpctl/output_json.c29 Aug 2022 14:38:09 -
@@ -190,6 +190,11 @@ json_neighbor_stats(struct peer *p)
json_do_uint("eor", p->stats.prefix_rcvd_eor);
json_do_end();
 
+   json_do_object("pending");
+   json_do_uint("updates", p->stats.pending_update);
+   json_do_uint("withdraws", p->stats.pending_withdraw);
+   json_do_end();
+
json_do_end();
 
json_do_object("route-refresh");
Index: bgpd/rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.565
diff -u -p -r1.565 rde.c
--- bgpd/rde.c  26 Aug 2022 14:10:52 -  1.565
+++ bgpd/rde.c  29 Aug 2022 14:39:17 -
@@ -623,6 +623,8 @@ badnetdel:
peer->prefix_sent_withdraw;
p.stats.prefix_sent_eor =
peer->prefix_sent_eor;
+   p.stats.pending_update = peer->up_nlricnt;
+   p.stats.pending_withdraw = peer->up_wcnt;
}
imsg_compose(ibuf_se_ctl, IMSG_CTL_SHOW_NEIGHBOR, 0,
imsg.hdr.pid, -1, , sizeof(struct peer));
Index: bgpd/session.h
===
RCS file: /cvs/src/usr.sbin/bgpd/session.h,v
retrieving revision 1.157
diff -u -p -r1.157 session.h
--- bgpd/session.h  28 Jul 2022 13:11:51 -  1.157
+++ bgpd/session.h  29 Aug 2022 14:38:43 -
@@ -179,6 +179,8 @@ struct peer_stats {
time_t   last_write;
uint32_t prefix_cnt;
uint32_t prefix_out_cnt;
+   uint32_t pending_update;
+   uint32_t pending_withdraw;
uint8_t  last_sent_errcode;
uint8_t  last_sent_suberr;
uint8_t  last_rcvd_errcode;



Re: uvmpd_dropswap()

2022-08-29 Thread Mike Larkin
On Mon, Aug 29, 2022 at 01:58:38PM +0200, Martin Pieuchot wrote:
> Small refactoring to introduce uvmpd_dropswap().  This will make an
> upcoming rewrite of the pdaemon smaller & easier to review :o)
>
> ok?
>

reads ok to me. ok mlarkin

> Index: uvm/uvm_pdaemon.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c 22 Aug 2022 12:03:32 -  1.102
> +++ uvm/uvm_pdaemon.c 29 Aug 2022 11:55:52 -
> @@ -105,6 +105,7 @@ void  uvmpd_scan(struct uvm_pmalloc *);
>  void uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
>  void uvmpd_tune(void);
>  void uvmpd_drop(struct pglist *);
> +void uvmpd_dropswap(struct vm_page *);
>
>  /*
>   * uvm_wait: wait (sleep) for the page daemon to free some pages
> @@ -367,6 +368,23 @@ uvm_aiodone_daemon(void *arg)
>  }
>
>
> +/*
> + * uvmpd_dropswap: free any swap allocated to this page.
> + *
> + * => called with owner locked.
> + */
> +void
> +uvmpd_dropswap(struct vm_page *pg)
> +{
> + struct vm_anon *anon = pg->uanon;
> +
> + if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
> + uvm_swap_free(anon->an_swslot, 1);
> + anon->an_swslot = 0;
> + } else if (pg->pg_flags & PQ_AOBJ) {
> + uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
> + }
> +}
>
>  /*
>   * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
> @@ -566,16 +584,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>   KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
>   if ((p->pg_flags & PQ_SWAPBACKED) &&
>   uvmexp.swpginuse == uvmexp.swpages) {
> -
> - if ((p->pg_flags & PQ_ANON) &&
> - p->uanon->an_swslot) {
> - uvm_swap_free(p->uanon->an_swslot, 1);
> - p->uanon->an_swslot = 0;
> - }
> - if (p->pg_flags & PQ_AOBJ) {
> - uao_dropswap(p->uobject,
> -  p->offset >> PAGE_SHIFT);
> - }
> + uvmpd_dropswap(p);
>   }
>
>   /*
> @@ -599,16 +608,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>*/
>   if (swap_backed) {
>   /* free old swap slot (if any) */
> - if (anon) {
> - if (anon->an_swslot) {
> - uvm_swap_free(anon->an_swslot,
> - 1);
> - anon->an_swslot = 0;
> - }
> - } else {
> - uao_dropswap(uobj,
> -  p->offset >> PAGE_SHIFT);
> - }
> + uvmpd_dropswap(p);
>
>   /* start new cluster (if necessary) */
>   if (swslot == 0) {
>



bgpd speedup diff

2022-08-29 Thread Claudio Jeker
On large bgpd instances the hash tables used for rde_aspath, aspath and
communities get overloaded to a point that aspath_get() consumes a large
amount of CPU time.

This diff improves the situation by a) using a RB tree for rde_aspath and
communities and b) dropping the hash table for aspath altogether. In
most cases the memory saving of the aspath cache does not justify the
extra CPU the lookups consume (even when using an RB tree, aspath_get() is
so hot that it uses 30-40% CPU).

This is one big diff but if people prefer I can split it up.
Tested on the route collector with 80Mio prefixes (where this diff has a
noticeable effect).
-- 
:wq Claudio

Index: usr.sbin/bgpctl/output.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output.c,v
retrieving revision 1.26
diff -u -p -r1.26 output.c
--- usr.sbin/bgpctl/output.c10 Aug 2022 10:21:47 -  1.26
+++ usr.sbin/bgpctl/output.c25 Aug 2022 13:31:27 -
@@ -221,13 +221,16 @@ show_neighbor_msgstats(struct peer *p)
p->stats.msg_rcvd_update + p->stats.msg_rcvd_keepalive +
p->stats.msg_rcvd_rrefresh);
printf("  Update statistics:\n");
-   printf("  %-15s %-10s %-10s\n", "", "Sent", "Received");
+   printf("  %-15s %-10s %-10s %-10s\n", "", "Sent", "Received",
+   "Pending");
printf("  %-15s %10u %10u\n", "Prefixes",
p->stats.prefix_out_cnt, p->stats.prefix_cnt);
-   printf("  %-15s %10llu %10llu\n", "Updates",
-   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update);
-   printf("  %-15s %10llu %10llu\n", "Withdraws",
-   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw);
+   printf("  %-15s %10llu %10llu %10u\n", "Updates",
+   p->stats.prefix_sent_update, p->stats.prefix_rcvd_update,
+   p->stats.pending_update);
+   printf("  %-15s %10llu %10llu %10u\n", "Withdraws",
+   p->stats.prefix_sent_withdraw, p->stats.prefix_rcvd_withdraw,
+   p->stats.pending_withdraw);
printf("  %-15s %10llu %10llu\n", "End-of-Rib",
p->stats.prefix_sent_eor, p->stats.prefix_rcvd_eor);
printf("  Route Refresh statistics:\n");
@@ -1000,9 +1003,7 @@ show_rib_mem(struct rde_memstats *stats)
printf("\t   and holding %lld references\n",
stats->path_refs);
printf("%10lld BGP AS-PATH attribute entries using "
-   "%s of memory\n\t   and holding %lld references\n",
-   stats->aspath_cnt, fmt_mem(stats->aspath_size),
-   stats->aspath_refs);
+   "%s of memory\n", stats->aspath_cnt, fmt_mem(stats->aspath_size));
printf("%10lld entries for %lld BGP communities "
"using %s of memory\n", stats->comm_cnt, stats->comm_nmemb,
fmt_mem(stats->comm_cnt * sizeof(struct rde_community) +
Index: usr.sbin/bgpctl/output_json.c
===
RCS file: /cvs/src/usr.sbin/bgpctl/output_json.c,v
retrieving revision 1.20
diff -u -p -r1.20 output_json.c
--- usr.sbin/bgpctl/output_json.c   28 Jul 2022 10:40:25 -  1.20
+++ usr.sbin/bgpctl/output_json.c   25 Aug 2022 13:31:14 -
@@ -190,6 +190,11 @@ json_neighbor_stats(struct peer *p)
json_do_uint("eor", p->stats.prefix_rcvd_eor);
json_do_end();
 
+   json_do_object("pending");
+   json_do_uint("updates", p->stats.pending_update);
+   json_do_uint("withdraws", p->stats.pending_withdraw);
+   json_do_end();
+
json_do_end();
 
json_do_object("route-refresh");
@@ -931,7 +936,7 @@ json_rib_mem(struct rde_memstats *stats)
stats->path_cnt * sizeof(struct rde_aspath),
stats->path_refs);
json_rib_mem_element("aspath", stats->aspath_cnt,
-   stats->aspath_size, stats->aspath_refs);
+   stats->aspath_size, UINT64_MAX);
json_rib_mem_element("community_entries", stats->comm_cnt,
stats->comm_cnt * sizeof(struct rde_community), UINT64_MAX);
json_rib_mem_element("community", stats->comm_nmemb,
Index: usr.sbin/bgpd/bgpd.h
===
RCS file: /cvs/src/usr.sbin/bgpd/bgpd.h,v
retrieving revision 1.450
diff -u -p -r1.450 bgpd.h
--- usr.sbin/bgpd/bgpd.h26 Aug 2022 14:10:52 -  1.450
+++ usr.sbin/bgpd/bgpd.h29 Aug 2022 12:07:01 -
@@ -1193,7 +1193,6 @@ struct rde_memstats {
long long   nexthop_cnt;
long long   aspath_cnt;
long long   aspath_size;
-   long long   aspath_refs;
long long   comm_cnt;
long long   comm_nmemb;
long long   comm_size;
Index: usr.sbin/bgpd/rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.565
diff -u -p -r1.565 rde.c
--- usr.sbin/bgpd/rde.c 26 Aug 2022 14:10:52 -  1.565
+++ 

Re: static inline, not inline static

2022-08-29 Thread Anders Andersson
On Mon, Aug 29, 2022 at 2:01 AM Philip Guenther  wrote:
>
> On Sun, Aug 28, 2022 at 2:11 PM Anders Andersson  wrote:
>>
>> On Sun, Aug 28, 2022 at 3:15 PM Jonathan Gray  wrote:
>> >
>> > diff --git lib/libc/locale/wctoint.h lib/libc/locale/wctoint.h
>> > index ea50c5ae1b6..14c7f0c466d 100644
>> > --- lib/libc/locale/wctoint.h
>> > +++ lib/libc/locale/wctoint.h
>> > @@ -30,7 +30,7 @@
>> >   */
>> >
>> >
>> > -inline static int
>> > +static inline int
>> >  wctoint(wchar_t wc)
>> >  {
>> > int n;
>> > [...]
>>
>> Why this change? As far as I can see, the standard allows for any order.
>
>
> C99 standard stated:
> "The placement of a storage-class specifier other than at the beginning of 
> the declaration
>  specifiers in a declaration is an obsolescent feature.
>
> My recall is that it was officially removed in C11.
>
> ok guenther@

This intrigued me so I had to do some archaeology. The grammar allows
any random order from K&R first edition up to (and including) this
month's C23 draft, but the warning has been in there since the first
ANSI C standard (and is still in C23).

Maybe someone forgot to push for this, and no one dares to remove the warning.



uvmpd_dropswap()

2022-08-29 Thread Martin Pieuchot
Small refactoring to introduce uvmpd_dropswap().  This will make an
upcoming rewrite of the pdaemon smaller & easier to review :o)

ok?

Index: uvm/uvm_pdaemon.c
===
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.102
diff -u -p -r1.102 uvm_pdaemon.c
--- uvm/uvm_pdaemon.c   22 Aug 2022 12:03:32 -  1.102
+++ uvm/uvm_pdaemon.c   29 Aug 2022 11:55:52 -
@@ -105,6 +105,7 @@ voiduvmpd_scan(struct uvm_pmalloc *);
 void   uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
 void   uvmpd_tune(void);
 void   uvmpd_drop(struct pglist *);
+void   uvmpd_dropswap(struct vm_page *);
 
 /*
  * uvm_wait: wait (sleep) for the page daemon to free some pages
@@ -367,6 +368,23 @@ uvm_aiodone_daemon(void *arg)
 }
 
 
+/*
+ * uvmpd_dropswap: free any swap allocated to this page.
+ *
+ * => called with owner locked.
+ */
+void
+uvmpd_dropswap(struct vm_page *pg)
+{
+   struct vm_anon *anon = pg->uanon;
+
+   if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
+   uvm_swap_free(anon->an_swslot, 1);
+   anon->an_swslot = 0;
+   } else if (pg->pg_flags & PQ_AOBJ) {
+   uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
+   }
+}
 
 /*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
@@ -566,16 +584,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
if ((p->pg_flags & PQ_SWAPBACKED) &&
uvmexp.swpginuse == uvmexp.swpages) {
-
-   if ((p->pg_flags & PQ_ANON) &&
-   p->uanon->an_swslot) {
-   uvm_swap_free(p->uanon->an_swslot, 1);
-   p->uanon->an_swslot = 0;
-   }
-   if (p->pg_flags & PQ_AOBJ) {
-   uao_dropswap(p->uobject,
-p->offset >> PAGE_SHIFT);
-   }
+   uvmpd_dropswap(p);
}
 
/*
@@ -599,16 +608,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
 */
if (swap_backed) {
/* free old swap slot (if any) */
-   if (anon) {
-   if (anon->an_swslot) {
-   uvm_swap_free(anon->an_swslot,
-   1);
-   anon->an_swslot = 0;
-   }
-   } else {
-   uao_dropswap(uobj,
-p->offset >> PAGE_SHIFT);
-   }
+   uvmpd_dropswap(p);
 
/* start new cluster (if necessary) */
if (swslot == 0) {



Re: [PATCH] Correctly (per POSIX) round up df usage percentage

2022-08-29 Thread наб
On Mon, Aug 29, 2022 at 05:10:10AM -0600, Theo de Raadt wrote:
> I would really prefer if this did not need floating point.

In that case, how about this scissor-patch?
It has the added benefit of removing the existing floating-point usage.

Best,

-- >8 --
Subject: [PATCH] Correctly (per POSIX) round up df usage percentage

Quoth POSIX Issue 7:
  <percentage used>
The percentage of the normally available space that is currently
allocated to all files on the file system. This shall be calculated
using the fraction:
  <space used>/(<space used> + <space free>)
expressed as a percentage. This percentage may be greater than 100
if <space free> is less than zero. The percentage value shall be
expressed as a positive integer, with any fractional result causing
it to be rounded to the next highest integer.

Nominally this only applies to -P and -Pk behaviour (the last hunk),
but for consistency it may be best to apply that everywhere
---
 bin/df/df.c | 23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/bin/df/df.c b/bin/df/df.c
index fd51f906f89..1f235dadeb4 100644
--- a/bin/df/df.c
+++ b/bin/df/df.c
@@ -51,6 +51,7 @@ intbread(int, off_t, void *, int);
 static void bsdprint(struct statfs *, long, int);
 char   *getmntpt(char *);
 static void maketypelist(char *);
+static int  percent(u_int64_t, u_int64_t);
 static void posixprint(struct statfs *, long, int);
 static void prthuman(struct statfs *sfsp, unsigned long long);
 static void prthumanval(long long);
@@ -323,13 +324,12 @@ prtstat(struct statfs *sfsp, int maxwidth, int headerlen, 
int blocksize)
fsbtoblk(sfsp->f_blocks, sfsp->f_bsize, blocksize),
fsbtoblk(used, sfsp->f_bsize, blocksize),
fsbtoblk(sfsp->f_bavail, sfsp->f_bsize, blocksize));
-   (void)printf(" %5.0f%%",
-   availblks == 0 ? 100.0 : (double)used / (double)availblks * 100.0);
+   (void)printf(" %5d%%", percent(used, availblks));
if (iflag) {
inodes = sfsp->f_files;
used = inodes - sfsp->f_ffree;
-   (void)printf(" %7llu %7llu %5.0f%% ", used, sfsp->f_ffree,
-  inodes == 0 ? 100.0 : (double)used / (double)inodes * 100.0);
+   (void)printf(" %7llu %7llu %5d%% ", used, sfsp->f_ffree,
+  percent(used, inodes));
} else
(void)printf("  ");
(void)printf("  %s\n", sfsp->f_mntonname);
@@ -372,6 +372,12 @@ bsdprint(struct statfs *mntbuf, long mntsize, int maxwidth)
return;
 }
 
+static int
+percent(u_int64_t used, u_int64_t avail)
+{
+   return avail ? (100 * used + (avail - 1)) / avail : 100;
+}
+
 /*
  * Print in format defined by POSIX 1002.2, invoke with -P option.
  */
@@ -383,7 +389,6 @@ posixprint(struct statfs *mntbuf, long mntsize, int 
maxwidth)
char *blockstr;
struct statfs *sfsp;
long long used, avail;
-   double percentused;
 
if (kflag) {
blocksize = 1024;
@@ -401,18 +406,14 @@ posixprint(struct statfs *mntbuf, long mntsize, int 
maxwidth)
sfsp = [i];
used = sfsp->f_blocks - sfsp->f_bfree;
avail = sfsp->f_bavail + used;
-   if (avail == 0)
-   percentused = 100.0;
-   else
-   percentused = (double)used / (double)avail * 100.0;
 
-   (void) printf ("%-*.*s %*lld %10lld %11lld %5.0f%%   %s\n",
+   (void) printf ("%-*.*s %*lld %10lld %11lld %5d%%   %s\n",
maxwidth, maxwidth, sfsp->f_mntfromname,
(int)strlen(blockstr),
fsbtoblk(sfsp->f_blocks, sfsp->f_bsize, blocksize),
fsbtoblk(used, sfsp->f_bsize, blocksize),
fsbtoblk(sfsp->f_bavail, sfsp->f_bsize, blocksize),
-   percentused, sfsp->f_mntonname);
+   percent(used, avail), sfsp->f_mntonname);
}
 }
 
-- 
2.30.2


signature.asc
Description: PGP signature


pdaemon locking tweak

2022-08-29 Thread Martin Pieuchot
Diff below refactors the pdaemon's locking by introducing a new *trylock()
function for a given page.  This is shamelessly stolen from NetBSD.

This is part of my ongoing effort to untangle the locks used by the page
daemon.

ok?

Index: uvm//uvm_pdaemon.c
===
RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
retrieving revision 1.102
diff -u -p -r1.102 uvm_pdaemon.c
--- uvm//uvm_pdaemon.c  22 Aug 2022 12:03:32 -  1.102
+++ uvm//uvm_pdaemon.c  29 Aug 2022 11:36:59 -
@@ -101,6 +101,7 @@ extern void drmbackoff(long);
  * local prototypes
  */
 
+struct rwlock  *uvmpd_trylockowner(struct vm_page *);
 void   uvmpd_scan(struct uvm_pmalloc *);
 void   uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
 void   uvmpd_tune(void);
@@ -367,6 +368,34 @@ uvm_aiodone_daemon(void *arg)
 }
 
 
+/*
+ * uvmpd_trylockowner: trylock the page's owner.
+ *
+ * => return the locked rwlock on success.  otherwise, return NULL.
+ */
+struct rwlock *
+uvmpd_trylockowner(struct vm_page *pg)
+{
+
+   struct uvm_object *uobj = pg->uobject;
+   struct rwlock *slock;
+
+   if (uobj != NULL) {
+   slock = uobj->vmobjlock;
+   } else {
+   struct vm_anon *anon = pg->uanon;
+
+   KASSERT(anon != NULL);
+   slock = anon->an_lock;
+   }
+
+   if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
+   return NULL;
+   }
+
+   return slock;
+}
+
 
 /*
  * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
@@ -454,53 +483,44 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
uvmexp.pdscans++;
nextpg = TAILQ_NEXT(p, pageq);
 
+   /*
+* move referenced pages back to active queue
+* and skip to next page.
+*/
+   if (pmap_is_referenced(p)) {
+   uvm_pageactivate(p);
+   uvmexp.pdreact++;
+   continue;
+   }
+
anon = p->uanon;
uobj = p->uobject;
-   if (p->pg_flags & PQ_ANON) {
+
+   /*
+* first we attempt to lock the object that this page
+* belongs to.  if our attempt fails we skip on to
+* the next page (no harm done).  it is important to
+* "try" locking the object as we are locking in the
+* wrong order (pageq -> object) and we don't want to
+* deadlock.
+*/
+   slock = uvmpd_trylockowner(p);
+   if (slock == NULL) {
+   continue;
+   }
+
+   if (p->pg_flags & PG_BUSY) {
+   rw_exit(slock);
+   uvmexp.pdbusy++;
+   continue;
+   }
+
+   /* does the page belong to an object? */
+   if (uobj != NULL) {
+   uvmexp.pdobscan++;
+   } else {
KASSERT(anon != NULL);
-   slock = anon->an_lock;
-   if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
-   /* lock failed, skip this page */
-   continue;
-   }
-   /*
-* move referenced pages back to active queue
-* and skip to next page.
-*/
-   if (pmap_is_referenced(p)) {
-   uvm_pageactivate(p);
-   rw_exit(slock);
-   uvmexp.pdreact++;
-   continue;
-   }
-   if (p->pg_flags & PG_BUSY) {
-   rw_exit(slock);
-   uvmexp.pdbusy++;
-   continue;
-   }
uvmexp.pdanscan++;
-   } else {
-   KASSERT(uobj != NULL);
-   slock = uobj->vmobjlock;
-   if (rw_enter(slock, RW_WRITE|RW_NOSLEEP)) {
-   continue;
-   }
-   /*
-* move referenced pages back to active queue
-* and skip 

Re: unbound update

2022-08-29 Thread Theo Buehler
On Wed, Aug 24, 2022 at 03:03:01PM +0100, Stuart Henderson wrote:
> Anyone want to test this?
> 
> Any OKs?

Works fine here and nothing jumps out at me in the diff.

ok tb



Re: refactor pcb lookup

2022-08-29 Thread Alexander Bluhm
Anyone?

On Sat, Aug 20, 2022 at 03:24:28PM +0200, Alexander Bluhm wrote:
> Hi,
> 
> Can we rename the function in_pcbhashlookup() to in_pcblookup()?
> Then we have in_pcblookup() and in_pcblookup_listen() as public PCB
> interface.  Using a hash table is only an implementation detail.
> 
> For internal use I would like to introduce in_pcbhash_insert() and
> in_pcbhash_lookup() to avoid code duplication.
> 
> Routing domain is unsigned, change the type to u_int.
> 
> If the diff is too large for review, I can split these parts.
> 
> ok?
> 
> bluhm
> 
> Index: net/pf.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pf.c,v
> retrieving revision 1.1137
> diff -u -p -r1.1137 pf.c
> --- net/pf.c  8 Aug 2022 12:06:30 -   1.1137
> +++ net/pf.c  19 Aug 2022 16:22:47 -
> @@ -3348,7 +3348,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
>* Fails when rtable is changed while evaluating the ruleset
>* The socket looked up will not match the one hit in the end.
>*/
> - inp = in_pcbhashlookup(tb, saddr->v4, sport, daddr->v4, dport,
> + inp = in_pcblookup(tb, saddr->v4, sport, daddr->v4, dport,
>   pd->rdomain);
>   if (inp == NULL) {
>   inp = in_pcblookup_listen(tb, daddr->v4, dport,
> @@ -3359,7 +3359,7 @@ pf_socket_lookup(struct pf_pdesc *pd)
>   break;
>  #ifdef INET6
>   case AF_INET6:
> - inp = in6_pcbhashlookup(tb, >v6, sport, >v6,
> + inp = in6_pcblookup(tb, >v6, sport, >v6,
>   dport, pd->rdomain);
>   if (inp == NULL) {
>   inp = in6_pcblookup_listen(tb, >v6, dport,
> Index: netinet/in_pcb.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/in_pcb.c,v
> retrieving revision 1.270
> diff -u -p -r1.270 in_pcb.c
> --- netinet/in_pcb.c  8 Aug 2022 12:06:30 -   1.270
> +++ netinet/in_pcb.c  19 Aug 2022 20:41:23 -
> @@ -120,14 +120,16 @@ struct baddynamicports baddynamicports;
>  struct baddynamicports rootonlyports;
>  struct pool inpcb_pool;
> 
> -void in_pcbrehash_locked(struct inpcb *);
> +void in_pcbhash_insert(struct inpcb *);
> +struct inpcb *in_pcbhash_lookup(struct inpcbtable *, u_int,
> +const struct in_addr *, u_short, const struct in_addr *, u_short);
>  int  in_pcbresize(struct inpcbtable *, int);
> 
>  #define  INPCBHASH_LOADFACTOR(_x)(((_x) * 3) / 4)
> 
> -struct inpcbhead *in_pcbhash(struct inpcbtable *, int,
> +struct inpcbhead *in_pcbhash(struct inpcbtable *, u_int,
>  const struct in_addr *, u_short, const struct in_addr *, u_short);
> -struct inpcbhead *in_pcblhash(struct inpcbtable *, int, u_short);
> +struct inpcbhead *in_pcblhash(struct inpcbtable *, u_int, u_short);
> 
>  /*
>   * in_pcb is used for inet and inet6.  in6_pcb only contains special
> @@ -141,12 +143,12 @@ in_init(void)
>  }
> 
>  struct inpcbhead *
> -in_pcbhash(struct inpcbtable *table, int rdom,
> +in_pcbhash(struct inpcbtable *table, u_int rdomain,
>  const struct in_addr *faddr, u_short fport,
>  const struct in_addr *laddr, u_short lport)
>  {
>   SIPHASH_CTX ctx;
> - u_int32_t nrdom = htonl(rdom);
> + u_int32_t nrdom = htonl(rdomain);
> 
>   SipHash24_Init(, >inpt_key);
>   SipHash24_Update(, , sizeof(nrdom));
> @@ -159,10 +161,10 @@ in_pcbhash(struct inpcbtable *table, int
>  }
> 
>  struct inpcbhead *
> -in_pcblhash(struct inpcbtable *table, int rdom, u_short lport)
> +in_pcblhash(struct inpcbtable *table, u_int rdomain, u_short lport)
>  {
>   SIPHASH_CTX ctx;
> - u_int32_t nrdom = htonl(rdom);
> + u_int32_t nrdom = htonl(rdomain);
> 
>   SipHash24_Init(, >inpt_lkey);
>   SipHash24_Update(, , sizeof(nrdom));
> @@ -226,9 +228,6 @@ int
>  in_pcballoc(struct socket *so, struct inpcbtable *table)
>  {
>   struct inpcb *inp;
> - struct inpcbhead *head;
> -
> - NET_ASSERT_LOCKED();
> 
>   inp = pool_get(_pool, PR_NOWAIT|PR_ZERO);
>   if (inp == NULL)
> @@ -257,19 +256,7 @@ in_pcballoc(struct socket *so, struct in
>   if (table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_size))
>   (void)in_pcbresize(table, table->inpt_size * 2);
>   TAILQ_INSERT_HEAD(>inpt_queue, inp, inp_queue);
> - head = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport);
> - LIST_INSERT_HEAD(head, inp, inp_lhash);
> -#ifdef INET6
> - if (sotopf(so) == PF_INET6)
> - head = in6_pcbhash(table, rtable_l2(inp->inp_rtableid),
> - >inp_faddr6, inp->inp_fport,
> - >inp_laddr6, inp->inp_lport);
> - else
> -#endif /* INET6 */
> - head = in_pcbhash(table, rtable_l2(inp->inp_rtableid),
> - >inp_faddr, inp->inp_fport,
> - >inp_laddr, inp->inp_lport);
> - LIST_INSERT_HEAD(head, 

boot_*.8: reference installboot.8, rectify FFS note on sparc64

2022-08-29 Thread Klemens Nanni
On sparc64, installboot does not deal with any file system; its -p is a NOOP
and the old sys/arch/sparc64/installboot/ never prepared a file system,
either, according to CVS history.

Remove this single misleading mention altogether and rely on the now
consistently cross-linked MI installboot(8) for details, if any.

This syncs the list of currently existing MD boot_*(8/*) pages with the list
of usr.sbin/installboot/*_installboot.c files.

boot_alpha(8/alpha) already references installboot(8) but still uses
sys/arch/alpha/stand/installboot/ instead of usr.sbin/installboot/.

boot_luna88k(8/luna88k) exists but luna88k does not use installboot at all.

installboot(8) has additional support for efi (armv7, arm64, riscv64),
landisk, loongson, octeon and powerpc64 but those architectures currently
lack their own MD boot_*(8/*) manual.

Feedback? OK?

diff --git a/share/man/man8/man8.hppa/boot_hppa.8 
b/share/man/man8/man8.hppa/boot_hppa.8
index b4070bc9789..c119e36d8ce 100644
--- a/share/man/man8/man8.hppa/boot_hppa.8
+++ b/share/man/man8/man8.hppa/boot_hppa.8
@@ -340,6 +340,7 @@ system bootstrap (usually also installed as
 .Xr dhcpd 8 ,
 .Xr halt 8 ,
 .Xr init 8 ,
+.Xr installboot 8 ,
 .Xr rbootd 8 ,
 .Xr reboot 8 ,
 .Xr savecore 8 ,
diff --git a/share/man/man8/man8.macppc/boot_macppc.8 
b/share/man/man8/man8.macppc/boot_macppc.8
index af4fbbceedf..20af14cfae0 100644
--- a/share/man/man8/man8.macppc/boot_macppc.8
+++ b/share/man/man8/man8.macppc/boot_macppc.8
@@ -172,6 +172,7 @@ Apple HFS partition, to be readable by Open Firmware)
 .Xr boot_config 8 ,
 .Xr halt 8 ,
 .Xr init 8 ,
+.Xr installboot 8 ,
 .Xr reboot 8 ,
 .Xr savecore 8 ,
 .Xr shutdown 8
diff --git a/share/man/man8/man8.sparc64/boot_sparc64.8 
b/share/man/man8/man8.sparc64/boot_sparc64.8
index 90d9339f93c..2d6ab05e278 100644
--- a/share/man/man8/man8.sparc64/boot_sparc64.8
+++ b/share/man/man8/man8.sparc64/boot_sparc64.8
@@ -66,10 +66,6 @@ respectively.
 The second-stage boot program commonly resides in the root directory as
 .Pa /ofwboot .
 .Pp
-System boot blocks are installed using
-.Xr installboot 8 ,
-which prepares an FFS filesystem partition for boot-strapping from the PROM.
-.Pp
 The boot program attempts to load the kernel from the selected
 boot device, which must currently be an SCSI
 .Pq Pa sd



Re: [PATCH] Correctly (per POSIX) round up df usage percentage

2022-08-29 Thread Theo de Raadt
I would really prefer if this did not need floating point.

>From owner-tech+m90...@openbsd.org Mon Aug 29 03:52:24 2022
>Delivered-To: dera...@cvs.openbsd.org
>Date: Mon, 29 Aug 2022 11:47:16 +0200
>From: =?utf-8?B?0L3QsNCx?= 
>To: Stuart Henderson 
>Cc: tech@openbsd.org
>Subject: Re: [PATCH] Correctly (per POSIX) round up df usage percentage
>References: <20220827135316.4l2aawaoylbmb...@tarta.nabijaczleweli.xyz>
> 
>MIME-Version: 1.0
>Content-Type: multipart/signed; micalg=pgp-sha512;
>   protocol="application/pgp-signature"; boundary="g24jmql4w5mrdtfc"
>Content-Disposition: inline
>In-Reply-To: 
>User-Agent: NeoMutt/20220429
>List-Help: 
>List-ID: 
>List-Owner: 
>List-Post: 
>List-Subscribe: 
>List-Unsubscribe: 
>X-Loop: tech@openbsd.org
>Precedence: list
>Sender: owner-t...@openbsd.org
>
>
>--g24jmql4w5mrdtfc
>Content-Type: text/plain; charset=utf-8
>Content-Disposition: inline
>Content-Transfer-Encoding: quoted-printable
>
>Hi!
>
>On Sat, Aug 27, 2022 at 05:20:00PM +0100, Stuart Henderson wrote:
>> df is used on the ramdisk, so this would need testing there (at least on
>> the tighter media on some archs).
>>=20
>> at least one other ramdisk binary does pull in libm so the overall size
>> increase might not be terrible, but definitely would need checking.
>
>I lack the knowledge and capability to do testing beyond "I just built
>the patched version on CI and it rounded correctly", sorry.
>
>But if linking to libm is an issue, we could substitute the ceil(3)s
>with something trivial like
>-- >8 --
>int iceil(double d) {
>  int ret =3D d;
>  if(ret < d)
>++ret;
>  return ret;
>}
>-- >8 --
>since all the formatting specifiers use %.0f anyway and the [0, 200)
>domain is small enough that precision loss/overflow is not a factor.
>
>Best,
>=D0=BD=D0=B0=D0=B1
>
>--g24jmql4w5mrdtfc
>Content-Type: application/pgp-signature; name="signature.asc"
>
>-BEGIN PGP SIGNATURE-
>
>iQIzBAABCgAdFiEEfWlHToQCjFzAxEFjvP0LAY0mWPEFAmMMiyEACgkQvP0LAY0m
>WPFsSBAAqhg/9kgI6wLF1tX/S8PslI/LcIarSSw5C97acfarU9KbI++k1doVsKL4
>MeCXfBYqHVN4abVjpnQdVbf+wxsbj0g+SYNj+bEs1kQnpt0444tfgN8ea/6KrDS2
>l4eUHss1Bp667BoWyil8MgIESyPyswxdRwk5Ik/sXCSj7f89xTJAnwfDuuqEZpU9
>/xIey7E0d72lhpiTXDFdB50TsH3Cvtggf/ImXgbLdALZ1L7j8LurOu5jtjENTWtL
>4lkP/h6J46yuh0NAAFQk35mP48zeuMuICt5KxdlSTJAdmrDpQz+4W74g0izF1Lqs
>SNUWrkIHYirtPtjbPLB/+FgQt9okGY7ZGrRFSAwR0jxQh28+xTxpyaLSbrUqxYor
>rKG5JsNtCkV9ylXHpeQNeXu7uGy7ZO6mdnVGnwK7CdiQCU0QQ/BbbnNJ1lyhgkiB
>gHuK4MibINo/psF33tDMfN4hck6X6FW6mfemYei8sEsVLI/L1ioECJLXLcUyZKGl
>v00g4usuAl8Xb/WRhSOnGc98HQEcDuLtiE3gc8VT30NsQsKruPtX4gf47aGUbt8S
>dRVM5sdSAaxMixS7VN2cmIOJtt1y4GysrO7v3Gt06mvcYLrRLkfazIWYMXTjpgRT
>7k4fTZqMFWa1wDuPe9wL72oVv/VKZsKUkfkpj3WQmFO9Wu4yCp8=
>=RKzL
>-END PGP SIGNATURE-
>
>--g24jmql4w5mrdtfc--
>
>



Re: remove net/ofp.h? switch(4) remnant

2022-08-29 Thread Stuart Henderson

It could move to a private header in tcpdump though.

--
 Sent from a phone, apologies for poor formatting.

On 29 August 2022 08:03:30 Klemens Nanni  wrote:


Scratch that, tcpdump uses it.




Re: [PATCH] Correctly (per POSIX) round up df usage percentage

2022-08-29 Thread наб
Hi!

On Sat, Aug 27, 2022 at 05:20:00PM +0100, Stuart Henderson wrote:
> df is used on the ramdisk, so this would need testing there (at least on
> the tighter media on some archs).
> 
> at least one other ramdisk binary does pull in libm so the overall size
> increase might not be terrible, but definitely would need checking.

I lack the knowledge and capability to do testing beyond "I just built
the patched version on CI and it rounded correctly", sorry.

But if linking to libm is an issue, we could substitute the ceil(3)s
with something trivial like
-- >8 --
int iceil(double d) {
  int ret = d;
  if(ret < d)
++ret;
  return ret;
}
-- >8 --
since all the formatting specifiers use %.0f anyway and the [0, 200)
domain is small enough that precision loss/overflow is not a factor.

Best,
наб


signature.asc
Description: PGP signature


move PRU_SENDOOB request to (*pru_sendoob)()

2022-08-29 Thread Vitaliy Makkoveev
PRU_SENDOOB request always consumes passed `top' and `control' mbufs. We
don't want to have dummy m_freem(9) handlers for all protocols, so we
release passed mbufs in the pru_sendoob() EOPNOTSUPP error path.

Also we had the `control' mbuf(9) leak in the tcp(4) PRU_SENDOOB error
path, which was fixed in this diff.

Index: sys/kern/uipc_usrreq.c
===
RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
retrieving revision 1.179
diff -u -p -r1.179 uipc_usrreq.c
--- sys/kern/uipc_usrreq.c  29 Aug 2022 08:08:17 -  1.179
+++ sys/kern/uipc_usrreq.c  29 Aug 2022 09:28:42 -
@@ -247,10 +247,6 @@ uipc_usrreq(struct socket *so, int req, 
}
break;
 
-   case PRU_SENDOOB:
-   error = EOPNOTSUPP;
-   break;
-
case PRU_SOCKADDR:
uipc_setaddr(unp, nam);
break;
Index: sys/net/pfkeyv2.c
===
RCS file: /cvs/src/sys/net/pfkeyv2.c,v
retrieving revision 1.247
diff -u -p -r1.247 pfkeyv2.c
--- sys/net/pfkeyv2.c   29 Aug 2022 08:08:17 -  1.247
+++ sys/net/pfkeyv2.c   29 Aug 2022 09:28:42 -
@@ -426,9 +426,6 @@ pfkeyv2_usrreq(struct socket *so, int re
nam->m_len = pfkey_addr.sa_len;
break;
 
-   case PRU_SENDOOB:
-   error = EOPNOTSUPP;
-   break;
default:
panic("pfkeyv2_usrreq");
}
Index: sys/net/rtsock.c
===
RCS file: /cvs/src/sys/net/rtsock.c,v
retrieving revision 1.348
diff -u -p -r1.348 rtsock.c
--- sys/net/rtsock.c29 Aug 2022 08:08:17 -  1.348
+++ sys/net/rtsock.c29 Aug 2022 09:28:42 -
@@ -251,9 +251,6 @@ route_usrreq(struct socket *so, int req,
nam->m_len = route_src.sa_len;
break;
 
-   case PRU_SENDOOB:
-   error = EOPNOTSUPP;
-   break;
default:
panic("route_usrreq");
}
Index: sys/netinet/ip_divert.c
===
RCS file: /cvs/src/sys/netinet/ip_divert.c,v
retrieving revision 1.81
diff -u -p -r1.81 ip_divert.c
--- sys/netinet/ip_divert.c 29 Aug 2022 08:08:17 -  1.81
+++ sys/netinet/ip_divert.c 29 Aug 2022 09:28:42 -
@@ -280,7 +280,6 @@ divert_usrreq(struct socket *so, int req
break;
 
case PRU_CONNECT2:
-   case PRU_SENDOOB:
case PRU_FASTTIMO:
case PRU_SLOWTIMO:
case PRU_PROTORCV:
Index: sys/netinet/raw_ip.c
===
RCS file: /cvs/src/sys/netinet/raw_ip.c,v
retrieving revision 1.142
diff -u -p -r1.142 raw_ip.c
--- sys/netinet/raw_ip.c29 Aug 2022 08:08:17 -  1.142
+++ sys/netinet/raw_ip.c29 Aug 2022 09:28:42 -
@@ -482,13 +482,6 @@ rip_usrreq(struct socket *so, int req, s
error = EOPNOTSUPP;
break;
 
-   /*
-* Not supported.
-*/
-   case PRU_SENDOOB:
-   error = EOPNOTSUPP;
-   break;
-
case PRU_SOCKADDR:
in_setsockaddr(inp, nam);
break;
Index: sys/netinet/tcp_usrreq.c
===
RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
retrieving revision 1.199
diff -u -p -r1.199 tcp_usrreq.c
--- sys/netinet/tcp_usrreq.c29 Aug 2022 08:08:17 -  1.199
+++ sys/netinet/tcp_usrreq.c29 Aug 2022 09:28:42 -
@@ -126,6 +126,7 @@ const struct pr_usrreqs tcp_usrreqs = {
.pru_abort  = tcp_abort,
.pru_sense  = tcp_sense,
.pru_rcvoob = tcp_rcvoob,
+   .pru_sendoob= tcp_sendoob,
 };
 
 static int pr_slowhz = PR_SLOWHZ;
@@ -229,27 +230,6 @@ tcp_usrreq(struct socket *so, int req, s
error = EOPNOTSUPP;
break;
 
-   case PRU_SENDOOB:
-   if (sbspace(so, >so_snd) < -512) {
-   m_freem(m);
-   error = ENOBUFS;
-   break;
-   }
-   /*
-* According to RFC961 (Assigned Protocols),
-* the urgent pointer points to the last octet
-* of urgent data.  We continue, however,
-* to consider it to indicate the first octet
-* of data past the urgent section.
-* Otherwise, snd_up should be one lower.
-*/
-   sbappendstream(so, >so_snd, m);
-   tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
-   tp->t_force = 1;
-   error = tcp_output(tp);
-   tp->t_force = 0;
-   break;
-
case PRU_SOCKADDR:
 #ifdef INET6
if (inp->inp_flags & INP_IPV6)
@@ -1015,6 +995,60 @@ out:

Can't use hostname in hostctl command.

2022-08-29 Thread Masato Asou
Hi!

I can use ip but can't use hostname in the hostctl command, as shown below:

$ hostctl guestinfo.ip
172.16.100.131
$ hostctl guestinfo.hostname
hostctl: ioctl: Invalid argument


man hostctl has the following description:

EXAMPLES
 The vmt(4) driver provides access to the ``guestinfo''
 information that
 is available in VMware virtual machines:

   # hostctl guestinfo.hostname
   vm-111.example.com
   # hostctl guestinfo.ip 192.168.100.111

Is hostname no longer available due to a change in VMware version?


I use following system:

$ sysctl kern.version
kern.version=OpenBSD 7.1 (GENERIC) #443: Mon Apr 11 17:55:15 MDT 2022
dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC

VMware Fusion 12.2.4 (20071091)

Thank you.
--
ASOU Masato



Re: [matth...@openbsd.org: Re: xlock don't take my password anymore]

2022-08-29 Thread Greg Steuck
Greg Steuck  writes:

Matthieu> + authok = priv_pw_check(user, style, pass);

I suspect your original patch may have swapped the arguments. The
password should go before style.

What do you think about this patch (tested locally, but I don't have style):

diff --git a/app/xlockmore/xlock/passwd.c b/app/xlockmore/xlock/passwd.c
index 914db414f..23ba9043e 100644
--- a/app/xlockmore/xlock/passwd.c
+++ b/app/xlockmore/xlock/passwd.c
@@ -1278,17 +1278,15 @@ checkPasswd(char *buffer)
 
 #ifdef USE_PRIVSEP
char*pass;
-   char*style;
 
/* buffer can be in the form style:pass */
if ((pass = strchr(buffer, ':')) != NULL) {
-   *pass++ = '\0';
-   style = buffer;
-   } else {
-   pass = buffer;
-   style = NULL;
+   *pass++ = '\0';
+   if (priv_pw_check(user, pass, buffer))
+   return True;
+   *--pass = ':';
}
-   return priv_pw_check(user, pass, style);
+   return priv_pw_check(user, buffer, NULL);
 #elif defined(BSD_AUTH)
char   *pass;
char   *style;
-- 
2.37.2



Re: remove net/ofp.h? switch(4) remnant

2022-08-29 Thread Jonathan Gray
On Mon, Aug 29, 2022 at 06:55:53AM +, Klemens Nanni wrote:
> This header came to be when switch(4) was imported in 2016.
> Today nothing includes it and we don't have any other OpenFlow software
> in base.
> 
> Does it still serve any purpose?
> https://codesearch.debian.net/search?q=ofp.h+filetype%3Ac=1
> shows no usage of this header.

used by tcpdump

> 
> OK to remove it?

removing it would break the build



Re: remove net/ofp.h? switch(4) remnant

2022-08-29 Thread Klemens Nanni
Scratch that, tcpdump uses it.



remove net/ofp.h? switch(4) remnant

2022-08-29 Thread Klemens Nanni
This header came to be when switch(4) was imported in 2016.
Today nothing includes it and we don't have any other OpenFlow software
in base.

Does it still serve any purpose?
https://codesearch.debian.net/search?q=ofp.h+filetype%3Ac=1
shows no usage of this header.

OK to remove it?

Index: ofp.h
===
RCS file: ofp.h
diff -N ofp.h
--- ofp.h   10 Mar 2021 10:21:48 -  1.14
+++ /dev/null   1 Jan 1970 00:00:00 -
@@ -1,897 +0,0 @@
-/* $OpenBSD: ofp.h,v 1.14 2021/03/10 10:21:48 jsg Exp $*/
-
-/*
- * Copyright (c) 2013-2016 Reyk Floeter 
- * Copyright (c) 2016 Kazuya GODA 
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#ifndef _NET_OFP_H_
-#define _NET_OFP_H_
-
-#include 
-#include 
-
-#include 
-#include 
-#include 
-
-#define OFP_IFNAMSIZ   16  /* on-wire (not IF_NAMSIZE) */
-#define OFP_ALIGNMENT  8   /* OFP alignment */
-#define OFP_ALIGN(_x)  (((_x) + (OFP_ALIGNMENT - 1)) & ~(OFP_ALIGNMENT - 1))
-
-struct ofp_header {
-   uint8_t  oh_version;/* OpenFlow version */
-   uint8_t  oh_type;   /* message type */
-   uint16_t oh_length; /* message length */
-   uint32_t oh_xid;/* transaction Id */
-} __packed;
-
-/* OpenFlow version */
-#define OFP_V_00x00/* OpenFlow 0.0 */
-#define OFP_V_1_0  0x01/* OpenFlow 1.0 */
-#define OFP_V_1_1  0x02/* OpenFlow 1.1 */
-#define OFP_V_1_2  0x03/* OpenFlow 1.2 */
-#define OFP_V_1_3  0x04/* OpenFlow 1.3 */
-#define OFP_V_1_4  0x05/* OpenFlow 1.4 */
-#define OFP_V_1_5  0x06/* OpenFlow 1.5 */
-
-/* OpenFlow message type */
-#define OFP_T_HELLO0   /* Hello */
-#define OFP_T_ERROR1   /* Error */
-#define OFP_T_ECHO_REQUEST 2   /* Echo Request */
-#define OFP_T_ECHO_REPLY   3   /* Echo Reply */
-#define OFP_T_EXPERIMENTER 4   /* Vendor/Experimenter */
-#define OFP_T_FEATURES_REQUEST 5   /* Features Request (switch) */
-#define OFP_T_FEATURES_REPLY   6   /* Features Reply (switch) */
-#define OFP_T_GET_CONFIG_REQUEST   7   /* Get Config Request (switch) 
*/
-#define OFP_T_GET_CONFIG_REPLY 8   /* Get Config Reply (switch) */
-#define OFP_T_SET_CONFIG   9   /* Set Config (switch) */
-#define OFP_T_PACKET_IN10  /* Packet In (async) */
-#define OFP_T_FLOW_REMOVED 11  /* Flow Removed (async) */
-#define OFP_T_PORT_STATUS  12  /* Port Status (async) */
-#define OFP_T_PACKET_OUT   13  /* Packet Out (controller) */
-#define OFP_T_FLOW_MOD 14  /* Flow Mod (controller) */
-#define OFP_T_GROUP_MOD15  /* Group Mod 
(controller) */
-#define OFP_T_PORT_MOD 16  /* Port Mod (controller) */
-#define OFP_T_TABLE_MOD17  /* Table Mod 
(controller) */
-#define OFP_T_MULTIPART_REQUEST18  /* Multipart Message 
Request */
-#define OFP_T_MULTIPART_REPLY  19  /* Multipart Message Request */
-#define OFP_T_BARRIER_REQUEST  20  /* Barrier Request */
-#define OFP_T_BARRIER_REPLY21  /* Barrier Reply */
-#define OFP_T_QUEUE_GET_CONFIG_REQUEST 22  /* Queue Get Config Request */
-#define OFP_T_QUEUE_GET_CONFIG_REPLY   23  /* Queue Get Config Reply */
-#define OFP_T_ROLE_REQUEST 24  /* Role Request */
-#define OFP_T_ROLE_REPLY   25  /* Role Reply */
-#define OFP_T_GET_ASYNC_REQUEST26  /* Get Async Request */
-#define OFP_T_GET_ASYNC_REPLY  27  /* Get Async Reply */
-#define OFP_T_SET_ASYNC28  /* Set Async */
-#define OFP_T_METER_MOD29  /* Meter Mod */
-#define OFP_T_TYPE_MAX 30
-
-/* OpenFlow Hello Message */
-struct ofp_hello_element_header {
-   uint16_the_type;
-   uint16_the_length;
-} __packed;
-
-#define 

Re: move PRU_RCVOOB request to (*pru_rcvoob)()

2022-08-29 Thread Alexander Bluhm
OK bluhm@

On Mon, Aug 29, 2022 at 01:14:26AM +0300, Vitaliy Makkoveev wrote:
> Index: sys/kern/uipc_usrreq.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.178
> diff -u -p -r1.178 uipc_usrreq.c
> --- sys/kern/uipc_usrreq.c28 Aug 2022 21:35:11 -  1.178
> +++ sys/kern/uipc_usrreq.c28 Aug 2022 22:12:04 -
> @@ -247,7 +247,6 @@ uipc_usrreq(struct socket *so, int req, 
>   }
>   break;
>  
> - case PRU_RCVOOB:
>   case PRU_SENDOOB:
>   error = EOPNOTSUPP;
>   break;
> Index: sys/net/pfkeyv2.c
> ===
> RCS file: /cvs/src/sys/net/pfkeyv2.c,v
> retrieving revision 1.246
> diff -u -p -r1.246 pfkeyv2.c
> --- sys/net/pfkeyv2.c 28 Aug 2022 21:35:11 -  1.246
> +++ sys/net/pfkeyv2.c 28 Aug 2022 22:12:05 -
> @@ -426,7 +426,6 @@ pfkeyv2_usrreq(struct socket *so, int re
>   nam->m_len = pfkey_addr.sa_len;
>   break;
>  
> - case PRU_RCVOOB:
>   case PRU_SENDOOB:
>   error = EOPNOTSUPP;
>   break;
> Index: sys/net/rtsock.c
> ===
> RCS file: /cvs/src/sys/net/rtsock.c,v
> retrieving revision 1.346
> diff -u -p -r1.346 rtsock.c
> --- sys/net/rtsock.c  28 Aug 2022 21:35:12 -  1.346
> +++ sys/net/rtsock.c  28 Aug 2022 22:12:05 -
> @@ -251,7 +251,6 @@ route_usrreq(struct socket *so, int req,
>   nam->m_len = route_src.sa_len;
>   break;
>  
> - case PRU_RCVOOB:
>   case PRU_SENDOOB:
>   error = EOPNOTSUPP;
>   break;
> Index: sys/netinet/ip_divert.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_divert.c,v
> retrieving revision 1.80
> diff -u -p -r1.80 ip_divert.c
> --- sys/netinet/ip_divert.c   28 Aug 2022 21:35:12 -  1.80
> +++ sys/netinet/ip_divert.c   28 Aug 2022 22:12:05 -
> @@ -285,7 +285,6 @@ divert_usrreq(struct socket *so, int req
>   case PRU_SLOWTIMO:
>   case PRU_PROTORCV:
>   case PRU_PROTOSEND:
> - case PRU_RCVOOB:
>   error =  EOPNOTSUPP;
>   break;
>  
> Index: sys/netinet/raw_ip.c
> ===
> RCS file: /cvs/src/sys/netinet/raw_ip.c,v
> retrieving revision 1.141
> diff -u -p -r1.141 raw_ip.c
> --- sys/netinet/raw_ip.c  28 Aug 2022 21:35:12 -  1.141
> +++ sys/netinet/raw_ip.c  28 Aug 2022 22:12:05 -
> @@ -486,7 +486,6 @@ rip_usrreq(struct socket *so, int req, s
>* Not supported.
>*/
>   case PRU_SENDOOB:
> - case PRU_RCVOOB:
>   error = EOPNOTSUPP;
>   break;
>  
> Index: sys/netinet/tcp_usrreq.c
> ===
> RCS file: /cvs/src/sys/netinet/tcp_usrreq.c,v
> retrieving revision 1.198
> diff -u -p -r1.198 tcp_usrreq.c
> --- sys/netinet/tcp_usrreq.c  28 Aug 2022 21:35:12 -  1.198
> +++ sys/netinet/tcp_usrreq.c  28 Aug 2022 22:12:05 -
> @@ -125,6 +125,7 @@ const struct pr_usrreqs tcp_usrreqs = {
>   .pru_send   = tcp_send,
>   .pru_abort  = tcp_abort,
>   .pru_sense  = tcp_sense,
> + .pru_rcvoob = tcp_rcvoob,
>  };
>  
>  static int pr_slowhz = PR_SLOWHZ;
> @@ -228,24 +229,6 @@ tcp_usrreq(struct socket *so, int req, s
>   error = EOPNOTSUPP;
>   break;
>  
> - case PRU_RCVOOB:
> - if ((so->so_oobmark == 0 &&
> - (so->so_state & SS_RCVATMARK) == 0) ||
> - so->so_options & SO_OOBINLINE ||
> - tp->t_oobflags & TCPOOB_HADDATA) {
> - error = EINVAL;
> - break;
> - }
> - if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
> - error = EWOULDBLOCK;
> - break;
> - }
> - m->m_len = 1;
> - *mtod(m, caddr_t) = tp->t_iobc;
> - if (((long)nam & MSG_PEEK) == 0)
> - tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
> - break;
> -
>   case PRU_SENDOOB:
>   if (sbspace(so, >so_snd) < -512) {
>   m_freem(m);
> @@ -998,6 +981,39 @@ tcp_sense(struct socket *so, struct stat
>   if (so->so_options & SO_DEBUG)
>   tcp_trace(TA_USER, tp->t_state, tp, tp, NULL, PRU_SENSE, 0);
>   return (0);
> +}
> +
> +int
> +tcp_rcvoob(struct socket *so, struct mbuf *m, int flags)
> +{
> + struct inpcb *inp;
> + struct tcpcb *tp;
> + int error;
> +
> + soassertlocked(so);
> +
> + if ((error = tcp_sogetpcb(so, , )))
> + return (error);
> +
> + if ((so->so_oobmark == 0 &&
> + (so->so_state & SS_RCVATMARK) == 0) ||
> + 

remove unused macppc headers

2022-08-29 Thread Jonathan Gray
powerpc/include/kbio.h was removed in 2001

--- sys/arch/macppc/include/kbio.h  Sun Sep  2 01:49:06 2001
+++ /dev/null   Mon Aug 29 16:17:05 2022
@@ -1,3 +0,0 @@
-/* $OpenBSD: kbio.h,v 1.1 2001/09/01 15:49:06 drahn Exp $  */
-
-#include <powerpc/kbio.h>
--- sys/arch/macppc/include/ipkdb.h Sun Sep  2 01:49:06 2001
+++ /dev/null   Mon Aug 29 16:17:05 2022
@@ -1,3 +0,0 @@
-/* $OpenBSD: ipkdb.h,v 1.1 2001/09/01 15:49:06 drahn Exp $ */
-
-#include <powerpc/ipkdb.h>
--- sys/arch/powerpc/include/ipkdb.h        Sun Sep 15 19:01:59 2002
+++ /dev/null   Mon Aug 29 16:17:05 2022
@@ -1,80 +0,0 @@
-/* $OpenBSD: ipkdb.h,v 1.6 2002/09/15 09:01:59 deraadt Exp $   */
-
-/*
- * Copyright (C) 1995, 1996 Wolfgang Solfrank.
- * Copyright (C) 1995, 1996 TooLs GmbH.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *notice, this list of conditions and the following disclaimer in the
- *documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *must display the following acknowledgement:
- * This product includes software developed by TooLs GmbH.
- * 4. The name of TooLs GmbH may not be used to endorse or promote products
- *derived from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
- * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
- * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
- * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
- * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
- * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-/* register array */
-#define	FIX	0
-#define	LR	32
-#define	CR	33
-#define	CTR	34
-#define	XER	35
-#define	PC	36
-#define	MSR	37
-#define	NREG	38
-
-#ifndef _LOCORE
-extern int ipkdbregs[NREG];
-
-/* Doesn't handle overlapping regions */
-__inline extern void
-ipkdbcopy(s,d,n)
-   void *s, *d;
-   int n;
-{
-   char *sp = s, *dp = d;
-   
-   while (--n >= 0)
-   *dp++ = *sp++;
-}
-
-__inline extern void
-ipkdbzero(d,n)
-   void *d;
-   int n;
-{
-   char *dp = d;
-   
-   while (--n >= 0)
-   *dp++ = 0;
-}
-
-__inline extern int
-ipkdbcmp(s,d,n)
-   void *s, *d;
-{
-   char *sp = s, *dp = d;
-   
-   while (--n >= 0)
-   if (*sp++ != *dp++)
-   return *--dp - *--sp;
-   return 0;
-}
-#endif /* _LOCORE */