Re: Wireguard: can't remove multiple peers at once.

2021-01-13 Thread YASUOKA Masahiko
Hi,

On Thu, 14 Jan 2021 08:54:36 +0900
Yuichiro NAITO  wrote:
> Does anybody please review my code?
> 
> Yasuoka-san is my coleague of my work.
> So, he is interested in this topic. That’s why I CCed this mail.
> I don’t mean he is an reviewer.
> 
>> 2021/01/12 11:27、Yuichiro NAITO のメール:
>> I have set up multiple peers in a wg0 interface,
>> and tried to remove more than one peers at once.
>> Ifconfig(1) only removes the first peer.
>> 
>> Command line was like following.
>> 
>> ```
>> # ifconfig wg0 -wgpeer  -wgpeer  -wgpeer 
>> ```
>> 
>> Only  was removed.
>> 
>> I think next peer pointer isn't calculated in case of removing peer
>> in sys/net/if_wg.c: wg_ioctl_set() function.
>> 
>> I have tried following patch that can fix this problem.

Yes, the diff seems good.

I made the following whitespace change.

> @@ -2333,6 +2333,11 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>   }
> 
>   peer_p = (struct wg_peer_io *)aip_p;
> + continue;
> + next_peer:
> + aip_p = _p->p_aips[0];
> + aip_p += peer_o.p_aips_count;
> + peer_p = (struct wg_peer_io *)aip_p;
>   }
> 
> error:

It seems we prefer putting goto labels at the beginning of the line.


ok?

Fix wg(4) ioctl to be able to handle multiple wgpeers.
Diff from Yuichiro NAITO.

Index: sys/net/if_wg.c
===
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.14
diff -u -p -r1.14 if_wg.c
--- sys/net/if_wg.c 1 Sep 2020 19:06:59 -   1.14
+++ sys/net/if_wg.c 14 Jan 2021 07:26:48 -
@@ -2270,7 +2270,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
 
/* Peer must have public key */
if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
-   continue;
+   goto next_peer;
 
/* 0 = latest protocol, 1 = this protocol */
if (peer_o.p_protocol_version != 0) {
@@ -2283,7 +2283,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
/* Get local public and check that peer key doesn't match */
if (noise_local_keys(>sc_local, public, NULL) == 0 &&
bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
-   continue;
+   goto next_peer;
 
/* Lookup peer, or create if it doesn't exist */
if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
@@ -2291,7 +2291,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
 * Also, don't create a new one if we only want to
 * update. */
if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
-   continue;
+   goto next_peer;
 
if ((peer = wg_peer_create(sc,
peer_o.p_public)) == NULL) {
@@ -2303,7 +2303,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
/* Remove peer and continue if specified */
if (peer_o.p_flags & WG_PEER_REMOVE) {
wg_peer_destroy(peer);
-   continue;
+   goto next_peer;
}
 
if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
@@ -2332,6 +2332,11 @@ wg_ioctl_set(struct wg_softc *sc, struct
aip_p++;
}
 
+   peer_p = (struct wg_peer_io *)aip_p;
+   continue;
+next_peer:
+   aip_p = _p->p_aips[0];
+   aip_p += peer_o.p_aips_count;
peer_p = (struct wg_peer_io *)aip_p;
}
 



Re: uvm_fault: access_type fixup for wired mapping

2021-01-13 Thread Vitaliy Makkoveev
On Tue, Jan 12, 2021 at 09:41:15AM -0300, Martin Pieuchot wrote:
> Diff below moves `access_type' to the context structure passed down to
> the various routines and fix a regression introduced in a previous
> refactoring.
> 
> `access_type' is overwritten for wired mapping and the value of
> `enter_prot' is used instead.
> 
> ok?

ok mvs@

> 
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.111
> diff -u -p -r1.111 uvm_fault.c
> --- uvm/uvm_fault.c   2 Jan 2021 02:39:59 -   1.111
> +++ uvm/uvm_fault.c   12 Jan 2021 12:36:38 -
> @@ -477,6 +477,7 @@ struct uvm_faultctx {
>* read-only after that.
>*/
>   vm_prot_t enter_prot;
> + vm_prot_t access_type;
>   vaddr_t startva;
>   int npages;
>   int centeridx;
> @@ -486,7 +487,7 @@ struct uvm_faultctx {
>  };
>  
>  int  uvm_fault_lower(struct uvm_faultinfo *, struct uvm_faultctx *,
> - struct vm_page **, vm_fault_t, vm_prot_t);
> + struct vm_page **, vm_fault_t);
>  
>  /*
>   * uvm_fault_check: check prot, handle needs-copy, etc.
> @@ -505,7 +506,7 @@ int   uvm_fault_lower(struct uvm_faultinfo
>   */
>  int
>  uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
> -struct vm_anon ***ranons, vm_prot_t access_type)
> +struct vm_anon ***ranons)
>  {
>   struct vm_amap *amap;
>   struct uvm_object *uobj;
> @@ -523,7 +524,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
>  #endif
>  
>   /* check protection */
> - if ((ufi->entry->protection & access_type) != access_type) {
> + if ((ufi->entry->protection & flt->access_type) != flt->access_type) {
>   uvmfault_unlockmaps(ufi, FALSE);
>   return (EACCES);
>   }
> @@ -539,11 +540,11 @@ uvm_fault_check(struct uvm_faultinfo *uf
>   flt->pa_flags = UVM_ET_ISWC(ufi->entry) ? PMAP_WC : 0;
>   flt->wired = VM_MAPENT_ISWIRED(ufi->entry) || (flt->narrow == TRUE);
>   if (flt->wired)
> - access_type = flt->enter_prot; /* full access for wired */
> + flt->access_type = flt->enter_prot; /* full access for wired */
>  
>   /* handle "needs_copy" case. */
>   if (UVM_ET_ISNEEDSCOPY(ufi->entry)) {
> - if ((access_type & PROT_WRITE) ||
> + if ((flt->access_type & PROT_WRITE) ||
>   (ufi->entry->object.uvm_obj == NULL)) {
>   /* need to clear */
>   uvmfault_unlockmaps(ufi, FALSE);
> @@ -648,7 +649,7 @@ uvm_fault_check(struct uvm_faultinfo *uf
>   */
>  int
>  uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
> -   struct vm_anon **anons, vm_fault_t fault_type, vm_prot_t access_type)
> +   struct vm_anon **anons, vm_fault_t fault_type)
>  {
>   struct vm_amap *amap = ufi->entry->aref.ar_amap;
>   struct vm_anon *oanon, *anon = anons[flt->centeridx];
> @@ -699,7 +700,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
>* if we are out of anon VM we wait for RAM to become available.
>*/
>  
> - if ((access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
> + if ((flt->access_type & PROT_WRITE) != 0 && anon->an_ref > 1) {
>   counters_inc(uvmexp_counters, flt_acow);
>   oanon = anon;   /* oanon = old */
>   anon = uvm_analloc();
> @@ -761,7 +762,7 @@ uvm_fault_upper(struct uvm_faultinfo *uf
>*/
>   if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
>   VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
> - access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
> + flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 
> 0) {
>   /*
>* No need to undo what we did; we can simply think of
>* this as the pmap throwing away the mapping information.
> @@ -922,6 +923,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>* pages on wire */
>   else
>   flt.narrow = FALSE; /* normal fault */
> + flt.access_type = access_type;
>  
>  
>   /*
> @@ -930,7 +932,7 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>   while (error == ERESTART) {
>   anons = anons_store;
>  
> - error = uvm_fault_check(, , , access_type);
> + error = uvm_fault_check(, , );
>   if (error != 0)
>   continue;
>  
> @@ -938,13 +940,11 @@ uvm_fault(vm_map_t orig_map, vaddr_t vad
>   shadowed = uvm_fault_upper_lookup(, , anons, pages);
>   if (shadowed == TRUE) {
>   /* case 1: fault on an anon in our amap */
> - error = uvm_fault_upper(, , anons, fault_type,
> - access_type);
> + error = uvm_fault_upper(, , anons, fault_type);
>   } else {
>   /* 

Re: tpm(4): don't use tvtohz(9)

2021-01-13 Thread Scott Cheloha
On Fri, Jan 08, 2021 at 08:21:14PM +0100, Florian Obser wrote:
> On Fri, Jan 08, 2021 at 11:48:33AM -0600, Scott Cheloha wrote:
> > On Fri, Jan 08, 2021 at 05:41:24PM +0100, Mark Kettenis wrote:
> > > > Date: Fri, 8 Jan 2021 10:27:38 -0600
> > > > From: Scott Cheloha 
> > > > 
> > > > On Wed, Jan 06, 2021 at 11:26:27PM +0100, Mark Kettenis wrote:
> > > > > > Date: Wed, 6 Jan 2021 16:16:27 -0600
> > > > > > From: Scott Cheloha 
> > > > > > 
> > > > > > On Wed, Jan 06, 2021 at 12:16:13PM -0600, Scott Cheloha wrote:
> > > > > > > As mentioned in a prior mail, tpm(4) is the last user of 
> > > > > > > tvtohz(9) in
> > > > > > > the tree.
> > > > > > > 
> > > > > > > However, we don't need to use tvtohz(9) in tpm(4) at all.  
> > > > > > > Converting
> > > > > > > from milliseconds to ticks is trivial.  Using an intermediary 
> > > > > > > timeval
> > > > > > > is just pointless indirection.
> > > > > > > 
> > > > > > > With this committed I will be able to remove both tvtohz(9) and
> > > > > > > tstohz(9) from the tree in a subsequent patch.
> > > > > > 
> > > > > > Whoops, made a mistake in the prior diff.
> > > > > > 
> > > > > > We want to MAX() the result up to 1 to avoid dividing to zero and
> > > > > > blocking indefinitely in e.g. tsleep(9).  Previous diff MIN'd the
> > > > > > result down to 1, which is very wrong.
> > > > > 
> > > > > To be honest I'd just zap the function completely and instead simply 
> > > > > do:
> > > > > 
> > > > > to = TPM_ACCESS_TMO / 10;
> > > > > 
> > > > > and
> > > > > 
> > > > > to = TPM_BURST_TMO / 10;
> > > > 
> > > > That won't work on custom kernels where HZ is not 100, no?
> > > 
> > > HZ is irrelevant here.  These TPM_XXX_TMO defines specify a timeout in
> > > microseconds and DELAY(10) just delays for 10us.  So you just need to
> > > figure out how many times you need to do the 10us delay to reach the
> > > timeout specified by TPM_XXX_TMO.
> > 
> > Whoops, yes, you're right.
> > 
> > > Also, this driver is only supported on amd64/i386 at this point (but
> > > might show up on arm64 at some point).
> > > 
> > > > > There is no magic happening here.  The code is just doing a hardware
> > > > > register poll loop in 10us steps.
> > > > > 
> > > > > Hmm, actually it seems the code is broken and uses steps of 10
> > > > > microseconds instead of milliseconds.  So instead it should probably
> > > > > use:
> > > > > 
> > > > > to = TPM_ACCESS_TMO * 100;
> > > > > 
> > > > > and
> > > > > 
> > > > > to = TPM_BURST_TMO * 100;
> > > > 
> > > > This problem came up in a different thread:
> > > > 
> > > > https://marc.info/?l=openbsd-tech=160833962329381=2
> > > > 
> > > > jcs@ said, and I paraphrase, "tpm(4) sucks, we should merge NetBSD's
> > > > latest code, which is much nicer now."
> > > > 
> > > > That sounds like a much taller order than I can fill, so I'm just
> > > > trying to remove the tvtohz(9) call without changing any behavior,
> > > > even if the behavior looks wrong (like using the wrong units).
> > > > 
> > > > I don't even have a tpm(4) device to test.  Until I have one I'm
> > > > reluctant to do things like expanding the delay time in these loops.
> > > > As of now the driver "works" for some subset of people, which is not
> > > > nothing.
> > > 
> > > But having illogical code is a problem as well.  So I think we should
> > > at least attempt to fix it the right way.  All it takes is to find
> > > someone with a laptop where the driver attaches and have them
> > > suspend/resume it.
> > 
> > We can try that, sure.  Here's the patch:
> > 
> > - Remove tpm_tmotohz().
> > 
> > - tpm_waitfor() does DELAY(1), so in that case convert from
> >   milliseconds to microseconds.  Note in the function argument
> >   that we expect milliseconds, not "tries".
> > 
> > - In the other cases we do DELAY(10), so we only multiply by 100.
> >   Leave a comment explaining what we're doing.
> > 
> > Unsure who can test.  We need a suspend/resume test with this patch.
> > Probably an older machine.  Newer machines tend to have TPM 2.0 chips.
> 
> My x1 gen 2 seems to have a TPM 1.2 chip. Or can emulate one?
> The bios is confusing. I had it disabled until now.
> It seems to be able to suspend/resume just fine with this.
> 
> It shows up like this:
> 
> tpm0 at acpi0 TPM_ addr 0xfed4/0x5000, device 0x104a rev 0x4e
> 
> No idea what this does, I'm going to disable it again, but let me know
> if you want further tests.

kettenis, jcs: Is this sufficient?  Or do I need more tests?

> OpenBSD 6.8-current (GENERIC.MP) #103: Fri Jan  8 20:11:51 CET 2021
> flor...@x1.home.narrans.de:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 8233394176 (7851MB)
> avail mem = 7968542720 (7599MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xdcd3d000 (61 entries)
> bios0: vendor LENOVO version "GRET40WW (1.17 )" date 09/02/2014
> bios0: LENOVO 20A7006VUS
> acpi0 

Re: all platforms: isolate hardclock(9) from statclock()

2021-01-13 Thread Scott Cheloha
On Sat, Jan 09, 2021 at 12:52:22PM -0600, Dale Rahn wrote:
> The 'magic' here was that MD code could choose to implement statclock (and
> set stathz appropriately), or MD code could not care about the multiple
> statclock/hardclock interfaces into the scheduler. Also some clock drivers
> on a platform could enable split hardclock/statclock where others did not.

Gotcha.

> Back near the beginning of OpenBSD platforms existed that had no higher
> precision clock than that timer interrupt, and nothing existed to even
> approximate higher precision (eg cycle counters or instruction counters).

My understanding is that HZ=100 was a practical choice because the
machines of the day could not reliably drive a faster clock interrupt.

> Some clock drivers have a separate timer/interrupt or separate 'event'
> tracked to schedule the stat() event. These platforms may also
> (dynamically) change the stathz clock when profiling is enabled/disabled.
> This is implemented in arm64 in agtimer_setstatclockrate()

My hope is to make this the case for *all* platforms with MI code.
More on that in a later patch.  You'll be CC'd.

> Any clock driver that directly calls statclock() should make certain to
> stathz (and profhz) appropriately, as no assumptions to it's rate/frequency
> should be assumed.

Do we need to properly set stathz for each platform in *this* diff?
Or can it wait?

I was hoping to do a sweep of the tree in a later patch and ensure
that stathz is non-zero everywhere and simultaneously remove code like
this:

int realstathz = stathz ? stathz : hz;

Near as I can tell, stathz doesn't need to be nonzero for statclock()
to work correctly.  Also, setstatclockrate() doesn't even run unless
stathz is non-zero, so there are no issues with the profiling clock
stuff yet.

> This isn't to say that removing the stathz== 0 magic should not be done,
> but if done, make certain that stathz and profhz are properly
> updated/configured.



Re: Wireguard: can't remove multiple peers at once.

2021-01-13 Thread Yuichiro NAITO
Does anybody please review my code?

Yasuoka-san is my coleague of my work.
So, he is interested in this topic. That’s why I CCed this mail.
I don’t mean he is an reviewer.

> 2021/01/12 11:27、Yuichiro NAITO のメール:
> 
> 
> Hi.
> 
> I have set up multiple peers in a wg0 interface,
> and tried to remove more than one peers at once.
> Ifconfig(1) only removes the first peer.
> 
> Command line was like following.
> 
> ```
> # ifconfig wg0 -wgpeer  -wgpeer  -wgpeer 
> ```
> 
> Only  was removed.
> 
> I think next peer pointer isn't calculated in case of removing peer
> in sys/net/if_wg.c: wg_ioctl_set() function.
> 
> I have tried following patch that can fix this problem.
> Is it OK?
> 
> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
> index c534f966363..c21f883269f 100644
> --- a/sys/net/if_wg.c
> +++ b/sys/net/if_wg.c
> @@ -2270,7 +2270,7 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
> 
>   /* Peer must have public key */
>   if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
> - continue;
> + goto next_peer;
> 
>   /* 0 = latest protocol, 1 = this protocol */
>   if (peer_o.p_protocol_version != 0) {
> @@ -2283,7 +2283,7 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>   /* Get local public and check that peer key doesn't match */
>   if (noise_local_keys(>sc_local, public, NULL) == 0 &&
>   bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
> - continue;
> + goto next_peer;
> 
>   /* Lookup peer, or create if it doesn't exist */
>   if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
> @@ -2291,7 +2291,7 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>* Also, don't create a new one if we only want to
>* update. */
>   if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
> - continue;
> + goto next_peer;
> 
>   if ((peer = wg_peer_create(sc,
>   peer_o.p_public)) == NULL) {
> @@ -2303,7 +2303,7 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>   /* Remove peer and continue if specified */
>   if (peer_o.p_flags & WG_PEER_REMOVE) {
>   wg_peer_destroy(peer);
> - continue;
> + goto next_peer;
>   }
> 
>   if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
> @@ -2333,6 +2333,11 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>   }
> 
>   peer_p = (struct wg_peer_io *)aip_p;
> + continue;
> + next_peer:
> + aip_p = _p->p_aips[0];
> + aip_p += peer_o.p_aips_count;
> + peer_p = (struct wg_peer_io *)aip_p;
>   }
> 
> error:
> 
> —
> Yuichiro NAITO
> naito.yuich...@gmail.com
> 

—
Yuichiro NAITO
naito.yuich...@gmail.com






struct process: mark `ps_oppid' as atomic

2021-01-13 Thread Vitaliy Makkoveev
Subj. It's integer so assignments are atomic.

Index: sys/sys/proc.h
===
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.304
diff -u -p -r1.304 proc.h
--- sys/sys/proc.h  11 Jan 2021 13:55:53 -  1.304
+++ sys/sys/proc.h  13 Jan 2021 19:17:51 -
@@ -216,7 +216,7 @@ struct process {
u_int   ps_xexit;   /* Exit status for wait */
int ps_xsig;/* Stopping or killing signal */
 
-   pid_t   ps_oppid;   /* Save parent pid during ptrace. */
+   pid_t   ps_oppid;   /* [a] Save parent pid during ptrace. */
int ps_ptmask;  /* Ptrace event mask */
struct  ptrace_state *ps_ptstat;/* Ptrace state */
 



Re: Cache parent's pid as `ps_ppid' and use it instead of `ps_pptr->ps_pid'.

2021-01-13 Thread Vitaliy Makkoveev
On Wed, Jan 13, 2021 at 02:10:25PM -0300, Martin Pieuchot wrote:
> On 02/01/21(Sat) 21:54, Vitaliy Makkoveev wrote:
> > This allows us to unlock getppid(2). Also NetBSD, DragonflyBSD and OSX
> > do the same.
> 
> Seems the way to go, two comments below.
> 
> > @@ -694,6 +694,7 @@ process_reparent(struct process *child, 
> > }
> >  
> > child->ps_pptr = parent;
> > +   child->ps_ppid = parent->ps_pid;
> 
> Should the parent pid be changed when a process is re-parented when
> being traced?  It seems that both Free and Net only change it when a
> process is re-arented to process 1 (init).
> 
> > +   pid_t   ps_ppid;/* Cached parent pid */
> > pid_t   ps_oppid;   /* Save parent pid during ptrace. */
> 
> Can't we re-use `ps_oppid' and always cache it?  It seems that FreeBSD
> is doing something like that.  Could this field be documented to make it
> clear why geppid(2) can be unlocked?
> 

This changes getppid(2), ps(1) and coredump behavior. Currently we
return debuger's pid as parent while process is being traced. FreeBSD
returns original parent's pid. Philip Guenther pointed, that this
behavior was for the life of OpenBSD and it's not obvious that why it
should be changed. Also at least DragonflyBSD and OSX do the same.

I marked new field `ps_ppid' as atomic. `ps_oppid' is also atomic and
I'll mark it with separate diff.

Index: sys/kern/exec_elf.c
===
RCS file: /cvs/src/sys/kern/exec_elf.c,v
retrieving revision 1.156
diff -u -p -r1.156 exec_elf.c
--- sys/kern/exec_elf.c 7 Dec 2020 16:55:28 -   1.156
+++ sys/kern/exec_elf.c 13 Jan 2021 19:01:08 -
@@ -1257,7 +1257,7 @@ coredump_notes_elf(struct proc *p, void 
cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
 
cpi.cpi_pid = pr->ps_pid;
-   cpi.cpi_ppid = pr->ps_pptr->ps_pid;
+   cpi.cpi_ppid = pr->ps_ppid;
cpi.cpi_pgrp = pr->ps_pgid;
if (pr->ps_session->s_leader)
cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
Index: sys/kern/kern_exit.c
===
RCS file: /cvs/src/sys/kern/kern_exit.c,v
retrieving revision 1.193
diff -u -p -r1.193 kern_exit.c
--- sys/kern/kern_exit.c9 Dec 2020 18:58:19 -   1.193
+++ sys/kern/kern_exit.c13 Jan 2021 19:01:08 -
@@ -694,6 +694,7 @@ process_reparent(struct process *child, 
}
 
child->ps_pptr = parent;
+   child->ps_ppid = parent->ps_pid;
 }
 
 void
Index: sys/kern/kern_fork.c
===
RCS file: /cvs/src/sys/kern/kern_fork.c,v
retrieving revision 1.230
diff -u -p -r1.230 kern_fork.c
--- sys/kern/kern_fork.c7 Dec 2020 16:55:28 -   1.230
+++ sys/kern/kern_fork.c13 Jan 2021 19:01:08 -
@@ -231,6 +231,7 @@ process_new(struct proc *p, struct proce
 
/* post-copy fixups */
pr->ps_pptr = parent;
+   pr->ps_ppid = parent->ps_pid;
 
/* bump references to the text vnode (for sysctl) */
pr->ps_textvp = parent->ps_textvp;
Index: sys/kern/kern_prot.c
===
RCS file: /cvs/src/sys/kern/kern_prot.c,v
retrieving revision 1.76
diff -u -p -r1.76 kern_prot.c
--- sys/kern/kern_prot.c9 Jul 2019 12:23:25 -   1.76
+++ sys/kern/kern_prot.c13 Jan 2021 19:01:08 -
@@ -84,7 +84,7 @@ int
 sys_getppid(struct proc *p, void *v, register_t *retval)
 {
 
-   *retval = p->p_p->ps_pptr->ps_pid;
+   *retval = p->p_p->ps_ppid;
return (0);
 }
 
Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.387
diff -u -p -r1.387 kern_sysctl.c
--- sys/kern/kern_sysctl.c  9 Jan 2021 23:33:18 -   1.387
+++ sys/kern/kern_sysctl.c  13 Jan 2021 19:01:08 -
@@ -1647,7 +1647,7 @@ fill_kproc(struct process *pr, struct ki
 
/* stuff that's too painful to generalize into the macros */
if (pr->ps_pptr)
-   ki->p_ppid = pr->ps_pptr->ps_pid;
+   ki->p_ppid = pr->ps_ppid;
if (s->s_leader)
ki->p_sid = s->s_leader->ps_pid;
 
Index: sys/sys/proc.h
===
RCS file: /cvs/src/sys/sys/proc.h,v
retrieving revision 1.304
diff -u -p -r1.304 proc.h
--- sys/sys/proc.h  11 Jan 2021 13:55:53 -  1.304
+++ sys/sys/proc.h  13 Jan 2021 19:01:08 -
@@ -216,6 +216,7 @@ struct process {
u_int   ps_xexit;   /* Exit status for wait */
int ps_xsig;/* Stopping or killing signal */
 
+   pid_t   ps_ppid;/* [a] Cached parent pid */
pid_t   ps_oppid;   /* Save parent pid during ptrace. */
int 

Re: more refactor bgpd route decision process

2021-01-13 Thread Sebastian Benoit
Claudio Jeker(cje...@diehard.n-r-g.com) on 2021.01.13 13:10:23 +0100:
> This is another cleanup round of the route decision process.
> This time focusing on prefix_cmp(). Make sure that when using
> return (a - b) that the results always fits in an int type.
> Also make sure the check of the remote_addr at the end is done
> properly. The result is probably the same but this is the same
> way it is done in many other places.
> 
> Unless I made a mistake the result should still be the same.

ok


> -- 
> :wq Claudio
> 
> ? obj
> Index: rde_decide.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 rde_decide.c
> --- rde_decide.c  13 Jan 2021 11:34:01 -  1.79
> +++ rde_decide.c  13 Jan 2021 12:08:21 -
> @@ -113,12 +113,12 @@ prefix_cmp(struct prefix *p1, struct pre
>   struct rde_peer *peer1, *peer2;
>   struct attr *a;
>   u_int32_tp1id, p2id;
> - int  p1cnt, p2cnt;
> + int  p1cnt, p2cnt, i;
>  
>   if (p1 == NULL)
> - return (-1);
> + return -1;
>   if (p2 == NULL)
> - return (1);
> + return 1;
>  
>   asp1 = prefix_aspath(p1);
>   asp2 = prefix_aspath(p2);
> @@ -127,15 +127,15 @@ prefix_cmp(struct prefix *p1, struct pre
>  
>   /* pathes with errors are not eligible */
>   if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR)
> - return (-1);
> + return -1;
>   if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR)
> - return (1);
> + return 1;
>  
>   /* only loop free pathes are eligible */
>   if (asp1->flags & F_ATTR_LOOP)
> - return (-1);
> + return -1;
>   if (asp2->flags & F_ATTR_LOOP)
> - return (1);
> + return 1;
>  
>   /*
>* 1. check if prefix is eligible a.k.a reachable
> @@ -144,14 +144,16 @@ prefix_cmp(struct prefix *p1, struct pre
>*/
>   if (prefix_nexthop(p2) != NULL &&
>   prefix_nexthop(p2)->state != NEXTHOP_REACH)
> - return (1);
> + return 1;
>   if (prefix_nexthop(p1) != NULL &&
>   prefix_nexthop(p1)->state != NEXTHOP_REACH)
> - return (-1);
> + return -1;
>  
>   /* 2. local preference of prefix, bigger is better */
> - if ((asp1->lpref - asp2->lpref) != 0)
> - return (asp1->lpref - asp2->lpref);
> + if (asp1->lpref > asp2->lpref)
> + return 1;
> + if (asp1->lpref < asp2->lpref)
> + return -1;
>  
>   /* 3. aspath count, the shorter the better */
>   if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0)
> @@ -161,12 +163,19 @@ prefix_cmp(struct prefix *p1, struct pre
>   if ((asp2->origin - asp1->origin) != 0)
>   return (asp2->origin - asp1->origin);
>  
> - /* 5. MED decision, only comparable between the same neighboring AS */
> - if (rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS ||
> - aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath))
> + /*
> +  * 5. MED decision
> +  * Only comparable between the same neighboring AS or if
> +  * 'rde med compare always' is set.
> +  */
> + if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS) ||
> + aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) {
>   /* lowest value wins */
> - if ((asp2->med - asp1->med) != 0)
> - return (asp2->med - asp1->med);
> + if (asp1->med < asp2->med)
> + return 1;
> + if (asp1->med > asp2->med)
> + return -1;
> + }
>  
>   /*
>* 6. EBGP is cooler than IBGP
> @@ -187,8 +196,10 @@ prefix_cmp(struct prefix *p1, struct pre
>* a metric that weights a prefix at a very late stage in the
>* decision process.
>*/
> - if ((asp1->weight - asp2->weight) != 0)
> - return (asp1->weight - asp2->weight);
> + if (asp1->weight > asp2->weight)
> + return 1;
> + if (asp1->weight < asp2->weight)
> + return -1;
>  
>   /* 8. nexthop costs. NOT YET -> IGNORE */
>  
> @@ -196,9 +207,12 @@ prefix_cmp(struct prefix *p1, struct pre
>* 9. older route (more stable) wins but only if route-age
>* evaluation is enabled.
>*/
> - if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE)
> - if ((p2->lastchange - p1->lastchange) != 0)
> - return (p2->lastchange - p1->lastchange);
> + if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) {
> + if (p1->lastchange < p2->lastchange) /* p1 is older */
> + return 1;
> + if (p1->lastchange > p2->lastchange)
> + return -1;
> + }

OpenBSD Errata: January 13th, 2021 (carp)

2021-01-13 Thread Sebastian Benoit
Errata patches for the kernel have been released for OpenBSD 6.8.

Use of bpf(4) on a carp interface could result in a use after free
error.

Binary updates for the amd64, i386, and arm64 platforms are available via
the syspatch utility. Source code patches can be found on the respective
errata page:

  https://www.openbsd.org/errata68.html

As these affect the kernel, a reboot will be needed after patching.



Re: Cache parent's pid as `ps_ppid' and use it instead of `ps_pptr->ps_pid'.

2021-01-13 Thread Martin Pieuchot
On 02/01/21(Sat) 21:54, Vitaliy Makkoveev wrote:
> This allows us to unlock getppid(2). Also NetBSD, DragonflyBSD and OSX
> do the same.

Seems the way to go, two comments below.

> Index: kern/exec_elf.c
> ===
> RCS file: /cvs/src/sys/kern/exec_elf.c,v
> retrieving revision 1.156
> diff -u -p -r1.156 exec_elf.c
> --- kern/exec_elf.c   7 Dec 2020 16:55:28 -   1.156
> +++ kern/exec_elf.c   2 Jan 2021 15:47:46 -
> @@ -1257,7 +1257,7 @@ coredump_notes_elf(struct proc *p, void 
>   cpi.cpi_sigcatch = pr->ps_sigacts->ps_sigcatch;
>  
>   cpi.cpi_pid = pr->ps_pid;
> - cpi.cpi_ppid = pr->ps_pptr->ps_pid;
> + cpi.cpi_ppid = pr->ps_ppid;
>   cpi.cpi_pgrp = pr->ps_pgid;
>   if (pr->ps_session->s_leader)
>   cpi.cpi_sid = pr->ps_session->s_leader->ps_pid;
> Index: kern/kern_exit.c
> ===
> RCS file: /cvs/src/sys/kern/kern_exit.c,v
> retrieving revision 1.193
> diff -u -p -r1.193 kern_exit.c
> --- kern/kern_exit.c  9 Dec 2020 18:58:19 -   1.193
> +++ kern/kern_exit.c  2 Jan 2021 15:47:46 -
> @@ -694,6 +694,7 @@ process_reparent(struct process *child, 
>   }
>  
>   child->ps_pptr = parent;
> + child->ps_ppid = parent->ps_pid;

Should the parent pid be changed when a process is re-parented when
being traced?  It seems that both Free and Net only change it when a
process is re-arented to process 1 (init).

> Index: kern/kern_fork.c
> ===
> RCS file: /cvs/src/sys/kern/kern_fork.c,v
> retrieving revision 1.230
> diff -u -p -r1.230 kern_fork.c
> --- kern/kern_fork.c  7 Dec 2020 16:55:28 -   1.230
> +++ kern/kern_fork.c  2 Jan 2021 15:47:46 -
> @@ -231,6 +231,7 @@ process_new(struct proc *p, struct proce
>  
>   /* post-copy fixups */
>   pr->ps_pptr = parent;
> + pr->ps_ppid = parent->ps_pid;
>  
>   /* bump references to the text vnode (for sysctl) */
>   pr->ps_textvp = parent->ps_textvp;
> Index: kern/kern_prot.c
> ===
> RCS file: /cvs/src/sys/kern/kern_prot.c,v
> retrieving revision 1.76
> diff -u -p -r1.76 kern_prot.c
> --- kern/kern_prot.c  9 Jul 2019 12:23:25 -   1.76
> +++ kern/kern_prot.c  2 Jan 2021 15:47:46 -
> @@ -84,7 +84,7 @@ int
>  sys_getppid(struct proc *p, void *v, register_t *retval)
>  {
>  
> - *retval = p->p_p->ps_pptr->ps_pid;
> + *retval = p->p_p->ps_ppid;
>   return (0);
>  }
>  
> Index: kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.385
> diff -u -p -r1.385 kern_sysctl.c
> --- kern/kern_sysctl.c28 Dec 2020 18:28:11 -  1.385
> +++ kern/kern_sysctl.c2 Jan 2021 15:47:46 -
> @@ -1666,7 +1666,7 @@ fill_kproc(struct process *pr, struct ki
>  
>   /* stuff that's too painful to generalize into the macros */
>   if (pr->ps_pptr)
> - ki->p_ppid = pr->ps_pptr->ps_pid;
> + ki->p_ppid = pr->ps_ppid;
>   if (s->s_leader)
>   ki->p_sid = s->s_leader->ps_pid;
>  
> Index: sys/proc.h
> ===
> RCS file: /cvs/src/sys/sys/proc.h,v
> retrieving revision 1.303
> diff -u -p -r1.303 proc.h
> --- sys/proc.h9 Dec 2020 18:58:19 -   1.303
> +++ sys/proc.h2 Jan 2021 15:47:47 -
> @@ -216,6 +216,7 @@ struct process {
>   u_int   ps_xexit;   /* Exit status for wait */
>   int ps_xsig;/* Stopping or killing signal */
>  
> + pid_t   ps_ppid;/* Cached parent pid */
>   pid_t   ps_oppid;   /* Save parent pid during ptrace. */

Can't we re-use `ps_oppid' and always cache it?  It seems that FreeBSD
is doing something like that.  Could this field be documented to make it
clear why geppid(2) can be unlocked?

>   int ps_ptmask;  /* Ptrace event mask */
>   struct  ptrace_state *ps_ptstat;/* Ptrace state */
> 



more refactor bgpd route decision process

2021-01-13 Thread Claudio Jeker
This is another cleanup round of the route decision process.
This time focusing on prefix_cmp(). Make sure that when using
return (a - b) that the results always fits in an int type.
Also make sure the check of the remote_addr at the end is done
properly. The result is probably the same but this is the same
way it is done in many other places.

Unless I made a mistake the result should still be the same.
-- 
:wq Claudio

? obj
Index: rde_decide.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
retrieving revision 1.79
diff -u -p -r1.79 rde_decide.c
--- rde_decide.c13 Jan 2021 11:34:01 -  1.79
+++ rde_decide.c13 Jan 2021 12:08:21 -
@@ -113,12 +113,12 @@ prefix_cmp(struct prefix *p1, struct pre
struct rde_peer *peer1, *peer2;
struct attr *a;
u_int32_tp1id, p2id;
-   int  p1cnt, p2cnt;
+   int  p1cnt, p2cnt, i;
 
if (p1 == NULL)
-   return (-1);
+   return -1;
if (p2 == NULL)
-   return (1);
+   return 1;
 
asp1 = prefix_aspath(p1);
asp2 = prefix_aspath(p2);
@@ -127,15 +127,15 @@ prefix_cmp(struct prefix *p1, struct pre
 
/* pathes with errors are not eligible */
if (asp1 == NULL || asp1->flags & F_ATTR_PARSE_ERR)
-   return (-1);
+   return -1;
if (asp2 == NULL || asp2->flags & F_ATTR_PARSE_ERR)
-   return (1);
+   return 1;
 
/* only loop free pathes are eligible */
if (asp1->flags & F_ATTR_LOOP)
-   return (-1);
+   return -1;
if (asp2->flags & F_ATTR_LOOP)
-   return (1);
+   return 1;
 
/*
 * 1. check if prefix is eligible a.k.a reachable
@@ -144,14 +144,16 @@ prefix_cmp(struct prefix *p1, struct pre
 */
if (prefix_nexthop(p2) != NULL &&
prefix_nexthop(p2)->state != NEXTHOP_REACH)
-   return (1);
+   return 1;
if (prefix_nexthop(p1) != NULL &&
prefix_nexthop(p1)->state != NEXTHOP_REACH)
-   return (-1);
+   return -1;
 
/* 2. local preference of prefix, bigger is better */
-   if ((asp1->lpref - asp2->lpref) != 0)
-   return (asp1->lpref - asp2->lpref);
+   if (asp1->lpref > asp2->lpref)
+   return 1;
+   if (asp1->lpref < asp2->lpref)
+   return -1;
 
/* 3. aspath count, the shorter the better */
if ((asp2->aspath->ascnt - asp1->aspath->ascnt) != 0)
@@ -161,12 +163,19 @@ prefix_cmp(struct prefix *p1, struct pre
if ((asp2->origin - asp1->origin) != 0)
return (asp2->origin - asp1->origin);
 
-   /* 5. MED decision, only comparable between the same neighboring AS */
-   if (rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS ||
-   aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath))
+   /*
+* 5. MED decision
+* Only comparable between the same neighboring AS or if
+* 'rde med compare always' is set.
+*/
+   if ((rde_decisionflags() & BGPD_FLAG_DECISION_MED_ALWAYS) ||
+   aspath_neighbor(asp1->aspath) == aspath_neighbor(asp2->aspath)) {
/* lowest value wins */
-   if ((asp2->med - asp1->med) != 0)
-   return (asp2->med - asp1->med);
+   if (asp1->med < asp2->med)
+   return 1;
+   if (asp1->med > asp2->med)
+   return -1;
+   }
 
/*
 * 6. EBGP is cooler than IBGP
@@ -187,8 +196,10 @@ prefix_cmp(struct prefix *p1, struct pre
 * a metric that weights a prefix at a very late stage in the
 * decision process.
 */
-   if ((asp1->weight - asp2->weight) != 0)
-   return (asp1->weight - asp2->weight);
+   if (asp1->weight > asp2->weight)
+   return 1;
+   if (asp1->weight < asp2->weight)
+   return -1;
 
/* 8. nexthop costs. NOT YET -> IGNORE */
 
@@ -196,9 +207,12 @@ prefix_cmp(struct prefix *p1, struct pre
 * 9. older route (more stable) wins but only if route-age
 * evaluation is enabled.
 */
-   if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE)
-   if ((p2->lastchange - p1->lastchange) != 0)
-   return (p2->lastchange - p1->lastchange);
+   if (rde_decisionflags() & BGPD_FLAG_DECISION_ROUTEAGE) {
+   if (p1->lastchange < p2->lastchange) /* p1 is older */
+   return 1;
+   if (p1->lastchange > p2->lastchange)
+   return -1;
+   }
 
/* 10. lowest BGP Id wins, use ORIGINATOR_ID if present */
if ((a = attr_optget(asp1, ATTR_ORIGINATOR_ID)) != NULL) {
@@ 

Re: bgpd refactor route decision process

2021-01-13 Thread Claudio Jeker
On Wed, Jan 13, 2021 at 11:24:32AM +0100, Denis Fondras wrote:
> Le Tue, Jan 12, 2021 at 05:39:02PM +0100, Claudio Jeker a écrit :
> > This diff changes two things:
> > - First, it move the kroute update into rde_generate_updates() simplifying
> > prefix_evaluate a little bit.
> > 
> > - Second, it changes prefix_evaluate to take an additional argument for the
> > old prefix (to be removed). Instead of doing this outside of
> > prefix_evaluate() with some drawbacks in case the same prefix is removed
> > and readded, the code is now in prefix_evaluate() and does all the magic
> > itself.
> > 
> > Index: rde_decide.c
> > ===
> > RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> > retrieving revision 1.78
> > diff -u -p -r1.78 rde_decide.c
> > --- rde_decide.c9 Aug 2019 13:44:27 -   1.78
> > +++ rde_decide.c12 Jan 2021 16:24:36 -
> > @@ -238,14 +238,16 @@ prefix_cmp(struct prefix *p1, struct pre
> >   * The to evaluate prefix must not be in the prefix list.
> >   */
> >  void
> > -prefix_evaluate(struct prefix *p, struct rib_entry *re)
> > +prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix 
> > *old)
> >  {
> > struct prefix   *xp;
> >  
> > if (re_rib(re)->flags & F_RIB_NOEVALUATE) {
> > /* decision process is turned off */
> > -   if (p != NULL)
> > -   LIST_INSERT_HEAD(>prefix_h, p, entry.list.rib);
> > +   if (old != NULL)
> > +   LIST_REMOVE(old, entry.list.rib);
> > +   if (new != NULL)
> > +   LIST_INSERT_HEAD(>prefix_h, new, entry.list.rib);
> 
> Would it be beneficial to have a p == new test ?

You mean old == new? Not sure if it is worth the trouble. 
Currently this has the benefit that that most recent update is at the head
of the list. Now one could argue that this code is supposed to be as fast
as possible and so skipping the remove & insert could be benefitial.
I'm currently after a bigger issue so I'm happy to leave this for others
:)

> 
> Otherwise OK denis@
> 

-- 
:wq Claudio



Re: bgpd refactor route decision process

2021-01-13 Thread Denis Fondras
Le Tue, Jan 12, 2021 at 05:39:02PM +0100, Claudio Jeker a écrit :
> This diff changes two things:
> - First, it move the kroute update into rde_generate_updates() simplifying
> prefix_evaluate a little bit.
> 
> - Second, it changes prefix_evaluate to take an additional argument for the
> old prefix (to be removed). Instead of doing this outside of
> prefix_evaluate() with some drawbacks in case the same prefix is removed
> and readded, the code is now in prefix_evaluate() and does all the magic
> itself.
> 
> Index: rde_decide.c
> ===
> RCS file: /cvs/src/usr.sbin/bgpd/rde_decide.c,v
> retrieving revision 1.78
> diff -u -p -r1.78 rde_decide.c
> --- rde_decide.c  9 Aug 2019 13:44:27 -   1.78
> +++ rde_decide.c  12 Jan 2021 16:24:36 -
> @@ -238,14 +238,16 @@ prefix_cmp(struct prefix *p1, struct pre
>   * The to evaluate prefix must not be in the prefix list.
>   */
>  void
> -prefix_evaluate(struct prefix *p, struct rib_entry *re)
> +prefix_evaluate(struct rib_entry *re, struct prefix *new, struct prefix *old)
>  {
>   struct prefix   *xp;
>  
>   if (re_rib(re)->flags & F_RIB_NOEVALUATE) {
>   /* decision process is turned off */
> - if (p != NULL)
> - LIST_INSERT_HEAD(>prefix_h, p, entry.list.rib);
> + if (old != NULL)
> + LIST_REMOVE(old, entry.list.rib);
> + if (new != NULL)
> + LIST_INSERT_HEAD(>prefix_h, new, entry.list.rib);

Would it be beneficial to have a p == new test ?

Otherwise OK denis@