from:"mike"

Re: Some bwfm(4) diffs

2023-10-08 Thread Mike Larkin

On Sun, Oct 08, 2023 at 07:42:54PM +0200, Mark Kettenis wrote:
> Hector Martin has added support for the BCM4388 that is found on the
> last generation of Apple Macs.  Based on his commits I've managed to
> get it working on my M2 Pro mini.  I still have to clean up some of
> that stuff, but here is a first batch of two diffs.
>
> The changes to dev/ic/bwfm.c correspond to:
>
> https://github.com/AsahiLinux/linux/commit/81e3cc7bec8b9d9c436f63662d8fcfda4f637807
>
> The changes to dev/pci/if_bwfm_pci.c correspond to:
>
> https://github.com/AsahiLinux/linux/commit/8190add8671fc49c12d04b5ac8fced70f835e69f
>
> Both changes seem to be a good idea and potentially affect other chips
> as well.  So if you have a machine with bwfm(4), please test.
>
> ok?
>

This bwfm on my Rpi4 seems to work, can still scan, associate, and pass
packets.

bwfm0 at sdmmc0 function 1

Not sure I grok this enough to give an ok though.

-ml

>
> Index: dev/ic/bwfm.c
> ===
> RCS file: /cvs/src/sys/dev/ic/bwfm.c,v
> retrieving revision 1.109
> diff -u -p -r1.109 bwfm.c
> --- dev/ic/bwfm.c 28 Mar 2023 14:01:42 -  1.109
> +++ dev/ic/bwfm.c 8 Oct 2023 17:29:35 -
> @@ -1089,15 +1089,9 @@ void
>  bwfm_chip_ai_reset(struct bwfm_softc *sc, struct bwfm_core *core,
>  uint32_t prereset, uint32_t reset, uint32_t postreset)
>  {
> - struct bwfm_core *core2 = NULL;
>   int i;
>
> - if (core->co_id == BWFM_AGENT_CORE_80211)
> - core2 = bwfm_chip_get_core_idx(sc, BWFM_AGENT_CORE_80211, 1);
> -
>   bwfm_chip_ai_disable(sc, core, prereset, reset);
> - if (core2)
> - bwfm_chip_ai_disable(sc, core2, prereset, reset);
>
>   for (i = 50; i > 0; i--) {
>   if ((sc->sc_buscore_ops->bc_read(sc,
> @@ -1110,32 +1104,12 @@ bwfm_chip_ai_reset(struct bwfm_softc *sc
>   }
>   if (i == 0)
>   printf("%s: timeout on core reset\n", DEVNAME(sc));
> - if (core2) {
> - for (i = 50; i > 0; i--) {
> - if ((sc->sc_buscore_ops->bc_read(sc,
> - core2->co_wrapbase + BWFM_AGENT_RESET_CTL) &
> - BWFM_AGENT_RESET_CTL_RESET) == 0)
> - break;
> - sc->sc_buscore_ops->bc_write(sc,
> - core2->co_wrapbase + BWFM_AGENT_RESET_CTL, 0);
> - delay(60);
> - }
> - if (i == 0)
> - printf("%s: timeout on core reset\n", DEVNAME(sc));
> - }
>
>   sc->sc_buscore_ops->bc_write(sc,
>   core->co_wrapbase + BWFM_AGENT_IOCTL,
>   postreset | BWFM_AGENT_IOCTL_CLK);
>   sc->sc_buscore_ops->bc_read(sc,
>   core->co_wrapbase + BWFM_AGENT_IOCTL);
> - if (core2) {
> - sc->sc_buscore_ops->bc_write(sc,
> - core2->co_wrapbase + BWFM_AGENT_IOCTL,
> - postreset | BWFM_AGENT_IOCTL_CLK);
> - sc->sc_buscore_ops->bc_read(sc,
> - core2->co_wrapbase + BWFM_AGENT_IOCTL);
> - }
>  }
>
>  void
> @@ -1338,6 +1312,7 @@ bwfm_chip_ca7_set_passive(struct bwfm_so
>  {
>   struct bwfm_core *core;
>   uint32_t val;
> + int i = 0;
>
>   core = bwfm_chip_get_core(sc, BWFM_AGENT_CORE_ARM_CA7);
>   val = sc->sc_buscore_ops->bc_read(sc,
> @@ -1347,10 +1322,11 @@ bwfm_chip_ca7_set_passive(struct bwfm_so
>   BWFM_AGENT_IOCTL_ARMCR4_CPUHALT,
>   BWFM_AGENT_IOCTL_ARMCR4_CPUHALT);
>
> - core = bwfm_chip_get_core(sc, BWFM_AGENT_CORE_80211);
> - sc->sc_chip.ch_core_reset(sc, core, BWFM_AGENT_D11_IOCTL_PHYRESET |
> - BWFM_AGENT_D11_IOCTL_PHYCLOCKEN, BWFM_AGENT_D11_IOCTL_PHYCLOCKEN,
> - BWFM_AGENT_D11_IOCTL_PHYCLOCKEN);
> + while ((core = bwfm_chip_get_core_idx(sc, BWFM_AGENT_CORE_80211, i++)))
> + sc->sc_chip.ch_core_disable(sc, core,
> + BWFM_AGENT_D11_IOCTL_PHYRESET |
> + BWFM_AGENT_D11_IOCTL_PHYCLOCKEN,
> + BWFM_AGENT_D11_IOCTL_PHYCLOCKEN);
>  }
>
>  int
> Index: dev/pci/if_bwfm_pci.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_bwfm_pci.c,v
> retrieving revision 1.75
> diff -u -p -r1.75 if_bwfm_pci.c
> --- dev/pci/if_bwfm_pci.c 30 Dec 2022 14:10:17 -  1.75
> +++ dev/pci/if_bwfm_pci.c 8 Oct 2023 17:29:35 -
> @@ -134,6 +134,10 @@ struct bwfm_pci_softc {
>   bus_space_handle_t   sc_reg_ioh;
>   bus_size_t   sc_reg_ios;
>
> + bus_space_tag_t  sc_pcie_iot;
> + bus_space_handle_t   sc_pcie_ioh;
> + bus_size_t   sc_pcie_ios;
> +
>   bus_space_tag_t  sc_tcm_iot;
>   bus_space_handle_t   sc_tcm_ioh;
>   bus_size_t   sc_tcm_ios;
> @@ -379,6 +383,10 @@ bwfm_pci_attach(struct device *parent, s
>   goto bar1;
>   }
>
> + sc->sc_pcie_iot =

Re: vmd(8): fix deadlock during pausing

2023-09-25 Thread Mike Larkin

On Sun, Sep 24, 2023 at 01:07:43AM -0400, Dave Voutila wrote:
> vmd has a sneaky little deadlock hidden in the pause logic related to
> the use of mutexes and condition variables.
>
> When pausing, the vcpu is holding the "run" mutex. It then sleeps
> waiting for the unpause condition. If the event thread is trying to
> assert an irq, it will try to lock that "run" mutex in an attempt to
> signal a halted vcpu that it should start running. This deadlocks the
> thread.
>
> Diff below releases the run mutex (by the vcpu thread) before sleeping
> on the "pause" condition and reacquires it afterwards. This lets the
> event thread advance as needed.
>
> A simple reproducer is to run `iperf3 -s` inside a guest, `iperf -c  -t
> 60` from outside the guest, and then `vmctl pause `. vmctl will hang
> as the vm can't respond to the pause request.
>
> I'm also simplifying the offending vcpu_assert_pic_irq (called by the
> event thread), so the diff should be pretty self contained for review.
>
> ok?

ok mlarkin

>
> diffstat /usr/src
>  M  usr.sbin/vmd/vm.c  |  6+  8-
>
> 1 file changed, 6 insertions(+), 8 deletions(-)
>
> diff /usr/src
> commit - 89fcd96b33617c4eddd2306889179a96a934ebe8
> path + /usr/src
> blob - fe804b4e3b9e8b59b883e58f02e15a178c035742
> file + usr.sbin/vmd/vm.c
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -1564,16 +1564,20 @@ vcpu_run_loop(void *arg)
>   __func__, (int)ret);
>   return ((void *)ret);
>   }
>
> + /* i8259 may be firing as we pause, release run mtx. */
> + mutex_unlock(_run_mtx[n]);
>   ret = pthread_cond_wait(_unpause_cond[n],
>   _unpause_mtx[n]);
>   if (ret) {
>   log_warnx(
>   "%s: can't wait on unpause cond (%d)",
>   __func__, (int)ret);
>   break;
>   }
> + mutex_lock(_run_mtx[n]);
> +
>   ret = pthread_mutex_unlock(_unpause_mtx[n]);
>   if (ret) {
>   log_warnx("%s: can't unlock unpause mtx (%d)",
>   __func__, (int)ret);
> @@ -2135,20 +2139,14 @@ vcpu_assert_pic_irq(uint32_t vm_id, uint32_t vcpu_id,
>
>   if (i8259_is_pending()) {
>   if (vcpu_pic_intr(vm_id, vcpu_id, 1))
>   fatalx("%s: can't assert INTR", __func__);
> -
> - ret = pthread_mutex_lock(_run_mtx[vcpu_id]);
> - if (ret)
> - fatalx("%s: can't lock vcpu mtx (%d)", __func__, ret);
> -
> + mutex_lock(_run_mtx[vcpu_id]);
>   vcpu_hlt[vcpu_id] = 0;
>   ret = pthread_cond_signal(_run_cond[vcpu_id]);
>   if (ret)
>   fatalx("%s: can't signal (%d)", __func__, ret);
> - ret = pthread_mutex_unlock(_run_mtx[vcpu_id]);
> - if (ret)
> - fatalx("%s: can't unlock vcpu mtx (%d)", __func__, ret);
> + mutex_unlock(_run_mtx[vcpu_id]);
>   }
>  }
>
>  /*
>

Re: Dell R7615 kernel protection fault

2023-09-11 Thread Mike Larkin

On Mon, Sep 11, 2023 at 03:23:28PM +0200, Hrvoje Popovski wrote:
> On 11.9.2023. 6:27, Hrvoje Popovski wrote:
> > On 11.9.2023. 2:48, Mike Larkin wrote:
> >> On Sun, Sep 10, 2023 at 01:36:33AM +0200, Hrvoje Popovski wrote:
> >>> Hi all,
> >>>
> >>> I've installed latest snapshot with uefi on Dell R7615 with AMD EPYC
> >>> 9554P, with some NVMe disks on BOSS-N1 adapter and with Samsung NVMe
> >>> disks directly connected to backplane and installation was fast and
> >>> without any problems.
> >>> But after that machine panics with this message
> >>> https://kosjenka.srce.hr/~hrvoje/openbsd/r7615-ddb1.jpg
> >>>
> >>
> >> did it work before on an older snapshot?
> >>
> >
> > this is brand new machine and I installed latest snapshot.
> > will try older snapshot now ...
> >
> >
>
> Hi,
>
> I've tried snapshots from 2023-06-30 and 2023-06-07 and I'm getting same
> kernel protection fault.
>
>
>

hm. I think we'd need to see a backtrace here, and if you can get that far,
a register dump would be useful as well. either %rsi or %rdi here is probably
trash.

Re: Dell R7615 kernel protection fault

2023-09-10 Thread Mike Larkin

On Sun, Sep 10, 2023 at 01:36:33AM +0200, Hrvoje Popovski wrote:
> Hi all,
>
> I've installed latest snapshot with uefi on Dell R7615 with AMD EPYC
> 9554P, with some NVMe disks on BOSS-N1 adapter and with Samsung NVMe
> disks directly connected to backplane and installation was fast and
> without any problems.
> But after that machine panics with this message
> https://kosjenka.srce.hr/~hrvoje/openbsd/r7615-ddb1.jpg
>

did it work before on an older snapshot?

> I can't do anything with keyboard and I've tried over ipmi console but I
> can't get it to work.
>
>
> BOSS-N1 is in raid1
> https://kosjenka.srce.hr/~hrvoje/openbsd/r7615-ramdisk1.jpg
>
> Samsung NVMe connected to backplane
> https://kosjenka.srce.hr/~hrvoje/openbsd/r7615-ramdisk2.jpg
>
>
> I will try somehow to get console output
>

Re: clockintr: add clockintr_advance_random()

2023-09-05 Thread Mike Larkin

On Tue, Sep 05, 2023 at 09:17:27AM -0500, Scott Cheloha wrote:
> mpi@ suggests folding the pseudorandom advance code from
> clockintr_statclock() into the clockintr API itself.  This replaces
> three API calls -- clockintr_expiration(), clockintr_nsecuptime(), and
> clockintr_schedule() -- with just one call to a new function,
> clockintr_advance_random().
>
> I'm fine with it.  A pseudorandom period is an odd thing and
> supporting it is difficult.  Having a single bespoke API to support it
> might be the lesser of two evils.
>
> With this in place, the statclock() patch on tech@ can be simplified.
>
> ok?
>

This seems like a good idea. ok mlarkin

> Index: kern_clockintr.c
> ===
> RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
> retrieving revision 1.33
> diff -u -p -r1.33 kern_clockintr.c
> --- kern_clockintr.c  26 Aug 2023 22:21:00 -  1.33
> +++ kern_clockintr.c  5 Sep 2023 14:11:38 -
> @@ -42,8 +42,8 @@ uint32_t statclock_avg; /* [I] average
>  uint32_t statclock_min;  /* [I] minimum statclock period 
> (ns) */
>  uint32_t statclock_mask; /* [I] set of allowed offsets */
>
> +uint64_t clockintr_advance_random(struct clockintr *, uint64_t, uint32_t);
>  void clockintr_cancel_locked(struct clockintr *);
> -uint64_t clockintr_expiration(const struct clockintr *);
>  void clockintr_hardclock(struct clockintr *, void *);
>  uint64_t clockintr_nsecuptime(const struct clockintr *);
>  void clockintr_schedule(struct clockintr *, uint64_t);
> @@ -345,6 +345,30 @@ clockintr_advance(struct clockintr *cl,
>   return count;
>  }
>
> +/*
> + * Custom version of clockintr_advance() to support a pseudorandom
> + * statclock() period.  Hopefully we can throw this out at some point
> + * in the future.
> + */
> +uint64_t
> +clockintr_advance_random(struct clockintr *cl, uint64_t lo, uint32_t mask)
> +{
> + uint64_t count = 0;
> + struct clockintr_queue *cq = cl->cl_queue;
> + uint32_t off;
> +
> + KASSERT(cl == >cq_shadow);
> +
> + while (cl->cl_expiration <= cq->cq_uptime) {
> + while ((off = (random() & mask)) == 0)
> + continue;
> + cl->cl_expiration += lo + off;
> + count++;
> + }
> + SET(cl->cl_flags, CLST_SHADOW_PENDING);
> + return count;
> +}
> +
>  void
>  clockintr_cancel(struct clockintr *cl)
>  {
> @@ -402,21 +426,6 @@ clockintr_establish(struct clockintr_que
>   return cl;
>  }
>
> -uint64_t
> -clockintr_expiration(const struct clockintr *cl)
> -{
> - uint64_t expiration;
> - struct clockintr_queue *cq = cl->cl_queue;
> -
> - if (cl == >cq_shadow)
> - return cl->cl_expiration;
> -
> - mtx_enter(>cq_mtx);
> - expiration = cl->cl_expiration;
> - mtx_leave(>cq_mtx);
> - return expiration;
> -}
> -
>  void
>  clockintr_schedule(struct clockintr *cl, uint64_t expiration)
>  {
> @@ -478,13 +487,6 @@ clockintr_stagger(struct clockintr *cl,
>   mtx_leave(>cq_mtx);
>  }
>
> -uint64_t
> -clockintr_nsecuptime(const struct clockintr *cl)
> -{
> - KASSERT(cl == >cl_queue->cq_shadow);
> - return cl->cl_queue->cq_uptime;
> -}
> -
>  void
>  clockintr_hardclock(struct clockintr *cl, void *frame)
>  {
> @@ -498,20 +500,11 @@ clockintr_hardclock(struct clockintr *cl
>  void
>  clockintr_statclock(struct clockintr *cl, void *frame)
>  {
> - uint64_t count, expiration, i, uptime;
> - uint32_t off;
> + uint64_t count, i;
>
>   if (ISSET(clockintr_flags, CL_RNDSTAT)) {
> - count = 0;
> - expiration = clockintr_expiration(cl);
> - uptime = clockintr_nsecuptime(cl);
> - while (expiration <= uptime) {
> - while ((off = (random() & statclock_mask)) == 0)
> - continue;
> - expiration += statclock_min + off;
> - count++;
> - }
> - clockintr_schedule(cl, expiration);
> + count = clockintr_advance_random(cl, statclock_min,
> + statclock_mask);
>   } else {
>   count = clockintr_advance(cl, statclock_avg);
>   }
>

Re: vmd/vmm: remove an ioctl from the vcpu hotpath, go brrr

2023-09-04 Thread Mike Larkin

On Mon, Sep 04, 2023 at 07:57:18PM +0200, Mischa wrote:
> On 2023-09-04 18:58, Mischa wrote:
> > On 2023-09-04 18:55, Mischa wrote:
> > > On 2023-09-04 17:57, Dave Voutila wrote:
> > > > Mischa  writes:
> > > > > On 2023-09-04 16:23, Mike Larkin wrote:
> > > > > > On Mon, Sep 04, 2023 at 02:30:23PM +0200, Mischa wrote:
> > > > > > > On 2023-09-03 21:18, Dave Voutila wrote:
> > > > > > > > Mischa  writes:
> > > > > > > >
> > > > > > > > > Nice!! Thanx Dave!
> > > > > > > > >
> > > > > > > > > Running go brrr as we speak.
> > > > > > > > > Testing with someone who is running Debian.
> > > > > > > >
> > > > > > > > Great. I'll plan on committing this tomorrow afternoon (4 Sep) 
> > > > > > > > my time
> > > > > > > > unless I hear of any issues.
> > > > > > > There are a couple of permanent VMs running on this host, 1 ToR
> > > > > > > node,
> > > > > > > OpenBSD VM and a Debian VM.
> > > > > > > While they were running I started my stress script.
> > > > > > > The first round I started 40 VMs with just bsd.rd, 2G memory
> > > > > > > All good, then I started 40 VMs with a base disk and 2G memory.
> > > > > > > After 20 VMs started I got the following messages on the console:
> > > > > > > [umd116390/221323 sp=752d7ac9f090 inside 75c264948000-75c26147fff:
> > > > > > > not
> > > > > > > MAP_STACK
> > > > > > > [umd159360/355276 sp=783369$96750 inside
> > > > > > > 7256d538c000-725645b8bFff:
> > > > > > > not
> > > > > > > MAP_STACK
> > > > > > > [umd172263/319211 sp=70fb86794b60 inside
> > > > > > > 75247a4d2000-75247acdifff:
> > > > > > > not
> > > > > > > MAP_STACK
> > > > > > > [umd142824/38950 sp=7db1ed2a64d0 inside
> > > > > > > 756c57d18000-756c58517fff: not
> > > > > > > MAP_STACK
> > > > > > > [umd19808/286658 sp=7dbied2a64d0 inside
> > > > > > > 70f685f41000-70f6867dofff: not
> > > > > > > MAP_STACK
> > > > > > > [umd193279/488634 sp=72652c3e3da0 inside
> > > > > > > 7845f168d000-7845f1e8cfff:
> > > > > > > not
> > > > > > > MAP_STACK
> > > > > > > [umd155924/286116 sp=7eac5a1ff060 inside
> > > > > > > 7b88bcb79000-7b88b4378fff:
> > > > > > > not
> > > > > > > MAP_STACK
> > > > > > > Not sure if this is related to starting of the VMs or something
> > > > > > > else, the
> > > > > > > ToR node was consuming 100%+ CPU at the time. :)
> > > > > > > Mischa
> > > > > > I have not seen this; can you try without the ToR node
> > > > > > some time and
> > > > > > see if
> > > > > > this still happens?
> > > > >
> > > > > Testing again without any other VMs running.
> > > > > > Things go wrong when I run the following command and wait a little.
> > > > >
> > > > > for i in $(jot 10 10); do vmctl create -b /var/vmm/vm09.qcow2
> > > > > /var/vmm/vm${i}.qcow2 && vmctl start -L -d
> > > > > /var/vmm/vm${i}.qcow2 -m 2G
> > > > > vm${i}; done
> > > >
> > > > Can you try adding a "sleep 2" or something in the loop? I can't
> > > > think
> > > > of a reason my changes would cause this. Do you see this on -current
> > > > without the diff?
> > >
> > > Adding the sleep 2 does indeed help. I managed to get 20 VMs started
> > > this way, before it would choke on 2-3.
> > >
> > > Do I only need the unpatched kernel or also the vmd/vmctl from snap?
> >
> > I do still get the same message on the console, but the machine isn't
> > freezing up.
> >
> > [umd173152/210775 sp=7a5f577a1780 inside 702698535000-702698d34fff: not
> > MAP_STACK
>
> Starting 30 VMs this way caused the machine to become unresponsive again,
> but nothing on the console. :(
>
> Mischa

Were you seeing these uvm errors before this diff? If so, this isn't
causing the problem and something else is.

If this diff causes the errors to occur, and without the diff it's fine, then
we need to look into that.


Also I think a pid number in that printf might be useful, I'll see what I can
find. If it's not vmd causing this and rather some other process then that
would be good to know also.

Re: vmd/vmm: remove an ioctl from the vcpu hotpath, go brrr

2023-09-04 Thread Mike Larkin

On Mon, Sep 04, 2023 at 02:30:23PM +0200, Mischa wrote:
> On 2023-09-03 21:18, Dave Voutila wrote:
> > Mischa  writes:
> >
> > > Nice!! Thanx Dave!
> > >
> > > Running go brrr as we speak.
> > > Testing with someone who is running Debian.
> >
> > Great. I'll plan on committing this tomorrow afternoon (4 Sep) my time
> > unless I hear of any issues.
>
> There are a couple of permanent VMs running on this host, 1 ToR node,
> OpenBSD VM and a Debian VM.
> While they were running I started my stress script.
> The first round I started 40 VMs with just bsd.rd, 2G memory
> All good, then I started 40 VMs with a base disk and 2G memory.
> After 20 VMs started I got the following messages on the console:
>
> [umd116390/221323 sp=752d7ac9f090 inside 75c264948000-75c26147fff: not
> MAP_STACK
> [umd159360/355276 sp=783369$96750 inside 7256d538c000-725645b8bFff: not
> MAP_STACK
> [umd172263/319211 sp=70fb86794b60 inside 75247a4d2000-75247acdifff: not
> MAP_STACK
> [umd142824/38950 sp=7db1ed2a64d0 inside 756c57d18000-756c58517fff: not
> MAP_STACK
> [umd19808/286658 sp=7dbied2a64d0 inside 70f685f41000-70f6867dofff: not
> MAP_STACK
> [umd193279/488634 sp=72652c3e3da0 inside 7845f168d000-7845f1e8cfff: not
> MAP_STACK
> [umd155924/286116 sp=7eac5a1ff060 inside 7b88bcb79000-7b88b4378fff: not
> MAP_STACK
>
> Not sure if this is related to starting of the VMs or something else, the
> ToR node was consuming 100%+ CPU at the time. :)
>
> Mischa

I have not seen this; can you try without the ToR node some time and see if
this still happens?

Re: vmd/vmm: remove an ioctl from the vcpu hotpath, go brrr

2023-09-03 Thread Mike Larkin

On Fri, Sep 01, 2023 at 03:50:31PM -0400, Dave Voutila wrote:
> Now that my i8259 fix is in, it's safe to expand the testing pool for
> this diff. (Without that fix, users would definitely hit the hung block
> device issue testing this one.) Hoping that folks that run non-OpenBSD
> guests or strange configurations can give it a spin.
>
> This change removes an ioctl(2) call from the vcpu thread hot path in
> vmd. Instead of making that syscall to toggle on/off a pending interrupt
> flag on the vcpu object in vmm(4), it adds a flag into the vm_run_params
> struct sent with the VMM_IOC_RUN ioctl. The in-kernel vcpu runloop can
> now toggle the pending interrupt state prior to vm entry.
>
> mbuhl@ and phessler@ have run this diff on their machines. Current
> observations are reduced average network latency for guests.
>
> My terse measurements using the following btrace script show some
> promising changes in terms of reducing ioctl syscalls:
>
>   /* VMM_IOC_INTR: 0x800c5606 -> 2148292102 */
>   syscall:ioctl:entry
>   /arg1 == 2148292102/
>   {
> @total[tid] = count();
> @running[tid] = count();
>   }
>   interval:hz:1
>   {
> print(@running);
> clear(@running);
>   }
>
> Measuring from boot of an OpenBSD guest to after the guest finishes
> relinking (based on my manual observation of the libevent thread
> settling down in syscall rate), I see a huge reduction in VMM_IOC_INTR
> ioctls for a single guest:
>
> ## -current
> @total[433237]: 1325100  # vcpu thread (!!)
> @total[187073]: 80239# libevent thread
>
> ## with diff
> @total[550347]: 42   # vcpu thread (!!)
> @total[256550]: 86946# libevent thread
>
> Most of the VMM_IOC_INTR ioctls on the vcpu threads come from seabios
> and the bootloader prodding some of the emulated hardware, but even
> after the bootloader you'll see ~10-20k/s of ioctl's on -current
> vs. ~4-5k/s with the diff.
>
> At steady-state, the vcpu thread no longer makes the VMM_IOC_INTR calls
> at all and you should see the libevent thread calling it at a rate ~100/s
> (probably hardclock?). *Without* the diff, I see a steady 650/s rate on
> the vcpu thread at idle. *With* the diff, it's 0/s at idle. :)
>
> To test:
> - rebuild & install new kernel
> - copy/symlink vmmvar.h into /usr/include/machine/
> - rebuild & re-install vmd & vmctl
> - reboot
>
> -dv
>
>

ok mlarkin, thanks!

> diffstat refs/heads/master refs/heads/vmm-vrp_intr_pending
>  M  sys/arch/amd64/amd64/vmm_machdep.c  |  10+   0-
>  M  sys/arch/amd64/include/vmmvar.h |   1+   0-
>  M  usr.sbin/vmd/vm.c   |   2+  16-
>
> 3 files changed, 13 insertions(+), 16 deletions(-)
>
> diff refs/heads/master refs/heads/vmm-vrp_intr_pending
> commit - 8afcf90fb39e4a84606e93137c2b6c20f44312cb
> commit + 10eeb8a0414ec927b6282473c50043a7027d6b41
> blob - 24a376a8f3bc94bc4a4203fe66c5994594adff46
> blob + e3b6d10a0ae78b12ec2f3296f708b42540ce798e
> --- sys/arch/amd64/amd64/vmm_machdep.c
> +++ sys/arch/amd64/amd64/vmm_machdep.c
> @@ -3973,6 +3973,11 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
>*/
>   irq = vrp->vrp_irq;
>
> + if (vrp->vrp_intr_pending)
> + vcpu->vc_intr = 1;
> + else
> + vcpu->vc_intr = 0;
> +
>   if (vrp->vrp_continue) {
>   switch (vcpu->vc_gueststate.vg_exit_reason) {
>   case VMX_EXIT_IO:
> @@ -6381,6 +6386,11 @@ vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *
>
>   irq = vrp->vrp_irq;
>
> + if (vrp->vrp_intr_pending)
> + vcpu->vc_intr = 1;
> + else
> + vcpu->vc_intr = 0;
> +
>   /*
>* If we are returning from userspace (vmd) because we exited
>* last time, fix up any needed vcpu state first. Which state
> blob - e9f8384cccfde33034d7ac9782610f93eb5dc640
> blob + 88545b54b35dd60280ba87403e343db9463d7419
> --- sys/arch/amd64/include/vmmvar.h
> +++ sys/arch/amd64/include/vmmvar.h
> @@ -456,6 +456,7 @@ struct vm_run_params {
>   uint32_tvrp_vcpu_id;
>   uint8_t vrp_continue;   /* Continuing from an exit */
>   uint16_tvrp_irq;/* IRQ to inject */
> + uint8_t vrp_intr_pending;   /* Additional intrs pending? */
>
>   /* Input/output parameter to VMM_IOC_RUN */
>   struct vm_exit  *vrp_exit;  /* updated exit data */
> blob - 5f598bcc14af5115372d34a4176254d377aad91c
> blob + 447fc219adadf945de2bf25d5335993c2abdc26f
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -1610,22 +1610,8 @@ vcpu_run_loop(void *arg)
>   } else
>   vrp->vrp_irq = 0x;
>
> - /* Still more pending? */
> - if (i8259_is_pending()) {
> - /*
> -  * XXX can probably avoid ioctls here by providing intr
> -  * in vrp
> -  */
> - if (vcpu_pic_intr(vrp->vrp_vm_id,
> - vrp->vrp_vcpu_id, 1)) {
> -

Re: all platforms: separate cpu_initclocks() from cpu_startclock()

2023-08-23 Thread Mike Larkin

On Mon, Aug 21, 2023 at 10:23:53PM -0500, Scott Cheloha wrote:
> On Tue, Aug 22, 2023 at 02:36:31AM +0000, Mike Larkin wrote:
> > On Mon, Aug 21, 2023 at 09:26:00PM -0500, Scott Cheloha wrote:
> > > On Mon, Aug 21, 2023 at 10:10:58PM +0000, Mike Larkin wrote:
> > > > On Sat, Aug 19, 2023 at 01:44:47PM -0500, Scott Cheloha wrote:
> > > > > On Sun, Aug 13, 2023 at 01:48:21PM -0500, Scott Cheloha wrote:
> > > > > > This is the next patch in the clock interrupt reorganization series.
> > > > > >
> > > > > > Before we continue breaking up the hardclock(9) we need to detour 
> > > > > > into
> > > > > > the MD code.
> > > > > >
> > > > > > This patch divides the "initialization" parts of cpu_initclocks() 
> > > > > > from
> > > > > > > the "start the clock interrupt" parts.  Separating the two parts 
> > > > > > leaves
> > > > > > initclocks() an opportunity to prepare the primary CPU for clock
> > > > > > interrupt dispatch in a machine-independent manner before actually
> > > > > > pulling the trigger.  It's nearly impossible to do any MI setup 
> > > > > > during
> > > > > > initclocks() because cpu_initclocks() does everything in one go: 
> > > > > > both
> > > > > > initialization and kickoff are done when cpu_initclocks() returns.
> > > > > >
> > > > > > Many platforms have a "cpu_startclock()" function, so this patch 
> > > > > > takes
> > > > > > that de facto standard and makes it a rule: cpu_startclock() is now
> > > > > > required.  It is prototyped in sys/systm.h and every platform must
> > > > > > implement it.
> > > > > >
> > > > > > The revised initclocks() sequence is then:
> > > > > >
> > > > > > 1. Call cpu_initclocks().  At minimum, cpu_initclocks() ensures
> > > > > >hz, stathz, and profhz are initialized.  All the machine
> > > > > >independent setup in step (2) (currently) depends upon
> > > > > >these machine-dependent values.
> > > > > >
> > > > > > 2. Compute intervals using hz, stathz, and profhz.
> > > > > >
> > > > > >In a later step I will move the full contents of clockintr_init()
> > > > > >up into initclocks() and get rid of clockintr_init() entirely.
> > > > > >
> > > > > > 3. Call cpu_startclock().  At minimum, cpu_startclock() starts the
> > > > > >clock interrupt dispatch cycle on the primary CPU.
> > > > > >
> > > > > > I have compiled/booted this patch on amd64 (lapic path), arm64, i386
> > > > > > (lapic path), macppc, octeon, and sparc64 (sun4v).
> > > > > >
> > > > > > I am looking for compile/boot tests on alpha, armv7, hppa, landisk,
> > > > > > luna88k, powerpc64, and riscv64.  I think armv7 is the tricky one
> > > > > > here.  Everything else is relatively straightforward, though I may
> > > > > > have missed a few stray variables here or there.
> > > > > >
> > > > > > Test results?  Ok?
> > > > >
> > > > > Here is an updated patch that removes several MD prototypes for
> > > > > cpu_startclock() that I missed the first time through.
> > > > >
> > > > > I went back and tested these again:
> > > > >
> > > > > - amd64 (lapic)
> > > > > - arm64
> > > > > - i386 (lapic)
> > > > > - powerpc/macppc
> > > > > - mips64/octeon (loongson should be fine)
> > > > > - sparc64 (sys_tick; tick/stick should be fine)
> > > > >
> > > > > arm/armv7 and riscv64 were tested under the previous version, but I
> > > > > would appreciate a second compile-test to make sure the header changes
> > > > > in the updated patch did not break the build (CC phessler@, jsg@).
> > > > >
> > > > > I am still seeking compile/boot-tests for the following:
> > > > >
> > > > > - alpha
> > > > > - hppa
> > > > > - m88k/luna88k
> > > >
> > > > if you are really interested in doing this [...]
> > >
> > > "really interested" is a bit strong.  As always, my primary goal is
> > > not to break anything when I make a commit.
> > >
> > > The luna88k patch looks pretty straightforward, but it's hard to be
> > > completely sure I didn't screw something up.
> > >
> > > > [...] you could run this in nono since you're just looking for
> > > > a compile/boot test.
> > >
> > > Apparently the license forbids redistribution.  Super annoying.
> >
> > so? install it, boot a luna88k "vm", test your diff, then you have your
> > question answered. you aren't redistributing anything.
>
> FWIW, I think vmctl/vmd have a nicer user interface.

Same :)

>
> I feel like I'm... boxing... with nono, not using it.

Re: all platforms: separate cpu_initclocks() from cpu_startclock()

2023-08-21 Thread Mike Larkin

On Mon, Aug 21, 2023 at 09:26:00PM -0500, Scott Cheloha wrote:
> On Mon, Aug 21, 2023 at 10:10:58PM +0000, Mike Larkin wrote:
> > On Sat, Aug 19, 2023 at 01:44:47PM -0500, Scott Cheloha wrote:
> > > On Sun, Aug 13, 2023 at 01:48:21PM -0500, Scott Cheloha wrote:
> > > > This is the next patch in the clock interrupt reorganization series.
> > > >
> > > > Before we continue breaking up the hardclock(9) we need to detour into
> > > > the MD code.
> > > >
> > > > This patch divides the "initialization" parts of cpu_initclocks() from
> > > > > the "start the clock interrupt" parts.  Separating the two parts leaves
> > > > initclocks() an opportunity to prepare the primary CPU for clock
> > > > interrupt dispatch in a machine-independent manner before actually
> > > > pulling the trigger.  It's nearly impossible to do any MI setup during
> > > > initclocks() because cpu_initclocks() does everything in one go: both
> > > > initialization and kickoff are done when cpu_initclocks() returns.
> > > >
> > > > Many platforms have a "cpu_startclock()" function, so this patch takes
> > > > that de facto standard and makes it a rule: cpu_startclock() is now
> > > > required.  It is prototyped in sys/systm.h and every platform must
> > > > implement it.
> > > >
> > > > The revised initclocks() sequence is then:
> > > >
> > > > 1. Call cpu_initclocks().  At minimum, cpu_initclocks() ensures
> > > >hz, stathz, and profhz are initialized.  All the machine
> > > >independent setup in step (2) (currently) depends upon
> > > >these machine-dependent values.
> > > >
> > > > 2. Compute intervals using hz, stathz, and profhz.
> > > >
> > > >In a later step I will move the full contents of clockintr_init()
> > > >up into initclocks() and get rid of clockintr_init() entirely.
> > > >
> > > > 3. Call cpu_startclock().  At minimum, cpu_startclock() starts the
> > > >clock interrupt dispatch cycle on the primary CPU.
> > > >
> > > > I have compiled/booted this patch on amd64 (lapic path), arm64, i386
> > > > (lapic path), macppc, octeon, and sparc64 (sun4v).
> > > >
> > > > I am looking for compile/boot tests on alpha, armv7, hppa, landisk,
> > > > luna88k, powerpc64, and riscv64.  I think armv7 is the tricky one
> > > > here.  Everything else is relatively straightforward, though I may
> > > > have missed a few stray variables here or there.
> > > >
> > > > Test results?  Ok?
> > >
> > > Here is an updated patch that removes several MD prototypes for
> > > cpu_startclock() that I missed the first time through.
> > >
> > > I went back and tested these again:
> > >
> > > - amd64 (lapic)
> > > - arm64
> > > - i386 (lapic)
> > > - powerpc/macppc
> > > - mips64/octeon (loongson should be fine)
> > > - sparc64 (sys_tick; tick/stick should be fine)
> > >
> > > arm/armv7 and riscv64 were tested under the previous version, but I
> > > would appreciate a second compile-test to make sure the header changes
> > > in the updated patch did not break the build (CC phessler@, jsg@).
> > >
> > > I am still seeking compile/boot-tests for the following:
> > >
> > > - alpha
> > > - hppa
> > > - m88k/luna88k
> >
> > if you are really interested in doing this [...]
>
> "really interested" is a bit strong.  As always, my primary goal is
> not to break anything when I make a commit.
>
> The luna88k patch looks pretty straightforward, but it's hard to be
> completely sure I didn't screw something up.
>
> > [...] you could run this in nono since you're just looking for
> > a compile/boot test.
>
> Apparently the license forbids redistribution.  Super annoying.
>

so? install it, boot a luna88k "vm", test your diff, then you have your
question answered. you aren't redistributing anything.

> > > - powerpc64
> >
> > builds and boots on powerpc64
>
> Noted.  Thank you!
>

Re: all platforms: separate cpu_initclocks() from cpu_startclock()

2023-08-21 Thread Mike Larkin

On Sat, Aug 19, 2023 at 01:44:47PM -0500, Scott Cheloha wrote:
> On Sun, Aug 13, 2023 at 01:48:21PM -0500, Scott Cheloha wrote:
> > This is the next patch in the clock interrupt reorganization series.
> >
> > Before we continue breaking up the hardclock(9) we need to detour into
> > the MD code.
> >
> > This patch divides the "initialization" parts of cpu_initclocks() from
> > > the "start the clock interrupt" parts.  Separating the two parts leaves
> > initclocks() an opportunity to prepare the primary CPU for clock
> > interrupt dispatch in a machine-independent manner before actually
> > pulling the trigger.  It's nearly impossible to do any MI setup during
> > initclocks() because cpu_initclocks() does everything in one go: both
> > initialization and kickoff are done when cpu_initclocks() returns.
> >
> > Many platforms have a "cpu_startclock()" function, so this patch takes
> > that de facto standard and makes it a rule: cpu_startclock() is now
> > required.  It is prototyped in sys/systm.h and every platform must
> > implement it.
> >
> > The revised initclocks() sequence is then:
> >
> > 1. Call cpu_initclocks().  At minimum, cpu_initclocks() ensures
> >hz, stathz, and profhz are initialized.  All the machine
> >independent setup in step (2) (currently) depends upon
> >these machine-dependent values.
> >
> > 2. Compute intervals using hz, stathz, and profhz.
> >
> >In a later step I will move the full contents of clockintr_init()
> >up into initclocks() and get rid of clockintr_init() entirely.
> >
> > 3. Call cpu_startclock().  At minimum, cpu_startclock() starts the
> >clock interrupt dispatch cycle on the primary CPU.
> >
> > I have compiled/booted this patch on amd64 (lapic path), arm64, i386
> > (lapic path), macppc, octeon, and sparc64 (sun4v).
> >
> > I am looking for compile/boot tests on alpha, armv7, hppa, landisk,
> > luna88k, powerpc64, and riscv64.  I think armv7 is the tricky one
> > here.  Everything else is relatively straightforward, though I may
> > have missed a few stray variables here or there.
> >
> > Test results?  Ok?
>
> Here is an updated patch that removes several MD prototypes for
> cpu_startclock() that I missed the first time through.
>
> I went back and tested these again:
>
> - amd64 (lapic)
> - arm64
> - i386 (lapic)
> - powerpc/macppc
> - mips64/octeon (loongson should be fine)
> - sparc64 (sys_tick; tick/stick should be fine)
>
> arm/armv7 and riscv64 were tested under the previous version, but I
> would appreciate a second compile-test to make sure the header changes
> in the updated patch did not break the build (CC phessler@, jsg@).
>
> I am still seeking compile/boot-tests for the following:
>
> - alpha
> - hppa
> - m88k/luna88k

if you are really interested in doing this you could run this in nono since
you're just looking for a compile/boot test.

> - powerpc64

builds and boots on powerpc64

> - sh/landisk
>
> Test results?  Ok?
>
> Index: kern/kern_clock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_clock.c,v
> retrieving revision 1.113
> diff -u -p -r1.113 kern_clock.c
> --- kern/kern_clock.c 12 Aug 2023 13:19:28 -  1.113
> +++ kern/kern_clock.c 19 Aug 2023 18:16:16 -
> @@ -103,6 +103,9 @@ initclocks(void)
>   profclock_period = 1000000000 / profhz;
>
>   inittimecounter();
> +
> + /* Start dispatching clock interrupts on the primary CPU. */
> + cpu_startclock();
>  }
>
>  /*
> Index: sys/systm.h
> ===
> RCS file: /cvs/src/sys/sys/systm.h,v
> retrieving revision 1.164
> diff -u -p -r1.164 systm.h
> --- sys/systm.h   5 Aug 2023 20:07:56 -   1.164
> +++ sys/systm.h   19 Aug 2023 18:16:17 -
> @@ -243,6 +243,7 @@ void  initclocks(void);
>  void inittodr(time_t);
>  void resettodr(void);
>  void cpu_initclocks(void);
> +void cpu_startclock(void);
>
>  void startprofclock(struct process *);
>  void stopprofclock(struct process *);
> Index: arch/alpha/alpha/clock.c
> ===
> RCS file: /cvs/src/sys/arch/alpha/alpha/clock.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 clock.c
> --- arch/alpha/alpha/clock.c  25 Jul 2023 18:16:19 -  1.28
> +++ arch/alpha/alpha/clock.c  19 Aug 2023 18:16:17 -
> @@ -193,7 +193,11 @@ cpu_initclocks(void)
>   stathz = hz;
>   profhz = stathz;
>   clockintr_init(0);
> +}
>
> +void
> +cpu_startclock(void)
> +{
>   clockintr_cpu_init(NULL);
>
>   /*
> Index: arch/amd64/amd64/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.286
> diff -u -p -r1.286 machdep.c
> --- arch/amd64/amd64/machdep.c27 Jul 2023 00:28:25 -  1.286
> +++ arch/amd64/amd64/machdep.c19 Aug 2023 18:16:18 -
> @@ -227,6 +227,7 @@ paddr_t avail_end;
>

Re: i386: i8254_initclocks: set IPL_MPSAFE for clock, rtc IRQs

2023-08-21 Thread Mike Larkin

On Sun, Aug 20, 2023 at 10:39:46PM -0500, Scott Cheloha wrote:
> On amd64 we lie about the interrupts established during
> i8254_initclocks().  We claim they are MP-safe in order to mollify a
> KASSERT in intr_establish() and continue booting.
>
> See amd64/isa/clock.c:
>279  void
>280  i8254_initclocks(void)
>281  {
>282  i8254_inittimecounter();/* hook the interrupt-based 
> i8254 tc */
>283
>284  stathz = 128;
>285  profhz = 1024;  /* XXX does not divide into 1 billion 
> */
>286  clockintr_init(0);
>287
>288  clockintr_cpu_init(NULL);
>289
>290  /*
>291   * While the clock interrupt handler isn't really MPSAFE, the
>292   * i8254 can't really be used as a clock on a true MP system.
>293   */
>294  isa_intr_establish(NULL, 0, IST_PULSE, IPL_CLOCK | IPL_MPSAFE,
>295  clockintr, 0, "clock");
>296  isa_intr_establish(NULL, 8, IST_PULSE, IPL_STATCLOCK | 
> IPL_MPSAFE,
>297  rtcintr, 0, "rtc");
>
> and amd64/amd64/intr.c:
>
>332  void *
>333  intr_establish(int legacy_irq, struct pic *pic, int pin, int type, 
> int level,
>334  struct cpu_info *ci, int (*handler)(void *), void *arg, const 
> char *what)
>335  {
>336  struct intrhand **p, *q, *ih;
>337  int slot, error, idt_vec;
>338  struct intrsource *source;
>339  struct intrstub *stubp;
>340  int flags;
>341
>342  #ifdef DIAGNOSTIC
>343  if (legacy_irq != -1 && (legacy_irq < 0 || legacy_irq > 15))
>344  panic("intr_establish: bad legacy IRQ value");
>345
>346  if (legacy_irq == -1 && pic == _pic)
>347  panic("intr_establish: non-legacy IRQ on i8259");
>348  #endif
>349
>350  flags = level & IPL_MPSAFE;
>351  level &= ~IPL_MPSAFE;
>352
>353  KASSERT(level <= IPL_TTY || level >= IPL_CLOCK || flags & 
> IPL_MPSAFE);
>
> Can we do the same on i386?  I'm trying to test the i8254 path on
> modern hardware and I'm tripping the equivalent KASSERT in
> apic_intr_establish().
>
> See i386/i386/ioapic.c:
>
>661  void *
>662  apic_intr_establish(int irq, int type, int level, int (*ih_fun)(void 
> *),
>663  void *ih_arg, const char *ih_what)
>664  {
>665  unsigned int ioapic = APIC_IRQ_APIC(irq);
>666  unsigned int intr = APIC_IRQ_PIN(irq);
>667  struct ioapic_softc *sc = ioapic_find(ioapic);
>668  struct ioapic_pin *pin;
>669  struct intrhand **p, *q, *ih;
>670  extern int cold;
>671  int minlevel, maxlevel;
>672  extern void intr_calculatemasks(void); /* XXX */
>673  int flags;
>674
>675  flags = level & IPL_MPSAFE;
>676  level &= ~IPL_MPSAFE;
>677
>678  KASSERT(level <= IPL_TTY || flags & IPL_MPSAFE);
>
> The patch below lets me test the i8254 clockintr path on modern
> hardware in 32-bit mode without needing to rototill the GENERIC
> config to delete all the things that implicitly depend upon the
> ioapic.
>
> I don't think lying in this case is harmful.  We can only get to
> i8254_initclocks() if we have no local APIC, or if
> lapic_calibrate_timer() fails.
>
> ok?
>
> Index: clock.c
> ===
> RCS file: /cvs/src/sys/arch/i386/isa/clock.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 clock.c
> --- clock.c   25 Jul 2023 18:16:20 -  1.65
> +++ clock.c   21 Aug 2023 03:26:39 -
> @@ -431,9 +431,9 @@ i8254_initclocks(void)
>   clockintr_cpu_init(NULL);
>
>   /* When using i8254 for clock, we also use the rtc for profclock */
> - (void)isa_intr_establish(NULL, 0, IST_PULSE, IPL_CLOCK,
> + (void)isa_intr_establish(NULL, 0, IST_PULSE, IPL_CLOCK | IPL_MPSAFE,
>   clockintr, 0, "clock");
> - (void)isa_intr_establish(NULL, 8, IST_PULSE, IPL_STATCLOCK,
> + (void)isa_intr_establish(NULL, 8, IST_PULSE, IPL_STATCLOCK | IPL_MPSAFE,
>   rtcintr, 0, "rtc");
>
>   rtcstart(); /* start the mc146818 clock */

I think this is fine. I tried to come up with a scenario where you'd be doing
smp i386 without a local apic and even the ancient 82489 (for 80486 systems)
acted as a lapic. And since we don't run on real 80386 anymore, I think we can
ignore someone trying to do smp there.

ok mlarkin if it makes your work easier.

Re: vmd(8): fix setting log verbosity in child processes

2023-07-27 Thread Mike Larkin

On Wed, Jul 26, 2023 at 12:23:58PM -0400, Dave Voutila wrote:
> When adding exec for vm's and fork/exec'd vio{blk,net} devices, the
> current verbosity wasn't being set on the new process. The below change
> keeps it simple, avoiding runtime string manipulation. Also tosses in an
> ifdef around a very chatty debug message related to ipc with devices.
>
> This doesn't address runtime toggling of verbosity with vmctl(8) nor
> does it address the fact vmd has a janky concept of verbosity. Those are
> future fixes.
>
> ok?
>

ok mlarkin if you're still looking for oks.

-ml

> diffstat /usr/src
>  M  usr.sbin/vmd/virtio.c  |  9+  4-
>  M  usr.sbin/vmd/vmd.h |  4+  0-
>  M  usr.sbin/vmd/vmm.c |  7+  4-
>
> 3 files changed, 20 insertions(+), 8 deletions(-)
>
> diff /usr/src
> commit - 3228b0c4b8598ac2f799f997d457a8ba24307bec
> path + /usr/src
> blob - a58e35115432b3d16fb456e71bd71f93d9e2467d
> file + usr.sbin/vmd/virtio.c
> --- usr.sbin/vmd/virtio.c
> +++ usr.sbin/vmd/virtio.c
> @@ -1475,12 +1475,15 @@ virtio_dev_launch(struct vmd_vm *vm, struct virtio_dev
>   nargv[5] = "-i";
>   nargv[6] = vmm_fd;
>   nargv[7] = "-n";
> + nargv[8] = NULL;
>
> - if (env->vmd_verbose) {
> - nargv[8] = "-v";
> + if (env->vmd_verbose == 1) {
> + nargv[8] = VMD_VERBOSE_1;
>   nargv[9] = NULL;
> - } else
> - nargv[8] = NULL;
> + } else if (env->vmd_verbose > 1) {
> + nargv[8] = VMD_VERBOSE_2;
> + nargv[9] = NULL;
> + }
>
>   /* Control resumes in vmd.c:main(). */
>   execvp(nargv[0], nargv);
> @@ -1699,8 +1702,10 @@ virtio_pci_io(int dir, uint16_t reg, uint32_t *data, u
>   imsg_free();
>
>   if (msg.type == VIODEV_MSG_IO_READ && msg.data_valid) {
> +#if DEBUG
>   log_debug("%s: got sync read response (reg=%s)",
>   __func__, virtio_reg_name(msg.reg));
> +#endif /* DEBUG */
>   *data = msg.data;
>   /*
>* It's possible we're asked to {de,}assert after the
> blob - 744b8d1957423b91202b9630fe4a5a6dc4158089
> file + usr.sbin/vmd/vmd.h
> --- usr.sbin/vmd/vmd.h
> +++ usr.sbin/vmd/vmd.h
> @@ -102,6 +102,10 @@ enum imsg_type {
>  /* Unique local address for IPv6 */
>  #define VMD_ULA_PREFIX   "fd00::/8"
>
> +/* Verbosity arguments for use when calling execvp(2). */
> +#define VMD_VERBOSE_1"-v";
> +#define VMD_VERBOSE_2"-vv";
> +
>  enum imsg_type {
>   IMSG_VMDOP_START_VM_REQUEST = IMSG_PROC_MAX,
>   IMSG_VMDOP_START_VM_CDROM,
> blob - 541222e027294ea6d85c957e9cc1a55bb1ac829c
> file + usr.sbin/vmd/vmm.c
> --- usr.sbin/vmd/vmm.c
> +++ usr.sbin/vmd/vmm.c
> @@ -782,12 +782,15 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
>   nargv[3] = "-n";
>   nargv[4] = "-i";
>   nargv[5] = vmm_fd;
> + nargv[6] = NULL;
>
> - if (env->vmd_verbose) {
> - nargv[6] = "-v";
> + if (env->vmd_verbose == 1) {
> + nargv[6] = VMD_VERBOSE_1;
>   nargv[7] = NULL;
> - } else
> - nargv[6] = NULL;
> + } else if (env->vmd_verbose > 1) {
> + nargv[6] = VMD_VERBOSE_2;
> + nargv[7] = NULL;
> + }
>
>   /* Control resumes in vmd main(). */
>   execvp(nargv[0], nargv);
>

Re: [v2] statclock: move profil(2), GPROF code into other clock interrupts

2023-07-21 Thread Mike Larkin

On Fri, Jul 21, 2023 at 05:46:32PM +0200, Jeremie Courreges-Anglas wrote:
> On Thu, Jul 20 2023, Scott Cheloha  wrote:
> > On Wed, Jul 19, 2023 at 05:09:04AM +0000, Mike Larkin wrote:
> >> On Tue, Jul 18, 2023 at 08:21:41AM -0500, Scott Cheloha wrote:
> >> > This patch moves the profil(2)- and GPROF-specific parts of
> >> > statclock() out into separate clock interrupt routines.  The
> >> > profil(2) part moves into profclock() and is enabled/disabled as
> >> > needed during mi_switch().  The GPROF part moves into gmonclock() and
> >> > is enabled/disabled as needed via sysctl(2).
> >> >
> >> > Moving those parts out of statclock() eliminates the need for an
> >> > effective statclock frequency and we can delete all the junk related
> >> > to that: psratio/psdiv/pscnt and corresponding members of
> >> > schedstate_percpu, clockintr_setstatclockrate(), a bunch of other
> >> > clockintr-internal code
> >> >
> >> > In separate commits I have addressed:
> >> >
> >> > - General GPROF instability on amd64
> >> > - GPROF causing a crash during suspend/resume
> >> > - CTASSERT breakage on amd64 related to schedstate_percpu
> >> >   changes in this patch
> >> >
> >> > This has been kicking around for over two months.  Personally, I have
> >> > tested it on amd64, arm64, macppc, octeon, and sparc64.
> >> >
> >> > Compile- and boot-tests on other platforms (alpha, i386, luna88k,
> >> > riscv64, sh) would be appreciated, but the last time I asked for tests
> >> > I got zero reports back.
> >>
> >> i386 compiles and boots.
> >
> > Great!
> >
> >> as reported in separate mail, riscv64 doesn't compile.
> >
> > I think we're missing a 'struct user' definition on riscv64.  Can you
> > try this?
>
> GENERIC.MP with option GPROF doesn't build on riscv64, but this diff
> doesn't introduce any new error.  Runtime untested.
>
> --
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
>

Yes, I should have pointed out I did a normal build and not a GPROF build
which I have no idea how to test, nor do I use. Same disclaimer applies to
i386.

Re: [v2] statclock: move profil(2), GPROF code into other clock interrupts

2023-07-20 Thread Mike Larkin

On Thu, Jul 20, 2023 at 01:23:01PM -0500, Scott Cheloha wrote:
> On Wed, Jul 19, 2023 at 05:09:04AM +0000, Mike Larkin wrote:
> > On Tue, Jul 18, 2023 at 08:21:41AM -0500, Scott Cheloha wrote:
> > > This patch moves the profil(2)- and GPROF-specific parts of
> > > > statclock() out into separate clock interrupt routines.  The
> > > profil(2) part moves into profclock() and is enabled/disabled as
> > > needed during mi_switch().  The GPROF part moves into gmonclock() and
> > > is enabled/disabled as needed via sysctl(2).
> > >
> > > Moving those parts out of statclock() eliminates the need for an
> > > effective statclock frequency and we can delete all the junk related
> > > to that: psratio/psdiv/pscnt and corresponding members of
> > > schedstate_percpu, clockintr_setstatclockrate(), a bunch of other
> > > clockintr-internal code
> > >
> > > In separate commits I have addressed:
> > >
> > > - General GPROF instability on amd64
> > > - GPROF causing a crash during suspend/resume
> > > - CTASSERT breakage on amd64 related to schedstate_percpu
> > >   changes in this patch
> > >
> > > This has been kicking around for over two months.  Personally, I have
> > > tested it on amd64, arm64, macppc, octeon, and sparc64.
> > >
> > > Compile- and boot-tests on other platforms (alpha, i386, luna88k,
> > > riscv64, sh) would be appreciated, but the last time I asked for tests
> > > I got zero reports back.
> >
> > i386 compiles and boots.
>
> Great!
>
> > as reported in separate mail, riscv64 doesn't compile.
>
> I think we're missing a 'struct user' definition on riscv64.  Can you
> try this?
>

compiles and boots ok.

> Index: kern/kern_clock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_clock.c,v
> retrieving revision 1.108
> diff -u -p -r1.108 kern_clock.c
> --- kern/kern_clock.c 25 Apr 2023 00:58:47 -  1.108
> +++ kern/kern_clock.c 19 Jul 2023 14:33:04 -
> @@ -49,10 +49,6 @@
>  #include 
>  #include 
>
> -#if defined(GPROF) || defined(DDBPROF)
> -#include 
> -#endif
> -
>  #include "dt.h"
>  #if NDT > 0
>  #include 
> @@ -87,8 +83,6 @@ int schedhz;
>  int  profhz;
>  int  profprocs;
>  int  ticks = INT_MAX - (15 * 60 * HZ);
> -static int psdiv, pscnt; /* prof => stat divider */
> -int  psratio;/* ratio: prof / stat */
>
>  volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);
>
> @@ -99,16 +93,13 @@ void
>  initclocks(void)
>  {
>   /*
> -  * Set divisors to 1 (normal case) and let the machine-specific
> -  * code do its bit.
> +  * Let the machine-specific code do its bit.
>*/
> - psdiv = pscnt = 1;
>   cpu_initclocks();
>
> - /*
> -  * Compute profhz/stathz.
> -  */
> - psratio = profhz / stathz;
> + KASSERT(profhz >= stathz && profhz <= 1000000000);
> + KASSERT(profhz % stathz == 0);
> + profclock_period = 1000000000 / profhz;
>
>   inittimecounter();
>  }
> @@ -256,7 +247,6 @@ startprofclock(struct process *pr)
>   atomic_setbits_int(>ps_flags, PS_PROFIL);
>   if (++profprocs == 1) {
>   s = splstatclock();
> - psdiv = pscnt = psratio;
>   setstatclockrate(profhz);
>   splx(s);
>   }
> @@ -275,7 +265,6 @@ stopprofclock(struct process *pr)
>   atomic_clearbits_int(>ps_flags, PS_PROFIL);
>   if (--profprocs == 0) {
>   s = splstatclock();
> - psdiv = pscnt = 1;
>   setstatclockrate(stathz);
>   splx(s);
>   }
> @@ -289,35 +278,13 @@ stopprofclock(struct process *pr)
>  void
>  statclock(struct clockframe *frame)
>  {
> -#if defined(GPROF) || defined(DDBPROF)
> - struct gmonparam *g;
> - u_long i;
> -#endif
>   struct cpu_info *ci = curcpu();
>   struct schedstate_percpu *spc = >ci_schedstate;
>   struct proc *p = curproc;
>   struct process *pr;
>
> - /*
> -  * Notice changes in divisor frequency, and adjust clock
> -  * frequency accordingly.
> -  */
> - if (spc->spc_psdiv != psdiv) {
> - spc->spc_psdiv = psdiv;
> - spc->spc_pscnt = psdiv;
> - if (psdiv == 1) {
> - setstatclockrate(stathz);
> - } else {
> -

Re: ietp cleanup

2023-07-20 Thread Mike Larkin

On Thu, Jul 20, 2023 at 12:58:33PM -0500, joshua stein wrote:
> bmercer@ noticed there was no newline printed after a successful attachment.
> I did some other minor cleanup removing duplicate dv_xname printing during
> attachment and wrapping at 80 chars.
>
> ok?
>

ok mlarkin

>
> Index: sys/dev/i2c/ietp.c
> ===
> RCS file: /cvs/src/sys/dev/i2c/ietp.c,v
> retrieving revision 1.1
> diff -u -p -u -p -r1.1 ietp.c
> --- sys/dev/i2c/ietp.c8 Jul 2023 02:43:02 -   1.1
> +++ sys/dev/i2c/ietp.c20 Jul 2023 17:55:40 -
> @@ -1,6 +1,6 @@
>  /* $OpenBSD: ietp.c,v 1.1 2023/07/08 02:43:02 jcs Exp $ */
>  /*
> - * elan-i2c driver
> + * Elan I2C Touchpad driver
>   *
>   * Copyright (c) 2015, 2016 joshua stein 
>   * Copyright (c) 2020, 2022 Vladimir Kondratyev 
> @@ -19,9 +19,10 @@
>   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>   */
> -/* Protocol documentation: 
> https://lkml.indiana.edu/hypermail/linux/kernel/1205.0/02551.html.
> -   Based on FreeBSD ietp driver.
> -*/
> +/* Protocol documentation:
> + * https://lkml.indiana.edu/hypermail/linux/kernel/1205.0/02551.html
> + * Based on FreeBSD ietp driver.
> + */
>  #include 
>  #include 
> @@ -166,7 +167,7 @@ ietp_attach(struct device *parent, struc
>   sc->sc_ih = iic_intr_establish(sc->sc_tag, ia->ia_intr,
>   IPL_TTY, ietp_intr, sc, sc->sc_dev.dv_xname);
>   if (sc->sc_ih == NULL) {
> - printf(", can't establish interrupt");
> + printf(", can't establish interrupt\n");
>   return;
>   }
>   }
> @@ -177,13 +178,13 @@ ietp_attach(struct device *parent, struc
>   buf8 = (uint8_t *)
>   if (ietp_iic_read_reg(sc, IETP_UNIQUEID, sizeof(buf), ) != 0) {
> - printf("%s: failed reading product ID\n", sc->sc_dev.dv_xname);
> + printf(": failed reading product ID\n");
>   return;
>   }
>   sc->product_id = le16toh(buf);
>   if (ietp_iic_read_reg(sc, IETP_PATTERN, sizeof(buf), ) != 0) {
> - printf("%s: failed reading pattern\n", sc->sc_dev.dv_xname);
> + printf(": failed reading pattern\n");
>   return;
>   }
>   pattern = buf == 0x ? 0 : buf8[1];
> @@ -191,61 +192,62 @@ ietp_attach(struct device *parent, struc
>   reg = pattern >= 0x01 ? IETP_IC_TYPE : IETP_OSM_VERSION;
>   if (ietp_iic_read_reg(sc, reg, sizeof(buf), ) != 0) {
> - printf("%s: failed reading IC type\n", sc->sc_dev.dv_xname);
> + printf(": failed reading IC type\n");
>   return;
>   }
>   sc->ic_type = pattern >= 0x01 ? be16toh(buf) : buf8[1];
>   if (ietp_iic_read_reg(sc, IETP_NSM_VERSION, sizeof(buf), ) != 0) {
> - printf("%s: failed reading SM version\n", sc->sc_dev.dv_xname);
> + printf(": failed reading SM version\n");
>   return;
>   }
>   sc->is_clickpad = (buf8[0] & 0x10) != 0;
>   if (ietp_iic_set_absolute_mode(sc, true) != 0) {
> - printf("%s: failed to set absolute mode\n", 
> sc->sc_dev.dv_xname);
> + printf(": failed to set absolute mode\n");
>   return;
>   }
>   if (ietp_iic_read_reg(sc, IETP_MAX_X_AXIS, sizeof(buf), ) != 0) {
> - printf("%s: failed reading max x\n", sc->sc_dev.dv_xname);
> + printf(": failed reading max x\n");
>   return;
>   }
>   sc->max_x = le16toh(buf);
>   if (ietp_iic_read_reg(sc, IETP_MAX_Y_AXIS, sizeof(buf), ) != 0) {
> - printf("%s: failed reading max y\n", sc->sc_dev.dv_xname);
> + printf(": failed reading max y\n");
>   return;
>   }
>   sc->max_y = le16toh(buf);
>   if (ietp_iic_read_reg(sc, IETP_TRACENUM, sizeof(buf), ) != 0) {
> - printf("%s: failed reading trace info\n", sc->sc_dev.dv_xname);
> + printf(": failed reading trace info\n");
>   return;
>   }
>   sc->trace_x = sc->max_x / buf8[0];
>   sc->trace_y = sc->max_y / buf8[1];
>   if (ietp_iic_read_reg(sc, IETP_PRESSURE, sizeof(buf), ) != 0) {
> - printf("%s: failed reading pressure format\n", 
> sc->sc_dev.dv_xname);
> + printf(": failed reading pressure format\n");
>   return;
>   }
>   sc->pressure_base = (buf8[0] & 0x10) ? 0 : IETP_PRESSURE_BASE;
>   if (ietp_iic_read_reg(sc, IETP_RESOLUTION, sizeof(buf), )  != 0) {
> - printf("%s: failed reading resolution\n", sc->sc_dev.dv_xname);
> + printf(": failed reading resolution\n");
>   return;
>   }
>   /* Conversion from internal format to dot per mm */
>   sc->res_x = ietp_res2dpmm(buf8[0], sc->hi_precision);
>   sc->res_y = ietp_res2dpmm(buf8[1], sc->hi_precision);
> -
> +
>   sc->report_id = sc->hi_precision ?
>

Re: [v2] statclock: move profil(2), GPROF code into other clock interrupts

2023-07-18 Thread Mike Larkin

On Tue, Jul 18, 2023 at 08:21:41AM -0500, Scott Cheloha wrote:
> This patch moves the profil(2)- and GPROF-specific parts of
> statclock() out into separate clock interrupt routines.  The
> profil(2) part moves into profclock() and is enabled/disabled as
> needed during mi_switch().  The GPROF part moves into gmonclock() and
> is enabled/disabled as needed via sysctl(2).
>
> Moving those parts out of statclock() eliminates the need for an
> effective statclock frequency and we can delete all the junk related
> to that: psratio/psdiv/pscnt and corresponding members of
> schedstate_percpu, clockintr_setstatclockrate(), a bunch of other
> clockintr-internal code
>
> In separate commits I have addressed:
>
> - General GPROF instability on amd64
> - GPROF causing a crash during suspend/resume
> - CTASSERT breakage on amd64 related to schedstate_percpu
>   changes in this patch
>
> This has been kicking around for over two months.  Personally, I have
> tested it on amd64, arm64, macppc, octeon, and sparc64.
>
> Compile- and boot-tests on other platforms (alpha, i386, luna88k,
> riscv64, sh) would be appreciated, but the last time I asked for tests
> I got zero reports back.

i386 compiles and boots. as reported in separate mail, riscv64 doesn't
compile.

>
> I don't know how to proceed.
>
> FWIW, GPROF is not enabled in any default kernel configurations and
> profil(2) is more-or-less useless (and painful to test) until I finish
> changing the libc gmon code and gprof(1).  So, the patch is low-risk.
>
> v1: https://marc.info/?l=openbsd-tech=168721453821801=2
>
> Index: kern/kern_clock.c
> ===
> RCS file: /cvs/src/sys/kern/kern_clock.c,v
> retrieving revision 1.108
> diff -u -p -r1.108 kern_clock.c
> --- kern/kern_clock.c 25 Apr 2023 00:58:47 -  1.108
> +++ kern/kern_clock.c 18 Jul 2023 13:14:27 -
> @@ -49,10 +49,6 @@
>  #include 
>  #include 
>
> -#if defined(GPROF) || defined(DDBPROF)
> -#include 
> -#endif
> -
>  #include "dt.h"
>  #if NDT > 0
>  #include 
> @@ -87,8 +83,6 @@ int schedhz;
>  int  profhz;
>  int  profprocs;
>  int  ticks = INT_MAX - (15 * 60 * HZ);
> -static int psdiv, pscnt; /* prof => stat divider */
> -int  psratio;/* ratio: prof / stat */
>
>  volatile unsigned long jiffies = ULONG_MAX - (10 * 60 * HZ);
>
> @@ -99,16 +93,13 @@ void
>  initclocks(void)
>  {
>   /*
> -  * Set divisors to 1 (normal case) and let the machine-specific
> -  * code do its bit.
> +  * Let the machine-specific code do its bit.
>*/
> - psdiv = pscnt = 1;
>   cpu_initclocks();
>
> - /*
> -  * Compute profhz/stathz.
> -  */
> - psratio = profhz / stathz;
> + KASSERT(profhz >= stathz && profhz <= 1000000000);
> + KASSERT(profhz % stathz == 0);
> + profclock_period = 1000000000 / profhz;
>
>   inittimecounter();
>  }
> @@ -256,7 +247,6 @@ startprofclock(struct process *pr)
>   atomic_setbits_int(>ps_flags, PS_PROFIL);
>   if (++profprocs == 1) {
>   s = splstatclock();
> - psdiv = pscnt = psratio;
>   setstatclockrate(profhz);
>   splx(s);
>   }
> @@ -275,7 +265,6 @@ stopprofclock(struct process *pr)
>   atomic_clearbits_int(>ps_flags, PS_PROFIL);
>   if (--profprocs == 0) {
>   s = splstatclock();
> - psdiv = pscnt = 1;
>   setstatclockrate(stathz);
>   splx(s);
>   }
> @@ -289,35 +278,13 @@ stopprofclock(struct process *pr)
>  void
>  statclock(struct clockframe *frame)
>  {
> -#if defined(GPROF) || defined(DDBPROF)
> - struct gmonparam *g;
> - u_long i;
> -#endif
>   struct cpu_info *ci = curcpu();
>   struct schedstate_percpu *spc = >ci_schedstate;
>   struct proc *p = curproc;
>   struct process *pr;
>
> - /*
> -  * Notice changes in divisor frequency, and adjust clock
> -  * frequency accordingly.
> -  */
> - if (spc->spc_psdiv != psdiv) {
> - spc->spc_psdiv = psdiv;
> - spc->spc_pscnt = psdiv;
> - if (psdiv == 1) {
> - setstatclockrate(stathz);
> - } else {
> - setstatclockrate(profhz);
> - }
> - }
> -
>   if (CLKF_USERMODE(frame)) {
>   pr = p->p_p;
> - if (pr->ps_flags & PS_PROFIL)
> - addupc_intr(p, CLKF_PC(frame), 1);
> - if (--spc->spc_pscnt > 0)
> - return;
>   /*
>* Came from user mode; CPU was in user state.
>* If this process is being profiled record the tick.
> @@ -328,23 +295,6 @@ statclock(struct clockframe *frame)
>   else
>   spc->spc_cp_time[CP_USER]++;
>   } else {
> -#if defined(GPROF) ||

Re: all platforms, kernel: remove __HAVE_CLOCKINTR symbol

2023-07-02 Thread Mike Larkin

On Sat, Jul 01, 2023 at 08:35:47PM -0500, Scott Cheloha wrote:
> Every platform made the clockintr switch six months ago or more.  The
> __HAVE_CLOCKINTR symbol is now redundant and can be removed.
>
> ok?
>

makes sense if every platform defines it all the time.



> Index: ./ddb/db_command.c
> ===
> RCS file: /cvs/src/sys/ddb/db_command.c,v
> retrieving revision 1.98
> diff -u -p -r1.98 db_command.c
> --- ./ddb/db_command.c8 Mar 2023 04:43:07 -   1.98
> +++ ./ddb/db_command.c2 Jul 2023 01:34:00 -
> @@ -579,9 +579,7 @@ db_bcstats_print_cmd(db_expr_t addr, int
>  const struct db_command db_show_all_cmds[] = {
>   { "procs",  db_show_all_procs,  0, NULL },
>   { "callout",db_show_callout,0, NULL },
> -#ifdef __HAVE_CLOCKINTR
>   { "clockintr",  db_show_all_clockintr,  0, NULL },
> -#endif
>   { "pools",  db_show_all_pools,  0, NULL },
>   { "mounts", db_show_all_mounts, 0, NULL },
>   { "vnodes", db_show_all_vnodes, 0, NULL },
> Index: ./ddb/db_interface.h
> ===
> RCS file: /cvs/src/sys/ddb/db_interface.h,v
> retrieving revision 1.25
> diff -u -p -r1.25 db_interface.h
> --- ./ddb/db_interface.h  5 Nov 2022 19:29:45 -   1.25
> +++ ./ddb/db_interface.h  2 Jul 2023 01:34:00 -
> @@ -44,9 +44,7 @@ void db_kill_cmd(db_expr_t, int, db_expr
>  void db_show_all_procs(db_expr_t, int, db_expr_t, char *);
>
>  /* kern/kern_clockintr.c */
> -#ifdef __HAVE_CLOCKINTR
>  void db_show_all_clockintr(db_expr_t, int, db_expr_t, char *);
> -#endif
>
>  /* kern/kern_timeout.c */
>  void db_show_callout(db_expr_t, int, db_expr_t, char *);
> Index: ./kern/kern_clockintr.c
> ===
> RCS file: /cvs/src/sys/kern/kern_clockintr.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 kern_clockintr.c
> --- ./kern/kern_clockintr.c   2 Jul 2023 00:55:18 -   1.26
> +++ ./kern/kern_clockintr.c   2 Jul 2023 01:34:00 -
> @@ -29,8 +29,6 @@
>  #include 
>  #include 
>
> -#ifdef __HAVE_CLOCKINTR
> -
>  /*
>   * Protection for global variables in this file:
>   *
> @@ -773,4 +771,3 @@ db_show_clockintr(const struct clockintr
>  }
>
>  #endif /* DDB */
> -#endif /*__HAVE_CLOCKINTR */
> Index: ./kern/kern_sysctl.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
> retrieving revision 1.415
> diff -u -p -r1.415 kern_sysctl.c
> --- ./kern/kern_sysctl.c  21 May 2023 12:47:54 -  1.415
> +++ ./kern/kern_sysctl.c  2 Jul 2023 01:34:00 -
> @@ -430,11 +430,9 @@ kern_sysctl_dirs(int top_name, int *name
>   case KERN_CPUSTATS:
>   return (sysctl_cpustats(name, namelen, oldp, oldlenp,
>   newp, newlen));
> -#ifdef __HAVE_CLOCKINTR
>   case KERN_CLOCKINTR:
>   return sysctl_clockintr(name, namelen, oldp, oldlenp, newp,
>   newlen);
> -#endif
>   default:
>   return (ENOTDIR);   /* overloaded */
>   }
> Index: ./kern/subr_suspend.c
> ===
> RCS file: /cvs/src/sys/kern/subr_suspend.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 subr_suspend.c
> --- ./kern/subr_suspend.c 10 Nov 2022 10:37:40 -  1.14
> +++ ./kern/subr_suspend.c 2 Jul 2023 01:34:00 -
> @@ -165,10 +165,9 @@ fail_suspend:
>   splx(s);
>
>   inittodr(gettime());
> -#ifdef __HAVE_CLOCKINTR
>   clockintr_cpu_init(NULL);
>   clockintr_trigger();
> -#endif
> +
>   sleep_resume(v);
>   resume_randomness(rndbuf, rndbuflen);
>  #ifdef MULTIPROCESSOR
> Index: ./arch/alpha/include/_types.h
> ===
> RCS file: /cvs/src/sys/arch/alpha/include/_types.h,v
> retrieving revision 1.25
> diff -u -p -r1.25 _types.h
> --- ./arch/alpha/include/_types.h 10 Dec 2022 15:02:29 -  1.25
> +++ ./arch/alpha/include/_types.h 2 Jul 2023 01:34:00 -
> @@ -35,8 +35,6 @@
>  #ifndef _MACHINE__TYPES_H_
>  #define _MACHINE__TYPES_H_
>
> -#define  __HAVE_CLOCKINTR
> -
>  #if defined(_KERNEL)
>  typedef struct label_t {
>   long val[10];
> Index: ./arch/amd64/include/_types.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/_types.h,v
> retrieving revision 1.18
> diff -u -p -r1.18 _types.h
> --- ./arch/amd64/include/_types.h 8 Nov 2022 17:34:13 -   1.18
> +++ ./arch/amd64/include/_types.h 2 Jul 2023 01:34:00 -
> @@ -35,8 +35,6 @@
>  #ifndef _MACHINE__TYPES_H_
>  #define _MACHINE__TYPES_H_
>
> -#define  __HAVE_CLOCKINTR
> -
>  /*
>   * _ALIGN(p) rounds p (pointer or byte index) up to a correctly-aligned
>   * value for all data types (int, long, ...).   The result is an
>

Re: vmm/vmd: use anon shared mappings for devices

2023-05-10 Thread Mike Larkin

On Tue, May 09, 2023 at 06:12:55AM -0400, Dave Voutila wrote:
> tech@,
>
> The diff below adds a new ioctl for vmm(4) that allows an emulated
> device process to request that vmm(4) enter a shared mapping in its vmspace so
> it can access guest memory without using a shared mapping backed by a
> named file.
>
> Similar to the vm creation ioctl (VMM_IOC_CREATE), the caller requires
> the "vmm" and "proc" pledge(2) promises. This allows the emulated
> devices to do this setup early and drop these promises back down to just
> "stdio" before any device emulation occurs.
>
> Feel free to skip to the diff (the regress change shows how it works in
> a simplified case) or continue reading for reasoning behind this
> change. I share this primarily for testers and feedback from other devs
> while mlarkin@ reviews.
>
> To test:
>
> 1. apply diff
> 2. build and install new kernel
> 3. copy or symlink new vmm.h into /usr/include/dev/vmm/
> 4. build and reinstall vmd (no changes for vmctl needed)
>
> You should see no change during vm usage, however you should now see no
> change in /tmp consumption while unmounting things like NFS mounts or
> usb disks. Read on for details.
>
> ...
>
> vmd(8) began emulating virtio network and block devices in subprocesses
> with a commit I made at the recent hackathon in Morocco. It relies on
> creating shared memory mappings using shm_mkstemp(3) and passing file
> descriptors to the fork/exec'd child processes.
>
> I've since received reports that using named mappings for shared memory
> is having 2 negative impacts:
>
> 1. increased overhead during vm teardown, often making systems
>    unresponsive (this is my conclusion based on only minimal evidence)
>
> 2. unmounting any device on the host while a vm is running causes some
>    guest memory to be flushed to disk (the backing file is already
>    unlinked, so not visible to other processes).
>
> (2) can cause /tmp to fill up or introduce failure conditions I'm not
> sure we can recover from in vmd. It also has implications for other
> services on the host.
>
> I don't own a fireproof suit that fits...so I'm not about to wade into
> the VFS & UVM layers to figure out if (1) or (2) can be mitigated or
> fixed on their own.
>
> One idea was to implement what FreeBSD borrowed from Linux in their
> forever quest to become LinuxBSD: memfd_create(2) [1].
>
> I took one look and decided this was not the time for me to be trying to
> land a new syscall primarily for vmd (and some ports) and went another
> route.
>
> [1] https://man7.org/linux/man-pages/man2/memfd_create.2.html
>
> -dv
>

This does fix the unexpected shm issues. Thanks!

Diff reads ok to me, go for it when you are happy with the test results.

-ml

>
> diff refs/heads/master refs/heads/vmm-mapshare
> commit - cec1ace2d4d21c85f4c8bacc2dd971721bf6b694
> commit + 8f533c371094c044b0127d468be5feaaf775811b
> blob - f221b58f75c4eb01a3a04ae45c7cdb066b11361a
> blob + 0e6f5ff858c51bd9707c657b154b0df1f8944c3b
> --- regress/sys/arch/amd64/vmm/vcpu.c
> +++ regress/sys/arch/amd64/vmm/vcpu.c
> @@ -83,6 +83,7 @@ main(int argc, char **argv)
>   struct vm_resetcpu_paramsvresetp;
>   struct vm_run_params vrunp;
>   struct vm_terminate_params   vtp;
> + struct vm_sharemem_paramsvsp;
>
>   struct vm_mem_range *vmr;
>   int  fd, ret = 1;
> @@ -127,8 +128,9 @@ main(int argc, char **argv)
>   ((uint8_t*)p)[j + 1] = PCKBC_AUX;
>   }
>   vmr->vmr_va = (vaddr_t)p;
> - printf("mapped region %zu: { gpa: 0x%08lx, size: %lu }\n",
> - i, vmr->vmr_gpa, vmr->vmr_size);
> + printf("created mapped region %zu: { gpa: 0x%08lx, size: %lu,"
> + " hva: 0x%lx }\n", i, vmr->vmr_gpa, vmr->vmr_size,
> + vmr->vmr_va);
>   }
>
>   if (ioctl(fd, VMM_IOC_CREATE, ) == -1)
> @@ -136,8 +138,55 @@ main(int argc, char **argv)
>   printf("created vm %d named \"%s\"\n", vcp.vcp_id, vcp.vcp_name);
>
>   /*
> -  * 2. Check that our VM exists.
> +  * 2. Check we can create shared memory mappings.
>*/
> + memset(, 0, sizeof(vsp));
> + vsp.vsp_nmemranges = vcp.vcp_nmemranges;
> + memcpy(_memranges, _memranges,
> + sizeof(vsp.vsp_memranges));
> + vsp.vsp_vm_id = vcp.vcp_id;
> +
> + /* Find some new va ranges... */
> + for (i = 0; i < vsp.vsp_nmemranges; i++) {
> + vmr = _memranges[i];
> + p = mmap(NULL, vmr->vmr_size, PROT_READ | PROT_WRITE,
> + MAP_PRIVATE | MAP_ANON, -1, 0);
> + if (p == MAP_FAILED)
> + err(1, "mmap");
> + vmr->vmr_va = (vaddr_t)p;
> + }
> +
> + /* Release our mappings so vmm can replace them. */
> + for (i = 0; i < vsp.vsp_nmemranges; i++) {
> + vmr = _memranges[i];
> + munmap((void*)vmr->vmr_va, vmr->vmr_size);
> + }
> +
> +

Re: riscv64 RAMDISK: enable softraid

2023-04-25 Thread Mike Larkin

On Tue, Apr 25, 2023 at 01:12:24PM +, Klemens Nanni wrote:
> (Thought I already committed this months ago, noticed now looking into
> bootloaders again...)
>
> GENERIC, efiboot and installboot(8) all have softraid support already,
> softraid(4) documents boot support for riscv64,
> "just" the ramdisk kernel lacks it.
>
> Still boots fine on the SiFive HiFive Unmatched A00.
>
> OK?
>
> Index: sys/arch/riscv64/conf/RAMDISK
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/RAMDISK,v
> retrieving revision 1.35
> diff -u -p -r1.35 RAMDISK
> --- sys/arch/riscv64/conf/RAMDISK 26 Jun 2022 20:05:06 -  1.35
> +++ sys/arch/riscv64/conf/RAMDISK 25 Apr 2023 10:55:23 -
> @@ -27,6 +27,7 @@ config  bsd root on rd0a swap on rd0b
>
>  # mainbus
>  mainbus0 at root
> +softraid0at root
>
>  # cpu0
>  cpu0 at mainbus0
>

ok mlarkin

Re: vmd: silence error on missing optional config

2023-04-24 Thread Mike Larkin

On Mon, Apr 24, 2023 at 02:29:38PM +, Klemens Nanni wrote:
> On Mon, Apr 24, 2023 at 01:46:17PM +, Klemens Nanni wrote:
> > I have no vm.conf, vmd does not need it, but complains:
> > Apr 23 17:12:50 atar vmd[79320]: failed to open /etc/vm.conf: No such file or directory
> >
> > We've fixed the same for dhcpleased a year ago, port the diff from there.
> >
> > Works for me with good, with bad and without /etc/vm.conf.
> > Feedback? OK?
>
> Make it a debug hint, after talking with mlarkin.
>
> This keeps /var/log/{messages,daemon} clean on a default install with
> config-less vmd enabled, but will show up under `-vv'.
>
> Nicely quiet:
>   # /usr/sbin/vmd -d [-v]
>   startup
>   ...
>
> Debug:
>   # /usr/sbin/vmd -d -vv
>   startup
>   /etc/vm.conf: missing
>   vmd_configure: setting staggered start configuration to parallelism: 12 and delay: 30
>   ...
>
> If passed explicitly, it remains an error:
>   # /usr/sbin/vmd -d -f/etc/vm.conf
>   startup
>   failed to open /etc/vm.conf: No such file or directory
>   ...
>
> Non-default paths still behave as expected, just to make sure:
>   # /usr/sbin/vmd -d -f/dev/zero
>   startup
>   ...
>
>   # /usr/sbin/vmd -d -f/nonexistent
>   startup
>   failed to open /nonexistent: No such file or directory
>   ...
>
> Feedback? OK?
>

go for it

> Index: parse.y
> ===
> RCS file: /cvs/src/usr.sbin/vmd/parse.y,v
> retrieving revision 1.63
> diff -u -p -r1.63 parse.y
> --- parse.y   28 Jan 2023 14:40:53 -  1.63
> +++ parse.y   24 Apr 2023 14:18:07 -
> @@ -1181,9 +1181,15 @@ popfile(void)
>  int
>  parse_config(const char *filename)
>  {
> - struct sym  *sym, *next;
> + extern const chardefault_conffile[];
> + struct sym  *sym, *next;
>
>   if ((file = pushfile(filename, 0)) == NULL) {
> + /* no default config file is fine */
> + if (errno == ENOENT && filename == default_conffile) {
> + log_debug("%s: missing", filename);
> + return (0);
> + }
>   log_warn("failed to open %s", filename);
>   if (errno == ENOENT)
>   return (0);
> Index: vmd.c
> ===
> RCS file: /cvs/src/usr.sbin/vmd/vmd.c,v
> retrieving revision 1.142
> diff -u -p -r1.142 vmd.c
> --- vmd.c 23 Apr 2023 12:11:37 -  1.142
> +++ vmd.c 24 Apr 2023 13:39:21 -
> @@ -89,6 +89,9 @@ static struct privsep_proc *proc_priv =
>  static struct passwd proc_privpw;
>  static const uint8_t zero_mac[ETHER_ADDR_LEN];
>
> +const chardefault_conffile[] = VMD_CONF;
> +const char   *conffile = default_conffile;
> +
>  int
>  vmd_dispatch_control(int fd, struct privsep_proc *p, struct imsg *imsg)
>  {
> @@ -767,7 +770,6 @@ main(int argc, char **argv)
>  {
>   struct privsep  *ps;
>   int  ch;
> - const char  *conffile = VMD_CONF;
>   enum privsep_procid  proc_id = PROC_PARENT;
>   int  proc_instance = 0, vm_launch = 0, vm_fd = -1;
>   const char  *errp, *title = NULL;
>

Re: vmd(8): fix restoring virtio devs on vm receive

2023-04-22 Thread Mike Larkin

On Sun, Apr 16, 2023 at 01:14:00PM -0400, Dave Voutila wrote:
> Moving vmd to use zero-copy semantics for virtqueues introduced a bug in
> the vm send/receive functionality. The host va is potentially invalid on
> restore if vmd has restarted and re-randomized the address space of the
> vmm process that forks vm's.
>
> This NULLs out the hva on dump and resets it on restore.
>
> This fix is also required for my upcoming "+exec" diff because each vm
> will get a new address space every execution, so the hva has practically
> no chance of being valid on restore.
>
> ok?
>

ok mlarkin, sorry for the delay reviewing this.

> Index: virtio.c
> ===
> RCS file: /cvs/src/usr.sbin/vmd/virtio.c,v
> retrieving revision 1.99
> diff -u -p -r1.99 virtio.c
> --- virtio.c  28 Dec 2022 21:30:19 -  1.99
> +++ virtio.c  16 Apr 2023 17:11:29 -
> @@ -2015,6 +2015,8 @@ vmmci_restore(int fd, uint32_t vm_id)
>  int
>  viornd_restore(int fd, struct vm_create_params *vcp)
>  {
> + void *hva = NULL;
> +
>   log_debug("%s: receiving viornd", __func__);
>   if (atomicio(read, fd, , sizeof(viornd)) != sizeof(viornd)) {
>   log_warnx("%s: error reading viornd from fd", __func__);
> @@ -2028,6 +2030,11 @@ viornd_restore(int fd, struct vm_create_
>   viornd.vm_id = vcp->vcp_id;
>   viornd.irq = pci_get_dev_irq(viornd.pci_id);
>
> + hva = hvaddr_mem(viornd.vq[0].q_gpa, vring_size(VIORND_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("failed to restore viornd virtqueue");
> + viornd.vq[0].q_hva = hva;
> +
>   return (0);
>  }
>
> @@ -2038,6 +2045,7 @@ vionet_restore(int fd, struct vmd_vm *vm
>   struct vm_create_params *vcp = >vmc_params;
>   uint8_t i;
>   int ret;
> + void *hva = NULL;
>
>   nr_vionet = vcp->vcp_nnics;
>   if (vcp->vcp_nnics > 0) {
> @@ -2079,6 +2087,18 @@ vionet_restore(int fd, struct vmd_vm *vm
>   vionet[i].vm_vmid = vm->vm_vmid;
>   vionet[i].irq = pci_get_dev_irq(vionet[i].pci_id);
>
> + hva = hvaddr_mem(vionet[i].vq[RXQ].q_gpa,
> + vring_size(VIONET_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("failed to restore vionet RX virtqueue");
> + vionet[i].vq[RXQ].q_hva = hva;
> +
> + hva = hvaddr_mem(vionet[i].vq[TXQ].q_gpa,
> + vring_size(VIONET_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("failed to restore vionet TX virtqueue");
> + vionet[i].vq[TXQ].q_hva = hva;
> +
>   memset([i].event, 0, sizeof(struct event));
>   event_set([i].event, vionet[i].fd,
>   EV_READ | EV_PERSIST, vionet_rx_event, [i]);
> @@ -2093,6 +2113,7 @@ vioblk_restore(int fd, struct vmop_creat
>  {
>   struct vm_create_params *vcp = >vmc_params;
>   uint8_t i;
> + void *hva = NULL;
>
>   nr_vioblk = vcp->vcp_ndisks;
>   vioblk = calloc(vcp->vcp_ndisks, sizeof(struct vioblk_dev));
> @@ -2123,6 +2144,12 @@ vioblk_restore(int fd, struct vmop_creat
>   }
>   vioblk[i].vm_id = vcp->vcp_id;
>   vioblk[i].irq = pci_get_dev_irq(vioblk[i].pci_id);
> +
> + hva = hvaddr_mem(vioblk[i].vq[0].q_gpa,
> + vring_size(VIOBLK_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("failed to restore vioblk virtqueue");
> + vioblk[i].vq[0].q_hva = hva;
>   }
>   return (0);
>  }
> @@ -2130,6 +2157,9 @@ vioblk_restore(int fd, struct vmop_creat
>  int
>  vioscsi_restore(int fd, struct vm_create_params *vcp, int child_cdrom)
>  {
> + void *hva = NULL;
> + unsigned int i;
> +
>   if (!strlen(vcp->vcp_cdrom))
>   return (0);
>
> @@ -2161,6 +2191,15 @@ vioscsi_restore(int fd, struct vm_create
>   vioscsi->vm_id = vcp->vcp_id;
>   vioscsi->irq = pci_get_dev_irq(vioscsi->pci_id);
>
> + /* vioscsi uses 3 virtqueues. */
> + for (i = 0; i < 3; i++) {
> + hva = hvaddr_mem(vioscsi->vq[i].q_gpa,
> + vring_size(VIOSCSI_QUEUE_SIZE));
> + if (hva == NULL)
> + fatal("failed to restore vioscsi virtqueue");
> + vioscsi->vq[i].q_hva = hva;
> + }
> +
>   return (0);
>  }
>
> @@ -2194,6 +2233,9 @@ int
>  viornd_dump(int fd)
>  {
>   log_debug("%s: sending viornd", __func__);
> +
> + viornd.vq[0].q_hva = NULL;
> +
>   if (atomicio(vwrite, fd, , sizeof(viornd)) != sizeof(viornd)) {
>   log_warnx("%s: error writing viornd to fd", __func__);
>   return (-1);
> @@ -2205,6 +2247,7 @@ int
>  vmmci_dump(int fd)
>  {
>   log_debug("%s: sending vmmci", __func__);
> +
>   if (atomicio(vwrite, fd, , sizeof(vmmci)) != sizeof(vmmci)) {
>

Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Mike Larkin

On Fri, Apr 07, 2023 at 12:07:07PM -0400, Dave Voutila wrote:
>
> Dave Voutila  writes:
>
> > In vmd, the vmm process forks to create the resulting vm process. After
> > this fork, the vmm parent process closes all the file descriptors
> > pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
> >
> > The logic was a bit funky, so this change relies on the fact we can
> > attempt the close(2) call and use its success/failure to determine if we
> > have an fd to mark -1 in the vm structure. (I guess we could just
> > blindly set them to -1 post-close, but this feels more sensical to me.)
> >
> > While in the function, it cleans up some commentary and logging around
> > the actual fork(2) call. Since fork(2) sets errno, we can use it in the
> > log message, too.
> >
> > This reduces some noise in my upcoming diff to introduce execvp to the
> > forking of child vm processes (as presented at AsiaBSDCon).
> >
>
> A touch longer, but won't generate ktrace noise by blind calls to close(2)
> and also accounts for the other error conditions (EINTR, EIO).
>
> For EIO, not sure yet how we want to handle it other than log it.
>
> For EINTR, we want to account for that race and make sure we retry since
> the vmm process is long-lived and could inadvertently keep things like
> tty fds or disk image fds open after the guest vm terminates.
>
> I need to check on other calls to close(2) in vmd, but most fds are
> passed via imsg, so I'd presume any interruption is handled in the
> kernel. (Someone correct me if I'm wrong.)
>
>

as we discussed this effort previously in .jp, ok mlarkin@

> diff refs/heads/master refs/heads/vmd-vmm-fd-dance
> commit - 8371c6b4d5765a69d520f93d64f57c6a2989f9ae
> commit + 9c07553a618956ba89f29b93906dcd6ea6c19de8
> blob - 75af37a29a6cd4917d8c4ca9be35c48772314f4b
> blob + e0af71dac5d730c7b88166384a95b94bf14fcfc4
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -1956,3 +1956,21 @@ vm_terminate(struct vmd_vm *vm, const char *caller)
>   vm_remove(vm, caller);
>   }
>  }
> +
> +int
> +close_fd(int fd)
> +{
> + int ret;
> +
> + if (fd == -1)
> + return (0);
> +
> + do
> + ret = close(fd);
> + while (ret == -1 && errno == EINTR);
> +
> + if (ret == -1 && errno == EIO)
> + log_warn("%s(%d)", __func__, fd);
> +
> + return (ret);
> +}
> blob - 7bbbf62734bc7cd575c4fee384193037b0495bb4
> blob + 7228ace4b31a9fd6b5eb90bf1d048198a03a04fc
> --- usr.sbin/vmd/vmd.h
> +++ usr.sbin/vmd/vmd.h
> @@ -443,6 +443,7 @@ void   getmonotime(struct timeval *);
>  uint32_t prefixlen2mask(uint8_t);
>  void  prefixlen2mask6(u_int8_t, struct in6_addr *);
>  void  getmonotime(struct timeval *);
> +int   close_fd(int);
>
>  /* priv.c */
>  void  priv(struct privsep *, struct privsep_proc *);
> blob - d9eff3c8f703854c7b1e49fee04b8e426956cfbb
> blob + 7db920beec7e34a63f5ba3602f17185eec33d3f7
> --- usr.sbin/vmd/vmm.c
> +++ usr.sbin/vmd/vmm.c
> @@ -641,12 +641,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
>   if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
>   fatal("socketpair");
>
> - /* Start child vmd for this VM (fork, chroot, drop privs) */
> + /* Fork the vmm process to create the vm, inheriting open device fds. */
>   vm_pid = fork();
> -
> - /* Start child failed? - cleanup and leave */
>   if (vm_pid == -1) {
> - log_warnx("%s: start child failed", __func__);
> + log_warn("%s: fork child failed", __func__);
>   ret = EIO;
>   goto err;
>   }
> @@ -654,31 +652,24 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
>   if (vm_pid > 0) {
>   /* Parent */
>   vm->vm_pid = vm_pid;
> - close(fds[1]);
> + close_fd(fds[1]);
>
>   for (i = 0 ; i < vcp->vcp_ndisks; i++) {
>   for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
> - if (vm->vm_disks[i][j] != -1)
> - close(vm->vm_disks[i][j]);
> - vm->vm_disks[i][j] = -1;
> + if (close_fd(vm->vm_disks[i][j]) == 0)
> + vm->vm_disks[i][j] = -1;
>   }
>   }
>   for (i = 0 ; i < vcp->vcp_nnics; i++) {
> - close(vm->vm_ifs[i].vif_fd);
> - vm->vm_ifs[i].vif_fd = -1;
> + if (close_fd(vm->vm_ifs[i].vif_fd) == 0)
> + vm->vm_ifs[i].vif_fd = -1;
>   }
> - if (vm->vm_kernel != -1) {
> - close(vm->vm_kernel);
> + if (close_fd(vm->vm_kernel) == 0)
>   vm->vm_kernel = -1;
> - }
> - if (vm->vm_cdrom != -1) {
> - close(vm->vm_cdrom);
> + if (close_fd(vm->vm_cdrom) == 0)
>   vm->vm_cdrom =

Re: wire in efi_reset on MSFT Surface systems to fix reboots

2023-04-01 Thread Mike Larkin

On Sat, Jan 21, 2023 at 09:59:13AM -0500, Dave Voutila wrote:
> I've long moaned about how my Go3 can't reboot. Woe is me. Now that
> kettenis@ landed some scaffolding for efi(4), I would love to get my Go3
> working in the reboot department.
>
> The approach I'm thinking, in the diff below, is to hook in via
> comparing the FirmwareVendor "string" to make sure we're doing this on
> amd64-based Microsoft EFI systems.
>
> The last time we went down this route, we found reports that arbitrarily
> switching EFI systems over to efi_reset caused reboots to break on
> machines that had been happily using acpi_reset. The struggle is real.
>
> I only have access to my Go3, so would appreciate someone else with a
> Surface brand device check for regression before I ask for OK. Feedback
> from kettenis@ also welcome.
>
> If your Surface has the same problem as mine, what you experience is
> having to do a powerdown (e.g. halt -p) in order to reset the machine. A
> reboot causes the machine to reset, but get stuck bringing itself back
> up and you stare at the MSFT logo splash until your battery runs out or
> you die of boredom.
>
> -dv
>

Following up on an old thread - do you want me to try this on my Go3 (which
does not have this issue)?

kettenis - any objection here?

-ml

>
> diff refs/heads/master refs/heads/efi-powerdown
> commit - 009dd187d54193e7f98e87ccd11c616924278c5e
> commit + c6f9dc35c81aa79313b1ad12bfcdacfb6074803d
> blob - 502bd70a7eddbb271ee54b567c4ffd7a8426
> blob + c53604d2c38321bd3151a9008560ce52d3034fec
> --- sys/arch/amd64/amd64/acpi_machdep.c
> +++ sys/arch/amd64/amd64/acpi_machdep.c
> @@ -334,7 +334,8 @@ acpi_attach_machdep(struct acpi_softc *sc)
>
>   sc->sc_interrupt = isa_intr_establish(NULL, sc->sc_fadt->sci_int,
>   IST_LEVEL, IPL_BIO, acpi_interrupt, sc, sc->sc_dev.dv_xname);
> - cpuresetfn = acpi_reset;
> + if (!cpuresetfn)
> + cpuresetfn = acpi_reset;
>
>  #ifndef SMALL_KERNEL
>   /*
> blob - a5f4563ce7d54e53c9aaadf2823b35d36cd3b1e9
> blob + 88aa3f343f059136b1cfd842717ac1ff1fcec3c0
> --- sys/arch/amd64/amd64/efi_machdep.c
> +++ sys/arch/amd64/amd64/efi_machdep.c
> @@ -39,12 +39,18 @@ void  efi_map_runtime(struct efi_softc *);
>   sizeof(struct efi_softc), efi_match, efi_attach
>  };
>
> +extern struct cfdriver efi_cd;
> +
>  void efi_map_runtime(struct efi_softc *);
>  int  efi_gettime(struct todr_chip_handle *, struct timeval *);
>  int  efi_settime(struct todr_chip_handle *, struct timeval *);
> +void efi_reset(void);
>
>  label_t efi_jmpbuf;
>
> +const CHAR16 fv_msft[5] = { 'M', 'S', 'F', 'T', 0 };
> +extern void (*cpuresetfn)(void);
> +
>  int
>  efi_match(struct device *parent, void *match, void *aux)
>  {
> @@ -119,6 +125,9 @@ efi_attach(struct device *parent, struct device *self,
>   for (i = 0; st->FirmwareVendor[i]; i++)
>   printf("%c", st->FirmwareVendor[i]);
>   printf(" rev 0x%x\n", st->FirmwareRevision);
> +
> + if (memcmp(st->FirmwareVendor, fv_msft, sizeof(fv_msft)) == 0)
> + cpuresetfn = efi_reset;
>   }
>   efi_leave(sc);
>
> @@ -305,3 +314,14 @@ efi_settime(struct todr_chip_handle *handle, struct ti
>   return EIO;
>   return 0;
>  }
> +
> +void
> +efi_reset(void)
> +{
> + struct efi_softc *sc = efi_cd.cd_devs[0];
> +
> + printf("%s\n", __func__);
> + efi_enter(sc);
> + sc->sc_rs->ResetSystem(EfiResetCold, EFI_SUCCESS, 0, NULL);
> + efi_leave(sc);
> +}
>

Re: timer(4/sparc64): remove driver

2023-03-17 Thread Mike Larkin

On Thu, Mar 16, 2023 at 12:25:15PM -0500, Scott Cheloha wrote:
> This code has been dead since we switched sparc64 to clockintr several
> months ago.  Nobody has come forward asking for a timer(4/sparc64)
> intrclock.
>
> As of now, you need %TICK_CMPR or %STICK_CMPR to run OpenBSD on
> sparc64.  The only machines maybe lacking these registers are certain
> early HAL/Fujitsu models like SPARC64 I and II, and maybe SPARC64 III.
>
> We can remove the driver now or wait until after unlock.
>
> The driver implementation is mixed into sparc64/clock.c.  I think I
> got everything, but I'm not positive.

I would just wait until after unlock. ok mlarkin for removing dead code
once unlock happens.

>
> Index: distrib/sets/lists/man/mi
> ===
> RCS file: /cvs/src/distrib/sets/lists/man/mi,v
> retrieving revision 1.1695
> diff -u -p -r1.1695 mi
> --- distrib/sets/lists/man/mi 14 Mar 2023 04:51:34 -  1.1695
> +++ distrib/sets/lists/man/mi 16 Mar 2023 17:18:43 -
> @@ -1979,7 +1979,6 @@
>  ./usr/share/man/man4/sparc64/spif.4
>  ./usr/share/man/man4/sparc64/ssm.4
>  ./usr/share/man/man4/sparc64/tda.4
> -./usr/share/man/man4/sparc64/timer.4
>  ./usr/share/man/man4/sparc64/tvtwo.4
>  ./usr/share/man/man4/sparc64/upa.4
>  ./usr/share/man/man4/sparc64/uperf.4
> Index: share/man/man4/man4.sparc64/Makefile
> ===
> RCS file: /cvs/src/share/man/man4/man4.sparc64/Makefile,v
> retrieving revision 1.82
> diff -u -p -r1.82 Makefile
> --- share/man/man4/man4.sparc64/Makefile  25 Apr 2019 16:47:56 -  
> 1.82
> +++ share/man/man4/man4.sparc64/Makefile  16 Mar 2023 17:18:43 -
> @@ -9,7 +9,7 @@ MAN=  agten.4 apio.4 asio.4 audioce.4 aud
>   pcons.4 pmc.4 power.4 ppm.4 prtc.4 psycho.4 pyro.4 qe.4 qec.4 \
>   radeonfb.4 raptor.4 rfx.4 \
>   sab.4 sbbc.4 schizo.4 spif.4 ssm.4 \
> - tda.4 timer.4 tvtwo.4 upa.4 uperf.4 \
> + tda.4 tvtwo.4 upa.4 uperf.4 \
>   vbus.4 vcc.4 vcons.4 vds.4 vdsk.4 vigra.4 vldc.4 vnet.4 vpci.4 \
>   vrng.4 vrtc.4 vsw.4 \
>   xbox.4 zs.4 zx.4
> Index: share/man/man4/man4.sparc64/timer.4
> ===
> RCS file: share/man/man4/man4.sparc64/timer.4
> diff -N share/man/man4/man4.sparc64/timer.4
> --- share/man/man4/man4.sparc64/timer.4   31 May 2007 19:19:57 -  
> 1.2
> +++ /dev/null 1 Jan 1970 00:00:00 -
> @@ -1,43 +0,0 @@
> -.\" $OpenBSD: timer.4,v 1.2 2007/05/31 19:19:57 jmc Exp $
> -.\"
> -.\" Copyright (c) 2004 Jason L. Wright (ja...@thought.net)
> -.\" All rights reserved.
> -.\"
> -.\" Redistribution and use in source and binary forms, with or without
> -.\" modification, are permitted provided that the following conditions
> -.\" are met:
> -.\" 1. Redistributions of source code must retain the above copyright
> -.\"notice, this list of conditions and the following disclaimer.
> -.\" 2. Redistributions in binary form must reproduce the above copyright
> -.\"notice, this list of conditions and the following disclaimer in the
> -.\"documentation and/or other materials provided with the distribution.
> -.\"
> -.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> -.\" IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
> -.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
> -.\" DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
> -.\" INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
> -.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
> -.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
> -.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
> -.\" STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
> -.\" ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> -.\" POSSIBILITY OF SUCH DAMAGE.
> -.\"
> -.Dd $Mdocdate: May 31 2007 $
> -.Dt TIMER 4 sparc64
> -.Os
> -.Sh NAME
> -.Nm timer
> -.Nd SPARC64 Timer
> -.Sh SYNOPSIS
> -.Cd "timer* at mainbus0"
> -.Sh DESCRIPTION
> -The
> -.Nm
> -device provides support for the onboard timer on SBus based
> -UltraSPARC machines.
> -The timers are used to control various time services in the
> -kernel and are not user accessible.
> -.Sh SEE ALSO
> -.Xr intro 4
> Index: sys/arch/sparc64/conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/sparc64/conf/GENERIC,v
> retrieving revision 1.322
> diff -u -p -r1.322 GENERIC
> --- sys/arch/sparc64/conf/GENERIC 2 Jan 2022 23:14:27 -   1.322
> +++ sys/arch/sparc64/conf/GENERIC 16 Mar 2023 17:18:43 -
> @@ -342,9 +342,6 @@ clkbrd*   at fhc?
>  ## PROM clock -- if all else failse
>  prtc0at mainbus0
>
> -## Timer chip found on (some) sun4u systems.
> -timer*   at mainbus0
> -
>  # Virtual devices

Re: Patch to add -p (reading/parsing /etc/sysctl.conf) option to sysctl

2023-02-21 Thread Mike Fischer

> Am 21.02.2023 um 12:00 schrieb Crystal Kolipe :
> 
> On Tue, Feb 21, 2023 at 11:29:17AM +0100, jhx wrote:
>> this patch adds the "-p" option to sysctl which makes it possible to
>> read/parse/apply settings from /etc/sysctl.conf at any time the user wishes.
> 
> This is an interesting idea, but you can effectively do the same thing
> from the shell with:
> 
> # sysctl `cat /etc/sysctl.conf`
> 
> Also, note that both your patch and the above shell command do not
> gracefully handle comment lines in /etc/sysctl.conf, (although that
> shouldn't cause any problem in practice).

Also there is a difference between the use of /etc/sysctl.conf at boot and when 
using either the proposed -p option or the shell command: At boot we start out 
with the default values. When re-reading /etc/sysctl.conf the current state is 
the base. That is very relevant if you e.g. remove a line from /etc/sysctl.conf 
(to go back to the default).

Mike

Re: proc.c: remove some dead code

2023-02-15 Thread Mike Larkin

On Wed, Feb 15, 2023 at 12:02:46PM +0100, Tobias Heider wrote:
> Many of our networking daemons use proc.c to set up processes and ipc. I
> couldn't find two that are actually exactly the same, but it looks like none
> of them are ever using proc_ispeer, so here is a diff to delete it from all
> of them.
> 
> Index: sbin/iked/proc.c
> ===
> RCS file: /cvs/src/sbin/iked/proc.c,v
> retrieving revision 1.35
> diff -u -p -r1.35 proc.c
> --- sbin/iked/proc.c  20 Apr 2021 21:11:56 -  1.35
> +++ sbin/iked/proc.c  15 Feb 2023 10:56:52 -
> @@ -39,23 +39,10 @@ enum privsep_procid privsep_process;
>  void  proc_open(struct privsep *, struct privsep_proc *,
>   struct privsep_proc *, size_t);
>  void  proc_close(struct privsep *);
> -int   proc_ispeer(struct privsep_proc *, unsigned int, enum privsep_procid);
>  void  proc_shutdown(struct privsep_proc *);
>  void  proc_sig_handler(int, short, void *);
>  void  proc_range(struct privsep *, enum privsep_procid, int *, int *);
>  int   proc_dispatch_null(int, struct privsep_proc *, struct imsg *);
> -
> -int
> -proc_ispeer(struct privsep_proc *procs, unsigned int nproc,
> -enum privsep_procid type)
> -{
> - unsigned inti;
> -
> - for (i = 0; i < nproc; i++)
> - if (procs[i].p_id == type)
> - return (1);
> - return (0);
> -}
>  
>  void
>  proc_init(struct privsep *ps, struct privsep_proc *procs, unsigned int nproc)
> Index: usr.sbin/httpd/proc.c
> ===
> RCS file: /cvs/src/usr.sbin/httpd/proc.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 proc.c
> --- usr.sbin/httpd/proc.c 4 Dec 2021 06:52:58 -   1.41
> +++ usr.sbin/httpd/proc.c 15 Feb 2023 10:56:52 -
> @@ -43,23 +43,10 @@ void   proc_open(struct privsep *, int, i
>  void  proc_accept(struct privsep *, int, enum privsep_procid,
>   unsigned int);
>  void  proc_close(struct privsep *);
> -int   proc_ispeer(struct privsep_proc *, unsigned int, enum privsep_procid);
>  void  proc_shutdown(struct privsep_proc *);
>  void  proc_sig_handler(int, short, void *);
>  void  proc_range(struct privsep *, enum privsep_procid, int *, int *);
>  int   proc_dispatch_null(int, struct privsep_proc *, struct imsg *);
> -
> -int
> -proc_ispeer(struct privsep_proc *procs, unsigned int nproc,
> -enum privsep_procid type)
> -{
> - unsigned inti;
> -
> - for (i = 0; i < nproc; i++)
> - if (procs[i].p_id == type)
> - return (1);
> - return (0);
> -}
>  
>  enum privsep_procid
>  proc_getid(struct privsep_proc *procs, unsigned int nproc,
> Index: usr.sbin/relayd/proc.c
> ===
> RCS file: /cvs/src/usr.sbin/relayd/proc.c,v
> retrieving revision 1.43
> diff -u -p -r1.43 proc.c
> --- usr.sbin/relayd/proc.c3 Sep 2022 20:07:31 -   1.43
> +++ usr.sbin/relayd/proc.c15 Feb 2023 10:56:52 -
> @@ -43,23 +43,10 @@ void   proc_open(struct privsep *, int, i
>  void  proc_accept(struct privsep *, int, enum privsep_procid,
>   unsigned int);
>  void  proc_close(struct privsep *);
> -int   proc_ispeer(struct privsep_proc *, unsigned int, enum privsep_procid);
>  void  proc_shutdown(struct privsep_proc *);
>  void  proc_sig_handler(int, short, void *);
>  void  proc_range(struct privsep *, enum privsep_procid, int *, int *);
>  int   proc_dispatch_null(int, struct privsep_proc *, struct imsg *);
> -
> -int
> -proc_ispeer(struct privsep_proc *procs, unsigned int nproc,
> -enum privsep_procid type)
> -{
> - unsigned inti;
> -
> - for (i = 0; i < nproc; i++)
> - if (procs[i].p_id == type)
> - return (1);
> - return (0);
> -}
>  
>  enum privsep_procid
>  proc_getid(struct privsep_proc *procs, unsigned int nproc,
> Index: usr.sbin/snmpd/proc.c
> ===
> RCS file: /cvs/src/usr.sbin/snmpd/proc.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 proc.c
> --- usr.sbin/snmpd/proc.c 30 Jun 2020 17:11:49 -  1.27
> +++ usr.sbin/snmpd/proc.c 15 Feb 2023 10:56:52 -
> @@ -43,23 +43,10 @@ void   proc_open(struct privsep *, int, i
>  void  proc_accept(struct privsep *, int, enum privsep_procid,
>   unsigned int);
>  void  proc_close(struct privsep *);
> -int   proc_ispeer(struct privsep_proc *, unsigned int, enum privsep_procid);
>  void  proc_shutdown(struct privsep_proc *);
>  void  proc_sig_handler(int, short, void *);
>  void  proc_range(struct privsep *, enum privsep_procid, int *, int *);
>  int   proc_dispatch_null(int, struct privsep_proc *, struct imsg *);
> -
> -int
> -proc_ispeer(struct privsep_proc *procs, unsigned int nproc,
> -enum privsep_procid type)
> -{
> - unsigned inti;
> -
> - for (i = 0; i < nproc; i++)
> - if

Re: Add ASMedia ASM2142 xhci

2023-02-07 Thread Mike Larkin

On Tue, Feb 07, 2023 at 02:35:37PM +0800, Kevin Lo wrote:
> Hi,
> 
> I have a machine with the ASMedia USB 3.1 controller:
> 
> xhci1 at pci3 dev 0 function 0 vendor "ASMedia", unknown product 0x2142 rev 0x00: msi, xHCI 1.10
> 
> ok?
> 

ok mlarkin

> Index: sys/dev/pci/pcidevs
> ===
> RCS file: /cvs/src/sys/dev/pci/pcidevs,v
> retrieving revision 1.2020
> diff -u -p -u -p -r1.2020 pcidevs
> --- sys/dev/pci/pcidevs   5 Feb 2023 01:57:59 -   1.2020
> +++ sys/dev/pci/pcidevs   7 Feb 2023 06:27:00 -
> @@ -1185,6 +1185,7 @@ product ASMEDIA ASM1182E0x1182  ASM1182e
>  product ASMEDIA ASM1184E 0x1184  ASM1184e
>  product ASMEDIA ASM1042AE0x1242  ASM1042AE xHCI
>  product ASMEDIA ASM1143  0x1343  ASM1143 xHCI
> +product ASMEDIA ASM2142  0x2142  ASM2142 xHCI
>  product ASMEDIA ASM2824  0x2824  ASM2824
>  
>  /* ASPEED Technology products */
>

Re: vmd(8): fix serial console race leading to interrupt storm

2023-01-30 Thread Mike Larkin

On Mon, Jan 30, 2023 at 08:03:52PM +, Mike Larkin wrote:
> On Mon, Jan 30, 2023 at 12:32:22PM -0500, Dave Voutila wrote:
> > vmd's serial console has a race condition and can generate interrupt
> > storms as the ns8250 device constantly asserts and deasserts its irq.
> > 
> > Easiest way to see this is on older, slower hardware OR running nested
> > such as OpenBSD guest inside OpenBSD vmm atop Linux KVM. I don't know
> > enough about how consoles work to understand why, but if you use a shell
> > with history that lets you hit the up-arrow it can trigger this fairly
> > consistently.
> > 
> > When triggered, you will experience the serial console "lock up" and the
> > vmd process for the vm spike > 100% (the cpu thread is constantly
> > exiting due to interrupt and the event thread is constantly firing
> > ioctls). Anywhere from 10s to a few minutes later, it may stop and
> > control in the console resumes.
> > 
> > btrace shows the vm exit rate spike with the exit reason being external
> > interrupt.
> > 
> > AFAICT what happens is the kevent for data available on the com1 fd
> > fires and, if it was !EV_WRITE, we assert/deassert the irq even if we
> > weren't ready to actually receive data off the fd. This keeps kicking
> > the poor vcpu for no reason making it slower to get the ns8250 into a
> > state where it is slow to get back into a LSR_RXRDY state.
> > 
> > OK?
> > 
> 
> This needs to be widely tested. IIRC there were old linux kernels that
> behaved differently and were sensitive to where/when irqs were injected
> for the serial ports, leading to various issues like needing to press a
> key for the console to advance, and/or seeing "too much work" messages
> from the kernel.
> 
> I'd recommend testing on a bunch of platforms.
> 

I'm happy with it, I tested on a few old Linux kernels and don't see any
problems.

> -ml
> 
> > 
> > diff refs/heads/master refs/heads/vmd-ns8250
> > commit - ef14a9e8cae106563ff9ce15d913365f9ad3fa0e
> > commit + 5c76914a8c33243ec5ccc82689d0dadaa7cae666
> > blob - dbb6568714c192447a99017fcf92bcc4bcc90ba6
> > blob + 96e1f2533691205bdeb8e8b6dafee92603494c44
> > --- usr.sbin/vmd/ns8250.c
> > +++ usr.sbin/vmd/ns8250.c
> > @@ -153,14 +153,15 @@ com_rcv_event(int fd, short kind, void *arg)
> > return;
> > }
> > 
> > -   if ((com1_dev.regs.lsr & LSR_RXRDY) == 0)
> > +   if ((com1_dev.regs.lsr & LSR_RXRDY) == 0) {
> > com_rcv(_dev, (uintptr_t)arg, 0);
> > 
> > -   /* If pending interrupt, inject */
> > -   if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
> > -   /* XXX: vcpu_id */
> > -   vcpu_assert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> > -   vcpu_deassert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> > +   /* If pending interrupt, inject */
> > +   if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
> > +   /* XXX: vcpu_id */
> > +   vcpu_assert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> > +   vcpu_deassert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> > +   }
> > }
> > 
> > mutex_unlock(_dev.mutex);

Re: vmd(8): fix serial console race leading to interrupt storm

2023-01-30 Thread Mike Larkin

On Mon, Jan 30, 2023 at 12:32:22PM -0500, Dave Voutila wrote:
> vmd's serial console has a race condition and can generate interrupt
> storms as the ns8250 device constantly asserts and deasserts its irq.
> 
> Easiest way to see this is on older, slower hardware OR running nested
> such as OpenBSD guest inside OpenBSD vmm atop Linux KVM. I don't know
> enough about how consoles work to understand why, but if you use a shell
> with history that lets you hit the up-arrow it can trigger this fairly
> consistently.
> 
> When triggered, you will experience the serial console "lock up" and the
> vmd process for the vm spike > 100% (the cpu thread is constantly
> exiting due to interrupt and the event thread is constantly firing
> ioctls). Anywhere from 10s to a few minutes later, it may stop and
> control in the console resumes.
> 
> btrace shows the vm exit rate spike with the exit reason being external
> interrupt.
> 
> AFAICT what happens is the kevent for data available on the com1 fd
> fires and, if it was !EV_WRITE, we assert/deassert the irq even if we
> weren't ready to actually receive data off the fd. This keeps kicking
> the poor vcpu for no reason making it slower to get the ns8250 into a
> state where it is slow to get back into a LSR_RXRDY state.
> 
> OK?
> 

This needs to be widely tested. IIRC there were old linux kernels that
behaved differently and were sensitive to where/when irqs were injected
for the serial ports, leading to various issues like needing to press a
key for the console to advance, and/or seeing "too much work" messages
from the kernel.

I'd recommend testing on a bunch of platforms.

-ml

> 
> diff refs/heads/master refs/heads/vmd-ns8250
> commit - ef14a9e8cae106563ff9ce15d913365f9ad3fa0e
> commit + 5c76914a8c33243ec5ccc82689d0dadaa7cae666
> blob - dbb6568714c192447a99017fcf92bcc4bcc90ba6
> blob + 96e1f2533691205bdeb8e8b6dafee92603494c44
> --- usr.sbin/vmd/ns8250.c
> +++ usr.sbin/vmd/ns8250.c
> @@ -153,14 +153,15 @@ com_rcv_event(int fd, short kind, void *arg)
>   return;
>   }
> 
> - if ((com1_dev.regs.lsr & LSR_RXRDY) == 0)
> + if ((com1_dev.regs.lsr & LSR_RXRDY) == 0) {
>   com_rcv(&com1_dev, (uintptr_t)arg, 0);
> 
> - /* If pending interrupt, inject */
> - if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
> - /* XXX: vcpu_id */
> - vcpu_assert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> - vcpu_deassert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> + /* If pending interrupt, inject */
> + if ((com1_dev.regs.iir & IIR_NOPEND) == 0) {
> + /* XXX: vcpu_id */
> + vcpu_assert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> + vcpu_deassert_pic_irq((uintptr_t)arg, 0, com1_dev.irq);
> + }
>   }
> 
>   mutex_unlock(&com1_dev.mutex);

Re: amd64, i386: set lapic timer mode, mask, divisor once

2023-01-27 Thread Mike Larkin

On Fri, Jan 27, 2023 at 08:07:26AM -0600, Scott Cheloha wrote:
> mlarkin@ noted about a month or so ago that setting the lapic timer
> mode, mask, and divisor every time we rearm it is unnecessary.  We
> only need to configure those registers once during
> lapic_timer_trigger().  After that, it is sufficient to set the ICR
> when rearming the timer.
> 
> While here, add the missing intr_disable/intr_restore wrapper to
> lapic_timer_trigger().  Writing multiple registers is not atomic, so
> we need to disable interrupts for safety.  Setting the ICR during
> lapic_timer_rearm() is atomic, so we don't need to disable interrupts
> there.
> 
> ok?
> 

ok mlarkin if you verified the mode/mask/divisor reset properly after 
un-zzz/un-ZZZ (which I think we do but wanted to point it out just in case).

> Index: amd64/amd64/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
> retrieving revision 1.65
> diff -u -p -r1.65 lapic.c
> --- amd64/amd64/lapic.c   10 Nov 2022 08:26:54 -  1.65
> +++ amd64/amd64/lapic.c   27 Jan 2023 13:58:15 -
> @@ -431,13 +431,17 @@ lapic_timer_rearm(void *unused, uint64_t
>   cycles = (nsecs * lapic_timer_nsec_cycle_ratio) >> 32;
>   if (cycles == 0)
>   cycles = 1;
> - lapic_timer_oneshot(0, cycles);
> + lapic_writereg(LAPIC_ICR_TIMER, cycles);
>  }
>  
>  void
>  lapic_timer_trigger(void *unused)
>  {
> + u_long s;
> +
> + s = intr_disable(); 
>   lapic_timer_oneshot(0, 1);
> + intr_restore(s);
>  }
>  
>  /*
> Index: i386/i386/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/lapic.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 lapic.c
> --- i386/i386/lapic.c 6 Dec 2022 01:56:44 -   1.53
> +++ i386/i386/lapic.c 27 Jan 2023 13:58:15 -
> @@ -268,13 +268,17 @@ lapic_timer_rearm(void *unused, uint64_t
>   cycles = (nsecs * lapic_timer_nsec_cycle_ratio) >> 32;
>   if (cycles == 0)
>   cycles = 1;
> - lapic_timer_oneshot(0, cycles);
> + i82489_writereg(LAPIC_ICR_TIMER, cycles);
>  }
>  
>  void
>  lapic_timer_trigger(void *unused)
>  {
> + u_long s;
> +
> + s = intr_disable();
>   lapic_timer_oneshot(0, 1);
> + intr_restore(s);
>  }
>  
>  /*

Re: don't remove known vmd vm's on failure

2023-01-26 Thread Mike Larkin

On Sun, Jan 15, 2023 at 09:08:29AM -0500, Dave Voutila wrote:
> 
> Dave Voutila  writes:
> 
> > It turns out not only does vmd have numerous error paths for handling
> > when something is amiss with a guest, most of the paths don't check if
> > it's a known vm defined in vm.conf.
> >
> > As a result, vmd often removes the vm from the SLIST of vm's meaning
> > one can't easily attempt to start it again or see it in vmctl's status
> > output.
> >
> > A simple reproduction:
> >
> >   1. define a vm with memory > 4gb in vm.conf
> >   2. run vmd in the foreground (doas vmd -d) so it's not started by rc.d
> >   3. try to start with `vmctl start -c ${vm_name}`, you should trigger
> >  an ENOMEM and get the "Cannot allocate memory" message from vmctl.
> >   4. try to start the same vm again...now you get EPERM!
> >   5. the vm is no longer visible in the output from `vmctl status` :(
> >
> > The problem is most of the error paths call vm_remove, which not only
> > tears down the vm via vm_stop, but also removes it from the vm list and
> > frees it. Only clean stops or restarts seem to perform this check
> > currently.
> >
> > Below diff refactors into checking if the vm is defined in the global
> > config before deciding to call vm_stop or vm_remove.
> 
> Slight tweak... __func__->caller to actually pass the correct name to
> vm_{stop,remove}() from vm_terminate()
> 

Finally getting caught up. ok mlarkin on this if you didn't commit it
already.

-ml

> 
> diff refs/heads/master refs/heads/vmd-accounting
> commit - d4e23fe7544b01187ebf3ac8ae32e955445ee666
> commit + 46503195403bfab50cd34bd8682f35a17d54d03d
> blob - 6bffb2519a31464836aa573dbccb7aa14ea97722
> blob + f30dc14de1ff9d5cf121cbc08b6db183a06d0c07
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -67,6 +67,8 @@ struct vmd  *env;
>  int   vm_claimid(const char *, int, uint32_t *);
>  void  start_vm_batch(int, short, void*);
> 
> +static inline void vm_terminate(struct vmd_vm *, const char *);
> +
>  struct vmd   *env;
> 
>  static struct privsep_proc procs[] = {
> @@ -395,14 +397,14 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>   errno = vmr.vmr_result;
>   log_warn("%s: failed to forward vm result",
>   vcp->vcp_name);
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   return (-1);
>   }
>   }
> 
>   if (vmr.vmr_result) {
>   log_warnx("%s: failed to start vm", vcp->vcp_name);
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   errno = vmr.vmr_result;
>   break;
>   }
> @@ -410,7 +412,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>   /* Now configure all the interfaces */
>   if (vm_priv_ifconfig(ps, vm) == -1) {
>   log_warn("%s: failed to configure vm", vcp->vcp_name);
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   break;
>   }
> 
> @@ -441,10 +443,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>   log_info("%s: sent vm %d successfully.",
>   vm->vm_params.vmc_params.vcp_name,
>   vm->vm_vmid);
> - if (vm->vm_from_config)
> - vm_stop(vm, 0, __func__);
> - else
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   }
> 
>   /* Send a response if a control client is waiting for it */
> @@ -470,10 +469,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>   }
>   if (vmr.vmr_result != EAGAIN ||
>   vm->vm_params.vmc_bootdevice) {
> - if (vm->vm_from_config)
> - vm_stop(vm, 0, __func__);
> - else
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   } else {
>   /* Stop VM instance but keep the tty open */
>   vm_stop(vm, 1, __func__);
> @@ -509,7 +505,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>   imsg->hdr.peerid, -1, &vir, sizeof(vir)) == -1) {
>   log_debug("%s: GET_INFO_VM failed for vm %d, removing",
>   __func__, vm->vm_vmid);
> - vm_remove(vm, __func__);
> + vm_terminate(vm, __func__);
>   return (-1);
>   }
>   break;
> @@ -545,7 +541,7 @@ vmd_dispatch_vmm(int fd, struct privsep_proc *p, struc
>

Re: add thread names to vmd(8)

2023-01-10 Thread Mike Larkin

On Sun, Jan 08, 2023 at 10:45:11AM -0800, Philip Guenther wrote:
> On Sun, Jan 8, 2023 at 8:50 AM Dave Voutila  wrote:
> 
> > Now that guenther@ landed thread names, this should help users
> > understand vmd host cpu utilization better as it lets us distinguish
> > between the libevent thread and the vcpu thread.
> >
> > Example ps output:
> >
> > $ ps -AHf | grep vmd
> > 30584  502984 p2  S+p  0:00.76 | |-- obj/vmd -d
> > 46975  512305 ??  Sc   0:00.01 | | |-- vmd: priv (vmd)
> > 39911  487276 ??  Spc  0:00.76 | | |-- vmd: control (vmd)
> > 58206  418514 ??  Spc  0:00.01 | | |-- vmd: vmm (vmd)
> > 43525  215149 ??  Sp   0:00.00 | | | |-- vmd: openbsd (vmd/event)
> > 43525  316255 ??  Rp/5 0:00.77 | | | |-- vmd: openbsd (vmd/vcpu-0)
> > 43525  270611 ??  Sp   0:00.00 | | | `-- vmd: openbsd (vmd)
> > 43206  562455 ??  Spc  0:00.01 | | |-- vmd: agentx (vmd)
> > 70005  431096 p3  R+p/90:00.00 | |-- grep vmd
> >
> > ok?
> >
>  ...
> 
> > --- usr.sbin/vmd/vm.c
> > +++ usr.sbin/vmd/vm.c
> > ...
> > @@ -1353,6 +1355,10 @@ run_vm(int child_cdrom, int
> > child_disks[][VM_MAX_BASE_
> > __func__, i);
> > return (ret);
> > }
> > +
> > +   memset(tname, 0, sizeof(tname));
> >
> 
> Delete the memset(); the API requires a C string and snprintf() already
> guarantees that.
> 
> Other than that, ok guenther!

ok mlarkin also if not already taken care of

Re: vmm: mask WAITPKG cpuid feature to hide TPAUSE

2023-01-10 Thread Mike Larkin

On Mon, Jan 09, 2023 at 06:51:27PM -0500, Dave Voutila wrote:
> 
> This ok with folks? Had OK's for the original diff but double checking
> before I commit.
> 

This is only half of what you need to do to stop guests from using
unwanted/unsupported instructions. Removing the CPUID feature flag bit
only lets the guest know "don't use this instruction" but if they ignore
that and use it anyway, well... shrug.

If you want to remove support for WAITPKG, we'll need to enable
TPAUSE/UMWAIT exiting, and then inject #UD if we ever exit for that
reason. I think there are a few examples of this in vmm.c already.

Other than that, this diff can go in but we need to do the other part
too.

-ml

> Dave Voutila  writes:
> 
> > Philip Guenther  writes:
> >
> >> On Sat, Jan 7, 2023 at 11:04 AM Dave Voutila  wrote:
> >>
> >>  Bringing this to tech@ to increase my chance of someone testing my
> >>  diff.
> >>
> >>  As reported in this thread on misc@ [1], I believe newer Intel hardware
> >>  may be experiencing issues hosting Linux guests under vmm/vmd. It looks
> >>  like there are some newer instructions Intel added (TPAUSE specifically)
> >>  that also involve some new MSR(s).
> >>
> >>  I don't have 12th gen Intel hardware to test this on (I think that's
> >>  Alder Lake). I'd like to mask this feature from vmm guests since it's
> >>  related to an MSR we don't yet pass through or emulate and has to do
> >>  with the TSC (which has its own challenges in vmm).
> >>
> >>  For someone testing, you should be able to grab an Alpine Linux iso
> >>  (-virt flavor) and boot it with vmd with the diff. (Without it should
> >>  "hang" and spike CPU or just die.) Also check that WAITPKG shows up in
> >>  your dmesg on the cpu feature output.
> >>
> >> This seem like it'll obviously work, but I guess it seems to me that this 
> >> "opt-out" approach is generally
> >> unsafe/unstable and vmd should consider actively switching to "opt-in" on 
> >> all these CPUID feature bits.  I mean,
> >> what bits are defined in the SEFF first-leaf EDX that _do_ work with vmd?
> >>
> >
> > Great point (I think you mean ECX). Here's an updated diff that flips it
> > to a whitelist so Intel/AMD don't burn me with these new bits in the
> > future. This better?
> >
> >
> > diff refs/heads/master refs/heads/vmm-tsleep
> > commit - bfce157fda90a812e1a99aa179a4c42f12ebfa24
> > commit + 5b434c89250e1901340c11c8f9c380dc18d0ae91
> > blob - 001a437045be145322be30288c1f47d63fb07634
> > blob + 0bd908e273a1c0e6324e1bc9f8c8ca921555c86f
> > --- sys/arch/amd64/amd64/identcpu.c
> > +++ sys/arch/amd64/amd64/identcpu.c
> > @@ -208,6 +208,7 @@ const struct {
> > { SEFF0ECX_AVX512VBMI,  "AVX512VBMI" },
> > { SEFF0ECX_UMIP,"UMIP" },
> > { SEFF0ECX_PKU, "PKU" },
> > +   { SEFF0ECX_WAITPKG, "WAITPKG" },
> >  }, cpu_seff0_edxfeatures[] = {
> > { SEFF0EDX_AVX512_4FNNIW, "AVX512FNNIW" },
> > { SEFF0EDX_AVX512_4FMAPS, "AVX512FMAPS" },
> > blob - cbde6cf9b02fc882a8ed17aa6adb5c43249e0302
> > blob + b26bd32e2d9ea7386b1f58960dea40b787d6a341
> > --- sys/arch/amd64/include/specialreg.h
> > +++ sys/arch/amd64/include/specialreg.h
> > @@ -201,6 +201,7 @@
> >  #define SEFF0ECX_AVX512VBMI0x0002 /* AVX-512 vector bit inst */
> >  #define SEFF0ECX_UMIP  0x0004 /* UMIP support */
> >  #define SEFF0ECX_PKU   0x0008 /* Page prot keys for user 
> > mode */
> > +#define SEFF0ECX_WAITPKG   0x0010 /* UMONITOR/UMWAIT/TPAUSE insns */
> >  /* SEFF EDX bits */
> >  #define SEFF0EDX_AVX512_4FNNIW 0x0004 /* AVX-512 neural network 
> > insns */
> >  #define SEFF0EDX_AVX512_4FMAPS 0x0008 /* AVX-512 mult accum single 
> > prec */
> > blob - 6b4802abf4b508495cdbc961bd799d3fa83b9c36
> > blob + 032444b05e19d7fbec96a0d11b5b340f668c0917
> > --- sys/arch/amd64/include/vmmvar.h
> > +++ sys/arch/amd64/include/vmmvar.h
> > @@ -672,8 +672,10 @@ struct vm_mprotect_ept_params {
> >  SEFF0EBX_AVX512IFMA | SEFF0EBX_AVX512PF | \
> >  SEFF0EBX_AVX512ER | SEFF0EBX_AVX512CD | \
> >  SEFF0EBX_AVX512BW | SEFF0EBX_AVX512VL)
> > -#define VMM_SEFF0ECX_MASK ~(SEFF0ECX_AVX512VBMI)
> >
> > +/* ECX mask contains the bits to include */
> > +#define VMM_SEFF0ECX_MASK (SEFF0ECX_PREFETCHWT1 | SEFF0ECX_UMIP | 
> > SEFF0ECX_PKU)
> > +
> >  /* EDX mask contains the bits to include */
> >  #define VMM_SEFF0EDX_MASK (SEFF0EDX_MD_CLEAR)
> >
> > blob - 310208ac4cdb262aaedfa9b78d869fd5911607b2
> > blob + ccf1164fd658a69dc383e1602ae0ce1f269de4e4
> > --- sys/arch/i386/i386/machdep.c
> > +++ sys/arch/i386/i386/machdep.c
> > @@ -1038,6 +1038,7 @@ const struct cpu_cpuid_feature cpu_seff0_ecxfeatures[]
> > { SEFF0ECX_UMIP,"UMIP" },
> > { SEFF0ECX_AVX512VBMI,  "AVX512VBMI" },
> > { SEFF0ECX_PKU, "PKU" },
> > +   { SEFF0ECX_WAITPKG, "WAITPKG" },
> >  };
> >
> >  const struct cpu_cpuid_feature cpu_seff0_edxfeatures[] = {
> > blob - 392b4ff412e2dd3c4c48ed6c9c84aa2358721c6a
> > blob +

Re: ESRT support for the amd64 bootloader

2023-01-03 Thread Mike Larkin

On Wed, Dec 28, 2022 at 09:13:16PM +0100, Mark Kettenis wrote:
> Dear Sergii,
> 
> Sorry for the delay, but I have finally found the time to work on the
> EFI variable and ESRT support for OpenBSD.  As a first step, here is a
> diff that adds support for copying the ESRT in the bootloader and
> passing it on to the kernel.
> 
> I adjusted your diff a bit.  It now adds the new config_esrt member at
> the end of the bios_efiinfo struct and sets a flag to indicate that
> extra bit of information is present.  That makes it possible to load
> new kernels with the old bootloader and vice versa.
> 
> patrick@, mlarkin@, yasuoka@ and other devs: ok?
> 

I am ok with this if not committed already. Thanks!

-ml

> 
> Index: arch/amd64/include/biosvar.h
> ===
> RCS file: /cvs/src/sys/arch/amd64/include/biosvar.h,v
> retrieving revision 1.29
> diff -u -p -r1.29 biosvar.h
> --- arch/amd64/include/biosvar.h  29 Nov 2022 21:41:39 -  1.29
> +++ arch/amd64/include/biosvar.h  28 Dec 2022 20:03:32 -
> @@ -218,11 +218,13 @@ typedef struct _bios_efiinfo {
>   uint32_tfb_reserved_mask;
>   uint32_tflags;
>  #define BEI_64BIT0x0001  /* 64-bit EFI implementation */
> +#define BEI_ESRT 0x0002  /* ESRT table */
>   uint32_tmmap_desc_ver;
>   uint32_tmmap_desc_size;
>   uint32_tmmap_size;
>   uint64_tmmap_start;
>   uint64_tsystem_table;
> + uint64_tconfig_esrt;
>  } __packed bios_efiinfo_t;
>  
>  #define  BOOTARG_UCODE 12
> Index: arch/amd64/stand/efiboot/conf.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/stand/efiboot/conf.c,v
> retrieving revision 1.39
> diff -u -p -r1.39 conf.c
> --- arch/amd64/stand/efiboot/conf.c   12 Aug 2022 20:18:58 -  1.39
> +++ arch/amd64/stand/efiboot/conf.c   28 Dec 2022 20:03:32 -
> @@ -40,7 +40,7 @@
>  #include "efidev.h"
>  #include "efipxe.h"
>  
> -const char version[] = "3.62";
> +const char version[] = "3.63";
>  
>  #ifdef EFI_DEBUG
>  int  debug = 0;
> Index: arch/amd64/stand/efiboot/efiboot.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/stand/efiboot/efiboot.c,v
> retrieving revision 1.40
> diff -u -p -r1.40 efiboot.c
> --- arch/amd64/stand/efiboot/efiboot.c11 Jul 2022 19:45:02 -  
> 1.40
> +++ arch/amd64/stand/efiboot/efiboot.c28 Dec 2022 20:03:32 -
> @@ -831,6 +831,7 @@ efi_com_putc(dev_t dev, int c)
>   */
>  static EFI_GUID   acpi_guid = ACPI_20_TABLE_GUID;
>  static EFI_GUID   smbios_guid = SMBIOS_TABLE_GUID;
> +static EFI_GUID   esrt_guid = 
> EFI_SYSTEM_RESOURCE_TABLE_GUID;
>  static intgopmode = -1;
>  
>  #define  efi_guidcmp(_a, _b) memcmp((_a), (_b), sizeof(EFI_GUID))
> @@ -870,6 +871,34 @@ efi_makebootargs(void)
>   &ST->ConfigurationTable[i].VendorGuid) == 0)
>   ei->config_smbios = (uintptr_t)
>   ST->ConfigurationTable[i].VendorTable;
> + else if (efi_guidcmp(&esrt_guid,
> + &ST->ConfigurationTable[i].VendorGuid) == 0)
> + ei->config_esrt = (uintptr_t)
> + ST->ConfigurationTable[i].VendorTable;
> + }
> +
> + /*
> +  * Need to copy ESRT because call to ExitBootServices() frees memory of
> +  * type EfiBootServicesData in which ESRT resides.
> +  */
> + if (ei->config_esrt != 0) {
> + EFI_SYSTEM_RESOURCE_TABLE *esrt =
> + (EFI_SYSTEM_RESOURCE_TABLE *)ei->config_esrt;
> + size_t esrt_size = sizeof(*esrt) +
> + esrt->FwResourceCount * sizeof(EFI_SYSTEM_RESOURCE_ENTRY);
> + void *esrt_copy;
> +
> + /*
> +  * Using EfiRuntimeServicesData as it maps to BIOS_MAP_RES,
> +  * while EfiLoaderData becomes BIOS_MAP_FREE.
> +  */
> + status = BS->AllocatePool(EfiRuntimeServicesData,
> + esrt_size, &esrt_copy);
> + if (status == EFI_SUCCESS) {
> + memcpy(esrt_copy, esrt, esrt_size);
> + ei->config_esrt = (uintptr_t)esrt_copy;
> + ei->flags |= BEI_ESRT;
> + }
>   }
>  
>   /*
> Index: stand/efi/include/efiapi.h
> ===
> RCS file: /cvs/src/sys/stand/efi/include/efiapi.h,v
> retrieving revision 1.3
> diff -u -p -r1.3 efiapi.h
> --- stand/efi/include/efiapi.h7 Dec 2022 23:04:26 -   1.3
> +++ stand/efi/include/efiapi.h28 Dec 2022 20:03:32 -
> @@ -871,6 +871,10 @@ typedef struct {
>{ 0x49152e77, 0x1ada, 0x4764,  \
>  { 0xb7, 0xa2, 0x7a, 0xfe, 0xfe, 0xd9, 0x5e, 0x8b } }
>  
>

Re: ESRT support for the amd64 bootloader

2023-01-03 Thread Mike Larkin

On Mon, Jan 02, 2023 at 05:52:12PM +, Peter Stuge wrote:
> Mark Kettenis wrote:
> > patrick@, mlarkin@, yasuoka@ and other devs: ok?
> 
> Do what you have to do, but I plead to avoid (U)EFI lock-in as far as
> technically possible, since most (U)EFI implementations are not open
> source, and that does exist.
> 
> I understand too well that (U)EFI is the modern popular firmware
> interface and thus the only choice sometimes, just please do not
> indulge here.
> 

This diff does not indulge in anything.

> 
> Thanks and kind regards
> 
> //Peter
>

Re: vmt.c: Change space character to TAB

2022-12-27 Thread Mike Larkin

On Wed, Dec 28, 2022 at 03:36:56PM +0900, Masato Asou wrote:
> ok?
> 
> --
> ASOU Masato

ok mlarkin

> 
> Index: sys/dev/pv/vmt.c
> ===
> RCS file: /cvs/src/sys/dev/pv/vmt.c,v
> retrieving revision 1.28
> diff -u -p -r1.28 vmt.c
> --- sys/dev/pv/vmt.c  26 Dec 2022 04:09:14 -  1.28
> +++ sys/dev/pv/vmt.c  28 Dec 2022 06:29:24 -
> @@ -572,7 +572,7 @@ vmt_kvop(void *arg, int op, char *key, c
>  
>   close:
>   if (vm_rpc_close() != 0)
> -DPRINTF("%s: unable to close rpci channel\n", DEVNAME(sc));
> + DPRINTF("%s: unable to close rpci channel\n", DEVNAME(sc));
>   done:
>   free(buf, M_TEMP, bufsz);
>   return (error);
>

Re: Machine after unhibernate sometimes won't suspend/hibernate again or dim screen

2022-12-26 Thread Mike Larkin

On Sun, Dec 25, 2022 at 11:57:24PM -0600, Abel Abraham Camarillo Ojeda wrote:
> # apmd -d
> battery status: high. external power status: not connected. estimated
> battery life 97% (223 minutes life time estimate)
> can't disable driver messages, error: Inappropriate ioctl for device
> apmevent  index 0
> 
> (press zzz in another xterm)
> system suspending
> battery status: high. external power status: not connected. estimated
> battery life 97% (223 minutes life time estimate)
> do_etc_file(): cannot access file /etc/apm/suspend
> 
> (press ZZZ in another xterm)
> system hibernating
> battery status: high. external power status: not connected. estimated
> battery life 97% (223 minutes life time estimate)
> do_etc_file(): cannot access file /etc/apm/hibernate
> 
> =
> 
> Notice also that battery life gets stuck and never updates again (not even
> notices when I plug/unplug from charger)
> 
> >

acpi thread likely stuck, as kettenis surmised.

Re: Machine after unhibernate sometimes won't suspend/hibernate again or dim screen

2022-12-26 Thread Mike Larkin

On Mon, Dec 26, 2022 at 12:51:05AM -0600, Abel Abraham Camarillo Ojeda wrote:
> On Mon, Dec 26, 2022 at 12:08 AM Mike Larkin  wrote:
> 
> > On Sun, Dec 25, 2022 at 11:39:29PM -0600, Abel Abraham Camarillo Ojeda
> > wrote:
> > > On Sun, Dec 25, 2022 at 9:46 PM Mike Larkin  wrote:
> > >
> > > > On Fri, Dec 23, 2022 at 03:13:53PM -0600, Abel Abraham Camarillo Ojeda
> > > > wrote:
> > > > > On Fri, Dec 23, 2022 at 2:46 PM Abel Abraham Camarillo Ojeda <
> > > > > acam...@verlet.org> wrote:
> > > > >
> > > > > > Forgot to mention I don't think this is a regression, just started
> > to
> > > > use
> > > > > > hibernate/unhibernate more often lately.
> > > > > > But I think I can reproduce this at least since 6.8 (the first
> > that I
> > > > > > installed to this machine)
> > > > > >
> > > > > >>
> > > > > >>
> > > > > >> But still this apply https://www.openbsd.org/report.html (point
> > 2)
> > > > > >>
> > > > > >
> > > > > > By doesn't work I mean:
> > > > > >
> > > > > > $ zzz
> > > > > > Suspending system...
> > > > > > $ (nothing happened)
> > > > > >
> > > > > > > real mem = 17021566976 (16233MB)
> > > > > >> > avail mem = 16488275968 (15724MB)
> > > > > >> > random: good seed from bootblocks
> > > > > >> > mpath0 at root
> > > > > >> > scsibus0 at mpath0: 256 targets
> > > > > >> > mainbus0 at root
> > > > > >> > bios0 at mainbus0: SMBIOS rev. 3.0 @ 0xb9908000 (58 entries)
> > > > > >> > bios0: vendor LENOVO version "R0GET56W (1.56 )" date 08/31/2017
> > > > > >>
> > > > > >> You should try
> > > > > >>
> > > >
> > https://pcsupport.lenovo.com/us/en/products/laptops-and-netbooks/thinkpad-l-series-laptops/thinkpad-l470/downloads/ds120327
> > > > > >> and see if problem is still present (of course good to have backup
> > > > :-))
> > > > > >>
> > > > > >
> > > > > > yes, forgot about that. Will update bios and retry
> > > > > >
> > > > >
> > > > > machine now with bios updated, can reproduce issue after 1
> > unhibernate,
> > > > > dmesg right now at "zzz does nothing stage":
> > > > >
> > > >
> > > > 1. acpi thread might be stuck as kettenis points out. to verify this,
> > > > try a suspend (lowercase zzz) instead of a hibernate (capital ZZZ) when
> > > > it gets stuck. If you can zzz but not ZZZ, then it's not the acpi
> > > > thread.
> > > >
> > > > Both zzz and ZZZ won't work, they only say 'Suspending/Hibernating...'
> > and
> > > nothing happens (don't have the exact message right now)
> > >
> > > any way to confirm the acpi thread is stuck?
> > >
> > > 2. more likely, IMO, is not being able to find a contiguous region in
> > > > free memory to store the hibernate data structures. If memory gets
> > > > fragmented, ZZZ will fail. It should print something to dmesg though,
> > > > so check that. This matches your symptoms of "always works the first
> > > > time but sometimes not on subsequent tries".
> > > >
> > >
> > > notice also screen dimming via F5/F6 won't work (pressing F5 or F6 and
> > > nothing happens)
> >
> > probably something like kettenis suggested then. make sure the bios is
> > updated.
> >
> 
> Bios is at last version bios0: vendor LENOVO version "R0GET79W (1.79 )"
> date 07/28/2022
> (issue was present also with the previous 2019-ish one)
> 
> Any idea what else to try to gather more info? This is pretty reproducible

vmstat -zi after resuming the first time? are you getting tons of acpi0
interrupts?

Re: Machine after unhibernate sometimes won't suspend/hibernate again or dim screen

2022-12-25 Thread Mike Larkin

On Sun, Dec 25, 2022 at 11:39:29PM -0600, Abel Abraham Camarillo Ojeda wrote:
> On Sun, Dec 25, 2022 at 9:46 PM Mike Larkin  wrote:
> 
> > On Fri, Dec 23, 2022 at 03:13:53PM -0600, Abel Abraham Camarillo Ojeda
> > wrote:
> > > On Fri, Dec 23, 2022 at 2:46 PM Abel Abraham Camarillo Ojeda <
> > > acam...@verlet.org> wrote:
> > >
> > > > Forgot to mention I don't think this is a regression, just started to
> > use
> > > > hibernate/unhibernate more often lately.
> > > > But I think I can reproduce this at least since 6.8 (the first that I
> > > > installed to this machine)
> > > >
> > > >>
> > > >>
> > > >> But still this apply https://www.openbsd.org/report.html (point 2)
> > > >>
> > > >
> > > > By doesn't work I mean:
> > > >
> > > > $ zzz
> > > > Suspending system...
> > > > $ (nothing happened)
> > > >
> > > > > real mem = 17021566976 (16233MB)
> > > >> > avail mem = 16488275968 (15724MB)
> > > >> > random: good seed from bootblocks
> > > >> > mpath0 at root
> > > >> > scsibus0 at mpath0: 256 targets
> > > >> > mainbus0 at root
> > > >> > bios0 at mainbus0: SMBIOS rev. 3.0 @ 0xb9908000 (58 entries)
> > > >> > bios0: vendor LENOVO version "R0GET56W (1.56 )" date 08/31/2017
> > > >>
> > > >> You should try
> > > >>
> > https://pcsupport.lenovo.com/us/en/products/laptops-and-netbooks/thinkpad-l-series-laptops/thinkpad-l470/downloads/ds120327
> > > >> and see if problem is still present (of course good to have backup
> > :-))
> > > >>
> > > >
> > > > yes, forgot about that. Will update bios and retry
> > > >
> > >
> > > machine now with bios updated, can reproduce issue after 1 unhibernate,
> > > dmesg right now at "zzz does nothing stage":
> > >
> >
> > 1. acpi thread might be stuck as kettenis points out. to verify this,
> > try a suspend (lowercase zzz) instead of a hibernate (capital ZZZ) when
> > it gets stuck. If you can zzz but not ZZZ, then it's not the acpi
> > thread.
> >
> > Both zzz and ZZZ won't work, they only say 'Suspending/Hibernating...' and
> nothing happens (don't have the exact message right now)
> 
> any way to confirm the acpi thread is stuck?
> 
> 2. more likely, IMO, is not being able to find a contiguous region in
> > free memory to store the hibernate data structures. If memory gets
> > fragmented, ZZZ will fail. It should print something to dmesg though,
> > so check that. This matches your symptoms of "always works the first
> > time but sometimes not on subsequent tries".
> >
> 
> notice also screen dimming via F5/F6 won't work (pressing F5 or F6 and
> nothing happens)

probably something like kettenis suggested then. make sure the bios is
updated.

Re: Machine after unhibernate sometimes won't suspend/hibernate again or dim screen

2022-12-25 Thread Mike Larkin

On Fri, Dec 23, 2022 at 03:13:53PM -0600, Abel Abraham Camarillo Ojeda wrote:
> On Fri, Dec 23, 2022 at 2:46 PM Abel Abraham Camarillo Ojeda <
> acam...@verlet.org> wrote:
> 
> > Forgot to mention I don't think this is a regression, just started to use
> > hibernate/unhibernate more often lately.
> > But I think I can reproduce this at least since 6.8 (the first that I
> > installed to this machine)
> >
> >>
> >>
> >> But still this apply https://www.openbsd.org/report.html (point 2)
> >>
> >
> > By doesn't work I mean:
> >
> > $ zzz
> > Suspending system...
> > $ (nothing happened)
> >
> > > real mem = 17021566976 (16233MB)
> >> > avail mem = 16488275968 (15724MB)
> >> > random: good seed from bootblocks
> >> > mpath0 at root
> >> > scsibus0 at mpath0: 256 targets
> >> > mainbus0 at root
> >> > bios0 at mainbus0: SMBIOS rev. 3.0 @ 0xb9908000 (58 entries)
> >> > bios0: vendor LENOVO version "R0GET56W (1.56 )" date 08/31/2017
> >>
> >> You should try
> >> https://pcsupport.lenovo.com/us/en/products/laptops-and-netbooks/thinkpad-l-series-laptops/thinkpad-l470/downloads/ds120327
> >> and see if problem is still present (of course good to have backup :-))
> >>
> >
> > yes, forgot about that. Will update bios and retry
> >
> 
> machine now with bios updated, can reproduce issue after 1 unhibernate,
> dmesg right now at "zzz does nothing stage":
> 

1. acpi thread might be stuck as kettenis points out. to verify this,
try a suspend (lowercase zzz) instead of a hibernate (capital ZZZ) when
it gets stuck. If you can zzz but not ZZZ, then it's not the acpi
thread.

2. more likely, IMO, is not being able to find a contiguous region in
free memory to store the hibernate data structures. If memory gets
fragmented, ZZZ will fail. It should print something to dmesg though,
so check that. This matches your symptoms of "always works the first
time but sometimes not on subsequent tries".

Re: pvbus: pass M_ZERO properly

2022-12-07 Thread Mike Larkin

On Thu, Dec 08, 2022 at 11:35:33AM +0900, YASUOKA Masahiko wrote:
> This is obvious.  M_ZERO must be for 3rd argument.
> 
> ok?
> 
> Index: sys/dev/pv/pvbus.c
> ===
> RCS file: /cvs/src/sys/dev/pv/pvbus.c,v
> retrieving revision 1.25
> diff -u -p -r1.25 pvbus.c
> --- sys/dev/pv/pvbus.c25 Aug 2022 17:38:16 -  1.25
> +++ sys/dev/pv/pvbus.c8 Dec 2022 02:32:46 -
> @@ -408,7 +408,7 @@ pvbusgetstr(size_t srclen, const char *s
>   else if (srclen > PAGE_SIZE)
>   return (ENAMETOOLONG);
>  
> - *dstp = dst = malloc(srclen + 1, M_TEMP|M_ZERO, M_WAITOK);
> + *dstp = dst = malloc(srclen + 1, M_TEMP, M_WAITOK | M_ZERO);
>   if (src != NULL) {
>   error = copyin(src, dst, srclen);
>   dst[srclen] = '\0';
> 

ok mlarkin. thanks!

Re: riscv64: print SBI vendor/version

2022-12-05 Thread Mike Larkin

On Sat, Dec 03, 2022 at 11:48:16PM +0100, Jeremie Courreges-Anglas wrote:
> 
> With the diff below we get more details about the SBI version running on
> the machine.  My Unmatched machine has OpenSBI version 0.9 but upstream
> has released 1.1 since, it implements v0.2 of the SBI spec but 0.3 and
> 1.0 have been released since.  I suspect this information could be
> useful for new boards.
> 
> OpenBSD 7.2-current (GENERIC.MP) #11: Thu Dec  1 23:39:45 CET 2022
> j...@hifive.wxcvbn.org:/usr/src/sys/arch/riscv64/compile/GENERIC.MP
> real mem  = 17179869184 (16384MB)
> avail mem = 16416411648 (15655MB)
> SBI: OpenSBI v0.9, SBI Specification Version 0.2
> random: good seed from bootblocks
> mainbus0 at root: SiFive HiFive Unmatched A00
> cpu0 at mainbus0: SiFive U7 imp 20181004 rv64imafdc
> intc0 at cpu0
> [...]
> 
> I tweaked the code in sbi.c to put all the information on a single line.
> Printing the information in cpu_startup() seemed the most appropriate
> since this code isn't hooked up in a driver.
> 
> ok?
> 

ok mlarkin if not already done.

> 
> Index: machdep.c
> ===
> RCS file: /cvs/src/sys/arch/riscv64/riscv64/machdep.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 machdep.c
> --- machdep.c 30 Oct 2022 17:43:40 -  1.29
> +++ machdep.c 3 Dec 2022 21:41:55 -
> @@ -273,6 +273,8 @@ cpu_startup(void)
>   printf("avail mem = %lu (%luMB)\n", ptoa(uvmexp.free),
>   ptoa(uvmexp.free) / 1024 / 1024);
>  
> + sbi_print_version();
> +
>   curpcb = _addr->u_pcb;
>   curpcb->pcb_flags = 0;
>   curpcb->pcb_tf = 
> Index: sbi.c
> ===
> RCS file: /cvs/src/sys/arch/riscv64/riscv64/sbi.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 sbi.c
> --- sbi.c 2 Jul 2021 08:44:37 -   1.6
> +++ sbi.c 4 Nov 2022 21:38:54 -
> @@ -76,22 +76,22 @@ sbi_print_version(void)
>  
>   switch (sbi_impl_id) {
>   case (SBI_IMPL_ID_BBL):
> - printf("SBI: Berkely Boot Loader %lu\n", sbi_impl_version);
> + printf("SBI: Berkely Boot Loader %lu", sbi_impl_version);
>   break;
>   case (SBI_IMPL_ID_OPENSBI):
>   major = sbi_impl_version >> OPENSBI_VERSION_MAJOR_OFFSET;
>   minor = sbi_impl_version & OPENSBI_VERSION_MINOR_MASK;
> - printf("SBI: OpenSBI v%u.%u\n", major, minor);
> + printf("SBI: OpenSBI v%u.%u", major, minor);
>   break;
>   default:
> - printf("SBI: Unrecognized Implementation: %lu\n", sbi_impl_id);
> + printf("SBI: Unrecognized Implementation: %lu", sbi_impl_id);
>   break;
>   }
>  
>   major = (sbi_spec_version & SBI_SPEC_VERS_MAJOR_MASK) >>
>   SBI_SPEC_VERS_MAJOR_OFFSET;
>   minor = (sbi_spec_version & SBI_SPEC_VERS_MINOR_MASK);
> - printf("SBI Specification Version: %u.%u\n", major, minor);
> + printf(", SBI Specification Version %u.%u\n", major, minor);
>  }
>  
>  #ifdef MULTIPROCESSOR
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
>

Re: riscv64: use evcount_percpu(9) for clock interrupts

2022-12-05 Thread Mike Larkin

On Sat, Dec 03, 2022 at 01:15:09AM +0100, Jeremie Courreges-Anglas wrote:
> 
> ok?
> 
> 

Probably needs cheloha@'s ok but this looks ok to me.

-ml

> Index: clock.c
> ===
> RCS file: /cvs/src/sys/arch/riscv64/riscv64/clock.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 clock.c
> --- clock.c   19 Nov 2022 16:02:37 -  1.6
> +++ clock.c   30 Nov 2022 19:28:49 -
> @@ -100,6 +100,7 @@ cpu_initclocks(void)
>   clock_intr, NULL, NULL);
>  
>   evcount_attach(_count, "clock", NULL);
> + evcount_percpu(_count);
>  
>   cpu_startclock();
>  }
> @@ -136,7 +137,7 @@ clock_intr(void *frame)
>   intr_disable();
>   splx(s);
>  
> - clock_count.ec_count++;
> + evcount_inc(_count);
>  
>   return 0;
>  }
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
>

Re: riscv64: drop unused WEAK_REFERENCE macro

2022-12-05 Thread Mike Larkin

On Sat, Dec 03, 2022 at 12:16:43AM +0100, Jeremie Courreges-Anglas wrote:
> 
> WEAK_REFERENCE seems to come from FreeBSD, it's not used in our tree.
> (WEAK_ALIAS is defined a few lines above).
> 
> ok?
> 

ok mlarkin if you didn't get to it already

> 
> Index: sys/arch/riscv64/include/asm.h
> ===
> RCS file: /cvs/src/sys/arch/riscv64/include/asm.h,v
> retrieving revision 1.6
> diff -u -p -r1.6 asm.h
> --- sys/arch/riscv64/include/asm.h2 Dec 2022 12:27:08 -   1.6
> +++ sys/arch/riscv64/include/asm.h2 Dec 2022 23:13:38 -
> @@ -104,10 +104,6 @@
>   .weak alias;\
>   alias = sym
>  
> -#define  WEAK_REFERENCE(sym, alias)  \
> - .weak alias;\
> - .set alias,sym
> -
>  #define  SWAP_FAULT_HANDLER(handler, tmp0, tmp1) \
>   ld  tmp0, CI_CURPCB(tp);/* Load the pcb */  \
>   ld  tmp1, PCB_ONFAULT(tmp0);/* Save old handler */  \
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
>

Re: AMD pcidevs updates

2022-12-02 Thread Mike Larkin

On Fri, Dec 02, 2022 at 07:10:43PM +1100, Jonathan Gray wrote:
> On Wed, Nov 30, 2022 at 07:57:33AM +, Laurence Tratt wrote:
> > On Tue, Nov 29, 2022 at 10:42:36PM +, Laurence Tratt wrote:
> > 
> > > The diff below adds some newish AMD elements to pcidevs.
> > 
> > As Mike Larkin kindly pointed out off-list, I sent a diff to the generated
> > file. Sorry!
> 
> With some more of the devices from the dmesg.
> 
> I can't find any documentation from AMD for this.
> 

ok mlarkin


> Index: pcidevs
> ===
> RCS file: /cvs/src/sys/dev/pci/pcidevs,v
> retrieving revision 1.2012
> diff -u -p -r1.2012 pcidevs
> --- pcidevs   2 Dec 2022 07:29:30 -   1.2012
> +++ pcidevs   2 Dec 2022 07:49:20 -
> @@ -780,6 +780,19 @@ product AMD 19_4X_IOMMU  0x14b6  19h/4xh 
>  product AMD 19_4X_HB_1   0x14b7  19h/4xh Host
>  product AMD 19_4X_PCIE_1 0x14b9  19h/4xh PCIE
>  product AMD 19_4X_PCIE_2 0x14ba  19h/4xh PCIE
> +product AMD 19_6X_RC 0x14d8  19h/6xh Root Complex
> +product AMD 19_6X_IOMMU  0x14d9  19h/6xh IOMMU
> +product AMD 19_6X_HB 0x14da  19h/6xh Host
> +product AMD 19_6X_PCIE_1 0x14db  19h/6xh PCIE
> +product AMD 19_6X_PCIE_2 0x14dd  19h/6xh PCIE
> +product AMD 19_6X_DF_1   0x14e0  19h/6xh Data Fabric
> +product AMD 19_6X_DF_2   0x14e1  19h/6xh Data Fabric
> +product AMD 19_6X_DF_3   0x14e2  19h/6xh Data Fabric
> +product AMD 19_6X_DF_4   0x14e3  19h/6xh Data Fabric
> +product AMD 19_6X_DF_5   0x14e4  19h/6xh Data Fabric
> +product AMD 19_6X_DF_6   0x14e5  19h/6xh Data Fabric
> +product AMD 19_6X_DF_7   0x14e6  19h/6xh Data Fabric
> +product AMD 19_6X_DF_8   0x14e7  19h/6xh Data Fabric
>  product AMD 14_HB0x1510  14h Host
>  product AMD 14_PCIE_10x1512  14h PCIE
>  product AMD 14_PCIE_20x1513  14h PCIE
> @@ -814,6 +827,9 @@ product AMD 16_3X_DRAM0x1582  16h DRAM 
>  product AMD 16_3X_MISC   0x1583  16h Misc Cfg
>  product AMD 16_3X_CPU_PM 0x1584  16h CPU Power
>  product AMD 16_3X_MISC_2 0x1585  16h Misc Cfg
> +product AMD 19_6X_XHCI_1 0x15b6  19h/6xh xHCI
> +product AMD 19_6X_XHCI_2 0x15b7  19h/6xh xHCI
> +product AMD 19_6X_XHCI_3 0x15b8  19h/6xh xHCI
>  product AMD 17_1X_RC 0x15d0  17h/1xh Root Complex
>  product AMD 17_1X_IOMMU  0x15d1  17h/1xh IOMMU
>  product AMD 17_1X_PCIE_1 0x15d3  17h/1xh PCIE
> @@ -930,6 +946,10 @@ product AMD 500SERIES_PCIE_1 0x43e9  500 
>  product AMD 500SERIES_PCIE_2 0x43ea  500 Series PCIE
>  product AMD 500SERIES_AHCI   0x43eb  500 Series AHCI
>  product AMD 500SERIES_XHCI   0x43ee  500 Series xHCI
> +product AMD 600SERIES_PCIE_1 0x43f4  600 Series PCIE
> +product AMD 600SERIES_PCIE_2 0x43f5  600 Series PCIE
> +product AMD 600SERIES_AHCI   0x43f6  600 Series AHCI
> +product AMD 600SERIES_XHCI   0x43f7  600 Series xHCI
>  product AMD 500SERIES_PCIE_3 0x57a3  500 Series PCIE
>  product AMD 500SERIES_PCIE_4 0x57a4  500 Series PCIE
>  product AMD 500SERIES_PCIE_5 0x57ad  500 Series PCIE
>

Re: vmd: fix booting 7.2 ramdisks with >= 4G mem

2022-11-28 Thread Mike Larkin

On Mon, Nov 28, 2022 at 11:32:32AM -0500, Dave Voutila wrote:
> tech@ et. al.,
>
> When kettenis@ introduced a newer version of BOOTARG_CONSDEV to add
> additional params for the AMD Ryzen V1000 family, vmd's code that
> configures bootargs to support direct booting a ramdisk kernel didn't
> adjust with it.
>
> Mischa Peters found this and shared a simple reproducer on 7.2 and
> -current:
>
> # vmctl start -c -b /bsd.rd -m 4G test
>
> Where /bsd.rd is a 7.2 or -current ramdisk kernel.
>
> Interestingly, this is only seen when using 4G (or more) memory for the
> guest. I think it's just a happy coincidence it works < 4G because of
> the resulting BOOTARG_MEMMAP sizing things so the BOOTARG_CONSDEV works.
>
> Diff below fixes the issue by simply zero'ing the BOOTARG_CONSDEV
> structure before assigning to members.
>
> While here, I also cleaned up some things like using literal values that
> could be more descriptive boot arg names and also made the arithmetic
> explicitly use the same type (uint32_t) throughout instead of mixing it
> with int.
>
> ok?
>

ok mlarkin

> -dv
>
> diff refs/heads/master refs/heads/vmd-ramdisk
> commit - 8cbcfb178c36f28f6fcb28289719a4f0547eabb4
> commit + 0be12dfaa063ded82837d3a6b2ce8df7ea7e1c2d
> blob - b367721e32b61892955bbf835b873034875c85ec
> blob + d560b8e8eb2cdd87a60c63e8ecb7fed56e5c60dc
> --- usr.sbin/vmd/loadfile_elf.c
> +++ usr.sbin/vmd/loadfile_elf.c
> @@ -382,9 +382,10 @@ create_bios_memmap(struct vm_create_params *vcp, bios_
>   * Parameters:
>   *  memmap: the BIOS memory map
>   *  n: number of entries in memmap
> + *  bootmac: optional PXE boot MAC address
>   *
>   * Return values:
> - *  The size of the bootargs
> + *  The size of the bootargs in bytes
>   */
>  static uint32_t
>  push_bootargs(bios_memmap_t *memmap, size_t n, bios_bootmac_t *bootmac)
> @@ -393,40 +394,41 @@ push_bootargs(bios_memmap_t *memmap, size_t n, bios_bo
>   bios_consdev_t consdev;
>   uint32_t ba[1024];
>
> - memmap_sz = 3 * sizeof(int) + n * sizeof(bios_memmap_t);
> - ba[0] = 0x0;/* memory map */
> + memmap_sz = 3 * sizeof(uint32_t) + n * sizeof(bios_memmap_t);
> + ba[0] = BOOTARG_MEMMAP;
>   ba[1] = memmap_sz;
> - ba[2] = memmap_sz;  /* next */
> + ba[2] = memmap_sz;
>   memcpy([3], memmap, n * sizeof(bios_memmap_t));
> - i = memmap_sz / sizeof(int);
> + i = memmap_sz / sizeof(uint32_t);
>
>   /* Serial console device, COM1 @ 0x3f8 */
> - consdev.consdev = makedev(8, 0);/* com1 @ 0x3f8 */
> + memset(, 0, sizeof(consdev));
> + consdev.consdev = makedev(8, 0);
>   consdev.conspeed = 115200;
>   consdev.consaddr = 0x3f8;
> - consdev.consfreq = 0;
>
> - consdev_sz = 3 * sizeof(int) + sizeof(bios_consdev_t);
> - ba[i] = 0x5;   /* consdev */
> + consdev_sz = 3 * sizeof(uint32_t) + sizeof(bios_consdev_t);
> + ba[i] = BOOTARG_CONSDEV;
>   ba[i + 1] = consdev_sz;
>   ba[i + 2] = consdev_sz;
>   memcpy([i + 3], , sizeof(bios_consdev_t));
> - i += consdev_sz / sizeof(int);
> + i += consdev_sz / sizeof(uint32_t);
>
>   if (bootmac) {
> - bootmac_sz = 3 * sizeof(int) + (sizeof(bios_bootmac_t) + 3) & 
> ~3;
> - ba[i] = 0x7;   /* bootmac */
> + bootmac_sz = 3 * sizeof(uint32_t) +
> + (sizeof(bios_bootmac_t) + 3) & ~3;
> + ba[i] = BOOTARG_BOOTMAC;
>   ba[i + 1] = bootmac_sz;
>   ba[i + 2] = bootmac_sz;
>   memcpy([i + 3], bootmac, sizeof(bios_bootmac_t));
> - i += bootmac_sz / sizeof(int);
> + i += bootmac_sz / sizeof(uint32_t);
>   }
>
>   ba[i++] = 0x; /* BOOTARG_END */
>
>   write_mem(BOOTARGS_PAGE, ba, PAGE_SIZE);
>
> - return (i * sizeof(int));
> + return (i * sizeof(uint32_t));
>  }
>
>  /*

Re: Get rid of UVM_VNODE_CANPERSIST

2022-11-15 Thread Mike Larkin

On Tue, Nov 15, 2022 at 02:31:27PM +0100, Martin Pieuchot wrote:
> UVM vnode objects include a reference count to keep track of the number
> of processes that have the corresponding pages mapped in their VM space.
>
> When the last process referencing a given library or executable dies,
> the reaper will munmap this object on its behalf.  When this happens it
> doesn't free the associated pages to speed-up possible re-use of the
> file.  Instead the pages are placed on the inactive list but stay ready
> to be pmap_enter()'d without requiring I/O as soon as a new process
> needs to access them.
>
> The mechanism to keep pages populated, known as UVM_VNODE_CANPERSIST,
> doesn't work well with swapping [0].  For some reason when the page daemon
> wants to free pages on the inactive list it tries to flush the pages to
> disk and panic(9) because it needs a valid reference to the vnode to do
> so.
>
> This indicates that the mechanism described above, which seems to work
> fine for RO mappings, is currently buggy in more complex situations.
> Flushing the pages when the last reference of the UVM object is dropped
> also doesn't seem to be enough as bluhm@ reported [1].
>
> The diff below, which has already been committed and reverted, gets rid of
> the UVM_VNODE_CANPERSIST logic.  I'd like to commit it again now that
> the arm64 caching bug has been found and fixed.
>
> Getting rid of this logic means more I/O will be generated and pages
> might have a faster reuse cycle.  I'm aware this might introduce a small
> slowdown, however I believe we should work towards loading files from the
> buffer cache to save I/O cycles instead of having another layer of cache.
> Such work isn't trivial and making sure the vnode <-> UVM relation is
> simple and well understood is the first step in this direction.
>
> I'd appreciate if the diff below could be tested on many architectures,
> including the offending rpi4.
>

arm64 (rpi4): full make build, no issues
arm64 (rpi3): let "make build" run for a few hours then ^C (it would probably
  take days and I didn't feel like waiting)
arm64 (sopine): let "make build" run for a few hours then ^C (same as rpi3)
riscv64 (unmatched):  full make build, no issues
powerpc64 (talos): full make build, no issues
i386 (ESXi VM): full make build, no issues
octeon (rhino): full make build, no issues

Hope this helps.

-ml

> Comments?  Oks?
>
> [0] https://marc.info/?l=openbsd-bugs&m=164846737707559&w=2
> [1] https://marc.info/?l=openbsd-bugs&m=166843373415030&w=2
>
> Index: uvm/uvm_vnode.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_vnode.c,v
> retrieving revision 1.130
> diff -u -p -r1.130 uvm_vnode.c
> --- uvm/uvm_vnode.c   20 Oct 2022 13:31:52 -  1.130
> +++ uvm/uvm_vnode.c   15 Nov 2022 13:28:28 -
> @@ -161,11 +161,8 @@ uvn_attach(struct vnode *vp, vm_prot_t a
>* add it to the writeable list, and then return.
>*/
>   if (uvn->u_flags & UVM_VNODE_VALID) {   /* already active? */
> + KASSERT(uvn->u_obj.uo_refs > 0);
>
> - /* regain vref if we were persisting */
> - if (uvn->u_obj.uo_refs == 0) {
> - vref(vp);
> - }
>   uvn->u_obj.uo_refs++;   /* bump uvn ref! */
>
>   /* check for new writeable uvn */
> @@ -235,14 +232,14 @@ uvn_attach(struct vnode *vp, vm_prot_t a
>   KASSERT(uvn->u_obj.uo_refs == 0);
>   uvn->u_obj.uo_refs++;
>   oldflags = uvn->u_flags;
> - uvn->u_flags = UVM_VNODE_VALID|UVM_VNODE_CANPERSIST;
> + uvn->u_flags = UVM_VNODE_VALID;
>   uvn->u_nio = 0;
>   uvn->u_size = used_vnode_size;
>
>   /*
>* add a reference to the vnode.   this reference will stay as long
>* as there is a valid mapping of the vnode.   dropped when the
> -  * reference count goes to zero [and we either free or persist].
> +  * reference count goes to zero.
>*/
>   vref(vp);
>
> @@ -323,16 +320,6 @@ uvn_detach(struct uvm_object *uobj)
>*/
>   vp->v_flag &= ~VTEXT;
>
> - /*
> -  * we just dropped the last reference to the uvn.   see if we can
> -  * let it "stick around".
> -  */
> - if (uvn->u_flags & UVM_VNODE_CANPERSIST) {
> - /* won't block */
> - uvn_flush(uobj, 0, 0, PGO_DEACTIVATE|PGO_ALLPAGES);
> - goto out;
> - }
> -
>   /* its a goner! */
>   uvn->u_flags |= UVM_VNODE_DYING;
>
> @@ -382,7 +369,6 @@ uvn_detach(struct uvm_object *uobj)
>   /* wake up any sleepers */
>   if (oldflags & UVM_VNODE_WANTED)
>   wakeup(uvn);
> -out:
>   rw_exit(uobj->vmobjlock);
>
>   /* drop our reference to the vnode. */
> @@ -498,8 +484,8 @@ uvm_vnp_terminate(struct vnode *vp)
>   }
>
>   /*
> -  * done.   now we free the uvn if its reference count is zero
> -  * (true if we are zapping a persisting uvn).   however, if we are
> +  * done.

vmm(4): remove locking in vm_intr_pending

2022-11-08 Thread Mike Larkin

This lock/unlock around an atomic operation was causing delays delivering
interrupts into VMs. Pointed out by claudio@ when he ran md5 - in a VM
and it became very sluggish.

Debugging help from dlg and mpi, thanks.

ok?

-ml


Index: arch/amd64/amd64/vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.326
diff -u -p -a -u -r1.326 vmm.c
--- arch/amd64/amd64/vmm.c  7 Nov 2022 12:29:12 -   1.326
+++ arch/amd64/amd64/vmm.c  8 Nov 2022 15:29:10 -
@@ -894,9 +894,7 @@ vm_intr_pending(struct vm_intr_params *v
goto out;
}

-   rw_enter_write(>vc_lock);
vcpu->vc_intr = vip->vip_intr;
-   rw_exit_write(>vc_lock);

refcnt_rele_wake(>vc_refcnt);
 out:
@@ -3526,7 +3524,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
vmx_setmsrbrw(vcpu, MSR_FSBASE);
vmx_setmsrbrw(vcpu, MSR_GSBASE);
vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE);
-
+
vmx_setmsrbr(vcpu, MSR_MISC_ENABLE);
vmx_setmsrbr(vcpu, MSR_TSC);

Index: arch/amd64/include/vmmvar.h
===
RCS file: /cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.81
diff -u -p -a -u -r1.81 vmmvar.h
--- arch/amd64/include/vmmvar.h 1 Sep 2022 22:01:40 -   1.81
+++ arch/amd64/include/vmmvar.h 8 Nov 2022 15:29:10 -
@@ -937,7 +937,7 @@ struct vcpu {
struct cpu_info *vc_last_pcpu;  /* [v] */
struct vm_exit vc_exit; /* [v] */

-   uint16_t vc_intr;   /* [v] */
+   uint16_t vc_intr;   /* [a] */
uint8_t vc_irqready;/* [v] */

uint8_t vc_fpuinited;   /* [v] */

Re: Mark sched_yield(2) as NOLOCK

2022-11-08 Thread Mike Larkin

On Tue, Nov 08, 2022 at 01:14:02PM +, Martin Pieuchot wrote:
> Now that mmap/munmap/mprotect(2) are no longer creating contention it is
> possible to see that sched_yield(2) is one of the syscalls waiting for
> the KERNEL_LOCK() to be released.  However this is no longer necessary.
>
> Traversing `ps_threads' requires either the KERNEL_LOCK() or the
> SCHED_LOCK() and we are holding both in this case.  So let's drop the
> requirement for the KERNEL_LOCK().
>
> ok?
>
> Index: kern/syscalls.master
> ===
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.235
> diff -u -p -r1.235 syscalls.master
> --- kern/syscalls.master  8 Nov 2022 11:05:57 -   1.235
> +++ kern/syscalls.master  8 Nov 2022 13:09:10 -
> @@ -531,7 +531,7 @@
>  #else
>  297  UNIMPL
>  #endif
> -298  STD { int sys_sched_yield(void); }
> +298  STD NOLOCK  { int sys_sched_yield(void); }
>  299  STD NOLOCK  { pid_t sys_getthrid(void); }
>  300  OBSOL   t32___thrsleep
>  301  STD NOLOCK  { int sys___thrwakeup(const volatile void *ident, \
>

Works here. This doesn't move the needle as much as the mmap unlock diff did
though :)

Re: tc_setclock: don't print warning when tc_windup() rejects RTC time

2022-11-08 Thread Mike Larkin

On Tue, Nov 08, 2022 at 11:59:17AM +, Scott Cheloha wrote:
> On some arm64 machines, the agtimer(4) ticks slowly enough that the
> tc_delta() doesn't overflow across brief suspends.  While working on
> arm64 suspend/resume, kettenis@ has been seeing warnings like this
> during resume:
>
> tc_setclock: cannot rewind uptime to 307.253324249
>
> The warning is misleading and should be removed.  The code is behaving
> as intended, but in a way I didn't anticipate when I added the warning
> a few years ago.
>
> It might be useful to print a warning in inittodr(9) during resume if the
> RTC time predates the system UTC suspend timestamp, but that's a
> distinct concern.
>
> ok?
>
> Index: kern_tc.c
> ===
> RCS file: /cvs/src/sys/kern/kern_tc.c,v
> retrieving revision 1.78
> diff -u -p -r1.78 kern_tc.c
> --- kern_tc.c 18 Sep 2022 20:47:09 -  1.78
> +++ kern_tc.c 8 Nov 2022 11:53:01 -
> @@ -552,7 +552,6 @@ void
>  tc_setclock(const struct timespec *ts)
>  {
>   struct bintime new_naptime, old_naptime, uptime, utc;
> - struct timespec tmp;
>   static int first = 1;
>  #ifndef SMALL_KERNEL
>   struct bintime elapsed;
> @@ -582,12 +581,6 @@ tc_setclock(const struct timespec *ts)
>   new_naptime = timehands->th_naptime;
>
>   mtx_leave(_mtx);
> -
> - if (bintimecmp(_naptime, _naptime, ==)) {
> - BINTIME_TO_TIMESPEC(, );
> - printf("%s: cannot rewind uptime to %lld.%09ld\n",
> - __func__, (long long)tmp.tv_sec, tmp.tv_nsec);
> - }
>
>  #ifndef SMALL_KERNEL
>   /* convert the bintime to ticks */
>

ok mlarkin

Re: Please test: unlock mprotect/mmap/munmap

2022-11-06 Thread Mike Larkin

On Sun, Nov 06, 2022 at 11:54:13AM +0100, Martin Pieuchot wrote:
> These 3 syscalls should now be ready to run w/o KERNEL_LOCK().  This
> will reduce contention a lot.  I'd be happy to hear from test reports
> on many architectures and possible workloads.
>
> Do not forget to run "make syscalls" before building the kernel.
>
> Index: syscalls.master
> ===
> RCS file: /cvs/src/sys/kern/syscalls.master,v
> retrieving revision 1.234
> diff -u -p -r1.234 syscalls.master
> --- syscalls.master   25 Oct 2022 16:10:31 -  1.234
> +++ syscalls.master   6 Nov 2022 10:50:45 -
> @@ -126,7 +126,7 @@
>   struct sigaction *osa); }
>  47   STD NOLOCK  { gid_t sys_getgid(void); }
>  48   STD NOLOCK  { int sys_sigprocmask(int how, sigset_t mask); }
> -49   STD { void *sys_mmap(void *addr, size_t len, int prot, \
> +49   STD NOLOCK  { void *sys_mmap(void *addr, size_t len, int prot, \
>   int flags, int fd, off_t pos); }
>  50   STD { int sys_setlogin(const char *namebuf); }
>  #ifdef ACCOUNTING
> @@ -171,8 +171,8 @@
>   const struct kevent *changelist, int nchanges, \
>   struct kevent *eventlist, int nevents, \
>   const struct timespec *timeout); }
> -73   STD { int sys_munmap(void *addr, size_t len); }
> -74   STD { int sys_mprotect(void *addr, size_t len, \
> +73   STD NOLOCK  { int sys_munmap(void *addr, size_t len); }
> +74   STD NOLOCK  { int sys_mprotect(void *addr, size_t len, \
>   int prot); }
>  75   STD { int sys_madvise(void *addr, size_t len, \
>   int behav); }
>

FWIW, this improves build performance by over 12% here locally.

-ml

Re: ThinkPad X13s and OpenBSD

2022-10-07 Thread Mike Larkin

On Fri, Oct 07, 2022 at 11:59:49AM -0700, Bryan Vyhmeister wrote:
> Hey Patrick,
>
> I hope you're doing well. We have not talked in quite a while. I was
> wondering how well the support for the ThinkPad X13s works right now in
> OpenBSD. I did some searching around and couldn't find much other than
> "support being added" but I'm not sure if that means everything works or
> there is just basic booting without most of the hardware working at all.
>
> I am hoping that support for the X55 5G modem might come eventually. I
> bought a ThinkPad X1 Nano with the X55 hoping it might be an easy
> addition to umb(4) but Gerhard said it's totally different and not
> something that can be added to umb(4). I am a little disappointed about
> that but such things happen.
>
> Bryan
>

I'm not patrick@ but here you go:

Stuff I can say for sure works:
 basic stuff like video (simplefb), keyboard, trackpoint/trackpad, usb, nvme, 
smp,
 power on/off/reboot, FDE via softraid

Stuff that doesn't work that you'd probably want on a daily driver laptop:
 battery status, zzz, ZZZ, RTC

Stuff that doesn't work and that will probably take a long time to make work:
 GPU/accelerated video, 5g modem, wifi, camera, sound

Wifi and sound can be handled with usb devices. Maybe camera too. :shrug: on 
the rest.

-ml

Re: vmd: remove the user quota tracking

2022-10-05 Thread Mike Larkin

On Wed, Oct 05, 2022 at 05:03:16PM -0400, Dave Voutila wrote:
> Matthew Martin recently presented a patch on tech@ [1] fixing some missed
> scaling from when I converted vmd(8) to use bytes instead of megabytes
> everywhere. I finally found time to wade through the code it touches and
> am proposing we simply "tedu" the incomplete feature.
>
> Does anyone use this? (And if so, how?)
>
> I don't see much value in this framework and it only adds additional
> state to track. Users can be confined by limits associated in
> login.conf(5) for the most part. There are more interesting things to
> work on, so unless anyone speaks up I'll look for an OK to remove it.
>
> -dv
>
> [1] https://marc.info/?l=openbsd-tech&m=166346196317673&w=2
>

I'd wait for someone to speak up and become the owner of this part of vmd and
if nobody does, ok mlarkin to nuke it.

-ml

>
> diff refs/heads/master refs/heads/vmd-user
> commit - bfe2092d87b190d9f89c4a6f2728a539b7f88233
> commit + e84ff2c7628a811e00044a447ad906d6e24beac0
> blob - 374d7de6629e072065b5c0232536c23c1e5bbbe0
> blob + a192223cf118e2a8764b24f965a15acbf8ae506f
> --- usr.sbin/vmd/config.c
> +++ usr.sbin/vmd/config.c
> @@ -98,12 +98,6 @@ config_init(struct vmd *env)
>   return (-1);
>   TAILQ_INIT(env->vmd_switches);
>   }
> - if (what & CONFIG_USERS) {
> - if ((env->vmd_users = calloc(1,
> - sizeof(*env->vmd_users))) == NULL)
> - return (-1);
> - TAILQ_INIT(env->vmd_users);
> - }
>
>   return (0);
>  }
> @@ -238,13 +232,6 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui
>   return (EALREADY);
>   }
>
> - /* increase the user reference counter and check user limits */
> - if (vm->vm_user != NULL && user_get(vm->vm_user->usr_id.uid) != NULL) {
> - user_inc(vcp, vm->vm_user, 1);
> - if (user_checklimit(vm->vm_user, vcp) == -1)
> - return (EPERM);
> - }
> -
>   /*
>* Rate-limit the VM so that it cannot restart in a loop:
>* if the VM restarts after less than VM_START_RATE_SEC seconds,
> blob - 2f3ac1a76f2c3e458919eca85c238a668c10422a
> blob + 755cbedb6a18502a87724502ec86e9e426961701
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -1188,9 +1188,6 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char *ca
>   vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
>   | VM_STATE_SHUTDOWN);
>
> - user_inc(>vm_params.vmc_params, vm->vm_user, 0);
> - user_put(vm->vm_user);
> -
>   if (vm->vm_iev.ibuf.fd != -1) {
>   event_del(>vm_iev.ev);
>   close(vm->vm_iev.ibuf.fd);
> @@ -1243,7 +1240,6 @@ vm_remove(struct vmd_vm *vm, const char *caller)
>
>   TAILQ_REMOVE(env->vmd_vms, vm, vm_entry);
>
> - user_put(vm->vm_user);
>   vm_stop(vm, 0, caller);
>   free(vm);
>  }
> @@ -1286,7 +1282,6 @@ vm_register(struct privsep *ps, struct vmop_create_par
>   struct vmd_vm   *vm = NULL, *vm_parent = NULL;
>   struct vm_create_params *vcp = >vmc_params;
>   struct vmop_owner   *vmo = NULL;
> - struct vmd_user *usr = NULL;
>   uint32_t nid, rng;
>   unsigned int i, j;
>   struct vmd_switch   *sw;
> @@ -1362,13 +1357,6 @@ vm_register(struct privsep *ps, struct vmop_create_par
>   }
>   }
>
> - /* track active users */
> - if (uid != 0 && env->vmd_users != NULL &&
> - (usr = user_get(uid)) == NULL) {
> - log_warnx("could not add user");
> - goto fail;
> - }
> -
>   if ((vm = calloc(1, sizeof(*vm))) == NULL)
>   goto fail;
>
> @@ -1379,7 +1367,6 @@ vm_register(struct privsep *ps, struct vmop_create_par
>   vm->vm_tty = -1;
>   vm->vm_receive_fd = -1;
>   vm->vm_state &= ~VM_STATE_PAUSED;
> - vm->vm_user = usr;
>
>   for (i = 0; i < VMM_MAX_DISKS_PER_VM; i++)
>   for (j = 0; j < VM_MAX_BASE_PER_DISK; j++)
> @@ -1903,104 +1890,6 @@ struct vmd_user *
>   return (NULL);
>  }
>
> -struct vmd_user *
> -user_get(uid_t uid)
> -{
> - struct vmd_user *usr;
> -
> - if (uid == 0)
> - return (NULL);
> -
> - /* first try to find an existing user */
> - TAILQ_FOREACH(usr, env->vmd_users, usr_entry) {
> - if (usr->usr_id.uid == uid)
> - goto done;
> - }
> -
> - if ((usr = calloc(1, sizeof(*usr))) == NULL) {
> - log_warn("could not allocate user");
> - return (NULL);
> - }
> -
> - usr->usr_id.uid = uid;
> - usr->usr_id.gid = -1;
> - TAILQ_INSERT_TAIL(env->vmd_users, usr, usr_entry);
> -
> - done:
> - DPRINTF("%s: uid %d #%d +",
> - __func__, usr->usr_id.uid, usr->usr_refcnt + 1);
> - usr->usr_refcnt++;
> -
> - return (usr);
> -}
> -
> -void
> -user_put(struct vmd_user *usr)
> -{
> - if (usr == NULL)
> -

Re: [please test] tsc: derive frequency on AMD CPUs from MSRs

2022-09-23 Thread Mike Larkin

On Sat, Sep 24, 2022 at 11:06:24AM +1000, Jonathan Gray wrote:
> On Fri, Sep 23, 2022 at 09:16:25AM -0500, Scott Cheloha wrote:
> > Hi,
> >
> > TL;DR:
> >
> > I want to compute the TSC frequency on AMD CPUs using the methods laid
> > out in the AMD manuals instead of calibrating the TSC by hand.
> >
> > If you have an AMD CPU with an invariant TSC, please apply this patch,
> > recompile/boot the resulting kernel, and send me the resulting dmesg.
> >
> > Family 10h-16h CPUs are especially interesting.  If you've got one,
> > don't be shy!
> >
> > Long explanation:
> >
> > On AMD CPUs we calibrate the TSC with a separate timer.  This is slow
> > and introduces error.  I also worry about a future where legacy timers
> > are absent or heavily gated (read: useless).
> >
> > This patch adds most of the code needed to compute the TSC frequency
> > on AMD family 10h+ CPUs.  CPUs prior to family 10h did not support an
> > invariant TSC so they are irrelevant.
> >
> > I have riddled the code with printf(9) calls so I can work out what's
> > wrong by hand if a test result makes no sense.
> >
> > The only missing piece is code to read the configuration space on
> > family 10h-16h CPUs to determine how many boosted P-states we need to
> > skip to get to the MSR describing the software P0 state.  I would
> > really appreciate it if someone could explain how to do this at this
> > very early point in boot.  jsg@ pointed me to pci_conf_read(9), but
> > I'm a little confused about how I get the needed pci* inputs at this
> > point in boot.
>
> I also said you shouldn't be looking at pci devices for this.

What you want to look at is section 2.1.4 of this:

https://developer.amd.com/wp-content/resources/56255_3_03.PDF

It describes what you need to do. It's for family 17 but I would guess
that there is an equivalent family 10/12/etc doc, and I'd be surprised
if any of this has changed in a long time.

If you can't figure it out, I'd suggest that we don't do this for
family 10/12/etc and use the old method for CPUs that don't have the
MSRs you need. I also sorta share jsg's opinion below, this feels
like a solution for a problem that really doesn't exist.

-ml

>
> I remain unconvinced that all of this is worth it compared to
> calibrating off a timer with a known rate.  And it is the wrong time in
> the release cycle for this.
>
> Boost could be disabled for the measurement if need be.
>
> AMD64 Architecture Programmer's Manual
> Volume 2: System Programming
> Publication No. 24593
> Revision 3.38
>
> "17.2 Core Performance Boost
> ...
> CPB can be disabled using the CPBDis field of the Hardware Configuration
> Register (HWCR MSR) on the appropriate core. When CPB is disabled,
> hardware limits the frequency and voltage of the core to those defined
> by P0.
>
> Support for core performance boost is indicated by
> CPUID Fn8000_0007_EDX[CPB] = 1."
>
> "3.2.10 Hardware Configuration Register (HWCR)
> ...
> CpbDis. Bit 25. Core performance boost disable. When set to 1, core 
> performance boost is disabled.
> "
>
> Processor Programming Reference (PPR)
> for AMD Family 17h Model 01h, Revision B1 Processors
> 54945 Rev 1.14 - April 15, 2017
>
> "MSRC001_0015 [Hardware Configuration] (HWCR)
>
> 25 CpbDis: core performance boost disable. Read-write.
> Reset: 0.  0=CPB is requested to be enabled.  1=CPB is disabled.
> Specifies whether core performance boost is requested to be enabled or
> disabled. If core performance boost is disabled while a core is in a
> boosted P-state, the core automatically transitions to the highest
> performance non-boosted P-state."
>
> also mentioned in
>
> BIOS and Kernel Developer's Guide (BKDG)
> For AMD Family 10h Processors
> 31116 Rev 3.48 - April 22, 2010
>
> >
> > --
> >
> > Test results?  Clues on reading the configuration space?
> >
> > -Scott
> >
> > Index: tsc.c
> > ===
> > RCS file: /cvs/src/sys/arch/amd64/amd64/tsc.c,v
> > retrieving revision 1.29
> > diff -u -p -r1.29 tsc.c
> > --- tsc.c   22 Sep 2022 04:57:08 -  1.29
> > +++ tsc.c   23 Sep 2022 14:04:22 -
> > @@ -100,6 +100,253 @@ tsc_freq_cpuid(struct cpu_info *ci)
> > return (0);
> >  }
> >
> > +uint64_t
> > +tsc_freq_msr(struct cpu_info *ci)
> > +{
> > +   uint64_t base, def, did, did_lsd, did_msd, divisor, fid, multiplier;
> > +   uint32_t msr, off = 0;
> > +
> > +   if (strcmp(cpu_vendor, "AuthenticAMD") != 0)
> > +   return 0;
> > +
> > +   /*
> > +* All family 10h+ CPUs have MSR_HWCR and the TscFreqSel bit.
> > +* If TscFreqSel is not set the TSC does not advance at the P0
> > +* frequency, in which case something is wrong and we need to
> > +* calibrate by hand.
> > +*/
> > +#define HWCR_TSCFREQSEL (1 << 24)
> > +   if (!ISSET(rdmsr(MSR_HWCR), HWCR_TSCFREQSEL))   /* XXX specialreg.h */
> > +   return 0;
> > +#undef HWCR_TSCFREQSEL
> > +
> > +   /*
> > +* For families 10h, 12h, 14h, 15h, and 16h, we need to skip

Re: Help on helping with virtualization?

2022-09-22 Thread Mike Larkin

On Wed, Sep 21, 2022 at 08:09:05AM -0400, Christoff Humphries wrote:
> Hello.
>
> I want to help with the virtualization project to get the things that are
> incomplete or missing completed (ie, the "not available at this time" list
> on https://www.openbsd.org/faq/faq16.html).

Great, always nice to have help.

>
> Is there a point of contact I should direct questions to, as I'll likely
> have a lot of questions once I start diving in to load it into my brain and
> understand the code well enough to do something well and useful.

tech@ is best, or mlarkin@/dv@.

>
> Or should I just ask whatever questions I have here?
>
> Asking as I'm not sure how this project handles that stuff (open list or
> pushed more to mentor/owner people for subject areas).
>
> Thanks!
> Christoff Humphries
>

Generally, find something you want to fix/improve, work on it, send a diff.
That's the best way. Might want to chat with dv or me before you bite on
a task as we may already be working on it (for example, dv is working on
instruction decoding and emulation which is one of the first steps toward
smp).

-ml

Re: Towards unlocking mmap(2) & munmap(2)

2022-09-14 Thread Mike Larkin

On Sun, Sep 11, 2022 at 12:26:31PM +0200, Martin Pieuchot wrote:
> Diff below adds a minimalist set of assertions to ensure proper locks
> are held in uvm_mapanon() and uvm_unmap_remove() which are the guts of
> mmap(2) for anons and munmap(2).
>
> Please test it with WITNESS enabled and report back.
>

Do you want this tested in conjunction with the aiodoned diff or by itself?

> Index: uvm/uvm_addr.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_addr.c,v
> retrieving revision 1.31
> diff -u -p -r1.31 uvm_addr.c
> --- uvm/uvm_addr.c21 Feb 2022 10:26:20 -  1.31
> +++ uvm/uvm_addr.c11 Sep 2022 09:08:10 -
> @@ -416,6 +416,8 @@ uvm_addr_invoke(struct vm_map *map, stru
>   !(hint >= uaddr->uaddr_minaddr && hint < uaddr->uaddr_maxaddr))
>   return ENOMEM;
>
> + vm_map_assert_anylock(map);
> +
>   error = (*uaddr->uaddr_functions->uaddr_select)(map, uaddr,
>   entry_out, addr_out, sz, align, offset, prot, hint);
>
> Index: uvm/uvm_fault.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_fault.c,v
> retrieving revision 1.132
> diff -u -p -r1.132 uvm_fault.c
> --- uvm/uvm_fault.c   31 Aug 2022 01:27:04 -  1.132
> +++ uvm/uvm_fault.c   11 Sep 2022 08:57:35 -
> @@ -1626,6 +1626,7 @@ uvm_fault_unwire_locked(vm_map_t map, va
>   struct vm_page *pg;
>
>   KASSERT((map->flags & VM_MAP_INTRSAFE) == 0);
> + vm_map_assert_anylock(map);
>
>   /*
>* we assume that the area we are unwiring has actually been wired
> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.294
> diff -u -p -r1.294 uvm_map.c
> --- uvm/uvm_map.c 15 Aug 2022 15:53:45 -  1.294
> +++ uvm/uvm_map.c 11 Sep 2022 09:37:44 -
> @@ -162,6 +162,8 @@ int
> uvm_map_inentry_recheck(u_long, v
>struct p_inentry *);
>  boolean_t uvm_map_inentry_fix(struct proc *, struct p_inentry *,
>vaddr_t, int (*)(vm_map_entry_t), u_long);
> +boolean_t uvm_map_is_stack_remappable(struct vm_map *,
> +  vaddr_t, vsize_t);
>  /*
>   * Tree management functions.
>   */
> @@ -491,6 +493,8 @@ uvmspace_dused(struct vm_map *map, vaddr
>   vaddr_t stack_begin, stack_end; /* Position of stack. */
>
>   KASSERT(map->flags & VM_MAP_ISVMSPACE);
> + vm_map_assert_anylock(map);
> +
>   vm = (struct vmspace *)map;
>   stack_begin = MIN((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
>   stack_end = MAX((vaddr_t)vm->vm_maxsaddr, (vaddr_t)vm->vm_minsaddr);
> @@ -570,6 +574,8 @@ uvm_map_isavail(struct vm_map *map, stru
>   if (addr + sz < addr)
>   return 0;
>
> + vm_map_assert_anylock(map);
> +
>   /*
>* Kernel memory above uvm_maxkaddr is considered unavailable.
>*/
> @@ -1446,6 +1452,8 @@ uvm_map_mkentry(struct vm_map *map, stru
>   entry->guard = 0;
>   entry->fspace = 0;
>
> + vm_map_assert_wrlock(map);
> +
>   /* Reset free space in first. */
>   free = uvm_map_uaddr_e(map, first);
>   uvm_mapent_free_remove(map, free, first);
> @@ -1573,6 +1581,8 @@ boolean_t
>  uvm_map_lookup_entry(struct vm_map *map, vaddr_t address,
>  struct vm_map_entry **entry)
>  {
> + vm_map_assert_anylock(map);
> +
>   *entry = uvm_map_entrybyaddr(>addr, address);
>   return *entry != NULL && !UVM_ET_ISHOLE(*entry) &&
>   (*entry)->start <= address && (*entry)->end > address;
> @@ -1692,6 +1702,8 @@ uvm_map_is_stack_remappable(struct vm_ma
>   vaddr_t end = addr + sz;
>   struct vm_map_entry *first, *iter, *prev = NULL;
>
> + vm_map_assert_anylock(map);
> +
>   if (!uvm_map_lookup_entry(map, addr, )) {
>   printf("map stack 0x%lx-0x%lx of map %p failed: no mapping\n",
>   addr, end, map);
> @@ -1843,6 +1855,8 @@ uvm_mapent_mkfree(struct vm_map *map, st
>   vaddr_t  addr;  /* Start of freed range. */
>   vaddr_t  end;   /* End of freed range. */
>
> + UVM_MAP_REQ_WRITE(map);
> +
>   prev = *prev_ptr;
>   if (prev == entry)
>   *prev_ptr = prev = NULL;
> @@ -1971,10 +1985,7 @@ uvm_unmap_remove(struct vm_map *map, vad
>   if (start >= end)
>   return;
>
> - if ((map->flags & VM_MAP_INTRSAFE) == 0)
> - splassert(IPL_NONE);
> - else
> - splassert(IPL_VM);
> + vm_map_assert_wrlock(map);
>
>   /* Find first affected entry. */
>   entry = uvm_map_entrybyaddr(>addr, start);
> @@ -4027,6 +4038,8 @@ uvm_map_checkprot(struct vm_map *map, va
>  {
>   struct vm_map_entry *entry;
>
> + vm_map_assert_anylock(map);
> +
>   if (start < map->min_offset || end > map->max_offset || start > end)
>

Re: [please test] pvclock(4): fix several bugs

2022-09-08 Thread Mike Larkin

On Thu, Sep 08, 2022 at 08:32:27AM -0500, Scott Cheloha wrote:
> On Tue, Sep 06, 2022 at 03:30:44AM -0700, Mike Larkin wrote:
> > On Sun, Sep 04, 2022 at 02:50:10PM +1000, Jonathan Gray wrote:
> > > On Sat, Sep 03, 2022 at 05:33:01PM -0500, Scott Cheloha wrote:
> > > > On Sat, Sep 03, 2022 at 10:37:31PM +1000, Jonathan Gray wrote:
> > > > > On Sat, Sep 03, 2022 at 06:52:20AM -0500, Scott Cheloha wrote:
> > > > > > > On Sep 3, 2022, at 02:22, Jonathan Gray  wrote:
> > > > > > >
> > > > > > > On Fri, Sep 02, 2022 at 06:00:25PM -0500, Scott Cheloha wrote:
> > > > > > >> dv@ suggested coming to the list to request testing for the 
> > > > > > >> pvclock(4)
> > > > > > >> driver.  Attached is a patch that corrects several bugs.  Most of
> > > > > > >> these changes will only matter in the non-TSC_STABLE case on a
> > > > > > >> multiprocessor VM.
> > > > > > >>
> > > > > > >> Ideally, nothing should break.
> > > > > > >>
> > > > > > >> - pvclock yields a 64-bit value.  The BSD timecounter layer can 
> > > > > > >> only
> > > > > > >>  use the lower 32 bits, but internally we need to track the full
> > > > > > >>  64-bit value to allow comparisons with the full value in the
> > > > > > >>  non-TSC_STABLE case.  So make pvclock_lastcount a 64-bit 
> > > > > > >> quantity.
> > > > > > >>
> > > > > > >> - In pvclock_get_timecount(), move rdtsc() up into the lockless 
> > > > > > >> read
> > > > > > >>  loop to get a more accurate timestamp.
> > > > > > >>
> > > > > > >> - In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc().
> > > > > > >>
> > > > > > >> - In pvclock_get_timecount(), check that our TSC value doesn't 
> > > > > > >> predate
> > > > > > >>  ti->ti_tsc_timestamp, otherwise we will produce an enormous 
> > > > > > >> value.
> > > > > > >>
> > > > > > >> - In pvclock_get_timecount(), update pvclock_lastcount in the
> > > > > > >>  non-TSC_STABLE case with more care.  On amd64 we can do this 
> > > > > > >> with an
> > > > > > >>  atomic_cas_ulong(9) loop because u_long is 64 bits.  On i386 we 
> > > > > > >> need
> > > > > > >>  to introduce a mutex to protect our comparison and read/write.
> > > > > > >
> > > > > > > i386 has cmpxchg8b, no need to disable interrupts
> > > > > > > the ifdefs seem excessive
> > > > > >
> > > > > > How do I make use of CMPXCHG8B on i386
> > > > > > in this context?
> > > > > >
> > > > > > atomic_cas_ulong(9) is a 32-bit CAS on
> > > > > > i386.
> > > > >
> > > > > static inline uint64_t
> > > > > atomic_cas_64(volatile uint64_t *p, uint64_t o, uint64_t n)
> > > > > {
> > > > >   return __sync_val_compare_and_swap(p, o, n);
> > > > > }
> > > > >
> > > > > Or md atomic.h files could have an equivalent.
> > > > > Not possible on all 32-bit archs.
> > > > >
> > > > > >
> > > > > > We can't use FP registers in the kernel, no?
> > > > >
> > > > > What do FP registers have to do with it?
> > > > >
> > > > > >
> > > > > > Am I missing some other avenue?
> > > > >
> > > > > There is no rdtsc_lfence() on i386.  Initial diff doesn't build.
> > > >
> > > > LFENCE is an SSE2 extension.  As is MFENCE.  I don't think I can just
> > > > drop rdtsc_lfence() into cpufunc.h and proceed without causing some
> > > > kind of fault on an older CPU.
> > > >
> > > > What are my options on a 586-class CPU for forcing RDTSC to complete
> > > > before later instructions?
> > >
> > > "3.3.2. Serializing Operations
> > > After executing certain instructions the Pentium processor serializes
> > > instruction execution. This means t

Re: [please test] pvclock(4): fix several bugs

2022-09-06 Thread Mike Larkin

On Sun, Sep 04, 2022 at 02:50:10PM +1000, Jonathan Gray wrote:
> On Sat, Sep 03, 2022 at 05:33:01PM -0500, Scott Cheloha wrote:
> > On Sat, Sep 03, 2022 at 10:37:31PM +1000, Jonathan Gray wrote:
> > > On Sat, Sep 03, 2022 at 06:52:20AM -0500, Scott Cheloha wrote:
> > > > > On Sep 3, 2022, at 02:22, Jonathan Gray  wrote:
> > > > >
> > > > > On Fri, Sep 02, 2022 at 06:00:25PM -0500, Scott Cheloha wrote:
> > > > >> dv@ suggested coming to the list to request testing for the 
> > > > >> pvclock(4)
> > > > >> driver.  Attached is a patch that corrects several bugs.  Most of
> > > > >> these changes will only matter in the non-TSC_STABLE case on a
> > > > >> multiprocessor VM.
> > > > >>
> > > > >> Ideally, nothing should break.
> > > > >>
> > > > >> - pvclock yields a 64-bit value.  The BSD timecounter layer can only
> > > > >>  use the lower 32 bits, but internally we need to track the full
> > > > >>  64-bit value to allow comparisons with the full value in the
> > > > >>  non-TSC_STABLE case.  So make pvclock_lastcount a 64-bit quantity.
> > > > >>
> > > > >> - In pvclock_get_timecount(), move rdtsc() up into the lockless read
> > > > >>  loop to get a more accurate timestamp.
> > > > >>
> > > > >> - In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc().
> > > > >>
> > > > >> - In pvclock_get_timecount(), check that our TSC value doesn't 
> > > > >> predate
> > > > >>  ti->ti_tsc_timestamp, otherwise we will produce an enormous value.
> > > > >>
> > > > >> - In pvclock_get_timecount(), update pvclock_lastcount in the
> > > > >>  non-TSC_STABLE case with more care.  On amd64 we can do this with an
> > > > >>  atomic_cas_ulong(9) loop because u_long is 64 bits.  On i386 we need
> > > > >>  to introduce a mutex to protect our comparison and read/write.
> > > > >
> > > > > i386 has cmpxchg8b, no need to disable interrupts
> > > > > the ifdefs seem excessive
> > > >
> > > > How do I make use of CMPXCHG8B on i386
> > > > in this context?
> > > >
> > > > atomic_cas_ulong(9) is a 32-bit CAS on
> > > > i386.
> > >
> > > static inline uint64_t
> > > atomic_cas_64(volatile uint64_t *p, uint64_t o, uint64_t n)
> > > {
> > >   return __sync_val_compare_and_swap(p, o, n);
> > > }
> > >
> > > Or md atomic.h files could have an equivalent.
> > > Not possible on all 32-bit archs.
> > >
> > > >
> > > > We can't use FP registers in the kernel, no?
> > >
> > > What do FP registers have to do with it?
> > >
> > > >
> > > > Am I missing some other avenue?
> > >
> > > There is no rdtsc_lfence() on i386.  Initial diff doesn't build.
> >
> > LFENCE is an SSE2 extension.  As is MFENCE.  I don't think I can just
> > drop rdtsc_lfence() into cpufunc.h and proceed without causing some
> > kind of fault on an older CPU.
> >
> > What are my options on a 586-class CPU for forcing RDTSC to complete
> > before later instructions?
>
> "3.3.2. Serializing Operations
> After executing certain instructions the Pentium processor serializes
> instruction execution. This means that any modifications to flags,
> registers, and memory for previous instructions are completed before
> the next instruction is fetched and executed. The prefetch queue
> is flushed as a result of serializing operations.
>
> The Pentium processor serializes instruction execution after executing
> one of the following instructions: Move to Special Register (except
> CR0), INVD, INVLPG, IRET, IRETD, LGDT, LLDT, LIDT, LTR, WBINVD,
> CPUID, RSM and WRMSR."
>
> from:
> Pentium Processor User's Manual
> Volume 1: Pentium Processor Data Book
> Order Number 241428
>
> http://bitsavers.org/components/intel/pentium/1993_Intel_Pentium_Processor_Users_Manual_Volume_1.pdf
>
> So it could be rdtsc ; cpuid.
> lfence; rdtsc should still be preferred.
>
> It could be tested during boot and set a function pointer.
> Or the codepatch bits could be used.
>
> In the specific case of pvclock, can it be assumed that the host
> has hardware virt and would then have lfence?
>

I think this is a fair assumption.

-ml

Re: [please test] pvclock(4): fix several bugs

2022-09-03 Thread Mike Larkin

On Fri, Sep 02, 2022 at 06:00:25PM -0500, Scott Cheloha wrote:
> dv@ suggested coming to the list to request testing for the pvclock(4)
> driver.  Attached is a patch that corrects several bugs.  Most of
> these changes will only matter in the non-TSC_STABLE case on a
> multiprocessor VM.
>
> Ideally, nothing should break.
>
> - pvclock yields a 64-bit value.  The BSD timecounter layer can only
>   use the lower 32 bits, but internally we need to track the full
>   64-bit value to allow comparisons with the full value in the
>   non-TSC_STABLE case.  So make pvclock_lastcount a 64-bit quantity.
>
> - In pvclock_get_timecount(), move rdtsc() up into the lockless read
>   loop to get a more accurate timestamp.
>
> - In pvclock_get_timecount(), use rdtsc_lfence(), not rdtsc().
>
> - In pvclock_get_timecount(), check that our TSC value doesn't predate
>   ti->ti_tsc_timestamp, otherwise we will produce an enormous value.
>
> - In pvclock_get_timecount(), update pvclock_lastcount in the
>   non-TSC_STABLE case with more care.  On amd64 we can do this with an
>   atomic_cas_ulong(9) loop because u_long is 64 bits.  On i386 we need
>   to introduce a mutex to protect our comparison and read/write.
>

I tested on an 8 core ESXi VM, nothing broke. But it doesn't even
have pvclock as a timesource, so I'm not sure the test is meaningful or
useful.

-ml

> Index: pvclock.c
> ===
> RCS file: /cvs/src/sys/dev/pv/pvclock.c,v
> retrieving revision 1.8
> diff -u -p -r1.8 pvclock.c
> --- pvclock.c 5 Nov 2021 11:38:29 -   1.8
> +++ pvclock.c 2 Sep 2022 22:54:08 -
> @@ -27,6 +27,10 @@
>  #include 
>  #include 
>  #include 
> +#include 
> +#if defined(__i386__)
> +#include 
> +#endif
>
>  #include 
>  #include 
> @@ -35,7 +39,12 @@
>  #include 
>  #include 
>
> -uint pvclock_lastcount;
> +#if defined(__amd64__)
> +volatile u_long pvclock_lastcount;
> +#elif defined(__i386__)
> +struct mutex pvclock_mtx = MUTEX_INITIALIZER(IPL_HIGH);
> +uint64_t pvclock_lastcount;
> +#endif
>
>  struct pvclock_softc {
>   struct devicesc_dev;
> @@ -212,7 +221,7 @@ pvclock_get_timecount(struct timecounter
>  {
>   struct pvclock_softc*sc = tc->tc_priv;
>   struct pvclock_time_info*ti;
> - uint64_t tsc_timestamp, system_time, delta, ctr;
> + uint64_t system_time, delta, ctr, tsc;
>   uint32_t version, mul_frac;
>   int8_t   shift;
>   uint8_t  flags;
> @@ -220,8 +229,12 @@ pvclock_get_timecount(struct timecounter
>   ti = sc->sc_time;
>   do {
>   version = pvclock_read_begin(ti);
> + tsc = rdtsc_lfence();
> + if (ti->ti_tsc_timestamp < tsc)
> + delta = tsc - ti->ti_tsc_timestamp;
> + else
> + delta = 0;
>   system_time = ti->ti_system_time;
> - tsc_timestamp = ti->ti_tsc_timestamp;
>   mul_frac = ti->ti_tsc_to_system_mul;
>   shift = ti->ti_tsc_shift;
>   flags = ti->ti_flags;
> @@ -231,7 +244,6 @@ pvclock_get_timecount(struct timecounter
>* The algorithm is described in
>* linux/Documentation/virtual/kvm/msr.txt
>*/
> - delta = rdtsc() - tsc_timestamp;
>   if (shift < 0)
>   delta >>= -shift;
>   else
> @@ -241,10 +253,20 @@ pvclock_get_timecount(struct timecounter
>   if ((flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
>   return (ctr);
>
> - if (ctr < pvclock_lastcount)
> - return (pvclock_lastcount);
> -
> - atomic_swap_uint(_lastcount, ctr);
> -
> +#if defined(__amd64__)
> + u_long last;
> + do {
> + last = pvclock_lastcount;
> + if (ctr < last)
> + return last;
> + } while (atomic_cas_ulong(_lastcount, last, ctr) != last);
> +#elif defined(__i386__)
> + mtx_enter(_mtx);
> + if (pvclock_lastcount < ctr)
> + pvclock_lastcount = ctr;
> + else
> + ctr = pvclock_lastcount;
> + mtx_leave(_mtx);
> +#endif
>   return (ctr);
>  }

Re: vmd(8): compute i8254 Read-Back latch from singular timestamp

2022-09-02 Thread Mike Larkin

On Fri, Sep 02, 2022 at 11:42:03AM -0500, Scott Cheloha wrote:
> The 8254 data sheet [1] says this about the Read-Back command:
>
> > The read-back command may be used to latch multi-
> > ple counter output latches (OL) by setting the
> > COUNT bit D5 = 0 and selecting the desired coun-
> > ter(s).  This single command is functionally equiva-
> > lent to several counter latch commands, one for
> > each counter latched. [...]
>
> This is a little ambiguous.  But my hunch is that the intent here is
> "you can latch multiple counters all at once".  Simultaneously.
> Otherwise the utility of the read-back command is suspect.
>
> To simulate a simultaneous latch, we should only call clock_gettime(2)
> once and use that singular timestamp to compute olatch for each
> counter.
>
> ok?
>
> [1] 8254 Programmable Interval Timer, p. 8
> https://www.scs.stanford.edu/10wi-cs140/pintos/specs/8254.pdf

Didn't see dv's reply earlier; I agree with what he said.

>
> Index: i8253.c
> ===
> RCS file: /cvs/src/usr.sbin/vmd/i8253.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 i8253.c
> --- i8253.c   16 Jun 2021 16:55:02 -  1.34
> +++ i8253.c   2 Sep 2022 16:25:02 -
> @@ -128,6 +128,8 @@ i8253_do_readback(uint32_t data)
>   int readback_channel[3] = { TIMER_RB_C0, TIMER_RB_C1, TIMER_RB_C2 };
>   int i;
>
> + clock_gettime(CLOCK_MONOTONIC, );
> +
>   /* bits are inverted here - !TIMER_RB_STATUS == enable chan readback */
>   if (data & ~TIMER_RB_STATUS) {
>   i8253_channel[0].rbs = (data & TIMER_RB_C0) ? 1 : 0;
> @@ -139,7 +141,6 @@ i8253_do_readback(uint32_t data)
>   if (data & ~TIMER_RB_COUNT) {
>   for (i = 0; i < 3; i++) {
>   if (data & readback_channel[i]) {
> - clock_gettime(CLOCK_MONOTONIC, );
>   timespecsub(, _channel[i].ts, );
>   ns = delta.tv_sec * 10 + delta.tv_nsec;
>   ticks = ns / NS_PER_TICK;

Re: vmd(8): compute i8254 Read-Back latch from singular timestamp

2022-09-02 Thread Mike Larkin

On Fri, Sep 02, 2022 at 11:42:03AM -0500, Scott Cheloha wrote:
> The 8254 data sheet [1] says this about the Read-Back command:
>
> > The read-back command may be used to latch multi-
> > ple counter output latches (OL) by setting the
> > COUNT bit D5 = 0 and selecting the desired coun-
> > ter(s).  This single command is functionally equiva-
> > lent to several counter latch commands, one for
> > each counter latched. [...]
>
> This is a little ambiguous.  But my hunch is that the intent here is
> "you can latch multiple counters all at once".  Simultaneously.
> Otherwise the utility of the read-back command is suspect.
>
> To simulate a simultaneous latch, we should only call clock_gettime(2)
> once and use that singular timestamp to compute olatch for each
> counter.
>
> ok?
>
> [1] 8254 Programmable Interval Timer, p. 8
> https://www.scs.stanford.edu/10wi-cs140/pintos/specs/8254.pdf
>

Reads ok to me. ok mlarkin

> Index: i8253.c
> ===
> RCS file: /cvs/src/usr.sbin/vmd/i8253.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 i8253.c
> --- i8253.c   16 Jun 2021 16:55:02 -  1.34
> +++ i8253.c   2 Sep 2022 16:25:02 -
> @@ -128,6 +128,8 @@ i8253_do_readback(uint32_t data)
>   int readback_channel[3] = { TIMER_RB_C0, TIMER_RB_C1, TIMER_RB_C2 };
>   int i;
>
> + clock_gettime(CLOCK_MONOTONIC, );
> +
>   /* bits are inverted here - !TIMER_RB_STATUS == enable chan readback */
>   if (data & ~TIMER_RB_STATUS) {
>   i8253_channel[0].rbs = (data & TIMER_RB_C0) ? 1 : 0;
> @@ -139,7 +141,6 @@ i8253_do_readback(uint32_t data)
>   if (data & ~TIMER_RB_COUNT) {
>   for (i = 0; i < 3; i++) {
>   if (data & readback_channel[i]) {
> - clock_gettime(CLOCK_MONOTONIC, );
>   timespecsub(, _channel[i].ts, );
>   ns = delta.tv_sec * 10 + delta.tv_nsec;
>   ticks = ns / NS_PER_TICK;

Re: i386/lapic.c: sync with amd64/lapic.c

2022-08-30 Thread Mike Larkin

On Sun, Aug 28, 2022 at 03:26:49PM -0500, Scott Cheloha wrote:
> As promised off-list: in anticipation of merging the clock interrupt
> code, let's sync up the lapic timer parts of i386/lapic.c with the
> corresponding parts in amd64/lapic.c.  They will need identical
> changes to use the new code, so the more alike they are the better.
>
> Notable differences remaining in the timer code:
>
> - We use i82489_readreg() and i82489_writereg() on i386 instead of
>   lapic_readreg() and lapic_writereg().
>
> - lapic_clockintr() is just plain different on i386, I'm not
>   touching it yet.
>
> - No way to skip_calibration on i386.
>
> We can do synchronized cleanup in a later patch.
>
> Does this compile and boot on i386?  If so, ok?

Yes and yes (at least in an ESXi VM).

If others test on real hardware and it works, ok mlarkin; diff reads ok
to me.

-ml

>
> Index: i386/i386/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/lapic.c,v
> retrieving revision 1.50
> diff -u -p -r1.50 lapic.c
> --- i386/i386/lapic.c 25 Aug 2022 17:38:16 -  1.50
> +++ i386/i386/lapic.c 28 Aug 2022 20:24:55 -
> @@ -244,11 +244,41 @@ u_int32_t lapic_tval;
>  /*
>   * this gets us up to a 4GHz busclock
>   */
> -u_int32_t lapic_per_second;
> +u_int32_t lapic_per_second = 0;
>  u_int32_t lapic_frac_usec_per_cycle;
>  u_int64_t lapic_frac_cycle_per_usec;
>  u_int32_t lapic_delaytab[26];
>
> +void lapic_timer_oneshot(uint32_t, uint32_t);
> +void lapic_timer_periodic(uint32_t, uint32_t);
> +
> +/*
> + * Start the local apic countdown timer.
> + *
> + * First set the mode, mask, and vector.  Then set the
> + * divisor.  Last, set the cycle count: this restarts
> + * the countdown.
> + */
> +static inline void
> +lapic_timer_start(uint32_t mode, uint32_t mask, uint32_t cycles)
> +{
> + i82489_writereg(LAPIC_LVTT, mode | mask | LAPIC_TIMER_VECTOR);
> + i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
> + i82489_writereg(LAPIC_ICR_TIMER, cycles);
> +}
> +
> +void
> +lapic_timer_oneshot(uint32_t mask, uint32_t cycles)
> +{
> + lapic_timer_start(LAPIC_LVTT_TM_ONESHOT, mask, cycles);
> +}
> +
> +void
> +lapic_timer_periodic(uint32_t mask, uint32_t cycles)
> +{
> + lapic_timer_start(LAPIC_LVTT_TM_PERIODIC, mask, cycles);
> +}
> +
>  void
>  lapic_clockintr(void *arg)
>  {
> @@ -262,17 +292,7 @@ lapic_clockintr(void *arg)
>  void
>  lapic_startclock(void)
>  {
> - /*
> -  * Start local apic countdown timer running, in repeated mode.
> -  *
> -  * Mask the clock interrupt and set mode,
> -  * then set divisor,
> -  * then unmask and set the vector.
> -  */
> - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M);
> - i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
> - i82489_writereg(LAPIC_ICR_TIMER, lapic_tval);
> - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR);
> + lapic_timer_periodic(0, lapic_tval);
>  }
>
>  void
> @@ -284,6 +304,7 @@ lapic_initclocks(void)
>  }
>
>  extern int gettick(void);/* XXX put in header file */
> +extern u_long rtclock_tval; /* XXX put in header file */
>
>  static __inline void
>  wait_next_cycle(void)
> @@ -325,38 +346,45 @@ lapic_calibrate_timer(struct cpu_info *c
>* Configure timer to one-shot, interrupt masked,
>* large positive number.
>*/
> - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_M);
> - i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1);
> - i82489_writereg(LAPIC_ICR_TIMER, 0x8000);
> + lapic_timer_oneshot(LAPIC_LVTT_M, 0x8000);
>
> - s = intr_disable();
> + if (delay_func == i8254_delay) {
> + s = intr_disable();
>
> - /* wait for current cycle to finish */
> - wait_next_cycle();
> + /* wait for current cycle to finish */
> + wait_next_cycle();
>
> - startapic = lapic_gettick();
> + startapic = lapic_gettick();
>
> - /* wait the next hz cycles */
> - for (i = 0; i < hz; i++)
> - wait_next_cycle();
> + /* wait the next hz cycles */
> + for (i = 0; i < hz; i++)
> + wait_next_cycle();
>
> - endapic = lapic_gettick();
> + endapic = lapic_gettick();
>
> - intr_restore(s);
> + intr_restore(s);
>
> - dtick = hz * TIMER_DIV(hz);
> - dapic = startapic-endapic;
> + dtick = hz * rtclock_tval;
> + dapic = startapic-endapic;
>
> - /*
> -  * there are TIMER_FREQ ticks per second.
> -  * in dtick ticks, there are dapic bus clocks.
> -  */
> - tmp = (TIMER_FREQ * dapic) / dtick;
> + /*
> +  * there are TIMER_FREQ ticks per second.
> +  * in dtick ticks, there are dapic bus clocks.
> +  */
> + tmp = (TIMER_FREQ * dapic) / dtick;
>
> - lapic_per_second = tmp;
> + lapic_per_second = tmp;
> + } else {
> +

Re: stub out initial mmio support for vmm(4)/vmd(8)

2022-08-30 Thread Mike Larkin

(see below)

On Mon, Aug 15, 2022 at 03:06:05PM -0400, Dave Voutila wrote:
>
> Mike Larkin  writes:
>
> > On Fri, Jul 15, 2022 at 12:27:04PM -0400, Dave Voutila wrote:
> >> The following diff adds in formalization around mmio assists for nested
> >> page/ept faults on Intel and AMD vmm(4) hosts. It provides what little
> >> information is available to userland in terms of either the instruction
> >> bytes (on AMD) or the instruction length (on Intel).
> >>
> >> vmd is updated to intercept these vm exit events, but will currently log
> >> the issue and cause the vm process to exit. This is the same behavior
> >> folks experience currently when a guest attempts to read/write guest
> >> physical addresses in the mmio-reserved ranges, but now with an
> >> explanation of the reason and current {e,r,}ip value.
> >>
> >> This is the foundation I'll build upon while implementing instruction
> >> decode and emulation support in userland. No noticeable change should
> >> occur for existing guests that don't trigger mmio assist events.
> >>
> >> ok?
> >>
> >
> > See below.
> >
> > -ml
> >
> >> -dv
> >>
> >>
> >> diff refs/heads/master refs/heads/mmio
> >> commit - 10e026163f31687dba11fb4655500afb4e616258
> >> commit + 7d92e26b51c3fd520807dbcd5233f14b76bc611e
> >> blob - 84da19438b74377276b16b4b4f7db45ae9ec6be2
> >> blob + a89a4dc0fbe7b31a47390de363109092b76ffa22
> >> --- sys/arch/amd64/amd64/vmm.c
> >> +++ sys/arch/amd64/amd64/vmm.c
> >> @@ -4891,11 +4891,20 @@ vcpu_run_vmx(struct vcpu *vcpu, struct 
> >> vm_run_params *
> >>vcpu->vc_gueststate.vg_rax =
> >>vcpu->vc_exit.vei.vei_data;
> >>break;
> >> +  case VMX_EXIT_EPT_VIOLATION:
> >> +  ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
> >> +  >vc_exit.vrs);
> >> +  if (ret) {
> >> +  printf("%s: vm %d vcpu %d failed to update "
> >> +  "registers\n", __func__,
> >> +  vcpu->vc_parent->vm_id, vcpu->vc_id);
> >> +  return (EINVAL);
> >> +  }
> >> +  break;
> >>case VM_EXIT_NONE:
> >>case VMX_EXIT_HLT:
> >>case VMX_EXIT_INT_WINDOW:
> >>case VMX_EXIT_EXTINT:
> >> -  case VMX_EXIT_EPT_VIOLATION:
> >>case VMX_EXIT_CPUID:
> >>case VMX_EXIT_XSETBV:
> >>break;
> >> @@ -4927,6 +4936,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params 
> >> *
> >>break;
> >>  #endif /* VMM_DEBUG */
> >>}
> >> +  memset(>vc_exit, 0, sizeof(vcpu->vc_exit));
> >>}
> >>
> >>setregion(, ci->ci_gdt, GDT_SIZE - 1);
> >> @@ -5658,7 +5668,7 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
> >>
> >>if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
> >>DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
> >> -  return (VMM_MEM_TYPE_REGULAR);
> >> +  return (VMM_MEM_TYPE_MMIO);
> >>}
> >>
> >>/* XXX Use binary search? */
> >> @@ -5782,18 +5792,28 @@ int
> >>  svm_handle_np_fault(struct vcpu *vcpu)
> >>  {
> >>uint64_t gpa;
> >> -  int gpa_memtype, ret;
> >> +  int gpa_memtype, ret = 0;
> >>struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> >> +  struct vm_exit_eptviolation *vee = >vc_exit.vee;
> >>
> >> -  ret = 0;
> >> +  memset(vee, 0, sizeof(*vee));
> >> +  vee->vee_fault_type = VEE_FAULT_INVALID;
> >>
> >>gpa = vmcb->v_exitinfo2;
> >>
> >>gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
> >>switch (gpa_memtype) {
> >>case VMM_MEM_TYPE_REGULAR:
> >> +  vee->vee_fault_type = VEE_FAULT_HANDLED;
> >>ret = svm_fault_page(vcpu, gpa);
> >>break;
> >> +  case VMM_MEM_TYPE_MMIO:
> >> +  vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
> >> +  vee->vee_insn_len = vmcb->v_n_bytes_fetched

Re: uvmpd_dropswap()

2022-08-29 Thread Mike Larkin

On Mon, Aug 29, 2022 at 01:58:38PM +0200, Martin Pieuchot wrote:
> Small refactoring to introduce uvmpd_dropswap().  This will make an
> upcoming rewrite of the pdaemon smaller & easier to review :o)
>
> ok?
>

reads ok to me. ok mlarkin

> Index: uvm/uvm_pdaemon.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_pdaemon.c,v
> retrieving revision 1.102
> diff -u -p -r1.102 uvm_pdaemon.c
> --- uvm/uvm_pdaemon.c 22 Aug 2022 12:03:32 -  1.102
> +++ uvm/uvm_pdaemon.c 29 Aug 2022 11:55:52 -
> @@ -105,6 +105,7 @@ void  uvmpd_scan(struct uvm_pmalloc *);
>  void uvmpd_scan_inactive(struct uvm_pmalloc *, struct pglist *);
>  void uvmpd_tune(void);
>  void uvmpd_drop(struct pglist *);
> +void uvmpd_dropswap(struct vm_page *);
>
>  /*
>   * uvm_wait: wait (sleep) for the page daemon to free some pages
> @@ -367,6 +368,23 @@ uvm_aiodone_daemon(void *arg)
>  }
>
>
> +/*
> + * uvmpd_dropswap: free any swap allocated to this page.
> + *
> + * => called with owner locked.
> + */
> +void
> +uvmpd_dropswap(struct vm_page *pg)
> +{
> + struct vm_anon *anon = pg->uanon;
> +
> + if ((pg->pg_flags & PQ_ANON) && anon->an_swslot) {
> + uvm_swap_free(anon->an_swslot, 1);
> + anon->an_swslot = 0;
> + } else if (pg->pg_flags & PQ_AOBJ) {
> + uao_dropswap(pg->uobject, pg->offset >> PAGE_SHIFT);
> + }
> +}
>
>  /*
>   * uvmpd_scan_inactive: scan an inactive list for pages to clean or free.
> @@ -566,16 +584,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>   KASSERT(uvmexp.swpginuse <= uvmexp.swpages);
>   if ((p->pg_flags & PQ_SWAPBACKED) &&
>   uvmexp.swpginuse == uvmexp.swpages) {
> -
> - if ((p->pg_flags & PQ_ANON) &&
> - p->uanon->an_swslot) {
> - uvm_swap_free(p->uanon->an_swslot, 1);
> - p->uanon->an_swslot = 0;
> - }
> - if (p->pg_flags & PQ_AOBJ) {
> - uao_dropswap(p->uobject,
> -  p->offset >> PAGE_SHIFT);
> - }
> + uvmpd_dropswap(p);
>   }
>
>   /*
> @@ -599,16 +608,7 @@ uvmpd_scan_inactive(struct uvm_pmalloc *
>*/
>   if (swap_backed) {
>   /* free old swap slot (if any) */
> - if (anon) {
> - if (anon->an_swslot) {
> - uvm_swap_free(anon->an_swslot,
> - 1);
> - anon->an_swslot = 0;
> - }
> - } else {
> - uao_dropswap(uobj,
> -  p->offset >> PAGE_SHIFT);
> - }
> + uvmpd_dropswap(p);
>
>   /* start new cluster (if necessary) */
>   if (swslot == 0) {
>

Re: use libkern bzero on i386

2022-08-21 Thread Mike Larkin

On Mon, Aug 22, 2022 at 12:31:45AM +1000, Jonathan Gray wrote:
> libkern bzero doesn't have the 486 path but is otherwise the same
>

ok mlarkin (but didn't test)

> diff --git sys/arch/i386/i386/locore.s sys/arch/i386/i386/locore.s
> index dba6ce75b81..3055a06812c 100644
> --- sys/arch/i386/i386/locore.s
> +++ sys/arch/i386/i386/locore.s
> @@ -1518,73 +1518,6 @@ _C_LABEL(doreti_iret):
>  #include 
>  #include 
>
> -/*
> - * bzero (void *b, size_t len)
> - *   write len zero bytes to the string b.
> - */
> -
> -ENTRY(bzero)
> - pushl   %edi
> - movl8(%esp),%edi
> - movl12(%esp),%edx
> -
> - xorl%eax,%eax   /* set fill data to 0 */
> -
> - /*
> -  * if the string is too short, it's really not worth the overhead
> -  * of aligning to word boundaries, etc.  So we jump to a plain
> -  * unaligned set.
> -  */
> - cmpl$16,%edx
> - jb  7f
> -
> - movl%edi,%ecx   /* compute misalignment */
> - negl%ecx
> - andl$3,%ecx
> - subl%ecx,%edx
> - rep /* zero until word aligned */
> - stosb
> -
> - cmpl$CPUCLASS_486,_C_LABEL(cpu_class)
> - jne 8f
> -
> - movl%edx,%ecx
> - shrl$6,%ecx
> - jz  8f
> - andl$63,%edx
> -1:   movl%eax,(%edi)
> - movl%eax,4(%edi)
> - movl%eax,8(%edi)
> - movl%eax,12(%edi)
> - movl%eax,16(%edi)
> - movl%eax,20(%edi)
> - movl%eax,24(%edi)
> - movl%eax,28(%edi)
> - movl%eax,32(%edi)
> - movl%eax,36(%edi)
> - movl%eax,40(%edi)
> - movl%eax,44(%edi)
> - movl%eax,48(%edi)
> - movl%eax,52(%edi)
> - movl%eax,56(%edi)
> - movl%eax,60(%edi)
> - addl$64,%edi
> - decl%ecx
> - jnz 1b
> -
> -8:   movl%edx,%ecx   /* zero by words */
> - shrl$2,%ecx
> - andl$3,%edx
> - rep
> - stosl
> -
> -7:   movl%edx,%ecx   /* zero remainder bytes */
> - rep
> - stosb
> -
> - popl%edi
> - ret
> -
>  #if !defined(SMALL_KERNEL)
>  ENTRY(sse2_pagezero)
>   pushl   %ebx
> diff --git sys/lib/libkern/arch/i386/bzero.S sys/lib/libkern/arch/i386/bzero.S
> index 82b64b4d663..684ff54d20b 100644
> --- sys/lib/libkern/arch/i386/bzero.S
> +++ sys/lib/libkern/arch/i386/bzero.S
> @@ -7,7 +7,6 @@
>
>  #include 
>
> -#ifndef _KERNEL
>  ENTRY(bzero)
>   pushl   %edi
>   movl8(%esp),%edi
> @@ -42,4 +41,3 @@ L1: movl%edx,%ecx   /* zero remainder by 
> bytes */
>
>   popl%edi
>   ret
> -#endif
>

Re: regress: vmd: disable on i386

2022-08-21 Thread Mike Larkin

On Sat, Aug 20, 2022 at 09:03:59AM +, Klemens Nanni wrote:
> vmd/Makefile filters for amd64 itself but still, no need to enter
> on !amd64.
>
> Index: ../Makefile
> ===
> RCS file: /cvs/src/regress/usr.sbin/Makefile,v
> retrieving revision 1.26
> diff -u -p -r1.26 Makefile
> --- ../Makefile   11 Nov 2021 10:03:54 -  1.26
> +++ ../Makefile   20 Aug 2022 09:01:56 -
> @@ -16,7 +16,7 @@ SUBDIR += rpki-client
>  SUBDIR += snmpd
>  SUBDIR += syslogd
>
> -.if ${MACHINE} == "amd64" || ${MACHINE} == "i386"
> +.if ${MACHINE} == "amd64"
>  SUBDIR += vmd
>  .endif
>
>

looks like you may have already handled it but ok mlarkin in any case

Re: remove support for Cyrix 486DLC & Cyrix 6x86

2022-08-19 Thread Mike Larkin

On Fri, Aug 19, 2022 at 10:31:30PM -0400, Daniel Dickman wrote:
> The below diff removes detection code for the Cyrix 486DLC and Cyrix 6x86
> CPUs from OpenBSD/i386.
>
> The Cyrix 486DLC is a 486-class CPU which we no longer support.
>
> The 6x86, also known as the M1, does not support CPUID by default. But it
> can be made to support this instruction if bit 7 in CCR4 is enabled. We
> don't do this in the tree today.
>
> The reason to remove support for the 6x86 is because it doesn't support
> the RDTSC instruction which we use unconditionally. Therefore I don't
> believe Cyrix CPUs older than the 6x86MX (aka the M2) can run
> OpenBSD/i386.
>
> We keep the "cyrix6x86_cpu_setup" function in machdep because those quirks
> would still be needed on the M2.
>
> After this diff, the CPU detection code on i386 would assume that if the
> ID bit is missing from EFLAGS, we're running on an intel 486, while if we
> have the ID bit then we use the CPUID instruction for the detection logic.
>
> We also make the same change to amd64/include/specialreg.h to remove the
> defines for the Cyrix 486DLC. No Cyrix CPU supports amd64, so these
> defines have never made sense there.
>

ok mlarkin

>
> Index: i386/i386/locore0.S
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/locore0.S,v
> retrieving revision 1.7
> diff -u -p -u -r1.7 locore0.S
> --- i386/i386/locore0.S   15 Aug 2022 04:17:50 -  1.7
> +++ i386/i386/locore0.S   20 Aug 2022 02:07:30 -
> @@ -133,56 +133,6 @@ start:   movw$0x1234,0x472   # warm 
> boot
>   jnz .Ltry586
>  .Lis486: movl$CPU_486,RELOC(_C_LABEL(cpu))
>
> - /*
> -  * Check Cyrix CPU
> -  * Cyrix CPUs do not change the undefined flags following
> -  * execution of the divide instruction which divides 5 by 2.
> -  *
> -  * Note: CPUID is enabled on M2, so it passes another way.
> -  */
> - pushfl
> - movl$0x, %eax
> - xorl%edx, %edx
> - movl$2, %ecx
> - clc
> - divl%ecx
> - jnc .Ltrycyrix486
> - popfl
> - jmp 2f
> -.Ltrycyrix486:
> - movl$CPU_6x86,RELOC(_C_LABEL(cpu))  # set CPU type
> - /*
> -  * Check for Cyrix 486 CPU by seeing if the flags change during a
> -  * divide.  This is documented in the Cx486SLC/e SMM Programmer's
> -  * Guide.
> -  */
> - xorl%edx,%edx
> - cmpl%edx,%edx   # set flags to known state
> - pushfl
> - popl%ecx# store flags in ecx
> - movl$-1,%eax
> - movl$4,%ebx
> - divl%ebx# do a long division
> - pushfl
> - popl%eax
> - xorl%ecx,%eax   # are the flags different?
> - testl   $0x8d5,%eax # only check C|PF|AF|Z|N|V
> - jne 2f  # yes; must not be Cyrix CPU
> - movl$CPU_486DLC,RELOC(_C_LABEL(cpu))# set CPU type
> -
> - /* Disable caching of the ISA hole only. */
> - invd
> - movb$CCR0,%al   # Configuration Register index (CCR0)
> - outb%al,$0x22
> - inb $0x23,%al
> - orb $(CCR0_NC1|CCR0_BARB),%al
> - movb%al,%ah
> - movb$CCR0,%al
> - outb%al,$0x22
> - movb%ah,%al
> - outb%al,$0x23
> - invd
> -
>   jmp 2f
>
>  .Ltry586:/* Use the `cpuid' instruction. */
> Index: i386/i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.653
> diff -u -p -u -r1.653 machdep.c
> --- i386/i386/machdep.c   18 Aug 2022 13:05:43 -  1.653
> +++ i386/i386/machdep.c   20 Aug 2022 02:07:30 -
> @@ -498,14 +498,8 @@ char cpu_model[120];
>   * We deal with the rest in a different way.
>   */
>  const struct cpu_nocpuid_nameclass i386_nocpuid_cpus[] = {
> - { CPUVENDOR_INTEL, "Intel", "486SX",CPUCLASS_486,
> - NULL},  /* CPU_486SX */
>   { CPUVENDOR_INTEL, "Intel", "486DX",CPUCLASS_486,
>   NULL},  /* CPU_486   */
> - { CPUVENDOR_CYRIX, "Cyrix", "486DLC",   CPUCLASS_486,
> - NULL},  /* CPU_486DLC */
> - { CPUVENDOR_CYRIX, "Cyrix", "6x86", CPUCLASS_486,
> - cyrix6x86_cpu_setup},   /* CPU_6x86 */
>  };
>
>  const char *classnames[] = {
> @@ -2075,9 +2069,6 @@ identifycpu(struct cpu_info *ci)
>   cpu_class = class;
>
>   ci->cpu_class = class;
> -
> - if (cpu == CPU_486DLC)
> - printf("WARNING: CYRIX 486DLC CACHE UNCHANGED.\n");
>
>   /*
>* Enable ring 0 write protection.
> Index: i386/include/cputypes.h
> ===
> RCS file: /cvs/src/sys/arch/i386/include/cputypes.h,v
> retrieving revision 1.13
> diff -u

Re: Fix a race in uvm_pseg_release()

2022-08-18 Thread Mike Larkin

On Thu, Aug 18, 2022 at 12:39:58PM +0200, Martin Pieuchot wrote:
> The lock must be grabbed before iterating on the global array, ok?
>
> Index: uvm/uvm_pager.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_pager.c,v
> retrieving revision 1.88
> diff -u -p -r1.88 uvm_pager.c
> --- uvm/uvm_pager.c   15 Aug 2022 03:21:04 -  1.88
> +++ uvm/uvm_pager.c   18 Aug 2022 10:31:16 -
> @@ -209,6 +209,7 @@ uvm_pseg_release(vaddr_t segaddr)
>   struct uvm_pseg *pseg;
>   vaddr_t va = 0;
>
> + mtx_enter(&uvm_pseg_lck);
>   for (pseg = &psegs[0]; pseg != &psegs[PSEG_NUMSEGS]; pseg++) {
>   if (pseg->start <= segaddr &&
>   segaddr < pseg->start + MAX_PAGER_SEGS * MAXBSIZE)
> @@ -222,7 +223,6 @@ uvm_pseg_release(vaddr_t segaddr)
>   /* test for no remainder */
>   KDASSERT(segaddr == pseg->start + id * MAXBSIZE);
>
> - mtx_enter(&uvm_pseg_lck);
>
>   KASSERT(UVM_PSEG_INUSE(pseg, id));
>
>

ok mlarkin

Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-18 Thread Mike Larkin

On Wed, Aug 17, 2022 at 09:00:12PM +1000, Jonathan Gray wrote:
> On Wed, Aug 17, 2022 at 04:53:20PM +1000, Jonathan Gray wrote:
> >
> > It seems to me it would be cleaner if the decision of what to use for
> > delay could be moved into an md file.
> >
> > Or abstract it by having a numeric weight like timecounters or driver
> > match return numbers.
>
> diff against your previous, does not change lapic_delay
>

I think the combination of diffs is a move in the right direction, so ok
mlarkin on these when ready.

-ml

> diff --git sys/arch/amd64/amd64/machdep.c sys/arch/amd64/amd64/machdep.c
> index 932b1dfeb47..c4645b6a6fd 100644
> --- sys/arch/amd64/amd64/machdep.c
> +++ sys/arch/amd64/amd64/machdep.c
> @@ -2069,3 +2069,13 @@ check_context(const struct reg *regs, struct trapframe 
> *tf)
>
>   return 0;
>  }
> +
> +void
> +delay_init(void(*f)(int), int v)
> +{
> + static int c = 0;
> + if (v > c) {
> + delay_func = f;
> + c = v;
> + }
> +}
> diff --git sys/arch/amd64/amd64/tsc.c sys/arch/amd64/amd64/tsc.c
> index fd38dc6359d..8c839357dd2 100644
> --- sys/arch/amd64/amd64/tsc.c
> +++ sys/arch/amd64/amd64/tsc.c
> @@ -109,7 +109,7 @@ tsc_identify(struct cpu_info *ci)
>
>   tsc_frequency = tsc_freq_cpuid(ci);
>   if (tsc_frequency > 0)
> - delay_func = tsc_delay;
> + delay_init(tsc_delay, 300);
>  }
>
>  static inline int
> diff --git sys/arch/amd64/include/cpu.h sys/arch/amd64/include/cpu.h
> index b8db48f2714..a82af172452 100644
> --- sys/arch/amd64/include/cpu.h
> +++ sys/arch/amd64/include/cpu.h
> @@ -359,6 +359,7 @@ void signotify(struct proc *);
>   * We need a machine-independent name for this.
>   */
>  extern void (*delay_func)(int);
> +void delay_init(void(*)(int), int);
>  struct timeval;
>
>  #define DELAY(x) (*delay_func)(x)
> diff --git sys/arch/i386/i386/machdep.c sys/arch/i386/i386/machdep.c
> index e4cb15b4dc1..7da5c26e240 100644
> --- sys/arch/i386/i386/machdep.c
> +++ sys/arch/i386/i386/machdep.c
> @@ -3996,3 +3996,13 @@ cpu_rnd_messybits(void)
>   nanotime();
>   return (ts.tv_nsec ^ (ts.tv_sec << 20));
>  }
> +
> +void
> +delay_init(void(*f)(int), int v)
> +{
> + static int c = 0;
> + if (v > c) {
> + delay_func = f;
> + c = v;
> + }
> +}
> diff --git sys/arch/i386/include/cpu.h sys/arch/i386/include/cpu.h
> index 5f300710562..211ee475678 100644
> --- sys/arch/i386/include/cpu.h
> +++ sys/arch/i386/include/cpu.h
> @@ -302,6 +302,7 @@ void signotify(struct proc *);
>   * We need a machine-independent name for this.
>   */
>  extern void (*delay_func)(int);
> +void delay_init(void(*)(int), int);
>  struct timeval;
>
>  #define  DELAY(x)(*delay_func)(x)
> diff --git sys/dev/acpi/acpihpet.c sys/dev/acpi/acpihpet.c
> index 6dc595e50ab..4332b4dbc0e 100644
> --- sys/dev/acpi/acpihpet.c
> +++ sys/dev/acpi/acpihpet.c
> @@ -27,8 +27,6 @@
>  #include 
>  #include 
>
> -#include "acpitimer.h"
> -
>  int acpihpet_attached;
>
>  int acpihpet_match(struct device *, void *, void *);
> @@ -270,15 +268,7 @@ acpihpet_attach(struct device *parent, struct device 
> *self, void *aux)
>   hpet_timecounter.tc_name = sc->sc_dev.dv_xname;
>   tc_init(_timecounter);
>
> -#if defined(__amd64__) || defined(__i386__)
> - if (delay_func == i8254_delay)
> - delay_func = acpihpet_delay;
> -#if NACPITIMER > 1
> - extern void acpitimer_delay(int);
> - if (delay_func == acpitimer_delay)
> - delay_func = acpihpet_delay;
> -#endif
> -#endif /* defined(__amd64__) || defined(__i386__) */
> + delay_init(acpihpet_delay, 200);
>
>  #if defined(__amd64__)
>   extern void cpu_recalibrate_tsc(struct timecounter *);
> diff --git sys/dev/acpi/acpitimer.c sys/dev/acpi/acpitimer.c
> index 0c4c7b71a01..e2757a40f3d 100644
> --- sys/dev/acpi/acpitimer.c
> +++ sys/dev/acpi/acpitimer.c
> @@ -103,10 +103,8 @@ acpitimerattach(struct device *parent, struct device 
> *self, void *aux)
>   acpi_timecounter.tc_name = sc->sc_dev.dv_xname;
>   tc_init(_timecounter);
>
> -#if defined(__amd64__) || defined(__i386__)
> - if (delay_func == i8254_delay)
> - delay_func = acpitimer_delay;
> -#endif
> + delay_init(acpitimer_delay, 100);
> +
>  #if defined(__amd64__)
>   extern void cpu_recalibrate_tsc(struct timecounter *);
>   cpu_recalibrate_tsc(_timecounter);
> diff --git sys/dev/acpi/files.acpi sys/dev/acpi/files.acpi
> index 8ec3ec2f8b3..f97eb6d4e3e 100644
> --- sys/dev/acpi/files.acpi
> +++ sys/dev/acpi/files.acpi
> @@ -13,7 +13,7 @@ filedev/acpi/acpidebug.cacpi & ddb
>  # ACPI timer
>  device   acpitimer
>  attach   acpitimer at acpi
> -file dev/acpi/acpitimer.cacpitimer needs-flag
> +file dev/acpi/acpitimer.cacpitimer
>
>  # AC device
>  device   acpiac
> diff --git sys/dev/pv/pvbus.c sys/dev/pv/pvbus.c
> index cbe543ac312..ee53afe2138 100644
> ---

Re: fix i386 cpu classnames

2022-08-17 Thread Mike Larkin

On Wed, Aug 17, 2022 at 03:27:51PM +1000, Jonathan Gray wrote:
> broken after rev 1.652 of machdep.c
>
> cpu0: Intel(R) Pentium(R) M processor 1200MHz ("GenuineIntel" 686-class) 1.20 
> GHz, 06-09-05
> cpu0: Intel(R) Pentium(R) M processor 1200MHz ("GenuineIntel" -class) 1.20 
> GHz, 06-09-05
>
> CPUCLASS_* can't be renumbered as machdep.c assumes class is
> family - 3 in at least one path.  486 is family 4, so class 1
>
> Index: sys/arch/i386/i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.652
> diff -u -p -r1.652 machdep.c
> --- sys/arch/i386/i386/machdep.c  15 Aug 2022 04:17:50 -  1.652
> +++ sys/arch/i386/i386/machdep.c  17 Aug 2022 05:18:29 -
> @@ -509,6 +509,7 @@ const struct cpu_nocpuid_nameclass i386_
>  };
>
>  const char *classnames[] = {
> + "",
>   "486",
>   "586",
>   "686"
>

ok mlarkin if not already committed

Re: [RFC] acpi: add acpitimer_delay(), acpihpet_delay()

2022-08-15 Thread Mike Larkin

On Sun, Aug 14, 2022 at 11:24:37PM -0500, Scott Cheloha wrote:
> Hi,
>
> In the future when the LAPIC timer is run in oneshot mode there will
> be no lapic_delay().
>
> This is fine if you have a constant TSC, because we have tsc_delay().
>
> This is *very* bad for older amd64 machines, because you are left with
> i8254_delay().
>
> I would like to offer a less awful delay(9) implementation for this
> class of hardware.  Otherwise we may trip over bizarre phantom bugs on
> MP kernels because only one CPU can read the i8254 at a time.
>
> I think patrick@ was struggling with some version of that problem last
> year, but in a VM.
>
> Real i386 hardware should be fine.  Later models with an ACPI PM timer
> will be fine using acpitimer_delay() instead of i8254_delay().
>
> If this seems reasonable to people I will come back with a cleaned up
> patch for testing.
>
> Thoughts?  Preferences?
>

Seems reasonable to me. Would be interested to see the revised diff once you're
done.

-ml

> -Scott
>
> Here are the sample measurements from my 2017 laptop (kaby lake
> refresh) running the attached patch.  It takes longer than a
> microsecond to read either of the ACPI timers.  The PM timer is better
> than the HPET.  The HPET is a bit better than the i8254.  I hope the
> numbers are a little better on older hardware.
>
> acpitimer_test_delay:  expected  0.01000  actual  0.10638  error  
> 0.09638
> acpitimer_test_delay:  expected  0.1  actual  0.15464  error  
> 0.05464
> acpitimer_test_delay:  expected  0.00010  actual  0.000107619  error  
> 0.07619
> acpitimer_test_delay:  expected  0.00100  actual  0.001007275  error  
> 0.07275
> acpitimer_test_delay:  expected  0.01000  actual  0.010007891  error  
> 0.07891
>
> acpihpet_test_delay:   expected  0.01000  actual  0.22208  error  
> 0.21208
> acpihpet_test_delay:   expected  0.1  actual  0.31690  error  
> 0.21690
> acpihpet_test_delay:   expected  0.00010  actual  0.000112647  error  
> 0.12647
> acpihpet_test_delay:   expected  0.00100  actual  0.001021480  error  
> 0.21480
> acpihpet_test_delay:   expected  0.01000  actual  0.010013736  error  
> 0.13736
>
> i8254_test_delay:  expected  0.01000  actual  0.40110  error  
> 0.39110
> i8254_test_delay:  expected  0.1  actual  0.39471  error  
> 0.29471
> i8254_test_delay:  expected  0.00010  actual  0.000128031  error  
> 0.28031
> i8254_test_delay:  expected  0.00100  actual  0.001024586  error  
> 0.24586
> i8254_test_delay:  expected  0.01000  actual  0.010021859  error  
> 0.21859
>
> Index: dev/acpi/acpihpet.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpihpet.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 acpihpet.c
> --- dev/acpi/acpihpet.c   6 Apr 2022 18:59:27 -   1.26
> +++ dev/acpi/acpihpet.c   15 Aug 2022 04:21:58 -
> @@ -18,6 +18,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>
>  #include 
> @@ -31,8 +32,9 @@ int acpihpet_attached;
>  int acpihpet_match(struct device *, void *, void *);
>  void acpihpet_attach(struct device *, struct device *, void *);
>  int acpihpet_activate(struct device *, int);
> -
> +void acpiphet_delay(u_int);
>  u_int acpihpet_gettime(struct timecounter *tc);
> +void acpihpet_test_delay(u_int);
>
>  uint64_t acpihpet_r(bus_space_tag_t _iot, bus_space_handle_t _ioh,
>   bus_size_t _ioa);
> @@ -262,7 +264,7 @@ acpihpet_attach(struct device *parent, s
>   freq = 1000ull / period;
>   printf(": %lld Hz\n", freq);
>
> - hpet_timecounter.tc_frequency = (uint32_t)freq;
> + hpet_timecounter.tc_frequency = freq;
>   hpet_timecounter.tc_priv = sc;
>   hpet_timecounter.tc_name = sc->sc_dev.dv_xname;
>   tc_init(_timecounter);
> @@ -273,10 +275,43 @@ acpihpet_attach(struct device *parent, s
>   acpihpet_attached++;
>  }
>
> +void
> +acpihpet_delay(u_int usecs)
> +{
> + uint64_t d, s;
> + struct acpihpet_softc *sc = hpet_timecounter.tc_priv;
> +
> + d = usecs * hpet_timecounter.tc_frequency / 100;
> + s = acpihpet_r(sc->sc_iot, sc->sc_ioh, HPET_MAIN_COUNTER);
> + while (acpihpet_r(sc->sc_iot, sc->sc_ioh, HPET_MAIN_COUNTER) - s < d)
> + CPU_BUSY_CYCLE();
> +}
> +
>  u_int
>  acpihpet_gettime(struct timecounter *tc)
>  {
>   struct acpihpet_softc *sc = tc->tc_priv;
>
>   return (bus_space_read_4(sc->sc_iot, sc->sc_ioh, HPET_MAIN_COUNTER));
> +}
> +
> +void
> +acpihpet_test_delay(u_int usecs)
> +{
> + struct timespec ac, er, ex, t0, t1;
> +
> + if (!acpihpet_attached) {
> + printf("%s: (no hpet attached)\n", __func__);
> + return;
> + }
> +
> + nanouptime();
> + acpihpet_delay(usecs);
> + nanouptime();
> + timespecsub(, , );
> +

Re: Consistency and cleanup in /share/misc/airport

2022-07-28 Thread Mike Fischer



> Am 29.07.2022 um 03:45 schrieb Daniel Dickman :
> 
>> -HAM:Fuhlsbuettel Hamburg, Germany
>> +HAM:Fuhlsbuettel, Hamburg, Germany
> 
> Wikipedia states this airport is now called Hamburg Airport?

Correct, see the official airport website:
https://www.hamburg-airport.de/en

Fuhlsbüttel is the name of the district within the city of Hamburg where the 
airport is located. As this is the only major public airport in Hamburg, 
colloquial usage often equated Fuhlsbüttel with the airport. The other airport 
Hamburg-Finkenwerder (XFW) is newer and mainly used for the Airbus production 
facilities in Hamburg. Regular flights do not use XFW except possibly in the 
case of serious emergencies.

Mike

Re: remove pre-486 code from i386 platform

2022-07-28 Thread Mike Larkin

On Wed, Jul 27, 2022 at 10:18:55PM -0400, Daniel Dickman wrote:
> The diff below removes support for 386SX/DX processors. We already claim
> we don't support anything older than a Pentium so there's no point to keep
> this code.
>
> The main code change is in locore0.S and is to stop checking if the CPU
> we're on has the alignment check (PSL_AC) flag.
>
> The rest of the diff is about deleting ancient comments about how things
> worked before the 486. 2 files under arch/amd64 are updated to keep them
> in sync with arch/i386.
>
> The 486 (which was launched in 1989) added a small number of changes over
> a system with an 80386 + 80387 co-processor:
> - an alignment check (AC) flag in EFLAGS; this is how we check for 386 vs
>   486 and this is what we're removing
> - new bits in CR0; 386 CPUs did not support ring0 write protection
> - new bits in CR3
> - 3 new userland instructions: bswap / cmpxchg / xadd
> - 3 new kernel mode instructions: invd / wbinvd / invlpg
>
> I can't imagine the current code could possibly work on a real 386sx or
> 386dx CPU since we seem to call 5 out of the 6
> instructions mentioned above unconditionally. (The 6th instruction "invd"
> is used in locore0.S, but it looks like that one does check that we're on
> a 486 or newer CPU first).
>
> fwiw, it appears that Linux 3.8 dropped support for 386 CPUs back in 2012.
>
> ok?

ok mlarkin

>
> Index: amd64/amd64/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/lapic.c,v
> retrieving revision 1.59
> diff -u -p -u -r1.59 lapic.c
> --- amd64/amd64/lapic.c   31 Aug 2021 15:53:36 -  1.59
> +++ amd64/amd64/lapic.c   28 Jul 2022 01:41:15 -
> @@ -213,9 +213,7 @@ lapic_map(paddr_t lapic_base)
>   va = (vaddr_t)_apic;
>   } else {
>   /*
> -  * Map local apic.  If we have a local apic, it's safe to
> -  * assume we're on a 486 or better and can use invlpg and
> -  * non-cacheable PTEs
> +  * Map local apic.
>*
>* Whap the PTE "by hand" rather than calling pmap_kenter_pa
>* because the latter will attempt to invoke TLB shootdown
> Index: amd64/stand/mbr/mbr.S
> ===
> RCS file: /cvs/src/sys/arch/amd64/stand/mbr/mbr.S,v
> retrieving revision 1.7
> diff -u -p -u -r1.7 mbr.S
> --- amd64/stand/mbr/mbr.S 27 Jun 2022 16:10:09 -  1.7
> +++ amd64/stand/mbr/mbr.S 28 Jul 2022 01:41:16 -
> @@ -112,7 +112,7 @@ start:
>*
>* Accordingly, this code will fail on very early 8086/88s, but
>* nick@ will just have to live with it.  Others will note that
> -  * we require an 80386 (or compatible) or above processor, anyway.
> +  * we require at least a Pentium compatible processor anyway.
>*/
>   /* cli */
>   movw%ax, %ss
> Index: i386/i386/cpu.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/cpu.c,v
> retrieving revision 1.108
> diff -u -p -u -r1.108 cpu.c
> --- i386/i386/cpu.c   21 Feb 2022 10:24:28 -  1.108
> +++ i386/i386/cpu.c   28 Jul 2022 01:41:18 -
> @@ -406,8 +406,7 @@ cpu_init(struct cpu_info *ci)
>   patinit(ci);
>
>   /*
> -  * Enable ring 0 write protection (486 or above, but 386
> -  * no longer supported).
> +  * Enable ring 0 write protection
>*/
>   lcr0(rcr0() | CR0_WP);
>
> Index: i386/i386/lapic.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/lapic.c,v
> retrieving revision 1.48
> diff -u -p -u -r1.48 lapic.c
> --- i386/i386/lapic.c 11 Jun 2021 05:33:16 -  1.48
> +++ i386/i386/lapic.c 28 Jul 2022 01:41:18 -
> @@ -85,8 +85,7 @@ lapic_map(paddr_t lapic_base)
>   tpr = lapic_tpr;
>
>   /*
> -  * Map local apic.  If we have a local apic, it's safe to assume
> -  * we're on a 486 or better and can use invlpg and non-cacheable PTEs
> +  * Map local apic.
>*
>* Whap the PTE "by hand" rather than calling pmap_kenter_pa because
>* the latter will attempt to invoke TLB shootdown code just as we
> Index: i386/i386/locore.s
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/locore.s,v
> retrieving revision 1.194
> diff -u -p -u -r1.194 locore.s
> --- i386/i386/locore.s3 Jan 2022 00:44:30 -   1.194
> +++ i386/i386/locore.s28 Jul 2022 01:41:18 -
> @@ -266,7 +266,7 @@ _C_LABEL(lapic_tpr):
>   .long   0
>  #endif
>
> -_C_LABEL(cpu):   .long   0   # are we 386, 386sx, 486, 586 
> or 686
> +_C_LABEL(cpu):   .long   0   # are we 486, 586 or 686
>  _C_LABEL(cpu_id):.long   0   # saved from 'cpuid' instruction
>  _C_LABEL(cpu_pae):   .long   0   # are

Re: stub out initial mmio support for vmm(4)/vmd(8)

2022-07-26 Thread Mike Larkin

On Fri, Jul 15, 2022 at 12:27:04PM -0400, Dave Voutila wrote:
> The following diff adds in formalization around mmio assists for nested
> page/ept faults on Intel and AMD vmm(4) hosts. It provides what little
> information is available to userland in terms of either the instruction
> bytes (on AMD) or the instruction length (on Intel).
>
> vmd is updated to intercept these vm exit events, but will currently log
> the issue and cause the vm process to exit. This is the same behavior
> folks experience currently when a guest attempts to read/write guest
> physical addresses in the mmio-reserved ranges, but now with an
> explanation of the reason and current {e,r,}ip value.
>
> This is the foundation I'll build upon while implementing instruction
> decode and emulation support in userland. No noticeable change should
> occur for existing guests that don't trigger mmio assist events.
>
> ok?
>

See below.

-ml

> -dv
>
>
> diff refs/heads/master refs/heads/mmio
> commit - 10e026163f31687dba11fb4655500afb4e616258
> commit + 7d92e26b51c3fd520807dbcd5233f14b76bc611e
> blob - 84da19438b74377276b16b4b4f7db45ae9ec6be2
> blob + a89a4dc0fbe7b31a47390de363109092b76ffa22
> --- sys/arch/amd64/amd64/vmm.c
> +++ sys/arch/amd64/amd64/vmm.c
> @@ -4891,11 +4891,20 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
>   vcpu->vc_gueststate.vg_rax =
>   vcpu->vc_exit.vei.vei_data;
>   break;
> + case VMX_EXIT_EPT_VIOLATION:
> + ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS, 0,
> + >vc_exit.vrs);
> + if (ret) {
> + printf("%s: vm %d vcpu %d failed to update "
> + "registers\n", __func__,
> + vcpu->vc_parent->vm_id, vcpu->vc_id);
> + return (EINVAL);
> + }
> + break;
>   case VM_EXIT_NONE:
>   case VMX_EXIT_HLT:
>   case VMX_EXIT_INT_WINDOW:
>   case VMX_EXIT_EXTINT:
> - case VMX_EXIT_EPT_VIOLATION:
>   case VMX_EXIT_CPUID:
>   case VMX_EXIT_XSETBV:
>   break;
> @@ -4927,6 +4936,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *
>   break;
>  #endif /* VMM_DEBUG */
>   }
> + memset(>vc_exit, 0, sizeof(vcpu->vc_exit));
>   }
>
>   setregion(, ci->ci_gdt, GDT_SIZE - 1);
> @@ -5658,7 +5668,7 @@ vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
>
>   if (gpa >= VMM_PCI_MMIO_BAR_BASE && gpa <= VMM_PCI_MMIO_BAR_END) {
>   DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
> - return (VMM_MEM_TYPE_REGULAR);
> + return (VMM_MEM_TYPE_MMIO);
>   }
>
>   /* XXX Use binary search? */
> @@ -5782,18 +5792,28 @@ int
>  svm_handle_np_fault(struct vcpu *vcpu)
>  {
>   uint64_t gpa;
> - int gpa_memtype, ret;
> + int gpa_memtype, ret = 0;
>   struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
> + struct vm_exit_eptviolation *vee = >vc_exit.vee;
>
> - ret = 0;
> + memset(vee, 0, sizeof(*vee));
> + vee->vee_fault_type = VEE_FAULT_INVALID;
>
>   gpa = vmcb->v_exitinfo2;
>
>   gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
>   switch (gpa_memtype) {
>   case VMM_MEM_TYPE_REGULAR:
> + vee->vee_fault_type = VEE_FAULT_HANDLED;
>   ret = svm_fault_page(vcpu, gpa);
>   break;
> + case VMM_MEM_TYPE_MMIO:
> + vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
> + vee->vee_insn_len = vmcb->v_n_bytes_fetched;

If you are going to depend on decode assists, we should probably validate
that the CPU has them:

CPUID Fn8000_000A_EDX[7]:
 DecodeAssists Decode assists. Indicates support for the decode assists. See 
“Decode Assists.”

It does look like we sorta assume that we have some of these features (like
support for cleanbits, and even NP itself), so there are probably a few
checks we should do in attach. We have something similar in vmx but it's
done later on vm create IIRC because at the time we wanted to run on
a variety of host CPUs with different capabilities. I don't think that
is important anymore, so maybe a short-circuit check for some of these
required features in vmmattach for both svm and vmx is warranted now.

FWIW I've never seen a svm CPU without these assists (but I bet they exist).

> + memcpy(>vee_insn_bytes, vmcb->v_guest_ins_bytes,
> + sizeof(vee->vee_insn_bytes));
> + ret = EAGAIN;
> + break;
>   default:
>   printf("unknown memory type %d for GPA 0x%llx\n",
>   gpa_memtype, gpa);
> @@ -5862,10 +5882,13 @@ vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
>  int
>  vmx_handle_np_fault(struct vcpu *vcpu)
>  {
> -

Re: remove rise detection from i386

2022-07-23 Thread Mike Larkin

On Sat, Jul 23, 2022 at 02:13:27PM -0400, Daniel Dickman wrote:
> The Rise mp6 was a short lived processor that was announced 20+ years
> ago and didn't make it to market.
>
> I think we can delete the cpu identification for this cpu at this point.
>
> ok?

ok mlarkin

>
> Index: i386/machdep.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/machdep.c,v
> retrieving revision 1.650
> diff -u -p -u -r1.650 machdep.c
> --- i386/machdep.c12 Jul 2022 05:45:49 -  1.650
> +++ i386/machdep.c23 Jul 2022 18:02:05 -
> @@ -782,41 +782,6 @@ const struct cpu_cpuid_nameclass i386_cp
>   } }
>   },
>   {
> - "RiseRiseRise",
> - CPUVENDOR_RISE,
> - "Rise",
> - /* Family 4, not available from Rise */
> - { {
> - CPUCLASS_486,
> - {
> - 0, 0, 0, 0, 0, 0, 0, 0,
> - 0, 0, 0, 0, 0, 0, 0, 0,
> - "486 class" /* Default */
> - },
> - NULL
> - },
> - /* Family 5 */
> - {
> - CPUCLASS_586,
> - {
> - "mP6", 0, "mP6", 0, 0, 0, 0, 0,
> - 0, 0, 0, 0, 0, 0, 0, 0,
> - "mP6"   /* Default */
> - },
> - NULL
> - },
> - /* Family 6, not yet available from Rise */
> - {
> - CPUCLASS_686,
> - {
> - 0, 0, 0, 0, 0, 0, 0, 0,
> - 0, 0, 0, 0, 0, 0, 0, 0,
> - "686 class" /* Default */
> - },
> - NULL
> - } }
> - },
> - {
>   "GenuineTMx86",
>   CPUVENDOR_TRANSMETA,
>   "Transmeta",
> Index: include/cputypes.h
> ===
> RCS file: /cvs/src/sys/arch/i386/include/cputypes.h,v
> retrieving revision 1.11
> diff -u -p -u -r1.11 cputypes.h
> --- include/cputypes.h7 Jul 2022 00:56:46 -   1.11
> +++ include/cputypes.h23 Jul 2022 18:02:05 -
> @@ -63,7 +63,6 @@
>  #define CPUVENDOR_CYRIX  1
>  #define CPUVENDOR_AMD3
>  #define CPUVENDOR_IDT4
> -#define CPUVENDOR_RISE   5
>  #define CPUVENDOR_TRANSMETA  6
>  #define CPUVENDOR_NS 7
>  #define CPUVENDOR_VIA8
>

Re: [v3] amd64: simplify TSC sync testing

2022-07-06 Thread Mike Larkin

On Wed, Jul 06, 2022 at 11:48:41AM -0500, Scott Cheloha wrote:
> > On Jul 6, 2022, at 11:36 AM, Mike Larkin  wrote:
> >
> > On Tue, Jul 05, 2022 at 07:16:26PM -0500, Scott Cheloha wrote:
> >> On Tue, Jul 05, 2022 at 01:38:32PM -0700, Mike Larkin wrote:
> >>> On Mon, Jul 04, 2022 at 09:06:55PM -0500, Scott Cheloha wrote:
> >>>>
> >>>> [...]
> >>>
> >>> Here's the output from a 4 socket 80 thread machine.
> >>
> >> Oh nice.  I think this is the biggest machine we've tried so far.
> >>
> >>> kern.timecounter reports tsc after boot.
> >>
> >> Excellent.
> >>
> >>> Looks like this machine doesn't have the adjust MSR?
> >>
> >> IA32_TSC_ADJUST first appears in the Intel SDM Vol. 3 some time in
> >> 2011 or 2012.  I can't find the exact revision.
> >>
> >> (I really wish there was a comprehensive version history for this sort
> >> of thing, i.e. this MSR first appeared in the blah-blah uarch, this
> >> instruction is available on all uarchs after yada-yada, etc.)
> >>
> >> There are apparently several versions of the E7-4870 in the E7
> >> "family".  If your CPU predates that, or launched 2012-2014, the MSR
> >> may not have made the cut.
> >>
> >> An aside: I cannot find any evidence of AMD supporting this MSR in any
> >> processor.  It would be really, really nice if they did.  If you (or
> >> anyone reading) knows anything about this, or whether they have an
> >> equivalent MSR, shout it out.
> >>
> >>> Other than that, machine seems stable.
> >>
> >> Good, glad to hear it.  Thank you for testing.
> >>
> >> Has this machine had issues using the TSC on -current in the past?
> >>
> >> (If you have the time) what does the dmesg look like on the -current
> >> kernel with TSC_DEBUG enabled?
> >
> > Looks like you enabled TSC_DEBUG in your diff, so what I sent you is what 
> > you
> > are asking for...?
>
> No, I mean on the -current *unpatched* kernel.  Sorry if that wasn't
> clear.
>
> Our -current kernel prints more detailed information if TSC_DEBUG
> is enabled.  In particular, I'm curious if the unpatched kernel
> detects any skew or drift on your machine, and if so, how much.
>

here you go. I didn't run with all 80 cpus since -current doesn't have my
" > 64 cpus" diff, but I think this is what you're after in any case.

-ml

cpu0: TSC skew=0 observed drift=0
cpu1: TSC skew=112 observed drift=0
cpu2: TSC skew=102 observed drift=0
cpu3: TSC skew=-134 observed drift=0
cpu4: TSC skew=4 observed drift=0
cpu5: TSC skew=68 observed drift=0
cpu6: TSC skew=22 observed drift=0
cpu7: TSC skew=-52 observed drift=0
cpu8: TSC skew=8 observed drift=0
cpu9: TSC skew=-18 observed drift=0
cpu10: TSC skew=10 observed drift=0
cpu11: TSC skew=76 observed drift=0
cpu12: TSC skew=-2 observed drift=0
cpu13: TSC skew=-4 observed drift=0
cpu14: TSC skew=-2 observed drift=0
cpu15: TSC skew=-28 observed drift=0
cpu16: TSC skew=6 observed drift=0
cpu17: TSC skew=-8 observed drift=0
cpu18: TSC skew=0 observed drift=0
cpu19: TSC skew=-32 observed drift=0
cpu20: TSC skew=0 observed drift=0
cpu21: TSC skew=-26 observed drift=0
cpu22: TSC skew=0 observed drift=0
cpu23: TSC skew=22 observed drift=0
cpu24: TSC skew=-12 observed drift=0
cpu25: TSC skew=-14 observed drift=0
cpu26: TSC skew=76 observed drift=0
cpu27: TSC skew=-64 observed drift=0
cpu28: TSC skew=-2 observed drift=0
cpu29: TSC skew=34 observed drift=0
cpu30: TSC skew=22 observed drift=0
cpu31: TSC skew=-58 observed drift=0
cpu32: TSC skew=-2 observed drift=0
cpu33: TSC skew=6 observed drift=0
cpu34: TSC skew=46 observed drift=0
cpu35: TSC skew=20 observed drift=0
cpu36: TSC skew=34 observed drift=0
cpu37: TSC skew=-8 observed drift=0
cpu38: TSC skew=48 observed drift=0
cpu39: TSC skew=-10 observed drift=0
cpu40: TSC skew=0 observed drift=0
cpu41: TSC skew=72 observed drift=0
cpu42: TSC skew=2 observed drift=0
cpu43: TSC skew=-46 observed drift=0
cpu44: TSC skew=-2 observed drift=0
cpu45: TSC skew=-14 observed drift=0
cpu46: TSC skew=-2 observed drift=0
cpu47: TSC skew=-32 observed drift=0
cpu48: TSC skew=12 observed drift=0
cpu49: TSC skew=-16 observed drift=0
cpu50: TSC skew=84 observed drift=0
cpu51: TSC skew=-44 observed drift=0
cpu52: TSC skew=-4 observed drift=0
cpu53: TSC skew=4 observed drift=0
cpu54: TSC skew=16 observed drift=0
cpu55: TSC skew=-56 observed drift=0
cpu56: TSC skew=-10 observed drift=0
cpu57: TSC skew=6 observed drift=0
cpu58: TSC skew=6 observed drift=0
cpu59: TSC skew=-40 observed drift=0
cpu60: TSC skew=-4 observed drift=0
cpu61: TSC skew=-6 observed drift=0
cpu62: TSC skew=74 observed drift=0
cpu63: TSC skew=-48 observed drift=0

Re: [v3] amd64: simplify TSC sync testing

2022-07-06 Thread Mike Larkin

On Tue, Jul 05, 2022 at 07:16:26PM -0500, Scott Cheloha wrote:
> On Tue, Jul 05, 2022 at 01:38:32PM -0700, Mike Larkin wrote:
> > On Mon, Jul 04, 2022 at 09:06:55PM -0500, Scott Cheloha wrote:
> > >
> > > [...]
> >
> > Here's the output from a 4 socket 80 thread machine.
>
> Oh nice.  I think this is the biggest machine we've tried so far.
>
> > kern.timecounter reports tsc after boot.
>
> Excellent.
>
> > Looks like this machine doesn't have the adjust MSR?
>
> IA32_TSC_ADJUST first appears in the Intel SDM Vol. 3 some time in
> 2011 or 2012.  I can't find the exact revision.
>
> (I really wish there was a comprehensive version history for this sort
> of thing, i.e. this MSR first appeared in the blah-blah uarch, this
> instruction is available on all uarchs after yada-yada, etc.)
>
> There are apparently several versions of the E7-4870 in the E7
> "family".  If your CPU predates that, or launched 2012-2014, the MSR
> may not have made the cut.
>
> An aside: I cannot find any evidence of AMD supporting this MSR in any
> processor.  It would be really, really nice if they did.  If you (or
> anyone reading) knows anything about this, or whether they have an
> equivalent MSR, shout it out.
>
> > Other than that, machine seems stable.
>
> Good, glad to hear it.  Thank you for testing.
>
> Has this machine had issues using the TSC on -current in the past?
>
> (If you have the time) what does the dmesg look like on the -current
> kernel with TSC_DEBUG enabled?

Looks like you enabled TSC_DEBUG in your diff, so what I sent you is what you
are asking for...?

-ml

Re: [v3] amd64: simplify TSC sync testing

2022-07-05 Thread Mike Larkin

On Mon, Jul 04, 2022 at 09:06:55PM -0500, Scott Cheloha wrote:
> Hi,
>
> Once again, I am trying to change our approach to TSC sync testing to
> eliminate false positive results.  Instead of trying to repair the TSC
> by measuring skew, we just spin in a lockless loop looking for skew
> and mark the TSC as broken if we detect any.
>
> This is motivated in part by some multisocket machines that do not use
> the TSC as a timecounter because the current sync test confuses NUMA
> latency for TSC skew.
>
> The only difference between this version and the prior version (v2) is
> that we check whether we have the IA32_TSC_ADJUST register by hand in
> tsc_reset_adjust().  If someone wants to help me rearrange cpu_hatch()
> so we do CPU identification before TSC sync testing we can remove the
> workaround later.
>
> If you have the IA32_TSC_ADJUST register and it is non-zero going into
> the test, you will see something on the console like this:
>
>   tsc: cpu5: IA32_TSC_ADJUST: -150 -> 0
>
> This does *not* mean you failed the test.  It just means you probably
> have a bug in your BIOS (or some other firmware) and you should report
> it to your vendor.
>
> If you fail the test you will see something like this:
>
>   tsc: cpu0/cpu2: sync test round 1/2 failed
>   tsc: cpu0/cpu2: cpu2: 13043 lags 438 cycles
>
> A printout like this would mean that the sync test for cpu2 failed.
> In particular, cpu2's TSC trails cpu0's TSC by at least 438 cycles.
> If this happens for *any* CPU we mark the TSC timecounter as
> defective.
>
> --
>
> Please test!  Send your dmesg, pass or fail.
>
> I am especially interested in:
>
> 1. A test from dv@.  Your dual-socket machine has the IA32_TSC_ADJUST
>register but it failed the test running patch v2.  Maybe it will pass
>with this version?
>
> 2. Other multisocket machines.
>
> 3. There were reports of TSC issues with OpenBSD VMs running on ESXi.
>What happens when you run with this patch?
>
> 4. OpenBSD VMs on other hypervisors.
>
> 5. Non-Lenovo machines, non-Intel machines.
>
> -Scott

Here's the output from a 4 socket 80 thread machine. kern.timecounter reports
tsc after boot. Looks like this machine doesn't have the adjust MSR?

Other than that, machine seems stable.

-ml

OpenBSD 7.1-current (GENERIC.MP) #14: Tue Jul  5 13:19:28 PDT 2022

mlar...@slave.int.azathoth.net:/u/bin/src/OpenBSD/this/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 274847727616 (262115MB)
avail mem = 266500612096 (254154MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.6 @ 0x7f49c000 (103 entries)
bios0: vendor Dell Inc. version "2.11.0" date 06/04/2018
bios0: Dell Inc. PowerEdge R810
acpi0 at bios0: ACPI 3.0
acpi0: sleep states S0 S4 S5
acpi0: tables DSDT FACP APIC SPCR HPET DMAR MCFG WD__ SLIC ERST HEST BERT EINJ 
SRAT TCPA SSDT
acpi0: wakeup devices PCI0(S5)
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Xeon(R) CPU E7- 4870 @ 2.40GHz, 2394.31 MHz, 06-2f-02
cpu0: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,POPCNT,AES,NXE,PAGE1GB,RDTSCP,LONG,LAHF,PERF,ITSC,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,MELTDOWN
cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 4-way I-cache, 256KB 64b/line 
8-way L2 cache, 30MB 64b/line 24-way L3 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
cpu0: apic clock running at 132MHz
cpu0: mwait min=64, max=64, C-substates=0.2.1.1, IBE
cpu1 at mainbus0: apid 64 (application processor)
cpu1: Intel(R) Xeon(R) CPU E7- 4870 @ 2.40GHz, 2394.02 MHz, 06-2f-02
cpu1: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,POPCNT,AES,NXE,PAGE1GB,RDTSCP,LONG,LAHF,PERF,ITSC,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,MELTDOWN
cpu1: 32KB 64b/line 8-way D-cache, 32KB 64b/line 4-way I-cache, 256KB 64b/line 
8-way L2 cache, 30MB 64b/line 24-way L3 cache
cpu1: smt 0, core 0, package 1
cpu2 at mainbus0: apid 128 (application processor)
cpu2: Intel(R) Xeon(R) CPU E7- 4870 @ 2.40GHz, 2394.01 MHz, 06-2f-02
cpu2: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,DCA,SSE4.1,SSE4.2,x2APIC,POPCNT,AES,NXE,PAGE1GB,RDTSCP,LONG,LAHF,PERF,ITSC,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,MELTDOWN
cpu2: 32KB 64b/line 8-way D-cache, 32KB 64b/line 4-way I-cache, 256KB 64b/line 
8-way L2 cache, 30MB 64b/line 24-way L3 cache
cpu2: smt 0, core 0, package 2
cpu3 at mainbus0: apid 192 (application

Re: amdgpio(4) : preserve pin configuration on resume

2022-06-26 Thread Mike Larkin

On Wed, Apr 20, 2022 at 11:39:00AM +0200, Mark Kettenis wrote:
> > Date: Tue, 19 Apr 2022 22:02:00 -0700
> > From: Mike Larkin 
> >
> > On at least the Asus ROG Zephyrus 14 (2020), the trackpad fails to generate
> > any interrupts after resume. I tracked this down to amdgpio(4) not 
> > generating
> > interrupts after resume, and started looking at missing soft state.
> >
> > This diff preserves the interrupt pin configurations and restores them after
> > resume. This makes the device function properly post-zzz and post-ZZZ.
>
> I think it might make sense to structure this a bit more like
> pchgpio(4).  There we only restore the configuration for pins that are
> "in use" by OpenBSD.
>
> > Note: amdgpio_read_pin does not return the value that was previously written
> > during amdgpio_intr_establish (it always just returns 0x1 if the pin is
> > in use), so I'm just saving the actual value we write during
> > amdgpio_intr_establish and restoring that during resume.
>
> Well, using amdgpio_read_pin() for the purpose of saving the pin
> configuration doesn't make sense.  That function returns the pin input
> state.
>
> What you need to do is to read the register using bus_space_read_4()
> and restore that value.  Again take a look at pchgpio(4).
>
> > Note 2: In xxx_activate() functions, we usually call 
> > config_activate_children
> > but since amdgpio doesn't have any children, I left that out.
>
> I think that's fine.  But you should do the save/restore in
> DVACT_SUSPEND/DVACT_RESUME.  You want to restore the state as early as
> possible such that you don't get spurious interrupts when the BIOS
> leaves GPIO pins misconfigured.  Again, look at pchgpio(4).
>

I reworked this diff and made it look just like pchgpio. But it's a little
simpler than pchgpio since there is less to save/restore.

ok?

-ml

Index: amdgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/amdgpio.c,v
retrieving revision 1.7
diff -u -p -a -u -r1.7 amdgpio.c
--- amdgpio.c   6 Apr 2022 18:59:27 -   1.7
+++ amdgpio.c   26 Jun 2022 13:53:19 -
@@ -48,6 +48,11 @@ struct amdgpio_intrhand {
void *ih_arg;
 };

+struct amdgpio_pincfg {
+   /* Modeled after pchgpio but we only have one value to save/restore */
+   uint32_tpin_cfg;
+};
+
 struct amdgpio_softc {
struct device sc_dev;
struct acpi_softc *sc_acpi;
@@ -59,6 +64,7 @@ struct amdgpio_softc {
void *sc_ih;

int sc_npins;
+   struct amdgpio_pincfg *sc_pin_cfg;
struct amdgpio_intrhand *sc_pin_ih;

struct acpi_gpio sc_gpio;
@@ -66,9 +72,11 @@ struct amdgpio_softc {

 intamdgpio_match(struct device *, void *, void *);
 void   amdgpio_attach(struct device *, struct device *, void *);
+intamdgpio_activate(struct device *, int);

 const struct cfattach amdgpio_ca = {
-   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach
+   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach,
+   NULL, amdgpio_activate
 };

 struct cfdriver amdgpio_cd = {
@@ -86,6 +94,10 @@ void amdgpio_write_pin(void *, int, int)
 void   amdgpio_intr_establish(void *, int, int, int (*)(void *), void *);
 intamdgpio_pin_intr(struct amdgpio_softc *, int);
 intamdgpio_intr(void *);
+void   amdgpio_save_pin(struct amdgpio_softc *, int pin);
+void   amdgpio_save(struct amdgpio_softc *);
+void   amdgpio_restore_pin(struct amdgpio_softc *, int pin);
+void   amdgpio_restore(struct amdgpio_softc *);

 int
 amdgpio_match(struct device *parent, void *match, void *aux)
@@ -135,6 +147,8 @@ amdgpio_attach(struct device *parent, st
return;
}

+   sc->sc_pin_cfg = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_cfg),
+   M_DEVBUF, M_WAITOK);
sc->sc_pin_ih = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_ih),
M_DEVBUF, M_WAITOK | M_ZERO);

@@ -159,6 +173,58 @@ amdgpio_attach(struct device *parent, st
 unmap:
free(sc->sc_pin_ih, M_DEVBUF, sc->sc_npins * sizeof(*sc->sc_pin_ih));
bus_space_unmap(sc->sc_memt, sc->sc_memh, aaa->aaa_size[0]);
+}
+
+int
+amdgpio_activate(struct device *self, int act)
+{
+   struct amdgpio_softc *sc = (struct amdgpio_softc *)self;
+
+   switch (act) {
+   case DVACT_SUSPEND:
+   amdgpio_save(sc);
+   break;
+   case DVACT_RESUME:
+   amdgpio_restore(sc);
+   break;
+   }
+
+   return 0;
+}
+
+void
+amdgpio_save_pin(struct amdgpio_softc *sc, int pin)
+{
+   sc->sc_pin_cfg[pin].pin_cfg = bus_space_read_4(sc->sc_memt, sc->sc_memh,
+   pin * 4);
+}
+
+void
+amdgpio_save(struct amdgpio_softc *sc)
+{
+   int pin;
+
+   for (pin = 0 ; pin < sc->sc_npins; pin++)
+

Re: rewrite amd64 cache printing

2022-06-25 Thread Mike Larkin

On Fri, Jun 24, 2022 at 07:19:47PM +1000, Jonathan Gray wrote:
> Rewrite amd64 printing of cache details.
> Previously we looked at cpuid 0x80000005 for L1/TLB details
> which Intel documents as reserved.
> And cpuid 0x80000006 for L2 details.
>
> Intel also encode cache details in cpuid 4.
> AMD have mostly the same encoding with cpuid 0x8000001d
> 0x80000005/0x80000006 is used as a fallback in this diff
>
> The amount of cache visible to the thread is shown
> and not which groups of cpus share a particular cache.
> In the case of Alder Lake P, P cores have 1.25MB L2, each group of
> 4 E cores shares a 2MB L2.

See below.

-ml

>
> cpu0: AMD Ryzen 5 2600X Six-Core Processor, 3593.83 MHz, 17-08-02
> before:
> cpu0: 64KB 64b/line 4-way I-cache, 32KB 64b/line 8-way D-cache, 512KB 
> 64b/line 8-way L2 cache
> cpu0: ITLB 64 4KB entries fully associative, 64 4MB entries fully associative
> cpu0: DTLB 64 4KB entries fully associative, 64 4MB entries fully associative
> after:
> 0x801d
> cpu0: 32KB 64b/line 8-way D-cache, 64KB 64b/line 4-way I-cache, 512KB 
> 64b/line 8-way L2 cache, 8MB 64b/line 16-way L3 cache
> 0x8005 / 0x8006
> cpu0: 32KB 64b/line 8-way D-cache, 64KB 64b/line 4-way I-cache
> cpu0: 512KB 64b/line 8-way L2 cache
>
> cpu0: Intel(R) Core(TM) i7-5600U CPU @ 2.60GHz, 2494.54 MHz, 06-3d-04
> before:
> cpu0: 256KB 64b/line 8-way L2 cache
> after:
> 4
> cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 256KB 
> 64b/line 8-way L2 cache, 4MB 64b/line 16-way L3 cache
> 0x8005 / 0x8006
> cpu1: 256KB 64b/line 8-way L2 cache
>
> cpu0: Intel(R) Core(TM)2 Duo CPU T7250 @ 2.00GHz, 798.17 MHz, 06-0f-0d
> before:
> cpu0: 2MB 64b/line 8-way L2 cache
> after:
> 4
> cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 2MB 64b/line 
> 8-way L2 cache
> 0x8005 / 0x8006
> cpu0: 2MB 64b/line 8-way L2 cache
>
> cpu0: 12th Gen Intel(R) Core(TM) i7-1260P, 1995.55 MHz, 06-9a-03
> before:
> cpu0: 1MB 64b/line disabled L2 cache
> cpu8: 2MB 64b/line 16-way L2 cache
> after:
> 4
> cpu0: 48KB 64b/line 12-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
> 10-way L2 cache, 18MB 64b/line 12-way L3 cache
> cpu8: 32KB 64b/line 8-way D-cache, 64KB 64b/line 8-way I-cache, 2MB 64b/line 
> 16-way L2 cache, 18MB 64b/line 12-way L3 cache
> 0x8005 / 0x8006
> cpu0: 1MB 64b/line  L2 cache
> cpu8: 2MB 64b/line 16-way L2 cache
>
> diff --git sys/arch/amd64/amd64/cacheinfo.c sys/arch/amd64/amd64/cacheinfo.c
> index 9a672186e9e..a80d1e4f553 100644
> --- sys/arch/amd64/amd64/cacheinfo.c
> +++ sys/arch/amd64/amd64/cacheinfo.c
> @@ -1,32 +1,19 @@
>  /*   $OpenBSD: cacheinfo.c,v 1.9 2020/12/22 03:42:03 jsg Exp $   */
>
> -/*-
> - * Copyright (c) 2000 The NetBSD Foundation, Inc.
> - * All rights reserved.
> +/*
> + * Copyright (c) 2022 Jonathan Gray 
>   *
> - * This code is derived from software contributed to The NetBSD Foundation
> - * by Jason R. Thorpe.
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
>   *
> - * Redistribution and use in source and binary forms, with or without
> - * modification, are permitted provided that the following conditions
> - * are met:
> - * 1. Redistributions of source code must retain the above copyright
> - *notice, this list of conditions and the following disclaimer.
> - * 2. Redistributions in binary form must reproduce the above copyright
> - *notice, this list of conditions and the following disclaimer in the
> - *documentation and/or other materials provided with the distribution.
> - *
> - * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
> - * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
> LIMITED
> - * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
> - * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
> - * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
> - * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
> - * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
> - * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
> - * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
> - * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
> - * POSSIBILITY OF SUCH DAMAGE.
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-17 Thread Mike Larkin

On Fri, Jun 17, 2022 at 08:32:29PM +0100, Edd Barrett wrote:
> Hi Mike,
>
> On Fri, Jun 17, 2022 at 11:55:51AM -0700, Mike Larkin wrote:
> > >  - disabling xhci in ukc: the system fails to boot multi-user. The first
> > >oddness comes where cpus #1-3 fail to "become ready" (as reported by 
> > > dmesg).
> > >It spends a while thinking about these cores not coming up, before
> > >eventually proceeding, but eventually hard resetting. I guess the 
> > > system
> > >really needs xhci to function then...
> >
> > That really makes no sense. can you try the same experiment again using 
> > GENERIC?
>
> Doing `boot bsd.sp -c` and then `disable xhci` means that the system can at
> least boot with no xhci, but sadly it still won't stay in the suspended state.
>
> That might rule out xhci as a source of the issue, maybe.
>
> --
> Best Regards
> Edd Barrett
>
> https://www.theunixzoo.co.uk

Sorry, out of ideas.

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-17 Thread Mike Larkin

On Fri, Jun 17, 2022 at 06:41:23PM +0100, Edd Barrett wrote:
> Hi Mike,
>
> On Fri, Jun 17, 2022 at 10:20:35AM -0700, Mike Larkin wrote:
> > Oh, didn't read this closely enough the first time. If ZZZ doesn't 
> > instantly wake
> > the machine, then it's one of the two S3 devices described in your next 
> > email.
> > Since one is XHCI, I'd disable xhci in ukc and see if that helps. Or maybe 
> > the
> > other *hci(4)s also.
>
> Alright, so here's some more info.
>
>  - disabling xhci in ukc: the system fails to boot multi-user. The first
>oddness comes where cpus #1-3 fail to "become ready" (as reported by 
> dmesg).
>It spends a while thinking about these cores not coming up, before
>eventually proceeding, but eventually hard resetting. I guess the system
>really needs xhci to function then...

That really makes no sense. Can you try the same experiment again using GENERIC?

>
>  - disabling ehci or ahci: no change to sleep behaviour.
>
>  - disabling usb: no change to sleep behaviour.
>
> I don't see any [XEA]HCI options in the BIOS that I could tweak.
>
> Unless you have any other ideas, I'll try disabling random devices in the hope
> that I can narrow it down... I've already tried the network card, it aint 
> that.
>
> Thanks.
>
> --
> Best Regards
> Edd Barrett
>
> https://www.theunixzoo.co.uk
>

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-17 Thread Mike Larkin

On Fri, Jun 17, 2022 at 09:14:45AM +0100, Edd Barrett wrote:
> Hi Mike,
>
> On Thu, Jun 16, 2022 at 09:19:50PM -0700, Mike Larkin wrote:
> > From your original dmesg:
> >
> > > acpi0: wakeup devices PEG0(S4) PEGP(S4) PEG1(S4) PEGP(S4) PEG2(S4) 
> > > PEGP(S4) SIO1(S3) RP09(S4) PXSX(S4) RP10(S4) PXSX(S4) RP11(S4) PXSX(S4) 
> > > RP12(S4) PXSX(S4) RP13(S4) [...]
> >
> > Notice the [...] at the end, this is printed after 16 devices. What I'd 
> > suggest
> > is this:
> >
> > 1. remove the code that truncates this list after 16, and note down all the 
> > wake
> > devices.
> >
> > 2. If there are any in S3, try using ZZZ instead of zzz. If the machine 
> > does not instantly
> > wake, it's possible it's because of one of those S3 devices doing the wake 
> > (since ZZZ
> > uses S4).
>
> I'll try removing the truncation then. Bear with me.
>
> In the meantime, notice that the truncated list does include one S3 item
> `SIO1(S3)`. I don't know if that's what we are looking for?
>
> FWIW, I have already tried `ZZZ` on this machine and it does succeed to
> hibernate, but upon wake up, it hangs when decompressing the memory image. I
> left it decompressing a ~50MB image for more than an hour and concluded it had
> got stuck.

Oh, didn't read this closely enough the first time. If ZZZ doesn't instantly
wake the machine, then it's one of the two S3 devices described in your next
email. Since one is XHCI, I'd disable xhci in ukc and see if that helps. Or
maybe the other *hci(4)s also.

Now, why ZZZ fails to unpack is some other problem but the instant wake is not
related to that.

-ml

>
> > 3. If everything is S4, well, you're going to have to trace down those 
> > short names
> > like PEGP, PXSX, etc, and disable one at a time until you find the one that 
> > is
> > doing the wake. And it's possible it's none of these and is a fixed function
> > button or something.
>
> One additional piece of info, which may be worthless. I tried a Debian live 
> USB
> stick, to see if Linux was able to sleep this box. It was able to.
>
> I don't know if that rules out the idea of a fixed-function button?
>
> --
> Best Regards
> Edd Barrett
>
> https://www.theunixzoo.co.uk

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-17 Thread Mike Larkin

On Fri, Jun 17, 2022 at 09:14:45AM +0100, Edd Barrett wrote:
> Hi Mike,
>
> On Thu, Jun 16, 2022 at 09:19:50PM -0700, Mike Larkin wrote:
> > From your original dmesg:
> >
> > > acpi0: wakeup devices PEG0(S4) PEGP(S4) PEG1(S4) PEGP(S4) PEG2(S4) 
> > > PEGP(S4) SIO1(S3) RP09(S4) PXSX(S4) RP10(S4) PXSX(S4) RP11(S4) PXSX(S4) 
> > > RP12(S4) PXSX(S4) RP13(S4) [...]
> >
> > Notice the [...] at the end, this is printed after 16 devices. What I'd 
> > suggest
> > is this:
> >
> > 1. remove the code that truncates this list after 16, and note down all the 
> > wake
> > devices.
> >
> > 2. If there are any in S3, try using ZZZ instead of zzz. If the machine 
> > does not instantly
> > wake, it's possible it's because of one of those S3 devices doing the wake 
> > (since ZZZ
> > uses S4).
>
> I'll try removing the truncation then. Bear with me.
>
> In the meantime, notice that the truncated list does include one S3 item
> `SIO1(S3)`. I don't know if that's what we are looking for?
>
> FWIW, I have already tried `ZZZ` on this machine and it does succeed to
> hibernate, but upon wake up, it hangs when decompressing the memory image. I
> left it decompressing a ~50MB image for more than an hour and concluded it had
> got stuck.
>
> > 3. If everything is S4, well, you're going to have to trace down those 
> > short names
> > like PEGP, PXSX, etc, and disable one at a time until you find the one that 
> > is
> > doing the wake. And it's possible it's none of these and is a fixed function
> > button or something.
>
> One additional piece of info, which may be worthless. I tried a Debian live 
> USB
> stick, to see if Linux was able to sleep this box. It was able to.
>
> I don't know if that rules out the idea of a fixed-function button?
>
> --
> Best Regards
> Edd Barrett
>
> https://www.theunixzoo.co.uk

You're going to have to use trial and error, then: disable devices one at a
time until you find the one that hangs. Without the hardware in front of me,
that's the best advice I can offer. Sorry.

-ml

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-16 Thread Mike Larkin

On Thu, Jun 16, 2022 at 08:48:36PM +0100, Edd Barrett wrote:
> On Thu, Jun 16, 2022 at 10:22:16AM -0700, Mike Larkin wrote:
> > did it ever work in the past?
>
> I've only just received the machine, so it's difficult to say.
>
> I've spent the last hour changing various BIOS settings to see if anything
> changes, but alas no. I don't see any sleep-related power options, and any
> fancy power stuff I don't need or recognise, I've disabled. No joy.
>
> I've even updated the BIOS software to no avail. Hrm...
>
> --
> Best Regards
> Edd Barrett
>
> https://www.theunixzoo.co.uk

From your original dmesg:

> acpi0: wakeup devices PEG0(S4) PEGP(S4) PEG1(S4) PEGP(S4) PEG2(S4) PEGP(S4) 
> SIO1(S3) RP09(S4) PXSX(S4) RP10(S4) PXSX(S4) RP11(S4) PXSX(S4) RP12(S4) 
> PXSX(S4) RP13(S4) [...]

Notice the [...] at the end, this is printed after 16 devices. What I'd suggest
is this:

1. remove the code that truncates this list after 16, and note down all the wake
devices.

2. If there are any in S3, try using ZZZ instead of zzz. If the machine does
not instantly wake, it's possible it's because of one of those S3 devices
doing the wake (since ZZZ uses S4).

3. If everything is S4, well, you're going to have to trace down those short
names like PEGP, PXSX, etc, and disable one at a time until you find the one
that is doing the wake. And it's possible it's none of these and is a fixed
function button or something.

good luck

-ml

Re: Lenovo ThinkCentre M910q fails to suspend

2022-06-16 Thread Mike Larkin

On Thu, Jun 16, 2022 at 05:14:53PM +0100, Edd Barrett wrote:
> Hi,
>
> Has anyone managed to get a Lenovo ThinkCentre M910q (or similar) to suspend
> with OpenBSD?
>
> When invoking `zzz` the system prepares to go down, the screen goes blank, but
> then a short while later, the system comes back, as though it was awoken
> straight away.

did it ever work in the past?

>
> Here's the diff between the initial dmesg and the dmesg after this "suspend 
> and
> come back" described above:
>
> ```
> --- dmesg Thu Jun 16 16:53:44 2022
> +++ dmesg.1   Thu Jun 16 16:55:31 2022
> @@ -360,3 +360,22 @@
>  inteldrm0: 1920x1080, 32bpp
>  wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
>  wsdisplay0: screen 1-5 added (std, vt100 emulation)
> +uhub0 detached
> +uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 
> 3.00/1.00 addr 1
> +drm:pid42656:intel_ddi_sanitize_encoder_pll_mapping *NOTICE* [drm] 
> [ENCODER:94:DDI J/PHY @] is disabled/in DSI mode with an ungated DDI clock, 
> gate it
> +drm:pid42656:intel_ddi_sanitize_encoder_pll_mapping *NOTICE* [drm] 
> [ENCODER:109:DDI J/PHY @] is disabled/in DSI mode with an ungated DDI clock, 
> gate it
> +drm:pid42656:intel_ddi_sanitize_encoder_pll_mapping *NOTICE* [drm] 
> [ENCODER:119:DDI J/PHY @] is disabled/in DSI mode with an ungated DDI clock, 
> gate it
> +uhidev0 at uhub0 port 11 configuration 1 interface 0 "SINO WEALTH Gaming KB" 
> rev 2.00/1.03 addr 2
> +uhidev0: iclass 3/1
> +ukbd0 at uhidev0: 8 variable keys, 6 key codes
> +wskbd1 at ukbd0 mux 1
> +wskbd1: connecting to wsdisplay0
> +uhidev1 at uhub0 port 11 configuration 1 interface 1 "SINO WEALTH Gaming KB" 
> rev 2.00/1.03 addr 2
> +uhidev1: iclass 3/0, 5 report ids
> +ukbd1 at uhidev1 reportid 1: 120 variable keys, 0 key codes
> +wskbd2 at ukbd1 mux 1
> +wskbd2: connecting to wsdisplay0
> +ucc0 at uhidev1 reportid 2: 573 usages, 18 keys, array
> +wskbd3 at ucc0 mux 1
> +wskbd3: connecting to wsdisplay0
> +uhid0 at uhidev1 reportid 5: input=0, output=0, feature=5
> ```
>
> Is it odd that devices come back which we never saw detach?
>
> Repeating the same again, but with the inteldrm driver disabled, in case those
> scary messages have something to do with this:
>
> ```
> --- dmesg Thu Jun 16 17:04:43 2022
> +++ dmesg.1   Thu Jun 16 17:05:01 2022
> @@ -554,3 +554,19 @@
>  softraid0 at root
>  scsibus4 at softraid0: 256 targets
>  root on sd0a (5d59e5562a788986.a) swap on sd0b dump on sd0b
> +wskbd1: disconnecting from wsdisplay0
> +wskbd1 detached
> +ukbd0 detached
> +uhidev0 detached
> +wskbd2: disconnecting from wsdisplay0
> +wskbd2 detached
> +ukbd1 detached
> +wskbd3: disconnecting from wsdisplay0
> +wskbd3 detached
> +ucc0 detached
> +uhid0 detached
> +uhidev1 detached
> +uhub0 detached
> +uhub0 at usb0 configuration 1 interface 0 "Intel xHCI root hub" rev 
> 3.00/1.00 addr 1
> +uhub0: port 11, set config 0 at addr 2 failed
> +uhub0: device problem, disabling port 11
> ```
>
> This time we see more devices detach, but when uhub0 comes back there is a
> problem. curious...
>
> Does anyone have any idea what is going on here? This was due to be my new
> porting box, but I need it to suspend...
>
> (FWIW: the system also fails to come up from ZZZ hibernate, but one thing at a
> time)
>
> Full dmesg (with inteldrm):
>
> ```
> OpenBSD 7.1-current (GENERIC.MP) #582: Mon Jun 13 15:37:01 MDT 2022
> dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 17044692992 (16255MB)
> avail mem = 16510771200 (15745MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 3.0 @ 0xdcd91000 (88 entries)
> bios0: vendor LENOVO version "M1AKT39A" date 07/16/2018
> bios0: LENOVO 10MUS2UG00
> acpi0 at bios0: ACPI 6.1
> acpi0: sleep states S0 S3 S4 S5
> acpi0: tables DSDT FACP APIC FPDT MCFG SSDT FIDT SLIC MSDM SSDT SSDT HPET 
> SSDT UEFI SSDT LPIT WSMT SSDT SSDT DBGP DBG2 DMAR TPM2 LUFT ASF! BGRT
> acpi0: wakeup devices PEG0(S4) PEGP(S4) PEG1(S4) PEGP(S4) PEG2(S4) PEGP(S4) 
> SIO1(S3) RP09(S4) PXSX(S4) RP10(S4) PXSX(S4) RP11(S4) PXSX(S4) RP12(S4) 
> PXSX(S4) RP13(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Core(TM) i5-6500T CPU @ 2.50GHz, 2394.42 MHz, 06-5e-03
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,HLE,AVX2,SMEP,BMI2,ERMS,INVPCID,RTM,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,SRBDS_CTRL,MD_CLEAR,TSXFA,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES,MELTDOWN
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0,

Re: Fix rebooting Linux guests in vmd(8)

2022-06-05 Thread Mike Larkin

On Sun, Jun 05, 2022 at 09:25:34AM -0400, Dave Voutila wrote:
> tech@ friends:
>
> tl;dr: testers wanted for fixing Linux guest reboot. If you've got
> Linux guests that no longer reboot properly, please test! For other
> vmd users, please check for any regressions.
>
> Our port of SeaBIOS is configured to enable QEMU features to simplify
> its working with vmd(8). This generally works well.
>
> SeaBIOS provides a reboot routine specifically for QEMU environments.
> One of the reasons is to provide some extra logic for refreshing the
> copy of the BIOS in memory (as if reading from ROM) before attempting
> the reset (first via PCI and falling back to triple-faulting). The way
> SeaBIOS does this appears to be it assumes there's a "pristine copy"
> of the BIOS loaded by the host's emulator to just below the 4GB mark
> in physical memory. (See src/fw/shadow.c in the SeaBIOS source tree.)
>
> This hasn't been a problem until recent Linux kernel changes started
> calling into the BIOS as a way to reboot the guest. (I know at least
> the 5.15 kernel shipped with Alpine does this.)
>
> Since vmd/vmm doesn't create a mapping for that area just below 4GB,
> guests experience a page fault vm-exit and a resulting failure as we
> consider that address part of the MMIO hole and reserved.
>
> This change to vmd(8) loads a second copy of the BIOS, ending at the
> 4GB mark in guest memory. Consequently, vmm(4)'s MMIO memory hole is
> adjusted to end 2MB below 4GB to accommodate SeaBIOS and future
> firmware payloads that may be > 1MB in size. (I believe EDK-II UEFI is
> larger than 1MB...haven't looked in awhile.)
>
> Along the way, I adjusted the use of hardcoded values for 1 MB and 4
> GB to use a more human readable version via #defines.
>
> For testers:
>   0. apply patch
>   1. build, install updated kernel, boot new kernel
>   2. copy or symlink sys/arch/amd64/include/vmmvar.h to
>  /usr/include/amd64/
>   3. build and install vmd(8)
>   4. test!
>
> ~dv
>

Does qemu load 2 copies of the bios or just rely on A20 tricks to make the
bios appear at two addresses?

-ml

>
> diff refs/heads/master refs/heads/vmd-bios4g
> blob - fea4ab52e6db7eff12b913ecde30abf970da0b54
> blob + f06212b18f8ae19b5edc8fa8d64684d7163e35a8
> --- sys/arch/amd64/include/vmmvar.h
> +++ sys/arch/amd64/include/vmmvar.h
> @@ -35,7 +35,7 @@
>  #define VMM_MAX_NICS_PER_VM  4
>
>  #define VMM_PCI_MMIO_BAR_BASE0xF000ULL
> -#define VMM_PCI_MMIO_BAR_END 0xULL
> +#define VMM_PCI_MMIO_BAR_END 0xFFDFULL   /* 2 MiB below 4 GiB */
>  #define VMM_PCI_MMIO_BAR_SIZE0x0001
>  #define VMM_PCI_IO_BAR_BASE  0x1000
>  #define VMM_PCI_IO_BAR_END   0x
> blob - d952ba4d8d0bff700fc09c066ffc284909150417
> blob + c36e17eb5ed4d1799f55fa1af5f7ca158923202e
> --- usr.sbin/vmd/vm.c
> +++ usr.sbin/vmd/vm.c
> @@ -65,6 +65,10 @@
>  #include "vmd.h"
>  #include "vmm.h"
>
> +#define _1_MB (1UL * 1024 * 1024)
> +#define _2_MB (2UL * 1024 * 1024)
> +#define _4_GB (4UL * 1024 * 1024 * 1024)
> +
>  io_fn_t ioports_map[MAX_PORTS];
>
>  int run_vm(int, int[][VM_MAX_BASE_PER_DISK], int *,
> @@ -234,7 +238,7 @@ loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_s
>   return (-1);
>
>   /* The BIOS image must end at 1MB */
> - if ((off = 1048576 - size) < 0)
> + if ((off = _1_MB - size) < 0)
>   return (-1);
>
>   /* Read BIOS image into memory */
> @@ -243,6 +247,16 @@ loadfile_bios(gzFile fp, off_t size, struct vcpu_reg_s
>   return (-1);
>   }
>
> + if (gzseek(fp, 0, SEEK_SET) == -1)
> + return (-1);
> +
> + /* Read a second BIOS copy into memory ending at 4GB */
> + off = _4_GB - size;
> + if (mread(fp, off, size) != (size_t)size) {
> + errno = EIO;
> + return (-1);
> + }
> +
>   log_debug("%s: loaded BIOS image", __func__);
>
>   return (0);
> @@ -872,6 +886,7 @@ void
>  create_memory_map(struct vm_create_params *vcp)
>  {
>   size_t len, mem_bytes;
> + size_t above_1m = 0, above_4g = 0;
>
>   mem_bytes = vcp->vcp_memranges[0].vmr_size;
>   vcp->vcp_nmemranges = 0;
> @@ -893,29 +908,47 @@ create_memory_map(struct vm_create_params *vcp)
>* we need to make sure that vmm(4) permits accesses
>* to it. So allocate guest memory for it.
>*/
> - len = 0x10 - LOWMEM_KB * 1024;
> + len = _1_MB - (LOWMEM_KB * 1024);
>   vcp->vcp_memranges[1].vmr_gpa = LOWMEM_KB * 1024;
>   vcp->vcp_memranges[1].vmr_size = len;
>   mem_bytes -= len;
>
> - /* Make sure that we do not place physical memory into MMIO ranges. */
> - if (mem_bytes > VMM_PCI_MMIO_BAR_BASE - 0x10)
> - len = VMM_PCI_MMIO_BAR_BASE - 0x10;
> - else
> - len = mem_bytes;
> -
> - /* Third memory region: 1MB - (1MB + len) */
> - vcp->vcp_memranges[2].vmr_gpa = 0x10;
> - vcp->vcp_memranges[2].vmr_size = len;
> - mem_bytes -= len;

Re: vmm: remove vm teardown from vcpu run path (testers needed)

2022-06-05 Thread Mike Larkin

On Thu, Jun 02, 2022 at 03:05:16PM -0400, Dave Voutila wrote:
>
> Dave Voutila  writes:
>
> > tech@ et al.:
> >
> > Looking for testers of the following diff for vmm(4). In my efforts to
> > fix some stability issues, I'm taking baby steps tweaking parts of the
> > code to make my upcoming proposal (adding refcnts) easier to swallow.
> >
> > This change removes the calling of vm_teardown from the code path in
> > vm_run after vmm has exited the vm/vcpu and is on its way back to
> > userland/vmd(8).
> >
> > vm_teardown is currently called in 3 areas to destroy/free a vm:
> >
> >   - vm_create: as cleanup in an error path
> >   - vm_terminate: on a vm the ioctl is killing
> >   - vm_run: the run ioctl handler
> >
> > This diff removes that last bullet. It's not needed as vmd will cleanup
> > the vm on child exit, calling vm_terminate. Any non-vmd user of vmm(4)
> > can stop being lazy and use the VMM_IOC_TERM ioctl.
> >
> > Not included in the snippet is the existing final else block that still
> > toggles the vcpu state:
> >
> > } else {
> > vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
> > vcpu->vc_state = VCPU_STATE_TERMINATED;
> > }
> >
> > If testing, please describe *any* difference in shutdown/reboot of vm
> > guests. (n.b. there's a known issue for Linux guests running very recent
> > Linux kernels not being able to reboot. That needs to be addressed in
> > vmd.)
> >
>
> Bumping as the diff has been out for testing and looking for ok's.
>
> -dv
>

ok mlarkin if this helps your subsequent cleanup

> >
> >
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===
> > RCS file: /opt/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.311
> > diff -u -p -r1.311 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c  20 May 2022 22:42:09 -  1.311
> > +++ sys/arch/amd64/amd64/vmm.c  23 May 2022 11:57:49 -
> > @@ -4495,22 +4495,8 @@ vm_run(struct vm_run_params *vrp)
> > ret = vcpu_run_svm(vcpu, vrp);
> > }
> >
> > -   /*
> > -* We can set the VCPU states here without CAS because once
> > -* a VCPU is in state RUNNING or REQTERM, only the VCPU itself
> > -* can switch the state.
> > -*/
> > atomic_dec_int(&vm->vm_vcpus_running);
> > -   if (vcpu->vc_state == VCPU_STATE_REQTERM) {
> > -   vrp->vrp_exit_reason = VM_EXIT_TERMINATED;
> > -   vcpu->vc_state = VCPU_STATE_TERMINATED;
> > -   if (vm->vm_vcpus_running == 0) {
> > -   rw_enter_write(&vmm_softc->vm_lock);
> > -   vm_teardown(vm);
> > -   rw_exit_write(&vmm_softc->vm_lock);
> > -   }
> > -   ret = 0;
> > -   } else if (ret == 0 || ret == EAGAIN) {
> > +   if (ret == 0 || ret == EAGAIN) {
> > /* If we are exiting, populate exit data so vmd can help. */
> > vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE
> > : vcpu->vc_gueststate.vg_exit_reason;
>
>
> --
> -Dave Voutila

Re: move vmm(4) spinout paranoia behind MP_LOCKDEBUG

2022-05-20 Thread Mike Larkin

On Sat, Apr 16, 2022 at 12:09:46PM -0400, Dave Voutila wrote:
> This tucks all the spinout paranoia behind `#ifdef MP_LOCKDEBUG` and
> uses the same approach used in amd64's pmap's TLB shootdown code.
>
> Part of me wants to remove this altogether, but I'm not sure it's
> outlived its usefulness quite yet.
>
> Three areas that busy wait on ipi's are modified:
>
> 1. vmm_start - performs ipi to enable vmm on all cpus
> 2. vmm_stop - performs ipi to disable vmm on all cpus
> 3. vmx_remote_vmclear - performs ipi to vmclear a cpu (only pertinent to
>Intel hosts)
>
> (3) is the most likely to spin out and prior to bumping the spinout to
> the current value (based on __mp_lock_spinout) we had reports from users
> of hitting it on slower/older MP hardware.
>
> For vmm_{start, stop}, I moved the current cpu start/stop routine to
> before performing the ipi broadcast because if we're going to fail to
> (dis)enable vmm we should fail fast. If we fail, there's no need to
> broadcast the ipi. This simplifies the code paths and removes a local
> variable.
>
> All three migrate to infinite busy waits and only have a spinout if
> built with MP_LOCKDEBUG. On a spinout, we enter ddb.
>
> Compiled on amd64 GENERIC, GENERIC.MP, and GENERIC.MP with
> MP_LOCKDEBUG. (This time I won't break GENERIC :)
>
> OK?
>
> -dv

Sorry for the delay. ok mlarkin. I've had this on a few machines for
the better part of a month and haven't seen any problems.

-ml

>
> Index: sys/arch/amd64/amd64/vmm.c
> ===
> RCS file: /opt/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.305
> diff -u -p -r1.305 vmm.c
> --- sys/arch/amd64/amd64/vmm.c28 Mar 2022 06:28:47 -  1.305
> +++ sys/arch/amd64/amd64/vmm.c16 Apr 2022 18:49:01 -
> @@ -43,6 +43,11 @@
>  #include 
>  #include 
>
> +#ifdef MP_LOCKDEBUG
> +#include 
> +extern int __mp_lock_spinout;
> +#endif /* MP_LOCKDEBUG */
> +
>  /* #define VMM_DEBUG */
>
>  void *l1tf_flush_region;
> @@ -1328,17 +1333,26 @@ int
>  vmm_start(void)
>  {
>   struct cpu_info *self = curcpu();
> - int ret = 0;
>  #ifdef MULTIPROCESSOR
>   struct cpu_info *ci;
>   CPU_INFO_ITERATOR cii;
> - int i;
> -#endif
> +#ifdef MP_LOCKDEBUG
> + int nticks;
> +#endif /* MP_LOCKDEBUG */
> +#endif /* MULTIPROCESSOR */
>
>   /* VMM is already running */
>   if (self->ci_flags & CPUF_VMM)
>   return (0);
>
> + /* Start VMM on this CPU */
> + start_vmm_on_cpu(self);
> + if (!(self->ci_flags & CPUF_VMM)) {
> + printf("%s: failed to enter VMM mode\n",
> + self->ci_dev->dv_xname);
> + return (EIO);
> + }
> +
>  #ifdef MULTIPROCESSOR
>   /* Broadcast start VMM IPI */
>   x86_broadcast_ipi(X86_IPI_START_VMM);
> @@ -1346,25 +1360,23 @@ vmm_start(void)
>   CPU_INFO_FOREACH(cii, ci) {
>   if (ci == self)
>   continue;
> - for (i = 10; (!(ci->ci_flags & CPUF_VMM)) && i>0;i--)
> - delay(10);
> - if (!(ci->ci_flags & CPUF_VMM)) {
> - printf("%s: failed to enter VMM mode\n",
> - ci->ci_dev->dv_xname);
> - ret = EIO;
> +#ifdef MP_LOCKDEBUG
> + nticks = __mp_lock_spinout;
> +#endif /* MP_LOCKDEBUG */
> + while (!(ci->ci_flags & CPUF_VMM)) {
> + CPU_BUSY_CYCLE();
> +#ifdef MP_LOCKDEBUG
> + if (--nticks <= 0) {
> + db_printf("%s: spun out", __func__);
> + db_enter();
> + nticks = __mp_lock_spinout;
> + }
> +#endif /* MP_LOCKDEBUG */
>   }
>   }
>  #endif /* MULTIPROCESSOR */
>
> - /* Start VMM on this CPU */
> - start_vmm_on_cpu(self);
> - if (!(self->ci_flags & CPUF_VMM)) {
> - printf("%s: failed to enter VMM mode\n",
> - self->ci_dev->dv_xname);
> - ret = EIO;
> - }
> -
> - return (ret);
> + return (0);
>  }
>
>  /*
> @@ -1376,17 +1388,26 @@ int
>  vmm_stop(void)
>  {
>   struct cpu_info *self = curcpu();
> - int ret = 0;
>  #ifdef MULTIPROCESSOR
>   struct cpu_info *ci;
>   CPU_INFO_ITERATOR cii;
> - int i;
> -#endif
> +#ifdef MP_LOCKDEBUG
> + int nticks;
> +#endif /* MP_LOCKDEBUG */
> +#endif /* MULTIPROCESSOR */
>
>   /* VMM is not running */
>   if (!(self->ci_flags & CPUF_VMM))
>   return (0);
>
> + /* Stop VMM on this CPU */
> + stop_vmm_on_cpu(self);
> + if (self->ci_flags & CPUF_VMM) {
> + printf("%s: failed to exit VMM mode\n",
> + self->ci_dev->dv_xname);
> + return (EIO);
> + }
> +
>  #ifdef MULTIPROCESSOR
>   /* Stop VMM on other CPUs */
>   x86_broadcast_ipi(X86_IPI_STOP_VMM);
> @@ -1394,25 +1415,23 @@ vmm_stop(void)
>

Re: vmm: load vmcs before reading vcpu registers

2022-05-20 Thread Mike Larkin

On Wed, May 18, 2022 at 10:27:11AM -0400, Dave Voutila wrote:
>
> ping...would like to get this in if possible so I can move onto fixing
> some things in vmm.
>

sorry. ok mlarkin

> Dave Voutila  writes:
>
> > tech@,
> >
> > Continuing my vmm/vmd bug hunt, the following diff adapts
> > vcpu_readregs_vmx to optionally load the vmcs on the current cpu. This
> > has gone unnoticed as the ioctl isn't used in typical vmd usage and the
> > usage of vcpu_readregs_vmx in the run ioctl is after the vmcs is already
> > loaded on the current cpu.
> >
> > This fixes `vmctl send` on Intel hosts. (A fix for `vmctl receive` comes
> > next.)
> >
> > Currently, `vmctl send` tries to serialize the vcpu registers as part of
> > serializing the vm state. On an MP machine, it's highly probable that
> > the vmread instructions will fail as they'll be executed on a cpu that
> > doesn't have the vmcs loaded.
> >
> > While here, I noticed the vcpu_writeregs_vmx function doesn't set the
> > vcpu's vmcs state variable to VMCS_CLEARED after running vmclear. This
> > can cause failure to vm-enter as vmm uses that state to determine which
> > of the two Intel instructions to call (vmlaunch or vmresume).
> >
> > ok?
> >
> > -dv
> >
> > Index: sys/arch/amd64/amd64/vmm.c
> > ===
> > RCS file: /opt/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> > retrieving revision 1.308
> > diff -u -p -r1.308 vmm.c
> > --- sys/arch/amd64/amd64/vmm.c  4 May 2022 02:24:26 -   1.308
> > +++ sys/arch/amd64/amd64/vmm.c  8 May 2022 18:37:42 -
> > @@ -140,7 +140,7 @@ int vm_rwregs(struct vm_rwregs_params *,
> >  int vm_mprotect_ept(struct vm_mprotect_ept_params *);
> >  int vm_rwvmparams(struct vm_rwvmparams_params *, int);
> >  int vm_find(uint32_t, struct vm **);
> > -int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> > +int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state 
> > *);
> >  int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> >  int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state 
> > *);
> >  int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
> > @@ -978,7 +978,7 @@ vm_rwregs(struct vm_rwregs_params *vrwp,
> > if (vmm_softc->mode == VMM_MODE_VMX ||
> > vmm_softc->mode == VMM_MODE_EPT)
> > ret = (dir == 0) ?
> > -   vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, vrs) :
> > +   vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) :
> > vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
> > else if (vmm_softc->mode == VMM_MODE_SVM ||
> > vmm_softc->mode == VMM_MODE_RVI)
> > @@ -1986,6 +1986,7 @@ vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
> >   * Parameters:
> >   *  vcpu: the vcpu to read register values from
> >   *  regmask: the types of registers to read
> > + *  loadvmcs: bit to indicate whether the VMCS has to be loaded first
> >   *  vrs: output parameter where register values are stored
> >   *
> >   * Return values:
> > @@ -1993,7 +1994,7 @@ vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
> >   *  EINVAL: an error reading registers occurred
> >   */
> >  int
> > -vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask,
> > +vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
> >  struct vcpu_reg_state *vrs)
> >  {
> > int i, ret = 0;
> > @@ -2005,6 +2006,11 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
> > struct vcpu_segment_info *sregs = vrs->vrs_sregs;
> > struct vmx_msr_store *msr_store;
> >
> > +   if (loadvmcs) {
> > +   if (vcpu_reload_vmcs_vmx(vcpu))
> > +   return (EINVAL);
> > +   }
> > +
> >  #ifdef VMM_DEBUG
> > /* VMCS should be loaded... */
> > paddr_t pa = 0ULL;
> > @@ -2393,6 +2399,7 @@ out:
> > if (loadvmcs) {
> > if (vmclear(>vc_control_pa))
> > ret = EINVAL;
> > +   atomic_swap_uint(>vc_vmx_vmcs_state, VMCS_CLEARED);
> > }
> > return (ret);
> >  }
> > @@ -4631,7 +4638,7 @@ vmm_translate_gva(struct vcpu *vcpu, uin
> >
> > if (vmm_softc->mode == VMM_MODE_EPT ||
> > vmm_softc->mode == VMM_MODE_VMX) {
> > -   if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, ))
> > +   if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 1, ))
> > return (EINVAL);
> > } else if (vmm_softc->mode == VMM_MODE_RVI ||
> > vmm_softc->mode == VMM_MODE_SVM) {
> > @@ -5111,7 +5118,7 @@ vcpu_run_vmx(struct vcpu *vcpu, struct v
> > vcpu->vc_last_pcpu = curcpu();
> >
> > /* Copy the VCPU register state to the exit structure */
> > -   if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, >vc_exit.vrs))
> > +   if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL, 0, >vc_exit.vrs))
> > ret = EINVAL;
> > vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
>
>
> --
> -Dave Voutila
>

Re: Picky, but much more efficient arc4random_uniform!

2022-05-15 Thread Mike Larkin

On Sun, May 15, 2022 at 08:40:19PM -0500, Luke Small wrote:
> https://marc.info/?l=openbsd-tech&m=165259528425835&w=2
>
> This one (which is strongly based upon my first of two versions) which I
> submitted after Guenther correctly trashed version 2, doesn’t reuse any
> part of the sample. It picks up a clean new bitfield upon failure.
>
> I think Guenther didn’t, perhaps like yourself, realize I submitted this
> later program. That’s why he said it wasn’t correct. It didn’t occur to me
> at the time of responding to him: “correct correct correct.”
>

You've had several developers tell you this is not going to go in. I'd suggest
"read the room".

If you want this for your own use, just keep it as a local diff. Nobody will
know (or likely care).

-ml

> On Sun, May 15, 2022 at 7:47 PM Damien Miller  wrote:
>
> > On Sat, 14 May 2022, Luke Small wrote:
> >
> > > Look at my code. I don’t even use a modulus operator. I perform hit and
> > > miss with a random bitstream.
> > >
> > > How can I have a bias of something I don’t do? I return a bitstream which
> > > meets the parameters of being a value less than the upper bound. Much
> > like
> > > arc4random_buf().
> > >
> > > If I use arc4random_uniform() repeatedly to create a random distribution
> > of
> > > say numbers less than 0x1000 or even something weird like 0x1300 will the
> > > random distribution be better with arc4random_uniform() or with mine? For
> > > 0x1000 mine will simply pluck 12 bits of random data straight from the
> > > arc4random() (and preserve the remaining 20 bits for later) on the first
> > > try, just like it’s arc4random_buf().
> > >
> > > arc4random_uniform() will perform a modulus of a 32 bit number which adds
> > > data to the bitstream. Does it make it better? Perhaps it makes it harder
> > > to guess the source bits.
> > >
> > > I don’t know; and I’m not going to pretend to be a cryptologist. But I’m
> > > looking at modulo bias.
> > >
> > > I didn’t know what it was, before, but I basically “rejection sample”:
> > >
> > >
> > https://research.kudelskisecurity.com/2020/07/28/the-definitive-guide-to-modulo-bias-and-how-to-avoid-it/
> >
> > No, you aren't:
> >
> > > for (;;) {
> > > if (rand_bits < bits) {
> > > rand_holder |= ((uint64_t)arc4random()) <<
> > > rand_bits;
> > >
> > > /*
> > >  * rand_bits will be a number between 0 and 31
> > here
> > >  * so the 0x20 bit will be empty
> > >  * rand_bits += 32;
> > >  */
> > > rand_bits |= 32;
> > > }
> > >
> > > ret = rand_holder & uuu;
> > > rand_holder >>= bits;
> > > rand_bits -= bits;
> > >
> > > if (ret < upper_bound)
> > > return ret;
> > > }
> >
> > This isn't rejection sampling. This is reusing part of the rejected
> > sample.
> >
> > Think of it like this: you want to uniformly generate a number in the
> > range [2:10] by rolling 2x 6-sided dice. What do you do when you roll
> > 11 or 12? You can't just reroll one of the dice because the other die
> > is constrained to have rolled either 5 or 6, and so proceeding with
> > it would force the output to be in the range [6:11] for these ~5.6%
> > of initial rolls. Your output is no longer uniform.
> >
> > BTW the existing code already implements the preferred approach of the
> > article you quoted.
> >
> > -d
>
> --
> -Luke

Re: vmm: give a lonely enum a friend, fixing `vmctl receive`

2022-05-13 Thread Mike Larkin

On Sun, May 08, 2022 at 10:30:46PM -0400, Dave Voutila wrote:
> tech@,
>
> Another vmm/vmd update: fix `vmctl receive` on Intel hosts by adding
> another fault enum value to disambiguate fault reasons.
>
> It's expected that the guest will trigger nested page faults after being
> received by vmd. When you connect to the vm using `vmctl console` and
> interact with the guest, it generates both a page fault and interrupt.
>
> This combo is special because while the page fault will be handled by
> vmm via uvm_fault(9), it will still exit to userland/vmd to handle the
> interrupt.
>
> vmd always checks the vm-exit reason after the return from vmm before
> looping around and servicing interrupts before re-entering vmm. vmd has
> a single userland handler for nested page faults for when we have a
> protection fault. In this case, it reboots the vm. :-(
>
> Since the enum we used for the fault type flag has only one value, vmm
> isn't able to properly convey the type of nested fault. In this case, I
> chose to add a "VEE_FAULT_HANDLED" value to indicate the fault has
> already been handled by vmm and no userland assist is needed. (And
> HANDLED is the same num of characters of PROTECT.)
>
> This prevents the ambiguity and vm happily skips rebooting the vm.
>
> It's possible this reboot could occur at any point in a vm's lifetime,
> though I think the probability is low, so this is worth fixing
> regardless.
>
> ok?
>

This is ok mlarkin. Thanks!

> -dv
>
>
> Index: sys/arch/amd64/amd64/vmm.c
> ===
> RCS file: /opt/cvs/src/sys/arch/amd64/amd64/vmm.c,v
> retrieving revision 1.308
> diff -u -p -r1.308 vmm.c
> --- sys/arch/amd64/amd64/vmm.c4 May 2022 02:24:26 -   1.308
> +++ sys/arch/amd64/amd64/vmm.c9 May 2022 13:45:02 -
> @@ -5732,14 +5732,16 @@ vmx_fault_page(struct vcpu *vcpu, paddr_
>   int fault_type, ret;
>
>   fault_type = vmx_get_guest_faulttype();
> - if (fault_type == -1) {
> + switch (fault_type) {
> + case -1:
>   printf("%s: invalid fault type\n", __func__);
>   return (EINVAL);
> - }
> -
> - if (fault_type == VM_FAULT_PROTECT) {
> + case VM_FAULT_PROTECT:
>   vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
>   return (EAGAIN);
> + default:
> + vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED;
> + break;
>   }
>
>   /* We may sleep during uvm_fault(9), so reload VMCS. */
> Index: sys/arch/amd64/include/vmmvar.h
> ===
> RCS file: /opt/cvs/src/sys/arch/amd64/include/vmmvar.h,v
> retrieving revision 1.75
> diff -u -p -r1.75 vmmvar.h
> --- sys/arch/amd64/include/vmmvar.h   3 May 2022 21:39:19 -   1.75
> +++ sys/arch/amd64/include/vmmvar.h   9 May 2022 13:38:18 -
> @@ -324,7 +324,8 @@ enum {
>  };
>
>  enum {
> - VEE_FAULT_PROTECT
> + VEE_FAULT_HANDLED,
> + VEE_FAULT_PROTECT,
>  };
>
>  enum {
>

Re: vmd: fix rebooting a received vm

2022-05-07 Thread Mike Larkin

On Sat, May 07, 2022 at 07:58:15AM -0400, Dave Voutila wrote:
> tech@:
>
> Now that vmd only accounts for memory in bytes [1], this fix is a lot
> simpler!
>
> If you use the send/receive functionality and "receive" a sent vm, it
> functions as expected. However, if that vm tries to reboot, it causes
> vmd to exit. (An ipc socket is closed in some error handling and
> triggers a code path ending vmd's event loop.)
>
> The problem was two-fold (and describing it is probably longer than the
> diff itself):
>
> 1. Not un-toggling the VM_RECEIVE_STATE bit on the vm after initial
>launch, triggering "received vm" code paths upon vm reboot.
>
>vmd's "parent" and "vmm" processes *both* track known vm's. The "vmm"
>process removes the vm from its list upon a loss of the child process
>(vm reboot), but the "parent" process keeps it in the tailq and
>reuses it, knowing the vm just requires a restart. (It has to resend
>the vm to the "vmm" process, which sees it as a "new" vm, creating a
>new child process.)
>
> 2. A "received vm" comes with pre-defined memory ranges created when it
>initially booted and these are restored before the vm is resumed. The
>problem is vmd overloads the use of these memory ranges, setting the
>number of ranges to 0 and using the first range's size as a way to
>communicate "max memory" for the vm. Since a clean reboot of a vm
>results in the "parent" process triggering the "vm start" paths, it
>assumes it can use that logic to determine the max memory.
>
>Depending on if you only fix (1) above, the vm results in either
>using the default vm memory (512MB) _or_ the size of the first
>range...which is always 640KB.
>
>Contrary to popular belief, 640KB is not enough for everyone,
>especially our vm.
>
> The diff below resolves (1) in vmd.c's vm_stop() and (2) in config.c's
> config_setvm().
>
> The fact this issue has been present for a while indicates few people use
> or care about the send/receive functionality. I want to keep the
> functionality in place for a while longer because I've begun to
> experiment with it *and* it's helping me find other bugs in vmd(8) as
> well as vmm(4). (Expect a vmm diff shortly.)
>
> For anyone looking to test [2], the simplest approach is to create a vm
> without a disk and just boot the bsd.rd ramdisk while using a memory value
> that's *not* the default 512m:
>
>   # vmctl start -c -b /bsd.rd -m 1g test
>
> Wait for it to give you the installer prompt and then send it to a file:
>
>   # vmctl send test > test.vm
>
> You should have a 1g test.vm file. Restore it:
>
>   # vmctl receive test < test.vm
>
> Connect to the console and reboot:
>
>   # vmctl console test
>   (in vm)# reboot
>
> With the diff: the vm reboots and you end up back at the installer
> prompt. `vmctl stat` shows the correct 1g max mem. Reboot at least one
> more time and confirm the same result.
>
> Without the diff: the vmd parent process will exit taking its children
> with it.
>
> ok?
>

reads ok to me, thanks for the explanation. ok mlarkin

> -dv
>
> [1] https://marc.info/?l=openbsd-tech&m=165151507323339&w=2
>
> [2] note that the vmm issue I found means this will work reliably on AMD
> hosts, but may not on Intel hosts. fix coming soon.
>
> diff refs/heads/master refs/heads/vmd-memrange
> blob - 2750be4f580896325e5a3971667c64d61231db06
> blob + cf076cdc27ceaee6e2cbb9cce5825452f0a6
> --- usr.sbin/vmd/config.c
> +++ usr.sbin/vmd/config.c
> @@ -231,6 +231,7 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui
>   unsigned int unit;
>   struct timeval   tv, rate, since_last;
>   struct vmop_addr_req var;
> + size_t   bytes = 0;
>
>   if (vm->vm_state & VM_STATE_RUNNING) {
>   log_warnx("%s: vm is already running", __func__);
> @@ -518,6 +519,14 @@ config_setvm(struct privsep *ps, struct vmd_vm *vm, ui
>
>   free(tapfds);
>
> + /* Collapse any memranges after the vm was sent to PROC_VMM */
> + if (vcp->vcp_nmemranges > 0) {
> + for (i = 0; i < vcp->vcp_nmemranges; i++)
> + bytes += vcp->vcp_memranges[i].vmr_size;
> + memset(>vcp_memranges, 0, sizeof(vcp->vcp_memranges));
> + vcp->vcp_nmemranges = 0;
> + vcp->vcp_memranges[0].vmr_size = bytes;
> + }
>   vm->vm_state |= VM_STATE_RUNNING;
>   return (0);
>
> blob - 4d7e7b5e613723c2166077523dd6e8b9177d6718
> blob + d5d841fd20d9f82e852e3b844ec81d9383713923
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -1162,7 +1162,8 @@ vm_stop(struct vmd_vm *vm, int keeptty, const char *ca
>   __func__, ps->ps_title[privsep_process], caller,
>   vm->vm_vmid, keeptty ? ", keeping tty open" : "");
>
> - vm->vm_state &= ~(VM_STATE_RUNNING | VM_STATE_SHUTDOWN);
> + vm->vm_state &= ~(VM_STATE_RECEIVED | VM_STATE_RUNNING
> + | VM_STATE_SHUTDOWN);
>
>

Re: aml parse error

2022-05-03 Thread Mike Larkin

On Tue, May 03, 2022 at 04:46:55PM +0200, aphekz wrote:
> On Mon, May 02, 2022 at 07:05:24PM -0700, Mike Larkin wrote:
> > On Mon, May 02, 2022 at 11:42:57PM +0200, aphekz wrote:
> > >
> > > May  2 21:29:06 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:29:06 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > > May  2 21:29:55 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:29:55 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > > May  2 21:30:00 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:30:00 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > > May  2 21:30:03 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:30:03 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > > May  2 21:30:28 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:30:28 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > > May  2 21:31:40 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: 
> > > OPST
> > > May  2 21:31:40 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> > >
> > > any hint what is going on?  some acpi/temp related stuff?
> > >
> >
> > likely bad AML. BIOS on this machine is over 10 years old, I'd look for a 
> > newer
> > one.
> >
> > -ml
>
> old machine with no newer BIOS available.
>
> i haven't noticed such problem on 7.0-stable, so i'd like to at least

I'd start bisecting diffs then. Shouldn't take too long to find the commit
that broke it.

-ml

> understand what the problem is or might be. would be great to fix it too.
>
> as for now acpidump/iasl says.
>
> External (OPST, UnknownObj)
>
> Method (_Q14, 0, NotSerialized)  // _Qxx: EC Query, xx=0x00-0xFF
> {
> P80H = 0x14
> If ((Zero == OPST))
> {
> OPST = One
> }
> Else
> {
> OPST = Zero
> }
>
> Notify (^^^PEG0.PEGP, 0xDF) // Hardware-Specific
> }
>
> isn't it some acpitz / inteldrm related method?
>
>
> >
> > > --
> > >
> > > OpenBSD 7.1-stable (GENERIC.MP) #0: Mon May  2 20:31:55 CEST 2022
> > > aph...@dev.kroczynski.net:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> > > real mem = 8483532800 (8090MB)
> > > avail mem = 8209141760 (7828MB)
> > > random: good seed from bootblocks
> > > mpath0 at root
> > > scsibus0 at mpath0: 256 targets
> > > mainbus0 at root
> > > bios0 at mainbus0: SMBIOS rev. 2.6 @ 0xf9e10 (66 entries)
> > > bios0: vendor LENOVO version "44CN45WW" date 02/16/2012
> > > bios0: LENOVO HuronRiver Platform
> > > acpi0 at bios0: ACPI 3.0
> > > acpi0: sleep states S0 S1 S3 S4 S5
> > > acpi0: tables DSDT FACP SLIC SSDT ASF! HPET APIC MCFG SSDT SSDT UEFI UEFI 
> > > UEFI
> > > acpi0: wakeup devices P0P1(S4) GLAN(S4) EHC1(S3) EHC2(S3) HDEF(S4) 
> > > RP01(S4) PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) RP04(S4) PXSX(S4) 
> > > RP05(S4) PXSX(S4) RP06(S4) [...]
> > > acpitimer0 at acpi0: 3579545 Hz, 24 bits
> > > acpihpet0 at acpi0: 14318179 Hz
> > > acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> > > cpu0 at mainbus0: apid 0 (boot processor)
> > > cpu0: Intel(R) Celeron(R) CPU B830 @ 1.80GHz, 1796.21 MHz, 06-2a-07
> > > cpu0: 
> > > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,XSAVE,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> > > cpu0: 256KB 64b/line 8-way L2 cache
> > > cpu0: smt 0, core 0, package 0
> > > mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> > > cpu0: apic clock running at 99MHz
> > > cpu0: mwait min=64, max=64, C-substates=0.2.1, IBE
> > > cpu1 at mainbus0: apid 2 (application processor)
> > > cpu1: Intel(R) Celeron(R) CPU B830 @ 1.80GHz, 1795.94 MHz, 06-2a-07
> > > cpu1: 
> > > FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,XSAVE,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IB

Re: aml parse error

2022-05-02 Thread Mike Larkin

On Mon, May 02, 2022 at 11:42:57PM +0200, aphekz wrote:
>
> May  2 21:29:06 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:29:06 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> May  2 21:29:55 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:29:55 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> May  2 21:30:00 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:30:00 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> May  2 21:30:03 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:30:03 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> May  2 21:30:28 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:30:28 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
> May  2 21:31:40 dev /bsd: ### AML PARSE ERROR (0x8f3a): Undefined name: OPST
> May  2 21:31:40 dev /bsd: error evaluating: \\_SB_.PCI0.LPCB.EC0_._Q14
>
> any hint what is going on?  some acpi/temp related stuff?
>

likely bad AML. BIOS on this machine is over 10 years old, I'd look for a newer
one.

-ml

> --
>
> OpenBSD 7.1-stable (GENERIC.MP) #0: Mon May  2 20:31:55 CEST 2022
> aph...@dev.kroczynski.net:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 8483532800 (8090MB)
> avail mem = 8209141760 (7828MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 2.6 @ 0xf9e10 (66 entries)
> bios0: vendor LENOVO version "44CN45WW" date 02/16/2012
> bios0: LENOVO HuronRiver Platform
> acpi0 at bios0: ACPI 3.0
> acpi0: sleep states S0 S1 S3 S4 S5
> acpi0: tables DSDT FACP SLIC SSDT ASF! HPET APIC MCFG SSDT SSDT UEFI UEFI UEFI
> acpi0: wakeup devices P0P1(S4) GLAN(S4) EHC1(S3) EHC2(S3) HDEF(S4) RP01(S4) 
> PXSX(S4) RP02(S4) PXSX(S4) RP03(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) 
> PXSX(S4) RP06(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpihpet0 at acpi0: 14318179 Hz
> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Celeron(R) CPU B830 @ 1.80GHz, 1796.21 MHz, 06-2a-07
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,XSAVE,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> cpu0: apic clock running at 99MHz
> cpu0: mwait min=64, max=64, C-substates=0.2.1, IBE
> cpu1 at mainbus0: apid 2 (application processor)
> cpu1: Intel(R) Celeron(R) CPU B830 @ 1.80GHz, 1795.94 MHz, 06-2a-07
> cpu1: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,POPCNT,DEADLINE,XSAVE,NXE,RDTSCP,LONG,LAHF,PERF,ITSC,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,MELTDOWN
> cpu1: 256KB 64b/line 8-way L2 cache
> cpu1: smt 0, core 1, package 0
> ioapic0 at mainbus0: apid 2 pa 0xfec0, version 20, 24 pins
> acpimcfg0 at acpi0
> acpimcfg0: addr 0xf800, bus 0-63
> acpiprt0 at acpi0: bus 0 (PCI0)
> acpiprt1 at acpi0: bus -1 (P0P1)
> acpiprt2 at acpi0: bus 1 (RP01)
> acpiprt3 at acpi0: bus -1 (RP02)
> acpiprt4 at acpi0: bus -1 (RP03)
> acpiprt5 at acpi0: bus 2 (RP04)
> acpiprt6 at acpi0: bus -1 (RP05)
> acpiprt7 at acpi0: bus -1 (RP06)
> acpiprt8 at acpi0: bus -1 (RP07)
> acpiprt9 at acpi0: bus -1 (RP08)
> acpiprt10 at acpi0: bus -1 (PEG0)
> acpiprt11 at acpi0: bus -1 (PEG1)
> acpiprt12 at acpi0: bus -1 (PEG2)
> acpiprt13 at acpi0: bus -1 (PEG3)
> acpiec0 at acpi0
> acpipci0 at acpi0 PCI0: 0x0004 0x0011 0x0001
> acpicmos0 at acpi0
> acpiac0 at acpi0: AC unit online
> acpibat0 at acpi0: BAT0 model "L08L6Y02" serial 44863 type LION oem 
> "4f594e4153"
> "VPC2004" at acpi0 not configured
> "ETD0604" at acpi0 not configured
> acpibtn0 at acpi0: LID0
> acpibtn1 at acpi0: SLPB
> "PNP0C14" at acpi0 not configured
> acpicpu0 at acpi0: C2(500@80 io@0x414), C1(1000@1 halt), PSS
> acpicpu1 at acpi0: C2(500@80 io@0x414), C1(1000@1 halt), PSS
> acpitz0 at acpi0: critical temperature is 98 degC
> acpitz1 at acpi0: critical temperature is 126 degC
> acpivideo0 at acpi0: PEGP
> acpivideo1 at acpi0: GFX0
> acpivout0 at acpivideo1: DD02
> cpu0: using VERW MDS workaround (except on vmm entry)
> cpu0: Enhanced SpeedStep 1796 MHz: speeds: 1800, 1700, 1600, 1500, 1400, 
> 1300, 1200, 1100, 1000, 900, 800 MHz
> pci0 at mainbus0 bus 0
> pchb0 at pci0 dev 0 function 0 "Intel Core 2G Host" rev 0x09
> inteldrm0 at pci0 dev 2 function 0 "Intel HD Graphics 2000" rev 0x09
> drm0 at inteldrm0
> inteldrm0: msi, SANDYBRIDGE, gen 6
> "Intel 6 Series MEI" rev 0x04

Re: migrate vmd/vmm/vmctl to use bytes, not MBs

2022-05-02 Thread Mike Larkin

On Mon, May 02, 2022 at 04:09:19PM -0400, Dave Voutila wrote:
>
> Dave Voutila  writes:
>
> > tech@,
> >
> > tl;dr: standardize vmd/vmm/vmctl on counting memory in bytes at all
> > times instead of a mix of MiB and bytes.
> >
> > There's some design friction between vmd(8)/vmctl(8) and vmm(4).
> >
> > For instance, the user-facing code deals in MiB, but internally a vm's
> > memory ranges are defined in terms of bytes...but only after being
> > converted at vm launch.
> >
> > Consequently, at different points in vmd's lifecycle, the same struct
> > member for storing a vm's requested memory size contains a value in
> > bytes OR in MiB meaning any code accessing the value needs to be
> > contextually aware of if/when the value must be scaled.
> >
> > Given we dropped vmm(4) on i386 a while ago, let's make use of 64-bit
> > values! Plus this makes my other queued-up changes simpler, as they can
> > avoid confusing scaling at points.
> >
> > There *is* some existing code duplication between vmd/vmctl related to
> > parsing user provided memory values via scan_scaled(3), but I'm not
> > looking to consolidate that now.
> >
> > If you're going to test, you'll need to build the kernel and either copy
> > or link the patched vmmvar.h into /usr/include/machine/ before building
> > vmd(8)/vmctl(8). (Don't forget to actually boot the kernel.)
> >
> > Otherwise, looking for ok's so I can continue squashing a few bugs in
> > vmd that will be easier/cleaner to fix once this goes in.
> >
> > While the diff looks long-ish, it shouldn't require deep vmm/vmd
> > knowledge to help review ;)
> >
>
> Updated with a fix (printing wrong limit value) and a tweak (checking a
> size_t == 0 vs < 1). No functional changes so if by chance you already
> applied the previous, please feel free to continue to test.
>
> -dv
>

Thanks. ok mlarkin@

-ml

>
> diff refs/heads/master refs/heads/vmd-bytes
> blob - 765fc19bca559dbfd83cd14c48dee94f86c4b3cc
> blob + 699798c1bbffafe7074fea43755ef7e20f073a90
> --- sys/arch/amd64/amd64/vmm.c
> +++ sys/arch/amd64/amd64/vmm.c
> @@ -1575,7 +1575,7 @@ vm_create_check_mem_ranges(struct vm_create_params *vc
>  {
>   size_t i, memsize = 0;
>   struct vm_mem_range *vmr, *pvmr;
> - const paddr_t maxgpa = (uint64_t)VMM_MAX_VM_MEM_SIZE * 1024 * 1024;
> + const paddr_t maxgpa = VMM_MAX_VM_MEM_SIZE;
>
>   if (vcp->vcp_nmemranges == 0 ||
>   vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES)
> blob - 94bb172832d4c2847b1e83ebb9cc05538db6ac80
> blob + 012a023943b9fbc70339166889070ff0b4619046
> --- sys/arch/amd64/include/vmmvar.h
> +++ sys/arch/amd64/include/vmmvar.h
> @@ -31,7 +31,7 @@
>  #define VMM_MAX_KERNEL_PATH  128
>  #define VMM_MAX_VCPUS 512
>  #define VMM_MAX_VCPUS_PER_VM 64
> -#define VMM_MAX_VM_MEM_SIZE  32768
> +#define VMM_MAX_VM_MEM_SIZE  32L * 1024 * 1024 * 1024/* 32 GiB */
>  #define VMM_MAX_NICS_PER_VM  4
>
>  #define VMM_PCI_MMIO_BAR_BASE0xF000ULL
> blob - 0f7e4329a00d54a64fe41e1fb2bd2afcbaa9d68a
> blob + c54aebcb982fdc14cc7a02910301d561e6623e4d
> --- usr.sbin/vmctl/main.c
> +++ usr.sbin/vmctl/main.c
> @@ -404,24 +404,39 @@ parse_network(struct parse_result *res, char *word)
>  int
>  parse_size(struct parse_result *res, char *word)
>  {
> - long long val = 0;
> + char result[FMT_SCALED_STRSIZE];
> + long long val = 0;
>
>   if (word != NULL) {
>   if (scan_scaled(word, &val) != 0) {
> - warn("invalid size: %s", word);
> + warn("invalid memory size: %s", word);
>   return (-1);
>   }
>   }
>
>   if (val < (1024 * 1024)) {
> - warnx("size must be at least one megabyte");
> + warnx("memory size must be at least 1M");
>   return (-1);
> - } else
> - res->size = val / 1024 / 1024;
> + }
>
> - if ((res->size * 1024 * 1024) != val)
> - warnx("size rounded to %lld megabytes", res->size);
> + if (val > VMM_MAX_VM_MEM_SIZE) {
> + if (fmt_scaled(VMM_MAX_VM_MEM_SIZE, result) == 0)
> + warnx("memory size too large (limit is %s)", result);
> + else
> + warnx("memory size too large");
> + return (-1);
> + }
>
> + /* Round down to the megabyte. */
> + res->size = (val / (1024 * 1024)) * (1024 * 1024);
> +
> + if (res->size != (size_t)val) {
> + if (fmt_scaled(res->size, result) == 0)
> + warnx("memory size rounded to %s", result);
> + else
> + warnx("memory size rounded to %zu bytes", res->size);
> + }
> +
>   return (0);
>  }
>
> blob - 4c0b62fc6e16adbeb5cf951dcafbaebdbc356da8
> blob + 15e6dd89ec15fa2501dcf6539c9ae9d90879ba56
> --- usr.sbin/vmctl/vmctl.c
> +++ usr.sbin/vmctl/vmctl.c
> @@ -73,7 +73,7 @@ struct imsgbuf *ibuf;
>   *  ENOMEM if a memory allocation failure occurred.
>   */
>  int
>

Re: DPTF sensors driver

2022-04-25 Thread Mike Larkin

On Sun, Apr 24, 2022 at 08:00:50PM -0500, joshua stein wrote:
> Any interest in this?
>
> acpidptfs0 at acpi0: SEN2, sensor "Sensor 2 USB2"
> acpidptfs1 at acpi0: SEN4, sensor "Sensor 4 Ambience"
> acpidptfs2 at acpi0: SEN1, sensor "Thermistor CPU SOC"
> acpidptfs3 at acpi0: SEN3, sensor "Sensor 3 SSD"
> acpidptfs4 at acpi0: SEN5, sensor "Thermistor USB Type-C"
>
> hw.sensors.acpidptfs0.temp0=32.05 degC (Sensor 2 USB2)
> hw.sensors.acpidptfs1.temp0=26.05 degC (Sensor 4 Ambience)
> hw.sensors.acpidptfs2.temp0=35.05 degC (Thermistor CPU SOC)
> hw.sensors.acpidptfs3.temp0=35.05 degC (Sensor 3 SSD)
> hw.sensors.acpidptfs4.temp0=29.05 degC (Thermistor USB Type-C)
>

I like it, one question below.

otherwise ok mlarkin

-ml

>
>
> commit 959656ab8227367705adc45d73f5b6d47d552ac3
> Author: joshua stein 
> Date:   Mon Aug 9 12:45:15 2021 -0500
>
> acpidptfs: Add a driver for Dynamic Platform and Thermal Framework sensors
>
> diff --git sys/arch/amd64/conf/GENERIC sys/arch/amd64/conf/GENERIC
> index ecbf4d82305..3fc30b1e941 100644
> --- sys/arch/amd64/conf/GENERIC
> +++ sys/arch/amd64/conf/GENERIC
> @@ -85,6 +85,7 @@ acpihid* at acpi?
>  ipmi0 at acpi? disable
>  ccpmic*  at iic?
>  tipmic*  at iic?
> +acpidptfs*   at acpi?
>
>  mpbios0  at bios0
>
> diff --git sys/dev/acpi/acpidptfs.c sys/dev/acpi/acpidptfs.c
> new file mode 100644
> index 000..c863c8d1f97
> --- /dev/null
> +++ sys/dev/acpi/acpidptfs.c
> @@ -0,0 +1,173 @@
> +/* $OpenBSD$ */
> +/*
> + * Copyright (c) 2021 joshua stein 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +
> +struct acpidptfs_softc {
> + struct device   sc_dev;
> +
> + struct acpi_softc   *sc_acpi;
> + struct aml_node *sc_devnode;
> +
> + int sc_devtype;
> +
> + struct ksensor  sc_sensor;
> + struct ksensordev   sc_sensdev;
> +};
> +
> +#define ACPIDPTFS_TYPE_SENSOR 0x03
> +#define ACPIDPTFS_TYPE_CHARGER   0x0B
> +#define ACPIDPTFS_TYPE_BATTERY   0x0C
> +
> +int  acpidptfs_match(struct device *, void *, void *);
> +void acpidptfs_attach(struct device *, struct device *, void *);
> +void acpidptfs_sensor_add(struct acpidptfs_softc *);
> +int  acpidptfs_notify(struct aml_node *, int, void *);
> +void acpidptfs_update(struct acpidptfs_softc *);
> +
> +struct cfattach acpidptfs_ca = {
> + sizeof(struct acpidptfs_softc),
> + acpidptfs_match,
> + acpidptfs_attach,
> + NULL,
> +};
> +
> +struct cfdriver acpidptfs_cd = {
> + NULL, "acpidptfs", DV_DULL
> +};
> +
> +const char *acpidptfs_hids[] = {
> + "INT3403",
> + "INTC1043",
> + "INTC1046",
> + NULL
> +};
> +
> +int
> +acpidptfs_match(struct device *parent, void *match, void *aux)
> +{
> + struct acpi_attach_args *aaa = aux;
> + struct cfdata *cf = match;
> +
> + return acpi_matchhids(aaa, acpidptfs_hids, cf->cf_driver->cd_name);
> +}
> +
> +void
> +acpidptfs_attach(struct device *parent, struct device *self, void *aux)
> +{
> + struct acpidptfs_softc *sc = (struct acpidptfs_softc *)self;
> + struct acpi_attach_args *aa = aux;
> + int64_t res;
> +
> + sc->sc_acpi = (struct acpi_softc *)parent;
> + sc->sc_devnode = aa->aaa_node;
> + sc->sc_devtype = -1;
> +
> + printf(": %s", sc->sc_devnode->name);
> +
> + if (aml_evalinteger((struct acpi_softc *)parent, aa->aaa_node,
> + "_TMP", 0, NULL, &res) == 0)
> + sc->sc_devtype = ACPIDPTFS_TYPE_SENSOR;
> + else if (aml_evalinteger((struct acpi_softc *)parent, aa->aaa_node,
> + "PTYP", 0, NULL, &res) == 0)
> + sc->sc_devtype = res;
> +
> + switch (sc->sc_devtype) {
> + case ACPIDPTFS_TYPE_SENSOR:
> + acpidptfs_sensor_add(sc);
> + break;
> + case ACPIDPTFS_TYPE_CHARGER:
> + /* TODO */
> + printf(", charger\n");
> + break;
> + case ACPIDPTFS_TYPE_BATTERY:
> + /* TODO */
> + printf(", battery\n");
> + break;
> + default:
> + printf(", unknown type\n");
> +

Re: amdgpio(4) : preserve pin configuration on resume

2022-04-20 Thread Mike Larkin

On Wed, Apr 20, 2022 at 11:39:00AM +0200, Mark Kettenis wrote:
> > Date: Tue, 19 Apr 2022 22:02:00 -0700
> > From: Mike Larkin 
> >
> > On at least the Asus ROG Zephyrus 14 (2020), the trackpad fails to generate
> > any interrupts after resume. I tracked this down to amdgpio(4) not 
> > generating
> > interrupts after resume, and started looking at missing soft state.
> >
> > This diff preserves the interrupt pin configurations and restores them after
> > resume. This makes the device function properly post-zzz and post-ZZZ.
>
> I think it might make sense to structure this a bit more like
> pchgpio(4).  There we only restore the configuration for pins that are
> "in use" by OpenBSD.
>
> > Note: amdgpio_read_pin does not return the value that was previously written
> > during amdgpio_intr_establish (it always just returns 0x1 if the pin is
> > in use), so I'm just saving the actual value we write during
> > amdgpio_intr_establish and restoring that during resume.
>
> Well, using amdgpio_read_pin() for the purpose of saving the pin
> configuration doesn't make sense.  That function returns the pin input
> state.
>
> What you need to do is to read the register using bus_space_read_4()
> and restore that value.  Again take a look at pchgpio(4).
>
> > Note 2: In xxx_activate() functions, we usually call 
> > config_activate_children
> > but since amdgpio doesn't have any children, I left that out.
>
> I think that's fine.  But you should do the save/restore in
> DVACT_SUSPEND/DVACT_RESUME.  You want to restore the state as early as
> possible such that you don't get spurious interrupts when the BIOS
> leaves GPIO pins misconfigured.  Again, look at pchgpio(4).
>

Will take a look, thanks!

-ml

> >
> > ok?
> >
> > -ml
> >
> >
> > diff a82721d2c9ea32a8f6043a3e06b2a7f8280ef68b /export/bin/src/OpenBSD/g14
> > blob - 1d0cd5fcede71f0495a271a9d06fc9c0ecb16412
> > file + sys/dev/acpi/amdgpio.c
> > --- sys/dev/acpi/amdgpio.c
> > +++ sys/dev/acpi/amdgpio.c
> > @@ -62,13 +62,17 @@ struct amdgpio_softc {
> > struct amdgpio_intrhand *sc_pin_ih;
> >
> > struct acpi_gpio sc_gpio;
> > +
> > +   uint32_t *sc_pincfg;
> >  };
> >
> >  int    amdgpio_match(struct device *, void *, void *);
> >  void   amdgpio_attach(struct device *, struct device *, void *);
> > +int    amdgpio_activate(struct device *, int);
> >
> >  const struct cfattach amdgpio_ca = {
> > -   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach
> > +   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach, NULL,
> > +   amdgpio_activate
> >  };
> >
> >  struct cfdriver amdgpio_cd = {
> > @@ -98,6 +102,24 @@ amdgpio_match(struct device *parent, void *match, void
> > return acpi_matchhids(aaa, amdgpio_hids, cf->cf_driver->cd_name);
> >  }
> >
> > +int
> > +amdgpio_activate(struct device *self, int act)
> > +{
> > +   struct amdgpio_softc *sc = (struct amdgpio_softc *)self;
> > +   int rv = 0, i;
> > +
> > +   switch (act) {
> > +   case DVACT_WAKEUP:
> > +   for (i = 0; i < sc->sc_npins; i++) {
> > +   if (sc->sc_pincfg[i])
> > +   bus_space_write_4(sc->sc_memt, sc->sc_memh,
> > +   i * 4, sc->sc_pincfg[i]);
> > +   }
> > +   }
> > +
> > +   return (rv);
> > +}
> > +
> >  void
> >  amdgpio_attach(struct device *parent, struct device *self, void *aux)
> >  {
> > @@ -152,6 +174,8 @@ amdgpio_attach(struct device *parent, struct device *s
> > sc->sc_node->gpio = &sc->sc_gpio;
> >
> > printf(", %d pins\n", sc->sc_npins);
> > +   sc->sc_pincfg = malloc(sc->sc_npins * sizeof(uint32_t), M_DEVBUF,
> > +   M_WAITOK | M_ZERO);
> >
> > acpi_register_gpio(sc->sc_acpi, sc->sc_node);
> > return;
> > @@ -210,6 +234,8 @@ amdgpio_intr_establish(void *cookie, int pin, int flag
> > reg |= AMDGPIO_CONF_ACTBOTH;
> > reg |= (AMDGPIO_CONF_INT_MASK | AMDGPIO_CONF_INT_EN);
> > bus_space_write_4(sc->sc_memt, sc->sc_memh, pin * 4, reg);
> > +
> > +   sc->sc_pincfg[pin] = reg;
> >  }
> >
> >  int
> >
> >
>

amdgpio(4) : preserve pin configuration on resume

2022-04-19 Thread Mike Larkin

On at least the Asus ROG Zephyrus 14 (2020), the trackpad fails to generate
any interrupts after resume. I tracked this down to amdgpio(4) not generating
interrupts after resume, and started looking at missing soft state.

This diff preserves the interrupt pin configurations and restores them after
resume. This makes the device function properly post-zzz and post-ZZZ.

Note: amdgpio_read_pin does not return the value that was previously written
during amdgpio_intr_establish (it always just returns 0x1 if the pin is
in use), so I'm just saving the actual value we write during
amdgpio_intr_establish and restoring that during resume.

Note 2: In xxx_activate() functions, we usually call config_activate_children
but since amdgpio doesn't have any children, I left that out.

ok?

-ml


diff a82721d2c9ea32a8f6043a3e06b2a7f8280ef68b /export/bin/src/OpenBSD/g14
blob - 1d0cd5fcede71f0495a271a9d06fc9c0ecb16412
file + sys/dev/acpi/amdgpio.c
--- sys/dev/acpi/amdgpio.c
+++ sys/dev/acpi/amdgpio.c
@@ -62,13 +62,17 @@ struct amdgpio_softc {
struct amdgpio_intrhand *sc_pin_ih;

struct acpi_gpio sc_gpio;
+
+   uint32_t *sc_pincfg;
 };

 int    amdgpio_match(struct device *, void *, void *);
 void   amdgpio_attach(struct device *, struct device *, void *);
+int    amdgpio_activate(struct device *, int);

 const struct cfattach amdgpio_ca = {
-   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach
+   sizeof(struct amdgpio_softc), amdgpio_match, amdgpio_attach, NULL,
+   amdgpio_activate
 };

 struct cfdriver amdgpio_cd = {
@@ -98,6 +102,24 @@ amdgpio_match(struct device *parent, void *match, void
return acpi_matchhids(aaa, amdgpio_hids, cf->cf_driver->cd_name);
 }

+int
+amdgpio_activate(struct device *self, int act)
+{
+   struct amdgpio_softc *sc = (struct amdgpio_softc *)self;
+   int rv = 0, i;
+
+   switch (act) {
+   case DVACT_WAKEUP:
+   for (i = 0; i < sc->sc_npins; i++) {
+   if (sc->sc_pincfg[i])
+   bus_space_write_4(sc->sc_memt, sc->sc_memh,
+   i * 4, sc->sc_pincfg[i]);
+   }
+   }
+
+   return (rv);
+}
+
 void
 amdgpio_attach(struct device *parent, struct device *self, void *aux)
 {
@@ -152,6 +174,8 @@ amdgpio_attach(struct device *parent, struct device *s
sc->sc_node->gpio = &sc->sc_gpio;

printf(", %d pins\n", sc->sc_npins);
+   sc->sc_pincfg = malloc(sc->sc_npins * sizeof(uint32_t), M_DEVBUF,
+   M_WAITOK | M_ZERO);

acpi_register_gpio(sc->sc_acpi, sc->sc_node);
return;
@@ -210,6 +234,8 @@ amdgpio_intr_establish(void *cookie, int pin, int flag
reg |= AMDGPIO_CONF_ACTBOTH;
reg |= (AMDGPIO_CONF_INT_MASK | AMDGPIO_CONF_INT_EN);
bus_space_write_4(sc->sc_memt, sc->sc_memh, pin * 4, reg);
+
+   sc->sc_pincfg[pin] = reg;
 }

 int

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1643 matches

Mail list logo