OF_getpropstr()

2023-04-07 Thread David Gwynne
turns out OF_getprop is like memcpy, but sometimes you want something
like strlcpy. this is what OF_getpropstr aims to provide.

i know openfirm.h is used on other archs that don't use fdt as their
backend, but i figure we can port this wrapper over to them as need
demands.

ok?

Index: fdt.c
===
RCS file: /cvs/src/sys/dev/ofw/fdt.c,v
retrieving revision 1.33
diff -u -p -r1.33 fdt.c
--- fdt.c   19 Sep 2022 16:12:19 -  1.33
+++ fdt.c   8 Apr 2023 03:16:39 -
@@ -980,6 +980,18 @@ OF_getprop(int handle, char *prop, void 
 }
 
 int
+OF_getpropstr(int handle, char *prop, char *buf, int buflen)
+{
+   int len;
+
+   len = OF_getprop(handle, prop, buf, buflen);
+   if (buflen > 0)
+   buf[min(len, buflen - 1)] = '\0';
+
+   return (len);
+}
+
+int
 OF_getpropbool(int handle, char *prop)
 {
void *node = (char *)tree.header + handle;
Index: openfirm.h
===
RCS file: /cvs/src/sys/dev/ofw/openfirm.h,v
retrieving revision 1.18
diff -u -p -r1.18 openfirm.h
--- openfirm.h  6 May 2021 19:45:16 -   1.18
+++ openfirm.h  8 Apr 2023 03:16:39 -
@@ -50,6 +50,7 @@ int OF_parent(int phandle);
 int OF_instance_to_package(int ihandle);
 int OF_getproplen(int handle, char *prop);
 int OF_getprop(int handle, char *prop, void *buf, int buflen);
+int OF_getpropstr(int handle, char *prop, char *buf, int buflen);
 int OF_getpropbool(int handle, char *);
 uint32_t OF_getpropint(int handle, char *, uint32_t);
 int OF_getpropintarray(int, char *, uint32_t *, int);



Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Mike Larkin
On Fri, Apr 07, 2023 at 12:07:07PM -0400, Dave Voutila wrote:
>
> Dave Voutila  writes:
>
> > In vmd, the vmm process forks to create the resulting vm process. After
> > this fork, the vmm parent process closes all the file descriptors
> > pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
> >
> > The logic was a bit funky, so this change relies on the fact we can
> > attempt the close(2) call and use its success/failure to determine if we
> > have an fd to mark -1 in the vm structure. (I guess we could just
> > blindly set them to -1 post-close, but this feels more sensical to me.)
> >
> > While in the function, it cleans up some commentary and logging around
> > the actual fork(2) call. Since fork(2) sets errno, we can use it in the
> > log message, too.
> >
> > This reduces some noise in my upcoming diff to introduce execvp to the
> > forking of child vm processes (as presented at AsiaBSDCon).
> >
>
> Touch longer, but won't generate ktrace noise by blind calls to close(2)
> and also accounts for the other error conditions (EINTR, EIO).
>
> For EIO, not sure yet how we want to handle it other than log it.
>
> For EINTR, we want to account for that race and make sure we retry since
> the vmm process is long-lived and could inadvertently keep things like
> tty fds or disk image fds open after the guest vm terminates.
>
> I need to check on other calls to close(2) in vmd, but most fds are
> passed via imsg, so I'd presume any interruption is handled in the
> kernel. (Someone correct me if I'm wrong.)
>
>

as we discussed this effort previously in .jp, ok mlarkin@

> diff refs/heads/master refs/heads/vmd-vmm-fd-dance
> commit - 8371c6b4d5765a69d520f93d64f57c6a2989f9ae
> commit + 9c07553a618956ba89f29b93906dcd6ea6c19de8
> blob - 75af37a29a6cd4917d8c4ca9be35c48772314f4b
> blob + e0af71dac5d730c7b88166384a95b94bf14fcfc4
> --- usr.sbin/vmd/vmd.c
> +++ usr.sbin/vmd/vmd.c
> @@ -1956,3 +1956,21 @@ vm_terminate(struct vmd_vm *vm, const char *caller)
>   vm_remove(vm, caller);
>   }
>  }
> +
> +int
> +close_fd(int fd)
> +{
> + int ret;
> +
> + if (fd == -1)
> + return (0);
> +
> + do
> + ret = close(fd);
> + while (ret == -1 && errno == EINTR);
> +
> + if (ret == -1 && errno == EIO)
> + log_warn("%s(%d)", __func__, fd);
> +
> + return (ret);
> +}
> blob - 7bbbf62734bc7cd575c4fee384193037b0495bb4
> blob + 7228ace4b31a9fd6b5eb90bf1d048198a03a04fc
> --- usr.sbin/vmd/vmd.h
> +++ usr.sbin/vmd/vmd.h
> @@ -443,6 +443,7 @@ void   getmonotime(struct timeval *);
>  uint32_t prefixlen2mask(uint8_t);
>  void  prefixlen2mask6(u_int8_t, struct in6_addr *);
>  void  getmonotime(struct timeval *);
> +int   close_fd(int);
>
>  /* priv.c */
>  void  priv(struct privsep *, struct privsep_proc *);
> blob - d9eff3c8f703854c7b1e49fee04b8e426956cfbb
> blob + 7db920beec7e34a63f5ba3602f17185eec33d3f7
> --- usr.sbin/vmd/vmm.c
> +++ usr.sbin/vmd/vmm.c
> @@ -641,12 +641,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
>   if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
>   fatal("socketpair");
>
> - /* Start child vmd for this VM (fork, chroot, drop privs) */
> + /* Fork the vmm process to create the vm, inheriting open device fds. */
>   vm_pid = fork();
> -
> - /* Start child failed? - cleanup and leave */
>   if (vm_pid == -1) {
> - log_warnx("%s: start child failed", __func__);
> + log_warn("%s: fork child failed", __func__);
>   ret = EIO;
>   goto err;
>   }
> @@ -654,31 +652,24 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
>   if (vm_pid > 0) {
>   /* Parent */
>   vm->vm_pid = vm_pid;
> - close(fds[1]);
> + close_fd(fds[1]);
>
>   for (i = 0 ; i < vcp->vcp_ndisks; i++) {
>   for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
> - if (vm->vm_disks[i][j] != -1)
> - close(vm->vm_disks[i][j]);
> - vm->vm_disks[i][j] = -1;
> + if (close_fd(vm->vm_disks[i][j]) == 0)
> + vm->vm_disks[i][j] = -1;
>   }
>   }
>   for (i = 0 ; i < vcp->vcp_nnics; i++) {
> - close(vm->vm_ifs[i].vif_fd);
> - vm->vm_ifs[i].vif_fd = -1;
> + if (close_fd(vm->vm_ifs[i].vif_fd) == 0)
> + vm->vm_ifs[i].vif_fd = -1;
>   }
> - if (vm->vm_kernel != -1) {
> - close(vm->vm_kernel);
> + if (close_fd(vm->vm_kernel) == 0)
>   vm->vm_kernel = -1;
> - }
> - if (vm->vm_cdrom != -1) {
> - close(vm->vm_cdrom);
> + if (close_fd(vm->vm_cdrom) == 0)
>   vm->vm_cdrom = 

Re: ahci at fdt where the firmware doesnt help

2023-04-07 Thread Mark Kettenis
> Date: Fri, 7 Apr 2023 07:58:02 +1000
> From: David Gwynne 
> 
> the banana pi r2 pro has a usable sata port, but ahci wasnt finding
> anything attached to it.
> 
> the fdt glue looked like it assumed the boot loader did a lot of
> work to get things going. however, i havent figured out how to
> configure u-boot to do ahci, and quartz_uefi doesn't have ahci/sata
> working yet. the boot loaders arent quite there yet, so i tried to
> make the driver better.
> 
> the obvious tweak was to enable clocks and the phy, but that wasnt
> enough to get it working.
> 
> jared mcneill showed me the code he's been trying to get ahci working in
> quartz_uefi with, and it showed setting the ports implemented register.
> 
> the sata nodes in the rk356x device tree populates a ports-implemented
> property, which the yaml doco says can be provided if the platform/
> firmware doesnt init the register itself. i tried using that value
> instead of a PI read, and that didnt work. having the fdt glue set
> the register (like jmcneill does) before calling the bus independent
> code does work though.
> 
> so i see this now:
> 
> ahci0 at mainbus0: AHCI 1.3
> ahci0: port 0: 3.0Gb/s
> scsibus0 at ahci0: 32 targets
> sd0 at scsibus0 targ 0 lun 0:  naa.5002538250006f47
> sd0: 95396MB, 512 bytes/sector, 195371568 sectors, thin
> 
> ok?

ok kettenis@

> Index: ahci_fdt.c
> ===
> RCS file: /cvs/src/sys/dev/fdt/ahci_fdt.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 ahci_fdt.c
> --- ahci_fdt.c25 May 2022 03:03:58 -  1.7
> +++ ahci_fdt.c6 Apr 2023 21:30:31 -
> @@ -30,6 +30,8 @@
>  #include 
>  
>  #include 
> +#include 
> +#include 
>  #include 
>  
>  int  ahci_fdt_match(struct device *, void *, void *);
> @@ -63,6 +65,7 @@ ahci_fdt_attach(struct device *parent, s
>  {
>   struct ahci_softc *sc = (struct ahci_softc *) self;
>   struct fdt_attach_args *faa = aux;
> + uint32_t pi;
>  
>   if (faa->fa_nreg < 1)
>   return;
> @@ -82,11 +85,19 @@ ahci_fdt_attach(struct device *parent, s
>   goto unmap;
>   }
>  
> + clock_set_assigned(faa->fa_node);
> + clock_enable_all(faa->fa_node);
> + phy_enable(faa->fa_node, "sata-phy");
> +
> + pi = OF_getpropint(faa->fa_node, "ports-implemented", 0x0);
> + if (pi != 0)
> + bus_space_write_4(sc->sc_iot, sc->sc_ioh, AHCI_REG_PI, pi);
> +
>   printf(":");
>  
>   if (ahci_attach(sc) != 0) {
>   /* error printed by ahci_attach */
> - goto irq;
> + goto irq; /* disable phy and clocks? */
>   }
>  
>   return;
> 



Re: acpithinkpad: don't setup non-existent temp sensors

2023-04-07 Thread Klemens Nanni
05.04.2023 03:45, joshua stein пишет:
> acpithinkpad sets up a hard-coded number of temperature sensors and 
> doesn't check the result of aml_evalinteger when polling, so for all 
> invalid sensors it ends up reporting the value of the previous 
> successful sensor check resulting in this (for a machine with only a 
> TMP0 sensor):
> 
> hw.sensors.acpithinkpad0.temp0=47.00 degC
> hw.sensors.acpithinkpad0.temp1=47.00 degC
> hw.sensors.acpithinkpad0.temp2=47.00 degC
> hw.sensors.acpithinkpad0.temp3=47.00 degC
> hw.sensors.acpithinkpad0.temp4=47.00 degC
> hw.sensors.acpithinkpad0.temp5=47.00 degC
> hw.sensors.acpithinkpad0.temp6=47.00 degC
> hw.sensors.acpithinkpad0.temp7=47.00 degC
> hw.sensors.acpithinkpad0.fan0=3605 RPM
> hw.sensors.acpithinkpad0.indicator0=Off (port replicator), UNKNOWN
> 
> This diff checks whether the TMP[0-8] node actually exists during 
> attach and if not, it doesn't create the sensor.  It also checks the 
> return value during polling and sets the sensor to invalid if it 
> failed for some reason.

This makes sense, however...

> 
> hw.sensors.acpithinkpad0.temp0=55.00 degC
> hw.sensors.acpithinkpad0.fan0=5045 RPM
> hw.sensors.acpithinkpad0.indicator0=Off (port replicator), UNKNOWN
> 
> 
> Index: sys/dev/acpi/acpithinkpad.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpithinkpad.c,v
> retrieving revision 1.70
> diff -u -p -u -p -r1.70 acpithinkpad.c
> --- sys/dev/acpi/acpithinkpad.c   6 Apr 2022 18:59:27 -   1.70
> +++ sys/dev/acpi/acpithinkpad.c   5 Apr 2023 03:43:59 -
> @@ -112,11 +112,10 @@
>  #define  THINKPAD_TABLET_SCREEN_CHANGED  0x60c0
>  #define  THINKPAD_SWITCH_WIRELESS0x7000
>  
> -#define THINKPAD_NSENSORS 10
> -#define THINKPAD_NTEMPSENSORS 8
> -
> -#define THINKPAD_SENSOR_FANRPM   THINKPAD_NTEMPSENSORS
> -#define THINKPAD_SENSOR_PORTREPL THINKPAD_NTEMPSENSORS + 1
> +#define THINKPAD_SENSOR_FANRPM   0
> +#define THINKPAD_SENSOR_PORTREPL 1
> +#define THINKPAD_SENSOR_TMP0 2
> +#define THINKPAD_NSENSORS10
>  
>  #define THINKPAD_ECOFFSET_VOLUME 0x30
>  #define THINKPAD_ECOFFSET_VOLUME_MUTE_MASK 0x40
> @@ -140,6 +139,7 @@ struct acpithinkpad_softc {
>  
>   struct ksensor   sc_sens[THINKPAD_NSENSORS];
>   struct ksensordevsc_sensdev;
> + int  sc_ntempsens;
>  
>   uint64_t sc_hkey_version;
>  
> @@ -178,6 +178,7 @@ int   thinkpad_get_brightness(struct acpit
>  int  thinkpad_set_brightness(void *, int);
>  int  thinkpad_get_param(struct wsdisplay_param *);
>  int  thinkpad_set_param(struct wsdisplay_param *);
> +int  thinkpad_get_temp(struct acpithinkpad_softc *, int, int64_t *);
>  
>  voidthinkpad_sensor_attach(struct acpithinkpad_softc *sc);
>  voidthinkpad_sensor_refresh(void *);
> @@ -228,6 +229,7 @@ thinkpad_match(struct device *parent, vo
>  void
>  thinkpad_sensor_attach(struct acpithinkpad_softc *sc)
>  {
> + int64_t tmp;
>   int i;
>  
>   if (sc->sc_acpi->sc_ec == NULL)
> @@ -237,10 +239,16 @@ thinkpad_sensor_attach(struct acpithinkp
>   /* Add temperature probes */
>   strlcpy(sc->sc_sensdev.xname, DEVNAME(sc),
>   sizeof(sc->sc_sensdev.xname));
> - for (i=0; i - sc->sc_sens[i].type = SENSOR_TEMP;
> - sensor_attach(>sc_sensdev, >sc_sens[i]);
> - }
> + sc->sc_ntempsens = 0;
> + for (i = 0; i < THINKPAD_NSENSORS - THINKPAD_SENSOR_TMP0; i++) {
> + if (thinkpad_get_temp(sc, i, ) != 0)
> + break;

doesn't this mean, that legit sensors which happen to fail this read
upon attach won't ever be visible at runtime?

I have no idea how reliable those sensors are, but given that the code
handles spurious failure, this seems not too far fetched.

> +
> + sc->sc_sens[THINKPAD_SENSOR_TMP0 + i].type = SENSOR_TEMP;
> + sensor_attach(>sc_sensdev,
> + >sc_sens[THINKPAD_SENSOR_TMP0 + i]);
> + sc->sc_ntempsens++;
> + }
>  
>   /* Add fan probe */
>   sc->sc_sens[THINKPAD_SENSOR_FANRPM].type = SENSOR_FANRPM;
> @@ -262,17 +270,19 @@ thinkpad_sensor_refresh(void *arg)
>   struct acpithinkpad_softc *sc = arg;
>   uint8_t lo, hi, i;
>   int64_t tmp;
> - char sname[5];
>  
>   /* Refresh sensor readings */
> - for (i=0; i - snprintf(sname, sizeof(sname), "TMP%d", i);
> - aml_evalinteger(sc->sc_acpi, sc->sc_ec->sc_devnode,
> - sname, 0, 0, );
> - sc->sc_sens[i].value = (tmp * 100) + 27315;
> - if (tmp > 127 || tmp < -127)
> - sc->sc_sens[i].flags = SENSOR_FINVALID;
> - }
> + for (i = 0; i < sc->sc_ntempsens; i++) {
> + if (thinkpad_get_temp(sc, i, ) != 0) {
> + sc->sc_sens[i].flags = SENSOR_FINVALID;
> + continue;
> +  

Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Dave Voutila


Dave Voutila  writes:

> In vmd, the vmm process forks to create the resulting vm process. After
> this fork, the vmm parent process closes all the file descriptors
> pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
>
> The logic was a bit funky, so this change relies on the fact we can
> attempt the close(2) call and use its success/failure to determine if we
> have an fd to mark -1 in the vm structure. (I guess we could just
> blindly set them to -1 post-close, but this feels more sensical to me.)
>
> While in the function, it cleans up some commentary and logging around
> the actual fork(2) call. Since fork(2) sets errno, we can use it in the
> log message, too.
>
> This reduces some noise in my upcoming diff to introduce execvp to the
> forking of child vm processes (as presented at AsiaBSDCon).
>

Touch longer, but won't generate ktrace noise by blind calls to close(2)
and also accounts for the other error conditions (EINTR, EIO).

For EIO, not sure yet how we want to handle it other than log it.

For EINTR, we want to account for that race and make sure we retry since
the vmm process is long-lived and could inadvertently keep things like
tty fds or disk image fds open after the guest vm terminates.

I need to check on other calls to close(2) in vmd, but most fds are
passed via imsg, so I'd presume any interruption is handled in the
kernel. (Someone correct me if I'm wrong.)


diff refs/heads/master refs/heads/vmd-vmm-fd-dance
commit - 8371c6b4d5765a69d520f93d64f57c6a2989f9ae
commit + 9c07553a618956ba89f29b93906dcd6ea6c19de8
blob - 75af37a29a6cd4917d8c4ca9be35c48772314f4b
blob + e0af71dac5d730c7b88166384a95b94bf14fcfc4
--- usr.sbin/vmd/vmd.c
+++ usr.sbin/vmd/vmd.c
@@ -1956,3 +1956,21 @@ vm_terminate(struct vmd_vm *vm, const char *caller)
vm_remove(vm, caller);
}
 }
+
+int
+close_fd(int fd)
+{
+   int ret;
+
+   if (fd == -1)
+   return (0);
+
+   do
+   ret = close(fd);
+   while (ret == -1 && errno == EINTR);
+
+   if (ret == -1 && errno == EIO)
+   log_warn("%s(%d)", __func__, fd);
+
+   return (ret);
+}
blob - 7bbbf62734bc7cd575c4fee384193037b0495bb4
blob + 7228ace4b31a9fd6b5eb90bf1d048198a03a04fc
--- usr.sbin/vmd/vmd.h
+++ usr.sbin/vmd/vmd.h
@@ -443,6 +443,7 @@ void getmonotime(struct timeval *);
 uint32_t prefixlen2mask(uint8_t);
 voidprefixlen2mask6(u_int8_t, struct in6_addr *);
 voidgetmonotime(struct timeval *);
+int close_fd(int);

 /* priv.c */
 voidpriv(struct privsep *, struct privsep_proc *);
blob - d9eff3c8f703854c7b1e49fee04b8e426956cfbb
blob + 7db920beec7e34a63f5ba3602f17185eec33d3f7
--- usr.sbin/vmd/vmm.c
+++ usr.sbin/vmd/vmm.c
@@ -641,12 +641,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
fatal("socketpair");

-   /* Start child vmd for this VM (fork, chroot, drop privs) */
+   /* Fork the vmm process to create the vm, inheriting open device fds. */
vm_pid = fork();
-
-   /* Start child failed? - cleanup and leave */
if (vm_pid == -1) {
-   log_warnx("%s: start child failed", __func__);
+   log_warn("%s: fork child failed", __func__);
ret = EIO;
goto err;
}
@@ -654,31 +652,24 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
if (vm_pid > 0) {
/* Parent */
vm->vm_pid = vm_pid;
-   close(fds[1]);
+   close_fd(fds[1]);

for (i = 0 ; i < vcp->vcp_ndisks; i++) {
for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
-   if (vm->vm_disks[i][j] != -1)
-   close(vm->vm_disks[i][j]);
-   vm->vm_disks[i][j] = -1;
+   if (close_fd(vm->vm_disks[i][j]) == 0)
+   vm->vm_disks[i][j] = -1;
}
}
for (i = 0 ; i < vcp->vcp_nnics; i++) {
-   close(vm->vm_ifs[i].vif_fd);
-   vm->vm_ifs[i].vif_fd = -1;
+   if (close_fd(vm->vm_ifs[i].vif_fd) == 0)
+   vm->vm_ifs[i].vif_fd = -1;
}
-   if (vm->vm_kernel != -1) {
-   close(vm->vm_kernel);
+   if (close_fd(vm->vm_kernel) == 0)
vm->vm_kernel = -1;
-   }
-   if (vm->vm_cdrom != -1) {
-   close(vm->vm_cdrom);
+   if (close_fd(vm->vm_cdrom) == 0)
vm->vm_cdrom = -1;
-   }
-   if (vm->vm_tty != -1) {
-   close(vm->vm_tty);
+   if (close_fd(vm->vm_tty) == 0)
vm->vm_tty = -1;
-   }

/* Read back the 

Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Dave Voutila


"Theo de Raadt"  writes:

> Florian Obser  wrote:
>
>> On 2023-04-07 10:51 -04, Dave Voutila  wrote:
>> > In vmd, the vmm process forks to create the resulting vm process. After
>> > this fork, the vmm parent process closes all the file descriptors
>> > pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
>> >
>> > The logic was a bit funky, so this change relies on the fact we can
>> > attempt the close(2) call and use its success/failure to determine if we
>> > have an fd to mark -1 in the vm structure. (I guess we could just
>> > blindly set them to -1 post-close, but this feels more sensical to me.)
>> >
>>
>> this will create some noise in ktrace every time you pass -1 to close(2)
>> you'll see
>>
>> CALL  close(-1)
>> RET   close -1 errno 9 Bad file descriptor
>>
>> Not a vmd user and I don't plan to hack on it any time soon, so
>> *shrug*.
>
> And also, have you considered EINTR?
>

Ah, no...even the current usage doesn't. Good point.

> I prefer if you keep state in the application.



Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Theo de Raadt
Florian Obser  wrote:

> On 2023-04-07 10:51 -04, Dave Voutila  wrote:
> > In vmd, the vmm process forks to create the resulting vm process. After
> > this fork, the vmm parent process closes all the file descriptors
> > pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
> >
> > The logic was a bit funky, so this change relies on the fact we can
> > attempt the close(2) call and use its success/failure to determine if we
> > have an fd to mark -1 in the vm structure. (I guess we could just
> > blindly set them to -1 post-close, but this feels more sensical to me.)
> >
> 
> this will create some noise in ktrace every time you pass -1 to close(2)
> you'll see
> 
> CALL  close(-1)
> RET   close -1 errno 9 Bad file descriptor
> 
> Not a vmd user and I don't plan to hack on it any time soon, so
> *shrug*.

And also, have you considered EINTR?

I prefer if you keep state in the application.



Re: cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Florian Obser
On 2023-04-07 10:51 -04, Dave Voutila  wrote:
> In vmd, the vmm process forks to create the resulting vm process. After
> this fork, the vmm parent process closes all the file descriptors
> pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).
>
> The logic was a bit funky, so this change relies on the fact we can
> attempt the close(2) call and use its success/failure to determine if we
> have an fd to mark -1 in the vm structure. (I guess we could just
> blindly set them to -1 post-close, but this feels more sensical to me.)
>

this will create some noise in ktrace every time you pass -1 to close(2)
you'll see

CALL  close(-1)
RET   close -1 errno 9 Bad file descriptor

Not a vmd user and I don't plan to hack on it any time soon, so
*shrug*.

-- 
In my defence, I have been left unsupervised.



cleanup vmm_start_vm, simplifying fd cleanup

2023-04-07 Thread Dave Voutila
In vmd, the vmm process forks to create the resulting vm process. After
this fork, the vmm parent process closes all the file descriptors
pointing to the vm's devices (cdrom, kernel, disks, nics, etc.).

The logic was a bit funky, so this change relies on the fact we can
attempt the close(2) call and use its success/failure to determine if we
have an fd to mark -1 in the vm structure. (I guess we could just
blindly set them to -1 post-close, but this feels more sensical to me.)

While in the function, it cleans up some commentary and logging around
the actual fork(2) call. Since fork(2) sets errno, we can use it in the
log message, too.

This reduces some noise in my upcoming diff to introduce execvp to the
forking of child vm processes (as presented at AsiaBSDCon).

9 insertions, 18 deletions :)

ok?


diff refs/heads/master refs/heads/vmd-vmm-fd-dance
commit - 8371c6b4d5765a69d520f93d64f57c6a2989f9ae
commit + ebe7117d30cfe8a792d4c2e9a05f3b9b3e86c651
blob - d9eff3c8f703854c7b1e49fee04b8e426956cfbb
blob + 859e27652ba129a23c6e4ff510dab5bafaac653a
--- usr.sbin/vmd/vmm.c
+++ usr.sbin/vmd/vmm.c
@@ -641,12 +641,10 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p
if (socketpair(AF_UNIX, SOCK_STREAM, PF_UNSPEC, fds) == -1)
fatal("socketpair");

-   /* Start child vmd for this VM (fork, chroot, drop privs) */
+   /* Fork the vmm process to create the vm, inheriting open device fds. */
vm_pid = fork();
-
-   /* Start child failed? - cleanup and leave */
if (vm_pid == -1) {
-   log_warnx("%s: start child failed", __func__);
+   log_warn("%s: fork child failed", __func__);
ret = EIO;
goto err;
}
@@ -658,27 +656,20 @@ vmm_start_vm(struct imsg *imsg, uint32_t *id, pid_t *p

for (i = 0 ; i < vcp->vcp_ndisks; i++) {
for (j = 0; j < VM_MAX_BASE_PER_DISK; j++) {
-   if (vm->vm_disks[i][j] != -1)
-   close(vm->vm_disks[i][j]);
-   vm->vm_disks[i][j] = -1;
+   if (close(vm->vm_disks[i][j]) == 0)
+   vm->vm_disks[i][j] = -1;
}
}
for (i = 0 ; i < vcp->vcp_nnics; i++) {
-   close(vm->vm_ifs[i].vif_fd);
-   vm->vm_ifs[i].vif_fd = -1;
+   if (close(vm->vm_ifs[i].vif_fd) == 0)
+   vm->vm_ifs[i].vif_fd = -1;
}
-   if (vm->vm_kernel != -1) {
-   close(vm->vm_kernel);
+   if (close(vm->vm_kernel) == 0)
vm->vm_kernel = -1;
-   }
-   if (vm->vm_cdrom != -1) {
-   close(vm->vm_cdrom);
+   if (close(vm->vm_cdrom) == 0)
vm->vm_cdrom = -1;
-   }
-   if (vm->vm_tty != -1) {
-   close(vm->vm_tty);
+   if (close(vm->vm_tty) == 0)
vm->vm_tty = -1;
-   }

/* Read back the kernel-generated vm id from the child */
sz = atomicio(read, fds[0], >vcp_id, sizeof(vcp->vcp_id));



Re: arp input remove kernel lock

2023-04-07 Thread Hrvoje Popovski
On 6.4.2023. 22:46, Alexander Bluhm wrote:
> Hi,
> 
> When removing these kernel locks from the ARP input path, the machine
> runs stable in my tests.  Caller if_netisr() grabs the exclusive
> netlock and that should be sufficent for in_arpinput() and arpcache().
> 
> To stress the ARP resolver I run arp -nd ... in a loop.
> 
> Hrvoje: Could you run this diff on your testsetup?
> 
> bluhm


Hi,

I'm running this diff in lab and on production firewalls and boxes seems
happy and little faster. In lab whatever I do I couldn't panic boxes,
generating incomplete arp entries, arp -ad, destroy vlan, destroy carp,
up/down physical interfaces and stuff like that while sending traffic.





Re: bgpd rib_get/rib_add change

2023-04-07 Thread Theo Buehler
On Fri, Apr 07, 2023 at 02:35:29PM +0200, Claudio Jeker wrote:
> This diff switches rib_get and rib_add to work on struct pt_entry
> pointers and introduces rib_get_addr() which works like rib_get before.
> I want to use rib_get and rib_add in the flowspec code.

ok tb



bgpd rib_get/rib_add change

2023-04-07 Thread Claudio Jeker
This diff switches rib_get and rib_add to work on struct pt_entry
pointers and introduces rib_get_addr() which works like rib_get before.
I want to use rib_get and rib_add in the flowspec code.

Long term I want to push struct pt_entry further out and closer to the
NLRI handling in the input path but this is enough to get me unlocked for
now.
-- 
:wq Claudio

Index: rde.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.c,v
retrieving revision 1.599
diff -u -p -r1.599 rde.c
--- rde.c   3 Apr 2023 10:48:00 -   1.599
+++ rde.c   7 Apr 2023 10:38:03 -
@@ -1734,7 +1734,7 @@ pathid_assign(struct rde_peer *peer, uin
return peer->path_id_tx;
 
/* peer uses add-path, therefore new path_ids need to be assigned */
-   re = rib_get(rib_byid(RIB_ADJ_IN), prefix, prefixlen);
+   re = rib_get_addr(rib_byid(RIB_ADJ_IN), prefix, prefixlen);
if (re != NULL) {
struct prefix *p;
 
@@ -3052,14 +3052,15 @@ rde_dump_ctx_new(struct ctl_show_rib_req
 
if (req->flags & F_SHORTER) {
for (plen = 0; plen <= req->prefixlen; plen++) {
-   re = rib_get(rib_byid(rid), >prefix, plen);
+   re = rib_get_addr(rib_byid(rid), >prefix,
+   plen);
rde_dump_upcall(re, ctx);
}
} else if (req->prefixlen == hostplen) {
re = rib_match(rib_byid(rid), >prefix);
rde_dump_upcall(re, ctx);
} else {
-   re = rib_get(rib_byid(rid), >prefix,
+   re = rib_get_addr(rib_byid(rid), >prefix,
req->prefixlen);
rde_dump_upcall(re, ctx);
}
Index: rde.h
===
RCS file: /cvs/src/usr.sbin/bgpd/rde.h,v
retrieving revision 1.290
diff -u -p -r1.290 rde.h
--- rde.h   30 Mar 2023 12:11:18 -  1.290
+++ rde.h   7 Apr 2023 10:37:17 -
@@ -548,7 +548,8 @@ struct rib  *rib_byid(uint16_t);
 uint16_trib_find(char *);
 voidrib_free(struct rib *);
 voidrib_shutdown(void);
-struct rib_entry *rib_get(struct rib *, struct bgpd_addr *, int);
+struct rib_entry *rib_get(struct rib *, struct pt_entry *);
+struct rib_entry *rib_get_addr(struct rib *, struct bgpd_addr *, int);
 struct rib_entry *rib_match(struct rib *, struct bgpd_addr *);
 int rib_dump_pending(void);
 voidrib_dump_runner(void);
Index: rde_rib.c
===
RCS file: /cvs/src/usr.sbin/bgpd/rde_rib.c,v
retrieving revision 1.257
diff -u -p -r1.257 rde_rib.c
--- rde_rib.c   29 Mar 2023 10:46:11 -  1.257
+++ rde_rib.c   7 Apr 2023 10:40:21 -
@@ -38,7 +38,7 @@
 uint16_t rib_size;
 struct rib **ribs;
 
-struct rib_entry *rib_add(struct rib *, struct bgpd_addr *, int);
+struct rib_entry *rib_add(struct rib *, struct pt_entry *);
 static inline int rib_compare(const struct rib_entry *,
const struct rib_entry *);
 void rib_remove(struct rib_entry *);
@@ -297,12 +297,12 @@ rib_shutdown(void)
 }
 
 struct rib_entry *
-rib_get(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
+rib_get(struct rib *rib, struct pt_entry *pte)
 {
struct rib_entry xre, *re;
 
memset(, 0, sizeof(xre));
-   xre.prefix = pt_fill(prefix, prefixlen);
+   xre.prefix = pte;
 
re = RB_FIND(rib_tree, rib_tree(rib), );
if (re && re->rib_id != rib->id)
@@ -312,6 +312,12 @@ rib_get(struct rib *rib, struct bgpd_add
 }
 
 struct rib_entry *
+rib_get_addr(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
+{
+   return rib_get(rib, pt_fill(prefix, prefixlen));
+}
+
+struct rib_entry *
 rib_match(struct rib *rib, struct bgpd_addr *addr)
 {
struct rib_entry *re;
@@ -321,7 +327,7 @@ rib_match(struct rib *rib, struct bgpd_a
case AID_INET:
case AID_VPN_IPv4:
for (i = 32; i >= 0; i--) {
-   re = rib_get(rib, addr, i);
+   re = rib_get_addr(rib, addr, i);
if (re != NULL)
return (re);
}
@@ -329,7 +335,7 @@ rib_match(struct rib *rib, struct bgpd_a
case AID_INET6:
case AID_VPN_IPv6:
for (i = 128; i >= 0; i--) {
-   re = rib_get(rib, addr, i);
+   re = rib_get_addr(rib, addr, i);
if (re != NULL)
return (re);
}
@@ -342,15 +348,10 @@ rib_match(struct rib *rib, struct bgpd_a
 
 
 struct rib_entry *
-rib_add(struct rib *rib, struct bgpd_addr *prefix, int prefixlen)
+rib_add(struct rib *rib, struct pt_entry *pte)
 {
- 

Re: use labels in the device tree to init interface descriptions

2023-04-07 Thread Mark Kettenis
> From: "Theo de Raadt" 
> Date: Fri, 07 Apr 2023 05:39:00 -0600
> 
> Claudio Jeker  wrote:
> 
> > On Fri, Apr 07, 2023 at 04:53:52PM +1000, David Gwynne wrote:
> > > ethernet interfaces in device trees can have a "label" property which
> > > is generally used (when it is used) to identify which connector it is on
> > > the case or something like that. eg, eth2 in the turris omnia device
> > > tree has 'label = "wan"' on the mvneta interface.
> > > 
> > > ive been using labels in the dts recently to help me figure out what
> > > goes where, so this has been useful to me. if/when we support switches
> > > (eg mvsw), their ports are often labelled so i'd like to apply this idea
> > > to them too.
> > > 
> > > ok?
> > 
> > I think doing this is OK but I'm not sure if the usage of OF_getprop()
> > is correct. OF_getprop() uses 'memcpy(buf, data, min(len, buflen));'
> > to fill the if_description buffer. So if the provided label is larger than
> > sizeof(ifp->if_description) the description string is no longer NUL
> > terminated.
> 
> 
> I don't have a problem with the pre-population of the string either.
> It's informational either way.
> 
> But claudio is right, OF_getprop() is not like strlcpy, it behaves like
> strncpy() so if you want to use it you need to copy n-1 bytes, and
> manuall set the last byte to 0.  Or make a new function which is more
> suitable.

Might be worth adding a OF_getpropstr() function that behaves like
strlcpy() in the sense that it makes sure the string is
NUL-terminated.  There are other places in drivers where this issue
comes up.



Re: use labels in the device tree to init interface descriptions

2023-04-07 Thread Theo de Raadt
Claudio Jeker  wrote:

> On Fri, Apr 07, 2023 at 04:53:52PM +1000, David Gwynne wrote:
> > ethernet interfaces in device trees can have a "label" property which
> > is generally used (when it is used) to identify which connector it is on
> > the case or something like that. eg, eth2 in the turris omnia device
> > tree has 'label = "wan"' on the mvneta interface.
> > 
> > ive been using labels in the dts recently to help me figure out what
> > goes where, so this has been useful to me. if/when we support switches
> > (eg mvsw), their ports are often labelled so i'd like to apply this idea
> > to them too.
> > 
> > ok?
> 
> I think doing this is OK but I'm not sure if the usage of OF_getprop()
> is correct. OF_getprop() uses 'memcpy(buf, data, min(len, buflen));'
> to fill the if_description buffer. So if the provided label is larger than
> sizeof(ifp->if_description) the description string is no longer NUL
> terminated.


I don't have a problem with the pre-population of the string either.
It's informational either way.

But claudio is right, OF_getprop() is not like strlcpy, it behaves like
strncpy() so if you want to use it you need to copy n-1 bytes, and
manuall set the last byte to 0.  Or make a new function which is more
suitable.


  



Re: use labels in the device tree to init interface descriptions

2023-04-07 Thread Claudio Jeker
On Fri, Apr 07, 2023 at 04:53:52PM +1000, David Gwynne wrote:
> ethernet interfaces in device trees can have a "label" property which
> is generally used (when it is used) to identify which connector it is on
> the case or something like that. eg, eth2 in the turris omnia device
> tree has 'label = "wan"' on the mvneta interface.
> 
> ive been using labels in the dts recently to help me figure out what
> goes where, so this has been useful to me. if/when we support switches
> (eg mvsw), their ports are often labelled so i'd like to apply this idea
> to them too.
> 
> ok?

I think doing this is OK but I'm not sure if the usage of OF_getprop()
is correct. OF_getprop() uses 'memcpy(buf, data, min(len, buflen));'
to fill the if_description buffer. So if the provided label is larger than
sizeof(ifp->if_description) the description string is no longer NUL
terminated.

> Index: if_dwqe_fdt.c
> ===
> RCS file: /cvs/src/sys/dev/fdt/if_dwqe_fdt.c,v
> retrieving revision 1.7
> diff -u -p -r1.7 if_dwqe_fdt.c
> --- if_dwqe_fdt.c 7 Apr 2023 06:33:49 -   1.7
> +++ if_dwqe_fdt.c 7 Apr 2023 06:47:11 -
> @@ -213,6 +213,10 @@ dwqe_fdt_attach(struct device *parent, s
>   printf("%s: can't establish interrupt\n", sc->sc_dev.dv_xname);
>  
>   ifp = >sc_ac.ac_if;
> +
> + OF_getprop(faa->fa_node, "label",
> + ifp->if_description, sizeof(ifp->if_description));
> +
>   sc->sc_ifd.if_node = faa->fa_node;
>   sc->sc_ifd.if_ifp = ifp;
>   if_register(>sc_ifd);
> Index: if_mvneta.c
> ===
> RCS file: /cvs/src/sys/dev/fdt/if_mvneta.c,v
> retrieving revision 1.29
> diff -u -p -r1.29 if_mvneta.c
> --- if_mvneta.c   3 Apr 2023 01:46:18 -   1.29
> +++ if_mvneta.c   7 Apr 2023 06:47:12 -
> @@ -810,6 +810,9 @@ mvneta_attach_deferred(struct device *se
>   if_attach(ifp);
>   ether_ifattach(ifp);
>  
> + OF_getprop(sc->sc_node, "label",
> + ifp->if_description, sizeof(ifp->if_description));
> +
>   sc->sc_ifd.if_node = sc->sc_node;
>   sc->sc_ifd.if_ifp = ifp;
>   if_register(>sc_ifd);
> 

-- 
:wq Claudio



Re: MALLOC_STATS: dump internal state and leak info via utrace(2)

2023-04-07 Thread Otto Moerbeek
On Wed, Apr 05, 2023 at 10:54:19AM +0200, Otto Moerbeek wrote:

> Hi,
> 
> This is work in progress. I have to think if the flags to kdump I'm
> introducing should be two or a single one.
> 
> Currently, malloc.c can be compiled with MALLOC_STATS defined. If run
> with option D it dumps its state to a malloc.out file at exit. This
> state can be used to find leaks amongst other things.
> 
> This is not ideal for pledged processes, as they often have no way to
> write files.
> 
> This changes malloc to use utrace(2) for that.
> 
> As kdump has no nice way to show those lines without all extras it
> normally shows, so add two options to it to just show the lines.
> 
> To use, compile and install libc with MALLOC_STATS defined.
> 
> Run :
> 
> $ MALLOC_OPTIONS=D ktrace -tu your_program
> ...
> $ kdump -hu
> 
> Feedback appreciated.

Second iteration. Main difference: docs and the leak report is greatly
enhanced by adding info that can be fed to addr2line. Exmaple:

...
Leak report:
 f sum  #avg
 0x9fd0eb58abb   40960 10   4096 addr2line -e ./a.out 0x1abb
 0x9fd0eb58ae98192  1   8192 addr2line -e ./a.out 0x1ae9
 0x9ffa92d9daf   65536  1  65536 addr2line -e /usr/lib/libc.so.97.0 
0x87daf

$ addr2line -e /usr/lib/libc.so.97.0 0x87daf
/usr/src/lib/libc/stdio/makebuf.c:62

Note that only the top caller is reported. I once wrote code to record
more stack frames, but I do not think I want that in. More elaborate
memory allocation analysis is better done by external tools or
instrumentation wrappers.

But even them, the current report is very useful IMO. It might be even
usefull enough to compile it always #ifndef SMALL. The runtime
overhead (if malloc option D is not used) is small. I'll be thinking
about possible reasons not to compile it in by default.

-Otto

Index: lib/libc/stdlib/malloc.3
===
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.3,v
retrieving revision 1.130
diff -u -p -r1.130 malloc.3
--- lib/libc/stdlib/malloc.31 Apr 2023 18:47:51 -   1.130
+++ lib/libc/stdlib/malloc.37 Apr 2023 07:43:22 -
@@ -284,10 +284,13 @@ If it has been corrupted, the process is
 .It Cm D
 .Dq Dump .
 .Fn malloc
-will dump statistics to the file
-.Pa ./malloc.out ,
-if it already exists,
+will dump statistics using
+.Xr utrace 2
 at exit.
+To record the dump:
+.Dl $ MALLOC_OPTIONS=D ktrace -tu program ...
+To view the dump:
+.Dl $ kdump -hu ...
 This option requires the library to have been compiled with -DMALLOC_STATS in
 order to have any effect.
 .It Cm F
Index: lib/libc/stdlib/malloc.c
===
RCS file: /home/cvs/src/lib/libc/stdlib/malloc.c,v
retrieving revision 1.280
diff -u -p -r1.280 malloc.c
--- lib/libc/stdlib/malloc.c5 Apr 2023 06:25:38 -   1.280
+++ lib/libc/stdlib/malloc.c7 Apr 2023 07:43:22 -
@@ -23,7 +23,7 @@
  * can buy me a beer in return. Poul-Henning Kamp
  */
 
-/* #define MALLOC_STATS */
+#define MALLOC_STATS
 
 #include 
 #include 
@@ -39,8 +39,10 @@
 #include 
 
 #ifdef MALLOC_STATS
+#include 
 #include 
-#include 
+#include 
+#include 
 #endif
 
 #include "thread_private.h"
@@ -243,10 +245,8 @@ static __dead void wrterror(struct dir_i
 __attribute__((__format__ (printf, 2, 3)));
 
 #ifdef MALLOC_STATS
-void malloc_dump(int, int, struct dir_info *);
+void malloc_dump(void);
 PROTO_NORMAL(malloc_dump);
-void malloc_gdump(int);
-PROTO_NORMAL(malloc_gdump);
 static void malloc_exit(void);
 #define CALLER __builtin_return_address(0)
 #else
@@ -319,7 +319,7 @@ wrterror(struct dir_info *d, char *msg, 
 
 #ifdef MALLOC_STATS
if (mopts.malloc_stats)
-   malloc_gdump(STDERR_FILENO);
+   malloc_dump();
 #endif /* MALLOC_STATS */
 
errno = saved_errno;
@@ -2225,6 +2225,21 @@ aligned_alloc(size_t alignment, size_t s
 
 #ifdef MALLOC_STATS
 
+static void
+ulog(const char *format, ...)
+{
+   va_list ap;
+   char buf[100];
+   int len;
+
+   va_start(ap, format);
+   len = vsnprintf(buf, sizeof(buf), format, ap);
+   if (len > (int)sizeof(buf))
+   len = sizeof(buf);
+   utrace("mallocdumpline", buf, len);
+   va_end(ap);
+}
+
 struct malloc_leak {
void *f;
size_t total_size;
@@ -2280,21 +2295,32 @@ putleakinfo(void *f, size_t sz, int cnt)
 static struct malloc_leak *malloc_leaks;
 
 static void
-dump_leaks(int fd)
+dump_leaks(void)
 {
struct leaknode *p;
unsigned int i = 0;
 
-   dprintf(fd, "Leak report\n");
-   dprintf(fd, " f sum  #avg\n");
+   ulog("Leak report:\n");
+   ulog(" f sum  #avg\n");
/* XXX only one page of summary */
if (malloc_leaks == NULL)
malloc_leaks = MMAP(MALLOC_PAGESIZE, 0);
if (malloc_leaks != MAP_FAILED)

use labels in the device tree to init interface descriptions

2023-04-07 Thread David Gwynne
ethernet interfaces in device trees can have a "label" property which
is generally used (when it is used) to identify which connector it is on
the case or something like that. eg, eth2 in the turris omnia device
tree has 'label = "wan"' on the mvneta interface.

ive been using labels in the dts recently to help me figure out what
goes where, so this has been useful to me. if/when we support switches
(eg mvsw), their ports are often labelled so i'd like to apply this idea
to them too.

ok?

Index: if_dwqe_fdt.c
===
RCS file: /cvs/src/sys/dev/fdt/if_dwqe_fdt.c,v
retrieving revision 1.7
diff -u -p -r1.7 if_dwqe_fdt.c
--- if_dwqe_fdt.c   7 Apr 2023 06:33:49 -   1.7
+++ if_dwqe_fdt.c   7 Apr 2023 06:47:11 -
@@ -213,6 +213,10 @@ dwqe_fdt_attach(struct device *parent, s
printf("%s: can't establish interrupt\n", sc->sc_dev.dv_xname);
 
ifp = >sc_ac.ac_if;
+
+   OF_getprop(faa->fa_node, "label",
+   ifp->if_description, sizeof(ifp->if_description));
+
sc->sc_ifd.if_node = faa->fa_node;
sc->sc_ifd.if_ifp = ifp;
if_register(>sc_ifd);
Index: if_mvneta.c
===
RCS file: /cvs/src/sys/dev/fdt/if_mvneta.c,v
retrieving revision 1.29
diff -u -p -r1.29 if_mvneta.c
--- if_mvneta.c 3 Apr 2023 01:46:18 -   1.29
+++ if_mvneta.c 7 Apr 2023 06:47:12 -
@@ -810,6 +810,9 @@ mvneta_attach_deferred(struct device *se
if_attach(ifp);
ether_ifattach(ifp);
 
+   OF_getprop(sc->sc_node, "label",
+   ifp->if_description, sizeof(ifp->if_description));
+
sc->sc_ifd.if_node = sc->sc_node;
sc->sc_ifd.if_ifp = ifp;
if_register(>sc_ifd);