Re: vmt(4): use shared netlock to protect ifnet data within vmt_tclo_broadcastip()

2023-09-24 Thread YASUOKA Masahiko
On Sat, 23 Sep 2023 15:38:40 +0300
Vitaliy Makkoveev  wrote:
> This makes ifnet protection consistent. Execute vmt_tclo_tick() timeout
> handler in process context to allow context switch within
> vmt_tclo_broadcastip().

ok yasuoka

> Index: sys/dev/pv/vmt.c
> ===
> RCS file: /cvs/src/sys/dev/pv/vmt.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 vmt.c
> --- sys/dev/pv/vmt.c  7 Jan 2023 06:40:21 -   1.30
> +++ sys/dev/pv/vmt.c  23 Sep 2023 12:15:27 -
> @@ -471,7 +471,7 @@ vmt_attach(struct device *parent, struct
>  
>   config_mountroot(self, vmt_tick_hook);
>  
> - timeout_set(>sc_tclo_tick, vmt_tclo_tick, sc);
> + timeout_set_proc(>sc_tclo_tick, vmt_tclo_tick, sc);
>   timeout_add_sec(>sc_tclo_tick, 1);
>   sc->sc_tclo_ping = 1;
>  
> @@ -899,9 +899,12 @@ vmt_tclo_broadcastip(struct vmt_softc *s
>  {
>   struct ifnet *iface;
>   struct sockaddr_in *guest_ip;
> + char ip[INET_ADDRSTRLEN];
>  
>   /* find first available ipv4 address */
>   guest_ip = NULL;
> +
> + NET_LOCK_SHARED();
>   TAILQ_FOREACH(iface, , if_list) {
>   struct ifaddr *iface_addr;
>  
> @@ -918,14 +921,14 @@ vmt_tclo_broadcastip(struct vmt_softc *s
>   continue;
>  
>   guest_ip = satosin(iface_addr->ifa_addr);
> + inet_ntop(AF_INET, _ip->sin_addr, ip,
> + sizeof(ip));
>   break;
>   }
>   }
> + NET_UNLOCK_SHARED();
>  
>   if (guest_ip != NULL) {
> - char ip[INET_ADDRSTRLEN];
> -
> - inet_ntop(AF_INET, _ip->sin_addr, ip, sizeof(ip));
>   if (vm_rpc_send_rpci_tx(sc, "info-set guestinfo.ip %s",
>   ip) != 0) {
>   DPRINTF("%s: unable to send guest IP address\n",



fix a wireguard mbuf leak

2023-09-21 Thread YASUOKA Masahiko
An mbuf leak may happen when a wg_peer is deleted.

ok?

The stage queue should be freed when wg_peer is destroyed.
diff from IIJ.

Index: sys/net/if_wg.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_wg.c,v
retrieving revision 1.29
diff -u -p -r1.29 if_wg.c
--- sys/net/if_wg.c 3 Aug 2023 09:49:08 -   1.29
+++ sys/net/if_wg.c 22 Sep 2023 03:11:47 -
@@ -518,6 +518,9 @@ wg_peer_destroy(struct wg_peer *peer)
taskq_barrier(wg_crypt_taskq);
taskq_barrier(net_tq(sc->sc_if.if_index));
 
+   if (!mq_empty(>p_stage_queue))
+   mq_purge(>p_stage_queue);
+
DPRINTF(sc, "Peer %llu destroyed\n", peer->p_id);
explicit_bzero(peer, sizeof(*peer));
pool_put(_peer_pool, peer);



Re: diff: trigger acpiac_refresh when acpibat notification

2023-09-14 Thread YASUOKA Masahiko
ping.

I think we have no reason not to have this.

Alternatively, acpiac_notify_triggered could be deleted and the
triggering done unconditionally, which is also good because it's simpler.

comments? ok?

On Thu, 17 Aug 2023 16:12:07 +0900 (JST)
YASUOKA Masahiko  wrote:
> Hi,
> 
> Update the AC status when the battery notification is happened.
> Because the AC status notification doesn't happen on some machines.
> My vaio actually has this problem.
> 
> Also Linux is doing the same thing
> 
> https://github.com/torvalds/linux/blob/v6.4/drivers/acpi/ac.c#L165-L183
> 
> ok? comments?
> 
> Index: sys/dev/acpi/acpiac.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpiac.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 acpiac.c
> --- sys/dev/acpi/acpiac.c 6 Apr 2022 18:59:27 -   1.36
> +++ sys/dev/acpi/acpiac.c 17 Aug 2023 06:57:44 -
> @@ -140,6 +140,8 @@ acpiac_getpsr(struct acpiac_softc *sc)
>   return (0);
>  }
>  
> +static int acpiac_notify_triggered = 0;
> +
>  int
>  acpiac_notify(struct aml_node *node, int notify_type, void *arg)
>  {
> @@ -148,6 +150,8 @@ acpiac_notify(struct aml_node *node, int
>   dnprintf(10, "acpiac_notify: %.2x %s\n", notify_type,
>   DEVNAME(sc));
>  
> + acpiac_notify_triggered = 1;
> +
>   switch (notify_type) {
>   case 0x00:
>   case 0x01:
> @@ -164,4 +168,22 @@ acpiac_notify(struct aml_node *node, int
>   break;
>   }
>   return (0);
> +}
> +
> +void
> +acpiac_battery_notify(void)
> +{
> + struct acpi_softc *sc = acpi_softc;
> + struct acpi_ac *ac;
> +
> + if (acpiac_notify_triggered)
> + return;
> + /*
> +  * On some machines (vaio VJPK23 at least) AC status notifications
> +  * are not triggered.  Update the AC status when battery notifications.
> +  */
> + SLIST_FOREACH(ac, >sc_ac, aac_link) {
> + acpiac_refresh(ac->aac_softc);
> + acpi_record_event(sc, APM_POWER_CHANGE);
> + }
>  }
> Index: sys/dev/acpi/acpibat.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 acpibat.c
> --- sys/dev/acpi/acpibat.c6 Apr 2022 18:59:27 -   1.70
> +++ sys/dev/acpi/acpibat.c17 Aug 2023 06:57:45 -
> @@ -536,5 +536,7 @@ acpibat_notify(struct aml_node *node, in
>   acpibat_refresh(sc);
>   acpi_record_event(sc->sc_acpi, APM_POWER_CHANGE);
>  
> + acpiac_battery_notify();
> +
>   return (0);
>  }
> Index: sys/dev/acpi/acpidev.h
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpidev.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 acpidev.h
> --- sys/dev/acpi/acpidev.h29 Jun 2018 17:39:18 -  1.44
> +++ sys/dev/acpi/acpidev.h17 Aug 2023 06:57:45 -
> @@ -306,6 +306,8 @@ struct acpiac_softc {
>   struct ksensordev   sc_sensdev;
>  };
>  
> +void acpiac_battery_notify(void);
> +
>  struct acpibat_softc {
>   struct device   sc_dev;
>  
> 



diff: relayd generate an output rule for "route to"

2023-09-12 Thread YASUOKA Masahiko
Hi,

Since 6.9, packets passed by "route-to" are also evaluated on
output.  As a result, states are created for the output direction.
Because those states do not take "direct server return" into account,
this causes some problems (e.g. the state is deleted because state
tracking fails).

relayd(8) creates the input rule automatically.  In the same way, a
rule for output should be created.

example output of "pfctl -sr"

pass in quick on rdomain 0 inet proto tcp from any to 192.168.2.212 port = 
8080 flags any keep state (sloppy, tcp.established 600) route-to @en2 
source-hash 0x11121314212223243132333441424344
+   pass out quick on rdomain 0 inet proto tcp from any to 192.168.2.212 port = 
8080 flags any keep state (sloppy, tcp.established 600)

ok?

Index: usr.sbin/relayd/pfe_filter.c
===
RCS file: /cvs/src/usr.sbin/relayd/pfe_filter.c,v
retrieving revision 1.63
diff -u -p -r1.63 pfe_filter.c
--- usr.sbin/relayd/pfe_filter.c30 Jun 2023 12:16:00 -  1.63
+++ usr.sbin/relayd/pfe_filter.c13 Sep 2023 04:58:36 -
@@ -486,6 +486,20 @@ sync_ruleset(struct relayd *env, struct 
if (ioctl(env->sc_pf->dev, DIOCADDRULE, ) == -1)
fatal("cannot add rule");
log_debug("%s: rule added to anchor \"%s\"", __func__, anchor);
+
+   /*
+* Create "pass out" rule for "route to" which is needed to
+* make the states sloppy, short timeout and so on.
+*/
+   if (t->conf.fwdmode == FWD_ROUTE) {
+   rio.rule.direction = PF_OUT;
+   rio.rule.rt &= ~PF_ROUTETO;
+   rio.rule.route.addr.type = PF_ADDR_NONE;
+   if (ioctl(env->sc_pf->dev, DIOCADDRULE, ) == -1)
+   fatal("cannot add rule");
+   log_debug("%s: rule added to anchor \"%s\"", __func__,
+   anchor);
+   }
}
if (transaction_commit(env) == -1)
log_warn("%s: add rules transaction failed", __func__);



Re: diff: trigger acpiac_refresh when acpibat notification

2023-08-18 Thread YASUOKA Masahiko
On Thu, 17 Aug 2023 10:07:27 -0500
joshua stein  wrote:
> On Thu, 17 Aug 2023 at 16:12:07 +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> Update the AC status when the battery notification is happened.
>> Because the AC status notification doesn't happen on some machines.
>> My vaio actually has this problem.
>> 
>> Also Linux is doing the same thing
>> 
>> https://github.com/torvalds/linux/blob/v6.4/drivers/acpi/ac.c#L165-L183
>> 
>> ok? comments?
> 
> When does acpiac_notify_triggered get reset back to 0 to run again 
> on the next battery notification?

Never.  acpiac_notify_triggered permanently disables updating the AC
status on battery notifications, because that update is not needed on
machines where acpiac_notify() is called normally.

I decided to have the flag to stop the updating to prevent unknown
side effects.  But it might be unnecessary.

>> Index: sys/dev/acpi/acpiac.c
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpiac.c,v
>> retrieving revision 1.36
>> diff -u -p -r1.36 acpiac.c
>> --- sys/dev/acpi/acpiac.c6 Apr 2022 18:59:27 -   1.36
>> +++ sys/dev/acpi/acpiac.c17 Aug 2023 06:57:44 -
>> @@ -140,6 +140,8 @@ acpiac_getpsr(struct acpiac_softc *sc)
>>  return (0);
>>  }
>>  
>> +static int acpiac_notify_triggered = 0;
>> +
>>  int
>>  acpiac_notify(struct aml_node *node, int notify_type, void *arg)
>>  {
>> @@ -148,6 +150,8 @@ acpiac_notify(struct aml_node *node, int
>>  dnprintf(10, "acpiac_notify: %.2x %s\n", notify_type,
>>  DEVNAME(sc));
>>  
>> +acpiac_notify_triggered = 1;
>> +
>>  switch (notify_type) {
>>  case 0x00:
>>  case 0x01:
>> @@ -164,4 +168,22 @@ acpiac_notify(struct aml_node *node, int
>>  break;
>>  }
>>  return (0);
>> +}
>> +
>> +void
>> +acpiac_battery_notify(void)
>> +{
>> +struct acpi_softc *sc = acpi_softc;
>> +struct acpi_ac *ac;
>> +
>> +if (acpiac_notify_triggered)
>> +return;
>> +/*
>> + * On some machines (vaio VJPK23 at least) AC status notifications
>> + * are not triggered.  Update the AC status when battery notifications.
>> + */
>> +SLIST_FOREACH(ac, >sc_ac, aac_link) {
>> +acpiac_refresh(ac->aac_softc);
>> +acpi_record_event(sc, APM_POWER_CHANGE);
>> +}
>>  }
>> Index: sys/dev/acpi/acpibat.c
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
>> retrieving revision 1.70
>> diff -u -p -r1.70 acpibat.c
>> --- sys/dev/acpi/acpibat.c   6 Apr 2022 18:59:27 -   1.70
>> +++ sys/dev/acpi/acpibat.c   17 Aug 2023 06:57:45 -
>> @@ -536,5 +536,7 @@ acpibat_notify(struct aml_node *node, in
>>  acpibat_refresh(sc);
>>  acpi_record_event(sc->sc_acpi, APM_POWER_CHANGE);
>>  
>> +acpiac_battery_notify();
>> +
>>  return (0);
>>  }
>> Index: sys/dev/acpi/acpidev.h
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpidev.h,v
>> retrieving revision 1.44
>> diff -u -p -r1.44 acpidev.h
>> --- sys/dev/acpi/acpidev.h   29 Jun 2018 17:39:18 -  1.44
>> +++ sys/dev/acpi/acpidev.h   17 Aug 2023 06:57:45 -
>> @@ -306,6 +306,8 @@ struct acpiac_softc {
>>  struct ksensordev   sc_sensdev;
>>  };
>>  
>> +void acpiac_battery_notify(void);
>> +
>>  struct acpibat_softc {
>>  struct device   sc_dev;
>>  
>> 
> 



Re: diff: trigger acpiac_refresh when acpibat notification

2023-08-17 Thread YASUOKA Masahiko
Let me clarify some.

On Thu, 17 Aug 2023 16:12:07 +0900 (JST)
YASUOKA Masahiko  wrote:
> Update the AC status when the battery notification is happened.
> Because the AC status notification doesn't happen on some machines.

At that time (plugging or unplugging the AC), a battery notification
always happens.  So the diff uses the battery notification instead of
the AC status notification.

> My vaio actually has this problem.
> 
> Also Linux is doing the same thing
> 
> https://github.com/torvalds/linux/blob/v6.4/drivers/acpi/ac.c#L165-L183
> 
> ok? comments?
> 
> Index: sys/dev/acpi/acpiac.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpiac.c,v
> retrieving revision 1.36
> diff -u -p -r1.36 acpiac.c
> --- sys/dev/acpi/acpiac.c 6 Apr 2022 18:59:27 -   1.36
> +++ sys/dev/acpi/acpiac.c 17 Aug 2023 06:57:44 -
> @@ -140,6 +140,8 @@ acpiac_getpsr(struct acpiac_softc *sc)
>   return (0);
>  }
>  
> +static int acpiac_notify_triggered = 0;
> +
>  int
>  acpiac_notify(struct aml_node *node, int notify_type, void *arg)
>  {
> @@ -148,6 +150,8 @@ acpiac_notify(struct aml_node *node, int
>   dnprintf(10, "acpiac_notify: %.2x %s\n", notify_type,
>   DEVNAME(sc));
>  
> + acpiac_notify_triggered = 1;
> +
>   switch (notify_type) {
>   case 0x00:
>   case 0x01:
> @@ -164,4 +168,22 @@ acpiac_notify(struct aml_node *node, int
>   break;
>   }
>   return (0);
> +}
> +
> +void
> +acpiac_battery_notify(void)
> +{
> + struct acpi_softc *sc = acpi_softc;
> + struct acpi_ac *ac;
> +
> + if (acpiac_notify_triggered)
> + return;
> + /*
> +  * On some machines (vaio VJPK23 at least) AC status notifications
> +  * are not triggered.  Update the AC status when battery notifications.
> +  */
> + SLIST_FOREACH(ac, >sc_ac, aac_link) {
> + acpiac_refresh(ac->aac_softc);
> + acpi_record_event(sc, APM_POWER_CHANGE);
> + }
>  }
> Index: sys/dev/acpi/acpibat.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
> retrieving revision 1.70
> diff -u -p -r1.70 acpibat.c
> --- sys/dev/acpi/acpibat.c6 Apr 2022 18:59:27 -   1.70
> +++ sys/dev/acpi/acpibat.c17 Aug 2023 06:57:45 -
> @@ -536,5 +536,7 @@ acpibat_notify(struct aml_node *node, in
>   acpibat_refresh(sc);
>   acpi_record_event(sc->sc_acpi, APM_POWER_CHANGE);
>  
> + acpiac_battery_notify();
> +
>   return (0);
>  }
> Index: sys/dev/acpi/acpidev.h
> ===
> RCS file: /cvs/src/sys/dev/acpi/acpidev.h,v
> retrieving revision 1.44
> diff -u -p -r1.44 acpidev.h
> --- sys/dev/acpi/acpidev.h29 Jun 2018 17:39:18 -  1.44
> +++ sys/dev/acpi/acpidev.h17 Aug 2023 06:57:45 -
> @@ -306,6 +306,8 @@ struct acpiac_softc {
>   struct ksensordev   sc_sensdev;
>  };
>  
> +void acpiac_battery_notify(void);
> +
>  struct acpibat_softc {
>   struct device   sc_dev;
>  
> 



Re: diff: trigger acpiac_refresh when acpibat notification

2023-08-17 Thread YASUOKA Masahiko
Hi,

Thank you for the Japanese subtitle.  It helps me. :)

On Thu, 17 Aug 2023 17:10:51 +0900
"lain."  wrote:
> If I understand correctly, you want to check the battery level, and give
> you a notification when battery is low, right?
> 正しく理解しているなら、YASUOKAさんはバッテリーのレベルを確認し、
> バッテリーが低くなったら通知を受け取りたい、ということですよね?

No, I'd like to fix the hw_power variable (hw.power in sysctl) so that
it changes properly.  On my vaio it doesn't change when the AC is
plugged or unplugged.

What I actually want to fix is a problem of "apm -A".  "Performance
adjustment mode" uses "hw_power" variable.  For example, hw.set_perf
is always 100 if the vaio booted with the AC connected because
hw_power is always 1.  I suppose that hw.set_perf should be lower when
the PC is idle and the AC is unplugged.

> In this case, you can use apm for that.
> その場合、それには「apm」を使うことができます。
> 
> Battery level (percent) checking/バッテリーのレベル(パーセント)を確認するため:apm -l
> Estimated battery duration (minutes)/バッテリーが持つ予定の時間まで(分)を確認するため:apm -m
> 
> On 2023年08月17日 16:12, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> Update the AC status when the battery notification is happened.
>> Because the AC status notification doesn't happen on some machines.
>> My vaio actually has this problem.
>> 
>> Also Linux is doing the same thing
>> 
>> https://github.com/torvalds/linux/blob/v6.4/drivers/acpi/ac.c#L165-L183
>> 
>> ok? comments?
>> 
>> Index: sys/dev/acpi/acpiac.c
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpiac.c,v
>> retrieving revision 1.36
>> diff -u -p -r1.36 acpiac.c
>> --- sys/dev/acpi/acpiac.c6 Apr 2022 18:59:27 -   1.36
>> +++ sys/dev/acpi/acpiac.c17 Aug 2023 06:57:44 -
>> @@ -140,6 +140,8 @@ acpiac_getpsr(struct acpiac_softc *sc)
>>  return (0);
>>  }
>>  
>> +static int acpiac_notify_triggered = 0;
>> +
>>  int
>>  acpiac_notify(struct aml_node *node, int notify_type, void *arg)
>>  {
>> @@ -148,6 +150,8 @@ acpiac_notify(struct aml_node *node, int
>>  dnprintf(10, "acpiac_notify: %.2x %s\n", notify_type,
>>  DEVNAME(sc));
>>  
>> +acpiac_notify_triggered = 1;
>> +
>>  switch (notify_type) {
>>  case 0x00:
>>  case 0x01:
>> @@ -164,4 +168,22 @@ acpiac_notify(struct aml_node *node, int
>>  break;
>>  }
>>  return (0);
>> +}
>> +
>> +void
>> +acpiac_battery_notify(void)
>> +{
>> +struct acpi_softc *sc = acpi_softc;
>> +struct acpi_ac *ac;
>> +
>> +if (acpiac_notify_triggered)
>> +return;
>> +/*
>> + * On some machines (vaio VJPK23 at least) AC status notifications
>> + * are not triggered.  Update the AC status when battery notifications.
>> + */
>> +SLIST_FOREACH(ac, >sc_ac, aac_link) {
>> +acpiac_refresh(ac->aac_softc);
>> +acpi_record_event(sc, APM_POWER_CHANGE);
>> +}
>>  }
>> Index: sys/dev/acpi/acpibat.c
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
>> retrieving revision 1.70
>> diff -u -p -r1.70 acpibat.c
>> --- sys/dev/acpi/acpibat.c   6 Apr 2022 18:59:27 -   1.70
>> +++ sys/dev/acpi/acpibat.c   17 Aug 2023 06:57:45 -
>> @@ -536,5 +536,7 @@ acpibat_notify(struct aml_node *node, in
>>  acpibat_refresh(sc);
>>  acpi_record_event(sc->sc_acpi, APM_POWER_CHANGE);
>>  
>> +acpiac_battery_notify();
>> +
>>  return (0);
>>  }
>> Index: sys/dev/acpi/acpidev.h
>> ===
>> RCS file: /cvs/src/sys/dev/acpi/acpidev.h,v
>> retrieving revision 1.44
>> diff -u -p -r1.44 acpidev.h
>> --- sys/dev/acpi/acpidev.h   29 Jun 2018 17:39:18 -  1.44
>> +++ sys/dev/acpi/acpidev.h   17 Aug 2023 06:57:45 -
>> @@ -306,6 +306,8 @@ struct acpiac_softc {
>>  struct ksensordev   sc_sensdev;
>>  };
>>  
>> +void acpiac_battery_notify(void);
>> +
>>  struct acpibat_softc {
>>  struct device   sc_dev;
>>  
>> 
> 
> -- 
> lain.


diff: trigger acpiac_refresh when acpibat notification

2023-08-17 Thread YASUOKA Masahiko
Hi,

Update the AC status when a battery notification happens, because the
AC status notification doesn't happen on some machines.
My vaio actually has this problem.

Also Linux is doing the same thing

https://github.com/torvalds/linux/blob/v6.4/drivers/acpi/ac.c#L165-L183

ok? comments?

Index: sys/dev/acpi/acpiac.c
===
RCS file: /cvs/src/sys/dev/acpi/acpiac.c,v
retrieving revision 1.36
diff -u -p -r1.36 acpiac.c
--- sys/dev/acpi/acpiac.c   6 Apr 2022 18:59:27 -   1.36
+++ sys/dev/acpi/acpiac.c   17 Aug 2023 06:57:44 -
@@ -140,6 +140,8 @@ acpiac_getpsr(struct acpiac_softc *sc)
return (0);
 }
 
+static int acpiac_notify_triggered = 0;
+
 int
 acpiac_notify(struct aml_node *node, int notify_type, void *arg)
 {
@@ -148,6 +150,8 @@ acpiac_notify(struct aml_node *node, int
dnprintf(10, "acpiac_notify: %.2x %s\n", notify_type,
DEVNAME(sc));
 
+   acpiac_notify_triggered = 1;
+
switch (notify_type) {
case 0x00:
case 0x01:
@@ -164,4 +168,22 @@ acpiac_notify(struct aml_node *node, int
break;
}
return (0);
+}
+
+void
+acpiac_battery_notify(void)
+{
+   struct acpi_softc *sc = acpi_softc;
+   struct acpi_ac *ac;
+
+   if (acpiac_notify_triggered)
+   return;
+   /*
+* On some machines (vaio VJPK23 at least) AC status notifications
+* are not triggered.  Update the AC status when battery notifications.
+*/
+   SLIST_FOREACH(ac, >sc_ac, aac_link) {
+   acpiac_refresh(ac->aac_softc);
+   acpi_record_event(sc, APM_POWER_CHANGE);
+   }
 }
Index: sys/dev/acpi/acpibat.c
===
RCS file: /cvs/src/sys/dev/acpi/acpibat.c,v
retrieving revision 1.70
diff -u -p -r1.70 acpibat.c
--- sys/dev/acpi/acpibat.c  6 Apr 2022 18:59:27 -   1.70
+++ sys/dev/acpi/acpibat.c  17 Aug 2023 06:57:45 -
@@ -536,5 +536,7 @@ acpibat_notify(struct aml_node *node, in
acpibat_refresh(sc);
acpi_record_event(sc->sc_acpi, APM_POWER_CHANGE);
 
+   acpiac_battery_notify();
+
return (0);
 }
Index: sys/dev/acpi/acpidev.h
===
RCS file: /cvs/src/sys/dev/acpi/acpidev.h,v
retrieving revision 1.44
diff -u -p -r1.44 acpidev.h
--- sys/dev/acpi/acpidev.h  29 Jun 2018 17:39:18 -  1.44
+++ sys/dev/acpi/acpidev.h  17 Aug 2023 06:57:45 -
@@ -306,6 +306,8 @@ struct acpiac_softc {
struct ksensordev   sc_sensdev;
 };
 
+void acpiac_battery_notify(void);
+
 struct acpibat_softc {
struct device   sc_dev;
 



attach azalia(4) for "Intel 700 Series HD Audio"

2023-07-30 Thread YASUOKA Masahiko
Hello,

New vaio has an audio device which is not configured.

  "Intel 700 Series HD Audio" rev 0x01 at pci0 dev 31 function 3 not configured

 0:31:3: Intel unknown
0x: Vendor ID: 8086, Product ID: 51ca
0x0004: Command: 0006, Status: 0010
0x0008: Class: 04 Multimedia, Subclass: 01 Audio,
Interface: 00, Revision: 01

The diff attaches azalia(4) to the audio device, and it works fine.

ok?

Index: sys/dev/pci/azalia.c
===
RCS file: /disk/cvs/openbsd/src/sys/dev/pci/azalia.c,v
retrieving revision 1.283
diff -u -p -r1.283 azalia.c
--- sys/dev/pci/azalia.c21 Feb 2023 13:42:59 -  1.283
+++ sys/dev/pci/azalia.c30 Jul 2023 07:31:30 -
@@ -463,6 +463,7 @@ azalia_configure_pci(azalia_t *az)
case PCI_PRODUCT_INTEL_600SERIES_HDA:
case PCI_PRODUCT_INTEL_600SERIES_LP_HDA:
case PCI_PRODUCT_INTEL_700SERIES_HDA:
+   case PCI_PRODUCT_INTEL_700SERIES_LP_HDA:
case PCI_PRODUCT_INTEL_C600_HDA:
case PCI_PRODUCT_INTEL_C610_HDA_1:
case PCI_PRODUCT_INTEL_C610_HDA_2:
@@ -492,6 +493,7 @@ const struct pci_matchid azalia_pci_devi
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_500SERIES_HDA },
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_500SERIES_LP_HDA },
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_600SERIES_LP_HDA },
+   { PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_700SERIES_LP_HDA },
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_APOLLOLAKE_HDA },
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_GLK_HDA },
{ PCI_VENDOR_INTEL, PCI_PRODUCT_INTEL_JSL_HDA },



assign wsdisplay0 to the glass console

2023-07-19 Thread YASUOKA Masahiko
Hi,

I noticed that the keyboard doesn't work for RAMDISK kernel on HP DL20
Gen10.  The kernel assigns wsdisplay0 for VGA and wsdisplay1 for
efifb.  But the actual glass console is efifb, which doesn't have
any keyboard assigned because the keyboard is assigned to wsdisplay0.

GENERIC kernel doesn't have the same problem because it was fixed at

  sys/arch/amd64/conf/GENERIC 1.347
  sys/arch/i386/conf/GENERIC 1.753
  (https://github.com/openbsd/src/commit/ea71d02a7c89)

So the diff does the same thing: it makes sure wsdisplay0 is always
assigned to the console.


ok?

Assign wsdisplay0 to the glass console always.  The same change is
done for GENERIC already.

Index: sys/arch/amd64/conf/RAMDISK
===
RCS file: /var/cvs/openbsd/src/sys/arch/amd64/conf/RAMDISK,v
retrieving revision 1.85
diff -u -p -r1.85 RAMDISK
--- sys/arch/amd64/conf/RAMDISK 26 Dec 2021 13:55:36 -  1.85
+++ sys/arch/amd64/conf/RAMDISK 19 Jul 2023 07:08:36 -
@@ -74,7 +74,7 @@ pckbd*at pckbc?   # PC keyboard
 wskbd* at pckbd? mux 1
 vga0   at isa?
 vga*   at pci?
-wsdisplay* at vga?
+wsdisplay0 at vga? console 1
 
 com0   at isa? port 0x3f8 irq 4# standard PC serial ports
 com1   at isa? port 0x2f8 irq 3
Index: sys/arch/amd64/conf/RAMDISK_CD
===
RCS file: /var/cvs/openbsd/src/sys/arch/amd64/conf/RAMDISK_CD,v
retrieving revision 1.201
diff -u -p -r1.201 RAMDISK_CD
--- sys/arch/amd64/conf/RAMDISK_CD  2 Apr 2023 03:40:54 -   1.201
+++ sys/arch/amd64/conf/RAMDISK_CD  19 Jul 2023 07:08:36 -
@@ -149,7 +149,7 @@ pckbd*  at pckbc?   # PC keyboard
 wskbd* at pckbd? mux 1
 vga0   at isa?
 vga*   at pci?
-wsdisplay* at vga?
+wsdisplay0 at vga? console 1
 
 efifb0 at mainbus? # EFI Framebuffer
 wsdisplay0 at efifb? console 1
Index: sys/arch/i386/conf/RAMDISK
===
RCS file: /var/cvs/openbsd/src/sys/arch/i386/conf/RAMDISK,v
retrieving revision 1.201
diff -u -p -r1.201 RAMDISK
--- sys/arch/i386/conf/RAMDISK  16 Feb 2021 00:03:54 -  1.201
+++ sys/arch/i386/conf/RAMDISK  19 Jul 2023 07:08:36 -
@@ -81,8 +81,8 @@ wskbd*at pckbd? mux 1
 vga0   at isa?
 vga*   at pci?
 pcdisplay0 at isa? # CGA, MDA, EGA, HGA
-wsdisplay* at vga?
-wsdisplay* at pcdisplay?
+wsdisplay0 at vga? console 1
+wsdisplay0 at pcdisplay? console 1
 
 com0   at isa? port 0x3f8 irq 4# standard PC serial ports
 com1   at isa? port 0x2f8 irq 3
Index: sys/arch/i386/conf/RAMDISK_CD
===
RCS file: /var/cvs/openbsd/src/sys/arch/i386/conf/RAMDISK_CD,v
retrieving revision 1.252
diff -u -p -r1.252 RAMDISK_CD
--- sys/arch/i386/conf/RAMDISK_CD   26 Jun 2022 20:05:06 -  1.252
+++ sys/arch/i386/conf/RAMDISK_CD   19 Jul 2023 07:08:36 -
@@ -121,8 +121,8 @@ wskbd*  at pckbd? mux 1
 vga0   at isa?
 vga*   at pci?
 pcdisplay0 at isa? # CGA, MDA, EGA, HGA
-wsdisplay* at vga?
-wsdisplay* at pcdisplay?
+wsdisplay0 at vga? console 1
+wsdisplay0 at pcdisplay? console 1
 
 com0   at isa? port 0x3f8 irq 4# standard PC serial ports
 com1   at isa? port 0x2f8 irq 3



make mbstat smaller (was Re: make mstat smaller)

2023-07-09 Thread YASUOKA Masahiko
On Sat, 08 Jul 2023 21:58:30 +0300 (EEST)
YASUOKA Masahiko  wrote:
> The diff makes the mbstat be the same size which is actually used.
> Also revert the previous that the mbstat is located on the stack.

The userland program also needed to be changed.

ok?

Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.417
diff -u -p -r1.417 kern_sysctl.c
--- sys/kern/kern_sysctl.c  7 Jul 2023 16:27:46 -   1.417
+++ sys/kern/kern_sysctl.c  9 Jul 2023 07:22:58 -
@@ -515,22 +515,20 @@ kern_sysctl(int *name, u_int namelen, vo
case KERN_MBSTAT: {
extern struct cpumem *mbstat;
uint64_t counters[MBSTAT_COUNT];
-   struct mbstat *mbs;
+   struct mbstat mbs;
unsigned int i;
-   int ret;
 
-   mbs = malloc(sizeof(*mbs), M_TEMP, M_WAITOK | M_ZERO);
+   memset(, 0, sizeof(mbs));
counters_read(mbstat, counters, MBSTAT_COUNT);
for (i = 0; i < MBSTAT_TYPES; i++)
-   mbs->m_mtypes[i] = counters[i];
+   mbs.m_mtypes[i] = counters[i];
 
-   mbs->m_drops = counters[MBSTAT_DROPS];
-   mbs->m_wait = counters[MBSTAT_WAIT];
-   mbs->m_drain = counters[MBSTAT_DRAIN];
+   mbs.m_drops = counters[MBSTAT_DROPS];
+   mbs.m_wait = counters[MBSTAT_WAIT];
+   mbs.m_drain = counters[MBSTAT_DRAIN];
 
-   ret = sysctl_rdstruct(oldp, oldlenp, newp, mbs, sizeof(*mbs));
-   free(mbs, M_TEMP, sizeof(*mbs));
-   return (ret);
+   return (sysctl_rdstruct(oldp, oldlenp, newp,
+   , sizeof(mbs)));
}
case KERN_MSGBUFSIZE:
case KERN_CONSBUFSIZE: {
Index: sys/sys/mbuf.h
===
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.260
diff -u -p -r1.260 mbuf.h
--- sys/sys/mbuf.h  7 Jul 2023 14:17:34 -   1.260
+++ sys/sys/mbuf.h  9 Jul 2023 07:22:58 -
@@ -363,6 +363,12 @@ u_int mextfree_register(void (*)(caddr_t
 /* length to m_copy to copy all */
 #defineM_COPYALL   10
 
+#define MBSTAT_TYPES   MT_NTYPES
+#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
+#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
+#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
+#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
+
 /*
  * Mbuf statistics.
  * For statistics related to mbuf and cluster allocations, see also the
@@ -372,14 +378,9 @@ struct mbstat {
u_long  m_drops;/* times failed to find space */
u_long  m_wait; /* times waited for space */
u_long  m_drain;/* times drained protocols for space */
-   u_long  m_mtypes[256];  /* type specific mbuf allocations */
+   u_long  m_mtypes[MBSTAT_COUNT];
+   /* type specific mbuf allocations */
 };
-
-#define MBSTAT_TYPES   MT_NTYPES
-#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
-#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
-#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
-#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
 
 #include 
 
Index: usr.bin/netstat/mbuf.c
===
RCS file: /cvs/src/usr.bin/netstat/mbuf.c,v
retrieving revision 1.44
diff -u -p -r1.44 mbuf.c
--- usr.bin/netstat/mbuf.c  7 Jul 2023 14:17:35 -   1.44
+++ usr.bin/netstat/mbuf.c  9 Jul 2023 07:22:58 -
@@ -93,7 +93,7 @@ mbpr(void)
struct mbtypes *mp;
size_t size;
 
-   if (nmbtypes != 256) {
+   if (nmbtypes != MBSTAT_COUNT) {
fprintf(stderr,
"%s: unexpected change to mbstat; check source\n",
__progname);






make mstat smaller

2023-07-08 Thread YASUOKA Masahiko
Hi,

The diff shrinks struct mbstat to the size that is actually used.
It also reverts the previous change, so that the mbstat is located on
the stack again.

ok?

Index: sys/kern/kern_sysctl.c
===
RCS file: /cvs/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.417
diff -u -p -r1.417 kern_sysctl.c
--- sys/kern/kern_sysctl.c  7 Jul 2023 16:27:46 -   1.417
+++ sys/kern/kern_sysctl.c  8 Jul 2023 18:51:32 -
@@ -515,22 +515,20 @@ kern_sysctl(int *name, u_int namelen, vo
case KERN_MBSTAT: {
extern struct cpumem *mbstat;
uint64_t counters[MBSTAT_COUNT];
-   struct mbstat *mbs;
+   struct mbstat mbs;
unsigned int i;
-   int ret;
 
-   mbs = malloc(sizeof(*mbs), M_TEMP, M_WAITOK | M_ZERO);
+   memset(, 0, sizeof(mbs));
counters_read(mbstat, counters, MBSTAT_COUNT);
for (i = 0; i < MBSTAT_TYPES; i++)
-   mbs->m_mtypes[i] = counters[i];
+   mbs.m_mtypes[i] = counters[i];
 
-   mbs->m_drops = counters[MBSTAT_DROPS];
-   mbs->m_wait = counters[MBSTAT_WAIT];
-   mbs->m_drain = counters[MBSTAT_DRAIN];
+   mbs.m_drops = counters[MBSTAT_DROPS];
+   mbs.m_wait = counters[MBSTAT_WAIT];
+   mbs.m_drain = counters[MBSTAT_DRAIN];
 
-   ret = sysctl_rdstruct(oldp, oldlenp, newp, mbs, sizeof(*mbs));
-   free(mbs, M_TEMP, sizeof(*mbs));
-   return (ret);
+   return (sysctl_rdstruct(oldp, oldlenp, newp,
+   , sizeof(mbs)));
}
case KERN_MSGBUFSIZE:
case KERN_CONSBUFSIZE: {
Index: sys/sys/mbuf.h
===
RCS file: /cvs/src/sys/sys/mbuf.h,v
retrieving revision 1.260
diff -u -p -r1.260 mbuf.h
--- sys/sys/mbuf.h  7 Jul 2023 14:17:34 -   1.260
+++ sys/sys/mbuf.h  8 Jul 2023 18:51:33 -
@@ -363,6 +363,12 @@ u_int mextfree_register(void (*)(caddr_t
 /* length to m_copy to copy all */
 #defineM_COPYALL   10
 
+#define MBSTAT_TYPES   MT_NTYPES
+#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
+#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
+#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
+#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
+
 /*
  * Mbuf statistics.
  * For statistics related to mbuf and cluster allocations, see also the
@@ -372,14 +378,9 @@ struct mbstat {
u_long  m_drops;/* times failed to find space */
u_long  m_wait; /* times waited for space */
u_long  m_drain;/* times drained protocols for space */
-   u_long  m_mtypes[256];  /* type specific mbuf allocations */
+   u_long  m_mtypes[MBSTAT_COUNT];
+   /* type specific mbuf allocations */
 };
-
-#define MBSTAT_TYPES   MT_NTYPES
-#define MBSTAT_DROPS   (MBSTAT_TYPES + 0)
-#define MBSTAT_WAIT(MBSTAT_TYPES + 1)
-#define MBSTAT_DRAIN   (MBSTAT_TYPES + 2)
-#define MBSTAT_COUNT   (MBSTAT_TYPES + 3)
 
 #include 
 



Re: acpi: move acpiioctl to x86

2023-07-08 Thread YASUOKA Masahiko
On Fri, 7 Jul 2023 11:56:42 +0200
Tobias Heider  wrote:
> On Wed, Jul 05, 2023 at 04:53:33PM +0200, Tobias Heider wrote:
>> I am planning to restructure the APM/sleep APIs to make it easier to suspend
>> from more places like as a suspend keyboard shortcut.
>> 
>> The acpiioctl handler is x86 specific code which is currently built on all
>> platforms but only hooked up on i386 and amd64.  It is also in the way of
>> my plans, so I'd prefer if we move it to acpi_x86.c where all the other
>> x86-only acpi code lives.
>> 
> 
> The previous diff wasn't enough to solve the problem and broke RAMDISK,
> so here's a next try.  This moves anything /dev/apm related to the
> new acpi_apm.c file which is only included on i386 and amd64 and properly
> handles SMALL_KERNEL.
> 
> Some apm related code (apm_apminfo, acpi_sleep_task) remains in acpi.c for
> now because it is also used by arm64 or called from MI paths.
> I plan to clean up the sleep task in the follow-up diff but this one is
> big enough already.
> 
> Tested on amd64, i386, macppc and arm64 with GENERIC.MP and RAMDISK.

Tested on my vaio.  No regression so far.
Also the diff seems correct.



Use u_long for struct mstat

2023-07-07 Thread YASUOKA Masahiko
Hi,

I'd like to expand the counters in struct mbstat from u_short to u_long.

When I was debugging a mbuf leak, I saw the result of "netstat -m"
---
28647 mbufs in use:
28551 mbufs allocated to data
4 mbufs allocated to packet headers
92 mbufs allocated to socket names and addresses
159506/160736 mbuf 2048 byte clusters in use (current/peak)
0/30 mbuf 2112 byte clusters in use (current/peak)
0/24 mbuf 4096 byte clusters in use (current/peak)
0/24 mbuf 8192 byte clusters in use (current/peak)
0/0 mbuf 9216 byte clusters in use (current/peak)
0/0 mbuf 12288 byte clusters in use (current/peak)
0/16 mbuf 16384 byte clusters in use (current/peak)
0/0 mbuf 65536 byte clusters in use (current/peak)
360980/362484/2097152 Kbytes allocated to network (current/peak/max)
0 requests for memory denied
0 requests for memory delayed
0 calls to protocol drain routines
---

For a few days I couldn't figure out why mcl2k was leaking without mbufs
leaking.  Actually the count was shown as a u_short (actual number % 65536).


ok? comments?

Index: sys/sys/mbuf.h
===
RCS file: /disk/cvs/openbsd/src/sys/sys/mbuf.h,v
retrieving revision 1.259
diff -u -p -r1.259 mbuf.h
--- sys/sys/mbuf.h  4 Jul 2023 09:47:51 -   1.259
+++ sys/sys/mbuf.h  6 Jul 2023 14:36:15 -
@@ -372,7 +372,7 @@ struct mbstat {
u_long  m_drops;/* times failed to find space */
u_long  m_wait; /* times waited for space */
u_long  m_drain;/* times drained protocols for space */
-   u_short m_mtypes[256];  /* type specific mbuf allocations */
+   u_long  m_mtypes[256];  /* type specific mbuf allocations */
 };
 
 #define MBSTAT_TYPES   MT_NTYPES
Index: sys/kern/kern_sysctl.c
===
RCS file: /disk/cvs/openbsd/src/sys/kern/kern_sysctl.c,v
retrieving revision 1.416
diff -u -p -r1.416 kern_sysctl.c
--- sys/kern/kern_sysctl.c  2 Jul 2023 19:02:27 -   1.416
+++ sys/kern/kern_sysctl.c  6 Jul 2023 14:36:15 -
@@ -515,20 +515,22 @@ kern_sysctl(int *name, u_int namelen, vo
case KERN_MBSTAT: {
extern struct cpumem *mbstat;
uint64_t counters[MBSTAT_COUNT];
-   struct mbstat mbs;
+   struct mbstat *mbs;
unsigned int i;
+   int ret;
 
-   memset(&mbs, 0, sizeof(mbs));
+   mbs = malloc(sizeof(*mbs), M_TEMP, M_WAITOK | M_ZERO);
counters_read(mbstat, counters, MBSTAT_COUNT);
for (i = 0; i < MBSTAT_TYPES; i++)
-   mbs.m_mtypes[i] = counters[i];
+   mbs->m_mtypes[i] = counters[i];
 
-   mbs.m_drops = counters[MBSTAT_DROPS];
-   mbs.m_wait = counters[MBSTAT_WAIT];
-   mbs.m_drain = counters[MBSTAT_DRAIN];
+   mbs->m_drops = counters[MBSTAT_DROPS];
+   mbs->m_wait = counters[MBSTAT_WAIT];
+   mbs->m_drain = counters[MBSTAT_DRAIN];
 
-   return (sysctl_rdstruct(oldp, oldlenp, newp,
-   &mbs, sizeof(mbs)));
+   ret = sysctl_rdstruct(oldp, oldlenp, newp, mbs, sizeof(*mbs));
+   free(mbs, M_TEMP, sizeof(*mbs));
+   return (ret);
}
case KERN_MSGBUFSIZE:
case KERN_CONSBUFSIZE: {
Index: usr.bin/netstat/mbuf.c
===
RCS file: /disk/cvs/openbsd/src/usr.bin/netstat/mbuf.c,v
retrieving revision 1.43
diff -u -p -r1.43 mbuf.c
--- usr.bin/netstat/mbuf.c  16 Jul 2019 17:39:02 -  1.43
+++ usr.bin/netstat/mbuf.c  6 Jul 2023 14:36:15 -
@@ -78,7 +78,7 @@ static struct mbtypes {
{ 0, 0 }
 };
 
-int nmbtypes = sizeof(mbstat.m_mtypes) / sizeof(short);
+int nmbtypes = sizeof(mbstat.m_mtypes) / sizeof(u_long);
 bool seen[256];/* "have we seen this type yet?" */
 
 /*
@@ -172,7 +172,7 @@ mbpr(void)
for (mp = mbtypes; mp->mt_name; mp++)
if (mbstat.m_mtypes[mp->mt_type]) {
seen[mp->mt_type] = YES;
-   printf("\t%u mbuf%s allocated to %s\n",
+   printf("\t%lu mbuf%s allocated to %s\n",
mbstat.m_mtypes[mp->mt_type],
plural(mbstat.m_mtypes[mp->mt_type]),
mp->mt_name);
@@ -180,7 +180,7 @@ mbpr(void)
seen[MT_FREE] = YES;
for (i = 0; i < nmbtypes; i++)
if (!seen[i] && mbstat.m_mtypes[i]) {
-   printf("\t%u mbuf%s allocated to <mbuf type %d>\n",
+   printf("\t%lu mbuf%s allocated to <mbuf type %d>\n",
mbstat.m_mtypes[i],
plural(mbstat.m_mtypes[i]), i);
}



Re: tcp timer wrap around, use 64 bit

2023-07-07 Thread YASUOKA Masahiko
Hi,

This netstat diff is already needed to avoid compiler warnings since
it uses struct tcpcb directly.

ok?

Index: usr.bin/netstat/inet.c
===
RCS file: /cvs/src/usr.bin/netstat/inet.c,v
retrieving revision 1.177
diff -u -p -r1.177 inet.c
--- usr.bin/netstat/inet.c  2 Jul 2023 19:59:15 -   1.177
+++ usr.bin/netstat/inet.c  7 Jul 2023 09:01:04 -
@@ -1556,8 +1556,8 @@ tcpcb_dump(u_long off)
p("%lu", snd_cwnd, ", ");
p("%lu", snd_ssthresh, ", ");
p("%lu", max_sndwnd, "\n ");
-   p("%u", t_rcvtime, ", ");
-   p("%u", t_rtttime, ", ");
+   p("%llu", t_rcvtime, ", ");
+   p("%llu", t_rtttime, ", ");
p("%u", t_rtseq, "\n ");
p("%u", t_srtt, ", ");
p("%u", t_rttvar, ", ");
@@ -1570,7 +1570,7 @@ tcpcb_dump(u_long off)
p("%u", request_r_scale, ", ");
p("%u", requested_s_scale, "\n ");
p("%u", ts_recent, ", ");
-   p("%u", ts_recent_age, "\n ");
+   p("%llu", ts_recent_age, "\n ");
p("%u", last_ack_sent, "\n ");
HTONS(tcpcb.t_pmtud_ip_len);
HTONS(tcpcb.t_pmtud_nextmtu);



Re: tcp timer wrap around, use 64 bit

2023-07-07 Thread YASUOKA Masahiko
On Fri, 7 Jul 2023 10:43:21 +0200
Claudio Jeker  wrote:
>> @@ -411,7 +412,7 @@ tcp_stats_display(unsigned long long tot
>>  P(tcpi, rcv_up, "%u")
>>  P(tcpi, rcv_wscale, "%hhu")
>>  P(tcpi, rfbuf_cnt, "%u")
>> -P(tcpi, rfbuf_ts, "%u")
>> +P(tcpi, rfbuf_ts, "%" PRIu64)
> 
> I don't think we need these ugly PRIu64 here. Just use %llu since in
> OpenBSD uint64_t is always a unsigned long long.

Thanks, that was what I didn't see clearly.

ok?

Index: sys/netinet/tcp.h
===
RCS file: /cvs/src/sys/netinet/tcp.h,v
retrieving revision 1.24
diff -u -p -r1.24 tcp.h
--- sys/netinet/tcp.h   19 May 2023 01:04:39 -  1.24
+++ sys/netinet/tcp.h   7 Jul 2023 08:51:40 -
@@ -194,9 +194,9 @@ struct tcp_info {
uint32_ttcpi_snd_wl2;
uint32_ttcpi_snd_max;
uint32_ttcpi_ts_recent;
-   uint32_ttcpi_ts_recent_age;
+   uint64_ttcpi_ts_recent_age;
uint32_ttcpi_rfbuf_cnt;
-   uint32_ttcpi_rfbuf_ts;
+   uint64_ttcpi_rfbuf_ts;
uint32_ttcpi_so_rcv_sb_cc;
uint32_ttcpi_so_rcv_sb_hiwat;
uint32_ttcpi_so_rcv_sb_lowat;
Index: usr.bin/tcpbench/tcpbench.c
===
RCS file: /cvs/src/usr.bin/tcpbench/tcpbench.c,v
retrieving revision 1.69
diff -u -p -r1.69 tcpbench.c
--- usr.bin/tcpbench/tcpbench.c 22 May 2023 12:53:04 -  1.69
+++ usr.bin/tcpbench/tcpbench.c 7 Jul 2023 08:51:41 -
@@ -411,7 +411,7 @@ tcp_stats_display(unsigned long long tot
P(tcpi, rcv_up, "%u")
P(tcpi, rcv_wscale, "%hhu")
P(tcpi, rfbuf_cnt, "%u")
-   P(tcpi, rfbuf_ts, "%u")
+   P(tcpi, rfbuf_ts, "%llu")
P(tcpi, rtt, "%u")
P(tcpi, rttmin, "%u")
P(tcpi, rttvar, "%u")
@@ -436,7 +436,7 @@ tcp_stats_display(unsigned long long tot
P(tcpi, so_snd_sb_lowat, "%u")
P(tcpi, so_snd_sb_wat, "%u")
P(tcpi, ts_recent, "%u")
-   P(tcpi, ts_recent_age, "%u")
+   P(tcpi, ts_recent_age, "%llu")
 #undef S
 #undef P
}



Re: tcp timer wrap around, use 64 bit

2023-07-07 Thread YASUOKA Masahiko
Hi,

Does using 64 bit for timer in tcpcb require this?

ok?

Index: sys/netinet/tcp.h
===
RCS file: /cvs/src/sys/netinet/tcp.h,v
retrieving revision 1.24
diff -u -p -r1.24 tcp.h
--- sys/netinet/tcp.h   19 May 2023 01:04:39 -  1.24
+++ sys/netinet/tcp.h   7 Jul 2023 08:33:26 -
@@ -194,9 +194,9 @@ struct tcp_info {
uint32_ttcpi_snd_wl2;
uint32_ttcpi_snd_max;
uint32_ttcpi_ts_recent;
-   uint32_ttcpi_ts_recent_age;
+   uint64_ttcpi_ts_recent_age;
uint32_ttcpi_rfbuf_cnt;
-   uint32_ttcpi_rfbuf_ts;
+   uint64_ttcpi_rfbuf_ts;
uint32_ttcpi_so_rcv_sb_cc;
uint32_ttcpi_so_rcv_sb_hiwat;
uint32_ttcpi_so_rcv_sb_lowat;
Index: usr.bin/tcpbench/tcpbench.c
===
RCS file: /cvs/src/usr.bin/tcpbench/tcpbench.c,v
retrieving revision 1.69
diff -u -p -r1.69 tcpbench.c
--- usr.bin/tcpbench/tcpbench.c 22 May 2023 12:53:04 -  1.69
+++ usr.bin/tcpbench/tcpbench.c 7 Jul 2023 08:33:26 -
@@ -51,6 +51,7 @@
 #include 
 #include 
 #include 
+#include <inttypes.h>
 
 #define DEFAULT_PORT "12345"
 #define DEFAULT_STATS_INTERVAL 1000 /* ms */
@@ -411,7 +412,7 @@ tcp_stats_display(unsigned long long tot
P(tcpi, rcv_up, "%u")
P(tcpi, rcv_wscale, "%hhu")
P(tcpi, rfbuf_cnt, "%u")
-   P(tcpi, rfbuf_ts, "%u")
+   P(tcpi, rfbuf_ts, "%" PRIu64)
P(tcpi, rtt, "%u")
P(tcpi, rttmin, "%u")
P(tcpi, rttvar, "%u")
@@ -436,7 +437,7 @@ tcp_stats_display(unsigned long long tot
P(tcpi, so_snd_sb_lowat, "%u")
P(tcpi, so_snd_sb_wat, "%u")
P(tcpi, ts_recent, "%u")
-   P(tcpi, ts_recent_age, "%u")
+   P(tcpi, ts_recent_age, "%" PRIu64)
 #undef S
 #undef P
}



Re: tcp timer wrap around, use 64 bit

2023-07-06 Thread YASUOKA Masahiko
On Tue, 4 Jul 2023 12:14:47 +0300
Alexander Bluhm  wrote:
> After changing tcp now tick to milliseconds, it will wrap around
> after 49 days of uptime.  That may be a problem in some places of
> our stack.  Better use a 64 bit counter.

I agree, since we sometimes hit problems in places we could not
foresee in advance.

> As timestamp option is 32 bit in TCP protocol, we have to use the
> lower 32 bit there.  There are casts to 32 bits that should behave
> correctly.  More eyes to review would be helpful.
> 
> As a bonus, start with random 63 bit offset to avoid uptime leakage.
> I am not aware that we leak anywhere, but more random is always
> good.  2^63 milliseconds gives us 2.9*10^8 years of possible uptime.
> 
> ok?

ok yasuoka

> bluhm
> 
> Index: netinet/tcp_input.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
> retrieving revision 1.388
> diff -u -p -r1.388 tcp_input.c
> --- netinet/tcp_input.c   30 May 2023 19:32:57 -  1.388
> +++ netinet/tcp_input.c   4 Jul 2023 08:49:47 -
> @@ -130,8 +130,8 @@ struct timeval tcp_ackdrop_ppslim_last;
>  #define TCP_PAWS_IDLETCP_TIME(24 * 24 * 60 * 60)
>  
>  /* for modulo comparisons of timestamps */
> -#define TSTMP_LT(a,b)((int)((a)-(b)) < 0)
> -#define TSTMP_GEQ(a,b)   ((int)((a)-(b)) >= 0)
> +#define TSTMP_LT(a,b)((int32_t)((a)-(b)) < 0)
> +#define TSTMP_GEQ(a,b)   ((int32_t)((a)-(b)) >= 0)
>  
>  /* for TCP SACK comparisons */
>  #define  SEQ_MIN(a,b)(SEQ_LT(a,b) ? (a) : (b))
> @@ -190,7 +190,7 @@ void   tcp_newreno_partialack(struct tcpc
>  
>  void  syn_cache_put(struct syn_cache *);
>  void  syn_cache_rm(struct syn_cache *);
> -int   syn_cache_respond(struct syn_cache *, struct mbuf *, uint32_t);
> +int   syn_cache_respond(struct syn_cache *, struct mbuf *, uint64_t);
>  void  syn_cache_timer(void *);
>  void  syn_cache_reaper(void *);
>  void  syn_cache_insert(struct syn_cache *, struct tcpcb *);
> @@ -198,10 +198,10 @@ void syn_cache_reset(struct sockaddr *,
>   struct tcphdr *, u_int);
>  int   syn_cache_add(struct sockaddr *, struct sockaddr *, struct tcphdr *,
>   unsigned int, struct socket *, struct mbuf *, u_char *, int,
> - struct tcp_opt_info *, tcp_seq *, uint32_t);
> + struct tcp_opt_info *, tcp_seq *, uint64_t);
>  struct socket *syn_cache_get(struct sockaddr *, struct sockaddr *,
>   struct tcphdr *, unsigned int, unsigned int, struct socket *,
> - struct mbuf *, uint32_t);
> + struct mbuf *, uint64_t);
>  struct syn_cache *syn_cache_lookup(struct sockaddr *, struct sockaddr *,
>   struct syn_cache_head **, u_int);
>  
> @@ -375,7 +375,7 @@ tcp_input(struct mbuf **mp, int *offp, i
>   short ostate;
>   caddr_t saveti;
>   tcp_seq iss, *reuse = NULL;
> - uint32_t now;
> + uint64_t now;
>   u_long tiwin;
>   struct tcp_opt_info opti;
>   struct tcphdr *th;
> @@ -885,7 +885,7 @@ findpcb:
>   goto drop;
>  
>   if (opti.ts_present && opti.ts_ecr) {
> - int rtt_test;
> + int32_t rtt_test;
>  
>   /* subtract out the tcp timestamp modulator */
>   opti.ts_ecr -= tp->ts_modulate;
> @@ -1272,7 +1272,7 @@ trimthenstep6:
>   TSTMP_LT(opti.ts_val, tp->ts_recent)) {
>  
>   /* Check to see if ts_recent is over 24 days old.  */
> - if ((int)(now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
> + if (now - tp->ts_recent_age > TCP_PAWS_IDLE) {
>   /*
>* Invalidate ts_recent.  If this segment updates
>* ts_recent, the age will be reset later and ts_recent
> @@ -2120,7 +2120,7 @@ drop:
>  int
>  tcp_dooptions(struct tcpcb *tp, u_char *cp, int cnt, struct tcphdr *th,
>  struct mbuf *m, int iphlen, struct tcp_opt_info *oi,
> -u_int rtableid, uint32_t now)
> +u_int rtableid, uint64_t now)
>  {
>   u_int16_t mss = 0;
>   int opt, optlen;
> @@ -2686,7 +2686,7 @@ tcp_pulloutofband(struct socket *so, u_i
>   * and update averages and current timeout.
>   */
>  void
> -tcp_xmit_timer(struct tcpcb *tp, int rtt)
> +tcp_xmit_timer(struct tcpcb *tp, int32_t rtt)
>  {
>   int delta, rttmin;
>  
> @@ -3335,7 +3335,7 @@ void
>  syn_cache_timer(void *arg)
>  {
>   struct syn_cache *sc = arg;
> - uint32_t now;
> + uint64_t now;
>  
>   NET_LOCK();
>   if (sc->sc_flags & SCF_DEAD)
> @@ -3469,7 +3469,7 @@ syn_cache_lookup(struct sockaddr *src, s
>   */
>  struct socket *
>  syn_cache_get(struct sockaddr *src, struct sockaddr *dst, struct tcphdr *th,
> -u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint32_t now)
> +u_int hlen, u_int tlen, struct socket *so, struct mbuf *m, uint64_t now)
>  {
>   struct syn_cache *sc;
>   struct syn_cache_head *scp;
> 

diff asr_run.3

2023-05-31 Thread YASUOKA Masahiko
Hi,

asr_run.3 is explaining that ar_rrsetinfo must be freed only for the
blocking function, but I think it must be freed regardless of the
function blocking type.

ok?

Index: lib/libc/asr/asr_run.3
===
RCS file: /cvs/src/lib/libc/asr/asr_run.3,v
retrieving revision 1.5
diff -u -p -r1.5 asr_run.3
--- lib/libc/asr/asr_run.3  31 Mar 2022 17:27:15 -  1.5
+++ lib/libc/asr/asr_run.3  20 Mar 2023 05:42:02 -
@@ -259,7 +259,7 @@ Upon completion, the return code is foun
 .Fa ar_rrset_errno
 and the address to the newly allocated result set is set in
 .Fa ar_rrsetinfo .
-As for the blocking function, it must be freed by calling
+The caller must free it by calling
 .Xr freerrset 3 .
 .Pp
 The



Re: atactl(8): 'readattr' subcommand quits silently.

2023-04-26 Thread YASUOKA Masahiko
Hello,

On Wed, 26 Apr 2023 16:32:28 +0900
Yuichiro NAITO  wrote:
> These 2 revisions of 'attr_val' and 'attr_thr' are different on this
> disk.
> The comment says that it's wrong vendor implementation but I can see
> 'smartctl' shows the attributes as follows. NetBSD's atactl doesn't
> see these 2 revisions are same but checks each checksum is valid.

The diff seems correct.

ok?

> diff --git a/sbin/atactl/atactl.c b/sbin/atactl/atactl.c
> index 85dfced8c9a..1f77460ce3d 100644
> --- a/sbin/atactl/atactl.c
> +++ b/sbin/atactl/atactl.c
> @@ -1657,13 +1657,11 @@ device_attr(int argc, char *argv[])
>   req.datalen = sizeof(attr_thr);
>   ata_command();
> 
> - if (attr_val.revision != attr_thr.revision) {
> - /*
> -  * Non standard vendor implementation.
> -  * Return, since we don't know how to use this.
> -  */
> - return;
> - }
> + if (smart_cksum((u_int8_t *)&attr_val, sizeof(attr_val)) != 0)
> + errx(1, "Checksum mismatch (attr_val)");
> +
> + if (smart_cksum((u_int8_t *)&attr_thr, sizeof(attr_thr)) != 0)
> + errx(1, "Checksum mismatch (attr_thr)");
> 
>   attr = attr_val.attribute;
>   thr = attr_thr.threshold;
> 



diff: www/faq/pf/carp.html

2023-04-17 Thread YASUOKA Masahiko
Hi,

carpdemote is increased/decreased when the link state of the carpdev
is down/up.  This behavior has not been related to net.inet.carp.preempt
since "carpdemote" was introduced.

But the faq still says the "net.inet.carp.preempt" variable enables it.

I'd like to commit the diff.

ok or any comment is welcome.

Index: faq/pf/carp.html
===
RCS file: /cvs/www/faq/pf/carp.html,v
retrieving revision 1.65
diff -u -p -r1.65 carp.html
--- faq/pf/carp.html5 May 2021 21:49:29 -   1.65
+++ faq/pf/carp.html17 Apr 2023 11:32:46 -
@@ -191,8 +191,9 @@ As such, CARP is configured using
 By default, all carp interfaces are added to the carp group.
 Each group has a carpdemote counter affecting all carp
 interfaces belonging to that group.
-As described below, it can be useful to group certain interfaces together
-for failover purposes.
+If one physical CARP-enabled interface goes down, CARP will increase
+the demotion counter by 1 on interface groups that the carp(4) interface is
+a member of, in effect causing all group members to fail-over together.
 
 ipaddress
 This is the shared IP address assigned to the redundancy group.
@@ -216,12 +217,6 @@ Further CARP behavior can be controlled 
 net.inet.carp.preempt
 Allow hosts within a redundancy group that have a better
 advbase and advskew to preempt the master.
-In addition, this option also enables failing over a group of interfaces
-together in the event that one interface goes down.
-If one physical CARP-enabled interface goes down, CARP will increase
-the demotion counter, carpdemote, by 1 on interface groups
-that the carp(4) interface is a member of, in effect causing all group
-members to fail-over together.
 net.inet.carp.preempt is 0 (disabled) by default.
 
 net.inet.carp.log



Re: npppd(8): remove "pipex" option

2023-02-01 Thread YASUOKA Masahiko
Hi,

On Wed, 1 Feb 2023 21:32:29 +0300
Vitaliy Makkoveev  wrote:
> On Wed, Feb 01, 2023 at 09:00:13PM +0900, YASUOKA Masahiko wrote:
>> ...
>> 
>> But I think we should keep the part since it is needed when adding a
>> tunneling protocol which is not supported by pipex, or running npppd
>> on another OS.
>> 
>> >> If having "pipex yes/no" configuration is misleading, we can improve
>> >> the man page or the configuration itself.
>> > 
>> >  pipex yes | no
>> >  Specify whether npppd(8) uses pipex(4).  The default is
>> >  “yes”. The sysctl(8) variable net.pipex.enable should
>> >  also be enabled to use pipex(4).
>> > 
>> > There is no misleading. But with "pipex no" npppd(8) is usable with
>> > pppac(4), but with pppx(4) it is not. Also, I don't like that it
>> > successfully creates connection. Guess, it better to deny "pipex no"
>> > for pppx(4).
>> 
>> I agree with both.
>>
> 
> So, deny "pipex no" for pppx(4) interfaces.

ok yasuoka

Thanks,

> Index: usr.sbin/npppd/npppd/npppd.conf.5
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
> retrieving revision 1.30
> diff -u -p -r1.30 npppd.conf.5
> --- usr.sbin/npppd/npppd/npppd.conf.5 31 Mar 2022 17:27:30 -  1.30
> +++ usr.sbin/npppd/npppd/npppd.conf.5 1 Feb 2023 18:28:29 -
> @@ -362,6 +362,11 @@ variable
>  .Va net.pipex.enable
>  should also be enabled to use
>  .Xr pipex 4 .
> +This value must be
> +.Dq yes
> +for
> +.Xr pppx 4
> +interfaces.
>  .It Ic debug-dump-pktin Ar protocol ...
>  If this option is specified,
>  .Xr npppd 8
> Index: usr.sbin/npppd/npppd/parse.y
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/parse.y,v
> retrieving revision 1.25
> diff -u -p -r1.25 parse.y
> --- usr.sbin/npppd/npppd/parse.y  15 Oct 2021 15:01:28 -  1.25
> +++ usr.sbin/npppd/npppd/parse.y  1 Feb 2023 18:28:29 -
> @@ -924,6 +924,14 @@ bind : BIND TUNNEL FROM STRING AUTHENTI
>   free($9);
>   YYERROR;
>   }
> + if (tunn->pipex == 0 && iface->is_pppx) {
> + yyerror("pipex should be enabled for"
> + " interface %s", $9);
> + free($4);
> + free($7);
> + free($9);
> + YYERROR;
> + }
>   if ((n = malloc(sizeof(struct confbind))) == NULL) {
>   yyerror("out of memory");
>   free($4);
> 



Re: npppd(8): remove "pipex" option

2023-02-01 Thread YASUOKA Masahiko
I'm sorry for sending 2 mails.  First one was a draft.  Please read second one.



Re: npppd(8): remove "pipex" option

2023-02-01 Thread YASUOKA Masahiko
Hi

On Tue, 31 Jan 2023 11:30:43 +0300
Vitaliy Makkoveev  wrote:
> On Tue, Jan 31, 2023 at 01:40:19PM +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> On Sun, 29 Jan 2023 14:35:05 +0300
>> Vitaliy Makkoveev  wrote:
>> > While switching pppx(4) and pppac(4) from selwakeup() to KNOTE(9), I
>> > found npppd(8) doesn't create pppx interface with "pipex no" in
>> > npppd.conf, but successfully connects the client. So packets don't flow.
>> > However, the pppac(4) has no this problem, because corresponding pppac
>> > interface always created when npppd(8) opened device node.
>> > 
>> > In fact, npppd(8) will not work with pppx(4) interfaces without pipex(4)
>> > support. Otherwise npppd(8) should create pppx(4) sessions with not
>> > pipex(4) specific PIPEXASESSION ioctl(2) command.
>> > 
>> > I propose to remove "pipex" option from npppd(8). We already have
>> > "net.pipex.enable" sysctl MIB to control pipex behaviour. In the case
>> > then "net.pipex.enable" is set to 0, pipex(4) sessions will be always
>> > created, but the traffic will go outside pipex(4) layer.
>> > 
>> > The "ifdef USE_NPPPD_PIPEX" left as is. If we decide to remove them, I
>> > will do this with the next diffs.
>> 
>> Will the next diff remove the networking part (MPPE, IP) as well?
>> 
>> > Please note, we never have complains about the problem described above,
>> > so I doubt someone uses npppd(8) with "pipex no" in the npppd.conf(5).
>> 
>> I don't know why you configured "pipex no", I suppose it was for
>> debugging.  I also actually use "pipex no" when debugging or developing.
> 
> I used this option to test my/visa@ diff which replaced selwakeup() by
> KNOTE(9) and found that pppx(4) case is broken if this option is set to
> "no".

So you used "pipex no" for test.  That is the purpose of "pipex yes/no".

> Since we have the ability of enable/disable pipex(4) globally, I
> propose to remove this option.

No, they have different purposes.  Without "pipex yes/no" option, we
can't test the networking part (IP, MPPE) of npppd without pipex.

> in fact, for the pppx(4) case npppd(8) is absolutely useless without
> pipex(4) support, so I don't see any reasons to build it without
> pipex(4). I don't propose to remove the whole "ifdef USE_NPPPD_PIPEX"
> blocks, only preprocessor directives. Sorry, if my suggestion was not
> clear.

Note that the networking part (IP, MPPE) of npppd works with/without
pipex.  So just removing #ifdef lines, npppd still has "the networking
part without pipex".  As the result of removing "pipex yes/no" option,
we can't test that part.

If you are to remove the networking part (IP, MPPE) of npppd
completely, removing "pipex yes/no" option makes sense since we don't
need to test it anymore.

But I think we should keep the part since it is needed when adding a
tunneling protocol which is not supported by pipex, or running npppd
on another OS.

>> If having "pipex yes/no" configuration is misleading, we can improve
>> the man page or the configuration itself.
> 
>  pipex yes | no
>  Specify whether npppd(8) uses pipex(4).  The default is
>  “yes”. The sysctl(8) variable net.pipex.enable should
>  also be enabled to use pipex(4).
> 
> There is no misleading. But with "pipex no" npppd(8) is usable with
> pppac(4), but with pppx(4) it is not. Also, I don't like that it
> successfully creates connection. Guess, it better to deny "pipex no"
> for pppx(4).

I agree with both.



Re: npppd(8): remove "pipex" option

2023-02-01 Thread YASUOKA Masahiko
On Tue, 31 Jan 2023 11:30:43 +0300
Vitaliy Makkoveev  wrote:
> On Tue, Jan 31, 2023 at 01:40:19PM +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> On Sun, 29 Jan 2023 14:35:05 +0300
>> Vitaliy Makkoveev  wrote:
>> > While switching pppx(4) and pppac(4) from selwakeup() to KNOTE(9), I
>> > found npppd(8) doesn't create pppx interface with "pipex no" in
>> > npppd.conf, but successfully connects the client. So packets don't flow.
>> > However, the pppac(4) has no this problem, because corresponding pppac
>> > interface always created when npppd(8) opened device node.
>> > 
>> > In fact, npppd(8) will not work with pppx(4) interfaces without pipex(4)
>> > support. Otherwise npppd(8) should create pppx(4) sessions with not
>> > pipex(4) specific PIPEXASESSION ioctl(2) command.
>> > 
>> > I propose to remove "pipex" option from npppd(8). We already have
>> > "net.pipex.enable" sysctl MIB to control pipex behaviour. In the case
>> > then "net.pipex.enable" is set to 0, pipex(4) sessions will be always
>> > created, but the traffic will go outside pipex(4) layer.
>> > 
>> > The "ifdef USE_NPPPD_PIPEX" left as is. If we decide to remove them, I
>> > will do this with the next diffs.
>> 
>> Will the next diff remove the networking part (MPPE, IP) as well?
>> 
>> > Please note, we never have complains about the problem described above,
>> > so I doubt someone uses npppd(8) with "pipex no" in the npppd.conf(5).
>> 
>> I don't know why you configured "pipex no", I suppose it was for
>> debugging.  I also actually use "pipex no" when debugging or developing.
> 
> I used this option to test my/visa@ diff which replaced selwakeup() by
> KNOTE(9) and found that pppx(4) case is broken if this option is set to
> "no".

So you used "pipex no" for test.  That exactly is the purpose of
"pipex yes/no".

> Since we have the ability of enable/disable pipex(4) globally, I
> propose to remove this option.

No, they have different purposes.  Without "pipex yes/no" option, we
can't test the networking part (IP, MPPE) of npppd without pipex.

> in fact, for the pppx(4) case npppd(8) is absolutely useless without
> pipex(4) support, so I don't see any reasons to build it without
> pipex(4). I don't propose to remove the whole "ifdef USE_NPPPD_PIPEX"
> blocks, only preprocessor directives. Sorry, if my suggestion was not
> clear.

Note that the networking part (IP, MPPE) of npppd works with/without
pipex.  So just removing #ifdef lines, npppd still has "the networking
part without pipex".  As the result of removing "pipex yes/no" option,
we can't test that part.

If you are to remove the networking part (IP, MPPE) of npppd
completely, removing "pipex yes/no" option makes sense since we don't
need to test it anymore.

But I'd like to keep the networking part since it is needed when adding a 
tunneling protocol which is not supported by pipex, or running npppd on another 
OS.



>> If having "pipex yes/no" configuration is misleading, we can improve
>> the man page or the configuration itself.
> 
>  pipex yes | no
>  Specify whether npppd(8) uses pipex(4).  The default is
>  “yes”. The sysctl(8) variable net.pipex.enable should
>  also be enabled to use pipex(4).
> 
> There is no misleading. But with "pipex no" npppd(8) is usable with
> pppac(4), but with pppx(4) it is not. Also, I don't like that it
> successfully creates connection. Guess, it better to deny "pipex no"
> for pppx(4).
> 



Re: npppd(8): remove "pipex" option

2023-01-30 Thread YASUOKA Masahiko
Hi,

On Sun, 29 Jan 2023 14:35:05 +0300
Vitaliy Makkoveev  wrote:
> While switching pppx(4) and pppac(4) from selwakeup() to KNOTE(9), I
> found npppd(8) doesn't create pppx interface with "pipex no" in
> npppd.conf, but successfully connects the client. So packets don't flow.
> However, the pppac(4) has no this problem, because corresponding pppac
> interface always created when npppd(8) opened device node.
> 
> In fact, npppd(8) will not work with pppx(4) interfaces without pipex(4)
> support. Otherwise npppd(8) should create pppx(4) sessions with not
> pipex(4) specific PIPEXASESSION ioctl(2) command.
> 
> I propose to remove "pipex" option from npppd(8). We already have
> "net.pipex.enable" sysctl MIB to control pipex behaviour. In the case
> then "net.pipex.enable" is set to 0, pipex(4) sessions will be always
> created, but the traffic will go outside pipex(4) layer.
> 
> The "ifdef USE_NPPPD_PIPEX" left as is. If we decide to remove them, I
> will do this with the next diffs.

Will the next diff remove the networking part (MPPE, IP) as well?

> Please note, we never have complains about the problem described above,
> so I doubt someone uses npppd(8) with "pipex no" in the npppd.conf(5).

I don't know why you configured "pipex no", I suppose it was for
debugging.  I also actually use "pipex no" when debugging or developing.

If having "pipex yes/no" configuration is misleading, we can improve
the man page or the configuration itself.

> I tested both pppac(4) and pppx(4) cases with both "net.pipex.enable=1"
> and "net.pipex.enable=0".
> 
> Index: usr.sbin/npppd/npppd/npppd.c
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 npppd.c
> --- usr.sbin/npppd/npppd/npppd.c  1 Jul 2022 09:57:24 -   1.53
> +++ usr.sbin/npppd/npppd/npppd.c  29 Jan 2023 11:04:30 -
> @@ -235,14 +235,12 @@ npppd_get_npppd()
>  int
>  npppd_init(npppd *_this, const char *config_file)
>  {
> - int  i, status = -1, value;
> + int  i, status = -1;
>   const char  *pidpath0;
>   FILE*pidfp = NULL;
>   struct tunnconf *tunn;
>   struct ipcpconf *ipcpconf;
>   struct ipcpstat *ipcpstat;
> - int  mib[] = { CTL_NET, PF_PIPEX, PIPEXCTL_ENABLE };
> - size_t   size;
>  
>   memset(_this, 0, sizeof(npppd));
>  #ifndef  NO_ROUTE_FOR_POOLED_ADDRESS
> @@ -294,17 +292,6 @@ npppd_init(npppd *_this, const char *con
>   if ((status = npppd_reload_config(_this)) != 0)
>   return status;
>  
> - TAILQ_FOREACH(tunn, &_this->conf.tunnconfs, entry) {
> - if (tunn->pipex) {
> - size = sizeof(value);
> - if (!sysctl(mib, nitems(mib), &value, &size, NULL, 0)
> - && value == 0)
> - log_printf(LOG_WARNING,
> - "pipex(4) is disabled by sysctl");
> - break;
> - }
> - }
> -
>   if ((_this->map_user_ppp = hash_create(
>   (int (*) (const void *, const void *))strcmp, str_hash,
>   NPPPD_USER_HASH_SIZ)) == NULL) {
> @@ -1052,7 +1039,6 @@ npppd_ppp_pipex_enable(npppd *_this, npp
>  
>   NPPPD_ASSERT(ppp != NULL);
>   NPPPD_ASSERT(ppp->phy_context != NULL);
> - NPPPD_ASSERT(ppp->use_pipex != 0);
>  
>   pipex_setup_common(ppp, );
>  
> Index: usr.sbin/npppd/npppd/npppd.conf.5
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.conf.5,v
> retrieving revision 1.30
> diff -u -p -r1.30 npppd.conf.5
> --- usr.sbin/npppd/npppd/npppd.conf.5 31 Mar 2022 17:27:30 -  1.30
> +++ usr.sbin/npppd/npppd/npppd.conf.5 29 Jan 2023 11:04:30 -
> @@ -349,19 +349,6 @@ the address assigned by
>  for the link.
>  The default value is
>  .Dq no .
> -.It Ic pipex Ar yes | no
> -Specify whether
> -.Xr npppd 8
> -uses
> -.Xr pipex 4 .
> -The default is
> -.Dq yes .
> -The
> -.Xr sysctl 8
> -variable
> -.Va net.pipex.enable
> -should also be enabled to use
> -.Xr pipex 4 .
>  .It Ic debug-dump-pktin Ar protocol ...
>  If this option is specified,
>  .Xr npppd 8
> Index: usr.sbin/npppd/npppd/npppd.h
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.h,v
> retrieving revision 1.19
> diff -u -p -r1.19 npppd.h
> --- usr.sbin/npppd/npppd/npppd.h  12 Aug 2017 11:20:34 -  1.19
> +++ usr.sbin/npppd/npppd/npppd.h  29 Jan 2023 11:04:30 -
> @@ -133,8 +133,6 @@ struct tunnconf {
>   bool   ingress_filter;
>   intcallnum_check;
>  
> - bool   pipex;
> -
>   u_int  debug_dump_pktin;
>   u_int  debug_dump_pktout;
>  };
> Index: 

Re: efi(4): Support for EFI variables and tables in the kernel

2023-01-07 Thread YASUOKA Masahiko
Hi,

On Wed, 04 Jan 2023 21:52:35 +0100
Mark Kettenis  wrote:
> Dear Sergii and others,
> 
> I've committed the change that passes the ESRT from the bootloader to
> the kernel.  So now it is time to add the interfaces to the kernel to
> read it.  And add the interfaces to manipulate EFI variables.
> 
> For those out of the loop: this could allow us to run fwupd on
> OpenBSD, bringing us a way to update the firmware on many machines
> without running through several hoops like booting Windows.
> 
> What it will also allow us to do is properly set a boot option for
> OpenBSD and place it at the front of the list.  Setting EFI variables
> is only allowed at securelevel 0 and below.
> 
> This diff is an adaptation of the code that Sergii wrote:
> 
>   https://marc.info/?l=openbsd-tech&m=166405001006952&w=2
> 
> A few notable changes I made to that code:
> 
> * This also adds support for arm64
> 
> * Uses a different device major for /dev/efi since I used an unused
>   number in the middle of the table.
> 
> * Renumbered some of the ioctls to avoid leaving a gap
> 
> * Made  self-contained.
> 
> * Fixed the copyright on  to match the FreeBSD file
>   from which significant chunks were copied.
> 
> * Reworked the error handling a bit, reducing the errors to those that
>   actually can happen according to the EFI spec.
> 
> Other than the location of the header file, this should all be
> consistent with the interfaces that FreeBSD and NetBSD provide.
> 
> ok?

I tested the diff roughly with the test programs from
https://github.com/3mdeb/openbsd-src/tree/efivars-api on my vaio.

I have two small pieces of feedback.  Other than these, ok yasuoka

Thanks,

> 
> 
> Index: etc/MAKEDEV.common
> ===
> RCS file: /cvs/src/etc/MAKEDEV.common,v
> retrieving revision 1.118
> diff -u -p -r1.118 MAKEDEV.common
> --- etc/MAKEDEV.common10 Nov 2022 09:50:00 -  1.118
> +++ etc/MAKEDEV.common4 Jan 2023 19:44:01 -
> @@ -533,3 +533,5 @@ __devitem(dt, dt, Dynamic Tracer)dnl
>  _mkdev(dt, dt, {-M dt c major_dt_c 0 600-})dnl
>  __devitem(kstat, kstat, Kernel Statistics)dnl
>  _mkdev(kstat, kstat, {-M kstat c major_kstat_c 0 640-})dnl
> +__devitem(efi, efi, EFI runtime services)dnl
> +_mkdev(efi, efi, {-M efi c major_efi_c 0 600-})dnl
> Index: etc/etc.amd64/MAKEDEV
> ===
> RCS file: /cvs/src/etc/etc.amd64/MAKEDEV,v
> retrieving revision 1.138
> diff -u -p -r1.138 MAKEDEV
> --- etc/etc.amd64/MAKEDEV 10 Nov 2022 11:02:26 -  1.138
> +++ etc/etc.amd64/MAKEDEV 4 Jan 2023 19:44:01 -
> @@ -75,6 +75,7 @@
>  #dt  Dynamic Tracer
>  #diskmap Disk mapper
>  #dri Direct Rendering Infrastructure
> +#efi EFI runtime services
>  #fd  fd/* nodes
>  #fuseUserland Filesystem
>  #gpio*   General Purpose Input/Output
> @@ -358,6 +359,10 @@ fd)
>   MKlist[${#MKlist[*]}]=";chmod 555 fd"
>   ;;
>  
> +efi)
> + M efi c 84 0 600
> + ;;
> +
>  dri)
>   RMlist[${#RMlist[*]}]=";mkdir -p dri;rm -f"
>   n=0
> @@ -593,19 +598,19 @@ local)
>   ;;
>  
>  all)
> - R ipmi0 ttyVI00 ttyVI10 ttyVI20 ttyVI30 ttyVI40 dri nvram
> - R gpio0 gpio1 gpio2 bktr0 vnd0 vnd1 vnd2 vnd3 sd0 sd1 sd2 sd3
> - R sd4 sd5 sd6 sd7 sd8 sd9 cd0 cd1 rd0 tap0 tap1 tap2 tap3 tun0
> - R tun1 tun2 tun3 bio pty0 fd1 fd1B fd1C fd1D fd1E fd1F fd1G
> - R fd1H fd0 fd0B fd0C fd0D fd0E fd0F fd0G fd0H diskmap vscsi0
> - R ch0 audio0 audio1 audio2 audio3 kstat dt kcov bpf pvbus0
> - R pvbus1 vmm fuse pppac pppx hotplug ptm local wscons pci0
> - R pci1 pci2 pci3 uall rmidi0 rmidi1 rmidi2 rmidi3 rmidi4
> - R rmidi5 rmidi6 rmidi7 tuner0 radio0 speaker video0 video1 uk0
> - R random lpa0 lpa1 lpa2 lpt0 lpt1 lpt2 tty00 tty01 tty02 tty03
> - R tty04 tty05 tty06 tty07 tty08 tty09 tty0a tty0b ttyc0 ttyc1
> - R ttyc2 ttyc3 ttyc4 ttyc5 ttyc6 ttyc7 apm pf pctr wd0 wd1 wd2
> - R wd3 std st0 st1 fd
> + R efi0 ipmi0 ttyVI00 ttyVI10 ttyVI20 ttyVI30 ttyVI40 dri
> + R nvram gpio0 gpio1 gpio2 bktr0 vnd0 vnd1 vnd2 vnd3 sd0 sd1
> + R sd2 sd3 sd4 sd5 sd6 sd7 sd8 sd9 cd0 cd1 rd0 tap0 tap1 tap2
> + R tap3 tun0 tun1 tun2 tun3 bio pty0 fd1 fd1B fd1C fd1D fd1E
> + R fd1F fd1G fd1H fd0 fd0B fd0C fd0D fd0E fd0F fd0G fd0H
> + R diskmap vscsi0 ch0 audio0 audio1 audio2 audio3 kstat dt kcov
> + R bpf pvbus0 pvbus1 vmm fuse pppac pppx hotplug ptm local
> + R wscons pci0 pci1 pci2 pci3 uall rmidi0 rmidi1 rmidi2 rmidi3
> + R rmidi4 rmidi5 rmidi6 rmidi7 tuner0 radio0 speaker video0
> + R video1 uk0 random lpa0 lpa1 lpa2 lpt0 lpt1 lpt2 tty00 tty01
> + R tty02 tty03 tty04 tty05 tty06 tty07 tty08 tty09 tty0a tty0b
> + R ttyc0 ttyc1 ttyc2 ttyc3 ttyc4 ttyc5 ttyc6 ttyc7 apm pf pctr
> + R wd0 wd1 wd2 wd3 std st0 st1 fd
>   ;;
>  
>  wd*|sd*)
> Index: etc/etc.amd64/MAKEDEV.md
> 

Re: hostctl: Change from fixed length to variable length

2023-01-05 Thread YASUOKA Masahiko
ok yasuoka

On Fri, 06 Jan 2023 15:14:05 +0900 (JST)
Masato Asou  wrote:
> I have updated my patch.
> 
> From: YASUOKA Masahiko 
> Date: Tue, 27 Dec 2022 11:58:34 +0900 (JST)
> 
>> After diff, it doesn't use PAGE_SIZE any more.  And VMware software
>> limit seems 1MB and changable by its configuration(*1).  So we can't
>> say PVBUS_KVOP_MAXSIZE is enough.
>> 
>> + * - Known pv backends other than vmware has a hard limit smaller than
>> + *   PVBUS_KVOP_MAXSIZE in their messaging.  vmware has a software
>> + *   limit at 1MB, but current open-vm-tools has a limit at 64KB
>> +     *   (=PVBUS_KVOP_MAXSIZE).
> 
> Use this comment.
> 
> From: YASUOKA Masahiko 
> Date: Tue, 27 Dec 2022 12:23:39 +0900 (JST)
> 
>> Also I don't think replacing strlcat() by an own calculation is necessary.
>> 
>> diff --git a/sys/dev/pv/xenstore.c b/sys/dev/pv/xenstore.c
>> index 494eb40bfb0..01ecebdf4af 100644
>> --- a/sys/dev/pv/xenstore.c
>> +++ b/sys/dev/pv/xenstore.c
>> @@ -1116,11 +1116,16 @@ xs_kvop(void *xsc, int op, char *key, char *value, 
>> size_t valuelen)
>>  /* FALLTHROUGH */
>>  case XS_LIST:
>>  for (i = 0; i < iov_cnt; i++) {
>> -if (i && strlcat(value, "\n", valuelen) >= valuelen)
>> +if (i > 0 && strlcat(value, "\n", valuelen) >=
>> +valuelen) {
>> +error = ERANGE;
>>  break;
>> +}
>>  if (strlcat(value, iovp[i].iov_base,
>> -valuelen) >= valuelen)
>> +valuelen) >= valuelen) {
>> +error = ERANGE;
>>  break;
>> +}
>>  }
>>  xs_resfree(, iovp, iov_cnt);
>>  break;
>> @@ -1128,5 +1133,5 @@ xs_kvop(void *xsc, int op, char *key, char *value, 
>> size_t valuelen)
>>  break;
>>  }
>>  
>> -return (0);
>> +return (error);
>>  }
>> 
> 
> And use above diff.
> 
> comment, ok?
> --
> ASOU Masato
> 
> Index: share/man/man4/pvbus.4
> ===
> RCS file: /cvs/src/share/man/man4/pvbus.4,v
> retrieving revision 1.14
> diff -u -p -r1.14 pvbus.4
> --- share/man/man4/pvbus.414 Jun 2017 12:42:09 -  1.14
> +++ share/man/man4/pvbus.45 Jan 2023 23:20:39 -
> @@ -125,6 +125,13 @@ Read the value from
>  .Fa pvr_key
>  and return it in
>  .Fa pvr_value .
> +If
> +.Fa pvr_valuelen
> +is not enough for the value,
> +the command will fail and
> +.Xr errno 2
> +is set to
> +.Er ERANGE .
>  .It Dv PVBUSIOC_KVTYPE
>  Return the type of the attached hypervisor interface as a string in
>  .Fa pvr_key ;
> Index: sys/dev/pv/hypervic.c
> ===
> RCS file: /cvs/src/sys/dev/pv/hypervic.c,v
> retrieving revision 1.17
> diff -u -p -r1.17 hypervic.c
> --- sys/dev/pv/hypervic.c 8 Sep 2022 10:22:06 -   1.17
> +++ sys/dev/pv/hypervic.c 5 Jan 2023 23:20:40 -
> @@ -1151,11 +1151,12 @@ hv_kvop(void *arg, int op, char *key, ch
>   kvpl = >kvp_pool[pool];
>   if (strlen(key) == 0) {
>   for (next = 0; next < MAXPOOLENTS; next++) {
> - if ((val + vallen < vp + HV_KVP_MAX_KEY_SIZE / 2) ||
> - kvp_pool_keys(kvpl, next, vp, ))
> + if (val + vallen < vp + HV_KVP_MAX_KEY_SIZE / 2)
> + return (ERANGE);
> + if (kvp_pool_keys(kvpl, next, vp, ))
>   goto out;
>   if (strlcat(val, "\n", vallen) >= vallen)
> - goto out;
> + return (ERANGE);
>   vp += keylen;
>   }
>   out:
> Index: sys/dev/pv/pvbus.c
> ===
> RCS file: /cvs/src/sys/dev/pv/pvbus.c,v
> retrieving revision 1.26
> diff -u -p -r1.26 pvbus.c
> --- sys/dev/pv/pvbus.c8 Dec 2022 05:45:36 -   1.26
> +++ sys/dev/pv/pvbus.c5 Jan 2023 23:20:40 -
> @@ -399,13 +399,14 @@ pvbusgetstr(size_t srclen, const char *s
>  
>   /*
>* Reject size that is too short or obviously too long:
> -  * - at least one byte for the nul terminator.
> -  * - PAGE_SIZE is an arbi

Re: hostctl: Change from fixed length to variable length

2022-12-26 Thread YASUOKA Masahiko
Hi,

Sorry for separating emails.

On Tue, 27 Dec 2022 11:58:34 +0900 (JST)
YASUOKA Masahiko  wrote:
>> @@ -1115,12 +1116,19 @@ xs_kvop(void *xsc, int op, char *key, char *value, 
>> size_t valuelen)
>>  }
>>  /* FALLTHROUGH */
>>  case XS_LIST:
>> -for (i = 0; i < iov_cnt; i++) {
>> -if (i && strlcat(value, "\n", valuelen) >= valuelen)
>> -break;
>> -if (strlcat(value, iovp[i].iov_base,
>> -valuelen) >= valuelen)
>> +for (i = pos = 0; i < iov_cnt; i++) {
>> +if (i) {
> 
> this is come from the previous, but I prefer comparing with 0
> 
> + if (i > 0) {
> 
>> +if (pos + 1 >= valuelen) {
>> +error = ERANGE;
>> +break;
>> +}
>> +value[pos++] = '\n';
>> +}
>> +if (strlen(iovp[i].iov_base) >= valuelen) {
>> +error = ERANGE;
>>  break;
>> +}
>> +pos += strlcat([pos], iovp[i].iov_base, valuelen 
>> - pos);
>>  }
>>  xs_resfree(, iovp, iov_cnt);
>>  break;
>

Also, I don't think replacing strlcat() with our own calculation is necessary.

diff --git a/sys/dev/pv/xenstore.c b/sys/dev/pv/xenstore.c
index 494eb40bfb0..01ecebdf4af 100644
--- a/sys/dev/pv/xenstore.c
+++ b/sys/dev/pv/xenstore.c
@@ -1116,11 +1116,16 @@ xs_kvop(void *xsc, int op, char *key, char *value, 
size_t valuelen)
/* FALLTHROUGH */
case XS_LIST:
for (i = 0; i < iov_cnt; i++) {
-   if (i && strlcat(value, "\n", valuelen) >= valuelen)
+   if (i > 0 && strlcat(value, "\n", valuelen) >=
+   valuelen) {
+   error = ERANGE;
break;
+   }
if (strlcat(value, iovp[i].iov_base,
-   valuelen) >= valuelen)
+   valuelen) >= valuelen) {
+   error = ERANGE;
break;
+   }
}
xs_resfree(, iovp, iov_cnt);
break;
@@ -1128,5 +1133,5 @@ xs_kvop(void *xsc, int op, char *key, char *value, size_t 
valuelen)
break;
}
 
-   return (0);
+   return (error);
 }



Re: hostctl: Change from fixed length to variable length

2022-12-26 Thread YASUOKA Masahiko
Hi,

On Mon, 26 Dec 2022 13:37:45 +0900 (JST)
Masato Asou  wrote:
> My new patch does not return the value length from the ioctl() system
> call when reading the KEY's value.
> 
> The hostctl command allocates 4k bytes of memory to store the value,
> then reads the value with the ioctl() system call. If ioctl() returns -1
> and errno is ERANGE, the hostctl command reallocates twice as much space.

I will support this direction.

> The upper limit is PVBUS_KVOP_MAXSIZE (64k bytes).

Let me note that open-vm-tools also has the same hard-coded limit

  
https://github.com/vmware/open-vm-tools/blob/162eba6ab52d664551ffae343ef7e9a7f211ca69/open-vm-tools/lib/include/guest_msg_def.h#L108

There are 2 small pieces of feedback:

> diff --git a/sys/dev/pv/pvbus.c b/sys/dev/pv/pvbus.c
> index 5f7c4b57fe0..c76a9e81444 100644
> --- a/sys/dev/pv/pvbus.c
> +++ b/sys/dev/pv/pvbus.c
> @@ -400,12 +400,12 @@ pvbusgetstr(size_t srclen, const char *src, char **dstp)
>   /*
>* Reject size that is too short or obviously too long:
>* - at least one byte for the nul terminator.
> -  * - PAGE_SIZE is an arbitrary value, but known pv backends seem
> -  *   to have a hard (PAGE_SIZE - x) limit in their messaging.
> +  * - PVBUS_KVOP_MAXSIZE is an arbitrary value, but known pv backends
> +  *   seem to have a hard (PAGE_SIZE - x) limit in their messaging.

After the diff, it doesn't use PAGE_SIZE any more.  Also, the VMware
software limit seems to be 1MB and is changeable through its
configuration(*1), so we can't say PVBUS_KVOP_MAXSIZE is enough.

+* - Known pv backends other than vmware has a hard limit smaller than
+*   PVBUS_KVOP_MAXSIZE in their messaging.  vmware has a software
+*   limit at 1MB, but current open-vm-tools has a limit at 64KB
+*   (=PVBUS_KVOP_MAXSIZE).

*1 
https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.security.doc/GUID-91BF834E-CB92-4014-8CF7-29CE40F3E8A3.html

>*/
>   if (srclen < 1)
>   return (EINVAL);
> - else if (srclen > PAGE_SIZE)
> + else if (srclen > PVBUS_KVOP_MAXSIZE)
>   return (ENAMETOOLONG);
>  
>   *dstp = dst = malloc(srclen + 1, M_TEMP, M_WAITOK | M_ZERO);

> @@ -1115,12 +1116,19 @@ xs_kvop(void *xsc, int op, char *key, char *value, 
> size_t valuelen)
>   }
>   /* FALLTHROUGH */
>   case XS_LIST:
> - for (i = 0; i < iov_cnt; i++) {
> - if (i && strlcat(value, "\n", valuelen) >= valuelen)
> - break;
> - if (strlcat(value, iovp[i].iov_base,
> - valuelen) >= valuelen)
> + for (i = pos = 0; i < iov_cnt; i++) {
> + if (i) {

This comes from the previous code, but I prefer comparing with 0 explicitly:

+   if (i > 0) {

> + if (pos + 1 >= valuelen) {
> + error = ERANGE;
> + break;
> + }
> + value[pos++] = '\n';
> + }
> + if (strlen(iovp[i].iov_base) >= valuelen) {
> + error = ERANGE;
>   break;
> + }
> + pos += strlcat([pos], iovp[i].iov_base, valuelen 
> - pos);
>   }
>   xs_resfree(, iovp, iov_cnt);
>   break;



Re: vmt is not closed

2022-12-20 Thread YASUOKA Masahiko
Yes, my previous commit was wrong.  The call to vm_rpc_close() was missing.
Thank you for finding it.

ok yasuoka

On Tue, 13 Dec 2022 19:26:05 +0900 (JST)
Masato Asou  wrote:
> From: Masato Asou 
> Date: Tue, 13 Dec 2022 18:26:22 +0900 (JST)
> 
> Delete #define VMT_DEBUG
> ok?
> --
> ASOU Masato
> 
>> comment, ok?
>> --
>> ASOU Masato
>> 
>> Index: sys/dev/pv/vmt.c
>> ===
>> RCS file: /cvs/src/sys/dev/pv/vmt.c,v
>> retrieving revision 1.27
>> diff -u -p -r1.27 vmt.c
>> --- sys/dev/pv/vmt.c 3 Dec 2022 10:57:04 -   1.27
>> +++ sys/dev/pv/vmt.c 13 Dec 2022 09:23:57 -
>> @@ -259,6 +259,7 @@ struct vmt_softc {
>>  char*sc_nic_info;
>>  };
>>  
>> +#define VMT_DEBUG
>>  #ifdef VMT_DEBUG
>>  #define DPRINTF(_arg...)printf(_arg)
>>  #else
>> @@ -534,7 +535,7 @@ vmt_kvop(void *arg, int op, char *key, c
>>  DPRINTF("%s: unable to send rpci command\n", DEVNAME(sc));
>>  sc->sc_rpc_error = 1;
>>  error = EIO;
>> -goto done;
>> +goto close;
>>  }
>>  
>>  if (vm_rpc_get_length(, , ) != 0) {
>> @@ -542,13 +543,13 @@ vmt_kvop(void *arg, int op, char *key, c
>>  DEVNAME(sc));
>>  sc->sc_rpc_error = 1;
>>  error = EIO;
>> -goto done;
>> +goto close;
>>  }
>>  
>>  if (rlen > 0) {
>>  if (rlen + 1 > valuelen) {
>>  error = EMSGSIZE;
>> -goto done;
>> +goto close;
>>  }
>>  
>>  if (vm_rpc_get_data(, value, rlen, ack) != 0) {
>> @@ -556,20 +557,23 @@ vmt_kvop(void *arg, int op, char *key, c
>>  DEVNAME(sc));
>>  sc->sc_rpc_error = 1;
>>  error = EIO;
>> -goto done;
>> +goto close;
>>  }
>>  /* test if response success  */
>>  if (rlen < 2 || value[0] != '1' || value[1] != ' ') {
>>  DPRINTF("%s: host rejected command: %s\n", DEVNAME(sc),
>>  buf);
>>  error = EINVAL;
>> -goto done;
>> +goto close;
>>  }
>>  /* skip response that was tested */
>>  bcopy(value + 2, value, valuelen - 2);
>>  value[rlen - 2] = '\0';
>>  }
>>  
>> + close:
>> +if (vm_rpc_close() != 0)
>> +DPRINTF("%s: unable to close rpci channel\n", DEVNAME(sc));
>>   done:
>>  free(buf, M_TEMP, bufsz);
>>  return (error);
> 
> Index: sys/dev/pv/vmt.c
> ===
> RCS file: /cvs/src/sys/dev/pv/vmt.c,v
> retrieving revision 1.27
> diff -u -p -r1.27 vmt.c
> --- sys/dev/pv/vmt.c  3 Dec 2022 10:57:04 -   1.27
> +++ sys/dev/pv/vmt.c  13 Dec 2022 10:23:45 -
> @@ -534,7 +534,7 @@ vmt_kvop(void *arg, int op, char *key, c
>   DPRINTF("%s: unable to send rpci command\n", DEVNAME(sc));
>   sc->sc_rpc_error = 1;
>   error = EIO;
> - goto done;
> + goto close;
>   }
>  
>   if (vm_rpc_get_length(, , ) != 0) {
> @@ -542,13 +542,13 @@ vmt_kvop(void *arg, int op, char *key, c
>   DEVNAME(sc));
>   sc->sc_rpc_error = 1;
>   error = EIO;
> - goto done;
> + goto close;
>   }
>  
>   if (rlen > 0) {
>   if (rlen + 1 > valuelen) {
>   error = EMSGSIZE;
> - goto done;
> + goto close;
>   }
>  
>   if (vm_rpc_get_data(, value, rlen, ack) != 0) {
> @@ -556,20 +556,23 @@ vmt_kvop(void *arg, int op, char *key, c
>   DEVNAME(sc));
>   sc->sc_rpc_error = 1;
>   error = EIO;
> - goto done;
> + goto close;
>   }
>   /* test if response success  */
>   if (rlen < 2 || value[0] != '1' || value[1] != ' ') {
>   DPRINTF("%s: host rejected command: %s\n", DEVNAME(sc),
>   buf);
>   error = EINVAL;
> - goto done;
> + goto close;
>   }
>   /* skip response that was tested */
>   bcopy(value + 2, value, valuelen - 2);
>   value[rlen - 2] = '\0';
>   }
>  
> + close:
> + if (vm_rpc_close() != 0)
> +DPRINTF("%s: unable to close rpci channel\n", DEVNAME(sc));
>   done:
>   free(buf, M_TEMP, bufsz);
>   return (error);
> 



pvbus: pass M_ZERO properly

2022-12-07 Thread YASUOKA Masahiko
This is obvious.  M_ZERO is a malloc(9) flag, so it must be passed in the
3rd argument (flags) rather than OR'ed into the 2nd argument (type).

ok?

Index: sys/dev/pv/pvbus.c
===
RCS file: /cvs/src/sys/dev/pv/pvbus.c,v
retrieving revision 1.25
diff -u -p -r1.25 pvbus.c
--- sys/dev/pv/pvbus.c  25 Aug 2022 17:38:16 -  1.25
+++ sys/dev/pv/pvbus.c  8 Dec 2022 02:32:46 -
@@ -408,7 +408,7 @@ pvbusgetstr(size_t srclen, const char *s
else if (srclen > PAGE_SIZE)
return (ENAMETOOLONG);
 
-   *dstp = dst = malloc(srclen + 1, M_TEMP|M_ZERO, M_WAITOK);
+   *dstp = dst = malloc(srclen + 1, M_TEMP, M_WAITOK | M_ZERO);
if (src != NULL) {
error = copyin(src, dst, srclen);
dst[srclen] = '\0';



Re: pppx(4): decrease netlock pressure in pppxioctl()

2022-11-19 Thread YASUOKA Masahiko
It doesn't seem to have a problem.  Sorry for my delay.

ok yasuoka

On Wed, 9 Nov 2022 13:24:21 +0300
Vitaliy Makkoveev  wrote:
> ping...
> 
> On Tue, Nov 01, 2022 at 03:16:02PM +0300, Vitaliy Makkoveev wrote:
>> Push netlock down to pppx_add_session(). The 'pppx_if' structure has
>> the `pxi_ready' member to prevent access to incomplete `pxi', so we
>> don't need to hold netlock during all initialisation process. This
>> removes potential PR_WAITOK/M_WAITOK allocations impact on packet
>> processing. Also this removes relock dances around if_attach() and
>> if_detach() calls.
>> 
>> Do not grab netlock for FIONREAD. mbuf(9) queue doesn't rely on it.
>> 
>> Do not grab netlock around pipex_ioctl() call. pipex(4) has its own
>> protection and doesn't rely on netlock. We need to unlink  pipex(4)
>> session before destroy associated `pxi', it can't be killed
>> concurrently. Also this stops to block packet processing when npppd(8)
>> periodically does PIPEXGCLOSED ioctl(2) commands.
>> 
>> The dummy FIONBIO case doesn't require any lock to be held.
>> 
>> The netlock remains to be taken around pppx_del_session() and
>> pppx_set_session_descr() because pppx(4) data structures rely on it.
>> 
>> Index: sys/net/if_pppx.c
>> ===
>> RCS file: /cvs/src/sys/net/if_pppx.c,v
>> retrieving revision 1.122
>> diff -u -p -r1.122 if_pppx.c
>> --- sys/net/if_pppx.c29 Aug 2022 07:51:45 -  1.122
>> +++ sys/net/if_pppx.c1 Nov 2022 10:08:37 -
>> @@ -414,7 +414,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>>  struct pppx_dev *pxd = pppx_dev2pxd(dev);
>>  int error = 0;
>>  
>> -NET_LOCK();
>>  switch (cmd) {
>>  case PIPEXASESSION:
>>  error = pppx_add_session(pxd,
>> @@ -422,13 +421,17 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>>  break;
>>  
>>  case PIPEXDSESSION:
>> +NET_LOCK();
>>  error = pppx_del_session(pxd,
>>  (struct pipex_session_close_req *)addr);
>> +NET_UNLOCK();
>>  break;
>>  
>>  case PIPEXSIFDESCR:
>> +NET_LOCK();
>>  error = pppx_set_session_descr(pxd,
>>  (struct pipex_session_descr_req *)addr);
>> +NET_UNLOCK();
>>  break;
>>  
>>  case FIONBIO:
>> @@ -441,7 +444,6 @@ pppxioctl(dev_t dev, u_long cmd, caddr_t
>>  error = pipex_ioctl(pxd, cmd, addr);
>>  break;
>>  }
>> -NET_UNLOCK();
>>  
>>  return (error);
>>  }
>> @@ -607,6 +609,7 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  
>>  pxi->pxi_session = session;
>>  
>> +NET_LOCK();
>>  /* try to set the interface up */
>>  unit = pppx_if_next_unit();
>>  if (unit < 0) {
>> @@ -624,6 +627,7 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  goto out;
>>  }
>>  LIST_INSERT_HEAD(>pxd_pxis, pxi, pxi_list);
>> +NET_UNLOCK();
>>  
>>  snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d", "pppx", unit);
>>  ifp->if_mtu = req->pr_peer_mru; /* XXX */
>> @@ -638,13 +642,12 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  /* ifp->if_rdomain = req->pr_rdomain; */
>>  if_counters_alloc(ifp);
>>  
>> -/* XXXSMP breaks atomicity */
>> -NET_UNLOCK();
>>  if_attach(ifp);
>> -NET_LOCK();
>>  
>> +NET_LOCK();
>>  if_addgroup(ifp, "pppx");
>>  if_alloc_sadl(ifp);
>> +NET_UNLOCK();
>>  
>>  #if NBPFILTER > 0
>>  bpfattach(>if_bpf, ifp, DLT_LOOP, sizeof(u_int32_t));
>> @@ -680,6 +683,7 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  
>>  ia->ia_netmask = ia->ia_sockmask.sin_addr.s_addr;
>>  
>> +NET_LOCK();
>>  error = in_ifinit(ifp, ia, , 1);
>>  if (error) {
>>  printf("pppx: unable to set addresses for %s, error=%d\n",
>> @@ -687,26 +691,29 @@ pppx_add_session(struct pppx_dev *pxd, s
>>  } else {
>>  if_addrhooks_run(ifp);
>>  }
>> +NET_UNLOCK();
>>  
>>  error = pipex_link_session(session, ifp, pxd);
>>  if (error)
>>  goto detach;
>>  
>> +NET_LOCK();
>>  SET(ifp->if_flags, IFF_RUNNING);
>>  pxi->pxi_ready = 1;
>> +NET_UNLOCK();
>>  
>>  return (error);
>>  
>>  detach:
>> -/* XXXSMP breaks atomicity */
>> -NET_UNLOCK();
>>  if_detach(ifp);
>> -NET_LOCK();
>>  
>> +NET_LOCK();
>>  if (RBT_REMOVE(pppx_ifs, _ifs, pxi) == NULL)
>>  panic("%s: inconsistent RB tree", __func__);
>>  LIST_REMOVE(pxi, pxi_list);
>>  out:
>> +NET_UNLOCK();
>> +
>>  pool_put(_if_pl, pxi);
>>  pipex_rele_session(session);
>>  
> 



Re: hostctl: Change from fixed length to variable length

2022-11-18 Thread YASUOKA Masahiko
On Sat, 19 Nov 2022 14:41:18 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Wed, 12 Oct 2022 07:58:20 +0900 (JST)
> YASUOKA Masahiko  wrote:
>> On Wed, 05 Oct 2022 13:37:35 +0900 (JST)
>> Masato Asou  wrote:
>>> From: "Theo de Raadt" 
>>> Date: Tue, 04 Oct 2022 21:58:13 -0600
>>>> Userland may not ask the kernel to allocate such a huge object.  The
>>>> kernel address space is quite small.  First off, huge allocations will
>>>> fail.
>>>> 
>>>> But it is worse -- the small kernel address space is shared for many
>>>> purposes, so large allocations will harm the other subsystems.
>>>> 
>>>> As written, this diff is too dangerous.  Arbitrary allocation inside
>>>> the kernel is not reasonable.  object sizes requested by userland must
>>>> be small, or the operations must be cut up, which does create impact
>>>> on atomicity or other things.
>>> 
>>> As you pointed out, it is not a good idea to allocate large spaces
>>> in kernel.
>>> 
>>> Would it be better to keep the current fixed length?
>> 
>> Currently the value on VMware may be truncated silently.  It's simply
>> broken.  I think we should fix it by having a way to know if the value
>> is reached the limit.
>> 
>> Also I think we should be able to pass larger size of data.  Since at
>> least on VMware, people is useing for parameters when deployment
>> through OVF tamplate.  Sometimes the parameter includes large data
>> like X.509 certificate.
>> 
>> https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.vm_admin.doc/GUID-D0F9E3B9-B77B-4DEF-A982-49B9F6358FF3.html
>> 
>> What do you think?
>> 
>>> Prepare a variable like kern.maxpvbus and default it to
>>> 4096.  Futhermore, how about free() after copyout() to user space?
>> 
>> I suppose we can use the space prepared by the userland directly.
> 
> I admit there is no way to use the user space directly in case of the
> vmware.
> 
> But current vmt(4) uses the buffer in vmt_softc for all RPC commands.
> The buffer seems to have beeen created for RPC done by the tclo
> process.  The tclo process executes RPC periodically, so having a long
> term buffer does make sense.
> 
> On the otherhand, pvbus(4) prepares a buffer for
> PVBUSIOC_KV{READ|WRITE} and pass it to the driver handler.  So vmt(4)
> can use the buffer.
> 
> The diff is to change vmt(4) to use the buffer given by pvbuf(4) for
> the rpc output directly.  Also make it return EMSGSIZE when the buffer
> is not enough instead of truncating silently.
> 
> The diff is first step.  We need to more hack for pvbus(4) and
> vmt(4).  For example, the buffer size pvbuf(4) allocates is not enough
> to store the size user requested, since vmt(4) neeeds extra 2 bytes
> for the RPC output.
> 
> + bcopy(value + 2, value, valuelen - 2);
> 
> 
> But I'd like to commit this first.
> 
> ok?

Sorry, using the existing vm_rpc_send_rpci_tx_buf() or
vm_rpci_response_successful() wasn't a good idea and had a bug.

Let me update the diff.

Index: sys/dev/pv/vmt.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/pv/vmt.c,v
retrieving revision 1.26
diff -u -p -r1.26 vmt.c
--- sys/dev/pv/vmt.c8 Sep 2022 10:22:06 -   1.26
+++ sys/dev/pv/vmt.c19 Nov 2022 07:32:47 -
@@ -491,9 +491,12 @@ int
 vmt_kvop(void *arg, int op, char *key, char *value, size_t valuelen)
 {
struct vmt_softc *sc = arg;
-   char *buf = NULL, *ptr;
+   struct vm_rpc rpci;
+   char *buf = NULL;
size_t bufsz;
int error = 0;
+   uint32_t rlen;
+   uint16_t ack;
 
bufsz = VMT_RPC_BUFLEN;
buf = malloc(bufsz, M_TEMP, M_WAITOK | M_ZERO);
@@ -520,25 +523,52 @@ vmt_kvop(void *arg, int op, char *key, c
goto done;
}
 
-   if (vm_rpc_send_rpci_tx(sc, "%s", buf) != 0) {
-   DPRINTF("%s: error sending command: %s\n", DEVNAME(sc), buf);
+   if (vm_rpc_open(, VM_RPC_OPEN_RPCI) != 0) {
+   DPRINTF("%s: rpci channel open failed\n", DEVNAME(sc));
sc->sc_rpc_error = 1;
error = EIO;
goto done;
}
 
-   if (vm_rpci_response_successful(sc) == 0) {
-   DPRINTF("%s: host rejected command: %s\n", DEVNAME(sc), buf);
-   error = EINVAL;
+   if (vm_rpc_send(, buf, bufsz) != 0) {
+   DPRINTF("%s: unable to send rpci command\n", DEVNAME(sc));
+   sc->sc_rpc_error = 1;
+   error = EIO;
goto done;
}
 
-   /* skip response that

Re: hostctl: Change from fixed length to variable length

2022-11-18 Thread YASUOKA Masahiko
On Wed, 12 Oct 2022 07:58:20 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Wed, 05 Oct 2022 13:37:35 +0900 (JST)
> Masato Asou  wrote:
>> From: "Theo de Raadt" 
>> Date: Tue, 04 Oct 2022 21:58:13 -0600
>>> Userland may not ask the kernel to allocate such a huge object.  The
>>> kernel address space is quite small.  First off, huge allocations will
>>> fail.
>>> 
>>> But it is worse -- the small kernel address space is shared for many
>>> purposes, so large allocations will harm the other subsystems.
>>> 
>>> As written, this diff is too dangerous.  Arbitrary allocation inside
>>> the kernel is not reasonable.  object sizes requested by userland must
>>> be small, or the operations must be cut up, which does create impact
>>> on atomicity or other things.
>> 
>> As you pointed out, it is not a good idea to allocate large spaces
>> in kernel.
>> 
>> Would it be better to keep the current fixed length?
> 
> Currently the value on VMware may be truncated silently.  It's simply
> broken.  I think we should fix it by having a way to know if the value
> is reached the limit.
> 
> Also I think we should be able to pass larger size of data.  Since at
> least on VMware, people is useing for parameters when deployment
> through OVF tamplate.  Sometimes the parameter includes large data
> like X.509 certificate.
> 
> https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.vm_admin.doc/GUID-D0F9E3B9-B77B-4DEF-A982-49B9F6358FF3.html
> 
> What do you think?
> 
>> Prepare a variable like kern.maxpvbus and default it to
>> 4096.  Futhermore, how about free() after copyout() to user space?
> 
> I suppose we can use the space prepared by the userland directly.

I admit there is no way to use the user space directly in case of the
vmware.

But the current vmt(4) uses the buffer in vmt_softc for all RPC commands.
The buffer seems to have been created for the RPCs done by the tclo
process.  The tclo process executes RPCs periodically, so having a
long-term buffer does make sense.

On the other hand, pvbus(4) prepares a buffer for
PVBUSIOC_KV{READ|WRITE} and passes it to the driver handler, so vmt(4)
can use that buffer.

The diff changes vmt(4) to use the buffer given by pvbus(4) for
the RPC output directly.  It also makes it return EMSGSIZE when the
buffer is not large enough, instead of truncating silently.

The diff is a first step.  We need more work on pvbus(4) and
vmt(4).  For example, the buffer that pvbus(4) allocates is not large
enough to store the size the user requested, since vmt(4) needs 2 extra
bytes for the RPC output.

+   bcopy(value + 2, value, valuelen - 2);


But I'd like to commit this first.

ok?

Index: sys/dev/pv/vmt.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/pv/vmt.c,v
retrieving revision 1.26
diff -u -p -r1.26 vmt.c
--- sys/dev/pv/vmt.c8 Sep 2022 10:22:06 -   1.26
+++ sys/dev/pv/vmt.c19 Nov 2022 04:13:47 -
@@ -277,7 +277,8 @@ int  vm_rpc_send(const struct vm_rpc *, 
 int vm_rpc_send_str(const struct vm_rpc *, const uint8_t *);
 int vm_rpc_get_length(const struct vm_rpc *, uint32_t *, uint16_t *);
 int vm_rpc_get_data(const struct vm_rpc *, char *, uint32_t, uint16_t);
-int vm_rpc_send_rpci_tx_buf(struct vmt_softc *, const uint8_t *, uint32_t);
+int vm_rpc_send_rpci_tx_buf(struct vmt_softc *, const uint8_t *, uint32_t,
+   uint8_t *, uint32_t);
 int vm_rpc_send_rpci_tx(struct vmt_softc *, const char *, ...)
__attribute__((__format__(__kprintf__,2,3)));
 int vm_rpci_response_successful(struct vmt_softc *);
@@ -491,7 +492,7 @@ int
 vmt_kvop(void *arg, int op, char *key, char *value, size_t valuelen)
 {
struct vmt_softc *sc = arg;
-   char *buf = NULL, *ptr;
+   char *buf = NULL;
size_t bufsz;
int error = 0;
 
@@ -520,10 +521,9 @@ vmt_kvop(void *arg, int op, char *key, c
goto done;
}
 
-   if (vm_rpc_send_rpci_tx(sc, "%s", buf) != 0) {
-   DPRINTF("%s: error sending command: %s\n", DEVNAME(sc), buf);
+   if ((error = vm_rpc_send_rpci_tx_buf(sc, buf, strlen(buf), value,
+   valuelen)) != 0) {
sc->sc_rpc_error = 1;
-   error = EIO;
goto done;
}
 
@@ -534,11 +534,7 @@ vmt_kvop(void *arg, int op, char *key, c
}
 
/* skip response that was tested in vm_rpci_response_successful() */
-   ptr = sc->sc_rpc_buf + 2;
-
-   /* might truncate, copy anyway but return error */
-   if (strlcpy(value, ptr, valuelen) >= valuelen)
-   error = ENOMEM;
+   bcopy(value + 2, value, valuelen - 2);
 
  done:
free(buf, M_TEMP, bufsz);
@@ -1348,8 +1344,8 @@ vmt_ni

Re: hostctl: Change from fixed length to variable length

2022-10-11 Thread YASUOKA Masahiko
Hello,

On Wed, 05 Oct 2022 13:37:35 +0900 (JST)
Masato Asou  wrote:
> From: "Theo de Raadt" 
> Date: Tue, 04 Oct 2022 21:58:13 -0600
>> Userland may not ask the kernel to allocate such a huge object.  The
>> kernel address space is quite small.  First off, huge allocations will
>> fail.
>> 
>> But it is worse -- the small kernel address space is shared for many
>> purposes, so large allocations will harm the other subsystems.
>> 
>> As written, this diff is too dangerous.  Arbitrary allocation inside
>> the kernel is not reasonable.  object sizes requested by userland must
>> be small, or the operations must be cut up, which does create impact
>> on atomicity or other things.
> 
> As you pointed out, it is not a good idea to allocate large spaces
> in kernel.
> 
> Would it be better to keep the current fixed length?

Currently the value on VMware may be truncated silently.  It's simply
broken.  I think we should fix it by having a way to know whether the
value has reached the limit.

Also, I think we should be able to pass larger data.  At least on
VMware, people use this for parameters when deploying through an OVF
template, and sometimes a parameter includes large data such as an
X.509 certificate.

https://docs.vmware.com/en/VMware-vSphere/7.0/com.vmware.vsphere.vm_admin.doc/GUID-D0F9E3B9-B77B-4DEF-A982-49B9F6358FF3.html

What do you think?

> Prepare a variable like kern.maxpvbus and default it to
> 4096.  Futhermore, how about free() after copyout() to user space?

I suppose we can use the space prepared by the userland directly.



diff: fix panic in pf_state_export()

2022-09-20 Thread YASUOKA Masahiko
Hello,

My colleague hit the following kernel panic while running "pfctl
-ss" repeatedly from a script during a performance test.

 uvm_fault(0xfd811b869110, 0x10, 0, 1) -> e
 kernel: page fault trap, code=0
 Stopped at  pf_state_export+0x42:   movq0x10(%rax),%rcx
 TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
  313402  43614  0 0x3  00  awk
 * 66859  46071  0 0x3  03K pfctl
   48782  57541  0 0x14000  0x2001  softnet
 pf_state_export(80003a863218,fd81117155a8) at pf_state_export+0x42
 pf_states_get(80003a863680) at pf_states_get+0xe1
 pfioctl(4900,c0104419,80003a863680,1,80003a823cf0) at pfioctl+0x11ba
 
VOP_IOCTL(fd811f894770,c0104419,80003a863680,1,fd811a4e3ba8,80003a823cf0)
 at VOP_IOCTL+0x57
 vn_ioctl(fd812742c168,c0104419,80003a863680,80003a823cf0) at 
vn_ioctl+0x79
 sys_ioctl(80003a823cf0,80003a863790,80003a863780) at 
sys_ioctl+0x2b9
 syscall(80003a863830) at syscall+0x35b
 Xsyscall() at Xsyscall+0xef
 end of kernel
 end trace frame: 0x7f7f3960, count: 7
 Running script...
 ddb{3}> 

Look at "pf_state_export+0x42":

 (gdb) l *(pf_state_export+0x42)
 0x819bb112 is in pf_state_export (/usr/src/sys/net/pf.c:1216).
 1211
 1212memset(sp, 0, sizeof(struct pfsync_state));
 1213
 1214/* copy from state key */
 1215sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
 1216sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
 1217sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
 1218sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
 1219sp->key[PF_SK_WIRE].rdomain = 
htons(st->key[PF_SK_WIRE]->rdomain);
 1220sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
 (gdb) 

The uvm_fault happened because st->key[PF_SK_WIRE] is NULL.  This
can happen after pf_state_key_detach() (called from pf_state_remove())
has been called for the state.

The diff is to take care of that condition.

ok?  comments?


Don't assume pf state keys of pf_state are always there.  Take a
reference of pf state key with a reference counter and access its
members through it.  Found by IIJ.

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1140
diff -u -p -r1.1140 pf.c
--- sys/net/pf.c3 Sep 2022 19:22:19 -   1.1140
+++ sys/net/pf.c21 Sep 2022 05:50:16 -
@@ -1193,26 +1193,37 @@ void
 pf_state_export(struct pfsync_state *sp, struct pf_state *st)
 {
int32_t expire;
+   struct pf_state_key *skw, *sks;
 
memset(sp, 0, sizeof(struct pfsync_state));
 
/* copy from state key */
-   sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
-   sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
-   sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
-   sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
-   sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
-   sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
-   sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
-   sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
-   sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
-   sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
-   sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
-   sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
+   skw = pf_state_key_ref(st->key[PF_SK_WIRE]);
+   if (skw != NULL) {
+   sp->key[PF_SK_WIRE].addr[0] = skw->addr[0];
+   sp->key[PF_SK_WIRE].addr[1] = skw->addr[1];
+   sp->key[PF_SK_WIRE].port[0] = skw->port[0];
+   sp->key[PF_SK_WIRE].port[1] = skw->port[1];
+   sp->key[PF_SK_WIRE].rdomain = htons(skw->rdomain);
+   sp->key[PF_SK_WIRE].af = skw->af;
+   sp->proto = skw->proto;
+   sp->af = skw->af;
+   pf_state_key_unref(skw);
+   skw = NULL;
+   }
+   sks = pf_state_key_ref(st->key[PF_SK_STACK]);
+   if (sks != NULL) {
+   sp->key[PF_SK_STACK].addr[0] = sks->addr[0];
+   sp->key[PF_SK_STACK].addr[1] = sks->addr[1];
+   sp->key[PF_SK_STACK].port[0] = sks->port[0];
+   sp->key[PF_SK_STACK].port[1] = sks->port[1];
+   sp->key[PF_SK_STACK].rdomain = htons(sks->rdomain);
+   sp->key[PF_SK_STACK].af = sks->af;
+   pf_state_key_unref(sks);
+   sks = NULL;
+   }
sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
-   sp->proto = st->key[PF_SK_WIRE]->proto;
-   sp->af = st->key[PF_SK_WIRE]->af;
 
/* copy from 

Re: tcp timer mutex

2022-09-03 Thread YASUOKA Masahiko
ok yasuoka

On Fri, 2 Sep 2022 14:44:29 +0200
Alexander Bluhm  wrote:
> + now = READ_ONCE(tcp_now);
> +
>   /*
>* Determine length of data that should be transmitted,
>* and flags that will be used.
> @@ -228,7 +231,7 @@ tcp_output(struct tcpcb *tp)
>* to send, then transmit; otherwise, investigate further.
>*/
>   idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una);
> - if (idle && (tcp_now - tp->t_rcvtime) >= tp->t_rxtcur)
> + if (idle && (now - tp->t_rcvtime) >= tp->t_rxtcur)
>   /*
>* We have been idle for "a while" and no acks are
>* expected to clock out any data we send --
> @@ -539,13 +542,13 @@ send:
>  
>   /* Form timestamp option as shown in appendix A of RFC 1323. */
>   *lp++ = htonl(TCPOPT_TSTAMP_HDR);
> - *lp++ = htonl(tcp_now + tp->ts_modulate);
> + *lp++ = htonl(now + tp->ts_modulate);
>   *lp   = htonl(tp->ts_recent);
>   optlen += TCPOLEN_TSTAMP_APPA;
>  
>   /* Set receive buffer autosizing timestamp. */
>   if (tp->rfbuf_ts == 0)
> - tp->rfbuf_ts = tcp_now;
> + tp->rfbuf_ts = now;
>  
>   }
>  
> @@ -691,7 +694,7 @@ send:
>*/
>   if (off + len == so->so_snd.sb_cc && !soissending(so))
>   flags |= TH_PUSH;
> - tp->t_sndtime = tcp_now;
> + tp->t_sndtime = now;
>   } else {
>   if (tp->t_flags & TF_ACKNOW)
>   tcpstat_inc(tcps_sndacks);
> @@ -924,7 +927,7 @@ send:
>* not currently timing anything.
>*/
>   if (tp->t_rtttime == 0) {
> - tp->t_rtttime = tcp_now;
> + tp->t_rtttime = now;
>   tp->t_rtseq = startseq;
>   tcpstat_inc(tcps_segstimed);
>   }
> @@ -1123,7 +1126,7 @@ out:
>   if (win > 0 && SEQ_GT(tp->rcv_nxt+win, tp->rcv_adv))
>   tp->rcv_adv = tp->rcv_nxt + win;
>   tp->last_ack_sent = tp->rcv_nxt;
> - tp->t_sndacktime = tcp_now;
> + tp->t_sndacktime = now;
>   tp->t_flags &= ~TF_ACKNOW;
>   TCP_TIMER_DISARM(tp, TCPT_DELACK);
>   if (sendalot)
> Index: netinet/tcp_subr.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_subr.c,v
> retrieving revision 1.186
> diff -u -p -r1.186 tcp_subr.c
> --- netinet/tcp_subr.c30 Aug 2022 11:53:04 -  1.186
> +++ netinet/tcp_subr.c1 Sep 2022 14:47:22 -
> @@ -71,6 +71,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
> @@ -98,6 +99,14 @@
>  #include 
>  #include 
>  
> +/*
> + * Locks used to protect struct members in this file:
> + *   I   immutable after creation
> + *   T   tcp_timer_mtx   global tcp timer data structures
> + */
> +
> +struct mutex tcp_timer_mtx;
> +
>  /* patchable/settable parameters for tcp */
>  int  tcp_mssdflt = TCP_MSS;
>  int  tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
> @@ -111,8 +120,6 @@ int   tcp_do_ecn = 0; /* RFC3168 ECN enab
>  #endif
>  int  tcp_do_rfc3390 = 2; /* Increase TCP's Initial Window to 10*mss */
>  
> -u_int32_ttcp_now = 1;
> -
>  #ifndef TCB_INITIAL_HASH_SIZE
>  #define  TCB_INITIAL_HASH_SIZE   128
>  #endif
> @@ -126,9 +133,10 @@ struct pool sackhl_pool;
>  
>  struct cpumem *tcpcounters;  /* tcp statistics */
>  
> -u_char   tcp_secret[16];
> -SHA2_CTX tcp_secret_ctx;
> -tcp_seq  tcp_iss;
> +u_char   tcp_secret[16]; /* [I] */
> +SHA2_CTX tcp_secret_ctx; /* [I] */
> +tcp_seq  tcp_iss;/* [T] updated by timer and connection 
> */
> +uint32_t tcp_now;/* [T] incremented by slow timer */
>  
>  /*
>   * Tcp initialization
> @@ -137,6 +145,7 @@ void
>  tcp_init(void)
>  {
>   tcp_iss = 1;/* wrong */
> + tcp_now = 1;
>   pool_init(_pool, sizeof(struct tcpcb), 0, IPL_SOFTNET, 0,
>   "tcpcb", NULL);
>   pool_init(_pool, sizeof(struct tcpqent), 0, IPL_SOFTNET, 0,
> @@ -281,7 +290,7 @@ tcp_template(struct tcpcb *tp)
>   */
>  void
>  tcp_respond(struct tcpcb *tp, caddr_t template, struct tcphdr *th0,
> -tcp_seq ack, tcp_seq seq, int flags, u_int rtableid)
> +tcp_seq ack, tcp_seq seq, int flags, u_int rtableid, uint32_t now)
>  {
>   int tlen;
>   int win = 0;
> @@ -362,7 +371,7 @@ tcp_respond(struct tcpcb *tp, caddr_t te
>   u_int32_t *lp = (u_int32_t *)(th + 1);
>   /* Form timestamp option as shown in appendix A of RFC 1323. */
>   *lp++ = htonl(TCPOPT_TSTAMP_HDR);
> - *lp++ = htonl(tcp_now + tp->ts_modulate);
> + *lp++ = htonl(now + tp->ts_modulate);
>   

Re: divert-reply: keep pf state after pcb is dropped

2022-09-02 Thread YASUOKA Masahiko
Hi,

On Fri, 2 Sep 2022 17:40:13 +0200
Alexander Bluhm  wrote:
> On Fri, Sep 02, 2022 at 03:04:34PM +0200, YASUOKA Masahiko wrote:
>> The diff is to fix a problem in a complex setup.
>> 
>> Normal setup of divert-reply for TCP connection:
>> 
>>   client --- relayd  --- server
>> 
>> - transparently forward TCP connections
>> - divert-reply is configured the outbound connection to the server
>>   - so that the PF state is removed when the PCB is deleted
>>   - otherwise if packets from server is comming after the PCB is
>> deleted, they are accidentally forwarded directly to the client
>> 
>> In addtion to this, "match out nat-to" is configured for the outbound
>> connection instead of dropping "transparent".  The purpose of doing
>> this is to expand the space of ephemeral ports of NAT.  Ephemeral
>> ports of PCB is limitted in one 2^16 space, but ephemeral ports of PF
>> is limitted in 2^16 for each remote address.
>> 
>> In this case, if the PF state is dropped immediately after the PCB is
>> dropped, the port number of NAT might be reused quickly, then a
>> problem can happen on the server side since the port is used for the
>> old connection.
>> 
>> So the diff is to keep the state until timeout.
>> 
>> comment?
> 
> How does that work together with port reuse?
> 
> One reason I have introduced pf_remove_divert_state() is to behave
> correctly in case the client does port reuse.
> 
> When client creates and closes a lot of connections it will reuse
> its port before the timeout triggers.

My company IIJ is using divert reply in a similar situation.

> We have code in pf_state_key_attach(), pf_test_state() and tcp_input()
> to remove old states and create new ones in that case.  For divert
> the old state has to be removed, so that the new packet reaches the
> listen state.
> 
> I don't know if this still works with your diff.  Have you considered
> it?

I have hit the same or a similar problem.

If the pf state is kept and the client reuses the same port, the
SYN packet of the new connection might be dropped by the old
state.  (An older version of the diff didn't consider this, and the
problem actually happened.)

The diff already considers that situation.

If the pf state is kept and the client reuses the same port, the
SYN packet matches the old state in pf_find_state(), but it returns
PF_DROP.

1151 if (ISSET(s->state_flags, PFSTATE_INP_UNLINKED))
1152 return (PF_DROP);

Then the packet falls through to pf_test_rule(), a new state is
created, and the old state is removed in pf_state_key_attach().  The
flag of the old state is not inherited by the new state, so the packet
is passed and the old state is removed.

> Reuse is tested in /usr/src/regress/sys/net/pf_divert/.  But I do
> not use nat or rdr there.  So my test may not cover the code in
> your diff as you check "key[PF_SK_WIRE] != si->s->key[PF_SK_STACK]".
> 
> I am running regress test with diff right now, we will see if it
> still works.

Thanks,

> bluhm
> 
>> Index: sys/net/pf.c
>> ===
>> RCS file: /cvs/src/sys/net/pf.c,v
>> retrieving revision 1.1138
>> diff -u -p -r1.1138 pf.c
>> --- sys/net/pf.c 30 Aug 2022 11:53:03 -  1.1138
>> +++ sys/net/pf.c 2 Sep 2022 12:54:36 -
>> @@ -1148,6 +1148,8 @@ pf_find_state(struct pf_pdesc *pd, struc
>>  
>>  if (s == NULL)
>>  return (PF_DROP);
>> +if (ISSET(s->state_flags, PFSTATE_INP_UNLINKED))
>> +return (PF_DROP);
>>  
>>  if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
>>  pf_add_threshold(>rule.ptr->pktrate);
>> @@ -1461,7 +1463,23 @@ pf_remove_divert_state(struct pf_state_k
>>  if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
>>  (si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
>>  si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
>> -pf_remove_state(si->s);
>> +if (si->s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
>> +si->s->key[PF_SK_WIRE] != si->s->key[PF_SK_STACK]) {
>> +/*
>> + * If the local address is translated, keep
>> + * the state for "tcp.closed" seconds to
>> + * prevent its source port from being reused.
>> + */
>> + 

divert-reply: keep pf state after pcb is dropped

2022-09-02 Thread YASUOKA Masahiko
Hi,

The diff is to fix a problem in a complex setup.

Normal setup of divert-reply for TCP connection:

  client --- relayd  --- server

- transparently forward TCP connections
- divert-reply is configured for the outbound connection to the server
  - so that the PF state is removed when the PCB is deleted
  - otherwise, if packets from the server arrive after the PCB is
    deleted, they are accidentally forwarded directly to the client

In addition to this, "match out nat-to" is configured for the outbound
connection instead of dropping "transparent".  The purpose of doing
this is to expand the space of ephemeral ports of NAT.  Ephemeral
ports of PCBs are limited to a single 2^16 space, but ephemeral ports
of PF are limited to 2^16 for each remote address.

In this case, if the PF state is dropped immediately after the PCB is
dropped, the port number of NAT might be reused quickly, then a
problem can happen on the server side since the port is used for the
old connection.

So the diff is to keep the state until timeout.

comment?

Index: sys/net/pf.c
===
RCS file: /cvs/src/sys/net/pf.c,v
retrieving revision 1.1138
diff -u -p -r1.1138 pf.c
--- sys/net/pf.c30 Aug 2022 11:53:03 -  1.1138
+++ sys/net/pf.c2 Sep 2022 12:54:36 -
@@ -1148,6 +1148,8 @@ pf_find_state(struct pf_pdesc *pd, struc
 
if (s == NULL)
return (PF_DROP);
+   if (ISSET(s->state_flags, PFSTATE_INP_UNLINKED))
+   return (PF_DROP);
 
if (s->rule.ptr->pktrate.limit && pd->dir == s->direction) {
pf_add_threshold(>rule.ptr->pktrate);
@@ -1461,7 +1463,23 @@ pf_remove_divert_state(struct pf_state_k
if (sk == si->s->key[PF_SK_STACK] && si->s->rule.ptr &&
(si->s->rule.ptr->divert.type == PF_DIVERT_TO ||
si->s->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
-   pf_remove_state(si->s);
+   if (si->s->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
+   si->s->key[PF_SK_WIRE] != si->s->key[PF_SK_STACK]) {
+   /*
+* If the local address is translated, keep
+* the state for "tcp.closed" seconds to
+* prevent its source port from being reused.
+*/
+   if (si->s->src.state < TCPS_FIN_WAIT_2 ||
+   si->s->dst.state < TCPS_FIN_WAIT_2) {
+   pf_set_protostate(si->s, PF_PEER_BOTH,
+   TCPS_TIME_WAIT);
+   si->s->timeout = PFTM_TCP_CLOSED;
+   si->s->expire = getuptime();
+   }
+   si->s->state_flags |= PFSTATE_INP_UNLINKED;
+   } else
+   pf_remove_state(si->s);
break;
}
}
Index: sys/net/pfvar.h
===
RCS file: /cvs/src/sys/net/pfvar.h,v
retrieving revision 1.509
diff -u -p -r1.509 pfvar.h
--- sys/net/pfvar.h 20 Jul 2022 09:33:11 -  1.509
+++ sys/net/pfvar.h 2 Sep 2022 12:54:37 -
@@ -784,6 +784,7 @@ struct pf_state {
 #definePFSTATE_RANDOMID0x0080
 #definePFSTATE_SCRUB_TCP   0x0100
 #definePFSTATE_SETPRIO 0x0200
+#definePFSTATE_INP_UNLINKED0x0400
 #definePFSTATE_SCRUBMASK 
(PFSTATE_NODF|PFSTATE_RANDOMID|PFSTATE_SCRUB_TCP)
 #definePFSTATE_SETMASK   (PFSTATE_SETTOS|PFSTATE_SETPRIO)
u_int8_t log;






httpd: fix default request body size

2022-09-02 Thread YASUOKA Masahiko
Hello,

For an HTTP request, if neither "Transfer-Encoding: chunked" nor
"Content-Length" is specified, the body length should be taken as 0.

In RFC 9112 Section 6.3, 7.:
|   7.  If this is a request message and none of the above are true, then
|   the message body length is zero (no message body is present).

The behavior can be tested by requesting POST to a cgi, like this:

  $ curl -X POST http://127.0.0.1/cgi-bin/test
  (Ctrl-C is needed without the diff)

ok?

# first round https://marc.info/?l=openbsd-tech=158173705129829=2

Index: usr.sbin/httpd/server_http.c
===
RCS file: /cvs/src/usr.sbin/httpd/server_http.c,v
retrieving revision 1.151
diff -u -p -r1.151 server_http.c
--- usr.sbin/httpd/server_http.c15 Aug 2022 09:36:19 -  1.151
+++ usr.sbin/httpd/server_http.c1 Sep 2022 20:36:10 -
@@ -474,12 +474,9 @@ server_read_http(struct bufferevent *bev
/* HTTP request payload */
if (clt->clt_toread > 0)
bev->readcb = server_read_httpcontent;
-
-   /* Single-pass HTTP body */
-   if (clt->clt_toread < 0) {
-   clt->clt_toread = TOREAD_UNLIMITED;
-   bev->readcb = server_read;
-   }
+   if (clt->clt_toread < 0 && !desc->http_chunked)
+   /* 7. of RFC 9112 Section 6.3 */
+   clt->clt_toread = 0;
break;
default:
server_abort_http(clt, 405, "method not allowed");



Re: pipex syzkaller keylen

2022-08-30 Thread YASUOKA Masahiko
Hi,

Tested.

ok yasuoka

On Tue, 30 Aug 2022 15:41:29 +0200
Alexander Bluhm  wrote:
> Hi,
> 
> It looks like syzkaller has found a missing input validation in pipex.
> 
> https://syzkaller.appspot.com/bug?id=c7ac769bd7ee15549b8a2be188bcee07d98a5357
> 
> As I have no pipex setup, can anyone test this diff please?
> 
> bluhm
> 
> Index: net/pipex.c
> ===
> RCS file: /data/mirror/openbsd/cvs/src/sys/net/pipex.c,v
> retrieving revision 1.147
> diff -u -p -r1.147 pipex.c
> --- net/pipex.c   25 Jul 2022 08:28:42 -  1.147
> +++ net/pipex.c   30 Aug 2022 13:21:03 -
> @@ -277,12 +277,24 @@ pipex_init_session(struct pipex_session 
>   }
>  #ifdef PIPEX_MPPE
>   if ((req->pr_ppp_flags & PIPEX_PPP_MPPE_ACCEPTED) != 0) {
> - if (req->pr_mppe_recv.keylenbits <= 0)
> + switch (req->pr_mppe_recv.keylenbits) {
> + case 40:
> + case 56:
> + case 128:
> + break;
> + default:
>   return (EINVAL);
> + }
>   }
>   if ((req->pr_ppp_flags & PIPEX_PPP_MPPE_ENABLED) != 0) {
> - if (req->pr_mppe_send.keylenbits <= 0)
> + switch (req->pr_mppe_send.keylenbits) {
> + case 40:
> + case 56:
> + case 128:
> + break;
> + default:
>   return (EINVAL);
> + }
>   }
>   if ((req->pr_ppp_flags & PIPEX_PPP_MPPE_REQUIRED) != 0) {
>   if ((req->pr_ppp_flags &
> 



Re: diff: b64decode(1) for long line

2022-08-30 Thread YASUOKA Masahiko
On Tue, 30 Aug 2022 14:09:40 +0200
Theo Buehler  wrote:
> On Tue, Aug 30, 2022 at 01:01:47PM +0200, YASUOKA Masahiko wrote:
>> On Tue, 30 Aug 2022 11:56:53 +0200
>> Claudio Jeker  wrote:
>> > On Tue, Aug 30, 2022 at 11:18:01AM +0200, YASUOKA Masahiko wrote:
>> >> @@ -423,11 +423,13 @@ uu_decode(void)
>> >>   }
>> >>  }
>> >>  
>> >> +#define  ROUNDDOWN(x,y)  (((x)/(y)) * (y))
>> >> +
>> >>  static int
>> >>  base64_decode(void)
>> >>  {
>> >>   int n;
>> >> - char inbuf[PATH_MAX];
>> >> + char inbuf[ROUNDDOWN(PATH_MAX, 4) + 1];
>> >>   unsigned char outbuf[PATH_MAX * 4];
>> >>  
>> >>   for (;;) {
>> >> 
>> > 
>> > The fix is right but I wonder why is this code using PATH_MAX for a buffer
>> > size that has nothing to do with a file system path?
>> 
>> It is a mystery of the history?
>> 
>>   
>> https://github.com/sergev/4.4BSD-Lite2/blob/master/usr/src/usr.bin/uudecode/uudecode.c#L92
>> 
>> MAXPATHLEN was replaced to PATH_MAX in 2015.
>> 
>> ok?
>> 
>> Index: usr.bin/uudecode/uudecode.c
>> ===
>> RCS file: /cvs/src/usr.bin/uudecode/uudecode.c,v
>> retrieving revision 1.27
>> diff -u -p -r1.27 uudecode.c
>> --- usr.bin/uudecode/uudecode.c  28 Jun 2019 13:35:05 -  1.27
>> +++ usr.bin/uudecode/uudecode.c  30 Aug 2022 10:54:17 -
>> @@ -190,7 +190,7 @@ decode2(void)
>>  void *handle;
>>  struct passwd *pw;
>>  struct stat st;
>> -char buf[PATH_MAX];
>> +char buf[BUFSIZ];
>>  
>>  base64 = 0;
>>  /* search for header line */
>> @@ -342,7 +342,7 @@ uu_decode(void)
>>  {
>>  int i, ch;
>>  char *p;
>> -char buf[PATH_MAX];
>> +char buf[BUFSIZ];
>>  
>>  /* for each input line */
>>  for (;;) {
>> @@ -427,8 +427,8 @@ static int
>>  base64_decode(void)
>>  {
>>  int n;
>> -char inbuf[PATH_MAX];
>> -unsigned char outbuf[PATH_MAX * 4];
>> +char inbuf[BUFSIZ];
> 
> You need an extra byte for the terminating NUL to fix the problem you
> wanted to fix:
> 
>   char inbuf[BUFSIZ + 1];
> 
> I'm not sure if we can rely on BUFSIZ to be a multiple of 4, I can't
> find anything that requires that. I think it's better to use a local
> #define with a comment as Claudio suggested.

Ah, I showed that diff separately from the first one.
Sorry for the confusion.  The following diff combines both.

> 
> 
>> +unsigned char outbuf[BUFSIZ * 4];
>>  
>>  for (;;) {
>>  switch (get_line(inbuf, sizeof(inbuf))) {
>> 

Index: usr.bin/uudecode/uudecode.c
===
RCS file: /cvs/src/usr.bin/uudecode/uudecode.c,v
retrieving revision 1.27
diff -u -p -r1.27 uudecode.c
--- usr.bin/uudecode/uudecode.c 28 Jun 2019 13:35:05 -  1.27
+++ usr.bin/uudecode/uudecode.c 30 Aug 2022 12:15:56 -
@@ -190,7 +190,7 @@ decode2(void)
void *handle;
struct passwd *pw;
struct stat st;
-   char buf[PATH_MAX];
+   char buf[BUFSIZ];
 
base64 = 0;
/* search for header line */
@@ -342,7 +342,7 @@ uu_decode(void)
 {
int i, ch;
char *p;
-   char buf[PATH_MAX];
+   char buf[BUFSIZ];
 
/* for each input line */
for (;;) {
@@ -423,12 +423,14 @@ uu_decode(void)
}
 }
 
+#defineROUNDDOWN(x,y)  (((x)/(y)) * (y))
+
 static int
 base64_decode(void)
 {
int n;
-   char inbuf[PATH_MAX];
-   unsigned char outbuf[PATH_MAX * 4];
+   char inbuf[ROUNDDOWN(BUFSIZ, 4) + 1];
+   unsigned char outbuf[BUFSIZ * 4];
 
for (;;) {
switch (get_line(inbuf, sizeof(inbuf))) {



Re: diff: b64decode(1) for long line

2022-08-30 Thread YASUOKA Masahiko
On Tue, 30 Aug 2022 11:56:53 +0200
Claudio Jeker  wrote:
> On Tue, Aug 30, 2022 at 11:18:01AM +0200, YASUOKA Masahiko wrote:
>> @@ -423,11 +423,13 @@ uu_decode(void)
>>  }
>>  }
>>  
>> +#define ROUNDDOWN(x,y)  (((x)/(y)) * (y))
>> +
>>  static int
>>  base64_decode(void)
>>  {
>>  int n;
>> -char inbuf[PATH_MAX];
>> +char inbuf[ROUNDDOWN(PATH_MAX, 4) + 1];
>>  unsigned char outbuf[PATH_MAX * 4];
>>  
>>  for (;;) {
>> 
> 
> The fix is right but I wonder why is this code using PATH_MAX for a buffer
> size that has nothing to do with a file system path?

It is a mystery of the history?

  
https://github.com/sergev/4.4BSD-Lite2/blob/master/usr/src/usr.bin/uudecode/uudecode.c#L92

MAXPATHLEN was replaced with PATH_MAX in 2015.

ok?

Index: usr.bin/uudecode/uudecode.c
===
RCS file: /cvs/src/usr.bin/uudecode/uudecode.c,v
retrieving revision 1.27
diff -u -p -r1.27 uudecode.c
--- usr.bin/uudecode/uudecode.c 28 Jun 2019 13:35:05 -  1.27
+++ usr.bin/uudecode/uudecode.c 30 Aug 2022 10:54:17 -
@@ -190,7 +190,7 @@ decode2(void)
void *handle;
struct passwd *pw;
struct stat st;
-   char buf[PATH_MAX];
+   char buf[BUFSIZ];
 
base64 = 0;
/* search for header line */
@@ -342,7 +342,7 @@ uu_decode(void)
 {
int i, ch;
char *p;
-   char buf[PATH_MAX];
+   char buf[BUFSIZ];
 
/* for each input line */
for (;;) {
@@ -427,8 +427,8 @@ static int
 base64_decode(void)
 {
int n;
-   char inbuf[PATH_MAX];
-   unsigned char outbuf[PATH_MAX * 4];
+   char inbuf[BUFSIZ];
+   unsigned char outbuf[BUFSIZ * 4];
 
for (;;) {
switch (get_line(inbuf, sizeof(inbuf))) {



diff: b64decode(1) for long line

2022-08-30 Thread YASUOKA Masahiko
b64decode(1) fails if a long line is given.

 % wc test
1   11370 test
 %
 % ./b64decode -r test > /dev/null
 b64decode: test: /dev/stdout: error decoding base64 input stream
 % 

uudecode.c

426 static int
427 base64_decode(void)
428 {
429 int n;
430 char inbuf[PATH_MAX];
431 unsigned char outbuf[PATH_MAX * 4];
432 
433 for (;;) {
434 switch (get_line(inbuf, sizeof(inbuf))) {
435 case 0:
436 return (0);
437 case 1:
438 return (1);
439 }
440 n = b64_pton(inbuf, outbuf, sizeof(outbuf));

b64_pton() assumes that the input ends at the end of a 24-bit group.
Otherwise, it returns -1.

For a line longer than 1023 bytes, get_line() returns a 1023-byte
string which ends 18 bits into a 24-bit group, so the error happens.

The diff fixes this by giving b64_pton() a string whose length is a
multiple of 4.

ok?

Index: usr.bin/uudecode/uudecode.c
===
RCS file: /cvs/src/usr.bin/uudecode/uudecode.c,v
retrieving revision 1.27
diff -u -p -r1.27 uudecode.c
--- usr.bin/uudecode/uudecode.c 28 Jun 2019 13:35:05 -  1.27
+++ usr.bin/uudecode/uudecode.c 30 Aug 2022 08:49:21 -
@@ -423,11 +423,13 @@ uu_decode(void)
}
 }
 
+#defineROUNDDOWN(x,y)  (((x)/(y)) * (y))
+
 static int
 base64_decode(void)
 {
int n;
-   char inbuf[PATH_MAX];
+   char inbuf[ROUNDDOWN(PATH_MAX, 4) + 1];
unsigned char outbuf[PATH_MAX * 4];
 
for (;;) {



Re: pppac(4): don't grab netlock within pppacioctl()

2022-07-22 Thread YASUOKA Masahiko
ok yasuoka

On Mon, 18 Jul 2022 13:50:37 +0300
Vitaliy Makkoveev  wrote:
> pipex(4) doesn't rely on netlock anymore.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.119
> diff -u -p -r1.119 if_pppx.c
> --- sys/net/if_pppx.c 15 Jul 2022 22:56:13 -  1.119
> +++ sys/net/if_pppx.c 18 Jul 2022 10:48:31 -
> @@ -1161,7 +1161,6 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
>   struct pppac_softc *sc = pppac_lookup(dev);
>   int error = 0;
>  
> - NET_LOCK();
>   switch (cmd) {
>   case FIONBIO:
>   break;
> @@ -1180,7 +1179,6 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
>   error = pipex_ioctl(sc, cmd, data);
>   break;
>   }
> - NET_UNLOCK();
>  
>   return (error);
>  }
> 



Re: pipex(4): kill "Static" keyword

2022-07-22 Thread YASUOKA Masahiko
ok yasuoka

On Mon, 18 Jul 2022 12:31:47 +0300
Vitaliy Makkoveev  wrote:
> We don't use "static" keyword for functions declaration to allow ddb(4)
> debug. Also, many "Static" functions are called by pppx(4) layer outside
> pipex(4) layer.
> 
> This is a mostly mechanical diff, except for `pipex_pppoe_padding', which
> should be "static const".
> 
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.146
> diff -u -p -r1.146 pipex.c
> --- sys/net/pipex.c   15 Jul 2022 22:56:13 -  1.146
> +++ sys/net/pipex.c   18 Jul 2022 09:30:49 -
> @@ -74,9 +74,6 @@
>  #include 
>  #include 
>  
> -/* drop static for ddb debuggability */
> -#define  Static
> -
>  #include 
>  #include "pipex_local.h"
>  
> @@ -559,7 +556,7 @@ pipex_export_session_stats(struct pipex_
>   stats->idle_time = session->idle_time;
>  }
>  
> -Static int
> +int
>  pipex_get_stat(struct pipex_session_stat_req *req, void *ownersc)
>  {
>   struct pipex_session *session;
> @@ -580,7 +577,7 @@ pipex_get_stat(struct pipex_session_stat
>   return error;
>  }
>  
> -Static int
> +int
>  pipex_get_closed(struct pipex_session_list_req *req, void *ownersc)
>  {
>   struct pipex_session *session, *session_tmp;
> @@ -608,7 +605,7 @@ pipex_get_closed(struct pipex_session_li
>   return (0);
>  }
>  
> -Static struct pipex_session *
> +struct pipex_session *
>  pipex_lookup_by_ip_address_locked(struct in_addr addr)
>  {
>   struct pipex_session *session;
> @@ -660,7 +657,7 @@ pipex_lookup_by_ip_address(struct in_add
>  }
>  
>  
> -Static struct pipex_session *
> +struct pipex_session *
>  pipex_lookup_by_session_id_locked(int protocol, int session_id)
>  {
>   struct pipex_hash_head *list;
> @@ -704,20 +701,20 @@ pipex_lookup_by_session_id(int protocol,
>  /***
>   * Timer functions
>   ***/
> -Static void
> +void
>  pipex_timer_start(void)
>  {
>   timeout_set_proc(_timer_ch, pipex_timer, NULL);
>   timeout_add_sec(_timer_ch, pipex_prune);
>  }
>  
> -Static void
> +void
>  pipex_timer_stop(void)
>  {
>   timeout_del(_timer_ch);
>  }
>  
> -Static void
> +void
>  pipex_timer(void *ignored_arg)
>  {
>   struct pipex_session *session, *session_tmp;
> @@ -764,7 +761,7 @@ pipex_timer(void *ignored_arg)
>  /***
>   * Common network I/O functions.  (tunnel protocol independent)
>   ***/
> -Static void
> +void
>  pipex_ip_output(struct mbuf *m0, struct pipex_session *session)
>  {
>   int is_idle;
> @@ -840,7 +837,7 @@ dropped:
>   counters_inc(session->stat_counters, pxc_oerrors);
>  }
>  
> -Static void
> +void
>  pipex_ppp_output(struct mbuf *m0, struct pipex_session *session, int proto)
>  {
>   u_char *cp, hdr[16];
> @@ -897,7 +894,7 @@ drop:
>   counters_inc(session->stat_counters, pxc_oerrors);
>  }
>  
> -Static void
> +void
>  pipex_ppp_input(struct mbuf *m0, struct pipex_session *session, int 
> decrypted)
>  {
>   int proto, hlen = 0;
> @@ -990,7 +987,7 @@ drop:
>   counters_inc(session->stat_counters, pxc_ierrors);
>  }
>  
> -Static void
> +void
>  pipex_ip_input(struct mbuf *m0, struct pipex_session *session)
>  {
>   struct ifnet *ifp;
> @@ -1067,7 +1064,7 @@ drop:
>  }
>  
>  #ifdef INET6
> -Static void
> +void
>  pipex_ip6_input(struct mbuf *m0, struct pipex_session *session)
>  {
>   struct ifnet *ifp;
> @@ -1115,7 +1112,7 @@ drop:
>  }
>  #endif
>  
> -Static struct mbuf *
> +struct mbuf *
>  pipex_common_input(struct pipex_session *session, struct mbuf *m0, int hlen,
>  int plen, int locked)
>  {
> @@ -1187,7 +1184,7 @@ not_ours:
>  /*
>   * pipex_ppp_proto
>   */
> -Static int
> +int
>  pipex_ppp_proto(struct mbuf *m0, struct pipex_session *session, int off,
>  int *hlenp)
>  {
> @@ -1228,7 +1225,7 @@ pipex_ppp_proto(struct mbuf *m0, struct 
>  /***
>   * PPPoE
>   ***/
> -Static u_charpipex_pppoe_padding[ETHERMIN];
> +static const u_char  pipex_pppoe_padding[ETHERMIN];
>  /*
>   * pipex_pppoe_lookup_session
>   */
> @@ -1286,7 +1283,7 @@ pipex_pppoe_input(struct mbuf *m0, struc
>  /*
>   * pipex_ppope_output
>   */
> -Static void
> +void
>  pipex_pppoe_output(struct mbuf *m0, struct pipex_session *session)
>  {
>   struct pipex_pppoe_header *pppoe;
> @@ -1332,7 +1329,7 @@ pipex_pppoe_output(struct mbuf *m0, stru
>  /***
>   * PPTP
>   ***/
> -Static void
> +void
>  pipex_pptp_output(struct mbuf 

Re: wg(4): 'Address already in use' when wgrtable is changed

2022-07-21 Thread YASUOKA Masahiko
Hello,

Let me ask for an "ok".

The diff fixes the problem as follows:

Configure wg0 without wgrtable

# ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
# ifconfig wg0   
wg0: flags=80c3 mtu 1420
index 6 priority 0 llprio 3
wgport 7111
wgpubkey OVDrQ6wTjZckC12gcUk8aeoYvF5oZ0wuyH17eZcS2BA=
groups: wg

Afterwards, we start using an rtable which belongs to rdomain 0,

# netstat -R
Rdomain 0
  Interfaces: lo0 vio0 vio1 enc0 pflog0 wg0
  Routing tables: 0 1

then we want to configure wgrtable to use that rtable,

# ifconfig wg0 wgrtable 1
ifconfig: SIOCSWG: Address already in use
# 

After the diff, the command is executed successfully.

  # ifconfig wg0 wgrtable 1
  #

ok?


When changing wgrtable to an rtable which belongs to the same rdomain
as the old one, close the existing socket first to prevent
EADDRINUSE.

Index: sys/net/if_wg.c
===
RCS file: /var/cvs/openbsd/src/sys/net/if_wg.c,v
retrieving revision 1.26
diff -u -p -r1.26 if_wg.c
--- sys/net/if_wg.c 21 Jul 2022 11:26:50 -  1.26
+++ sys/net/if_wg.c 22 Jul 2022 01:59:14 -
@@ -750,6 +750,16 @@ wg_bind(struct wg_softc *sc, in_port_t *
int  retries = 0;
 retry:
 #endif
+
+   if (port == sc->sc_udp_port &&
+   rtable_l2(rtable) == rtable_l2(sc->sc_udp_rtable)) {
+   /* changing rtable in the same domain */
+   wg_socket_close(>sc_so4);
+#ifdef INET6
+   wg_socket_close(>sc_so6);
+#endif
+   }
+
if ((ret = wg_socket_open(, AF_INET, , , sc)) != 0)
return ret;
 



Re: Is rdomain correct for rtable in man ifconfig?

2022-07-14 Thread YASUOKA Masahiko
Hello,

On Thu, 14 Jul 2022 14:09:52 +0900 (JST)
Masato Asou  wrote:
> The TUNNEL in the man ifconfig(8) is described as follows:
> 
> TUNNEL
> 
>  tunneldomain rtable
>   ^^here
>  Use routing table rtable instead of the default table.  The
>^^here
>  tunnel does not need to terminate in the same routing domain as
>  the interface itself.  rtable can be set to any valid routing
> ^^here
>  table ID; the corresponding routing domain is derived from this
>  table.
> 
>  -tunneldomain
>  Use the default routing table and routing domain 0.
> 
> Shouldn't rdomain be specified for TUNNELDOMAIN, not rtable?

I think it actually means rtable.

> When tunneldomain is set, rdomain is displayed and Rdomain 1 is
> created as shown below:
> 
> $ netstat -R
> Rdomain 0
>   Interfaces: lo0 em0 enc0 pflog0 gif0
>   Routing table: 0
> 
> $ doas ifconfig gif0 tunneldomain 1
> 0 asou@asou-curr: ~  14:04:15
> $ ifconfig gif0   
> gif0: flags=8010 mtu 1280
> index 7 priority 0 llprio 3
> encap: txprio payload rxprio payload
> groups: gif
> tunnel: (unset) ttl 64 nodf ecn rdomain 1
> $ netstat -R
> Rdomain 0
>   Interfaces: lo0 em0 enc0 pflog0 gif0 wg0
>   Routing table: 0
> 
> Rdomain 1
>   Interface: lo1
>   Routing table: 1
> 
> $ 

Which version?  This doesn't match my test.

 # ifconfig  
 lo0: flags=8049 mtu 32768
 index 3 priority 0 llprio 3
 groups: lo
 inet6 ::1 prefixlen 128
 inet6 fe80::1%lo0 prefixlen 64 scopeid 0x3
 inet 127.0.0.1 netmask 0xff00
 em0: flags=8802 mtu 1500
 lladdr 52:54:00:12:34:56
 index 1 priority 0 llprio 3
 media: Ethernet autoselect (1000baseT full-duplex)
 status: active
 enc0: flags=0<>
 index 2 priority 0 llprio 3
 groups: enc
 status: active
 pflog0: flags=141 mtu 33136
 index 4 priority 0 llprio 3
 groups: pflog
 # 
 # netstat -R
 Rdomain 0
   Interfaces: lo0 em0 enc0 pflog0
   Routing table: 0
 
 # 
 # ifconfig gif0 tunneldomain 1
 ifconfig: SIOCSLIFPHYRTABLE: Invalid argument
 # 

tunneldomain X fails if X doesn't exist.

Also,

 # route -T1 add 10.0.0.0/8 127.0.0.1
 add net 10.0.0.0/8: gateway 127.0.0.1
 # 

create a rtable 1 by creating a dummy route.

 # ifconfig gif0 tunneldomain 1
 #
 # ifconfig gif0
 gif0: flags=8010 mtu 1280
 index 5 priority 0 llprio 3
 encap: txprio payload rxprio payload
 groups: gif
 tunnel: (unset) ttl 64 nodf ecn rdomain 1

the command becomes ok.

 # 
 # netstat -R 
 Rdomain 0
   Interfaces: lo0 em0 enc0 pflog0 gif0
   Routing tables: 0 1
 
 # sysctl kern.version   
 kern.version=OpenBSD 7.1 (GENERIC.MP) #465: Mon Apr 11 18:03:57 MDT 2022
 dera...@amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
 
 # 

It seems a rtable can be specified for "tunneldomain".



Re: pipex(4): remove PIPEXCSESSION ioctl(2) command

2022-07-12 Thread YASUOKA Masahiko
ok yasuoka

On Mon, 11 Jul 2022 01:11:22 +0300
Vitaliy Makkoveev  wrote:
> A long time ago, a pipex(4) session couldn't be deleted until both pipex(4)
> input and output queues became empty. Dead sessions were linked to the
> stack and the `ip_forward' flag was used to prevent packet forwarding.
> npppd(8) marked such sessions by doing a PIPEXCSESSION ioctl(2) call.
> 
> But since we started to unlink closed sessions from the stack, this logic
> became unnecessary. Also, a pipex(4) session can now be closed right after
> the close request.
> 
> npppd(8) was the only userland program which did the PIPEXCSESSION ioctl(2)
> call, and we removed it a week ago. It's time to remove the remains from
> the kernel and the pipex(4) man page.
> 
> Now the `flags' member of 'pipex_session' structure became immutable.
> 
> Index: share/man/man4/pipex.4
> ===
> RCS file: /cvs/src/share/man/man4/pipex.4,v
> retrieving revision 1.14
> diff -u -p -r1.14 pipex.4
> --- share/man/man4/pipex.42 Jan 2021 13:15:15 -   1.14
> +++ share/man/man4/pipex.410 Jul 2022 21:59:42 -
> @@ -179,18 +179,6 @@ See
>  section for a description of the
>  .Vt pipex_statistics
>  structure.
> -.It Dv PIPEXCSESSION Fa "struct pipex_session_config_req *"
> -Change the configuration of the specified session.
> -The session and configuration are specified by a
> -.Vt pipex_session_config_req
> -structure, which has the following definition:
> -.Bd -literal
> -struct pipex_session_config_req {
> -int   pcr_protocol;   /* tunnel protocol  */
> -uint16_t  pcr_session_id; /* session-id */
> -int   pcr_ip_forward; /* ip_forwarding on/off */
> -};
> -.Ed
>  .It Dv PIPEXGSTATFa "struct pipex_session_stat_req *"
>  Get statistics for the specified session.
>  Specify the session using a
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 pipex.c
> --- sys/net/pipex.c   10 Jul 2022 21:28:10 -  1.144
> +++ sys/net/pipex.c   10 Jul 2022 21:59:56 -
> @@ -173,11 +173,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c
>  
>   NET_ASSERT_LOCKED();
>   switch (cmd) {
> - case PIPEXCSESSION:
> - ret = pipex_config_session(
> - (struct pipex_session_config_req *)data, ownersc);
> - break;
> -
>   case PIPEXGSTAT:
>   ret = pipex_get_stat((struct pipex_session_stat_req *)data,
>   ownersc);
> @@ -323,8 +318,6 @@ pipex_init_session(struct pipex_session 
>   session->ppp_flags = req->pr_ppp_flags;
>   session->ppp_id = req->pr_ppp_id;
>  
> - session->flags |= PIPEX_SFLAGS_IP_FORWARD;
> -
>   session->stat_counters = counters_alloc(pxc_ncounters);
>  
>   session->ip_address.sin_family = AF_INET;
> @@ -569,32 +562,6 @@ pipex_export_session_stats(struct pipex_
>  }
>  
>  Static int
> -pipex_config_session(struct pipex_session_config_req *req, void *ownersc)
> -{
> - struct pipex_session *session;
> - int error = 0;
> -
> - NET_ASSERT_LOCKED();
> -
> - session = pipex_lookup_by_session_id(req->pcr_protocol,
> - req->pcr_session_id);
> - if (session == NULL)
> - return (EINVAL);
> -
> - if (session->ownersc == ownersc) {
> - if (req->pcr_ip_forward != 0)
> - session->flags |= PIPEX_SFLAGS_IP_FORWARD;
> - else
> - session->flags &= ~PIPEX_SFLAGS_IP_FORWARD;
> - } else
> - error = EINVAL;
> -
> - pipex_rele_session(session);
> -
> - return error;
> -}
> -
> -Static int
>  pipex_get_stat(struct pipex_session_stat_req *req, void *ownersc)
>  {
>   struct pipex_session *session;
> @@ -810,9 +777,7 @@ pipex_ip_output(struct mbuf *m0, struct 
>   /*
>* Multicast packet is a idle packet and it's not TCP.
>*/
> - if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
> - PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> - goto drop;
> +
>   /* reset idle timer */
>   if (session->timeout_sec != 0) {
>   is_idle = 0;
> @@ -850,9 +815,6 @@ pipex_ip_output(struct mbuf *m0, struct 
>  
>   if (session_tmp->ownersc != session->ownersc)
>   goto next;
> - if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
> - PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> - goto next;
>  
>   refcnt_take(_tmp->pxs_refcnt);
>   mtx_leave(_list_mtx);
> @@ -878,8 +840,6 @@ next:
>   }
>  
>   return;
> -drop:
> - m_freem(m0);
>  dropped:
>   counters_inc(session->stat_counters, pxc_oerrors);
>  }
> @@ -989,8 +949,6 @@ pipex_ppp_input(struct mbuf *m0, struct 
>  
>  

Re: pipex(4): Add missing lock around all sessions loop within pipex_ip_output()

2022-07-09 Thread YASUOKA Masahiko
ok yasuoka

On Sat, 9 Jul 2022 18:04:04 +0300
Vitaliy Makkoveev  wrote:
> On Sat, Jul 09, 2022 at 10:46:56PM +0900, YASUOKA Masahiko wrote:
>> Hello,
>> 
>> On Sat, 9 Jul 2022 01:43:41 +0300
>> Vitaliy Makkoveev  wrote:
>> > On Sat, Jul 09, 2022 at 12:08:49AM +0300, Vitaliy Makkoveev wrote:
>> >> Thanks for pointing.
>> >> 
>> >> > On 8 Jul 2022, at 23:13, Alexander Bluhm  
>> >> > wrote:
>> >> > 
>> >> > On Fri, Jul 08, 2022 at 05:42:23PM +0300, Vitaliy Makkoveev wrote:
>> >> >> The update diff below. I also found we need to increment 'pxc_oerrors'
>> >> >> counter on `session_tmp' instead of session.
>> >> > 
>> >> >> +  m = m_copym(m0, 0, M_COPYALL, M_NOWAIT)
>> >> > 
>> >> > Does this compile?  There is a ; missing.
>> >> > 
>> >> > otherwise OK bluhm@
>> >> > 
>> >> 
>> > 
>> > The fixed diff. yasuoka@, is this diff ok by you?
>> 
>> Other than the compile error, ok yasuoka
>> 
>> but the last diff seems not ok.
>> 
> 
> Sorry, I sent the previous diff again. Here is the right one.
> 
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.143
> diff -u -p -r1.143 pipex.c
> --- sys/net/pipex.c   2 Jul 2022 08:50:42 -   1.143
> +++ sys/net/pipex.c   8 Jul 2022 22:40:44 -
> @@ -842,20 +842,38 @@ pipex_ip_output(struct mbuf *m0, struct 
>  
>   m0->m_flags &= ~(M_BCAST|M_MCAST);
>  
> - LIST_FOREACH(session_tmp, _session_list, session_list) {
> + mtx_enter(_list_mtx);
> +
> + session_tmp = LIST_FIRST(_session_list);
> + while (session_tmp != NULL) {
> + struct pipex_session *session_save = NULL;
> +
>   if (session_tmp->ownersc != session->ownersc)
> - continue;
> + goto next;
>   if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
>   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> - continue;
> + goto next;
> +
> + refcnt_take(_tmp->pxs_refcnt);
> + mtx_leave(_list_mtx);
> +
>   m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
> - if (m == NULL) {
> - counters_inc(session->stat_counters,
> + if (m != NULL)
> + pipex_ppp_output(m, session_tmp, PPP_IP);
> + else
> + counters_inc(session_tmp->stat_counters,
>   pxc_oerrors);
> - continue;
> - }
> - pipex_ppp_output(m, session_tmp, PPP_IP);
> +
> + mtx_enter(_list_mtx);
> + session_save = session_tmp;
> +next:
> + session_tmp = LIST_NEXT(session_tmp, session_list);
> + if (session_save != NULL)
> + pipex_rele_session(session_save);
>   }
> +
> + mtx_leave(_list_mtx);
> +
>   m_freem(m0);
>   }
>  
> 



Re: pipex(4): Add missing lock around all sessions loop within pipex_ip_output()

2022-07-09 Thread YASUOKA Masahiko
Hello,

On Sat, 9 Jul 2022 01:43:41 +0300
Vitaliy Makkoveev  wrote:
> On Sat, Jul 09, 2022 at 12:08:49AM +0300, Vitaliy Makkoveev wrote:
>> Thanks for pointing.
>> 
>> > On 8 Jul 2022, at 23:13, Alexander Bluhm  wrote:
>> > 
>> > On Fri, Jul 08, 2022 at 05:42:23PM +0300, Vitaliy Makkoveev wrote:
>> >> The update diff below. I also found we need to increment 'pxc_oerrors'
>> >> counter on `session_tmp' instead of session.
>> > 
>> >> + m = m_copym(m0, 0, M_COPYALL, M_NOWAIT)
>> > 
>> > Does this compile?  There is a ; missing.
>> > 
>> > otherwise OK bluhm@
>> > 
>> 
> 
> The fixed diff. yasuoka@, is this diff ok by you?

Other than the compile error, ok yasuoka

but the last diff seems not ok.

> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.143
> diff -u -p -r1.143 pipex.c
> --- sys/net/pipex.c   2 Jul 2022 08:50:42 -   1.143
> +++ sys/net/pipex.c   7 Jul 2022 21:52:16 -
> @@ -842,20 +842,39 @@ pipex_ip_output(struct mbuf *m0, struct 
>  
>   m0->m_flags &= ~(M_BCAST|M_MCAST);
>  
> - LIST_FOREACH(session_tmp, _session_list, session_list) {
> + mtx_enter(_list_mtx);
> +
> + session_tmp = LIST_FIRST(_session_list);
> + while (session_tmp != NULL) {
> + struct pipex_session *session_save = NULL;
> +
>   if (session_tmp->ownersc != session->ownersc)
> - continue;
> + goto next;
>   if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
>   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> - continue;
> + goto next;
>   m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
>   if (m == NULL) {
>   counters_inc(session->stat_counters,
>   pxc_oerrors);
> - continue;
> + goto next;
>   }
> +
> + refcnt_take(>pxs_refcnt);
> + mtx_leave(_list_mtx);
> +

it lost 2 things?

- "need to increment 'pxc_oerrors' counter on `session_tmp' instead of
  session."
- "unlock list just before m_copym(9) call"

>   pipex_ppp_output(m, session_tmp, PPP_IP);
> +
> + mtx_enter(_list_mtx);
> + session_save = session_tmp;
> +next:
> + session_tmp = LIST_NEXT(session_tmp, session_list);
> + if (session_save != NULL)
> + pipex_rele_session(session_save);
>   }
> +
> + mtx_leave(_list_mtx);
> +
>   m_freem(m0);
>   }
>  
> 



Re: pipex(4): Add missing lock around all sessions loop within pipex_ip_output()

2022-07-08 Thread YASUOKA Masahiko
Hello,

On Fri, 8 Jul 2022 00:53:16 +0300
Vitaliy Makkoveev  wrote:
> The `pipex_list_mtx' mutex(9) protects the global pipex(4) lists, so it
> needs to be taken while we perform this foreach loop.
> 
> The all-sessions loop was reworked to make it possible to drop the lock
> within it. This is required because pipex_ppp_output() takes the scheduler
> lock when it performs ip_send().
>
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.143
> diff -u -p -r1.143 pipex.c
> --- sys/net/pipex.c   2 Jul 2022 08:50:42 -   1.143
> +++ sys/net/pipex.c   7 Jul 2022 21:52:16 -
> @@ -842,20 +842,39 @@ pipex_ip_output(struct mbuf *m0, struct 
>  
>   m0->m_flags &= ~(M_BCAST|M_MCAST);
>  
> - LIST_FOREACH(session_tmp, _session_list, session_list) {
> + mtx_enter(_list_mtx);
> +
> + session_tmp = LIST_FIRST(_session_list);
> + while (session_tmp != NULL) {
> + struct pipex_session *session_save = NULL;
> +
>   if (session_tmp->ownersc != session->ownersc)
> - continue;
> + goto next;
>   if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
>   PIPEX_SFLAGS_IP6_FORWARD)) == 0)
> - continue;
> + goto next;
>   m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
>   if (m == NULL) {
>   counters_inc(session->stat_counters,
>   pxc_oerrors);
> - continue;
> + goto next;
>   }
> +
> + refcnt_take(>pxs_refcnt);

this "session" should be session_tmp?

Also, doesn't the reference count need to be taken at the top of the block?

session_ = LIST_FIRST(_session_list);
while (session_tmp != NULL) {
refcnt_take(_tmp->pxs_refcnt);

if (session_tmp->ownersc != session->ownersc)
goto next;
if ((session->flags & (PIPEX_SFLAGS_IP_FORWARD |
PIPEX_SFLAGS_IP6_FORWARD)) == 0)
goto next;
m = m_copym(m0, 0, M_COPYALL, M_NOWAIT);
if (m == NULL) {
counters_inc(session->stat_counters,
pxc_oerrors);
goto next;
}

mtx_leave(_list_mtx);
pipex_ppp_output(m, session_tmp, PPP_IP);
mtx_enter(_list_mtx);
next:
session_save = session_tmp;
session_tmp = LIST_NEXT(session_tmp, session_list);
pipex_rele_session(session_save);
}


> + mtx_leave(_list_mtx);
> +
>   pipex_ppp_output(m, session_tmp, PPP_IP);
> +
> + mtx_enter(_list_mtx);
> + session_save = session_tmp;
> +next:
> + session_tmp = LIST_NEXT(session_tmp, session_list);
> + if (session_save != NULL)
> + pipex_rele_session(session_save);
>   }
> +
> + mtx_leave(_list_mtx);
> +
>   m_freem(m0);
>   }
>  



Re: [v3] amd64: simplify TSC sync testing

2022-07-06 Thread YASUOKA Masahiko
On Thu, 07 Jul 2022 14:02:35 +0900 (JST)
YASUOKA Masahiko  wrote:
> Hello Scott,
> 
> With the patch, my machine on ESXi doesn't show any extra messages.
> 
> *Without* the patch, the machine shows
> 
>  % grep 'TSC.*skew' dmesg.current-tsc-debug 
>  cpu1: disabling user TSC (skew=-2603)
>  cpu2: disabling user TSC (skew=-2959)
>  cpu3: disabling user TSC (skew=-3784)
>  %
> 
> and monotonic time goes backward (the test in
> https://marc.info/?l=openbsd-tech&m=161699406119704&w=2 failed)

Oops, the link was wrong..

https://marc.info/?l=openbsd-tech&m=161657532610882&w=2

is the correct one.  Also, with the patch, the test passes.

> dmesg with the patch:
> 
> OpenBSD 7.1-current (GENERIC.MP) #24: Thu Jul  7 10:09:32 JST 2022
> 
> yasu...@yasuoka-ob-c.tokyo.iiji.jp:/source/yasuoka/head/git/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 2113290240 (2015MB)
> avail mem = 2031927296 (1937MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xff7401f (161 entries)
> bios0: vendor VMware, Inc. version "VMW71.00V.16460286.B64.2006250725" date 
> 06/25/2020
> bios0: VMware, Inc. VMware7,1
> acpi0 at bios0: ACPI 4.0
> acpi0: sleep states S0 S1 S4 S5
> acpi0: tables DSDT SRAT FACP APIC MCFG HPET WAET WSMT
> acpi0: wakeup devices PCI0(S3) USB_(S1) P2P0(S3) S1F0(S3) S2F0(S3) S8F0(S3) 
> S16F(S3) S17F(S3) S18F(S3) S22F(S3) S23F(S3) S24F(S3) S25F(S3) PE40(S3) 
> S1F0(S3) PE41(S3) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.81 MHz, 06-55-07
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
> 16-way L2 cache, 13MB 64b/line 11-way L3 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
> cpu0: apic clock running at 65MHz
> cpu1 at mainbus0: apid 1 (application processor)
> cpu1: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.61 MHz, 06-55-07
> cpu1: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu1: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
> 16-way L2 cache, 13MB 64b/line 11-way L3 cache
> cpu1: smt 0, core 1, package 0
> cpu2 at mainbus0: apid 2 (application processor)
> cpu2: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.62 MHz, 06-55-07
> cpu2: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu2: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
> 16-way L2 cache, 13MB 64b/line 11-way L3 cache
> cpu2: smt 0, core 2, package 0
> cpu3 at mainbus0: apid 3 (application processor)
> cpu3: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.62 MHz, 06-55-07
> cpu3: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu3: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
> 16-way L2 cache, 13MB 64b/line 11-way L3 cache
> cpu3: smt 0, core 3, package 0
> ioapic0 at mainbus0: api

Re: [v3] amd64: simplify TSC sync testing

2022-07-06 Thread YASUOKA Masahiko
Hello Scott,

With the patch, my machine on ESXi doesn't show any extra messages.

*Without* the patch, the machine shows

 % grep 'TSC.*skew' dmesg.current-tsc-debug 
 cpu1: disabling user TSC (skew=-2603)
 cpu2: disabling user TSC (skew=-2959)
 cpu3: disabling user TSC (skew=-3784)
 %

and monotonic time goes backward (the test in
https://marc.info/?l=openbsd-tech&m=161699406119704&w=2 failed)

dmesg with the patch:

OpenBSD 7.1-current (GENERIC.MP) #24: Thu Jul  7 10:09:32 JST 2022

yasu...@yasuoka-ob-c.tokyo.iiji.jp:/source/yasuoka/head/git/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 2113290240 (2015MB)
avail mem = 2031927296 (1937MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 2.7 @ 0xff7401f (161 entries)
bios0: vendor VMware, Inc. version "VMW71.00V.16460286.B64.2006250725" date 
06/25/2020
bios0: VMware, Inc. VMware7,1
acpi0 at bios0: ACPI 4.0
acpi0: sleep states S0 S1 S4 S5
acpi0: tables DSDT SRAT FACP APIC MCFG HPET WAET WSMT
acpi0: wakeup devices PCI0(S3) USB_(S1) P2P0(S3) S1F0(S3) S2F0(S3) S8F0(S3) 
S16F(S3) S17F(S3) S18F(S3) S22F(S3) S23F(S3) S24F(S3) S25F(S3) PE40(S3) 
S1F0(S3) PE41(S3) [...]
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.81 MHz, 06-55-07
cpu0: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu0: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
16-way L2 cache, 13MB 64b/line 11-way L3 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 65MHz
cpu1 at mainbus0: apid 1 (application processor)
cpu1: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.61 MHz, 06-55-07
cpu1: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu1: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
16-way L2 cache, 13MB 64b/line 11-way L3 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 2 (application processor)
cpu2: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.62 MHz, 06-55-07
cpu2: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu2: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
16-way L2 cache, 13MB 64b/line 11-way L3 cache
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 3 (application processor)
cpu3: Intel(R) Xeon(R) Silver 4210R CPU @ 2.40GHz, 2393.62 MHz, 06-55-07
cpu3: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,MMX,FXSR,SSE,SSE2,SS,HTT,SSE3,PCLMUL,VMX,SSSE3,FMA3,CX16,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,HV,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,AVX512F,AVX512DQ,RDSEED,ADX,SMAP,CLFLUSHOPT,CLWB,AVX512CD,AVX512BW,AVX512VL,PKU,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
cpu3: 32KB 64b/line 8-way D-cache, 32KB 64b/line 8-way I-cache, 1MB 64b/line 
16-way L2 cache, 13MB 64b/line 11-way L3 cache
cpu3: smt 0, core 3, package 0
ioapic0 at mainbus0: apid 4 pa 0xfec0, version 20, 24 pins, remapped
acpimcfg0 at acpi0
acpimcfg0: addr 0xe000, bus 0-127
acpihpet0 at acpi0: 14318179 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpipci0 at acpi0 PCI0: 0x 0x0011 0x0001
acpicmos0 at acpi0
"PNP0A05" at acpi0 not configured
com0 at acpi0 COMA addr 0x3f8/0x8 irq 4: ns16550a, 16 byte fifo
com0: console
acpiac0 at acpi0: AC unit online
acpicpu0 at acpi0: C1(@1 halt!)
acpicpu1 at acpi0: C1(@1 halt!)
acpicpu2 at acpi0: C1(@1 halt!)
acpicpu3 at acpi0: C1(@1 halt!)
pvbus0 at mainbus0: VMware
vmt0 at pvbus0
pci0 at 

Re: npppd(8): remove PIPEXCSESSION ioctl(2) command

2022-06-30 Thread YASUOKA Masahiko
ok yasuoka

On Thu, 30 Jun 2022 12:26:55 +0300
Vitaliy Makkoveev  wrote:
> yasuoka@ reminded me that, a long time ago, pipex(4) sessions couldn't be
> deleted until both input and output queues became empty:
> 
> pipex_timer(void *ignored_arg)
> {
>   /* ... */
>   switch (session->state) {
>   /* ... */
>   case PIPEX_STATE_CLOSED:
>   /*
>* mbuf queued in pipexinq or pipexoutq may have a
>* refererce to this session.
>*/
>   if (!mq_empty() || !mq_empty())
>   continue;
> 
>   pipex_destroy_session(session);
>   break;
>   /* ... */
> }
> 
> Such dead sessions were linked to the stack and the `ip_forward' flag
> was used to prevent packets forwarding.
> 
> But since we started to unlink closed sessions from the stack, this logic
> became unnecessary. Also, a pipex(4) session can now be closed right after
> the close request.
> 
> I want to remove it. This makes the pipex(4) session flags immutable and
> reduces locking games.
> 
> This diff removes the PIPEXCSESSION call only from npppd(8). It deletes
> the session just after the PIPEXCSESSION ioctl(2) call, so nothing changes
> in the session's life within kernel space. I will modify the kernel and the
> pipex(4) man page with a separate diff, after I finish fixing pipex(4) locking.
> 
> Index: usr.sbin/npppd/npppd/npppd.c
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd.c,v
> retrieving revision 1.52
> diff -u -p -r1.52 npppd.c
> --- usr.sbin/npppd/npppd/npppd.c  15 Nov 2021 15:14:24 -  1.52
> +++ usr.sbin/npppd/npppd/npppd.c  30 Jun 2022 08:49:29 -
> @@ -114,7 +114,6 @@ static struct in_addr loop;   /* initializ
>  static uint32_tstr_hash(const void *, int);
>  
>  #ifdef USE_NPPPD_PIPEX
> -static int npppd_ppp_pipex_ip_disable(npppd *, npppd_ppp *);
>  static void pipex_periodic(npppd *);
>  #endif /* USE_NPPPD_PIPEX */
>  
> @@ -1246,62 +1245,6 @@ npppd_ppp_pipex_disable(npppd *_this, np
>   return error;
>  }
>  
> -/* XXX: s/npppd_ppp_pipex_ip_disable/npppd_ppp_pipex_stop/ ?? */
> -
> -/** Stop PIPEX of the {@link npppd_ppp ppp} */
> -static int
> -npppd_ppp_pipex_ip_disable(npppd *_this, npppd_ppp *ppp)
> -{
> - struct pipex_session_config_req req;
> -#ifdef USE_NPPPD_PPPOE
> - pppoe_session *pppoe;
> -#endif
> -#ifdef USE_NPPPD_PPTP
> - pptp_call *call;
> -#endif
> -#ifdef USE_NPPPD_L2TP
> - l2tp_call *l2tp;
> -#endif
> - if (ppp->pipex_started == 0)
> - return 0;   /* not started */
> -
> - bzero(, sizeof(req));
> - switch(ppp->tunnel_type) {
> -#ifdef USE_NPPPD_PPPOE
> - case NPPPD_TUNNEL_PPPOE:
> - pppoe = (pppoe_session *)ppp->phy_context;
> -
> - /* PPPoE specific information */
> - req.pcr_protocol = PIPEX_PROTO_PPPOE;
> - req.pcr_session_id = pppoe->session_id;
> - break;
> -#endif
> -#ifdef USE_NPPPD_PPTP
> - case NPPPD_TUNNEL_PPTP:
> - call = (pptp_call *)ppp->phy_context;
> -
> - /* PPTP specific information */
> - req.pcr_session_id = call->id;
> - req.pcr_protocol = PIPEX_PROTO_PPTP;
> - break;
> -#endif
> -#ifdef USE_NPPPD_L2TP
> - case NPPPD_TUNNEL_L2TP:
> - l2tp = (l2tp_call *)ppp->phy_context;
> -
> - /* L2TP specific context */
> - req.pcr_session_id = l2tp->session_id;
> - req.pcr_protocol = PIPEX_PROTO_L2TP;
> - break;
> -#endif
> - default:
> - return 1;
> - }
> - req.pcr_ip_forward = 0;
> -
> - return ioctl(_this->iface[ppp->ifidx].devf, PIPEXCSESSION, );
> -}
> -
>  static void
>  pipex_periodic(npppd *_this)
>  {
> @@ -1565,11 +1508,6 @@ npppd_set_ip_enabled(npppd *_this, npppd
>   hl->key = ppp1->username;
>   }
>   }
> -#ifdef USE_NPPPD_PIPEX
> - if (npppd_ppp_pipex_ip_disable(_this, ppp) != 0)
> - ppp_log(ppp, LOG_ERR,
> - "npppd_ppp_pipex_ip_disable() failed: %m");
> -#endif /* USE_NPPPD_PIPEX */
>   }
>  }
>  
> 



Re: selecting proper GOP when there are multiple GOPs

2022-06-16 Thread YASUOKA Masahiko
On Thu, 16 Jun 2022 19:37:57 +0200 (CEST)
Mark Kettenis  wrote:
>> Date: Thu, 16 Jun 2022 23:49:05 +0900 (JST)
>> From: YASUOKA Masahiko 
(snip)
>> @@ -444,6 +445,30 @@ efi_video_init(void)
>>  int  i, mode80x25, mode100x31;
>>  UINTNcols, rows;
>>  EFI_STATUS   status;
>> +EFI_HANDLE  *handles;
>> +UINTNnhandles;
>> +EFI_GRAPHICS_OUTPUT *first_gop = NULL;
>> +EFI_DEVICE_PATH *devp_test = NULL;
>> +
>> +status = BS->LocateHandleBuffer(ByProtocol, _guid, NULL, ,
>> +);
>> +if (status != EFI_SUCCESS)
>> +panic("BS->LocateHandleBuffer() returns %d", status);
> 
> What about headless machines?  I suspect that most x86 machines
> without a GPU of some sorts will still provide a framebuffer of some
> sorts in their UEFI implementations.  But maybe some machines don't?

I have not seen a machine which doesn't yet.

> If there are no GOP protocol handles, LocateHandleBuffer() seems to
> return EFI_NOT_FOUND, which would result in a panic, which wouldn't be
> very helpful.

Yes, previous version seems to care about that.  We don't need to
change that behavior.

The diff is updated.

ok?

Index: sys/arch/amd64/stand//efiboot/conf.c
===
RCS file: /cvs/src/sys/arch/amd64/stand/efiboot/conf.c,v
retrieving revision 1.36
diff -u -p -r1.36 conf.c
--- sys/arch/amd64/stand//efiboot/conf.c8 Jun 2021 02:45:49 -   
1.36
+++ sys/arch/amd64/stand//efiboot/conf.c17 Jun 2022 00:23:40 -
@@ -40,7 +40,7 @@
 #include "efidev.h"
 #include "efipxe.h"
 
-const char version[] = "3.59";
+const char version[] = "3.60";
 
 #ifdef EFI_DEBUG
 intdebug = 0;
Index: sys/arch/amd64/stand//efiboot/efiboot.c
===
RCS file: /cvs/src/sys/arch/amd64/stand/efiboot/efiboot.c,v
retrieving revision 1.38
diff -u -p -r1.38 efiboot.c
--- sys/arch/amd64/stand//efiboot/efiboot.c 7 Jun 2021 00:04:20 -   
1.38
+++ sys/arch/amd64/stand//efiboot/efiboot.c 17 Jun 2022 00:23:40 -
@@ -424,8 +424,9 @@ efi_memprobe_internal(void)
 /***
  * Console
  ***/
-static SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
-static SIMPLE_INPUT_INTERFACE   *conin;
+static SIMPLE_TEXT_OUTPUT_INTERFACE*conout = NULL;
+static SIMPLE_INPUT_INTERFACE  *conin;
+static EFI_GRAPHICS_OUTPUT *gop = NULL;
 static EFI_GUID con_guid
= EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 static EFI_GUID gop_guid
@@ -444,6 +445,28 @@ efi_video_init(void)
int  i, mode80x25, mode100x31;
UINTNcols, rows;
EFI_STATUS   status;
+   EFI_HANDLE  *handles;
+   UINTNnhandles;
+   EFI_GRAPHICS_OUTPUT *first_gop = NULL;
+   EFI_DEVICE_PATH *devp_test = NULL;
+
+   status = BS->LocateHandleBuffer(ByProtocol, _guid, NULL, ,
+   );
+   if (!EFI_ERROR(status)) {
+   for (i = 0; i < nhandles; i++) {
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **));
+   if (first_gop == NULL)
+   first_gop = gop;
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **)_test);
+   if (status == EFI_SUCCESS)
+   break;
+   }
+   if (status != EFI_SUCCESS)
+   gop = first_gop;
+   BS->FreePool(handles);
+   }
 
conout = ST->ConOut;
status = BS->LocateProtocol(_guid, NULL, (void **));
@@ -808,7 +831,6 @@ efi_com_putc(dev_t dev, int c)
  */
 static EFI_GUID acpi_guid = ACPI_20_TABLE_GUID;
 static EFI_GUID smbios_guid = SMBIOS_TABLE_GUID;
-static EFI_GRAPHICS_OUTPUT *gop;
 static int  gopmode = -1;
 
 #defineefi_guidcmp(_a, _b) memcmp((_a), (_b), sizeof(EFI_GUID))
@@ -853,8 +875,7 @@ efi_makebootargs(void)
/*
 * Frame buffer
 */
-   status = BS->LocateProtocol(_guid, NULL, (void **));
-   if (!EFI_ERROR(status)) {
+   if (gop != NULL) {
i

Re: selecting proper GOP when there are multiple GOPs

2022-06-16 Thread YASUOKA Masahiko
On Thu, 16 Jun 2022 15:52:41 +0300
Nick Henderson  wrote:
> Any updates on this patch? Would love to see it included in the next release.

Yes.

I'll commit this this weekend even if I don't get any ok.

ok?

Index: sys/arch/amd64/stand/efiboot/efiboot.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efiboot.c,v
retrieving revision 1.38
diff -u -p -r1.38 efiboot.c
--- sys/arch/amd64/stand/efiboot/efiboot.c  7 Jun 2021 00:04:20 -   
1.38
+++ sys/arch/amd64/stand/efiboot/efiboot.c  2 May 2022 07:53:38 -
@@ -424,8 +424,9 @@ efi_memprobe_internal(void)
 /***
  * Console
  ***/
-static SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
-static SIMPLE_INPUT_INTERFACE   *conin;
+static SIMPLE_TEXT_OUTPUT_INTERFACE*conout = NULL;
+static SIMPLE_INPUT_INTERFACE  *conin;
+static EFI_GRAPHICS_OUTPUT *gop = NULL;
 static EFI_GUID con_guid
= EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 static EFI_GUID gop_guid
@@ -444,6 +445,30 @@ efi_video_init(void)
int  i, mode80x25, mode100x31;
UINTNcols, rows;
EFI_STATUS   status;
+   EFI_HANDLE  *handles;
+   UINTNnhandles;
+   EFI_GRAPHICS_OUTPUT *first_gop = NULL;
+   EFI_DEVICE_PATH *devp_test = NULL;
+
+   status = BS->LocateHandleBuffer(ByProtocol, _guid, NULL, ,
+   );
+   if (status != EFI_SUCCESS)
+   panic("BS->LocateHandleBuffer() returns %d", status);
+   for (i = 0; i < nhandles; i++) {
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **));
+   if (first_gop == NULL)
+   first_gop = gop;
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **)_test);
+   if (status == EFI_SUCCESS)
+   break;
+   }
+   if (status != EFI_SUCCESS)
+   gop = first_gop;
+   if (gop == NULL)
+   panic("no gop found");
+   BS->FreePool(handles);
 
conout = ST->ConOut;
status = BS->LocateProtocol(_guid, NULL, (void **));
@@ -808,7 +833,6 @@ efi_com_putc(dev_t dev, int c)
  */
 static EFI_GUID acpi_guid = ACPI_20_TABLE_GUID;
 static EFI_GUID smbios_guid = SMBIOS_TABLE_GUID;
-static EFI_GRAPHICS_OUTPUT *gop;
 static int  gopmode = -1;
 
 #defineefi_guidcmp(_a, _b) memcmp((_a), (_b), sizeof(EFI_GUID))
@@ -853,57 +877,54 @@ efi_makebootargs(void)
/*
 * Frame buffer
 */
-   status = BS->LocateProtocol(_guid, NULL, (void **));
-   if (!EFI_ERROR(status)) {
-   if (gopmode < 0) {
-   for (i = 0; i < gop->Mode->MaxMode; i++) {
-   status = gop->QueryMode(gop, i, , );
-   if (EFI_ERROR(status))
-   continue;
-   gopsiz = gopi->HorizontalResolution *
-   gopi->VerticalResolution;
-   if (gopsiz > bestsiz) {
-   gopmode = i;
-   bestsiz = gopsiz;
-   }
+   if (gopmode < 0) {
+   for (i = 0; i < gop->Mode->MaxMode; i++) {
+   status = gop->QueryMode(gop, i, , );
+   if (EFI_ERROR(status))
+   continue;
+   gopsiz = gopi->HorizontalResolution *
+   gopi->VerticalResolution;
+   if (gopsiz > bestsiz) {
+   gopmode = i;
+   bestsiz = gopsiz;
}
}
-   if (gopmode >= 0 && gopmode != gop->Mode->Mode) {
-   curmode = gop->Mode->Mode;
-   if (efi_gop_setmode(gopmode) != EFI_SUCCESS)
-   (void)efi_gop_setmode(curmode);
-   }
-
-   gopi = gop->Mode->Info;
-   switch (gopi->PixelFormat) {
-   case PixelBlueGreenRedReserved8BitPerColor:
-   ei->fb_red_mask  = 0x00ff;
-   ei->fb_green_mask= 0xff00;
-   ei->fb_blue_mask = 0x00ff;
-   ei->fb_reserved_mask = 0xff00;
-   break;
-   case PixelRedGreenBlueReserved8BitPerColor:

Re: wg(4): 'Address already in use' when wgrtable is changed

2022-06-11 Thread YASUOKA Masahiko
Hi,

On Sun, 27 Mar 2022 18:25:18 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Wed, 9 Mar 2022 15:28:44 +0900
> Yuichiro NAITO  wrote:
>> I see 'Address already in use' message,
>> when I change wgrtable for a running wg interface.
>> It doesn't make sense to me.
>> 
>> It can be reproduced by the following command sequence.
>> 
>> ```
>> # route -T1 add default `cat /etc/mygate`
>> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
>> # ifconfig wg0 up
>> # ifconfig wg0 wgrtable 1
>> ifconfig: SIOCSWG: Address already in use
>> ```
>> 
>> When I down wg0 interface before changing wgrtable,
>> It succeeds and no messages are shown.
>> 
>> I investigated the reason why 'Address already in use' is shown.
>> 
>> If wgrtable is specified by ifconfig argument,
>> `wg_ioctl_set` function in `sys/net/if_wg.c` is called.
>> 
>> And if the wg interface is running, `wg_bind` function is called.
>> `wg_bind` creates new sockets (IPv4 and 6) and replace them from old
>> ones.
>> 
>> If only wgrtable is changed, `wg_bind` binds as same port as existing
>> sockets.
>> So 'Address already in use' is shown.
>> 
>> Here is a simple patch to close existing sockets before `wg_bind`.
>> It works for me but I'm not 100% sure this is right fix.
>> 
>> Any other ideas?
>> 
>> ```
>> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
>> index 4dae3e3c976..0159664fb34 100644
>> --- a/sys/net/if_wg.c
>> +++ b/sys/net/if_wg.c
>> @@ -2253,11 +2253,14 @@ wg_ioctl_set(struct wg_softc *sc, struct
>> wg_data_io *data)
>>  if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
>>  TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
>>  wg_peer_clear_src(peer);
>> 
>> -if (sc->sc_if.if_flags & IFF_RUNNING)
>> +if (sc->sc_if.if_flags & IFF_RUNNING) {
>> +if (port == sc->sc_udp_port)
>> +wg_unbind(sc);
>>  if ((ret = wg_bind(sc, , )) != 0)
>>  goto error;
>> +}
>> 
>>  sc->sc_udp_port = port;
>>  sc->sc_udp_rtable = rtable;
>>  }
>> ```
> 
> If rdomain 1 exists, the error will not shown.
> 
>  # ifconfig vether0 rdomain 1 up
>  # ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
>  # ifconfig wg0 wgrtable 1
>  # 
> 
> In the case which you reported to, it is supposed that rtable 1 exists
> but rdomain 1 doesn't exist.
> 
> Even when "wgtable 1" is configured, becase there is no dedicated
> rdomain, rdomain 0 will be used to bind the UDP port.

This is not correct.

Configuring "wgrtable 1" when rdomain 1 doesn't exist should mean that
the default (rdomain 0) is used for inbound and rtable 1 is used for outbound.

Binding the same address/port causes EADDRINUSE even if the rtable is
different.  This is the original problem.  Then we need to unbind the
old socket.

So your original diff was basically OK.  The new diff is almost the
same as that diff, but tweaks the condition to check that the rdomain
is the same before unbinding the socket.

I believe this diff is ok.

Index: sys/net/if_wg.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_wg.c,v
retrieving revision 1.25
diff -u -p -r1.25 if_wg.c
--- sys/net/if_wg.c 6 Jun 2022 14:45:41 -   1.25
+++ sys/net/if_wg.c 11 Jun 2022 10:55:18 -
@@ -751,6 +751,16 @@ wg_bind(struct wg_softc *sc, in_port_t *
int  retries = 0;
 retry:
 #endif
+
+   if (port == sc->sc_udp_port &&
+   rtable_l2(rtable) == rtable_l2(sc->sc_udp_rtable)) {
+   /* changing rtable in the same domain */
+   wg_socket_close(>sc_so4);
+#ifdef INET6
+   wg_socket_close(>sc_so6);
+#endif
+   }
+
if ((ret = wg_socket_open(, AF_INET, , , sc)) != 0)
return ret;
 



selecting proper GOP when there are multiple GOPs

2022-05-02 Thread YASUOKA Masahiko
Hello,

The diff below was originally posted by Alexei K. on bugs@:

  Garbled screen when booting with UEFI
  https://marc.info/?l=openbsd-bugs=165087969227708=2  

The same problem has been reported periodically and we have asked people
to use "machine gop" to work around it.  But the diff from Alexei seems
to be the proper way.

I've tested it on some of my machines, including an HPE DL20 Gen10 which has
a virtual video and serial console.

I'd like to commit the diff and ask people to test it.

ok?

Index: sys/arch/amd64/stand/efiboot/efiboot.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efiboot.c,v
retrieving revision 1.38
diff -u -p -r1.38 efiboot.c
--- sys/arch/amd64/stand/efiboot/efiboot.c  7 Jun 2021 00:04:20 -   
1.38
+++ sys/arch/amd64/stand/efiboot/efiboot.c  2 May 2022 07:53:38 -
@@ -424,8 +424,9 @@ efi_memprobe_internal(void)
 /***
  * Console
  ***/
-static SIMPLE_TEXT_OUTPUT_INTERFACE *conout = NULL;
-static SIMPLE_INPUT_INTERFACE   *conin;
+static SIMPLE_TEXT_OUTPUT_INTERFACE*conout = NULL;
+static SIMPLE_INPUT_INTERFACE  *conin;
+static EFI_GRAPHICS_OUTPUT *gop = NULL;
 static EFI_GUID con_guid
= EFI_CONSOLE_CONTROL_PROTOCOL_GUID;
 static EFI_GUID gop_guid
@@ -444,6 +445,30 @@ efi_video_init(void)
int  i, mode80x25, mode100x31;
UINTNcols, rows;
EFI_STATUS   status;
+   EFI_HANDLE  *handles;
+   UINTNnhandles;
+   EFI_GRAPHICS_OUTPUT *first_gop = NULL;
+   EFI_DEVICE_PATH *devp_test = NULL;
+
+   status = BS->LocateHandleBuffer(ByProtocol, _guid, NULL, ,
+   );
+   if (status != EFI_SUCCESS)
+   panic("BS->LocateHandleBuffer() returns %d", status);
+   for (i = 0; i < nhandles; i++) {
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **));
+   if (first_gop == NULL)
+   first_gop = gop;
+   status = BS->HandleProtocol(handles[i], _guid,
+   (void **)_test);
+   if (status == EFI_SUCCESS)
+   break;
+   }
+   if (status != EFI_SUCCESS)
+   gop = first_gop;
+   if (gop == NULL)
+   panic("no gop found");
+   BS->FreePool(handles);
 
conout = ST->ConOut;
status = BS->LocateProtocol(_guid, NULL, (void **));
@@ -808,7 +833,6 @@ efi_com_putc(dev_t dev, int c)
  */
 static EFI_GUID acpi_guid = ACPI_20_TABLE_GUID;
 static EFI_GUID smbios_guid = SMBIOS_TABLE_GUID;
-static EFI_GRAPHICS_OUTPUT *gop;
 static int  gopmode = -1;
 
 #defineefi_guidcmp(_a, _b) memcmp((_a), (_b), sizeof(EFI_GUID))
@@ -853,57 +877,54 @@ efi_makebootargs(void)
/*
 * Frame buffer
 */
-   status = BS->LocateProtocol(_guid, NULL, (void **));
-   if (!EFI_ERROR(status)) {
-   if (gopmode < 0) {
-   for (i = 0; i < gop->Mode->MaxMode; i++) {
-   status = gop->QueryMode(gop, i, , );
-   if (EFI_ERROR(status))
-   continue;
-   gopsiz = gopi->HorizontalResolution *
-   gopi->VerticalResolution;
-   if (gopsiz > bestsiz) {
-   gopmode = i;
-   bestsiz = gopsiz;
-   }
+   if (gopmode < 0) {
+   for (i = 0; i < gop->Mode->MaxMode; i++) {
+   status = gop->QueryMode(gop, i, , );
+   if (EFI_ERROR(status))
+   continue;
+   gopsiz = gopi->HorizontalResolution *
+   gopi->VerticalResolution;
+   if (gopsiz > bestsiz) {
+   gopmode = i;
+   bestsiz = gopsiz;
}
}
-   if (gopmode >= 0 && gopmode != gop->Mode->Mode) {
-   curmode = gop->Mode->Mode;
-   if (efi_gop_setmode(gopmode) != EFI_SUCCESS)
-   (void)efi_gop_setmode(curmode);
-   }
-
-   gopi = gop->Mode->Info;
-   switch (gopi->PixelFormat) {
-   case PixelBlueGreenRedReserved8BitPerColor:
-   ei->fb_red_mask  = 

Re: wg(4): 'Address already in use' when wgrtable is changed

2022-04-01 Thread YASUOKA Masahiko
Hi,

On Tue, 29 Mar 2022 17:28:23 +0900
Yuichiro NAITO  wrote:
> There is one thing I'm worrying about.
> Ifconfig doesn't show wgrtable value with your patch.
> In my use case as follows, it seems that setting `wgrtable 1` is
> ignored.
> 
> ```
> # route -T1 add default `cat /etc/mygate`
> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
> # ifconfig wg0 up
> # ifconfig wg0 wgrtable 1
> # ifconfig wg0
> wg0: flags=80c3 mtu 1420
> index 6 priority 0 llprio 3
> wgport 7111
> wgpubkey e/CYTG1RGqT4jmrY0Fom8cAdtOWP7F/gBVwamyINRlg=
> groups: wg
> ```

Thank you for pointing this out.

In this case, wg0 is binding 7111/udp on rdomain 0.  So I have
supposed ignoring "wgrtable 1" is correct.  But if we configure
wgrtable when creating,

 % doas ifconfig wg0 create wgport 7111 wgrtable 1 wgkey `openssl rand -base64 
32` up
 % doas ifconfig wg0 
 wg0: flags=80c3 mtu 1420
 index 13 priority 0 llprio 3
 wgport 7111
 wgrtable 1
 wgpubkey /4v4hsi426MsVZojJ0rwRvk8kK0jSckjcU2Z1L/k5W8=
 groups: wg
 % 

It displays "wgrtable 1".  And actually

 % netstat -T0 -naf inet | grep 7111 
 % netstat -T1 -naf inet | grep 7111 
 udp  0  0  *.7111 *.*   
 % 

it binds 7111/udp on rtable 1.

So I started wondering why binding 7111/udp on rtable 1 fails with
EADDRINUSE when 7111/udp on rtable 0 is in use.

> On 3/28/22 15:59, YASUOKA Masahiko wrote:
>> On Mon, 28 Mar 2022 15:20:02 +0900
>> Yuichiro NAITO  wrote:
>>> Thanks for the explanation.
>>> I understand how your patch works.
>>>
>>> I want to ask the goal of your patch.
>>> It seems just removing 'Address already in use' message.
>>> Is my guessing right?
>> Yes.  There is nothing to do, since the command is to bind the same
>> port, protocol, and domain of prevous.
>> The code seems to do such the skip already, but it lacks consideration
>> for rtable_l2(rtable) != rtable case.
>> 
>>> On 3/28/22 14:01, YASUOKA Masahiko wrote:
>>>> Hi,
>>>> On Mon, 28 Mar 2022 12:12:39 +0900
>>>> Yuichiro NAITO  wrote:
>>>>> On 3/27/22 18:25, YASUOKA Masahiko wrote:
>>>>>> Hi,
>>>>>> On Wed, 9 Mar 2022 15:28:44 +0900
>>>>>> Yuichiro NAITO  wrote:
>>>>>>> I see 'Address already in use' message,
>>>>>>> when I change wgrtable for a running wg interface.
>>>>>>> It doesn't make sense to me.
>>>>>>>
>>>>>>> It can be reproduced by the following command sequence.
>>>>>>>
>>>>>>> ```
>>>>>>> # route -T1 add default `cat /etc/mygate`
>>>>>>> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
>>>>>>> # ifconfig wg0 up
>>>>>>> # ifconfig wg0 wgrtable 1
>>>>>>> ifconfig: SIOCSWG: Address already in use
>>>>>>> ```
>>>>>>>
>>>>>>> When I down wg0 interface before changing wgrtable,
>>>>>>> It succeeds and no messages are shown.
>>>>>>>
>>>>>>> I investigated the reason why 'Address already in use' is shown.
>>>>>>>
>>>>>>> If wgrtable is specified by ifconfig argument,
>>>>>>> `wg_ioctl_set` function in `sys/net/if_wg.c` is called.
>>>>>>>
>>>>>>> And if the wg interface is running, `wg_bind` function is called.
>>>>>>> `wg_bind` creates new sockets (IPv4 and 6) and replace them from old
>>>>>>> ones.
>>>>>>>
>>>>>>> If only wgrtable is changed, `wg_bind` binds as same port as existing
>>>>>>> sockets.
>>>>>>> So 'Address already in use' is shown.
>>>>>>>
>>>>>>> Here is a simple patch to close existing sockets before `wg_bind`.
>>>>>>> It works for me but I'm not 100% sure this is right fix.
>>>>>>>
>>>>>>> Any other ideas?
>>>>>>>
>>>>>>> ```
>>>>>>> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
>>>>>>> index 4dae3e3c976..0159664fb34 100644
>>>>>>> --- a/sys/net/if_wg.c
>>>>>>> +++ b/sys/net/if_wg.c
>>>>>>> @@ -2253,11 +2253,14 @@ wg_ioctl_set(struct wg_softc *sc, struct
>>>

Re: wg(4): 'Address already in use' when wgrtable is changed

2022-03-28 Thread YASUOKA Masahiko
On Mon, 28 Mar 2022 15:20:02 +0900
Yuichiro NAITO  wrote:
> Thanks for the explanation.
> I understand how your patch works.
> 
> I want to ask the goal of your patch.
> It seems just removing 'Address already in use' message.
> Is my guessing right?

Yes.  There is nothing to do, since the command is to bind the same
port, protocol, and domain as the previous one.

The code seems to do such a skip already, but it lacks consideration
for the rtable_l2(rtable) != rtable case.

> On 3/28/22 14:01, YASUOKA Masahiko wrote:
>> Hi,
>> On Mon, 28 Mar 2022 12:12:39 +0900
>> Yuichiro NAITO  wrote:
>>> On 3/27/22 18:25, YASUOKA Masahiko wrote:
>>>> Hi,
>>>> On Wed, 9 Mar 2022 15:28:44 +0900
>>>> Yuichiro NAITO  wrote:
>>>>> I see 'Address already in use' message,
>>>>> when I change wgrtable for a running wg interface.
>>>>> It doesn't make sense to me.
>>>>>
>>>>> It can be reproduced by the following command sequence.
>>>>>
>>>>> ```
>>>>> # route -T1 add default `cat /etc/mygate`
>>>>> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
>>>>> # ifconfig wg0 up
>>>>> # ifconfig wg0 wgrtable 1
>>>>> ifconfig: SIOCSWG: Address already in use
>>>>> ```
>>>>>
>>>>> When I down wg0 interface before changing wgrtable,
>>>>> It succeeds and no messages are shown.
>>>>>
>>>>> I investigated the reason why 'Address already in use' is shown.
>>>>>
>>>>> If wgrtable is specified by ifconfig argument,
>>>>> `wg_ioctl_set` function in `sys/net/if_wg.c` is called.
>>>>>
>>>>> And if the wg interface is running, `wg_bind` function is called.
>>>>> `wg_bind` creates new sockets (IPv4 and 6) and replace them from old
>>>>> ones.
>>>>>
>>>>> If only wgrtable is changed, `wg_bind` binds as same port as existing
>>>>> sockets.
>>>>> So 'Address already in use' is shown.
>>>>>
>>>>> Here is a simple patch to close existing sockets before `wg_bind`.
>>>>> It works for me but I'm not 100% sure this is right fix.
>>>>>
>>>>> Any other ideas?
>>>>>
>>>>> ```
>>>>> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
>>>>> index 4dae3e3c976..0159664fb34 100644
>>>>> --- a/sys/net/if_wg.c
>>>>> +++ b/sys/net/if_wg.c
>>>>> @@ -2253,11 +2253,14 @@ wg_ioctl_set(struct wg_softc *sc, struct
>>>>> wg_data_io *data)
>>>>>   if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
>>>>>   TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
>>>>>   wg_peer_clear_src(peer);
>>>>>
>>>>> - if (sc->sc_if.if_flags & IFF_RUNNING)
>>>>> + if (sc->sc_if.if_flags & IFF_RUNNING) {
>>>>> + if (port == sc->sc_udp_port)
>>>>> + wg_unbind(sc);
>>>>>   if ((ret = wg_bind(sc, , )) != 0)
>>>>>   goto error;
>>>>> + }
>>>>>
>>>>>   sc->sc_udp_port = port;
>>>>>   sc->sc_udp_rtable = rtable;
>>>>>   }
>>>>> ```
>>>> If rdomain 1 exists, the error will not shown.
>>>># ifconfig vether0 rdomain 1 up
>>>># ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
>>>># ifconfig wg0 wgrtable 1
>>>>#
>>>
>>> Yes, if rdomain 1 is created before `ifconfig wg0 wgrtable 1`,
>>> setting wgrtable succeeds and there is no problem.
>>>
>>>> In the case which you reported to, it is supposed that rtable 1 exists
>>>> but rdomain 1 doesn't exist.
>>>> Even when "wgtable 1" is configured, becase there is no dedicated
>>>> rdomain, rdomain 0 will be used to bind the UDP port.
>>>
>>> Exactly, it's the case that I reported and want to fix.
>>>
>>>> So what wg(4) should do for this case is "nothing".
>>>
>>> I'm a little bit confused.
>>> As you said, I can confirm your patch doesn't set wgrtable in my use
>>> case.
>>> It is not the result that I wanted

Re: wg(4): 'Address already in use' when wgrtable is changed

2022-03-27 Thread YASUOKA Masahiko
Hi,

On Mon, 28 Mar 2022 12:12:39 +0900
Yuichiro NAITO  wrote:
> On 3/27/22 18:25, YASUOKA Masahiko wrote:
>> Hi,
>> On Wed, 9 Mar 2022 15:28:44 +0900
>> Yuichiro NAITO  wrote:
>>> I see 'Address already in use' message,
>>> when I change wgrtable for a running wg interface.
>>> It doesn't make sense to me.
>>>
>>> It can be reproduced by the following command sequence.
>>>
>>> ```
>>> # route -T1 add default `cat /etc/mygate`
>>> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
>>> # ifconfig wg0 up
>>> # ifconfig wg0 wgrtable 1
>>> ifconfig: SIOCSWG: Address already in use
>>> ```
>>>
>>> When I down wg0 interface before changing wgrtable,
>>> It succeeds and no messages are shown.
>>>
>>> I investigated the reason why 'Address already in use' is shown.
>>>
>>> If wgrtable is specified by ifconfig argument,
>>> `wg_ioctl_set` function in `sys/net/if_wg.c` is called.
>>>
>>> And if the wg interface is running, `wg_bind` function is called.
>>> `wg_bind` creates new sockets (IPv4 and 6) and replace them from old
>>> ones.
>>>
>>> If only wgrtable is changed, `wg_bind` binds as same port as existing
>>> sockets.
>>> So 'Address already in use' is shown.
>>>
>>> Here is a simple patch to close existing sockets before `wg_bind`.
>>> It works for me but I'm not 100% sure this is right fix.
>>>
>>> Any other ideas?
>>>
>>> ```
>>> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
>>> index 4dae3e3c976..0159664fb34 100644
>>> --- a/sys/net/if_wg.c
>>> +++ b/sys/net/if_wg.c
>>> @@ -2253,11 +2253,14 @@ wg_ioctl_set(struct wg_softc *sc, struct
>>> wg_data_io *data)
>>> if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
>>> TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
>>> wg_peer_clear_src(peer);
>>>
>>> -   if (sc->sc_if.if_flags & IFF_RUNNING)
>>> +   if (sc->sc_if.if_flags & IFF_RUNNING) {
>>> +   if (port == sc->sc_udp_port)
>>> +   wg_unbind(sc);
>>> if ((ret = wg_bind(sc, , )) != 0)
>>> goto error;
>>> +   }
>>>
>>> sc->sc_udp_port = port;
>>> sc->sc_udp_rtable = rtable;
>>> }
>>> ```
>> If rdomain 1 exists, the error will not shown.
>>   # ifconfig vether0 rdomain 1 up
>>   # ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
>>   # ifconfig wg0 wgrtable 1
>>   #
> 
> Yes, if rdomain 1 is created before `ifconfig wg0 wgrtable 1`,
> setting wgrtable succeeds and there is no problem.
> 
>> In the case which you reported to, it is supposed that rtable 1 exists
>> but rdomain 1 doesn't exist.
>> Even when "wgtable 1" is configured, becase there is no dedicated
>> rdomain, rdomain 0 will be used to bind the UDP port.
> 
> Exactly, it's the case that I reported and want to fix.
> 
>> So what wg(4) should do for this case is "nothing".
> 
> I'm a little bit confused.
> As you said, I can confirm your patch doesn't set wgrtable in my use
> case.
> It is not the result that I wanted.

   # ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
   -> bind 7111/udp on rdomain 0  (1)

is expected. (1)

   # ifconfig wg0 wgrtable 1
   -> bind 7111/udp on rdomain 0  (2)

is expected, since there is no "domain 1".

If we try to do both (1) and (2), it causes EADDRINUSE since (2) tries
to bind the same port, proto, and domain.  The latest diff skips (2)
properly.

Previous

>>   -  if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {

"rtable != sc->sc_udp_rtable" was wrong since rdomain for rtable may
not exist.  This is the cause of EADDRINUSE.


>> So the diff is updated.
>> ok?
>> Index: sys/net/if_wg.c
>> ===
>> RCS file: /disk/cvs/openbsd/src/sys/net/if_wg.c,v
>> retrieving revision 1.22
>> diff -u -p -r1.22 if_wg.c
>> --- sys/net/if_wg.c  22 Feb 2022 01:15:02 -  1.22
>> +++ sys/net/if_wg.c  27 Mar 2022 09:17:08 -
>> @@ -2250,7 +2250,8 @@ wg_ioctl_set(struct wg_softc *sc, struct
>>  else
>>  rtable = sc->sc_udp_rtable;
>>   -  if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
>> +if (port != sc->sc_udp_port ||
>> +rtable_l2(rtable) != sc->sc_udp_rtable) {
>>  TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
>>  wg_peer_clear_src(peer);
>>   
> 
> -- 
> Yuichiro NAITO (naito.yuich...@gmail.com)
> 



Re: wg(4): 'Address already in use' when wgrtable is changed

2022-03-27 Thread YASUOKA Masahiko
Hi,

On Wed, 9 Mar 2022 15:28:44 +0900
Yuichiro NAITO  wrote:
> I see 'Address already in use' message,
> when I change wgrtable for a running wg interface.
> It doesn't make sense to me.
> 
> It can be reproduced by the following command sequence.
> 
> ```
> # route -T1 add default `cat /etc/mygate`
> # ifconfig wg0 create wgport 7111 wgkey `cat /etc/mykey.wg0`
> # ifconfig wg0 up
> # ifconfig wg0 wgrtable 1
> ifconfig: SIOCSWG: Address already in use
> ```
> 
> When I down wg0 interface before changing wgrtable,
> It succeeds and no messages are shown.
> 
> I investigated the reason why 'Address already in use' is shown.
> 
> If wgrtable is specified by ifconfig argument,
> `wg_ioctl_set` function in `sys/net/if_wg.c` is called.
> 
> And if the wg interface is running, `wg_bind` function is called.
> `wg_bind` creates new sockets (IPv4 and 6) and replace them from old
> ones.
> 
> If only wgrtable is changed, `wg_bind` binds as same port as existing
> sockets.
> So 'Address already in use' is shown.
> 
> Here is a simple patch to close existing sockets before `wg_bind`.
> It works for me but I'm not 100% sure this is right fix.
> 
> Any other ideas?
> 
> ```
> diff --git a/sys/net/if_wg.c b/sys/net/if_wg.c
> index 4dae3e3c976..0159664fb34 100644
> --- a/sys/net/if_wg.c
> +++ b/sys/net/if_wg.c
> @@ -2253,11 +2253,14 @@ wg_ioctl_set(struct wg_softc *sc, struct
> wg_data_io *data)
>   if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
>   TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
>   wg_peer_clear_src(peer);
> 
> - if (sc->sc_if.if_flags & IFF_RUNNING)
> + if (sc->sc_if.if_flags & IFF_RUNNING) {
> + if (port == sc->sc_udp_port)
> + wg_unbind(sc);
>   if ((ret = wg_bind(sc, , )) != 0)
>   goto error;
> + }
> 
>   sc->sc_udp_port = port;
>   sc->sc_udp_rtable = rtable;
>   }
> ```

If rdomain 1 exists, the error will not be shown.

 # ifconfig vether0 rdomain 1 up
 # ifconfig wg0 create wgport 7111 wgkey `openssl rand -base64 32` up
 # ifconfig wg0 wgrtable 1
 # 

In the case you reported, it is assumed that rtable 1 exists
but rdomain 1 doesn't exist.

Even when "wgrtable 1" is configured, because there is no dedicated
rdomain, rdomain 0 will be used to bind the UDP port.

So what wg(4) should do for this case is "nothing".

So the diff is updated.

ok?

Index: sys/net/if_wg.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_wg.c,v
retrieving revision 1.22
diff -u -p -r1.22 if_wg.c
--- sys/net/if_wg.c 22 Feb 2022 01:15:02 -  1.22
+++ sys/net/if_wg.c 27 Mar 2022 09:17:08 -
@@ -2250,7 +2250,8 @@ wg_ioctl_set(struct wg_softc *sc, struct
else
rtable = sc->sc_udp_rtable;
 
-   if (port != sc->sc_udp_port || rtable != sc->sc_udp_rtable) {
+   if (port != sc->sc_udp_port ||
+   rtable_l2(rtable) != sc->sc_udp_rtable) {
TAILQ_FOREACH(peer, >sc_peer_seq, p_seq_entry)
wg_peer_clear_src(peer);
 



Re: parallel ip forwarding

2021-12-30 Thread YASUOKA Masahiko
Hi,

On Sat, 25 Dec 2021 21:50:47 +0300
Vitaliy Makkoveev  wrote:
> On Fri, Dec 24, 2021 at 12:50:23PM +0100, Alexander Bluhm wrote:
>> On Fri, Dec 24, 2021 at 04:16:28PM +0900, YASUOKA Masahiko wrote:
>> > > - npppd l2pt ipsecflowinfo is not MP safe
>> > 
>> > Does this mean the things we are discussing on the "Fix
>> > ipsp_spd_lookup() for transport mode" thread?  I wonder if there is
>> > another issue.
>> 
>> In this mail thread I was concerned about things might get worse.
>> 
>> Currently I see these problems:
>> 
>> tdb_free() will be called with a shared netlock.  From there
>> ipsp_ids_free() is called.
>> 
>> if (--ids->id_refcount > 0)
>> return;
>> 
>> This ref count needs to be atomic.
>> 
>> if (LIST_EMPTY(_ids_gc_list))
>> timeout_add_sec(_ids_gc_timeout, 1);
>> LIST_INSERT_HEAD(_ids_gc_list, ids, id_gc_list);
>> 
>> And some mutex should protect ipsp_ids_gc_list.

Thanks, I think I have now caught up with the problem.

> The diff below adds `ipsec_flows_mtx' mutex(9) to protect `ipsp_ids_*'
> list and trees. ipsp_ids_lookup() returns `ids' with bumped reference
> counter.

This direction seems good.

One thing, I found a problem.

> Index: sys/netinet/ip_spd.c
> ===
> RCS file: /cvs/src/sys/netinet/ip_spd.c,v
> retrieving revision 1.110
> diff -u -p -r1.110 ip_spd.c
> --- sys/netinet/ip_spd.c  16 Dec 2021 15:38:03 -  1.110
> +++ sys/netinet/ip_spd.c  25 Dec 2021 18:34:22 -
> @@ -418,6 +418,7 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
>   /* Cached entry is good. */
>   error = ipsp_spd_inp(m, inp, ipo, tdbout);
>   mtx_leave(_tdb_mtx);
> + ipsp_ids_free(ids);
>   return error;
>  
>nomatchout:
> @@ -452,6 +453,7 @@ ipsp_spd_lookup(struct mbuf *m, int af, 
>   dignore ?  : >ipo_dst,
>   ipo->ipo_sproto, ids ? ids: ipo->ipo_ids,
>   >ipo_addr, >ipo_mask);
> + ipsp_ids_free(ids);
>   mtx_enter(_tdb_mtx);
>   if ((tdbp_new != NULL) &&
>   (tdbp_new->tdb_flags & TDBF_DELETED)) {

ids will remain unfreed since there are some code paths which don't
pass through the above lines.

I tried to fix that, but adding a lot of ipsp_ids_free() looks like a mess.
Instead, how about changing ipsp_spd_lookup() to take a "struct
ipsec_ids *ids" as an argument and letting the caller take the
responsibility for the ids?

Index: sys/net/if_bridge.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_bridge.c,v
retrieving revision 1.362
diff -u -p -r1.362 if_bridge.c
--- sys/net/if_bridge.c 23 Dec 2021 12:21:48 -  1.362
+++ sys/net/if_bridge.c 30 Dec 2021 08:12:18 -
@@ -1595,7 +1595,7 @@ bridge_ipsec(struct ifnet *ifp, struct e
}
} else { /* Outgoing from the bridge. */
error = ipsp_spd_lookup(m, af, hlen, IPSP_DIRECTION_OUT,
-   NULL, NULL, , 0);
+   NULL, NULL, , NULL);
if (error == 0 && tdb != NULL) {
/*
 * We don't need to do loop detection, the
Index: sys/net/if_veb.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_veb.c,v
retrieving revision 1.21
diff -u -p -r1.21 if_veb.c
--- sys/net/if_veb.c8 Nov 2021 04:15:46 -   1.21
+++ sys/net/if_veb.c30 Dec 2021 08:12:18 -
@@ -746,7 +746,7 @@ veb_ipsec_proto_out(struct mbuf *m, sa_f
 #endif
 
tdb = ipsp_spd_lookup(m, af, iphlen, , IPSP_DIRECTION_OUT,
-   NULL, NULL, 0);
+   NULL, NULL, NULL);
if (tdb == NULL)
return (m);
 
Index: sys/netinet/ip_ipsp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.267
diff -u -p -r1.267 ip_ipsp.c
--- sys/netinet/ip_ipsp.c   20 Dec 2021 15:59:09 -  1.267
+++ sys/netinet/ip_ipsp.c   30 Dec 2021 08:12:18 -
@@ -47,6 +47,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -84,6 +86,13 @@ void tdb_hashstats(void);
do { } while (0)
 #endif
 
+/*
+ * Locks used to protect global data and struct members:
+ * F   ipsec_flows_mtx
+ */
+
+struct mutex ipsec_flows_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+
 inttdb_rehash(void);
 void   tdb_tim

Re: parallel ip forwarding

2021-12-23 Thread YASUOKA Masahiko
Hello,

On Fri, 24 Dec 2021 00:55:04 +0100
Alexander Bluhm  wrote:
> On Fri, Dec 03, 2021 at 08:35:45PM +0100, Alexander Bluhm wrote:
>> Note that IPsec still has the workaround to disable multiple queues.
> 
> I think we can remove the ipsec_in_use workaround now.  The IPsec
> path is protected with the kernel lock.
> 
> There are some issues left:
> - npppd l2tp ipsecflowinfo is not MP safe

Does this mean the things we are discussing on the "Fix
ipsp_spd_lookup() for transport mode" thread?  I wonder if there is
another issue.

> - the acquire SA feature is not MP safe
> - Hrvoje has seen a panic with sasync



Re: Fix ipsp_spd_lookup() for transport mode

2021-12-23 Thread YASUOKA Masahiko
Hi,

On Mon, 20 Dec 2021 13:20:46 +0100
Alexander Bluhm  wrote:
> On Tue, Dec 14, 2021 at 06:25:20PM +0900, YASUOKA Masahiko wrote:
>> Yes, if there is another better idea, it will be welcome.
>> For this moment, the diff is the best idea for me.
> 
> Sorry, no better idea.  I have no experience with l2tp.  Codewise
> the diff looks fine, but I don't understand the consequences.

Thank you for your review and comments.

>> +if (tdbflow != NULL)
>> +rn = rn_lookup((caddr_t)>tdb_filter,
>> +(caddr_t)>tdb_filtermask, rnh);
> 
> Does rn_lookup() modify the radix tree?  It looks like rn_lookup ->
> rn_addmask -> rn_insert() does that.  This will make it impossible
> to make IPsec MP capable.  The radix tree is not MP safe, art has
> been implemented as an alternative.  An ipsp_spd_lookup() should
> not modify the flows.  It is strange that a function named rn_lookup()
> does modifications.  Did I miss something?

rn_lookup() doesn't make any modification.  rn_lookup() calls
rn_addmask() with second argument search=1.

 183 /* return a perfect match if m_arg is set, else do a regular rn_match */
 184 struct radix_node *
 185 rn_lookup(void *v_arg, void *m_arg, struct radix_node_head *head)
 186 {
 187 struct radix_node *x, *tm;
 188 caddr_t netmask = 0;
 189 
 190 if (m_arg) {
 191 tm = rn_addmask(m_arg, 1, head->rnh_treetop->rn_off);

and then rn_addmask()

 416 struct radix_node *
 417 rn_addmask(void *n_arg, int search, int skip)
 418 {
 (snip)
 449 if (tm || search)
 450 return (tm);
 451 tm = malloc(max_keylen + 2 * sizeof(*tm), M_RTABLE, M_NOWAIT | 
M_ZERO);
 452 if (tm == NULL)
 453 return (0);
 454 saved_tm = tm;
 455 netmask = cp = (caddr_t)(tm + 2);
 456 memcpy(cp, addmask_key, mlen);
 457 tm = rn_insert(cp, mask_rnhead, , tm);

returns at #449-450 before calling rn_insert().  It seems that
rn_addmask() does read only operations when "search".

> Why do you call rn_lookup() here?

Because rn_match() doesn't take a mask and returns the best match.

For example, if there are multiple peers behind a NAT, flows like
the ones below can be configured at the same time.

  (a) Windows:  REMOTE_IP:1701/udp <=> LOCAL_IP:1701/udp
  (b) Linux:REMOTE_IP:ANY/udp  <=> LOCAL_IP:1701/udp

If the source port of a packet from the Linux client is 1701, rn_match() will
return (a) for it, and then ipsp_spd_lookup() will fail to verify that the
given tdb matches the policy.

Policies can be created with wildcards (any port, any protocol); such a
policy is then compared with a packet whose port and protocol are concrete.

Since rn_match() finds the best match, it can't find a wildcard
policy properly if there is a non-wildcard policy which overlaps
with the wildcard.

So the diff uses rn_lookup() to find the correct policy.


> Could we add the masks earlier when the flows are added?
> 
>> +else if (tdbp != NULL)
>> +rn = rn_lookup((caddr_t)>tdb_filter,
>> +(caddr_t)>tdb_filtermask, rnh);
> 
> What are the consequences of this chunk for regular IPsec?

I have thought about that again.  Now I realize the problem is only for
transport mode.  For tunnel mode, since the best match is always
preferred, rn_match() should be used.  I'll update the diff so that it uses
rn_lookup() for transport mode only.

>>  /* Match source/dest IDs. */
>> -if (ipo->ipo_ids)
>> -if (tdbp->tdb_ids == NULL ||
>> -!ipsp_ids_match(ipo->ipo_ids, 
>> tdbp->tdb_ids))
>> +if (ipo->ipo_ids != NULL) {
>> +if ((tdbp->tdb_flags & TDBF_TUNNELING) == 0 &&
>> +(tdbp->tdb_flags & TDBF_UDPENCAP) != 0) {
>> +/*
>> + * Skip IDs check for transport mode
>> + * with NAT-T.  Multiple clients (IDs)
>> + * can use a same policy.
>> + */
>> +} else if (tdbp->tdb_ids == NULL &&
>> +!ipsp_ids_match(ipo->ipo_ids,
>> +tdbp->tdb_ids))
>>  goto nomatchin;
>> +}
> 
> This was added to make IPsec/l2tp work in rev 1.85.  And now you
> change it to make it work.  I wish markus@ or mikeb@ could give a
> clue.

At the change of 1.85, "ipsec-id bundles" is intr

Re: Fix ipsp_spd_lookup() for transport mode

2021-12-14 Thread YASUOKA Masahiko
Hi,

On Tue, 14 Dec 2021 01:20:49 +0100
Alexander Bluhm  wrote:
> I don't know much about l2tp, pipex or npppd.  So I cannot say if
> the new logic is correct.  But I guess you have tested that.

Yes, I've tested some L2TP/IPsec cases already.

> The tdb mutex and ref counting looks correct.
> 
>> +struct tdb *tdb, *tdblocal = NULL;
> 
> The variable names tdb and tdbp are used very inconsistently within
> IPsec.  Don't use both.  I think tdbp and tdbflow are sufficient.

Ok,

> 
>> +if (ipsecflowinfo != 0)
>> +ids = ipsp_ids_lookup(ipsecflowinfo);
> 
> Can you move that to the place where it is needed?

Yes,

> Perhaps it is easier to understand this way:
> 
>   if (ipsecflowinfo != 0) {

Sure.  Let me update the diff.

> It is hard to say whether the new
> rn_lookup(tdbp->tdb_filter/tdbp->tdb_filtermask) changes existing
> IPsec behavior for setups without l2tp.

I suppose it has no regression on other setups.
But I'll look it more carefully and test the other setups.

> Do we need it there?

Yes, if there is another better idea, it will be welcome.
For this moment, the diff is the best idea for me.

> I never ran into problems patching the correct policy.

Index: sys/netinet/ip_ipsp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.264
diff -u -p -r1.264 ip_ipsp.c
--- sys/netinet/ip_ipsp.c   11 Dec 2021 16:33:47 -  1.264
+++ sys/netinet/ip_ipsp.c   14 Dec 2021 06:32:07 -
@@ -91,6 +91,8 @@ void  tdb_soft_timeout(void *);
 void   tdb_soft_firstuse(void *);
 inttdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
 void   tdb_dodelete(struct tdb *, int locked);
+intsockaddr_encap_match(struct sockaddr_encap *,
+   struct sockaddr_encap *, struct sockaddr_encap *);
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
@@ -510,6 +512,78 @@ gettdbbysrc(u_int rdomain, union sockadd
tdb_ref(tdbp);
mtx_leave(_sadb_mtx);
return tdbp;
+}
+
+/*
+ * Get an SA given the flow, the direction, the security protocol type, and
+ * the desired IDs.
+ */
+struct tdb *
+gettdbbyflow(u_int rdomain, int direction, struct sockaddr_encap *senflow,
+u_int8_t sproto, struct ipsec_ids *ids)
+{
+   u_int32_t hashval;
+   struct tdb *tdbp;
+   union sockaddr_union srcdst;
+
+   if (ids == NULL)/* ids is mandatory */
+   return NULL;
+
+   memset(, 0, sizeof(srcdst));
+   switch (senflow->sen_type) {
+   case SENT_IP4:
+   srcdst.sin.sin_len = sizeof(srcdst.sin);
+   srcdst.sin.sin_family = AF_INET;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Dst;
+   else
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Src;
+   break;
+   case SENT_IP6:
+   srcdst.sin6.sin6_len = sizeof(srcdst.sin6);
+   srcdst.sin6.sin6_family = AF_INET6;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Dst;
+   else
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Src;
+   break;
+   }
+
+   mtx_enter(_sadb_mtx);
+   hashval = tdb_hash(0, , sproto);
+
+   for (tdbp = tdbdst[hashval]; tdbp != NULL; tdbp = tdbp->tdb_dnext)
+   if (tdbp->tdb_sproto == sproto &&
+   tdbp->tdb_rdomain == rdomain &&
+   (tdbp->tdb_flags & TDBF_INVALID) == 0 &&
+   ipsp_ids_match(ids, tdbp->tdb_ids) &&
+   ((direction == IPSP_DIRECTION_OUT &&
+   !memcmp(>tdb_dst, , srcdst.sa.sa_len)) ||
+   (direction == IPSP_DIRECTION_IN &&
+   !memcmp(>tdb_src, , srcdst.sa.sa_len {
+   if (sockaddr_encap_match(>tdb_filter,
+   >tdb_filtermask, senflow))
+   break;
+   }
+
+   tdb_ref(tdbp);
+   mtx_leave(_sadb_mtx);
+   return tdbp;
+}
+
+int
+sockaddr_encap_match(struct sockaddr_encap *addr, struct sockaddr_encap *mask,
+struct sockaddr_encap *dest)
+{
+   size_t  off;
+
+   for (off = offsetof(struct sockaddr_encap, sen_type);
+   off < dest->sen_len; off++) {
+   if ((*((u_char *)addr + off) & *((u_char *)mask + off)) !=
+   (*((u_char *)dest + off) & *((u_char *)mask + off)))
+   break;
+   }
+   return (off < dest->sen_len)? 0 : 1;
 }
 
 #ifdef DDB
Index: sys/netinet/ip_ipsp.h
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.230
diff -u -p -r1.230 ip_ipsp.h
--- sys/netinet/ip_ipsp.h   11 Dec 2021 16:33:47 -  1.230
+++ 

Re: Fix ipsp_spd_lookup() for transport mode

2021-12-01 Thread YASUOKA Masahiko
On Wed, 1 Dec 2021 00:27:06 +0100
Alexander Bluhm  wrote:
> On Tue, Nov 30, 2021 at 05:53:34PM +0300, Vitaliy Makkoveev wrote:
>> Hi,
>> 
>> This question is mostly for bluhm@. Should gettdbbyflow() grab an
>> extra reference on the returned `tdbp' like the other gettdb*() functions do? I'm
>> pointing this out because we are not going to rely on the netlock when
>> dereferencing `tdbp'.
> 
> Yes.  Call tdb_ref(tdbp) within the tdb_sadb_mtx mutex.
> 
> The interesting question is when to unref it.  You use the same
> variable for the tdb parameter and the tdb from gettdbbyflow().
> Tracking when you don't use the new TDB anymore, gets tricky.

Let me update the diff.  That grabs a reference now.

Also the diff fixes gettdbbyflow().  Comparing ids was missing.


Index: sys/netinet/ip_ipsp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.258
diff -u -p -r1.258 ip_ipsp.c
--- sys/netinet/ip_ipsp.c   29 Nov 2021 19:19:00 -  1.258
+++ sys/netinet/ip_ipsp.c   1 Dec 2021 12:19:53 -
@@ -90,6 +90,8 @@ void  tdb_firstuse(void *);
 void   tdb_soft_timeout(void *);
 void   tdb_soft_firstuse(void *);
 inttdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
+intsockaddr_encap_match(struct sockaddr_encap *,
+   struct sockaddr_encap *, struct sockaddr_encap *);
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
@@ -507,6 +509,78 @@ gettdbbysrc(u_int rdomain, union sockadd
tdb_ref(tdbp);
mtx_leave(_sadb_mtx);
return tdbp;
+}
+
+/*
+ * Get an SA given the flow, the direction, the security protocol type, and
+ * the desired IDs.
+ */
+struct tdb *
+gettdbbyflow(u_int rdomain, int direction, struct sockaddr_encap *senflow,
+u_int8_t sproto, struct ipsec_ids *ids)
+{
+   u_int32_t hashval;
+   struct tdb *tdbp;
+   union sockaddr_union srcdst;
+
+   if (ids == NULL)/* ids is mandatory */
+   return NULL;
+
+   memset(, 0, sizeof(srcdst));
+   switch (senflow->sen_type) {
+   case SENT_IP4:
+   srcdst.sin.sin_len = sizeof(srcdst.sin);
+   srcdst.sin.sin_family = AF_INET;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Dst;
+   else
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Src;
+   break;
+   case SENT_IP6:
+   srcdst.sin6.sin6_len = sizeof(srcdst.sin6);
+   srcdst.sin6.sin6_family = AF_INET6;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Dst;
+   else
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Src;
+   break;
+   }
+
+   mtx_enter(_sadb_mtx);
+   hashval = tdb_hash(0, , sproto);
+
+   for (tdbp = tdbdst[hashval]; tdbp != NULL; tdbp = tdbp->tdb_dnext)
+   if (tdbp->tdb_sproto == sproto &&
+   tdbp->tdb_rdomain == rdomain &&
+   (tdbp->tdb_flags & TDBF_INVALID) == 0 &&
+   ipsp_ids_match(ids, tdbp->tdb_ids) &&
+   ((direction == IPSP_DIRECTION_OUT &&
+   !memcmp(>tdb_dst, , srcdst.sa.sa_len)) ||
+   (direction == IPSP_DIRECTION_IN &&
+   !memcmp(>tdb_src, , srcdst.sa.sa_len {
+   if (sockaddr_encap_match(>tdb_filter,
+   >tdb_filtermask, senflow))
+   break;
+   }
+
+   tdb_ref(tdbp);
+   mtx_leave(_sadb_mtx);
+   return tdbp;
+}
+
+int
+sockaddr_encap_match(struct sockaddr_encap *addr, struct sockaddr_encap *mask,
+struct sockaddr_encap *dest)
+{
+   size_t  off;
+
+   for (off = offsetof(struct sockaddr_encap, sen_type);
+   off < dest->sen_len; off++) {
+   if ((*((u_char *)addr + off) & *((u_char *)mask + off)) !=
+   (*((u_char *)dest + off) & *((u_char *)mask + off)))
+   break;
+   }
+   return (off < dest->sen_len)? 0 : 1;
 }
 
 #ifdef DDB
Index: sys/netinet/ip_ipsp.h
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.h,v
retrieving revision 1.224
diff -u -p -r1.224 ip_ipsp.h
--- sys/netinet/ip_ipsp.h   30 Nov 2021 13:17:43 -  1.224
+++ sys/netinet/ip_ipsp.h   1 Dec 2021 12:19:53 -
@@ -565,6 +565,8 @@ struct  tdb *gettdbbysrcdst_dir(u_int, u_
union sockaddr_union *, u_int8_t, int);
 #define gettdbbysrcdst(a,b,c,d,e) gettdbbysrcdst_dir((a),(b),(c),(d),(e),0)
 #define gettdbbysrcdst_rev(a,b,c,d,e) gettdbbysrcdst_dir((a),(b),(c),(d),(e),1)
+struct tdb *gettdbbyflow(u_int, int, struct sockaddr_encap *, u_int8_t,
+   struct ipsec_ids *);
 void   

Re: Fix ipsp_spd_lookup() for transport mode

2021-11-29 Thread YASUOKA Masahiko
Hi,

Let me update the diff.  The previous one has a problem in ipsp_spd_lookup(),
which uses "rn" without initialization.

On Sat, 20 Nov 2021 21:44:20 +0900 (JST)
YASUOKA Masahiko  wrote:
> On Wed, 12 May 2021 19:11:09 +0900 (JST)
> YASUOKA Masahiko  wrote:
>> Radek reported a problem to misc@ that multiple Windows clients behind
>> a NAT cannot use a L2TP/IPsec server simultaneously.
>> 
>> https://marc.info/?t=16099681611=1=2
>> 
>> There are two problems.  The first is that pipex(4) doesn't pass the proper
>> ipsecflowinfo to ip_output().  The second is that the IPsec policy check
>> done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is
>> not cached.  This happens when its flow is shared by another tdb (for
>> another client of the same NAT).
> 
> This problem is not fixed yet.  The diff for the second problem was
> not committed.  It was meant to fix the check in ipsp_spd_lookup() by
> making an IPsec policy have a list of IDs.
> 
> Also, my colleague Kawai pointed out that there is another problem if there
> is a Linux client along with Windows clients behind a NAT.  Windows
> uses 1701/udp for its local ID, but Linux uses ANY/udp for its
> local ID.
> 
> In this situation, the policies overlap.
> 
>   (a) Windows:  REMOTE_IP:1701/udp <=> LOCAL_IP:1701/udp
>   (b) Linux:REMOTE_IP:ANY/udp  <=> LOCAL_IP:1701/udp
>   
> Since we use a radix tree for the policies, when rn_match() is used to
> find a policy, (b) is never selected because rn_match() returns the best match.
> 
> Let me update the diff.
> 
> As for incoming packets, we know the tdb that is used.  The diff uses that
> tdb to find the proper policy.
> 
> As for outgoing packets, there is no way to select the proper policy other
> than using "ipsecflowinfo".  So only when "ipsecflowinfo" is used, get
> a tdb from the packet flow and the IDs (retrieved via the
> ipsecflowinfo); then we can find the proper policy from the tdb.
> 
> Also, the diff skips the IDs check against the policy only if it is
> transport mode and using NAT-T.  When NAT-T is used, a policy
> for transport mode is shared by multiple clients which have different
> IDs, so checking the IDs is difficult, and I think the other checks
> are enough.
> 
> ok?  comments?
> 
> Fix some problems when accepting IPsec transport mode connections from
> multiple clients behind a NAT.  In this situation, policies can
> overlap, but previously a proper policy could not be chosen for either
> incoming or outgoing packets.  To solve this problem, use
> tdb->tdb_filter{,mask} to find a proper policy for incoming and find the
> tdb by the given ipsecflowinfo and use it for outgoing.  Also skip
> checking IDs of the policy since a policy is shared by multiple clients
> in this situation.

Index: sys/netinet/ip_ipsp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.258
diff -u -p -r1.258 ip_ipsp.c
--- sys/netinet/ip_ipsp.c   29 Nov 2021 19:19:00 -  1.258
+++ sys/netinet/ip_ipsp.c   30 Nov 2021 04:44:48 -
@@ -90,6 +90,8 @@ void  tdb_firstuse(void *);
 void   tdb_soft_timeout(void *);
 void   tdb_soft_firstuse(void *);
 inttdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
+intsockaddr_encap_match(struct sockaddr_encap *,
+   struct sockaddr_encap *, struct sockaddr_encap *);
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
@@ -507,6 +509,76 @@ gettdbbysrc(u_int rdomain, union sockadd
tdb_ref(tdbp);
mtx_leave(_sadb_mtx);
return tdbp;
+}
+
+/*
+ * Get an SA given the flow, the direction, the security protocol type, and
+ * the desired IDs.
+ */
+struct tdb *
+gettdbbyflow(u_int rdomain, int direction, struct sockaddr_encap *senflow,
+u_int8_t sproto, struct ipsec_ids *ids)
+{
+   u_int32_t hashval;
+   struct tdb *tdbp;
+   union sockaddr_union srcdst;
+
+   if (ids == NULL)/* ids is mandatory */
+   return NULL;
+
+   memset(, 0, sizeof(srcdst));
+   switch (senflow->sen_type) {
+   case SENT_IP4:
+   srcdst.sin.sin_len = sizeof(srcdst.sin);
+   srcdst.sin.sin_family = AF_INET;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Dst;
+   else
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Src;
+   break;
+   case SENT_IP6:
+   srcdst.sin6.sin6_len = sizeof(srcdst.sin6);
+   srcdst.sin6.sin6_family = AF_INET6;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Dst;
+   el

Fix ipsp_spd_lookup() for transport mode (was Re: Fix IPsec NAT-T for L2TP/IPsec)

2021-11-20 Thread YASUOKA Masahiko
Hi,

On Wed, 12 May 2021 19:11:09 +0900 (JST)
YASUOKA Masahiko  wrote:
> Radek reported a problem to misc@ that multiple Windows clients behind
> a NAT cannot use a L2TP/IPsec server simultaneously.
> 
> https://marc.info/?t=16099681611=1=2
> 
> There are two problems.  The first is that pipex(4) doesn't pass the proper
> ipsecflowinfo to ip_output().  The second is that the IPsec policy check
> done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is
> not cached.  This happens when its flow is shared by another tdb (for
> another client of the same NAT).

This problem is not fixed yet.  The diff for the second problem was
not committed.  It was meant to fix the check in ipsp_spd_lookup() by
making an IPsec policy have a list of IDs.

Also, my colleague Kawai pointed out that there is another problem if there
is a Linux client along with Windows clients behind a NAT.  Windows
uses 1701/udp for its local ID, but Linux uses ANY/udp for its
local ID.

In this situation, the policies overlap.

  (a) Windows:  REMOTE_IP:1701/udp <=> LOCAL_IP:1701/udp
  (b) Linux:REMOTE_IP:ANY/udp  <=> LOCAL_IP:1701/udp
  
Since we use a radix tree for the policies, when rn_match() is used to
find a policy, (b) is never selected because rn_match() returns the best match.

Let me update the diff.

As for incoming packets, we know the tdb that is used.  The diff uses that
tdb to find the proper policy.

As for outgoing packets, there is no way to select the proper policy other
than using "ipsecflowinfo".  So only when "ipsecflowinfo" is used, get
a tdb from the packet flow and the IDs (retrieved via the
ipsecflowinfo); then we can find the proper policy from the tdb.

Also, the diff skips the IDs check against the policy only if it is
transport mode and using NAT-T.  When NAT-T is used, a policy
for transport mode is shared by multiple clients which have different
IDs, so checking the IDs is difficult, and I think the other checks
are enough.

ok?  comments?

Fix some problems when accepting IPsec transport mode connections from
multiple clients behind a NAT.  In this situation, policies can
overlap, but previously a proper policy could not be chosen for either
incoming or outgoing packets.  To solve this problem, use
tdb->tdb_filter{,mask} to find a proper policy for incoming and find the
tdb by the given ipsecflowinfo and use it for outgoing.  Also skip
checking IDs of the policy since a policy is shared by multiple clients
in this situation.

Index: sys/netinet/ip_ipsp.c
===
RCS file: /cvs/src/sys/netinet/ip_ipsp.c,v
retrieving revision 1.251
diff -u -p -r1.251 ip_ipsp.c
--- sys/netinet/ip_ipsp.c   18 Nov 2021 11:04:10 -  1.251
+++ sys/netinet/ip_ipsp.c   20 Nov 2021 12:42:36 -
@@ -91,6 +91,8 @@ void  tdb_firstuse(void *);
 void   tdb_soft_timeout(void *);
 void   tdb_soft_firstuse(void *);
 inttdb_hash(u_int32_t, union sockaddr_union *, u_int8_t);
+intsockaddr_encap_match(struct sockaddr_encap *,
+   struct sockaddr_encap *, struct sockaddr_encap *);
 
 int ipsec_in_use = 0;
 u_int64_t ipsec_last_added = 0;
@@ -501,6 +503,76 @@ gettdbbysrc(u_int rdomain, union sockadd
 
mtx_leave(_sadb_mtx);
return tdbp;
+}
+
+/*
+ * Get an SA given the flow, the direction, the security protocol type, and
+ * the desired IDs.
+ */
+struct tdb *
+gettdbbyflow(u_int rdomain, int direction, struct sockaddr_encap *senflow,
+u_int8_t sproto, struct ipsec_ids *ids)
+{
+   u_int32_t hashval;
+   struct tdb *tdbp;
+   union sockaddr_union srcdst;
+
+   if (ids == NULL)/* ids is mandatory */
+   return NULL;
+
+   memset(, 0, sizeof(srcdst));
+   switch (senflow->sen_type) {
+   case SENT_IP4:
+   srcdst.sin.sin_len = sizeof(srcdst.sin);
+   srcdst.sin.sin_family = AF_INET;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Dst;
+   else
+   srcdst.sin.sin_addr = senflow->Sen.Sip4.Src;
+   break;
+   case SENT_IP6:
+   srcdst.sin6.sin6_len = sizeof(srcdst.sin6);
+   srcdst.sin6.sin6_family = AF_INET6;
+   if (direction == IPSP_DIRECTION_OUT)
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Dst;
+   else
+   srcdst.sin6.sin6_addr = senflow->Sen.Sip6.Src;
+   break;
+   }
+
+   mtx_enter(_sadb_mtx);
+   hashval = tdb_hash(0, , sproto);
+
+   for (tdbp = tdbdst[hashval]; tdbp != NULL; tdbp = tdbp->tdb_dnext)
+   if (tdbp->tdb_sproto == sproto &&
+   tdbp->tdb_rdomain == rdomain &&
+   (tdbp->tdb_flags & TDBF_INVALID) == 0 &&
+   ((directi

Re: diff: ipsec.conf(5), clarify "aes" accepts 128:256 bits

2021-11-02 Thread YASUOKA Masahiko
Hi,

On Tue, 2 Nov 2021 07:03:43 +
Jason McIntyre  wrote:
> On Tue, Nov 02, 2021 at 12:02:07PM +0900, YASUOKA Masahiko wrote:
>> I'd like to clarify "aes" in ipsec.conf accepts 128:256 bits.
>> 
>> sbin/ipsecctl/ike.c:
>> 201 case ENCXF_AES:
>> 202 enc_alg = "AES";
>> 203 key_length = "128,128:256";
>> 204 break;
>> 
>> 
>> ok?
>> 
>> Clarify that "aes" accepts keys whose length is in the 128:256-bit range.
>> 
> 
> i notice that the enc lists in ipsec.conf.5 and iked.conf.5 differ.
> aren't they supposed to be in sync?
> 
> for example, iked.conf.5 doesn't mention "aes" or "aesctr". also the
> *-gmac and *-gcm-12 discrepancy.

As for "aes", *only isakmpd(8)* supports the "aes" keyword or a
range for the key length.  So there is no need to sync it to
iked.conf.5.

Also I believe "aesctr" is meant to support a 160:288 range for key length, but
the implementation doesn't seem to be complete.  I have another plan to
handle this separately, then I'll update the man page.


Other than the key length range, it seems there are some differences
between iked.conf.5 and ipsec.conf.5.

1. "-gcm-12" 
   missing this in ipsec.conf.5 is ok since isakmpd(8) doesn't support
   it yet.  (It is actually an alias ID for "-gcm" though.)

2. "-gmac" and "null"
   iked.conf.5 has a separeted list for them to clarify they don't do
   encryption.  Applied the same to isakmpd.conf.5.

3. "chacha20-poly1305"
   It is missing in ipsec.conf.5.

4. explanation of "[IKE only]" or "[phase 2]"
   It is missing in ipsec.conf.5.  Copied the section from iked.conf
   and modified it.

5. explanation of "keysize" for AES-CTR and so on
   The explanation in ipsec.conf.5 is better.  Copied that to
   iked.conf.5.

6. "cast"
   ipsecctl(8) program doesn't support "cast" keyword actually,
   it supports "cast128" instead.  Correct "cast" to "cast128"


ok?

Index: sbin/iked/iked.conf.5
===
RCS file: /cvs/src/sbin/iked/iked.conf.5,v
retrieving revision 1.87
diff -u -p -r1.87 iked.conf.5
--- sbin/iked/iked.conf.5   26 Oct 2021 17:31:22 -  1.87
+++ sbin/iked/iked.conf.5   3 Nov 2021 05:42:48 -
@@ -998,9 +998,9 @@ keyword.
 3DES requires 24 bytes to form its 168-bit key.
 This is because the most significant bit of each byte is used for parity.
 .Pp
-The keysize of AES-CTR is actually 128-bit.
+The keysize of AES-CTR can be 128, 192, or 256 bits.
 However as well as the key, a 32-bit nonce has to be supplied.
-Thus 160 bits of key material have to be supplied.
+Thus 160, 224, or 288 bits of key material, respectively, have to be supplied.
 The same applies to AES-GCM, AES-GMAC and Chacha20-Poly1305,
 however in the latter case the keysize is 256 bit.
 .Pp
Index: sbin/ipsecctl/ipsec.conf.5
===
RCS file: /cvs/src/sbin/ipsecctl/ipsec.conf.5,v
retrieving revision 1.160
diff -u -p -r1.160 ipsec.conf.5
--- sbin/ipsecctl/ipsec.conf.5  22 Oct 2021 12:30:54 -  1.160
+++ sbin/ipsecctl/ipsec.conf.5  3 Nov 2021 05:42:49 -
@@ -637,10 +637,10 @@ keyword:
 The following cipher types are permitted with the
 .Ic enc
 keyword:
-.Bl -column "aes-128-gmac" "Key Length" "Description" -offset indent
+.Bl -column "chacha20-poly1305" "128-256 bits" "Description" -offset indent
 .It Em "Cipher" Ta Em "Key Length" Ta ""
 .It Li 3des Ta "168 bits" Ta ""
-.It Li aes Ta "128 bits" Ta ""
+.It Li aes Ta "128-256 bits" Ta ""
 .It Li aes-128 Ta "128 bits" Ta ""
 .It Li aes-192 Ta "192 bits" Ta ""
 .It Li aes-256 Ta "256 bits" Ta ""
@@ -651,21 +651,37 @@ keyword:
 .It Li aes-128-gcm Ta "160 bits" Ta "[phase 2 only, IKE only]"
 .It Li aes-192-gcm Ta "224 bits" Ta "[phase 2 only, IKE only]"
 .It Li aes-256-gcm Ta "288 bits" Ta "[phase 2 only, IKE only]"
+.It Li blowfish Ta "160 bits" Ta ""
+.It Li cast128 Ta "128 bits" Ta ""
+.It Li chacha20-poly1305 Ta "288 bits" Ta ""
+.El
+.Pp
+The following cipher types provide only authentication, not encryption:
+.Bl -column "chacha20-poly1305" "128-256 bits" "Description" -offset indent
 .It Li aes-128-gmac Ta "160 bits" Ta "[phase 2 only, IKE only]"
 .It Li aes-192-g

diff: isakmpd.conf.5, clarify ANY can be used for some params

2021-11-01 Thread YASUOKA Masahiko
ok?

Clarify that ANY can be used for several parameters of IPsec transform.

Index: sbin/isakmpd/isakmpd.conf.5
===
RCS file: /cvs/src/sbin/isakmpd/isakmpd.conf.5,v
retrieving revision 1.135
diff -u -p -r1.135 isakmpd.conf.5
--- sbin/isakmpd/isakmpd.conf.5 17 Apr 2018 12:13:29 -  1.135
+++ sbin/isakmpd/isakmpd.conf.5 2 Nov 2021 02:57:23 -
@@ -726,7 +726,7 @@ See below.
 Parameters for IPsec transform configuration
 .Bl -tag -width Ds
 .It Em AUTHENTICATION_ALGORITHM
-The optional authentication algorithm in the case of this
+The optional authentication algorithm or ANY in the case of this
 being an ESP transform.
 .It Em ENCAPSULATION_MODE
 The encapsulation mode as given by the RFCs.
@@ -745,7 +745,8 @@ List of lifetimes, each element is a
 .Aq Sy Lifetime
 section name.
 .It Em TRANSFORM_ID
-The transform ID as given by the RFCs.
+The transform ID as given by the RFCs, or ANY to denote that any
+transform proposed will be accepted.
 .El
 .It Aq Sy IPsec-ID
 Parameters for IPsec ID configuration



diff: ipsec.conf(5), clarify "aes" accepts 128:256 bits

2021-11-01 Thread YASUOKA Masahiko
I'd like to clarify "aes" in ipsec.conf accepts 128:256 bits.

sbin/ipsecctl/ike.c:
201 case ENCXF_AES:
202 enc_alg = "AES";
203 key_length = "128,128:256";
204 break;


ok?

Clarify that "aes" accepts keys whose length is in the 128:256-bit range.

Index: sbin/ipsecctl/ipsec.conf.5
===
RCS file: /cvs/src/sbin/ipsecctl/ipsec.conf.5,v
retrieving revision 1.160
diff -u -p -r1.160 ipsec.conf.5
--- sbin/ipsecctl/ipsec.conf.5  22 Oct 2021 12:30:54 -  1.160
+++ sbin/ipsecctl/ipsec.conf.5  2 Nov 2021 02:58:13 -
@@ -637,10 +637,10 @@ keyword:
 The following cipher types are permitted with the
 .Ic enc
 keyword:
-.Bl -column "aes-128-gmac" "Key Length" "Description" -offset indent
+.Bl -column "aes-128-gmac" "128-256 bits" "Description" -offset indent
 .It Em "Cipher" Ta Em "Key Length" Ta ""
 .It Li 3des Ta "168 bits" Ta ""
-.It Li aes Ta "128 bits" Ta ""
+.It Li aes Ta "128-256 bits" Ta ""
 .It Li aes-128 Ta "128 bits" Ta ""
 .It Li aes-192 Ta "192 bits" Ta ""
 .It Li aes-256 Ta "256 bits" Ta ""



Re: Exit status of pkg_add

2021-10-18 Thread YASUOKA Masahiko
Hi,

# drop ccing misc@

The diff seems ok to me.

ok to commit it in?

On Tue, 19 Oct 2021 10:42:04 +0900
Yuichiro NAITO  wrote:
> The following patch changes pkg_add to return an error code
> if a package name is wrong.
> 
> diff --git a/usr.sbin/pkg_add/OpenBSD/AddDelete.pm
> b/usr.sbin/pkg_add/OpenBSD/AddDelete.pm
> index 7a968cbf05d..39bee874ff1 100644
> --- a/usr.sbin/pkg_add/OpenBSD/AddDelete.pm
> +++ b/usr.sbin/pkg_add/OpenBSD/AddDelete.pm
> @@ -403,12 +403,13 @@ sub check_root
>  sub choose_location
>  {
>   my ($state, $name, $list, $is_quirks) = @_;
>   if (@$list == 0) {
>   if (!$is_quirks) {
>   $state->errsay("Can't find #1", $name);
> + $state->{bad}++;
>   $state->run_quirks(
>   sub {
>   my $quirks = shift;
>   $quirks->filter_obsolete([$name], $state);
>   });
>   }
> 
> Is it OK?
> 
> On 10/18/21 16:53, Yuichiro NAITO wrote:
>> Hi, I have a question about exit status of pkg_add command.
>> When I wrote a package install script which included typo in a package
>> name
>> (of course it's my fault), the script didn't stop in spite of `set
>> -e`.
>> Because pkg_add command returns 0 even if a package name is wrong.
>> Is this exit status intended or design policy of pkg_add command?
>> If not, I would like an error status to be returned.
>> It will save my time to look for a typo or similar bug.
>> I can't see 'EXIT STATUS' section in the pkg_add manual of OpenBSD
>> 7.0.
>> So, I e-mailed this question.
>> 
> 
> -- 
> Yuichiro NAITO (naito.yuich...@gmail.com)
> 



Re: Fix IPsec NAT-T for L2TP/IPsec

2021-05-12 Thread YASUOKA Masahiko
On Wed, 12 May 2021 19:11:09 +0900 (JST)
YASUOKA Masahiko  wrote:
> Radek reported a problem to misc@ that multiple Windows clients behind
> a NAT cannot use a L2TP/IPsec server simultaneously.
> 
> https://marc.info/?t=16099681611=1=2
> 
> There are two problems.  The first is that pipex(4) doesn't pass the proper
> ipsecflowinfo to ip_output().  The second is that the IPsec policy check
> done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is
> not cached.  This happens when its flow is shared by another tdb (for
> another client of the same NAT).
> 
> The following 2 diffs fix these problem.
> 
> comment?
> ok?
> 
> diff #1
> 
> Fix IPsec NAT-T work with pipex.

The original diff #1 used m_tag to specify the ipsecflowinfo.

I noticed "ph_cookie" is usable instead of the m_tag.  It seems simpler.

Is it better?

Index: sys/net/if_etherip.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_etherip.c,v
retrieving revision 1.48
diff -u -p -r1.48 if_etherip.c
--- sys/net/if_etherip.c9 Jan 2021 21:00:58 -   1.48
+++ sys/net/if_etherip.c12 May 2021 23:29:41 -
@@ -547,7 +547,7 @@ ip_etherip_output(struct ifnet *ifp, str
etheripstat_pkt(etherips_opackets, etherips_obytes, m->m_pkthdr.len -
(sizeof(struct ip) + sizeof(struct etherip_header)));
 
-   ip_send(m);
+   ip_send(m, 0);
 
return (0);
 }
Index: sys/net/if_gif.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_gif.c,v
retrieving revision 1.132
diff -u -p -r1.132 if_gif.c
--- sys/net/if_gif.c20 Feb 2021 04:58:29 -  1.132
+++ sys/net/if_gif.c12 May 2021 23:29:45 -
@@ -340,7 +340,7 @@ gif_send(struct gif_softc *sc, struct mb
ip->ip_src = sc->sc_tunnel.t_src4;
ip->ip_dst = sc->sc_tunnel.t_dst4;
 
-   ip_send(m);
+   ip_send(m, 0);
break;
}
 #ifdef INET6
Index: sys/net/if_gre.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/if_gre.c,v
retrieving revision 1.171
diff -u -p -r1.171 if_gre.c
--- sys/net/if_gre.c10 Mar 2021 10:21:47 -  1.171
+++ sys/net/if_gre.c12 May 2021 23:29:52 -
@@ -1999,7 +1999,7 @@ gre_ip_output(const struct gre_tunnel *t
 
switch (tunnel->t_af) {
case AF_INET:
-   ip_send(m);
+   ip_send(m, 0);
break;
 #ifdef INET6
case AF_INET6:
Index: sys/net/pf.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pf.c,v
retrieving revision 1.1116
diff -u -p -r1.1116 pf.c
--- sys/net/pf.c27 Apr 2021 09:38:29 -  1.1116
+++ sys/net/pf.c12 May 2021 23:29:56 -
@@ -2896,7 +2896,7 @@ pf_send_tcp(const struct pf_rule *r, sa_
 
switch (af) {
case AF_INET:
-   ip_send(m);
+   ip_send(m, 0);
break;
 #ifdef INET6
case AF_INET6:
Index: sys/net/pipex.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
retrieving revision 1.132
diff -u -p -r1.132 pipex.c
--- sys/net/pipex.c 10 Mar 2021 10:21:48 -  1.132
+++ sys/net/pipex.c 12 May 2021 23:31:24 -
@@ -1258,7 +1258,7 @@ pipex_pptp_output(struct mbuf *m0, struc
gre->flags = htons(gre->flags);
 
m0->m_pkthdr.ph_ifidx = session->ifindex;
-   ip_send(m0);
+   ip_send(m0, 0);
if (len > 0) {  /* network layer only */
/* countup statistics */
session->stat.opackets++;
@@ -1704,7 +1704,7 @@ pipex_l2tp_output(struct mbuf *m0, struc
ip->ip_tos = 0;
ip->ip_off = 0;
 
-   ip_send(m0);
+   ip_send(m0, session->proto.l2tp.ipsecflowinfo);
break;
 #ifdef INET6
case AF_INET6:
Index: sys/netinet/ip_icmp.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_icmp.c,v
retrieving revision 1.186
diff -u -p -r1.186 ip_icmp.c
--- sys/netinet/ip_icmp.c   30 Mar 2021 08:37:10 -  1.186
+++ sys/netinet/ip_icmp.c   12 May 2021 23:31:57 -
@@ -860,7 +860,7 @@ icmp_send(struct mbuf *m, struct mbuf *o
ipstat_inc(ips_localout);
ip_send_raw(m);
} else
-   ip_send(m);
+   ip_send(m, 0);
 }
 
 u_int32_t
Index: sys/netinet/ip_input.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_input.c,v
retrieving revision 1.359
diff -u -p -r1.359 ip_input.c
--- sys/netinet/ip_input.c  30 Apr 2021 13:52:48 -  1.359
+++ sys/n

Re: Fix IPsec NAT-T for L2TP/IPsec

2021-05-12 Thread YASUOKA Masahiko
On Wed, 12 May 2021 19:15:29 +0300
Vitaliy Makkoveev  wrote:
>> On 12 May 2021, at 18:42, YASUOKA Masahiko  wrote:
>> On Wed, 12 May 2021 17:26:51 +0300
>> Vitaliy Makkoveev  wrote:
>>> On Wed, May 12, 2021 at 07:11:09PM +0900, YASUOKA Masahiko wrote:
>>>> Radek reported a problem to misc@ that multiple Windows clients behind a 
>>>> NAT
>>>> cannot use a L2TP/IPsec server simultaneously.
>>>> 
>>>> https://marc.info/?t=16099681611=1=2
>>>> 
>>>> There is two problems.  First is pipex(4) doesn't pass the proper
>>>> ipsecflowinfo to ip_output().  Second is the IPsec policy check which is
>>>> done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is not
>>>> cached.  This happens when its flow is shared by another tdb (for another
>>>> client of the same NAT).
>>>> 
>>>> The following 2 diffs fix these problem.
>>>> 
>>>> comment?
>>>> ok?
>>>> 
>>> 
>>> Hi.
>>> 
>>> I have two comments for the diff 1:
>>> 
>>> 1. You should add PACKET_TAG_IPSEC_FLOWINFO description to
>>>m_tag_get(9).
>>> 2. You introduced mbuf(9) leak in pipex_l2tp_output() error path. I
>>>   pointed the place in your diff.
>> 
>> Good catch.  Thanks.
>> 
> 
> m_freem(9) accepts NULL so this check before is redundant.

Yes,

> It seems to me that "Used by the IPv4 stack to specify the IPsec flow
> of an output IP packet. The tag contains a u_int32_t identifying the
> IPsec flow.” is enough. Anyway it’s better to ask jmc@.

Ok,

> Also I like to remove PACKET_TAG_PIPEX with separate diff.

I removed PACKET_TAG_PIPEX separately.

Let me update the diff.

Index: sys/net/pipex.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
retrieving revision 1.132
diff -u -p -r1.132 pipex.c
--- sys/net/pipex.c 10 Mar 2021 10:21:48 -  1.132
+++ sys/net/pipex.c 12 May 2021 23:18:52 -
@@ -1628,6 +1628,7 @@ pipex_l2tp_output(struct mbuf *m0, struc
 #ifdef INET6
struct ip6_hdr *ip6;
 #endif
+   struct m_tag *mtag;
 
hlen = sizeof(struct pipex_l2tp_header) +
((pipex_session_is_l2tp_data_sequencing_on(session))
@@ -1704,6 +1705,15 @@ pipex_l2tp_output(struct mbuf *m0, struc
ip->ip_tos = 0;
ip->ip_off = 0;
 
+   if (session->proto.l2tp.ipsecflowinfo > 0) {
+   if ((mtag = m_tag_get(PACKET_TAG_IPSEC_FLOWINFO,
+   sizeof(u_int32_t), M_NOWAIT)) == NULL)
+   goto drop;
+   *(u_int32_t *)(mtag + 1) =
+   session->proto.l2tp.ipsecflowinfo;
+   m_tag_prepend(m0, mtag);
+   }
+
ip_send(m0);
break;
 #ifdef INET6
@@ -1733,6 +1743,7 @@ pipex_l2tp_output(struct mbuf *m0, struc
 
return;
 drop:
+   m_freem(m0);
session->stat.oerrors++;
 }
 
Index: sys/netinet/ip_input.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_input.c,v
retrieving revision 1.359
diff -u -p -r1.359 ip_input.c
--- sys/netinet/ip_input.c  30 Apr 2021 13:52:48 -  1.359
+++ sys/netinet/ip_input.c  12 May 2021 23:18:52 -
@@ -1790,6 +1790,8 @@ ip_send_do_dispatch(void *xmq, int flags
struct mbuf_queue *mq = xmq;
struct mbuf *m;
struct mbuf_list ml;
+   struct m_tag *mtag;
+   u_int32_t ipsecflowinfo = 0;
 
mq_delist(mq, );
if (ml_empty())
@@ -1797,7 +1799,12 @@ ip_send_do_dispatch(void *xmq, int flags
 
NET_LOCK();
while ((m = ml_dequeue()) != NULL) {
-   ip_output(m, NULL, NULL, flags, NULL, NULL, 0);
+   if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_FLOWINFO, NULL))
+   != NULL) {
+   ipsecflowinfo = *(u_int32_t *)(mtag + 1);
+   m_tag_delete(m, mtag);
+   }
+   ip_output(m, NULL, NULL, flags, NULL, NULL, ipsecflowinfo);
}
NET_UNLOCK();
 }
Index: sys/sys/mbuf.h
===
RCS file: /disk/cvs/openbsd/src/sys/sys/mbuf.h,v
retrieving revision 1.252
diff -u -p -r1.252 mbuf.h
--- sys/sys/mbuf.h  25 Feb 2021 02:43:31 -  1.252
+++ sys/sys/mbuf.h  12 May 2021 23:18:52 -
@@ -469,6 +469,7 @@ struct m_tag *m_tag_next(struct mbuf *, 
 /* Packet tag types */
 #define PACKET_TAG_IPSEC_IN_DONE   0x0001  /* IPsec applied, in */
 #define PACKET_TAG_IPSEC_OUT_DONE  0x0002  /* IPsec applied, out */
+#define PACKET_TAG_IPSEC_FLOWINFO   

Re: Fix IPsec NAT-T for L2TP/IPsec

2021-05-12 Thread YASUOKA Masahiko
On Wed, 12 May 2021 17:26:51 +0300
Vitaliy Makkoveev  wrote:
> On Wed, May 12, 2021 at 07:11:09PM +0900, YASUOKA Masahiko wrote:
>> Radek reported a problem to misc@ that multiple Windows clients behind a NAT
>> cannot use a L2TP/IPsec server simultaneously.
>> 
>> https://marc.info/?t=16099681611=1=2
>> 
>> There is two problems.  First is pipex(4) doesn't pass the proper
>> ipsecflowinfo to ip_output().  Second is the IPsec policy check which is
>> done by ipsp_spd_lookup() returns -1 (EINVAL) if the given tdb is not
>> cached.  This happens when its flow is shared by another tdb (for another
>> client of the same NAT).
>> 
>> The following 2 diffs fix these problem.
>> 
>> comment?
>> ok?
>> 
> 
> Hi.
> 
> I have two comments for the diff 1:
> 
> 1. You should add PACKET_TAG_IPSEC_FLOWINFO description to
> m_tag_get(9).
> 2. You introduced mbuf(9) leak in pipex_l2tp_output() error path. I
>pointed the place in your diff.

Good catch.  Thanks.


Let me update the diff.

Index: sys/net/pipex.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
retrieving revision 1.132
diff -u -p -r1.132 pipex.c
--- sys/net/pipex.c 10 Mar 2021 10:21:48 -  1.132
+++ sys/net/pipex.c 12 May 2021 15:33:33 -
@@ -1628,6 +1628,7 @@ pipex_l2tp_output(struct mbuf *m0, struc
 #ifdef INET6
struct ip6_hdr *ip6;
 #endif
+   struct m_tag *mtag;
 
hlen = sizeof(struct pipex_l2tp_header) +
((pipex_session_is_l2tp_data_sequencing_on(session))
@@ -1704,6 +1705,15 @@ pipex_l2tp_output(struct mbuf *m0, struc
ip->ip_tos = 0;
ip->ip_off = 0;
 
+   if (session->proto.l2tp.ipsecflowinfo > 0) {
+   if ((mtag = m_tag_get(PACKET_TAG_IPSEC_FLOWINFO,
+   sizeof(u_int32_t), M_NOWAIT)) == NULL)
+   goto drop;
+   *(u_int32_t *)(mtag + 1) =
+   session->proto.l2tp.ipsecflowinfo;
+   m_tag_prepend(m0, mtag);
+   }
+
ip_send(m0);
break;
 #ifdef INET6
@@ -1733,6 +1743,8 @@ pipex_l2tp_output(struct mbuf *m0, struc
 
return;
 drop:
+   if (m0 != NULL)
+   m_freem(m0);
session->stat.oerrors++;
 }
 
Index: sys/netinet/ip_input.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_input.c,v
retrieving revision 1.359
diff -u -p -r1.359 ip_input.c
--- sys/netinet/ip_input.c  30 Apr 2021 13:52:48 -  1.359
+++ sys/netinet/ip_input.c  12 May 2021 15:31:52 -
@@ -1790,6 +1790,8 @@ ip_send_do_dispatch(void *xmq, int flags
struct mbuf_queue *mq = xmq;
struct mbuf *m;
struct mbuf_list ml;
+   struct m_tag *mtag;
+   u_int32_t ipsecflowinfo = 0;
 
mq_delist(mq, );
if (ml_empty())
@@ -1797,7 +1799,12 @@ ip_send_do_dispatch(void *xmq, int flags
 
NET_LOCK();
while ((m = ml_dequeue()) != NULL) {
-   ip_output(m, NULL, NULL, flags, NULL, NULL, 0);
+   if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_FLOWINFO, NULL))
+   != NULL) {
+   ipsecflowinfo = *(u_int32_t *)(mtag + 1);
+   m_tag_delete(m, mtag);
+   }
+   ip_output(m, NULL, NULL, flags, NULL, NULL, ipsecflowinfo);
}
NET_UNLOCK();
 }
Index: sys/sys/mbuf.h
===
RCS file: /disk/cvs/openbsd/src/sys/sys/mbuf.h,v
retrieving revision 1.252
diff -u -p -r1.252 mbuf.h
--- sys/sys/mbuf.h  25 Feb 2021 02:43:31 -  1.252
+++ sys/sys/mbuf.h  12 May 2021 15:31:52 -
@@ -469,6 +469,7 @@ struct m_tag *m_tag_next(struct mbuf *, 
 /* Packet tag types */
 #define PACKET_TAG_IPSEC_IN_DONE   0x0001  /* IPsec applied, in */
 #define PACKET_TAG_IPSEC_OUT_DONE  0x0002  /* IPsec applied, out */
+#define PACKET_TAG_IPSEC_FLOWINFO  0x0004  /* IPsec flowinfo */
 #define PACKET_TAG_WIREGUARD   0x0040  /* WireGuard data */
 #define PACKET_TAG_GRE 0x0080  /* GRE processing done */
 #define PACKET_TAG_DLT 0x0100 /* data link layer type */
@@ -479,7 +480,7 @@ struct m_tag *m_tag_next(struct mbuf *, 
 #define PACKET_TAG_CARP_BAL_IP 0x4000  /* carp(4) ip balanced marker */
 
 #define MTAG_BITS \
-("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
+("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
 "\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7WG\10GRE\11DLT" \
 "\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")
 
Index: share/man/man9

Fix IPsec NAT-T for L2TP/IPsec

2021-05-12 Thread YASUOKA Masahiko

Hi,

Radek reported a problem to misc@ that multiple Windows clients 
behind a NAT cannot use a L2TP/IPsec server simultaneously.


https://marc.info/?t=16099681611=1=2

There are two problems.  The first is that pipex(4) doesn't pass the
proper ipsecflowinfo to ip_output().  The second is that the IPsec
policy check done by ipsp_spd_lookup() returns -1 (EINVAL) if the
given tdb is not cached.  This happens when its flow is shared by
another tdb (for another client of the same NAT).


The following 2 diffs fix these problems.

comment?
ok?

diff #1

Fix IPsec NAT-T work with pipex.

Index: sys/net/pipex.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pipex.c,v
retrieving revision 1.132
diff -u -p -r1.132 pipex.c
--- sys/net/pipex.c 10 Mar 2021 10:21:48 -  1.132
+++ sys/net/pipex.c 12 May 2021 09:38:32 -
@@ -1628,6 +1628,7 @@ pipex_l2tp_output(struct mbuf *m0, struc
 #ifdef INET6
struct ip6_hdr *ip6;
 #endif
+   struct m_tag *mtag;

hlen = sizeof(struct pipex_l2tp_header) +
((pipex_session_is_l2tp_data_sequencing_on(session))
@@ -1703,6 +1704,15 @@ pipex_l2tp_output(struct mbuf *m0, struc
ip->ip_ttl = MAXTTL;
ip->ip_tos = 0;
ip->ip_off = 0;
+
+   if (session->proto.l2tp.ipsecflowinfo > 0) {
+			if ((mtag = 
m_tag_get(PACKET_TAG_IPSEC_FLOWINFO,

+   sizeof(u_int32_t), M_NOWAIT)) == NULL)
+   goto drop;
+   *(u_int32_t *)(mtag + 1) =
+   session->proto.l2tp.ipsecflowinfo;
+   m_tag_prepend(m0, mtag);
+   }

ip_send(m0);
break;
Index: sys/netinet/ip_input.c
===
RCS file: /disk/cvs/openbsd/src/sys/netinet/ip_input.c,v
retrieving revision 1.359
diff -u -p -r1.359 ip_input.c
--- sys/netinet/ip_input.c  30 Apr 2021 13:52:48 -  1.359
+++ sys/netinet/ip_input.c  12 May 2021 09:38:32 -
@@ -1790,6 +1790,8 @@ ip_send_do_dispatch(void *xmq, int flags
struct mbuf_queue *mq = xmq;
struct mbuf *m;
struct mbuf_list ml;
+   struct m_tag *mtag;
+   u_int32_t ipsecflowinfo = 0;

mq_delist(mq, );
if (ml_empty())
@@ -1797,7 +1799,12 @@ ip_send_do_dispatch(void *xmq, int flags

NET_LOCK();
while ((m = ml_dequeue()) != NULL) {
-   ip_output(m, NULL, NULL, flags, NULL, NULL, 0);
+		if ((mtag = m_tag_find(m, PACKET_TAG_IPSEC_FLOWINFO, 
NULL))

+   != NULL) {
+   ipsecflowinfo = *(u_int32_t *)(mtag + 1);
+   m_tag_delete(m, mtag);
+   }
+		ip_output(m, NULL, NULL, flags, NULL, NULL, 
ipsecflowinfo);

}
NET_UNLOCK();
 }
Index: sys/sys/mbuf.h
===
RCS file: /disk/cvs/openbsd/src/sys/sys/mbuf.h,v
retrieving revision 1.252
diff -u -p -r1.252 mbuf.h
--- sys/sys/mbuf.h  25 Feb 2021 02:43:31 -  1.252
+++ sys/sys/mbuf.h  12 May 2021 09:38:32 -
@@ -469,6 +469,7 @@ struct m_tag *m_tag_next(struct mbuf *,
 /* Packet tag types */
 #define PACKET_TAG_IPSEC_IN_DONE	0x0001  /* IPsec applied, in 
*/
 #define PACKET_TAG_IPSEC_OUT_DONE	0x0002  /* IPsec applied, out 
*/

+#define PACKET_TAG_IPSEC_FLOWINFO  0x0004  /* IPsec flowinfo */
 #define PACKET_TAG_WIREGUARD   0x0040  /* WireGuard data */
 #define PACKET_TAG_GRE			0x0080  /* GRE 
processing done */
 #define PACKET_TAG_DLT			0x0100 /* data link 
layer type */

@@ -479,7 +480,7 @@ struct m_tag *m_tag_next(struct mbuf *,
 #define PACKET_TAG_CARP_BAL_IP		0x4000  /* carp(4) ip 
balanced marker */


 #define MTAG_BITS \
-("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_IN_CRYPTO_DONE" \
+("\20\1IPSEC_IN_DONE\2IPSEC_OUT_DONE\3IPSEC_FLOWINFO" \
 
"\4IPSEC_OUT_CRYPTO_NEEDED\5IPSEC_PENDING_TDB\6BRIDGE\7WG\10GRE\11DLT" 
\
 
"\12PF_DIVERT\14PF_REASSEMBLED\15SRCROUTE\16TUNNEL\17CARP_BAL_IP")





diff #2

Make the IPsec flow able to have multiple `ipsec_ids' so that
ipsp_spd_lookup() can check whether the `ipsec_ids' of the given tdb
belongs to a flow shared by multiple clients behind a NAT.

Index: sys/net/pfkeyv2.c
===
RCS file: /disk/cvs/openbsd/src/sys/net/pfkeyv2.c,v
retrieving revision 1.211
diff -u -p -r1.211 pfkeyv2.c
--- sys/net/pfkeyv2.c   4 May 2021 09:28:04 -   1.211
+++ sys/net/pfkeyv2.c   12 May 2021 10:07:11 -
@@ -1106,6 +1106,7 @@ pfkeyv2_send(struct socket *so, void *me
int i, j, rval = 0, mode = PFKEYV2_SENDMESSAGE_BROADCAST;
int delflag = 0;
struct sockaddr_encap encapdst, encapnetmask;
+   struct ipsec_ids *ids, *ids0;
struct ipsec_policy *ipo;
struct ipsec_acquire *ipa;
struct 

Re: monotonic time going back by wrong skews

2021-04-06 Thread YASUOKA Masahiko
Hi,

I'm sorry.  I sent the wrong diff to people.  The results from
giovanni@ and mcmer seem wrong.  I suppose stu@ used the correct
diff.

giovanni and mcmer, can you test with the correct diff again?

I attached the correct diff at the end of this mail.

I'm sorry again.

On Tue, 6 Apr 2021 09:21:40 +0200
Giovanni Bechis  wrote:
> On Mon, Apr 05, 2021 at 07:14:49PM +0900, YASUOKA Masahiko wrote:
>> Hi,
>> 
>> > Another issue that I see is that people have not reported, at least
> [...]
>> > publicly, that this runs fine on their normal OpenBSD machines.
>> 
>> Some dmesgs posted on public lists seems to have the same problem.
>> 
>> https://marc.info/?l=openbsd-bugs=2=1=disabling+user+TSC=b
>> https://marc.info/?l=openbsd-tech=2=1=disabling+user+TSC=b
>> https://marc.info/?l=openbsd-ports=2=1=disabling+user+TSC=b
>> 
>> For example,
>> 
>> https://marc.info/?l=openbsd-bugs=161618496905444=2
>> 
>> |Subject:wg(4) crash
>> |From:   Stuart Henderson 
>> |bios0: vendor Dell Inc. version "2.9.0" date 12/06/2019
>> |bios0: Dell Inc. PowerEdge R620
>> |cpu1: disabling user TSC (skew=135)
>> |cpu1: smt 0, core 0, package 1
>> 
>> https://marc.info/?l=openbsd-ports=161306073708427=2
>> |Subject:Re: sysutils/nut README APC over USB device chgrp/chmod
>> |From:   Marcus MERIGHI 
>> |bios0: vendor American Megatrends Inc. version "3.1" date 06/07/2018
>> |cpu11: disabling user TSC (skew=240)
>> |cpu11: smt 0, core 3, package 1
>> 
>> these 2 are real machine and using 2 CPU sockets.
>> 
>> https://marc.info/?l=openbsd-ports=161562278114172=2
>> |Subject:ruby27 vs Puppet
>> |From:   Giovanni Bechis 
>> |bios0: vendor Phoenix Technologies LTD version "6.00" date 12/12/2018
>> |bios0: VMware, Inc. VMware Virtual Platform
>> |cpu1: disabling user TSC (skew=-12705)
>> 
>> VMware.  seems the same problem of mine.
>> 
>> I'll ask people to do the same test which cheloha@ write in previous
>> mail.
>> 
> Attached my data and dmesg produced by the script on my VMware vm.
> 
>  Cheers
>   Giovanni

Index: sys/arch/amd64/amd64/tsc.c
===
RCS file: /var/cvs/openbsd/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.23
diff -u -p -r1.23 tsc.c
--- sys/arch/amd64/amd64/tsc.c  23 Feb 2021 04:44:30 -  1.23
+++ sys/arch/amd64/amd64/tsc.c  5 Apr 2021 10:28:00 -
@@ -311,16 +311,42 @@ tsc_read_bp(struct cpu_info *ci, uint64_
*aptscp = tsc_sync_val;
 }
 
+#defineTSC_SYNC_NTIMES 1000
+
+static int tsc_difs[MAXCPUS][TSC_SYNC_NTIMES];
+
+void
+tsc_debug(void)
+{
+   int i, cpuid = curcpu()->ci_cpuid;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   if (i % 10 == 0)
+   printf("%5d", tsc_difs[cpuid][i]);
+   else
+   printf(" %5d", tsc_difs[cpuid][i]);
+   if (i % 10 == 9)
+   printf("\n");
+   }
+   printf("\n");
+}
+
 void
 tsc_sync_bp(struct cpu_info *ci)
 {
+   int i, mindif = INT_MAX, dif;
uint64_t bptsc, aptsc;
 
-   tsc_read_bp(ci, , ); /* discarded - cache effects */
-   tsc_read_bp(ci, , );
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   tsc_read_bp(ci, , );
+   dif = bptsc - aptsc;
+   if (abs(dif) < abs(mindif))
+   mindif = dif;
+   tsc_difs[ci->ci_cpuid][i] = dif;
+   }
 
/* Compute final value to adjust for skew. */
-   ci->ci_tsc_skew = bptsc - aptsc;
+   ci->ci_tsc_skew = mindif;
 }
 
 /*
@@ -351,8 +377,10 @@ tsc_post_ap(struct cpu_info *ci)
 void
 tsc_sync_ap(struct cpu_info *ci)
 {
-   tsc_post_ap(ci);
-   tsc_post_ap(ci);
+   int i;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++)
+   tsc_post_ap(ci);
 }
 
 void



Re: monotonic time going back by wrong skews

2021-04-05 Thread YASUOKA Masahiko
On Mon, 5 Apr 2021 14:24:03 +0200 (CEST)
Mark Kettenis  wrote:
>> Date: Mon, 05 Apr 2021 19:14:49 +0900 (JST)
>> From: YASUOKA Masahiko 
>> 
>> Hi,
>> 
>> On Mon, 5 Apr 2021 10:43:00 +0300
>> Paul Irofti  wrote:
>> > On 05.04.2021 06:13, Scott Cheloha wrote:
>> >> On Mon, Mar 29, 2021 at 02:00:01PM +0900, YASUOKA Masahiko wrote:
>> >>> On Thu, 25 Mar 2021 19:41:35 +0100 (CET)
>> >>> Mark Kettenis  wrote:
>> >>>>> From: Scott Cheloha 
>> >>>>> Date: Thu, 25 Mar 2021 13:18:04 -0500
>> >>>>>> On Wed, Mar 24, 2021 at 05:40:21PM +0900, YASUOKA Masahiko wrote:
>> >>>>> Which diff did you apply?  Yasuoka provided two diffs.
>> >>>>>
>> >>>>> In any case, ignore this diff:
>> >>>>>
>> >>>>>> diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
>> >>>>>> index 238a5a068e1..3b951a8b5a3 100644
>> >>>>>> --- a/sys/arch/amd64/amd64/tsc.c
>> >>>>>> +++ b/sys/arch/amd64/amd64/tsc.c
>> >>>>>> @@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
>> >>>>>> u_int
>> >>>>>> tsc_get_timecount(struct timecounter *tc)
>> >>>>>> {
>> >>>>>> - return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>> >>>>>> + //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>> >>>>>> + return rdtsc_lfence();
>> >>>>>> }
>> >>>>>>
>> >>>>>> void
>> >>>>>
>> >>>>>
>> >>>>> We don't want to discard the skews, that's wrong.
>> >>>
>> >>> I'm sorry for the confusion.
>> >> No problem.
>> >> 
>> >>>>> The reason it "fixes" Yasuoka's problem is because the real skews
>> >>>>> on the ESXi VMs in question are probably close to zero but our
>> >>>>> synchronization algorithm is picking huge (wrong) skews due to
>> >>>>> some other variable interfering with our measurement.
>> >>>>
>> >>>> Right.  If a VM exit happens while we're doing our measurement, you'll
>> >>>> see a significant delay.  And a guest OS can't prevent those from
>> >>>> happening.  But even on real hardware SMM mode may interfere with our
>> >>>> measurement.
>> >>>
>> >>> For machines like the ESXi VMs, the measurement seems to have to
>> >>> exclude such delayed values as outliers.  I think taking a lot of
>> >>> samples and choice the minimum is a good enough way for the purpose.
>> >>>
>> >>> I updated the diff.
>> >>>
>> >>> - delete lines for debug
>> >>> - make tsc quality lower if skew is not good enough
>> >>> - reduce difference from NetBSD
>> >>>
>> >>> comment? ok?
>> >> If more iterations fixes your problem, great.  It isn't going to make
>> >> things worse for machines with sync'd TSCs, makes the TSC usable on
>> >> another class of machine, and is relatively cheap.
>> >> This is ok cheloha@.
>> >> You need another ok, though.
>> > 
>> > 
>> > The diff is obviously fine. But it is still a heuristic with no real
>> > motivation except for this particular ESXi VM case. So my question
>> > about why we choose the minimum instead of the median or the mean has
>> > not been answered.
>> 
>> Because median or mean is affected by outliers.  We actually see
>> some outliers in samples from the VMware.
>> 
>> I suppose there is a better mesure, but I am currently no idia and had
>> not used that kind of measure in kernel.  On the other hand, finding
>> the minimum is very simple.
> 
> Using the median should take care of the outliers though.

You are right.  I misunderstood the meaning.

> I'm not at all convinced that taking the absolute value of the
> difference makes sense.  It probably works in this case since the
> actual skew on your VM is zero.  So measurements close to zero are
> "good".  But what if the skew isn't zero?  Take for example an AP that
> is running ahead of the BP by 5000 ticks.  In that case, the right
> value for the skew is -5000.  But now imagine that the BP gets
> "interrupted&qu

Re: monotonic time going back by wrong skews

2021-04-05 Thread YASUOKA Masahiko
Hi,

On Mon, 5 Apr 2021 10:43:00 +0300
Paul Irofti  wrote:
> On 05.04.2021 06:13, Scott Cheloha wrote:
>> On Mon, Mar 29, 2021 at 02:00:01PM +0900, YASUOKA Masahiko wrote:
>>> On Thu, 25 Mar 2021 19:41:35 +0100 (CET)
>>> Mark Kettenis  wrote:
>>>>> From: Scott Cheloha 
>>>>> Date: Thu, 25 Mar 2021 13:18:04 -0500
>>>>>> On Wed, Mar 24, 2021 at 05:40:21PM +0900, YASUOKA Masahiko wrote:
>>>>> Which diff did you apply?  Yasuoka provided two diffs.
>>>>>
>>>>> In any case, ignore this diff:
>>>>>
>>>>>> diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
>>>>>> index 238a5a068e1..3b951a8b5a3 100644
>>>>>> --- a/sys/arch/amd64/amd64/tsc.c
>>>>>> +++ b/sys/arch/amd64/amd64/tsc.c
>>>>>> @@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
>>>>>> u_int
>>>>>> tsc_get_timecount(struct timecounter *tc)
>>>>>> {
>>>>>> -return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>>>>>> +//return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>>>>>> +return rdtsc_lfence();
>>>>>> }
>>>>>>
>>>>>> void
>>>>>
>>>>>
>>>>> We don't want to discard the skews, that's wrong.
>>>
>>> I'm sorry for the confusion.
>> No problem.
>> 
>>>>> The reason it "fixes" Yasuoka's problem is because the real skews
>>>>> on the ESXi VMs in question are probably close to zero but our
>>>>> synchronization algorithm is picking huge (wrong) skews due to
>>>>> some other variable interfering with our measurement.
>>>>
>>>> Right.  If a VM exit happens while we're doing our measurement, you'll
>>>> see a significant delay.  And a guest OS can't prevent those from
>>>> happening.  But even on real hardware SMM mode may interfere with our
>>>> measurement.
>>>
>>> For machines like the ESXi VMs, the measurement seems to have to
>>> exclude such delayed values as outliers.  I think taking a lot of
>>> samples and choice the minimum is a good enough way for the purpose.
>>>
>>> I updated the diff.
>>>
>>> - delete lines for debug
>>> - make tsc quality lower if skew is not good enough
>>> - reduce difference from NetBSD
>>>
>>> comment? ok?
>> If more iterations fixes your problem, great.  It isn't going to make
>> things worse for machines with sync'd TSCs, makes the TSC usable on
>> another class of machine, and is relatively cheap.
>> This is ok cheloha@.
>> You need another ok, though.
> 
> 
> The diff is obviously fine. But it is still a heuristic with no real
> motivation except for this particular ESXi VM case. So my question
> about why we choose the minimum instead of the median or the mean has
> not been answered.

Because median or mean is affected by outliers.  We actually see
some outliers in samples from the VMware.

I suppose there is a better measure, but I currently have no idea what
it would be, and I have not used that kind of measure in the kernel.
On the other hand, finding the minimum is very simple.

> Another issue that I see is that people have not reported, at least
> publicly, that this runs fine on their normal OpenBSD machines.

Some dmesgs posted on public lists seem to have the same problem.

https://marc.info/?l=openbsd-bugs=2=1=disabling+user+TSC=b
https://marc.info/?l=openbsd-tech=2=1=disabling+user+TSC=b
https://marc.info/?l=openbsd-ports=2=1=disabling+user+TSC=b

For example,

https://marc.info/?l=openbsd-bugs=161618496905444=2

|Subject:wg(4) crash
|From:   Stuart Henderson 
|bios0: vendor Dell Inc. version "2.9.0" date 12/06/2019
|bios0: Dell Inc. PowerEdge R620
|cpu1: disabling user TSC (skew=135)
|cpu1: smt 0, core 0, package 1

https://marc.info/?l=openbsd-ports=161306073708427=2
|Subject:Re: sysutils/nut README APC over USB device chgrp/chmod
|From:   Marcus MERIGHI 
|bios0: vendor American Megatrends Inc. version "3.1" date 06/07/2018
|cpu11: disabling user TSC (skew=240)
|cpu11: smt 0, core 3, package 1

These 2 are real machines using 2 CPU sockets.

https://marc.info/?l=openbsd-ports=161562278114172=2
|Subject:ruby27 vs Puppet
|From:   Giovanni Bechis 
|bios0: vendor Phoenix Technologies LTD version "6.00" date 12/12/2018
|bios0: VMware, Inc. VMware Virtual Platform
|cpu1: disabling user TSC (skew=-12705)

VMware.  This seems to be the same problem as mine.

I'll ask people to do the same test

Re: monotonic time going back by wrong skews

2021-03-28 Thread YASUOKA Masahiko
On Thu, 25 Mar 2021 19:41:35 +0100 (CET)
Mark Kettenis  wrote:
>> From: Scott Cheloha 
>> Date: Thu, 25 Mar 2021 13:18:04 -0500
>> > On Wed, Mar 24, 2021 at 05:40:21PM +0900, YASUOKA Masahiko wrote:
>> Which diff did you apply?  Yasuoka provided two diffs.
>> 
>> In any case, ignore this diff:
>> 
>> > diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
>> > index 238a5a068e1..3b951a8b5a3 100644
>> > --- a/sys/arch/amd64/amd64/tsc.c
>> > +++ b/sys/arch/amd64/amd64/tsc.c
>> > @@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
>> > u_int
>> > tsc_get_timecount(struct timecounter *tc)
>> > {
>> > -  return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>> > +  //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
>> > +  return rdtsc_lfence();
>> > }
>> > 
>> > void
>> 
>> 
>> We don't want to discard the skews, that's wrong.

I'm sorry for the confusion.

>> The reason it "fixes" Yasuoka's problem is because the real skews
>> on the ESXi VMs in question are probably close to zero but our
>> synchronization algorithm is picking huge (wrong) skews due to
>> some other variable interfering with our measurement.
> 
> Right.  If a VM exit happens while we're doing our measurement, you'll
> see a significant delay.  And a guest OS can't prevent those from
> happening.  But even on real hardware SMM mode may interfere with our
> measurement.

For machines like the ESXi VMs, the measurement seems to have to
exclude such delayed values as outliers.  I think taking a lot of
samples and choosing the minimum is good enough for the purpose.

I updated the diff.

- delete lines for debug
- make tsc quality lower if skew is not good enough
- reduce difference from NetBSD

comment? ok?

Index: sys/arch/amd64//amd64/tsc.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/amd64/tsc.c,v
retrieving revision 1.23
diff -u -p -r1.23 tsc.c
--- sys/arch/amd64//amd64/tsc.c 23 Feb 2021 04:44:30 -  1.23
+++ sys/arch/amd64//amd64/tsc.c 29 Mar 2021 04:18:31 -
@@ -38,6 +38,7 @@ int   tsc_is_invariant;
 
 #defineTSC_DRIFT_MAX   250
 #define TSC_SKEW_MAX   100
+#defineTSC_SYNC_ROUNDS 1000
 int64_ttsc_drift_observed;
 
 volatile int64_t   tsc_sync_val;
@@ -235,6 +236,7 @@ tsc_timecounter_init(struct cpu_info *ci
printf("%s: disabling user TSC (skew=%lld)\n",
ci->ci_dev->dv_xname, (long long)ci->ci_tsc_skew);
tsc_timecounter.tc_user = 0;
+   tsc_timecounter.tc_quality = -1000;
}
 
if (!(ci->ci_flags & CPUF_PRIMARY) ||
@@ -314,13 +316,19 @@ tsc_read_bp(struct cpu_info *ci, uint64_
 void
 tsc_sync_bp(struct cpu_info *ci)
 {
+   int i, val, diff;
uint64_t bptsc, aptsc;
 
-   tsc_read_bp(ci, , ); /* discarded - cache effects */
-   tsc_read_bp(ci, , );
+   val = INT_MAX;
+   for (i = 0; i < TSC_SYNC_ROUNDS; i++) {
+   tsc_read_bp(ci, , );
+   diff = bptsc - aptsc;
+   if (abs(diff) < abs(val))
+   val = diff;
+   }
 
/* Compute final value to adjust for skew. */
-   ci->ci_tsc_skew = bptsc - aptsc;
+   ci->ci_tsc_skew = val;
 }
 
 /*
@@ -351,8 +359,10 @@ tsc_post_ap(struct cpu_info *ci)
 void
 tsc_sync_ap(struct cpu_info *ci)
 {
-   tsc_post_ap(ci);
-   tsc_post_ap(ci);
+   int i;
+
+   for (i = 0; i < TSC_SYNC_ROUNDS; i++)
+   tsc_post_ap(ci);
 }
 
 void



Re: fyi: get HP EliteBook 830 G7/G8 booting

2021-03-26 Thread YASUOKA Masahiko
On Fri, 26 Mar 2021 12:12:44 +0100 (CET)
Mark Kettenis  wrote:
>> Date: Fri, 26 Mar 2021 19:43:23 +0900 (JST)
>> From: YASUOKA Masahiko 
>> 
>> Hi,
>> 
>> On Fri, 26 Mar 2021 09:30:43 +0100
>> Jan Klemkow  wrote:
>> > If you want to boot OpenBSD on an HP EliteBook 830 G7/G8, the bootloader
>> > will hang while loading the kernel.  Because, the UEFI loads the
>> > bootloader on the same place in memory, where the bootloader will copy
>> > the kernel.  We are unable to load the kernel on arbitrary memory.
>> > Thus, the following diff will help you, to get OpenBSD running on these
>> > machines.  It moves the hardcoded Kernel address to a free place.
>> 
>> The openbsd efiboot copies the kernel to that place after
>> ExitBootServices().
>> 
>> sys/arch/amd64/stand/efiboot/exec_i386.c
>> 152 /*
>> 153  * Move the loaded kernel image to the usual place after 
>> calling
>> 154  * ExitBootServices().
>> 155  */
>> 156 #ifdef __amd64__
>> 157 protect_writeable(marks[MARK_START] + delta,
>> 158 marks[MARK_END] - marks[MARK_START]);
>> 159 #endif
>> 160 memmove((void *)marks[MARK_START] + delta, (void 
>> *)marks[MARK_START],
>> 161 marks[MARK_END] - marks[MARK_START]);
>> 162 for (i = 0; i < MARK_MAX; i++)
>> 163 marks[i] += delta;
>> 164 
>> 165 #ifdef __amd64__
>> 166 (*run_i386)((u_long)run_i386, entry, howto, bootdev, 
>> BOOTARG_APIVER,
>> 167 marks[MARK_END], extmem, cnvmem, ac, (intptr_t)av);
>> 
>> 
>> I think it should work without the ld.script change..
> 
> The (likely) problem is that the memmove() on line 160 is overwriting
> the bootloader code itself.
> 
> There are essentially two ways to fix this:
> 
> 1. Have the bootloader relocate itself to an address that doesn't
>conflict with the kernel to be loaded.
> 
> 2. Make it possible for the kernel to be loaded at a (somewhat)
>arbitrary physical address.
> 
> In my view #2 is the way forward.  There are other reasons why that
> would be beneficial as it would make it less predictable at which
> physical address the kernel code lives which could prevent some
> attacks that use the direct map.
> 
> #2 is also the approach taken by the EFIBOOT on armv7 and arm64.  On
> arm64 for example, EFIBOOT loads the kernel into a 64MB memory block
> that is aligned on a 2MB boundary.  The kernel then figures out its
> load address based on that and and patches things up accordingly.

In this scenario, what efiboot should do is just jump to "start64"
(the 64-bit entry point) of the kernel, and everything else is done after
start64?

> mlarkin@ was doing some work to change how we load the amd64 kernel.
> His approach was to let the bootloader build the initial page tables
> and jump into the kernel in 64-bit mode with the MMU enabled.  That
> was more focussed on running the kernel at a randomized virtual
> address.  But it should be fairly easy to make it run at a different
> physical address as well this way.  Unfortunately that effort was
> mostly focussed on the legacy bootloader.



Re: fyi: get HP EliteBook 830 G7/G8 booting

2021-03-26 Thread YASUOKA Masahiko
Hi,

On Fri, 26 Mar 2021 09:30:43 +0100
Jan Klemkow  wrote:
> If you want to boot OpenBSD on an HP EliteBook 830 G7/G8, the bootloader
> will hang while loading the kernel.  Because, the UEFI loads the
> bootloader on the same place in memory, where the bootloader will copy
> the kernel.  We are unable to load the kernel on arbitrary memory.
> Thus, the following diff will help you, to get OpenBSD running on these
> machines.  It moves the hardcoded Kernel address to a free place.

The openbsd efiboot copies the kernel to that place after
ExitBootServices().

sys/arch/amd64/stand/efiboot/exec_i386.c
152 /*
153  * Move the loaded kernel image to the usual place after calling
154  * ExitBootServices().
155  */
156 #ifdef __amd64__
157 protect_writeable(marks[MARK_START] + delta,
158 marks[MARK_END] - marks[MARK_START]);
159 #endif
160 memmove((void *)marks[MARK_START] + delta, (void 
*)marks[MARK_START],
161 marks[MARK_END] - marks[MARK_START]);
162 for (i = 0; i < MARK_MAX; i++)
163 marks[i] += delta;
164 
165 #ifdef __amd64__
166 (*run_i386)((u_long)run_i386, entry, howto, bootdev, 
BOOTARG_APIVER,
167 marks[MARK_END], extmem, cnvmem, ac, (intptr_t)av);


I think it should work without the ld.script change..



Re: monotonic time going back by wrong skews

2021-03-24 Thread YASUOKA Masahiko
Hi,

> Second, why is taking the minimum value the optimal choice? I would
> assume an average would be better. Basically if you have a sequency
> like 900, 900, 900, 900, 0, 900, 900, 900 you pick 0 which could lead
> to some problems, right? Or am I missing something?"

Skews on VMware

>> -8445 -6643 -52183 0-3-4-7   -11-5 0
>>-11-9-5-3-4-3-7 8-5-6
>> -5-9-3-9-7-1-5-5-9-2
>> -6-4-6-4   -11-8-3-4-8-1
>> -9-1-8 1-8 6-5-4 2-2
>> -8-3-1-5-2-2 1 2-2-9
>>-12 0-9-2-2-5-2 1 2 0


The first 3 seem to be strange.  There is also such a value in the middle
of the sampling.

>>  9-1   -10 50505-1 2 6   -11 2-2

I suppose such values should be excluded.

Also I did same test on my VAIO.  It seems more constant than VMware.
Full result is attached at last.

Is it possible that the calculation code is being affected by the
CPU scheduler of the hypervisor?

Thanks,

On Wed, 24 Mar 2021 13:04:32 +0200
Paul Irofti  wrote:
> Hi,
> 
> Thank you for taking this to tech@ as requested!
> 
> I will reproduce here what I replied to Yasouka and Scott (which I
> think proposed taking the minimum skew value) in private.
> 
> "First, thank you very much for the in-depth analysis. I would suggest
> you take this to a public forum like tech@ so that we can keep the
> discussion opened and civilized.
> 
> I remember when I wrote the CPU synchronization code, that I tried
> doing sampling but it had some issues that now I don't remember of. So
> let us try this on real hardware too. This is another argument for
> moving this to tech@.
> 
> Second, why is taking the minimum value the optimal choice? I would
> assume an average would be better. Basically if you have a sequency
> like 900, 900, 900, 900, 0, 900, 900, 900 you pick 0 which could lead
> to some problems, right? Or am I missing something?"
> 
> So could people give the minimum skew approach a spin on real machines
> to see if there are any issues popping up?
> 
> All the best,
> Paul
> 
> On 3/24/21 10:40 AM, YASUOKA Masahiko wrote:
>> Hi,
>> I hit a problem which is caused by going back of monotonic time.  It
>> happens on hosts on VMware ESXi.
>> I wrote the program which repeats the problem.
>>   % cc -o monotime monotime.c -lpthread
>>   % ./monotime
>>   194964 Starting
>>   562210 Starting
>>   483046 Starting
>>   148865 Starting
>>   148865 Back 991.808048665 => 991.007447931
>>   562210 Back 991.808048885 => 991.007448224
>>   483046 Back 991.808049115 => 991.007449172
>>   148865 Stopped
>>   562210 Stopped
>>   483046 Stopped
>>   194964 Stopped
>>   % uname -a
>>   OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
>>   % sysctl kern.version
>>   kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
>>   
>> r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
>>   %
>> monotime.c
>> 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #define NTHREAD  4
>> #define NTRY 5
>> void *
>> start(void *dummy)
>> {
>>  int i;
>>  struct timespec ts0, ts1;
>>  printf("%d Starting\n", (int)getthrid());
>>  clock_gettime(CLOCK_MONOTONIC, );
>>  for (i = 0; i < NTRY; i++) {
>>  clock_gettime(CLOCK_MONOTONIC, );
>>  if (timespeccmp(, , <=)) {
>>  ts0 = ts1;
>>  continue;
>>  }
>>  printf("%d Back %lld.%09lu => %lld.%09lu\n",
>>  (int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
>>  ts1.tv_nsec);
>>  break;
>>  }
>>  printf("%d Stopped\n", (int)getthrid());
>>  return (NULL);
>> }
>> int
>> main(int argc, char *argv[])
>> {
>>  int i, n = NTHREAD;
>>  pthread_t *threads;
>>  threads = calloc(n, sizeof(pthread_t));
>>  for (i = 0; i < n; i++)
>>  pthread_create([i], NULL, start, NULL);
>>  for (i = 0; i < n; i++)
>>  pthread_join(threads[i], NULL);
>> }
>> 
>> The machine has 4 vCPUs and showing the following message on boot.
>>cpu1: disabling user TSC

monotonic time going back by wrong skews

2021-03-24 Thread YASUOKA Masahiko
Hi,

I hit a problem which is caused by monotonic time going backwards.  It
happens on hosts running on VMware ESXi.

I wrote a program which reproduces the problem.

 % cc -o monotime monotime.c -lpthread
 % ./monotime
 194964 Starting
 562210 Starting
 483046 Starting
 148865 Starting
 148865 Back 991.808048665 => 991.007447931
 562210 Back 991.808048885 => 991.007448224
 483046 Back 991.808049115 => 991.007449172
 148865 Stopped
 562210 Stopped
 483046 Stopped
 194964 Stopped
 % uname -a
 OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
 % sysctl kern.version
 kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
 
r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
 %

monotime.c

#include <sys/types.h>
#include <sys/time.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>

#define NTHREAD 4
#define NTRY    5

void *
start(void *dummy)
{
int i;
struct timespec ts0, ts1;

printf("%d Starting\n", (int)getthrid());
clock_gettime(CLOCK_MONOTONIC, );

for (i = 0; i < NTRY; i++) {
clock_gettime(CLOCK_MONOTONIC, );
if (timespeccmp(, , <=)) {
ts0 = ts1;
continue;
}
printf("%d Back %lld.%09lu => %lld.%09lu\n",
(int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
ts1.tv_nsec);
break;
}
printf("%d Stopped\n", (int)getthrid());

return (NULL);
}

/*
 * Spawn NTHREAD threads that each run start() concurrently, then wait
 * for all of them to finish.
 *
 * Returns 0 on success, 1 if the thread array cannot be allocated.
 */
int
main(int argc, char *argv[])
{
	int i, n = NTHREAD;
	pthread_t *threads;

	threads = calloc(n, sizeof(*threads));
	if (threads == NULL)
		return (1);

	for (i = 0; i < n; i++) {
		if (pthread_create(&threads[i], NULL, start, NULL) != 0) {
			/* Only join the threads that were really created. */
			n = i;
			break;
		}
	}
	for (i = 0; i < n; i++)
		pthread_join(threads[i], NULL);

	free(threads);

	return (0);
}


The machine has 4 vCPUs and showing the following message on boot.

  cpu1: disabling user TSC (skew=-5310)
  cpu2: disabling user TSC (skew=-5335)
  cpu3: disabling user TSC (skew=-7386)

This means "user TSC" is disabled because the TSC of cpu{1,2,3} is much
delayed relative to cpu0.

Simply ignoring the skews with the following diff seems to work around
this problem.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..3b951a8b5a3 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
 u_int
 tsc_get_timecount(struct timecounter *tc)
 {
-   return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   return rdtsc_lfence();
 }
 
 void

So I suppose the skews are not calculated properly.  I also found that
NetBSD changed the skew calculation so that it samples 1000 times and
takes the minimum value.

  https://github.com/NetBSD/src/commit/1dec05c1ae197b4acfc7038e49dfddabcbed0dff
  https://github.com/NetBSD/src/commit/66d76b89792bac1c71cd5507ba62b08ad02129ef


I checked skews on the machine by the following debug code.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..83e835e4f82 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -302,16 +302,42 @@ tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, 
uint64_t *aptscp)
*aptscp = tsc_sync_val;
 }
 
+#defineTSC_SYNC_NTIMES 1000
+
+static int tsc_difs[MAXCPUS][TSC_SYNC_NTIMES];
+
+void
+tsc_debug(void)
+{
+   int i, cpuid = curcpu()->ci_cpuid;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   if (i % 10 == 0)
+   printf("%5d", tsc_difs[cpuid][i]);
+   else
+   printf(" %5d", tsc_difs[cpuid][i]);
+   if (i % 10 == 9)
+   printf("\n");
+   }
+   printf("\n");
+}
+
 void
 tsc_sync_bp(struct cpu_info *ci)
 {
+   int i, mindif = INT_MAX, dif;
uint64_t bptsc, aptsc;
 
-   tsc_read_bp(ci, , ); /* discarded - cache effects */
-   tsc_read_bp(ci, , );
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   tsc_read_bp(ci, , );
+   dif = bptsc - aptsc;
+   if (abs(dif) < abs(mindif))
+   mindif = dif;
+   tsc_difs[ci->ci_cpuid][i] = dif;
+   }
 
/* Compute final value to adjust for skew. */
-   ci->ci_tsc_skew = bptsc - aptsc;
+   ci->ci_tsc_skew = mindif;
 }
 
 /*
@@ -342,8 +368,10 @@ tsc_post_ap(struct cpu_info *ci)
 void
 tsc_sync_ap(struct cpu_info *ci)
 {
-   tsc_post_ap(ci);
-   tsc_post_ap(ci);
+   int i;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++)
+   tsc_post_ap(ci);
 }
 
 void


Stopped at  db_enter+0x10:  popq%rbp
ddb{0}> machine ddbcpu 1
Stopped at  x86_ipi_db+0x12:leave
ddb{1}> call tsc_debug
-8445 -6643 -52183 0-3-4-7   -11-5 0
  -11-9-5-3-4-3-7 8-5-6
   -5-9-3-9-7-1-5

Re: diff: efiboot: alignment for media which has IoAlign > 1

2021-03-10 Thread YASUOKA Masahiko
On Wed, 10 Mar 2021 13:15:58 +0100 (CET)
Mark Kettenis  wrote:
>> On Wed, 10 Mar 2021 20:35:41 +0900 (JST)
>> YASUOKA Masahiko  wrote:
>> > efiboot cannot load the kernel properly on some machines if booted
>> > from CD-ROM.  In that case boot fails with a message like follow:
>> > 
>> >booting cd0a:. [359648read symbols: Unknown error: code 255
>> > 
>> > As far as Asou and my test, this happens on hosts on VMware ESXi 6.7,
>> > 7.0 and asou's physical machine.
>> > 
>> > The problem happens because efiboot calls ReadBlocks function with an
>> > unaligned pointer for medias which requires an aligned pointer.  When
>> > efiboot loads a kernel, the pointer becomes unaligned since there is
>> > an ELF section located at unaligned place in CD-ROM.  Previously our
>> > kernel didn't have such a section but it does after switching lld as
>> > the default linker.
>> > 
>> > For test, let me show sample commands which creates a bootable cdrom
>> > image for EFI:
>> > 
>> > mkdir -p efiboot/EFI/BOOT
>> > cp /usr/mdec/BOOTX64.EFI efiboot/EFI/BOOT
>> > makefs -M 1m -m 1m -t msdos -o fat_type=12,sectors_per_cluster=1 \
>> > efiboot.img efiboot
>> > mkdir -p cd-dir/etc
>> > cp bsd.rd cd-dir/
>> > echo "set image bsd.rd" > cd-dir/etc/boot.conf
>> > makefs -t cd9660 -o 
>> > 'rockridge,bootimage=i386;/usr/mdec/cdbr,no-emul-boot,allow-multidot,bootimage=efi;efiboot.img,no-emul-boot'
>> >  \
>> >boot.iso cd-dir
>> > 
>> > the diff is to fix the problem.
>> > 
>> > ok?
> 
> Maybe it is better to always bounce through an aligned buffer?  That
> would make the code a little bit slower but a lot simpler.  And the
> overhead of doing the copy should be small compared to the actual I/O.

Indeed.  It became much simpler.  As I tested on ESXi 7.0, vaio, and
qemu, I don't feel significant performance regression.

ok?

Index: sys/arch/amd64/stand/efiboot/efidev.c
===
RCS file: /var/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efidev.c,v
retrieving revision 1.32
diff -u -p -r1.32 efidev.c
--- sys/arch/amd64/stand/efiboot/efidev.c   9 Dec 2020 18:10:18 -   
1.32
+++ sys/arch/amd64/stand/efiboot/efidev.c   11 Mar 2021 05:59:41 -
@@ -84,10 +84,10 @@ efid_init(struct diskinfo *dip, void *ha
 static EFI_STATUS
 efid_io(int rw, efi_diskinfo_t ed, u_int off, int nsect, void *buf)
 {
-   u_intblks, lba, i_lblks, i_tblks, i_nblks;
+   u_intblks, start, end;
EFI_STATUS   status = EFI_SUCCESS;
-   static u_char   *iblk = NULL;
-   static u_int iblksz = 0;
+   static u_char   *ibuf = NULL;
+   static u_int ibufsz = 0;
 
/* block count of the intrisic block size in DEV_BSIZE */
blks = EFI_BLKSPERSEC(ed);
@@ -95,90 +95,46 @@ efid_io(int rw, efi_diskinfo_t ed, u_int
/* block size < 512.  HP Stream 13 actually has such a disk. */
return (EFI_UNSUPPORTED);
 
-   /* leading and trailing unaligned blocks in intrisic block */
-   i_lblks = ((off % blks) == 0)? 0 : blks - (off % blks);
-   i_tblks = (nsect > i_lblks)? (off + nsect) % blks : 0;
-
-   /* aligned blocks in intrisic block */
-   i_nblks = (nsect > i_lblks + i_tblks)? nsect - (i_lblks + i_tblks) : 0;
-
-   lba = (off + i_lblks) / blks;
-
-   /* allocate the space for reading unaligned blocks */
-   if (ed->blkio->Media->BlockSize != DEV_BSIZE) {
-   if (iblk && iblksz < ed->blkio->Media->BlockSize) {
-   free(iblk, iblksz);
-   iblk = NULL;
-   }
-   if (iblk == NULL) {
-   iblk = alloc(ed->blkio->Media->BlockSize);
-   iblksz = ed->blkio->Media->BlockSize;
-   }
+   start = off / blks;
+   end = (off + nsect + blks - 1) / blks;
+   /*
+* Prepare a buffer to use an aligned memory always that might be
+* required by some medias
+*/
+   if (ibuf && ibufsz < (end - start) * ed->blkio->Media->BlockSize) {
+   free(ibuf, ibufsz);
+   ibuf = NULL;
+   }
+   if (ibuf == NULL) {
+   ibufsz = (end - start) * ed->blkio->Media->BlockSize;
+   ibuf = alloc(ibufsz);
}
+
switch (rw) {
case F_READ:
-   if (i_lblks > 0) {
-   status = EFI_CALL(ed->blkio->ReadBlocks,
-   ed->blkio

Re: diff: efiboot: alignment for media which has IoAlign > 1

2021-03-10 Thread YASUOKA Masahiko
Sorry for making noise, let me update the diff.

> + if (ed->blkio->Media->IoAlign > 1 &&
> + ((UINTN)buf + i_lblks * DEV_BSIZE)
> + % ed->blkio->Media->IoAlign == 0)

first condition was reversed..

On Wed, 10 Mar 2021 20:35:41 +0900 (JST)
YASUOKA Masahiko  wrote:
> efiboot cannot load the kernel properly on some machines if booted
> from CD-ROM.  In that case boot fails with a message like follow:
> 
>booting cd0a:. [359648read symbols: Unknown error: code 255
> 
> As far as Asou and my test, this happens on hosts on VMware ESXi 6.7,
> 7.0 and asou's physical machine.
> 
> The problem happens because efiboot calls ReadBlocks function with an
> unaligned pointer for medias which requires an aligned pointer.  When
> efiboot loads a kernel, the pointer becomes unaligned since there is
> an ELF section located at unaligned place in CD-ROM.  Previously our
> kernel didn't have such a section but it does after switching lld as
> the default linker.
> 
> For test, let me show sample commands which creates a bootable cdrom
> image for EFI:
> 
> mkdir -p efiboot/EFI/BOOT
> cp /usr/mdec/BOOTX64.EFI efiboot/EFI/BOOT
> makefs -M 1m -m 1m -t msdos -o fat_type=12,sectors_per_cluster=1 \
> efiboot.img efiboot
> mkdir -p cd-dir/etc
> cp bsd.rd cd-dir/
> echo "set image bsd.rd" > cd-dir/etc/boot.conf
> makefs -t cd9660 -o 
> 'rockridge,bootimage=i386;/usr/mdec/cdbr,no-emul-boot,allow-multidot,bootimage=efi;efiboot.img,no-emul-boot'
>  \
>   boot.iso cd-dir
> 
> the diff is to fix the problem.
> 
> ok?

Index: sys/arch/amd64/stand/efiboot/efidev.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efidev.c,v
retrieving revision 1.32
diff -u -p -r1.32 efidev.c
--- sys/arch/amd64/stand/efiboot/efidev.c   9 Dec 2020 18:10:18 -   
1.32
+++ sys/arch/amd64/stand/efiboot/efidev.c   10 Mar 2021 11:41:39 -
@@ -84,7 +84,7 @@ efid_init(struct diskinfo *dip, void *ha
 static EFI_STATUS
 efid_io(int rw, efi_diskinfo_t ed, u_int off, int nsect, void *buf)
 {
-   u_intblks, lba, i_lblks, i_tblks, i_nblks;
+   u_inti, blks, lba, i_lblks, i_tblks, i_nblks;
EFI_STATUS   status = EFI_SUCCESS;
static u_char   *iblk = NULL;
static u_int iblksz = 0;
@@ -127,10 +127,29 @@ efid_io(int rw, efi_diskinfo_t ed, u_int
min(nsect, i_lblks) * DEV_BSIZE);
}
if (i_nblks > 0) {
-   status = EFI_CALL(ed->blkio->ReadBlocks,
-   ed->blkio, ed->mediaid, lba,
-   ed->blkio->Media->BlockSize * (i_nblks / blks),
-   buf + (i_lblks * DEV_BSIZE));
+   /*
+* Pass the buffer directly to the EFI function only if
+* the buffer is properly aligned as the media requires
+*/
+   if (ed->blkio->Media->IoAlign <= 1 ||
+   ((UINTN)buf + i_lblks * DEV_BSIZE)
+   % ed->blkio->Media->IoAlign == 0)
+   status = EFI_CALL(ed->blkio->ReadBlocks,
+   ed->blkio, ed->mediaid, lba,
+   ed->blkio->Media->BlockSize * (i_nblks /
+   blks), buf + i_lblks * DEV_BSIZE);
+   else {
+   for (i = 0; i < i_nblks; i += blks) {
+   status = EFI_CALL(ed->blkio->ReadBlocks,
+   ed->blkio, ed->mediaid,
+   lba + i / blks,
+   ed->blkio->Media->BlockSize, iblk);
+   if (EFI_ERROR(status))
+   break;
+   memcpy(buf + i * DEV_BSIZE, iblk,
+   ed->blkio->Media->BlockSize);
+   }
+   }
if (EFI_ERROR(status))
goto on_eio;
}
@@ -160,10 +179,30 @@ efid_io(int rw, efi_diskinfo_t ed, u_int
ed->blkio->Media->BlockSize, iblk);
}
if (i_nblks > 0) {
-   status = EFI_CALL(ed->blkio->WriteBlocks,
-   ed->blkio, ed->mediaid, lba,
-

diff: efiboot: alignment for media which has IoAlign > 1

2021-03-10 Thread YASUOKA Masahiko
Hi,

efiboot cannot load the kernel properly on some machines if booted
from CD-ROM.  In that case boot fails with a message like the following:

   booting cd0a:. [359648read symbols: Unknown error: code 255

As far as Asou and I have tested, this happens on hosts on VMware ESXi 6.7,
7.0 and Asou's physical machine.

The problem happens because efiboot calls the ReadBlocks function with an
unaligned pointer for media which require an aligned pointer.  When
efiboot loads a kernel, the pointer becomes unaligned since there is
an ELF section located at an unaligned place on the CD-ROM.  Previously our
kernel didn't have such a section but it does after switching to lld as
the default linker.

For test, let me show sample commands which creates a bootable cdrom
image for EFI:

mkdir -p efiboot/EFI/BOOT
cp /usr/mdec/BOOTX64.EFI efiboot/EFI/BOOT
makefs -M 1m -m 1m -t msdos -o fat_type=12,sectors_per_cluster=1 \
efiboot.img efiboot
mkdir -p cd-dir/etc
cp bsd.rd cd-dir/
echo "set image bsd.rd" > cd-dir/etc/boot.conf
makefs -t cd9660 -o 
'rockridge,bootimage=i386;/usr/mdec/cdbr,no-emul-boot,allow-multidot,bootimage=efi;efiboot.img,no-emul-boot'
 \
boot.iso cd-dir

the diff is to fix the problem.

ok?

Index: sys/arch/amd64/stand/efiboot/efidev.c
===
RCS file: /disk/cvs/openbsd/src/sys/arch/amd64/stand/efiboot/efidev.c,v
retrieving revision 1.32
diff -u -p -r1.32 efidev.c
--- sys/arch/amd64/stand/efiboot/efidev.c   9 Dec 2020 18:10:18 -   
1.32
+++ sys/arch/amd64/stand/efiboot/efidev.c   10 Mar 2021 10:58:35 -
@@ -84,7 +84,7 @@ efid_init(struct diskinfo *dip, void *ha
 static EFI_STATUS
 efid_io(int rw, efi_diskinfo_t ed, u_int off, int nsect, void *buf)
 {
-   u_intblks, lba, i_lblks, i_tblks, i_nblks;
+   u_inti, blks, lba, i_lblks, i_tblks, i_nblks;
EFI_STATUS   status = EFI_SUCCESS;
static u_char   *iblk = NULL;
static u_int iblksz = 0;
@@ -127,10 +127,29 @@ efid_io(int rw, efi_diskinfo_t ed, u_int
min(nsect, i_lblks) * DEV_BSIZE);
}
if (i_nblks > 0) {
-   status = EFI_CALL(ed->blkio->ReadBlocks,
-   ed->blkio, ed->mediaid, lba,
-   ed->blkio->Media->BlockSize * (i_nblks / blks),
-   buf + (i_lblks * DEV_BSIZE));
+   /*
+* Pass the buffer directly to the EFI function only if
+* the buffer is properly aligned as the media requires
+*/
+   if (ed->blkio->Media->IoAlign > 1 &&
+   ((UINTN)buf + i_lblks * DEV_BSIZE)
+   % ed->blkio->Media->IoAlign == 0)
+   status = EFI_CALL(ed->blkio->ReadBlocks,
+   ed->blkio, ed->mediaid, lba,
+   ed->blkio->Media->BlockSize * (i_nblks /
+   blks), buf + i_lblks * DEV_BSIZE);
+   else {
+   for (i = 0; i < i_nblks; i += blks) {
+   status = EFI_CALL(ed->blkio->ReadBlocks,
+   ed->blkio, ed->mediaid,
+   lba + i / blks,
+   ed->blkio->Media->BlockSize, iblk);
+   if (EFI_ERROR(status))
+   break;
+   memcpy(buf + i * DEV_BSIZE, iblk,
+   ed->blkio->Media->BlockSize);
+   }
+   }
if (EFI_ERROR(status))
goto on_eio;
}
@@ -160,10 +179,30 @@ efid_io(int rw, efi_diskinfo_t ed, u_int
ed->blkio->Media->BlockSize, iblk);
}
if (i_nblks > 0) {
-   status = EFI_CALL(ed->blkio->WriteBlocks,
-   ed->blkio, ed->mediaid, lba,
-   ed->blkio->Media->BlockSize * (i_nblks / blks),
-   buf + (i_lblks * DEV_BSIZE));
+   /*
+* Pass the buffer directly to the EFI function only if
+* the buffer is properly aligned as the media requires
+*/
+   if (ed->blkio->Media->IoAlign > 1 &&
+   ((UINTN)buf + i_lblks * DEV_BSIZE)
+   % ed->blkio->Media->IoAlign == 0)
+   status = EFI_CALL(ed->blkio->WriteBlocks,
+   ed->blkio, ed->mediaid, lba,
+   

Re: 2 diffs for dev/acpi/dsdt.c

2021-02-27 Thread YASUOKA Masahiko
Hi,

Let me update "diff #2".

On Fri, 26 Feb 2021 13:42:32 +0900 (JST)
YASUOKA Masahiko  wrote:
> My vaio repeatedly crashed by "Data modified on freelist"(*1) or other
> memory corruptions.  After my long time debug, I found the route cause
> is a handling of references of LocalX, like the following:
> 
> If ((SMRW (0x0B, 0x16, 0x21, RefOf (Local0)) == Zero))
> 
> In the called control method, "RefOf (Local1)" is referred as Arg3, is
> stored a value like the following:
> 
> Arg3 = \_SB.PCI0.LPCB.EC0.SMD0
> 
> In aml_store(), lvalue is reset if lvalue is a LocalX.  But since that
> was done before resolving the reference, lvalue was not reset if
> lvalue is a reference of LocalX.
> 
> diff #1 fixes that problem.  It resets lvalue after resolving
> references.
> 
> ok?
> 
> diff #2 adds aml_die() if any memory corruption occurs when creating
> field in a buffer.  This actually happens on my vaio (pro pk 14) if
> diff #1 is not applied.
> 
> ok?
> 
> diff #1
> 
> Index: sys/dev/acpi/dsdt.c
> ===
> RCS file: /var/cvs/openbsd/src/sys/dev/acpi/dsdt.c,v
> retrieving revision 1.257
> diff -u -p -r1.257 dsdt.c
> --- sys/dev/acpi/dsdt.c   17 Dec 2020 17:57:19 -  1.257
> +++ sys/dev/acpi/dsdt.c   26 Feb 2021 04:12:03 -
> @@ -2961,11 +2961,11 @@ aml_store(struct aml_scope *scope, struc
>   aml_rwfield(rhs, 0, rhs->v_field.bitlen, , ACPI_IOREAD);
>   rhs = 
>   }
> +
> + lhs = aml_gettgt(lhs, AMLOP_STORE);
>   /* Store to LocalX: free value */
>   if (lhs->stack >= AMLOP_LOCAL0 && lhs->stack <= AMLOP_LOCAL7)
>   aml_freevalue(lhs);
> -
> - lhs = aml_gettgt(lhs, AMLOP_STORE);
>   switch (lhs->type) {
>   case AML_OBJTYPE_UNINITIALIZED:
>   aml_copyvalue(lhs, rhs);
> 
> diff #2
> 
> Index: sys/dev/acpi/dsdt.c
> ===
> RCS file: /var/cvs/openbsd/src/sys/dev/acpi/dsdt.c,v
> retrieving revision 1.257
> diff -u -p -r1.257 dsdt.c
> --- sys/dev/acpi/dsdt.c   17 Dec 2020 17:57:19 -  1.257
> +++ sys/dev/acpi/dsdt.c   26 Feb 2021 04:33:21 -
> @@ -2742,11 +2742,17 @@ aml_rwfield(struct aml_value *fld, int b
>   } else if (mode == ACPI_IOREAD) {
>   /* bufferfield:read */
>   _aml_setvalue(val, AML_OBJTYPE_INTEGER, 0, 0);
> + if (ref1->length < aml_bytepos(fld->v_field.bitpos) +
> + aml_bytelen(fld->v_field.bitlen))
> + aml_die("bufferfield:read out of range");
>   aml_bufcpy(>v_integer, 0, ref1->v_buffer,
>   fld->v_field.bitpos, fld->v_field.bitlen);
>   } else {
>   /* bufferfield:write */
>   val = aml_convert(val, AML_OBJTYPE_INTEGER, -1);
> + if (ref1->length < aml_bytepos(fld->v_field.bitpos) +
> + aml_bytelen(fld->v_field.bitlen))
> + aml_die("bufferfield:write out of range");
>   aml_bufcpy(ref1->v_buffer, fld->v_field.bitpos, >v_integer,
>   0, fld->v_field.bitlen);
>   aml_delref(, "wrbuffld");

It's better to die when creating a field which refers to out-of-range
memory.

ok?

Index: sys/dev/acpi/dsdt.c
===
RCS file: /disk/cvs/openbsd/src/sys/dev/acpi/dsdt.c,v
retrieving revision 1.257
diff -u -p -r1.257 dsdt.c
--- sys/dev/acpi/dsdt.c 17 Dec 2020 17:57:19 -  1.257
+++ sys/dev/acpi/dsdt.c 27 Feb 2021 09:58:31 -
@@ -2790,6 +2790,11 @@ aml_createfield(struct aml_value *field,
data->type != AML_OBJTYPE_BUFFER)
data = aml_convert(data, AML_OBJTYPE_BUFFER, -1);
 
+   if (field->type == AML_OBJTYPE_BUFFERFIELD &&
+   data->length < aml_bytepos(bpos) + aml_bytelen(blen))
+   aml_die("%s(%s) out of range\n", aml_mnem(opcode, 0),
+   aml_nodename(field->node));
+
field->v_field.type = opcode;
field->v_field.bitpos = bpos;
field->v_field.bitlen = blen;



2 diffs for dev/acpi/dsdt.c

2021-02-25 Thread YASUOKA Masahiko
Hi,

My vaio repeatedly crashed with "Data modified on freelist"(*1) or other
memory corruptions.  After a long debugging session, I found the root cause
is the handling of references to LocalX, like the following:

If ((SMRW (0x0B, 0x16, 0x21, RefOf (Local0)) == Zero))

In the called control method, "RefOf (Local1)" is referred to as Arg3 and
is assigned a value like the following:

Arg3 = \_SB.PCI0.LPCB.EC0.SMD0

In aml_store(), lvalue is reset if lvalue is a LocalX.  But since that
was done before resolving the reference, lvalue was not reset if
lvalue is a reference of LocalX.

diff #1 fixes that problem.  It resets lvalue after resolving
references.

ok?

diff #2 adds aml_die() if any memory corruption occurs when creating
field in a buffer.  This actually happens on my vaio (pro pk 14) if
diff #1 is not applied.

ok?

diff #1

Index: sys/dev/acpi/dsdt.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/acpi/dsdt.c,v
retrieving revision 1.257
diff -u -p -r1.257 dsdt.c
--- sys/dev/acpi/dsdt.c 17 Dec 2020 17:57:19 -  1.257
+++ sys/dev/acpi/dsdt.c 26 Feb 2021 04:12:03 -
@@ -2961,11 +2961,11 @@ aml_store(struct aml_scope *scope, struc
aml_rwfield(rhs, 0, rhs->v_field.bitlen, , ACPI_IOREAD);
rhs = 
}
+
+   lhs = aml_gettgt(lhs, AMLOP_STORE);
/* Store to LocalX: free value */
if (lhs->stack >= AMLOP_LOCAL0 && lhs->stack <= AMLOP_LOCAL7)
aml_freevalue(lhs);
-
-   lhs = aml_gettgt(lhs, AMLOP_STORE);
switch (lhs->type) {
case AML_OBJTYPE_UNINITIALIZED:
aml_copyvalue(lhs, rhs);

diff #2

Index: sys/dev/acpi/dsdt.c
===
RCS file: /var/cvs/openbsd/src/sys/dev/acpi/dsdt.c,v
retrieving revision 1.257
diff -u -p -r1.257 dsdt.c
--- sys/dev/acpi/dsdt.c 17 Dec 2020 17:57:19 -  1.257
+++ sys/dev/acpi/dsdt.c 26 Feb 2021 04:33:21 -
@@ -2742,11 +2742,17 @@ aml_rwfield(struct aml_value *fld, int b
} else if (mode == ACPI_IOREAD) {
/* bufferfield:read */
_aml_setvalue(val, AML_OBJTYPE_INTEGER, 0, 0);
+   if (ref1->length < aml_bytepos(fld->v_field.bitpos) +
+   aml_bytelen(fld->v_field.bitlen))
+   aml_die("bufferfield:read out of range");
aml_bufcpy(>v_integer, 0, ref1->v_buffer,
fld->v_field.bitpos, fld->v_field.bitlen);
} else {
/* bufferfield:write */
val = aml_convert(val, AML_OBJTYPE_INTEGER, -1);
+   if (ref1->length < aml_bytepos(fld->v_field.bitpos) +
+   aml_bytelen(fld->v_field.bitlen))
+   aml_die("bufferfield:write out of range");
aml_bufcpy(ref1->v_buffer, fld->v_field.bitpos, >v_integer,
0, fld->v_field.bitlen);
aml_delref(, "wrbuffld");


*1 example console log

Data modified on freelist: word -35183627074926 of object 
0x824a3060 size 0x10 previous type temp (invalid addr 
0x8027023e55f0)
uvm_fault(0x81f63958, 0x8027023e55f8, 0, 1) -> e
kernel: page fault trap, code=0
Stopped at  malloc+0x482:   movq0x8(%r14),%rcx
Running script...
ddb{0}> malloc(10,91,5) at malloc+0x482

i915_gem_do_execbuffer(802ab078,80ee0c00,8000337a7970,820ca000,0)
 at i915_gem_do_execbuffer+0xa52

i915_gem_execbuffer2_ioctl(802ab078,8000337a7970,80ee0c00) 
at i915_gem_execbuffer2_ioctl+0x144
drmioctl(15700,80406469,8000337a7970,3,8000336a8798) at 
drmioctl+0xd8

VOP_IOCTL(fd8227abbeb0,80406469,8000337a7970,3,fd826bd1dd88,8000336a8798)
 at VOP_IOCTL+0x55
vn_ioctl(fd82282ee8e8,80406469,8000337a7970,8000336a8798) at 
vn_ioctl+0x64
sys_ioctl(8000336a8798,8000337a7a80,8000337a7ae0) at 
sys_ioctl+0x3c2
syscall(8000337a7b50) at syscall+0x389
Xsyscall(6,36,0,36,80406469,7f7f5c00) at Xsyscall+0x128
end of kernel
end trace frame: 0x7f7f5bd0, count: -9



Re: pppac(4): remove `sc_dead' logic

2021-02-10 Thread YASUOKA Masahiko
ok yasuoka

Thanks,

On Tue, 9 Feb 2021 12:06:08 +0300
Vitaliy Makkoveev  wrote:
> `sc_dead' is used to prevent pppac_ioctl() from being called on a dying
> pppac(4) interface. But now if_detach() makes the dying `ifp' inaccessible
> and waits for references which are in use. This logic is not required
> anymore. Also I moved if_detach() before klist_invalidate() to prevent the
> case where pppac_qstart() bumps `sc_rsel'.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.108
> diff -u -p -r1.108 if_pppx.c
> --- sys/net/if_pppx.c 1 Feb 2021 07:46:55 -   1.108
> +++ sys/net/if_pppx.c 9 Feb 2021 09:05:23 -
> @@ -930,7 +930,6 @@ RBT_GENERATE(pppx_ifs, pppx_if, pxi_entr
>  
>  struct pppac_softc {
>   struct ifnetsc_if;
> - unsigned intsc_dead;/* [N] */
>   dev_t   sc_dev; /* [I] */
>   LIST_ENTRY(pppac_softc)
>   sc_entry;   /* [K] */
> @@ -1305,17 +1304,16 @@ pppacclose(dev_t dev, int flags, int mod
>   int s;
>  
>   NET_LOCK();
> - sc->sc_dead = 1;
>   CLR(ifp->if_flags, IFF_RUNNING);
>   NET_UNLOCK();
>  
> + if_detach(ifp);
> +
>   s = splhigh();
>   klist_invalidate(>sc_rsel.si_note);
>   klist_invalidate(>sc_wsel.si_note);
>   splx(s);
>  
> - if_detach(ifp);
> -
>   pool_put(_session_pool, sc->sc_multicast_session);
>   NET_LOCK();
>   pipex_destroy_all_sessions(sc);
> @@ -1330,12 +1328,8 @@ pppacclose(dev_t dev, int flags, int mod
>  static int
>  pppac_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
>  {
> - struct pppac_softc *sc = ifp->if_softc;
>   /* struct ifreq *ifr = (struct ifreq *)data; */
>   int error = 0;
> -
> - if (sc->sc_dead)
> - return (ENXIO);
>  
>   switch (cmd) {
>   case SIOCSIFADDR:



Re: npppd(8)/pppac(4): remove dummy TUNSIFMODE ioctl(2) call

2021-01-31 Thread YASUOKA Masahiko
Yes,

ok yasuoka

On Fri, 29 Jan 2021 14:32:39 +0300
Vitaliy Makkoveev  wrote:
> Since OpenBSD 6.7 npppd(8) can't work over tun(4) anymore. I propose to
> remove dummy TUNSIFMODE ioctl(2) call.
> 
> Index: sys/net/if_pppx.c
> ===
> RCS file: /cvs/src/sys/net/if_pppx.c,v
> retrieving revision 1.106
> diff -u -p -r1.106 if_pppx.c
> --- sys/net/if_pppx.c 25 Dec 2020 12:59:53 -  1.106
> +++ sys/net/if_pppx.c 29 Jan 2021 11:10:40 -
> @@ -920,12 +920,6 @@ pppx_if_ioctl(struct ifnet *ifp, u_long 
>  RBT_GENERATE(pppx_ifs, pppx_if, pxi_entry, pppx_if_cmp);
>  
>  /*
> - * pppac(4) - PPP Access Concentrator interface
> - */
> -
> -#include 
> -
> -/*
>   * Locks used to protect struct members and global data
>   *   I   immutable after creation
>   *   K   kernel lock
> @@ -1188,9 +1182,6 @@ pppacioctl(dev_t dev, u_long cmd, caddr_
>  
>   NET_LOCK();
>   switch (cmd) {
> - case TUNSIFMODE: /* make npppd happy */
> - break;
> -
>   case FIONBIO:
>   break;
>   case FIONREAD:
> Index: usr.sbin/npppd/npppd/npppd_iface.c
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd_iface.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 npppd_iface.c
> --- usr.sbin/npppd/npppd/npppd_iface.c2 Jan 2021 13:15:15 -   
> 1.14
> +++ usr.sbin/npppd/npppd/npppd_iface.c29 Jan 2021 11:10:41 -
> @@ -275,7 +275,6 @@ npppd_iface_reinit(npppd_iface *_this, s
>  int
>  npppd_iface_start(npppd_iface *_this)
>  {
> - int x;
>   charbuf[PATH_MAX];
>  
>   NPPPD_IFACE_ASSERT(_this != NULL);
> @@ -285,16 +284,6 @@ npppd_iface_start(npppd_iface *_this)
>   if ((_this->devf = priv_open(buf, O_RDWR | O_NONBLOCK)) < 0) {
>   npppd_iface_log(_this, LOG_ERR, "open(%s) failed: %m", buf);
>   goto fail;
> - }
> -
> - if (_this->using_pppx == 0) {
> - x = IFF_BROADCAST;
> - if (ioctl(_this->devf, TUNSIFMODE, ) != 0) {
> - npppd_iface_log(_this, LOG_ERR,
> - "ioctl(TUNSIFMODE=IFF_BROADCAST) failed "
> - "in %s(): %m", __func__);
> - goto fail;
> - }
>   }
>  
>   event_set(&_this->ev, _this->devf, EV_READ | EV_PERSIST,
> 



Re: Wireguard: can't remove multiple peers at once.

2021-01-13 Thread YASUOKA Masahiko
Hi,

On Thu, 14 Jan 2021 08:54:36 +0900
Yuichiro NAITO  wrote:
> Does anybody please review my code?
> 
> Yasuoka-san is a colleague of mine at work.
> So, he is interested in this topic. That’s why I CCed him on this mail.
> I don’t mean he is a reviewer.
> 
>> 2021/01/12 11:27、Yuichiro NAITO のメール:
>> I have set up multiple peers in a wg0 interface,
>> and tried to remove more than one peer at once.
>> Ifconfig(1) only removes the first peer.
>> 
>> Command line was like following.
>> 
>> ```
>> # ifconfig wg0 -wgpeer  -wgpeer  -wgpeer 
>> ```
>> 
>> Only the first peer was removed.
>> 
>> I think next peer pointer isn't calculated in case of removing peer
>> in sys/net/if_wg.c: wg_ioctl_set() function.
>> 
>> I have tried following patch that can fix this problem.

Yes, the diff seems good.

I made the following whitespace change.

> @@ -2333,6 +2333,11 @@ wg_ioctl_set(struct wg_softc *sc, struct wg_data_io 
> *data)
>   }
> 
>   peer_p = (struct wg_peer_io *)aip_p;
> + continue;
> + next_peer:
> + aip_p = _p->p_aips[0];
> + aip_p += peer_o.p_aips_count;
> + peer_p = (struct wg_peer_io *)aip_p;
>   }
> 
> error:

It seems we prefer putting goto labels at the beginning of the line.


ok?

Fix wg(4) ioctl to be able to handle multiple wgpeers.
Diff from Yuichiro NAITO.

Index: sys/net/if_wg.c
===
RCS file: /cvs/src/sys/net/if_wg.c,v
retrieving revision 1.14
diff -u -p -r1.14 if_wg.c
--- sys/net/if_wg.c 1 Sep 2020 19:06:59 -   1.14
+++ sys/net/if_wg.c 14 Jan 2021 07:26:48 -
@@ -2270,7 +2270,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
 
/* Peer must have public key */
if (!(peer_o.p_flags & WG_PEER_HAS_PUBLIC))
-   continue;
+   goto next_peer;
 
/* 0 = latest protocol, 1 = this protocol */
if (peer_o.p_protocol_version != 0) {
@@ -2283,7 +2283,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
/* Get local public and check that peer key doesn't match */
if (noise_local_keys(>sc_local, public, NULL) == 0 &&
bcmp(public, peer_o.p_public, WG_KEY_SIZE) == 0)
-   continue;
+   goto next_peer;
 
/* Lookup peer, or create if it doesn't exist */
if ((peer = wg_peer_lookup(sc, peer_o.p_public)) == NULL) {
@@ -2291,7 +2291,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
 * Also, don't create a new one if we only want to
 * update. */
if (peer_o.p_flags & (WG_PEER_REMOVE|WG_PEER_UPDATE))
-   continue;
+   goto next_peer;
 
if ((peer = wg_peer_create(sc,
peer_o.p_public)) == NULL) {
@@ -2303,7 +2303,7 @@ wg_ioctl_set(struct wg_softc *sc, struct
/* Remove peer and continue if specified */
if (peer_o.p_flags & WG_PEER_REMOVE) {
wg_peer_destroy(peer);
-   continue;
+   goto next_peer;
}
 
if (peer_o.p_flags & WG_PEER_HAS_ENDPOINT)
@@ -2332,6 +2332,11 @@ wg_ioctl_set(struct wg_softc *sc, struct
aip_p++;
}
 
+   peer_p = (struct wg_peer_io *)aip_p;
+   continue;
+next_peer:
+   aip_p = _p->p_aips[0];
+   aip_p += peer_o.p_aips_count;
peer_p = (struct wg_peer_io *)aip_p;
}
 



Re: pipex(4)/npppd(8): remove dummy PIPEX{G,S}MODE ioctl(2) calls

2021-01-02 Thread YASUOKA Masahiko
Yes,

ok yasuoka

On Wed, 30 Dec 2020 03:02:55 +0300
Vitaliy Makkoveev  wrote:
> This time pipex(4) related ioctl(2) calls PIPEX{S,G}MODE are pretty 
> dummy and were kept for backward compatibility reasons. The diff below
> removes them.
> 
> ok?
> 
> Index: share/man/man4/pipex.4
> ===
> RCS file: /cvs/src/share/man/man4/pipex.4,v
> retrieving revision 1.13
> diff -u -p -r1.13 pipex.4
> --- share/man/man4/pipex.49 Aug 2020 14:35:31 -   1.13
> +++ share/man/man4/pipex.429 Dec 2020 23:51:57 -
> @@ -57,20 +57,6 @@ or
>  devices.
>  The added requests are as follows:
>  .Bl -tag -width Ds
> -.It Dv PIPEXGMODEFa "int *"
> -Get the devices's
> -.Nm
> -operation mode.
> -1 to enable
> -.Nm
> -on this device; 0 to disable.
> -.It Dv PIPEXSMODEFa "int *"
> -Set the device's
> -.Nm
> -operation mode.
> -1 to enable
> -.Nm
> -on this device; 0 to disable.
>  .It Dv PIPEXASESSION Fa "struct pipex_session_req *"
>  Add a new PPP session to be handled by
>  .Nm .
> Index: sys/net/pipex.c
> ===
> RCS file: /cvs/src/sys/net/pipex.c,v
> retrieving revision 1.127
> diff -u -p -r1.127 pipex.c
> --- sys/net/pipex.c   30 Aug 2020 19:48:16 -  1.127
> +++ sys/net/pipex.c   29 Dec 2020 23:51:59 -
> @@ -163,13 +163,6 @@ pipex_ioctl(void *ownersc, u_long cmd, c
>  
>   NET_ASSERT_LOCKED();
>   switch (cmd) {
> - case PIPEXSMODE:
> - break;
> -
> - case PIPEXGMODE:
> - *(int *)data = 1;
> - break;
> -
>   case PIPEXCSESSION:
>   ret = pipex_config_session(
>   (struct pipex_session_config_req *)data, ownersc);
> Index: sys/net/pipex.h
> ===
> RCS file: /cvs/src/sys/net/pipex.h,v
> retrieving revision 1.28
> diff -u -p -r1.28 pipex.h
> --- sys/net/pipex.h   27 Aug 2020 10:47:52 -  1.28
> +++ sys/net/pipex.h   29 Dec 2020 23:51:59 -
> @@ -165,8 +165,6 @@ struct pipex_session_descr_req {
>  
>  
>  /* PIPEX ioctls */
> -#define PIPEXSMODE   _IOW ('p',  1, int)
> -#define PIPEXGMODE   _IOR ('p',  2, int)
>  #define PIPEXASESSION_IOW ('p',  3, struct pipex_session_req)
>  #define PIPEXDSESSION_IOWR('p',  4, struct pipex_session_close_req)
>  #define PIPEXCSESSION_IOW ('p',  5, struct pipex_session_config_req)
> Index: usr.sbin/npppd/npppd/npppd_iface.c
> ===
> RCS file: /cvs/src/usr.sbin/npppd/npppd/npppd_iface.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 npppd_iface.c
> --- usr.sbin/npppd/npppd/npppd_iface.c5 Dec 2015 16:10:31 -   
> 1.13
> +++ usr.sbin/npppd/npppd/npppd_iface.c29 Dec 2020 23:52:00 -
> @@ -96,11 +96,6 @@ static void  npppd_iface_io_event_handle
>  static int   npppd_iface_log (npppd_iface *, int, const char *, ...)
>   __printflike(3,4);
>  
> -#ifdef USE_NPPPD_PIPEX
> -static int npppd_iface_pipex_enable(npppd_iface *_this);
> -static int npppd_iface_pipex_disable(npppd_iface *_this);
> -#endif /* USE_NPPPD_PIPEX */
> -
>  
>  /** initialize npppd_iface */
>  void
> @@ -311,12 +306,7 @@ npppd_iface_start(npppd_iface *_this)
>   goto fail;
>   }
>  
> -#ifdef USE_NPPPD_PIPEX
> - if (npppd_iface_pipex_enable(_this) != 0) {
> - log_printf(LOG_WARNING,
> - "npppd_iface_pipex_enable() failed: %m");
> - }
> -#else
> +#ifndef USE_NPPPD_PIPEX
>   if (_this->using_pppx) {
>   npppd_iface_log(_this, LOG_ERR,
>   "pipex is required when using pppx interface");
> @@ -358,13 +348,6 @@ npppd_iface_stop(npppd_iface *_this)
>   in_host_route_delete(&_this->ip4addr, );
>   }
>   if (_this->devf >= 0) {
> -#ifdef USE_NPPPD_PIPEX
> - if (npppd_iface_pipex_disable(_this) != 0) {
> - log_printf(LOG_CRIT,
> - "npppd_iface_pipex_disable() failed: %m");
> - }
> -#endif /* USE_NPPPD_PIPEX */
> -
>   event_del(&_this->ev);
>   close(_this->devf);
>   npppd_iface_log(_this, LOG_INFO, "Stopped");
> @@ -381,32 +364,6 @@ npppd_iface_fini(npppd_iface *_this)
>   NPPPD_IFACE_ASSERT(_this != NULL);
>   _this->initialized = 0;
>  }
> -
> -
> -/***
> - * PIPEX related functions
> - ***/
> -#ifdef USE_NPPPD_PIPEX
> -
> -/** enable PIPEX on PPPAC interface */
> -int
> -npppd_iface_pipex_enable(npppd_iface *_this)
> -{
> - int enable = 1;
> -
> - return ioctl(_this->devf, PIPEXSMODE, );
> -}
> -
> -/** disable PIPEX on PPPAC interface */
> -int
> -npppd_iface_pipex_disable(npppd_iface *_this)
> -{
> - int disable = 0;
> -
> - 

Re: diff: pfctl: error message for nonexisting rtable

2020-09-17 Thread YASUOKA Masahiko
the condition was reversed.

ok?
Index: parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.702
diff -u -p -r1.702 parse.y
--- parse.y 17 Sep 2020 10:09:43 -  1.702
+++ parse.y 17 Sep 2020 14:23:42 -
@@ -1216,7 +1216,7 @@ antispoof_opt : LABEL label   {
if ($2 < 0 || $2 > RT_TABLEID_MAX) {
yyerror("invalid rtable id");
YYERROR;
-   } else if (lookup_rtable($2) >= 1) {
+   } else if (lookup_rtable($2) < 1) {
yyerror("rtable %lld does not exist", $2);
YYERROR;
}
@@ -2003,7 +2003,7 @@ filter_opt: USER uids {
if ($2 < 0 || $2 > RT_TABLEID_MAX) {
yyerror("invalid rtable id");
YYERROR;
-   } else if (lookup_rtable($2) >= 1) {
+   } else if (lookup_rtable($2) < 1) {
yyerror("rtable %lld does not exist", $2);
YYERROR;
}



Re: diff: pfctl: error message for nonexisting rtable

2020-09-17 Thread YASUOKA Masahiko
Hi,

I just committed yours.

Thanks,

On Wed, 16 Sep 2020 16:07:40 +0200
Klemens Nanni  wrote:
> On Wed, Sep 16, 2020 at 07:49:19PM +0900, YASUOKA Masahiko wrote:
>> New diff is using -1 for ENOENT.
>> 
>> Also domainid == 0 is a valid domain id, but previous diff cannot make
>> a cache of it since 0 is the default value.  So new diff is doing
>> 
>> -static u_int found[RT_TABLEID_MAX+1];
>> +static struct {
>> +int  found;
>> +int  domainid;
>> +}rtables[RT_TABLEID_MAX+1];
>> 
>> to distinguish the default 0 and domainid 0.
> This looks more complicated than it needs to be, but I also don't want
> to bikeshed it;  given that the parser is happy with this and we plan to
> remove this code altogether anyway in the next release cycle:  OK kn.
> 
> Alternatively, here's a much simpler diff resembling what I had in mind.
> Feel free to commit this instead (with my OK), give me an OK for it or
> go ahead with yours.
> 
> It uses the same function and reflects the fact that every rdomain is a
> rtable but not every rtable is also a rdomain (your choice of `domainid'
> seems inconsistent with that).
> 
> Index: parse.y
> ===
> RCS file: /cvs/src/sbin/pfctl/parse.y,v
> retrieving revision 1.701
> diff -u -p -r1.701 parse.y
> --- parse.y   28 Jan 2020 15:40:35 -  1.701
> +++ parse.y   16 Sep 2020 13:58:23 -
> @@ -392,7 +392,7 @@ intinvalid_redirect(struct node_host *
>  u_int16_t parseicmpspec(char *, sa_family_t);
>  int   kw_casecmp(const void *, const void *);
>  int   map_tos(char *string, int *);
> -int   rdomain_exists(u_int);
> +int   lookup_rtable(u_int);
>  int   filteropts_to_rule(struct pf_rule *, struct filter_opts *);
>  
>  TAILQ_HEAD(loadanchorshead, loadanchors)
> @@ -1216,6 +1216,9 @@ antispoof_opt   : LABEL label   {
>   if ($2 < 0 || $2 > RT_TABLEID_MAX) {
>   yyerror("invalid rtable id");
>   YYERROR;
> + } else if (lookup_rtable($2) >= 1) {
> + yyerror("rtable %lld does not exist", $2);
> + YYERROR;
>   }
>   antispoof_opts.rtableid = $2;
>   }
> @@ -2000,6 +2003,9 @@ filter_opt  : USER uids {
>   if ($2 < 0 || $2 > RT_TABLEID_MAX) {
>   yyerror("invalid rtable id");
>   YYERROR;
> + } else if (lookup_rtable($2) >= 1) {
> + yyerror("rtable %lld does not exist", $2);
> + YYERROR;
>   }
>   filter_opts.rtableid = $2;
>   }
> @@ -2475,7 +2481,7 @@ if_item : STRING{
>   | RDOMAIN NUMBER{
>   if ($2 < 0 || $2 > RT_TABLEID_MAX)
>   yyerror("rdomain %lld outside range", $2);
> - else if (rdomain_exists($2) != 1)
> + else if (lookup_rtable($2) != 2)
>   yyerror("rdomain %lld does not exist", $2);
>  
>   $$ = calloc(1, sizeof(struct node_if));
> @@ -5868,37 +5874,38 @@ map_tos(char *s, int *val)
>  }
>  
>  int
> -rdomain_exists(u_int rdomain)
> +lookup_rtable(u_int rtableid)
>  {
>   size_t   len;
>   struct rt_tableinfo  info;
>   int  mib[6];
>   static u_int found[RT_TABLEID_MAX+1];
>  
> - if (found[rdomain] == 1)
> - return 1;
> + if (found[rtableid])
> + return found[rtableid];
>  
>   mib[0] = CTL_NET;
>   mib[1] = PF_ROUTE;
>   mib[2] = 0;
>   mib[3] = 0;
>   mib[4] = NET_RT_TABLE;
> - mib[5] = rdomain;
> + mib[5] = rtableid;
>  
>   len = sizeof(info);
>   if (sysctl(mib, 6, , , NULL, 0) == -1) {
>   if (errno == ENOENT) {
>   /* table nonexistent */
> + found[rtableid] = 0;
>   return 0;
>   }
>   err(1, "%s", __func__);
>   }
> - if (info.rti_domainid == rdomain) {
> - found[rdomain] = 1;
> - return 1;
> + if (info.rti_domainid == rtableid) {
> + found[rtableid] = 2;
> + return 2;
>   }
> - /* rdomain is a table, but not an rdomain */
> - return 0;
> + found[rtableid] = 1;
> + return 1;
>  }
>  
>  int



Re: diff: pfctl: error message for nonexisting rtable

2020-09-16 Thread YASUOKA Masahiko
Hi,

On Wed, 16 Sep 2020 12:04:55 +0200
Klemens Nanni  wrote:
> Using the function verb would read a bit clearer/more intuitive,
> i.e.

Yes, "if (!rtable_exists($2))" seems better.

>> @@ -5887,17 +5897,37 @@ rdomain_exists(u_int rdomain)
>>  
>>  len = sizeof(info);
>>  if (sysctl(mib, 6, , , NULL, 0) == -1) {
>> -if (errno == ENOENT) {
>> +if (errno == ENOENT)
>>  /* table nonexistent */
>> -return 0;
>> -}
>> -err(1, "%s", __func__);
>> -}
>> -if (info.rti_domainid == rdomain) {
>> -found[rdomain] = 1;
>> +domainid[rdomain] = RT_TABLEID_MAX;
> This does not look correct, RT_TABLEID_MAX (255) is the biggest *valid*
> id, so you cannot use it to denote a nonexistent routing table.

Good catch.  Thanks,

> Perhaps use `static int domainid[RT_TABLEID_MAX+1]' and `-1' to reflect
> ENOENT?

New diff is using -1 for ENOENT.

Also domainid == 0 is a valid domain id, but previous diff cannot make
a cache of it since 0 is the default value.  So new diff is doing

-   static u_int found[RT_TABLEID_MAX+1];
+   static struct {
+   int  found;
+   int  domainid;
+   }rtables[RT_TABLEID_MAX+1];

to distinguish the default 0 and domainid 0.

ok?


Make pfctl check if the rtable really exists when parsing the config.

Index: sbin/pfctl/parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.701
diff -u -p -r1.701 parse.y
--- sbin/pfctl/parse.y  28 Jan 2020 15:40:35 -  1.701
+++ sbin/pfctl/parse.y  16 Sep 2020 10:40:25 -
@@ -392,7 +392,9 @@ int  invalid_redirect(struct node_host *
 u_int16_t parseicmpspec(char *, sa_family_t);
 int kw_casecmp(const void *, const void *);
 int map_tos(char *string, int *);
+int get_domainid(u_int);
 int rdomain_exists(u_int);
+int rtable_exists(u_int);
 int filteropts_to_rule(struct pf_rule *, struct filter_opts *);
 
 TAILQ_HEAD(loadanchorshead, loadanchors)
@@ -1217,6 +1219,10 @@ antispoof_opt: LABEL label   {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (!rtable_exists($2)) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
antispoof_opts.rtableid = $2;
}
;
@@ -2001,6 +2007,10 @@ filter_opt   : USER uids {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (!rtable_exists($2)) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
filter_opts.rtableid = $2;
}
| DIVERTTO STRING PORT portplain {
@@ -2475,7 +2485,7 @@ if_item   : STRING{
| RDOMAIN NUMBER{
if ($2 < 0 || $2 > RT_TABLEID_MAX)
yyerror("rdomain %lld outside range", $2);
-   else if (rdomain_exists($2) != 1)
+   else if (!rdomain_exists($2))
yyerror("rdomain %lld does not exist", $2);
 
$$ = calloc(1, sizeof(struct node_if));
@@ -5868,36 +5878,60 @@ map_tos(char *s, int *val)
 }
 
 int
-rdomain_exists(u_int rdomain)
+get_domainid(u_int rtable)
 {
size_t   len;
struct rt_tableinfo  info;
int  mib[6];
-   static u_int found[RT_TABLEID_MAX+1];
+   static struct {
+   int  found;
+   int  domainid;
+   }rtables[RT_TABLEID_MAX+1];
 
-   if (found[rdomain] == 1)
-   return 1;
+   if (rtables[rtable].found)
+   return rtables[rtable].domainid;
 
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0;
mib[3] = 0;
mib[4] = NET_RT_TABLE;
-   mib[5] = rdomain;
+   mib[5] = rtable;
 
len = sizeof(info);
if (sysctl(mib, 6, , , NULL, 0) == -1) {
-   if (errno == ENOENT) {
+   if (errno == ENOENT)
/* table nonexistent */
-   return 0;
-   }
-   err(1, "%s", __func__);
-   }
-   if (info.rti_domainid == rdomain) {
-   found[rdomain] = 1;
+   rtables[rtable].domainid = -1;
+   else
+   err(1, "%s", __func__);
+   } else
+   rtables[rtable].domainid = info.rti_domainid;
+   

Re: diff: pfctl: error message for nonexisting rtable

2020-09-16 Thread YASUOKA Masahiko
Hi,

So, it seems we need more code and testing for the pf(4) part.

Let me continue this separately.

On Mon, 14 Sep 2020 11:07:53 +0200
Klemens Nanni  wrote:
> On Mon, Sep 14, 2020 at 02:09:27PM +0900, YASUOKA Masahiko wrote:
>> Make pfctl check if the rtable really exists when parsing the config.
> I concur, but you can do this with less (duplicated) code.
> 
> Instead of copying rdomain_exists() into rtable_exists() with the
> `rti_domainid' check omitted, tweak (and rename) rdomain_exists() into
> returning the information whether the given ID is just an rtable.
> 
> rdomain_exists() merges the "invalid id" and "id is an rtable but not
> an rdomain" cases - make those separate return codes, check/adjust
> existing callers and use it for your new checks.

Yes, I could reduce the code.  Thanks,

ok?


Make pfctl check if the rtable really exists when parsing the config.

Index: sbin/pfctl/parse.y
===
RCS file: /cvs/src/sbin/pfctl/parse.y,v
retrieving revision 1.701
diff -u -p -r1.701 parse.y
--- sbin/pfctl/parse.y  28 Jan 2020 15:40:35 -  1.701
+++ sbin/pfctl/parse.y  16 Sep 2020 09:11:21 -
@@ -392,7 +392,9 @@ int  invalid_redirect(struct node_host *
 u_int16_t parseicmpspec(char *, sa_family_t);
 int kw_casecmp(const void *, const void *);
 int map_tos(char *string, int *);
+int get_domainid(u_int);
 int rdomain_exists(u_int);
+int rtable_exists(u_int);
 int filteropts_to_rule(struct pf_rule *, struct filter_opts *);
 
 TAILQ_HEAD(loadanchorshead, loadanchors)
@@ -1217,6 +1219,10 @@ antispoof_opt: LABEL label   {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
antispoof_opts.rtableid = $2;
}
;
@@ -2001,6 +2007,10 @@ filter_opt   : USER uids {
yyerror("invalid rtable id");
YYERROR;
}
+   else if (rtable_exists($2) != 1) {
+   yyerror("rtable %lld does not exist", $2);
+   YYERROR;
+   }
filter_opts.rtableid = $2;
}
| DIVERTTO STRING PORT portplain {
@@ -5868,15 +5878,15 @@ map_tos(char *s, int *val)
 }
 
 int
-rdomain_exists(u_int rdomain)
+get_domainid(u_int rdomain)
 {
size_t   len;
struct rt_tableinfo  info;
int  mib[6];
-   static u_int found[RT_TABLEID_MAX+1];
+   static u_int domainid[RT_TABLEID_MAX+1];
 
-   if (found[rdomain] == 1)
-   return 1;
+   if (domainid[rdomain] != 0)
+   return domainid[rdomain];
 
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
@@ -5887,17 +5897,37 @@ rdomain_exists(u_int rdomain)
 
len = sizeof(info);
if (sysctl(mib, 6, , , NULL, 0) == -1) {
-   if (errno == ENOENT) {
+   if (errno == ENOENT)
/* table nonexistent */
-   return 0;
-   }
-   err(1, "%s", __func__);
-   }
-   if (info.rti_domainid == rdomain) {
-   found[rdomain] = 1;
+   domainid[rdomain] = RT_TABLEID_MAX;
+   else
+   err(1, "%s", __func__);
+   } else
+   domainid[rdomain] = info.rti_domainid;
+
+   return domainid[rdomain];
+}
+
+int
+rdomain_exists(u_int rdomain)
+{
+   int domainid;
+
+   domainid = get_domainid(rdomain);
+   if (domainid == rdomain)
return 1;
-   }
/* rdomain is a table, but not an rdomain */
+   return 0;
+}
+
+int
+rtable_exists(u_int rtable)
+{
+   int domainid;
+
+   domainid = get_domainid(rtable);
+   if (domainid < RT_TABLEID_MAX)
+   return 1;
return 0;
 }
 



  1   2   3   4   >