Re: [External] : [ICMP] IP options lead to malformed reply

2021-03-24 Thread Alexandr Nedvedicky
Hello,



> We really need to fix ip_send() such the output task will handle IP options
> properly.

took a look at bug reported by Dominik earlier today. Looks like
there are two issues:

1) ip_insertoptions() does not update length of IP header (ip_hl)

2) ip_hl is being overridden anyway later in ip_output() called
   by ip_send_dispatch() to send ICMP error packet out. Looks
   like ip_send_dispatch() should use IP_RAWOUTPUT flag so
   ip_hl won't get overridden.

Diff below fixes both issues. We still have no good story when
ip_insertoptions() fails. I'll send another diff later this week.


diff below makes 'nping --ip-options T ...' to work. OK?

thanks and
regards
sashan

8<---8<---8<--8<
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index 0ec3f723be4..234c798e7d4 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -1789,7 +1789,7 @@ ip_send_dispatch(void *xmq)
 
NET_LOCK();
while ((m = ml_dequeue()) != NULL) {
-   ip_output(m, NULL, NULL, 0, NULL, NULL, 0);
+   ip_output(m, NULL, NULL, IP_RAWOUTPUT, NULL, NULL, 0);
}
NET_UNLOCK();
 }
diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c
index c01a3e7803c..ea803077304 100644
--- a/sys/netinet/ip_output.c
+++ b/sys/netinet/ip_output.c
@@ -765,6 +765,11 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int 
*phlen)
optlen = opt->m_len - sizeof(p->ipopt_dst);
if (optlen + ntohs(ip->ip_len) > IP_MAXPACKET)
return (m); /* XXX should fail */
+
+   /* check if options will fit to IP header */
+   if ((optlen + (ip->ip_hl << 2)) > (0x0f << 2))
+   return (m);
+
if (p->ipopt_dst.s_addr)
ip->ip_dst = p->ipopt_dst;
if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
@@ -790,6 +795,7 @@ ip_insertoptions(struct mbuf *m, struct mbuf *opt, int 
*phlen)
memcpy(ip + 1, p->ipopt_list, optlen);
*phlen = sizeof(struct ip) + optlen;
ip->ip_len = htons(ntohs(ip->ip_len) + optlen);
+   ip->ip_hl += (optlen >> 2);
return (m);
 }
 



httpd: Allow overriding global "no index"

2021-03-24 Thread Quentin Rameau
Hello,

It's been noted that the "directory no index" configuration
always shadow any other directory index configuration,
for example:

directory no index
location "/foo/" {
directory auto index
}

Even when requesting /foo/,
the global no index option
overrides the location-specific auto index.

This looks a bit too restrictive
and contrary to having location-specific options.

Here's a patch proposal modifying the current behaviour.
Note that if multiple contradictory options
are defined at the same level,
the most restrictive option, no index,
will still have priority over the others.

What do you think?

Index: config.c
===
RCS file: /var/cvs/src/usr.sbin/httpd/config.c,v
retrieving revision 1.61
diff -u -p -u -r1.61 config.c
--- config.c21 Sep 2020 09:42:07 -  1.61
+++ config.c24 Mar 2021 21:26:39 -
@@ -451,7 +451,7 @@ config_getserver_config(struct httpd *en
 #endif
struct server_config*srv_conf, *parent;
uint8_t *p = imsg->data;
-   unsigned int f;
+   unsigned int f, fo;
size_t   s;
 
if ((srv_conf = calloc(1, sizeof(*srv_conf))) == NULL)
@@ -489,14 +489,19 @@ config_getserver_config(struct httpd *en
/* Inherit configuration from the parent */
f = SRVFLAG_INDEX|SRVFLAG_NO_INDEX;
if ((srv_conf->flags & f) == 0) {
-   srv_conf->flags |= parent->flags & f;
+   fo = SRVFLAG_AUTO_INDEX|SRVFLAG_NO_AUTO_INDEX;
+   if ((srv_conf->flags & fo) == 0)
+   srv_conf->flags |= parent->flags & f;
(void)strlcpy(srv_conf->index, parent->index,
sizeof(srv_conf->index));
}
 
f = SRVFLAG_AUTO_INDEX|SRVFLAG_NO_AUTO_INDEX;
-   if ((srv_conf->flags & f) == 0)
-   srv_conf->flags |= parent->flags & f;
+   if ((srv_conf->flags & f) == 0) {
+   fo = SRVFLAG_INDEX|SRVFLAG_NO_INDEX;
+   if ((srv_conf->flags & fo) == 0)
+   srv_conf->flags |= parent->flags & f;
+   }
 
f = SRVFLAG_ROOT;
if ((srv_conf->flags & f) == 0) {



Re: arm64: make cwfg(4) report battery information to apm(4)

2021-03-24 Thread Jeremie Courreges-Anglas
On Wed, Mar 24 2021, Klemens Nanni  wrote:
> On Wed, Mar 24, 2021 at 12:49:51AM +0100, Jeremie Courreges-Anglas wrote:
>  
>> >> > @@ -202,6 +217,12 @@ cwfg_attach(struct device *parent, struc
>> >> >  
>> >> > sensor_task_register(sc, cwfg_update_sensors, 5);
>> >> >  
>> >> > +#if NAPM > 0
>> >> > +   /* make sure we have the apm state initialized before apm 
>> >> > attaches */
>> >> > +   cwfg_update_sensors(sc);
>> >> > +   apm_setinfohook(cwfg_apminfo);
>> >> > +#endif
>> >> > +
>> >> > printf("\n");
>> >> >  }
>> >> >  
>> >> > @@ -324,6 +345,7 @@ cwfg_update_sensors(void *arg)
>> >> > uint32_t vcell, rtt, tmp;
>> >> > uint8_t val;
>> >> > int error, n;
>> >> > +   u_char bat_percent;
>> >> >  
>> >> > if ((error = cwfg_lock(sc)) != 0)
>> >> > return;
>> >> > @@ -348,6 +370,7 @@ cwfg_update_sensors(void *arg)
>> >> > if ((error = cwfg_read(sc, SOC_HI_REG, )) != 0)
>> >> > goto done;
>> >> > if (val != 0xff) {
>> >> > +   bat_percent = val;
>> >> > sc->sc_sensor[CWFG_SENSOR_SOC].value = val * 1000;
>> >> > sc->sc_sensor[CWFG_SENSOR_SOC].flags &= 
>> >> > ~SENSOR_FINVALID;
>> >> > }
>> >> 
>> >> If val == 0xff bat_percent is unitialized.  Note that in this error
>> >> condition the sensors code leaves sc->sc_sensor[CWFG_SENSOR_SOC].value
>> >> alone.
>> > Oops.  Both `val' and `rtt' can be "useless" and we could end up with
>> > a partially updated `cwfg_power'.
>> 
>> With the current code, rtt is always meaningful.  I was really talking
>> only about bat_percent being possibly uninitialized.
> Yes, `rtt' always means something but the code does not always populate
> the struct member with it from which I read unconditionally - this is
> what I meant.
>
> Hence, instead of always reading from it and possibly getting a stale
> value, the second diff makes sure that each update cycle resets every
> value and only updates those that come from a fresh reading.
>
>> > If we always reset all values up front and then update whatever is
>> > possible, we can avoid the intermediate variable completely and end up
>> > with `cwfg_power' providing consistent readings.
>> 
>> Except if reading one register fails and we take the goto done shortcut.
>> In that case sensors and apm get out of sync, which is bad IMHO.
> My intention was that I'd rather provide an all reset struct with a
> single value updated that is known to be good rather than lazily
> updating whatever is possible.
>
>> >> So to keep apm and sensors in sync I think it would be better to reuse
>> >> the cached sc->sc_sensor[CWFG_SENSOR_SOC].value.
>> > I did `sc->sc_sensor[CWFG_SENSOR_SOC].value / 1000' first but ended up
>> > with bogus values, hence the `bat_percent' buffer to avoid arithmetics.
>> >
>> >
>> >> I don't know whether the error condition mentioned above happens
>> >> frequently with this driver.  Reporting APM_BATT_ABSENT (and similar for
>> >> sensors) would be useful, and could be done in a subsequent step.
>> > But that isn't the case?  From apm(4):
>> >
>> >  APM_BATTERY_ABSENT
>> >   No battery installed.
>> >
>> > The driver just can't tell us enough, but the battery is still there
>> > (we get good percentage/liftime readings), so
>> >
>> >  APM_BATT_UNKNOWN
>> >   Cannot read the current battery state.
>> >
>> > is only appropiate here imho.
>> >
>> >
>> >> What's the underlying hardware, does it involve a pluggable battery?
>> > Nope, Pinebook Pro with one internal battery and AC.
>> 
>> Since the battery can't be removed in your pinebook, then obviously
>> APM_BATTERY_ABSENT isn't very meaningful.  But maybe the CellWise
>> hardware supports pluggable batteries.  Anyway...
>> 
>> The diff below is mostly your diff, except:
>> - statically initialize cwfg_power with proper values.  That way there's
>>   a reason less for initializing it early by forcefully calling
>>   cwfg_update_sensors().
> I like that, thanks.
>
>> - only update cwfg_power when we already fetched new values for sensors.
> As outlined above, I'd like to avoid this approach.
>
> Let's assume the sensor works fine and all values are read correctly,
> then apm shows something like this:
>
>   Battery state: high, 78% remaining, 323 minutes life estimate
>
> Now if for whatever reason in cwfg_update_sensors() only readings for
> RTT (battery remaining minutes) start failing, your diff would keep
> updating SOC (battery percent) without touching RTT, so apm would trend
> like so
>
>   Battery state: high, 70% remaining, 323 minutes life estimate
>   Battery state: high, 50% remaining, 323 minutes life estimate
>
> Or not?  To deal with this I reset everything to invalid/unknown, such
> that in the above scenario apm would trend like so
>
>   Battery state: high, 78% remaining, 323 minutes life 

Re: Huawei ME906s-158 LTE, cdce(4) vs umb(4)

2021-03-24 Thread Stuart Henderson
On 2021/03/25 00:30, Patrick Wildt wrote:
> Without having looked at anything, it might be worth looking at the most
> recent mail in this thread:
> 
> 'Re: [PATCH] umb(4) fix for X20 (DW5821e) in Dell Latitude 7300'
> 

oh, usb runs through all drivers looking for a VID/PID match before
then running through all looking for a class match? I didn't realise
(thought it would try driver-by-driver with first VID/PID then class)
but that does make sense.

Updated diff below adding my pid/vid and tweaking some comments. My card
attaches to umb with this. I've only added it commented-out to the manual
for now until I see it actually pass traffic.

So this fixes Bryan's, at least improves mine (will look for another
sim / sim-adapter tomorrow), and seems targetted enough to not risk
fallout with existing working devices.

I think this makes sense to commit. OK with me if you'd like to commit
it Gerhard (I think it was your diff originally).

I added this product to usbdevs as a short string; the other Huawei
devices all say "HUAWEI Mobile xyz" rather than just "xyz" in the device
string which I think should be trimmed as well, probably worth doing
that on top.


Index: share/man/man4/umb.4
===
RCS file: /cvs/src/share/man/man4/umb.4,v
retrieving revision 1.11
diff -u -p -r1.11 umb.4
--- share/man/man4/umb.412 May 2020 13:03:52 -  1.11
+++ share/man/man4/umb.425 Mar 2021 00:03:58 -
@@ -44,8 +44,10 @@ PIN again even if the system was reboote
 The following devices should work:
 .Pp
 .Bl -tag -width Ds -offset indent -compact
+.It Dell DW5821e
 .It Ericsson H5321gw and N5321gw
 .It Fibocom L831-EAU
+.\" .It Huawei ME906s -- attaches but may need more work
 .It Medion Mobile S4222 (MediaTek OEM)
 .It Sierra Wireless EM7345
 .It Sierra Wireless EM7455
Index: sys/dev/usb/if_umb.c
===
RCS file: /cvs/src/sys/dev/usb/if_umb.c,v
retrieving revision 1.37
diff -u -p -r1.37 if_umb.c
--- sys/dev/usb/if_umb.c29 Jan 2021 17:06:19 -  1.37
+++ sys/dev/usb/if_umb.c24 Mar 2021 23:52:13 -
@@ -225,6 +225,28 @@ const struct cfattach umb_ca = {
 int umb_delay = 4000;
 
 /*
+ * Normally, MBIM devices are detected by their interface class and subclass.
+ * But for some models that have multiple configurations, it is better to
+ * match by vendor and product id so that we can select the desired
+ * configuration ourselves, e.g. to override a class-based match to another
+ * driver.
+ *
+ * OTOH, some devices identify themselves as an MBIM device but fail to speak
+ * the MBIM protocol.
+ */
+struct umb_products {
+   struct usb_devno dev;
+   int  confno;
+};
+const struct umb_products umb_devs[] = {
+   { { USB_VENDOR_DELL, USB_PRODUCT_DELL_DW5821E }, 2 },
+   { { USB_VENDOR_HUAWEI, USB_PRODUCT_HUAWEI_ME906S }, 3 },
+};
+
+#define umb_lookup(vid, pid)   \
+   ((const struct umb_products *)usb_lookup(umb_devs, vid, pid))
+
+/*
  * These devices require an "FCC Authentication" command.
  */
 const struct usb_devno umb_fccauth_devs[] = {
@@ -263,6 +285,8 @@ umb_match(struct device *parent, void *m
struct usb_attach_arg *uaa = aux;
usb_interface_descriptor_t *id;
 
+   if (umb_lookup(uaa->vendor, uaa->product) != NULL)
+   return UMATCH_VENDOR_PRODUCT;
if (!uaa->iface)
return UMATCH_NONE;
if ((id = usbd_get_interface_descriptor(uaa->iface)) == NULL)
@@ -315,6 +339,43 @@ umb_attach(struct device *parent, struct
sc->sc_ctrl_ifaceno = uaa->ifaceno;
ml_init(>sc_tx_ml);
 
+   if (uaa->configno < 0) {
+   /*
+* In case the device was matched by VID/PID instead of
+* InterfaceClass/InterfaceSubClass, we have to pick the
+* correct configuration ourself.
+*/
+   uaa->configno = umb_lookup(uaa->vendor, uaa->product)->confno;
+   DPRINTF("%s: switching to config #%d\n", DEVNAM(sc),
+   uaa->configno);
+   status = usbd_set_config_no(sc->sc_udev, uaa->configno, 1);
+   if (status) {
+   printf("%s: failed to switch to config #%d: %s\n",
+   DEVNAM(sc), uaa->configno, usbd_errstr(status));
+   goto fail;
+   }
+   usbd_delay_ms(sc->sc_udev, 200);
+
+   /*
+* Need to do some manual setups that usbd_probe_and_attach()
+* would do for us otherwise.
+*/
+   uaa->nifaces = uaa->device->cdesc->bNumInterfaces;
+   for (i = 0; i < uaa->nifaces; i++) {
+   if (usbd_iface_claimed(sc->sc_udev, i))
+   continue;
+   id = 
usbd_get_interface_descriptor(>device->ifaces[i]);
+   

Re: Huawei ME906s-158 LTE, cdce(4) vs umb(4)

2021-03-24 Thread Patrick Wildt
On Wed, Mar 24, 2021 at 11:23:11PM +, Stuart Henderson wrote:
> In my ongoing search to find a umb(4) that will actually work that
> isn't the one in my laptop (since my EM7305 has been a failure),
> I picked up a Huawei ME906s-158 off ebay. It attaches to cdce and ugen
> and fails to work:
> 
> cdce0 at uhub2 port 4 configuration 2 interface 0 "Huawei Technologies Co., 
> Ltd. HUAWEI Mobile" rev 2.10/1.02 addr 3
> cdce0: could not find data bulk in
> ugen0 at uhub2 port 4 configuration 2 "Huawei Technologies Co., Ltd. HUAWEI 
> Mobile" rev 2.10/1.02 addr 3
> 
> Any information I can find for it suggests that it does MBIM, and
> indeed if I disable cdce in the kernel it picks up on a different
> config:
> 
> umb0 at uhub2 port 4 configuration 3 interface 0 "Huawei Technologies Co., 
> Ltd. HUAWEI Mobile" rev 2.10/1.02 addr 3
> ugen0 at uhub2 port 4 configuration 3 "Huawei Technologies Co., Ltd. HUAWEI 
> Mobile" rev 2.10/1.02 addr 3
> 
> and after I figured out which of the APU2's mPCIe slots is routed to
> the SIM slot (the middle one) it actually negotiates with the network
> 
> umb0: flags=8855 mtu 1500
> index 11 priority 6 llprio 3
> roaming disabled registration home network
> state up cell-class LTE rssi -83dBm speed 143Mbps up 143Mbps down
> SIM initialized PIN valid (3 attempts left)
> subscriber-id 234 ICC-id 8944xxx provider 3
> device ML1ME906SM IMEI 867 firmware 11.617.04.00.00
> phone# +44xx APN 3internet
> dns 217.171.132.0 217.171.135.0
> groups: egress
> status: active
> inet 94.197.84.2 --> 94.197.84.1 netmask 0xfffc
> 
> It doesn't seem to be working properly yet (packets transmitted over it
> don't arrive at the destination) but I'm not 100% convinced that it's
> not the network yet, I'll find some more SIMs to try.
> 
> In the meantime, any suggestions how to knock it out from attaching
> to cdce? Is there a way to drop out in the attach routine (e.g. if there's
> no data bulk in) to allow another driver to attempt attaching,
> or is it committed to a specific driver once it has matched?
> 
> If the latter, would it make sense for cdce to look for MBIM on
> another configuration and skip matching in that case? Or do (move
> or replicate) some of cdce_attach()'s checks in cdce_match so it
> can skip attaching if cdce isn't going to work?
> 
> There are loads of these showing up (probably from laptops broken for
> parts) for about 20 $currency_units on ebay/similar now so it would
> be quite nice to get them working. A few similar devices showed up
> on the lists before but I haven't noticed anyone trying to disable
> cdce on them yet.
> https://www.google.com/search?q=huawei+%22cdce0:+could+not+find+data+bulk+in%22+site:openbsd-archive.7691.n7.nabble.com
> 
> If anyone else is thinking of getting one to poke at, to use in an APU
> they also need a M.2 -> mPCIe adapter (aka NGFF -> mPCIe) with 'B' key
> (doesn't need a sim carrier on the adapter), plus whatever u.fl pigtails
> and antennas (the proper multiband LTE antennas usually have SMA
> connectors rather than the RP-SMA common with wifi).
> 
> Descriptors from lsusb below (seach for "HUAWEI Mobile Broadband Module"
> for the 'right' one).

Without having looked at anything, it might be worth looking at the most
recent mail in this thread:

'Re: [PATCH] umb(4) fix for X20 (DW5821e) in Dell Latitude 7300'



Huawei ME906s-158 LTE, cdce(4) vs umb(4)

2021-03-24 Thread Stuart Henderson
In my ongoing search to find a umb(4) that will actually work that
isn't the one in my laptop (since my EM7305 has been a failure),
I picked up a Huawei ME906s-158 off ebay. It attaches to cdce and ugen
and fails to work:

cdce0 at uhub2 port 4 configuration 2 interface 0 "Huawei Technologies Co., 
Ltd. HUAWEI Mobile" rev 2.10/1.02 addr 3
cdce0: could not find data bulk in
ugen0 at uhub2 port 4 configuration 2 "Huawei Technologies Co., Ltd. HUAWEI 
Mobile" rev 2.10/1.02 addr 3

Any information I can find for it suggests that it does MBIM, and
indeed if I disable cdce in the kernel it picks up on a different
config:

umb0 at uhub2 port 4 configuration 3 interface 0 "Huawei Technologies Co., Ltd. 
HUAWEI Mobile" rev 2.10/1.02 addr 3
ugen0 at uhub2 port 4 configuration 3 "Huawei Technologies Co., Ltd. HUAWEI 
Mobile" rev 2.10/1.02 addr 3

and after I figured out which of the APU2's mPCIe slots is routed to
the SIM slot (the middle one) it actually negotiates with the network

umb0: flags=8855 mtu 1500
index 11 priority 6 llprio 3
roaming disabled registration home network
state up cell-class LTE rssi -83dBm speed 143Mbps up 143Mbps down
SIM initialized PIN valid (3 attempts left)
subscriber-id 234 ICC-id 8944xxx provider 3
device ML1ME906SM IMEI 867 firmware 11.617.04.00.00
phone# +44xx APN 3internet
dns 217.171.132.0 217.171.135.0
groups: egress
status: active
inet 94.197.84.2 --> 94.197.84.1 netmask 0xfffc

It doesn't seem to be working properly yet (packets transmitted over it
don't arrive at the destination) but I'm not 100% convinced that it's
not the network yet, I'll find some more SIMs to try.

In the meantime, any suggestions how to knock it out from attaching
to cdce? Is there a way to drop out in the attach routine (e.g. if there's
no data bulk in) to allow another driver to attempt attaching,
or is it committed to a specific driver once it has matched?

If the latter, would it make sense for cdce to look for MBIM on
another configuration and skip matching in that case? Or do (move
or replicate) some of cdce_attach()'s checks in cdce_match so it
can skip attaching if cdce isn't going to work?

There are loads of these showing up (probably from laptops broken for
parts) for about 20 $currency_units on ebay/similar now so it would
be quite nice to get them working. A few similar devices showed up
on the lists before but I haven't noticed anyone trying to disable
cdce on them yet.
https://www.google.com/search?q=huawei+%22cdce0:+could+not+find+data+bulk+in%22+site:openbsd-archive.7691.n7.nabble.com

If anyone else is thinking of getting one to poke at, to use in an APU
they also need a M.2 -> mPCIe adapter (aka NGFF -> mPCIe) with 'B' key
(doesn't need a sim carrier on the adapter), plus whatever u.fl pigtails
and antennas (the proper multiband LTE antennas usually have SMA
connectors rather than the RP-SMA common with wifi).

Descriptors from lsusb below (seach for "HUAWEI Mobile Broadband Module"
for the 'right' one).


Bus 001 Device 003: ID 12d1:15c1 Huawei Technologies Co., Ltd.
Device Descriptor:
  bLength18
  bDescriptorType 1
  bcdUSB   2.10
  bDeviceClass0 (Defined at Interface level)
  bDeviceSubClass 0
  bDeviceProtocol   255
  bMaxPacketSize064
  idVendor   0x12d1 Huawei Technologies Co., Ltd.
  idProduct  0x15c1
  bcdDevice1.02
  iManufacturer   1 Huawei Technologies Co., Ltd.
  iProduct2 HUAWEI Mobile
  iSerial 3 0123456789ABCDEF

  bNumConfigurations  3


  Configuration Descriptor:
bLength 9
bDescriptorType 2
wTotalLength  301
bNumInterfaces  6
bConfigurationValue 1
iConfiguration  0
bmAttributes 0xa0
  (Bus Powered)
  Remote Wakeup
MaxPower2mA
Interface Descriptor:
  bLength 9
  bDescriptorType 4
  bInterfaceNumber0
  bAlternateSetting   0
  bNumEndpoints   3
  bInterfaceClass   255 Vendor Specific Class
  bInterfaceSubClass  6
  bInterfaceProtocol 16
  iInterface  7 Huawei Mobile Connect - Modem
  ** UNRECOGNIZED:  05 24 00 10 01
  ** UNRECOGNIZED:  04 24 02 02
  ** UNRECOGNIZED:  05 24 01 00 00
  ** UNRECOGNIZED:  05 24 06 00 00
  Endpoint Descriptor:
bLength 7
bDescriptorType 5
bEndpointAddress 0x82  EP 2 IN
bmAttributes3
  Transfer TypeInterrupt
  Synch Type   None
  Usage Type   Data
wMaxPacketSize 0x000a  1x 10 bytes
bInterval   9
  Endpoint Descriptor:
bLength 7

Re: libcrypto bio_cb.c: fix mangled debug output

2021-03-24 Thread Theo Buehler
On Wed, Mar 24, 2021 at 11:09:41AM +0100, Martin Vahlensieck wrote:
> Hi
> 
> This fixes mangled output from the openssl(1) -debug option:
> 
> Before:
> $ openssl aes-256-cbc -out test -debug
> BIO[0x9102a7e5ctrl(106) - FILE pointer
> BIO[0x9102a7e5ctrl return 1
> BIO[0x9102a801ctrl(108) - FILE pointer
> BIO[0x9102a801ctrl return 1
> ...
> 
> After:
> $ openssl aes-256-cbc -out test -debug
> BIO[0x5770f81ce00]:ctrl(106) - FILE pointer
> BIO[0x5770f81ce00]:ctrl return 1
> BIO[0x5770f81c200]:ctrl(108) - FILE pointer
> BIO[0x5770f81c200]:ctrl return 1
> BIO[0x5770f81c200]:write(0,8) - FILE pointer
> ...
> 
> The issue is that BIO_debug_callback(3) assumes that the pointer
> formatted with %p takes up 6 chars.

Thanks. Some comments inline.

> 
> Best,
> 
> Martin
> 
> Index: bio/bio_cb.c
> ===
> RCS file: /cvs/src/lib/libcrypto/bio/bio_cb.c,v
> retrieving revision 1.16
> diff -u -p -r1.16 bio_cb.c
> --- bio/bio_cb.c  8 Dec 2014 03:54:19 -   1.16
> +++ bio/bio_cb.c  24 Mar 2021 09:50:45 -
> @@ -70,15 +70,20 @@ BIO_debug_callback(BIO *bio, int cmd, co
>   BIO *b;
>   char buf[256];
>   char *p;
> + int nbuf;
>   long r = 1;
>   size_t p_maxlen;
>  
>   if (BIO_CB_RETURN & cmd)
>   r = ret;
>  
> - snprintf(buf, sizeof buf, "BIO[%p]:", bio);
> - p = &(buf[14]);
> - p_maxlen = sizeof buf - 14;
> + nbuf = snprintf(buf, sizeof buf, "BIO[%p]:", bio);
> +
> + if (nbuf > sizeof buf)
> + nbuf = sizeof buf;

While this is correct for the upper bound, it looks odd. Especially in
combination with [nbuf] which looks like an out-of-bounds access
(which happens to be ok since no actual access occurs later on).  If
snprintf fails, this is an actual out-of-bounds access.  I would try to
follow the canonical pattern: "if (nbuf < 0 || nbuf >= sizeof(buf))".

I'd spare the reader the checking and do:

if (nbuf < 0)
nbuf = 0;   /* Ignore error; continue printing. */
if (nbuf >= sizeof(buf))
goto out;

Where "out" is a label after the switch.

> +
> + p = &(buf[nbuf]);

I would at least drop the parentheses and probably write "p = buf + nbuf;".

> + p_maxlen = sizeof buf - nbuf;

Please add parentheses around buf.

>   switch (cmd) {
>   case BIO_CB_FREE:
>   snprintf(p, p_maxlen, "Free - %s\n", bio->method->name);
> 



vmd(8): fix packet handling for dhcpleased(8)

2021-03-24 Thread Dave Voutila
tech@,

While migrating an existing -current vm to use dhcpleased(8), one of the
issues discovered was the dhcp/bootp handling built into vmd(8) for
local interfaces was improperly missing packets sent to the ethernet
address of the host-side tap(4) device. (vmd(8) was only looking for
broadcast packets.)

The following diff:
- includes the host-side tap(4) lladdr in the packet filtering logic for
  intercepting vio(4) dhcp/bootp communication
- removes a conditional check (dhcpsz == 0) pointed out by deraadt@ that
  could contribute to missed packets while iterating through the vionet
  tx ring

Because of vmd(8)'s priv-sep design, the approach taken is to pass a
duplicate of the opened tap(4) fd to the "priv" process that is
unpledged and responsible for setting up networking. A response imsg
travels between processes, being forwarded until it gets to the intended
vm process.

For those looking to test the diff, some steps to follow are:

0. Don't apply the patch yet.
1. Use an OpenBSD guest running a recent snapshot that has the latest
dhcpleased(8)[1] and dhcpleasectl(8)[2] changes committed by florian@.
2. Configure the guest for using dhcpleased(8), ideally via
/etc/hostname.vio0 containing:

  inet autoconf
  up

3. Ensure you get an IP assigned from vmd(8) after boot.
4. Check the lease with `dhcpleasectl show interface vio0`. It should
   report [Bound] and have a _very_ long lease time.
5. Force a renewal with `dhcpleasectl send request vio0`.
6. Check again like in step 4. It will be "stuck" in a [Renewing] state.
7. Shut down the guest, vmd(8), and apply the patch.
8. Repeat steps 4-6, but the guest should no longer be "stuck" in
   [Renewing] and should report [Bound] after forced renewal.

You can also turn up debug logging for vmd(8) and should see
corresponding messages of:

  vionet: dhcp request, local response size 342

If you do not, the packet was not intercepted by vmd(8).

I have tested this diff with a variety of conditions, including multiple
emulated nics per guest. Specifically 1 nic using dhcp/bootp resolved by
vmd(8) and the other nic connected to a virtual switch using veb(4) and
an instance of dhcpd(8) running on the host. Also ran a snapshot upgrade
with no issues.

So in summary, if this diff works, you shouldn't notice a difference ;-)

Thanks,
-Dave

[1] https://marc.info/?l=openbsd-cvs=161643049815177=2
[2] https://marc.info/?l=openbsd-cvs=161652157122736=2


Index: config.c
===
RCS file: /cvs/src/usr.sbin/vmd/config.c,v
retrieving revision 1.60
diff -u -p -r1.60 config.c
--- config.c19 Mar 2021 09:29:33 -  1.60
+++ config.c24 Mar 2021 15:42:13 -
@@ -227,6 +227,7 @@ config_setvm(struct privsep *ps, struct
char base[PATH_MAX];
unsigned int unit;
struct timeval   tv, rate, since_last;
+   struct vmop_addr_req var;

errno = 0;

@@ -499,6 +500,12 @@ config_setvm(struct privsep *ps, struct
proc_compose_imsg(ps, PROC_VMM, -1,
IMSG_VMDOP_START_VM_IF, vm->vm_vmid, tapfds[i],
, sizeof(i));
+
+   memset(, 0, sizeof(var));
+   var.var_vmid = vm->vm_vmid;
+   var.var_nic_idx = i;
+   proc_compose_imsg(ps, PROC_PRIV, -1, IMSG_VMDOP_PRIV_GET_ADDR,
+   vm->vm_vmid, dup(tapfds[i]), , sizeof(var));
}

if (!(vm->vm_state & VM_STATE_RECEIVED))
Index: dhcp.c
===
RCS file: /cvs/src/usr.sbin/vmd/dhcp.c,v
retrieving revision 1.8
diff -u -p -r1.8 dhcp.c
--- dhcp.c  27 Dec 2018 19:51:30 -  1.8
+++ dhcp.c  24 Mar 2021 15:42:13 -
@@ -57,8 +57,11 @@ dhcp_request(struct vionet_dev *dev, cha
if ((offset = decode_hw_header(buf, buflen, 0, , HTYPE_ETHER)) < 0)
return (-1);

-   if (memcmp(pc.pc_smac, dev->mac, ETHER_ADDR_LEN) != 0 ||
-   memcmp(pc.pc_dmac, broadcast, ETHER_ADDR_LEN) != 0)
+   if (memcmp(pc.pc_dmac, broadcast, ETHER_ADDR_LEN) != 0 &&
+   memcmp(pc.pc_dmac, dev->hostmac, ETHER_ADDR_LEN) != 0)
+   return (-1);
+
+   if (memcmp(pc.pc_smac, dev->mac, ETHER_ADDR_LEN) != 0)
return (-1);

if ((offset = decode_udp_ip_header(buf, buflen, offset, )) < 0)
Index: priv.c
===
RCS file: /cvs/src/usr.sbin/vmd/priv.c,v
retrieving revision 1.16
diff -u -p -r1.16 priv.c
--- priv.c  28 Feb 2021 22:56:09 -  1.16
+++ priv.c  24 Mar 2021 15:42:14 -
@@ -92,6 +92,8 @@ priv_dispatch_parent(int fd, struct priv
struct ifaliasreqifra;
struct in6_aliasreq  in6_ifra;
struct if_afreq  ifar;
+   struct vmop_addr_req vareq;
+   struct vmop_addr_result  varesult;
char type[IF_NAMESIZE];

 

Re: athn(4): switch Tx rate control to RA

2021-03-24 Thread Lauri Tirkkonen
On Tue, Mar 23 2021 18:01:27 +0100, Stefan Sperling wrote:
> This switches athn(4) to the new RA Tx rate adaptation module.
> Tests on athn(4) PCI devices are welcome.
> USB devices don't need to be tested in this case Tx rate adaptation
> is taken care of by firmware.
> 
> I could only test on AR9285 so far, but the result looks promising.

Some numbers from quick tests on:

athn0 at pci1 dev 0 function 0 "Atheros AR9281" rev 0x01: apic 5 int 0
athn0: AR9280 rev 2 (2T2R), ROM rev 16, address 

in hostap mode (11n), sending data to a laptop on iwn(4).

tcpbench (tcp), 10sec:
- before: Mbps varied between 0.677  and 15.187, avg 4.647
- after:  Mbps varied between 10.345 and 10.623, avg 10.588

tcpbench (udp), 10sec:
- before: Rx PPS between 1444 and 1461, about 17 Mbps
  athn(4) sent: 495668032 bytes sent over 10.999 seconds
- after:  Rx PPS between 1021 and 1056, about 12 Mbps
  athn(4) sent: 462994048 bytes sent over 10.999 seconds
(I failed to record bytes received on iwn(4) for this test, sorry)

ping -f -c 1 from athn(4) hostap to iwn(4):
before:
1 packets transmitted, 9982 packets received, 0.2% packet loss
round-trip min/avg/max/std-dev = 0.397/1.029/100.599/1.400 ms
after:
1 packets transmitted, 1 packets received, 0.0% packet loss
round-trip min/avg/max/std-dev = 0.402/1.191/10.510/0.463 ms

Subjectively, the reduced loss & more predictable latency feels much better with
eg. interactive ssh sessions.

-- 
Lauri Tirkkonen | lotheac @ IRCnet



Re: sdmmc(4) off-by-one on boundary check

2021-03-24 Thread Marcus Glocker
On Wed, Mar 24, 2021 at 02:05:27PM -0600, Theo de Raadt wrote:

> Marcus Glocker  wrote:
> 
> > On Tue, Mar 23, 2021 at 09:52:42AM -0600, Theo de Raadt wrote:
> > 
> > > Mark Kettenis  wrote:
> > > 
> > > > > > Index: sys/dev/sdmmc/sdmmc_scsi.c
> > > > > > ===
> > > > > > RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_scsi.c,v
> > > > > > retrieving revision 1.59
> > > > > > diff -u -p -u -p -r1.59 sdmmc_scsi.c
> > > > > > --- sys/dev/sdmmc/sdmmc_scsi.c  15 Oct 2020 13:22:13 -  
> > > > > > 1.59
> > > > > > +++ sys/dev/sdmmc/sdmmc_scsi.c  23 Mar 2021 07:32:52 -
> > > > > > @@ -365,9 +365,8 @@ sdmmc_scsi_cmd(struct scsi_xfer *xs)
> > > > > > /* A read or write operation. */
> > > > > > sdmmc_scsi_decode_rw(xs, , );
> > > > > >  
> > > > > > -   if (blockno >= tgt->card->csd.capacity ||
> > > > > > -   blockno + blockcnt > tgt->card->csd.capacity) {
> > > > > > -   DPRINTF(("%s: out of bounds %u-%u >= %u\n", DEVNAME(sc),
> > > > > > +   if (blockno + blockcnt >= tgt->card->csd.capacity) {
> > > > > > +   DPRINTF(("%s: out of bounds %u+%u >= %u\n", DEVNAME(sc),
> > > > > > blockno, blockcnt, tgt->card->csd.capacity));
> > > > > > xs->error = XS_DRIVER_STUFFUP;
> > > > > > scsi_done(xs);
> > > > 
> > > > Apart from a potential overflow, the original condition looks correct
> > > > to me.  If csd.capacity is 16 and blockno is 0 and blockcnt is 16, the
> > > > operation should succeed, shouldn't it?
> > 
> > Yes, you're right.  In my case the patch suppressed the issue by
> > skipping the last block of the disk.
> >  
> > > Hmm.  Maybe this is a specific SD card with a bug accessing the last 
> > > sector.
> > > 
> > > A manual dd using
> > > 
> > >   skip='c partition size -1' count=1 bs=512
> > > 
> > > Is failing on that SD card, but it is working on other SD cards.  Which 
> > > appear
> > > to all be SD v2 cards.
> > > 
> > > Maybe his specific SD card has an off-by-one bug relating to the last 
> > > sector,
> > > and we need to determine a way to handle that, or skip the last sector on
> > > all devices of this sub-class?
> > > 
> > > I don't see any issue in the controller code.   Marcus, can you move the 
> > > card
> > > to another machine (different controller) and use dd to read the last 
> > > sector?
> > > Though I suspect that might not provide enough clue either, the "fail to 
> > > read"
> > > behaviour might vary between controllers...
> > > 
> > > > > blockno and blockcnt are both 32-bit, so this feels dangerously close
> > > > > to integer wrap, and I believe the first condition cannot be deleted.
> > > > 
> > > > But the second condition can still wrap.  So it probably needs to be
> > > > something like:
> > > > 
> > > >if (blockno >= tgt->card->csd.capacity ||
> > > >blockcnt > tgt->card->csd.capacity - blockno)
> > > 
> > > All 3 variables are int.  Not sure how moving the + to - on the other
> > > side changes anything.
> > 
> > In the meantime I figured out how to trigger the SD controller command
> > to fail.  It always happens if you read more than one block through one
> > command, where the last block is accessing the last disk block.  If you
> > only access the last disk block directly, the command doesn't fail.
> > Some examples:
> > 
> > # dd if=/dev/rsd0c of=/dev/null skip=15591935 bs=4096 count=1
> > nblks=8, datalen=4096, blklen=512, arg=0x76f4ff8, cmd=0x8052
> > CMD FAIL!
> > bcmsdhost0: transfer timeout!
> > 
> > # dd if=/dev/rsd0c of=/dev/null skip=31183871 bs=2048 count=1
> > CMD FAIL!
> > bcmsdhost0: transfer timeout!
> > 
> > # dd if=/dev/rsd0c of=/dev/null skip=62367743 bs=1024 count=1
> > CMD FAIL!
> > bcmsdhost0: transfer timeout!
> > 
> > # dd if=/dev/rsd0c of=/dev/null skip=124735487 bs=512 count=1
> > < all good >
> > 
> > I don't think it's an issue with the microSD card since I have NetBSD
> > installed on a different card of exactly the same type/size, and there I
> > also can trigger the issue:
> > 
> > # dd if=/dev/rld0c of=/dev/null skip=15591935 bs=4096 count=1
> > [ 179.4170221] ld0c: error reading fsbn 124735480 of 124735480-124735487
> > (ld0 bn 124735480; cn 60905 tn 63 sn 24), retrying
> > 
> > Also I have tried that on the Pine64/sximmc(4) with the same microSD
> > card, and there all is hunky-dory.
> > 
> > It seems to be a behavior/bug of the BCM2835 SD controller.  But I'm
> > not sure what's the right approach to work around that in the driver, or
> > even if there is a real fix by changing the command code.
> > 
> 
> I think the bug is in bcm2835_sdhost.c bcmsdhost_exec_command()
> 
> This chunk is fishy, why is it increasing the transaction size?
> 
> if (nblks == 0 || (cmd->c_datalen % cmd->c_blklen) != 0)
> ++nblks;
> 
> I guess this does not occur for this operation, but still it is weird.
> 

Re: sdmmc(4) off-by-one on boundary check

2021-03-24 Thread Mark Kettenis
> Date: Wed, 24 Mar 2021 20:58:48 +0100
> From: Marcus Glocker 
> 
> On Tue, Mar 23, 2021 at 09:52:42AM -0600, Theo de Raadt wrote:
> 
> > Mark Kettenis  wrote:
> > 
> > > > > Index: sys/dev/sdmmc/sdmmc_scsi.c
> > > > > ===
> > > > > RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_scsi.c,v
> > > > > retrieving revision 1.59
> > > > > diff -u -p -u -p -r1.59 sdmmc_scsi.c
> > > > > --- sys/dev/sdmmc/sdmmc_scsi.c15 Oct 2020 13:22:13 -  
> > > > > 1.59
> > > > > +++ sys/dev/sdmmc/sdmmc_scsi.c23 Mar 2021 07:32:52 -
> > > > > @@ -365,9 +365,8 @@ sdmmc_scsi_cmd(struct scsi_xfer *xs)
> > > > >   /* A read or write operation. */
> > > > >   sdmmc_scsi_decode_rw(xs, , );
> > > > >  
> > > > > - if (blockno >= tgt->card->csd.capacity ||
> > > > > - blockno + blockcnt > tgt->card->csd.capacity) {
> > > > > - DPRINTF(("%s: out of bounds %u-%u >= %u\n", DEVNAME(sc),
> > > > > + if (blockno + blockcnt >= tgt->card->csd.capacity) {
> > > > > + DPRINTF(("%s: out of bounds %u+%u >= %u\n", DEVNAME(sc),
> > > > >   blockno, blockcnt, tgt->card->csd.capacity));
> > > > >   xs->error = XS_DRIVER_STUFFUP;
> > > > >   scsi_done(xs);
> > > 
> > > Apart from a potential overflow, the original condition looks correct
> > > to me.  If csd.capacity is 16 and blockno is 0 and blockcnt is 16, the
> > > operation should succeed, shouldn't it?
> 
> Yes, you're right.  In my case the patch suppressed the issue by
> skipping the last block of the disk.
>  
> > Hmm.  Maybe this is a specific SD card with a bug accessing the last sector.
> > 
> > A manual dd using
> > 
> >   skip='c partition size -1' count=1 bs=512
> > 
> > Is failing on that SD card, but it is working on other SD cards.  Which 
> > appear
> > to all be SD v2 cards.
> > 
> > Maybe his specific SD card has an off-by-one bug relating to the last 
> > sector,
> > and we need to determine a way to handle that, or skip the last sector on
> > all devices of this sub-class?
> > 
> > I don't see any issue in the controller code.   Marcus, can you move the 
> > card
> > to another machine (different controller) and use dd to read the last 
> > sector?
> > Though I suspect that might not provide enough clue either, the "fail to 
> > read"
> > behaviour might vary between controllers...
> > 
> > > > blockno and blockcnt are both 32-bit, so this feels dangerously close
> > > > to integer wrap, and I believe the first condition cannot be deleted.
> > > 
> > > But the second condition can still wrap.  So it probably needs to be
> > > something like:
> > > 
> > >if (blockno >= tgt->card->csd.capacity ||
> > >blockcnt > tgt->card->csd.capacity - blockno)
> > 
> > All 3 variables are int.  Not sure how moving the + to - on the other
> > side changes anything.
> 
> In the meantime I figured out how to trigger the SD controller command
> to fail.  It always happens if you read more than one block through one
> command, where the last block is accessing the last disk block.  If you
> only access the last disk block directly, the command doesn't fail.
> Some examples:
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=15591935 bs=4096 count=1
>   nblks=8, datalen=4096, blklen=512, arg=0x76f4ff8, cmd=0x8052
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=31183871 bs=2048 count=1
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=62367743 bs=1024 count=1
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=124735487 bs=512 count=1
>   < all good >
> 
> I don't think it's an issue with the microSD card since I have NetBSD
> installed on a different card of exactly the same type/size, and there I
> also can trigger the issue:
> 
>   # dd if=/dev/rld0c of=/dev/null skip=15591935 bs=4096 count=1
>   [ 179.4170221] ld0c: error reading fsbn 124735480 of 124735480-124735487
>   (ld0 bn 124735480; cn 60905 tn 63 sn 24), retrying
> 
> Also I have tried that on the Pine64/sximmc(4) with the same microSD
> card, and there all is hunky-dory.
> 
> It seems to be a behavior/bug of the BCM2835 SD controller.  But I'm
> not sure what's the right approach to work around that in the driver, or
> even if there is a real fix by changing the command code.
> 
> I mean one can just change the disklabel to skip the last block, but
> it's not really a solution since somebody lazy like me, who does a
> very simple disklabel where 'a'=root, and 'b'=swap is the rest of the
> disk can easily trigger that "bug" through e.g. savecore(8) which we
> run by default during boot.  At least in this case it's a very good
> debugger :-)
> 

So SD cards have different commands for reading (and writing) a single
block and multiple blocks.  The command to read a single block does
just 

Re: sdmmc(4) off-by-one on boundary check

2021-03-24 Thread Theo de Raadt
Marcus Glocker  wrote:

> On Tue, Mar 23, 2021 at 09:52:42AM -0600, Theo de Raadt wrote:
> 
> > Mark Kettenis  wrote:
> > 
> > > > > Index: sys/dev/sdmmc/sdmmc_scsi.c
> > > > > ===
> > > > > RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_scsi.c,v
> > > > > retrieving revision 1.59
> > > > > diff -u -p -u -p -r1.59 sdmmc_scsi.c
> > > > > --- sys/dev/sdmmc/sdmmc_scsi.c15 Oct 2020 13:22:13 -  
> > > > > 1.59
> > > > > +++ sys/dev/sdmmc/sdmmc_scsi.c23 Mar 2021 07:32:52 -
> > > > > @@ -365,9 +365,8 @@ sdmmc_scsi_cmd(struct scsi_xfer *xs)
> > > > >   /* A read or write operation. */
> > > > >   sdmmc_scsi_decode_rw(xs, , );
> > > > >  
> > > > > - if (blockno >= tgt->card->csd.capacity ||
> > > > > - blockno + blockcnt > tgt->card->csd.capacity) {
> > > > > - DPRINTF(("%s: out of bounds %u-%u >= %u\n", DEVNAME(sc),
> > > > > + if (blockno + blockcnt >= tgt->card->csd.capacity) {
> > > > > + DPRINTF(("%s: out of bounds %u+%u >= %u\n", DEVNAME(sc),
> > > > >   blockno, blockcnt, tgt->card->csd.capacity));
> > > > >   xs->error = XS_DRIVER_STUFFUP;
> > > > >   scsi_done(xs);
> > > 
> > > Apart from a potential overflow, the original condition looks correct
> > > to me.  If csd.capacity is 16 and blockno is 0 and blockcnt is 16, the
> > > operation should succeed, shouldn't it?
> 
> Yes, you're right.  In my case the patch suppressed the issue by
> skipping the last block of the disk.
>  
> > Hmm.  Maybe this is a specific SD card with a bug accessing the last sector.
> > 
> > A manual dd using
> > 
> >   skip='c partition size -1' count=1 bs=512
> > 
> > Is failing on that SD card, but it is working on other SD cards.  Which 
> > appear
> > to all be SD v2 cards.
> > 
> > Maybe his specific SD card has an off-by-one bug relating to the last 
> > sector,
> > and we need to determine a way to handle that, or skip the last sector on
> > all devices of this sub-class?
> > 
> > I don't see any issue in the controller code.   Marcus, can you move the 
> > card
> > to another machine (different controller) and use dd to read the last 
> > sector?
> > Though I suspect that might not provide enough clue either, the "fail to 
> > read"
> > behaviour might vary between controllers...
> > 
> > > > blockno and blockcnt are both 32-bit, so this feels dangerously close
> > > > to integer wrap, and I believe the first condition cannot be deleted.
> > > 
> > > But the second condition can still wrap.  So it probably needs to be
> > > something like:
> > > 
> > >if (blockno >= tgt->card->csd.capacity ||
> > >blockcnt > tgt->card->csd.capacity - blockno)
> > 
> > All 3 variables are int.  Not sure how moving the + to - on the other
> > side changes anything.
> 
> In the meantime I figured out how to trigger the SD controller command
> to fail.  It always happens if you read more than one block through one
> command, where the last block is accessing the last disk block.  If you
> only access the last disk block directly, the command doesn't fail.
> Some examples:
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=15591935 bs=4096 count=1
>   nblks=8, datalen=4096, blklen=512, arg=0x76f4ff8, cmd=0x8052
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=31183871 bs=2048 count=1
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=62367743 bs=1024 count=1
>   CMD FAIL!
>   bcmsdhost0: transfer timeout!
> 
>   # dd if=/dev/rsd0c of=/dev/null skip=124735487 bs=512 count=1
>   < all good >
> 
> I don't think it's an issue with the microSD card since I have NetBSD
> installed on a different card of exactly the same type/size, and there I
> also can trigger the issue:
> 
>   # dd if=/dev/rld0c of=/dev/null skip=15591935 bs=4096 count=1
>   [ 179.4170221] ld0c: error reading fsbn 124735480 of 124735480-124735487
>   (ld0 bn 124735480; cn 60905 tn 63 sn 24), retrying
> 
> Also I have tried that on the Pine64/sximmc(4) with the same microSD
> card, and there all is hunky-dory.
> 
> It seems to be a behavior/bug of the BCM2835 SD controller.  But I'm
> not sure what's the right approach to work around that in the driver, or
> even if there is a real fix by changing the command code.
> 

I think the bug is in bcm2835_sdhost.c bcmsdhost_exec_command()

This chunk is fishy, why is it increasing the transaction size?

if (nblks == 0 || (cmd->c_datalen % cmd->c_blklen) != 0)
++nblks;

I guess this does not occur for this operation, but still it is weird.

Maybe investigate which of the "goto done" cases gets hit; I am pretty
sure the controller is indicating which failure mode occurs.

Surely c_error isn't zero when this function completes?


> I mean one can just change the disklabel to skip the 

Re: sdmmc(4) off-by-one on boundary check

2021-03-24 Thread Marcus Glocker
On Tue, Mar 23, 2021 at 09:52:42AM -0600, Theo de Raadt wrote:

> Mark Kettenis  wrote:
> 
> > > > Index: sys/dev/sdmmc/sdmmc_scsi.c
> > > > ===
> > > > RCS file: /cvs/src/sys/dev/sdmmc/sdmmc_scsi.c,v
> > > > retrieving revision 1.59
> > > > diff -u -p -u -p -r1.59 sdmmc_scsi.c
> > > > --- sys/dev/sdmmc/sdmmc_scsi.c  15 Oct 2020 13:22:13 -  1.59
> > > > +++ sys/dev/sdmmc/sdmmc_scsi.c  23 Mar 2021 07:32:52 -
> > > > @@ -365,9 +365,8 @@ sdmmc_scsi_cmd(struct scsi_xfer *xs)
> > > > /* A read or write operation. */
> > > > sdmmc_scsi_decode_rw(xs, , );
> > > >  
> > > > -   if (blockno >= tgt->card->csd.capacity ||
> > > > -   blockno + blockcnt > tgt->card->csd.capacity) {
> > > > -   DPRINTF(("%s: out of bounds %u-%u >= %u\n", DEVNAME(sc),
> > > > +   if (blockno + blockcnt >= tgt->card->csd.capacity) {
> > > > +   DPRINTF(("%s: out of bounds %u+%u >= %u\n", DEVNAME(sc),
> > > > blockno, blockcnt, tgt->card->csd.capacity));
> > > > xs->error = XS_DRIVER_STUFFUP;
> > > > scsi_done(xs);
> > 
> > Apart from a potential overflow, the original condition looks correct
> > to me.  If csd.capacity is 16 and blockno is 0 and blockcnt is 16, the
> > operation should succeed, shouldn't it?

Yes, you're right.  In my case the patch suppressed the issue by
skipping the last block of the disk.
 
> Hmm.  Maybe this is a specific SD card with a bug accessing the last sector.
> 
> A manual dd using
> 
>   skip='c partition size -1' count=1 bs=512
> 
> Is failing on that SD card, but it is working on other SD cards.  Which appear
> to all be SD v2 cards.
> 
> Maybe his specific SD card has an off-by-one bug relating to the last sector,
> and we need to determine a way to handle that, or skip the last sector on
> all devices of this sub-class?
> 
> I don't see any issue in the controller code.   Marcus, can you move the card
> to another machine (different controller) and use dd to read the last sector?
> Though I suspect that might not provide enough clue either, the "fail to read"
> behaviour might vary between controllers...
> 
> > > blockno and blockcnt are both 32-bit, so this feels dangerously close
> > > to integer wrap, and I believe the first condition cannot be deleted.
> > 
> > But the second condition can still wrap.  So it probably needs to be
> > something like:
> > 
> >if (blockno >= tgt->card->csd.capacity ||
> >blockcnt > tgt->card->csd.capacity - blockno)
> 
> All 3 variables are int.  Not sure how moving the + to - on the other
> side changes anything.

In the meantime I figured out how to trigger the SD controller command
to fail.  It always happens if you read more than one block through one
command, where the last block is accessing the last disk block.  If you
only access the last disk block directly, the command doesn't fail.
Some examples:

# dd if=/dev/rsd0c of=/dev/null skip=15591935 bs=4096 count=1
nblks=8, datalen=4096, blklen=512, arg=0x76f4ff8, cmd=0x8052
CMD FAIL!
bcmsdhost0: transfer timeout!

# dd if=/dev/rsd0c of=/dev/null skip=31183871 bs=2048 count=1
CMD FAIL!
bcmsdhost0: transfer timeout!

# dd if=/dev/rsd0c of=/dev/null skip=62367743 bs=1024 count=1
CMD FAIL!
bcmsdhost0: transfer timeout!

# dd if=/dev/rsd0c of=/dev/null skip=124735487 bs=512 count=1
< all good >

I don't think it's an issue with the microSD card since I have NetBSD
installed on a different card of exactly the same type/size, and there I
also can trigger the issue:

# dd if=/dev/rld0c of=/dev/null skip=15591935 bs=4096 count=1
[ 179.4170221] ld0c: error reading fsbn 124735480 of 124735480-124735487
(ld0 bn 124735480; cn 60905 tn 63 sn 24), retrying

Also I have tried that on the Pine64/sximmc(4) with the same microSD
card, and there all is hunky-dory.

It seems to be a behavior/bug of the BCM2835 SD controller.  But I'm
not sure what's the right approach to work around that in the driver, or
even if there is a real fix by changing the command code.

I mean one can just change the disklabel to skip the last block, but
it's not really a solution since somebody lazy like me, who does a
very simple disklabel where 'a'=root, and 'b'=swap is the rest of the
disk can easily trigger that "bug" through e.g. savecore(8) which we
run by default during boot.  At least in this case it's a very good
debugger :-)



Re: [External] : [ICMP] IP options lead to malformed reply

2021-03-24 Thread Alexandr Nedvedicky
Hello Dominik,

thanks for reporting a bug. I'll take a look at it later today.

your proposed fix re-introduces a recursion to PF, which we want to avoid,
hence we let icmp_send() to dispatch outbound ICMP response to task.

We really need to fix ip_send() such the output task will handle IP options
properly.

thanks and
regards
sashan

On Wed, Mar 24, 2021 at 01:36:20PM +, Schreilechner, Dominik wrote:
> Hi,
> 
> When sending an ICMP request with IP options to an OpenBSD Box, the
> ICMP payload of the reply is malformed. In the reply the length field
> of the IP header is 20, even though the reply contains the IP options.
> Meaning, in this packet the IP options are part of ICMP and not IP.
> This was tested with OpenBSD 6.8 and Nmap's Nping command:
> nping --ip-options "T" 
> 
> The problem is not specific to this IP option, but it can be reproduced
> with others as well.
> 
> The root cause of the issue seems to be that the IP options of the ICMP
> reply are added in icmp_send() and not in ip_output(). icmp_send() will
> forward the packet to the softnet task via ip_send(). The softnet task
> will then call ip_output(m, ...). Here all arguments to ip_output are
> NULL or 0, except for the mbuf containing the IP packet. Thus, the opt
> mbuf is NULL as well.
> ip_output() cannot assume that the ip->ip_hl field is initialized by
> its caller. Therefore, the header length is set to the default 20 bytes
> and only if an opt mbuf is passed to ip_output(), the options are
> accounted for in the header length. In other words, all packets passed
> to ip_send() will be send with an IP header length of 20 bytes,
> regardless if they contain IP options or not.
> 
> I think that the problem exists since this commit:
> https://urldefense.com/v3/__https://github.com/openbsd/src/commit/528ff3946710c3940efc90589f62c714f31fb812__;!!GqivPVa7Brio!JsIZviL72HJW1cGSiq3PyhzGSGLom2qAQI5JOPj8H_ZwR5yE-ksAGF92-lCfjJDvuoOsv4Gq$
>  
> 
> For ICMP a solution would be to replace ip_send() with ip_output(), as
> it was before the commit above. A quick grep suggests that ICMP is the
> only caller of ip_send(), that uses IP options. However, I am not sure
> what other implications this change has. (Anyways, diff below)
> 
> Another way would be to modify ip_send(), so that it additionally has
> an option-mbuf as parameter. But as far as I can tell, this means the
> mbuf_queue structure and its related functions cannot be used and a new
> queue is needed, that holds two mbufs (the packet and the options) per
> entry. Which means, even more changes...
> 
> Regards
> Dominik
> 
> diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
> index a007aa6c2b3..b4bb2bb7f6f 100644
> --- a/sys/netinet/ip_icmp.c
> +++ b/sys/netinet/ip_icmp.c
> @@ -846,10 +846,7 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
> printf("icmp_send dst %s src %s\n", dst, src);
> }
>  #endif
> -   if (opts != NULL)
> -   m = ip_insertoptions(m, opts, );
> -
> -   ip_send(m);
> +   ip_output(m, opts, NULL, 0, NULL, NULL, 0);
>  }
> 
>  u_int32_t
> 



efiboot/arm64: fix "mach dtb" return code to avoid bogus boot

2021-03-24 Thread Klemens Nanni
Bootloader command functions must return zero in case of failure,
returning 1 tells the bootloader to boot the file.

arm64's `machine dtb' command has it the wrong way so using it triggers
a boot that doesn't make any sense:

>> OpenBSD/arm64 BOOTAA64 1.4
boot> mach dtb
booting sd0a:/etc/boot.conf: open sd0a:/etc/boot.conf: No such file or 
directory
failed(2). will try /bsd
boot> mach dtb /foo
cannot open sd0a:/foo
NOTE: random seed is being reused.
booting sd0a:/etc/boot.conf: open sd0a:/etc/boot.conf: No such file or 
directory
failed(2). will try /bsd

With this diff:

>> OpenBSD/arm64 BOOTAA64 1.4
boot> mach dtb
dtb file
boot> mach dtb /foo
cannot open sd0a:/foo

While here, tell users how to use that command (like other commands
such as `hexdump' do).

Feedback? OK?

Index: arch/arm64/stand/efiboot/efiboot.c
===
RCS file: /cvs/src/sys/arch/arm64/stand/efiboot/efiboot.c,v
retrieving revision 1.31
diff -u -p -r1.31 efiboot.c
--- arch/arm64/stand/efiboot/efiboot.c  9 Mar 2021 21:11:24 -   1.31
+++ arch/arm64/stand/efiboot/efiboot.c  24 Mar 2021 17:59:52 -
@@ -980,28 +980,30 @@ Xdtb_efi(void)
 
 #define O_RDONLY   0
 
-   if (cmd.argc != 2)
-   return (1);
+   if (cmd.argc != 2) {
+   printf("dtb file\n");
+   return (0);
+   }
 
snprintf(path, sizeof(path), "%s:%s", cmd.bootdev, cmd.argv[1]);
 
fd = open(path, O_RDONLY);
if (fd < 0 || fstat(fd, ) == -1) {
printf("cannot open %s\n", path);
-   return (1);
+   return (0);
}
if (efi_memprobe_find(EFI_SIZE_TO_PAGES(sb.st_size),
0x1000, ) != EFI_SUCCESS) {
printf("cannot allocate memory for %s\n", path);
-   return (1);
+   return (0);
}
if (read(fd, (void *)addr, sb.st_size) != sb.st_size) {
printf("cannot read from %s\n", path);
-   return (1);
+   return (0);
}
 
fdt = (void *)addr;
-   return (0);
+   return (1);
 }
 
 int



Re: wsconsctl.conf: mention mouse.tp.tapping in example

2021-03-24 Thread Raf Czlonka
On Wed, Mar 24, 2021 at 01:42:19PM GMT, Klemens Nanni wrote:
> On Tue, Mar 23, 2021 at 12:41:25PM +, Raf Czlonka wrote:
> > According to wsmouse(4), the orders is as follows:
> > 
> > This list of three parameters configures these mappings,
> > in the order:
> > 
> >one-finger,two-finger,three-finger
> > 
> > Setting a parameter to a positive value enables that tap
> > gesture and maps it to the given mouse button.  To disable
> > all three tap gestures at once, provide the single value
> > of 0.  Conversely, a single non-zero value will enable
> > one-finger, two-finger, and three-finger tap gestures with
> > their default mappings of left button, right button, and
> > middle button, respectively.
> I had to let this be for a day and reread it now.

Hi Klemens,

No worries - I had to re-read your patch and the man page several
times just to be sure I got it right ;^)

> > Your updated patch reads:
> > 
> > #mouse.tp.tapping=0 # 1,3,2=interpret one/three/two (simultaneous) 
> > # tap(s) as left/middle/right mouse button click
> > 
> > Whilst technically correct, if I'm reading the man page correctly,
> > it is a little bit confusing as the gestures are out of order and
> > the digits look like they represent the number of fingers when, in
> > reality, they represent the mouse *buttons* so, unless I got totally
> > confused by either of the above, or am missing something altogether,
> > the patch should probably read:
> > 
> > #mouse.tp.tapping=1 # 1,3,2=interpret one/two/three (simultaneous) 
> > # tap(s) as left/right/middle mouse button click
> > 
> > As it is clear that digits do not map to numerals.
> So yes, you're right.

I suggested "1" instead of "0" on purpose, as it would match the
comment/description. As per the man page, "0" disable all tap gestures.

Regards,

Raf

> Anyone willing to OK this?
> 
> Index: wsconsctl.conf
> ===
> RCS file: /cvs/src/etc/examples/wsconsctl.conf,v
> retrieving revision 1.1
> diff -u -p -r1.1 wsconsctl.conf
> --- wsconsctl.conf16 Jul 2014 13:21:33 -  1.1
> +++ wsconsctl.conf24 Mar 2021 13:40:02 -
> @@ -9,3 +9,5 @@
>  #display.vblank=on   # enable vertical sync blank for screen burner
>  #display.screen_off=6# set screen burner timeout to 60 seconds
>  #display.msact=off   # disable screen unburn w/ mouse
> +#mouse.tp.tapping=0  # 1,3,2=interpret one-/two-/three-finger taps as
> + # 1st/3rd/2nd, i.e. left/right/middle button 
> click



Re: wsconsctl.conf: mention mouse.tp.tapping in example

2021-03-24 Thread Klemens Nanni
On Tue, Mar 23, 2021 at 12:41:25PM +, Raf Czlonka wrote:
> According to wsmouse(4), the orders is as follows:
> 
>   This list of three parameters configures these mappings,
>   in the order:
> 
>one-finger,two-finger,three-finger
> 
>   Setting a parameter to a positive value enables that tap
>   gesture and maps it to the given mouse button.  To disable
>   all three tap gestures at once, provide the single value
>   of 0.  Conversely, a single non-zero value will enable
>   one-finger, two-finger, and three-finger tap gestures with
>   their default mappings of left button, right button, and
>   middle button, respectively.
I had to let this be for a day and reread it now.

> Your updated patch reads:
> 
>   #mouse.tp.tapping=0 # 1,3,2=interpret one/three/two (simultaneous) 
>   # tap(s) as left/middle/right mouse button click
> 
> Whilst technically correct, if I'm reading the man page correctly,
> it is a little bit confusing as the gestures are out of order and
> the digits look like they represent the number of fingers when, in
> reality, they represent the mouse *buttons* so, unless I got totally
> confused by either of the above, or am missing something altogether,
> the patch should probably read:
> 
>   #mouse.tp.tapping=1 # 1,3,2=interpret one/two/three (simultaneous) 
>   # tap(s) as left/right/middle mouse button click
> 
> As it is clear that digits do not map to numerals.
So yes, you're right.

Anyone willing to OK this?

Index: wsconsctl.conf
===
RCS file: /cvs/src/etc/examples/wsconsctl.conf,v
retrieving revision 1.1
diff -u -p -r1.1 wsconsctl.conf
--- wsconsctl.conf  16 Jul 2014 13:21:33 -  1.1
+++ wsconsctl.conf  24 Mar 2021 13:40:02 -
@@ -9,3 +9,5 @@
 #display.vblank=on # enable vertical sync blank for screen burner
 #display.screen_off=6  # set screen burner timeout to 60 seconds
 #display.msact=off # disable screen unburn w/ mouse
+#mouse.tp.tapping=0# 1,3,2=interpret one-/two-/three-finger taps as
+   # 1st/3rd/2nd, i.e. left/right/middle button 
click



[ICMP] IP options lead to malformed reply

2021-03-24 Thread Schreilechner, Dominik
Hi,

When sending an ICMP request with IP options to an OpenBSD Box, the
ICMP payload of the reply is malformed. In the reply the length field
of the IP header is 20, even though the reply contains the IP options.
Meaning, in this packet the IP options are part of ICMP and not IP.
This was tested with OpenBSD 6.8 and Nmap's Nping command:
nping --ip-options "T" 

The problem is not specific to this IP option, but it can be reproduced
with others as well.

The root cause of the issue seems to be that the IP options of the ICMP
reply are added in icmp_send() and not in ip_output(). icmp_send() will
forward the packet to the softnet task via ip_send(). The softnet task
will then call ip_output(m, ...). Here all arguments to ip_output are
NULL or 0, except for the mbuf containing the IP packet. Thus, the opt
mbuf is NULL as well.
ip_output() cannot assume that the ip->ip_hl field is initialized by
its caller. Therefore, the header length is set to the default 20 bytes
and only if an opt mbuf is passed to ip_output(), the options are
accounted for in the header length. In other words, all packets passed
to ip_send() will be send with an IP header length of 20 bytes,
regardless if they contain IP options or not.

I think that the problem exists since this commit:
https://github.com/openbsd/src/commit/528ff3946710c3940efc90589f62c714f31fb812

For ICMP a solution would be to replace ip_send() with ip_output(), as
it was before the commit above. A quick grep suggests that ICMP is the
only caller of ip_send(), that uses IP options. However, I am not sure
what other implications this change has. (Anyways, diff below)

Another way would be to modify ip_send(), so that it additionally has
an option-mbuf as parameter. But as far as I can tell, this means the
mbuf_queue structure and its related functions cannot be used and a new
queue is needed, that holds two mbufs (the packet and the options) per
entry. Which means, even more changes...

Regards
Dominik

diff --git a/sys/netinet/ip_icmp.c b/sys/netinet/ip_icmp.c
index a007aa6c2b3..b4bb2bb7f6f 100644
--- a/sys/netinet/ip_icmp.c
+++ b/sys/netinet/ip_icmp.c
@@ -846,10 +846,7 @@ icmp_send(struct mbuf *m, struct mbuf *opts)
printf("icmp_send dst %s src %s\n", dst, src);
}
 #endif
-   if (opts != NULL)
-   m = ip_insertoptions(m, opts, );
-
-   ip_send(m);
+   ip_output(m, opts, NULL, 0, NULL, NULL, 0);
 }

 u_int32_t



Re: monotonic time going back by wrong skews

2021-03-24 Thread Josh Rickmar
On Wed, Mar 24, 2021 at 05:40:21PM +0900, YASUOKA Masahiko wrote:
> Hi,
> 
> I hit a problem which is caused by going back of monotonic time.  It
> happens on hosts on VMware ESXi.
> 
> I wrote the program which repeats the problem.
> 
>  % cc -o monotime monotime.c -lpthread
>  % ./monotime
>  194964 Starting
>  562210 Starting
>  483046 Starting
>  148865 Starting
>  148865 Back 991.808048665 => 991.007447931
>  562210 Back 991.808048885 => 991.007448224
>  483046 Back 991.808049115 => 991.007449172
>  148865 Stopped
>  562210 Stopped
>  483046 Stopped
>  194964 Stopped
>  % uname -a
>  OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
>  % sysctl kern.version
>  kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
>  
> r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
>  %
> 
> monotime.c
> 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> #include 
> 
> #define NTHREAD   4
> #define NTRY  5
> 
> void *
> start(void *dummy)
> {
>   int i;
>   struct timespec ts0, ts1;
> 
>   printf("%d Starting\n", (int)getthrid());
>   clock_gettime(CLOCK_MONOTONIC, );
> 
>   for (i = 0; i < NTRY; i++) {
>   clock_gettime(CLOCK_MONOTONIC, );
>   if (timespeccmp(, , <=)) {
>   ts0 = ts1;
>   continue;
>   }
>   printf("%d Back %lld.%09lu => %lld.%09lu\n",
>   (int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
>   ts1.tv_nsec);
>   break;
>   }
>   printf("%d Stopped\n", (int)getthrid());
> 
>   return (NULL);
> }
> 
> int
> main(int argc, char *argv[])
> {
>   int i, n = NTHREAD;
>   pthread_t *threads;
> 
>   threads = calloc(n, sizeof(pthread_t));
> 
>   for (i = 0; i < n; i++)
>   pthread_create([i], NULL, start, NULL);
>   for (i = 0; i < n; i++)
>   pthread_join(threads[i], NULL);
> 
> }
> 
> 
> The machine has 4 vCPUs and showing the following message on boot.
> 
>   cpu1: disabling user TSC (skew=-5310)
>   cpu2: disabling user TSC (skew=-5335)
>   cpu3: disabling user TSC (skew=-7386)
> 
> This means "user TSC" is disabled because of TSC of cpu{1,2,3} is much
> delayed against cpu0.
> 
> Simply ignoring the skews by the following diff seems to workaround
> this problem.
> 
> diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
> index 238a5a068e1..3b951a8b5a3 100644
> --- a/sys/arch/amd64/amd64/tsc.c
> +++ b/sys/arch/amd64/amd64/tsc.c
> @@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
>  u_int
>  tsc_get_timecount(struct timecounter *tc)
>  {
> - return rdtsc_lfence() + curcpu()->ci_tsc_skew;
> + //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
> + return rdtsc_lfence();
>  }
>  
>  void
> 
> So I supposed the skews are not calculated properly.  Also I found
> NetBSD changed the skew calculating so that it checks 1000 times and
> take the minimum value.
> 
>   
> https://github.com/NetBSD/src/commit/1dec05c1ae197b4acfc7038e49dfddabcbed0dff
>   
> https://github.com/NetBSD/src/commit/66d76b89792bac1c71cd5507ba62b08ad02129ef
> 
> 
> I checked skews on the machine by the following debug code.
> 
> diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
> index 238a5a068e1..83e835e4f82 100644
> --- a/sys/arch/amd64/amd64/tsc.c
> +++ b/sys/arch/amd64/amd64/tsc.c
> @@ -302,16 +302,42 @@ tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, 
> uint64_t *aptscp)
>   *aptscp = tsc_sync_val;
>  }
>  
> +#define  TSC_SYNC_NTIMES 1000
> +
> +static int tsc_difs[MAXCPUS][TSC_SYNC_NTIMES];
> +
> +void
> +tsc_debug(void)
> +{
> + int i, cpuid = curcpu()->ci_cpuid;
> +
> + for (i = 0; i < TSC_SYNC_NTIMES; i++) {
> + if (i % 10 == 0)
> + printf("%5d", tsc_difs[cpuid][i]);
> + else
> + printf(" %5d", tsc_difs[cpuid][i]);
> + if (i % 10 == 9)
> + printf("\n");
> + }
> + printf("\n");
> +}
> +
>  void
>  tsc_sync_bp(struct cpu_info *ci)
>  {
> + int i, mindif = INT_MAX, dif;
>   uint64_t bptsc, aptsc;
>  
> - tsc_read_bp(ci, , ); /* discarded - cache effects */
> - tsc_read_bp(ci, , );
> + for (i = 0; i < TSC_SYNC_NTIMES; i++) {
> + tsc_read_bp(ci, , );
> + dif = bptsc - aptsc;
> + if (abs(dif) < abs(mindif))
> + mindif = dif;
> + tsc_difs[ci->ci_cpuid][i] = dif;
> + }
>  
>   /* Compute final value to adjust for skew. */
> - ci->ci_tsc_skew = bptsc - aptsc;
> + ci->ci_tsc_skew = mindif;
>  }
>  
>  /*
> @@ -342,8 +368,10 @@ tsc_post_ap(struct cpu_info *ci)
>  void
>  tsc_sync_ap(struct cpu_info *ci)
>  {
> - tsc_post_ap(ci);
> - tsc_post_ap(ci);
> + int i;
> +
> + for (i = 0; i < TSC_SYNC_NTIMES; i++)
> + tsc_post_ap(ci);
>  

Re: monotonic time going back by wrong skews

2021-03-24 Thread YASUOKA Masahiko
Hi,

> Second, why is taking the minimum value the optimal choice? I would
> assume an average would be better. Basically if you have a sequency
> like 900, 900, 900, 900, 0, 900, 900, 900 you pick 0 which could lead
> to some problems, right? Or am I missing something?"

Skews on VMware

>> -8445 -6643 -52183 0-3-4-7   -11-5 0
>>-11-9-5-3-4-3-7 8-5-6
>> -5-9-3-9-7-1-5-5-9-2
>> -6-4-6-4   -11-8-3-4-8-1
>> -9-1-8 1-8 6-5-4 2-2
>> -8-3-1-5-2-2 1 2-2-9
>>-12 0-9-2-2-5-2 1 2 0


First 3 seem to be storange.  Also there is such a value on middle of
sampling.

>>  9-1   -10 50505-1 2 6   -11 2-2

I suppose such values should be excluded.

Also I did same test on my VAIO.  It seems more constant than VMware.
Full result is attached at last.

Is it possible that the calculation code is taking effects from the
CPU scheduler of its virtual supervisor?

Thanks,

On Wed, 24 Mar 2021 13:04:32 +0200
Paul Irofti  wrote:
> Hi,
> 
> Thank you for taking this to tech@ as requested!
> 
> I will reproduce here what I replied to Yasouka and Scott (which I
> think proposed taking the minimum skew value) in private.
> 
> "First, thank you very much for the in-depth analysis. I would suggest
> you take this to a public forum like tech@ so that we can keep the
> discussion opened and civilized.
> 
> I remember when I wrote the CPU synchronization code, that I tried
> doing sampling but it had some issues that now I don't remember of. So
> let us try this on real hardware too. This is another argument for
> moving this to tech@.
> 
> Second, why is taking the minimum value the optimal choice? I would
> assume an average would be better. Basically if you have a sequency
> like 900, 900, 900, 900, 0, 900, 900, 900 you pick 0 which could lead
> to some problems, right? Or am I missing something?"
> 
> So could people give the minimum skew approach a spin on real machines
> to see if there are any issues popping up?
> 
> All the best,
> Paul
> 
> On 3/24/21 10:40 AM, YASUOKA Masahiko wrote:
>> Hi,
>> I hit a problem which is caused by going back of monotonic time.  It
>> happens on hosts on VMware ESXi.
>> I wrote the program which repeats the problem.
>>   % cc -o monotime monotime.c -lpthread
>>   % ./monotime
>>   194964 Starting
>>   562210 Starting
>>   483046 Starting
>>   148865 Starting
>>   148865 Back 991.808048665 => 991.007447931
>>   562210 Back 991.808048885 => 991.007448224
>>   483046 Back 991.808049115 => 991.007449172
>>   148865 Stopped
>>   562210 Stopped
>>   483046 Stopped
>>   194964 Stopped
>>   % uname -a
>>   OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
>>   % sysctl kern.version
>>   kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
>>   
>> r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
>>   %
>> monotime.c
>> 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #include 
>> #define NTHREAD  4
>> #define NTRY 5
>> void *
>> start(void *dummy)
>> {
>>  int i;
>>  struct timespec ts0, ts1;
>>  printf("%d Starting\n", (int)getthrid());
>>  clock_gettime(CLOCK_MONOTONIC, );
>>  for (i = 0; i < NTRY; i++) {
>>  clock_gettime(CLOCK_MONOTONIC, );
>>  if (timespeccmp(, , <=)) {
>>  ts0 = ts1;
>>  continue;
>>  }
>>  printf("%d Back %lld.%09lu => %lld.%09lu\n",
>>  (int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
>>  ts1.tv_nsec);
>>  break;
>>  }
>>  printf("%d Stopped\n", (int)getthrid());
>>  return (NULL);
>> }
>> int
>> main(int argc, char *argv[])
>> {
>>  int i, n = NTHREAD;
>>  pthread_t *threads;
>>  threads = calloc(n, sizeof(pthread_t));
>>  for (i = 0; i < n; i++)
>>  pthread_create([i], NULL, start, NULL);
>>  for (i = 0; i < n; i++)
>>  pthread_join(threads[i], NULL);
>> }
>> 
>> The machine has 4 vCPUs and showing the following message on boot.
>>cpu1: disabling user TSC (skew=-5310)
>>cpu2: disabling user TSC (skew=-5335)
>>cpu3: disabling user TSC (skew=-7386)
>> This means "user TSC" is disabled because of TSC of cpu{1,2,3} is much
>> delayed against cpu0.
>> Simply ignoring the skews by the following diff seems to workaround
>> this problem.
>> diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
>> index 238a5a068e1..3b951a8b5a3 100644
>> --- a/sys/arch/amd64/amd64/tsc.c
>> +++ b/sys/arch/amd64/amd64/tsc.c
>> @@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
>>   u_int
>>   tsc_get_timecount(struct timecounter *tc)
>>   {
>> -return rdtsc_lfence() 

libcrypto bio_cb.c: fix mangled debug output

2021-03-24 Thread Martin Vahlensieck
Hi

This fixes mangled output from the openssl(1) -debug option:

Before:
$ openssl aes-256-cbc -out test -debug
BIO[0x9102a7e5ctrl(106) - FILE pointer
BIO[0x9102a7e5ctrl return 1
BIO[0x9102a801ctrl(108) - FILE pointer
BIO[0x9102a801ctrl return 1
...

After:
$ openssl aes-256-cbc -out test -debug
BIO[0x5770f81ce00]:ctrl(106) - FILE pointer
BIO[0x5770f81ce00]:ctrl return 1
BIO[0x5770f81c200]:ctrl(108) - FILE pointer
BIO[0x5770f81c200]:ctrl return 1
BIO[0x5770f81c200]:write(0,8) - FILE pointer
...

The issue is that BIO_debug_callback(3) assumes that the pointer
formatted with %p takes up 6 chars.

Best,

Martin

Index: bio/bio_cb.c
===
RCS file: /cvs/src/lib/libcrypto/bio/bio_cb.c,v
retrieving revision 1.16
diff -u -p -r1.16 bio_cb.c
--- bio/bio_cb.c8 Dec 2014 03:54:19 -   1.16
+++ bio/bio_cb.c24 Mar 2021 09:50:45 -
@@ -70,15 +70,20 @@ BIO_debug_callback(BIO *bio, int cmd, co
BIO *b;
char buf[256];
char *p;
+   int nbuf;
long r = 1;
size_t p_maxlen;
 
if (BIO_CB_RETURN & cmd)
r = ret;
 
-   snprintf(buf, sizeof buf, "BIO[%p]:", bio);
-   p = &(buf[14]);
-   p_maxlen = sizeof buf - 14;
+   nbuf = snprintf(buf, sizeof buf, "BIO[%p]:", bio);
+
+   if (nbuf > sizeof buf)
+   nbuf = sizeof buf;
+
+   p = &(buf[nbuf]);
+   p_maxlen = sizeof buf - nbuf;
switch (cmd) {
case BIO_CB_FREE:
snprintf(p, p_maxlen, "Free - %s\n", bio->method->name);



Re: monotonic time going back by wrong skews

2021-03-24 Thread Paul Irofti

Hi,

Thank you for taking this to tech@ as requested!

I will reproduce here what I replied to Yasouka and Scott (which I think 
proposed taking the minimum skew value) in private.


"First, thank you very much for the in-depth analysis. I would suggest 
you take this to a public forum like tech@ so that we can keep the 
discussion opened and civilized.


I remember when I wrote the CPU synchronization code, that I tried doing 
sampling but it had some issues that now I don't remember of. So let us 
try this on real hardware too. This is another argument for moving this 
to tech@.


Second, why is taking the minimum value the optimal choice? I would 
assume an average would be better. Basically if you have a sequency like 
900, 900, 900, 900, 0, 900, 900, 900 you pick 0 which could lead to some 
problems, right? Or am I missing something?"


So could people give the minimum skew approach a spin on real machines 
to see if there are any issues popping up?


All the best,
Paul

On 3/24/21 10:40 AM, YASUOKA Masahiko wrote:

Hi,

I hit a problem which is caused by going back of monotonic time.  It
happens on hosts on VMware ESXi.

I wrote the program which repeats the problem.

  % cc -o monotime monotime.c -lpthread
  % ./monotime
  194964 Starting
  562210 Starting
  483046 Starting
  148865 Starting
  148865 Back 991.808048665 => 991.007447931
  562210 Back 991.808048885 => 991.007448224
  483046 Back 991.808049115 => 991.007449172
  148865 Stopped
  562210 Stopped
  483046 Stopped
  194964 Stopped
  % uname -a
  OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
  % sysctl kern.version
  kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
  
r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
  %

monotime.c

#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define NTHREAD 4
#define NTRY5

void *
start(void *dummy)
{
int i;
struct timespec ts0, ts1;

printf("%d Starting\n", (int)getthrid());
clock_gettime(CLOCK_MONOTONIC, );

for (i = 0; i < NTRY; i++) {
clock_gettime(CLOCK_MONOTONIC, );
if (timespeccmp(, , <=)) {
ts0 = ts1;
continue;
}
printf("%d Back %lld.%09lu => %lld.%09lu\n",
(int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
ts1.tv_nsec);
break;
}
printf("%d Stopped\n", (int)getthrid());

return (NULL);
}

int
main(int argc, char *argv[])
{
int i, n = NTHREAD;
pthread_t *threads;

threads = calloc(n, sizeof(pthread_t));

for (i = 0; i < n; i++)
pthread_create([i], NULL, start, NULL);
for (i = 0; i < n; i++)
pthread_join(threads[i], NULL);

}


The machine has 4 vCPUs and showing the following message on boot.

   cpu1: disabling user TSC (skew=-5310)
   cpu2: disabling user TSC (skew=-5335)
   cpu3: disabling user TSC (skew=-7386)

This means "user TSC" is disabled because of TSC of cpu{1,2,3} is much
delayed against cpu0.

Simply ignoring the skews by the following diff seems to workaround
this problem.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..3b951a8b5a3 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
  u_int
  tsc_get_timecount(struct timecounter *tc)
  {
-   return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   return rdtsc_lfence();
  }
  
  void


So I supposed the skews are not calculated properly.  Also I found
NetBSD changed the skew calculating so that it checks 1000 times and
take the minimum value.

   https://github.com/NetBSD/src/commit/1dec05c1ae197b4acfc7038e49dfddabcbed0dff
   https://github.com/NetBSD/src/commit/66d76b89792bac1c71cd5507ba62b08ad02129ef


I checked skews on the machine by the following debug code.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..83e835e4f82 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -302,16 +302,42 @@ tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, 
uint64_t *aptscp)
*aptscp = tsc_sync_val;
  }
  
+#define	TSC_SYNC_NTIMES	1000

+
+static int tsc_difs[MAXCPUS][TSC_SYNC_NTIMES];
+
+void
+tsc_debug(void)
+{
+   int i, cpuid = curcpu()->ci_cpuid;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   if (i % 10 == 0)
+   printf("%5d", tsc_difs[cpuid][i]);
+   else
+   printf(" %5d", tsc_difs[cpuid][i]);
+   if (i % 10 == 9)
+   printf("\n");
+   }
+   printf("\n");
+}
+
  void
  tsc_sync_bp(struct cpu_info *ci)
  {
+   int i, mindif = INT_MAX, dif;
uint64_t bptsc, 

mfs + cd9660: vop #define cleanup

2021-03-24 Thread Sebastien Marie
Hi,

The following diff removes some #define which hide generic vop
functions behind another classic name.

It makes clearer which vop functions are real
fileystem-implementations and which one are only stubs.

Only mfs and cd9660 are using such #define.

No functional changes are intented.

Comments or OK ?
-- 
Sebastien Marie


blob - a662a6787ef9a6b06363441fb8778d66a29c04d0
blob + 2c54a18f3ff43c3eac65a4e520f4dcb625fa2473
--- sys/ufs/mfs/mfs_extern.h
+++ sys/ufs/mfs/mfs_extern.h
@@ -61,6 +61,5 @@ int mfs_close(void *);
 int mfs_inactive(void *);
 int mfs_reclaim(void *);
 int mfs_print(void *);
-#definemfs_revoke vop_generic_revoke
 int mfs_badop(void *);
 
blob - 00c0c24efbe9fa4046b1569a12be47c74d3d180f
blob + b5b1d430cc9f0dac3eb40bdc71ccc16f5efa67cf
--- sys/ufs/mfs/mfs_vnops.c
+++ sys/ufs/mfs/mfs_vnops.c
@@ -61,9 +61,9 @@ const struct vops mfs_vops = {
 .vop_ioctl  = mfs_ioctl,
 .vop_poll   = mfs_badop,
 .vop_kqfilter   = mfs_badop,
-.vop_revoke = mfs_revoke,
+.vop_revoke = vop_generic_revoke,
 .vop_fsync  = spec_fsync,
 .vop_remove = mfs_badop,
 .vop_link   = mfs_badop,

blob - b7cc93f86c5c8d28c28b3ad81286600941c23875
blob + 3c70501643e8bf15af237c3056919bd222dc4206
--- sys/isofs/cd9660/cd9660_vnops.c
+++ sys/isofs/cd9660/cd9660_vnops.c
@@ -811,44 +811,29 @@ cd9660_pathconf(void *v)
 /*
  * Global vfs data structures for isofs
  */
-#definecd9660_create   eopnotsupp
-#definecd9660_mknodeopnotsupp
-#definecd9660_writeeopnotsupp
-#definecd9660_fsyncnullop
-#definecd9660_remove   eopnotsupp
-#definecd9660_rename   eopnotsupp
-#definecd9660_mkdireopnotsupp
-#definecd9660_rmdireopnotsupp
-#definecd9660_advlock  eopnotsupp
-#definecd9660_valloc   eopnotsupp
-#definecd9660_vfreeeopnotsupp
-#definecd9660_truncate eopnotsupp
-#definecd9660_update   eopnotsupp
-#definecd9660_bwrite   eopnotsupp
-#define cd9660_revoke   vop_generic_revoke
 
 /* Global vfs data structures for cd9660. */
 const struct vops cd9660_vops = {
.vop_lookup = cd9660_lookup,
-   .vop_create = cd9660_create,
-   .vop_mknod  = cd9660_mknod,
+   .vop_create = eopnotsupp,
+   .vop_mknod  = eopnotsupp,
.vop_open   = cd9660_open,
.vop_close  = cd9660_close,
.vop_access = cd9660_access,
.vop_getattr= cd9660_getattr,
.vop_setattr= cd9660_setattr,
.vop_read   = cd9660_read,
-   .vop_write  = cd9660_write,
+   .vop_write  = eopnotsupp,
.vop_ioctl  = cd9660_ioctl,
.vop_poll   = cd9660_poll,
.vop_kqfilter   = cd9660_kqfilter,
-   .vop_revoke = cd9660_revoke,
-   .vop_fsync  = cd9660_fsync,
-   .vop_remove = cd9660_remove,
+   .vop_revoke = vop_generic_revoke,
+   .vop_fsync  = nullop,
+   .vop_remove = eopnotsupp,
.vop_link   = cd9660_link,
-   .vop_rename = cd9660_rename,
-   .vop_mkdir  = cd9660_mkdir,
-   .vop_rmdir  = cd9660_rmdir,
+   .vop_rename = eopnotsupp,
+   .vop_mkdir  = eopnotsupp,
+   .vop_rmdir  = eopnotsupp,
.vop_symlink= cd9660_symlink,
.vop_readdir= cd9660_readdir,
.vop_readlink   = cd9660_readlink,
@@ -862,8 +847,8 @@ const struct vops cd9660_vops = {
.vop_print  = cd9660_print,
.vop_islocked   = cd9660_islocked,
.vop_pathconf   = cd9660_pathconf,
-   .vop_advlock= cd9660_advlock,
-   .vop_bwrite = vop_generic_bwrite
+   .vop_advlock= eopnotsupp,
+   .vop_bwrite = vop_generic_bwrite,
 };
 
 /* Special device vnode ops */



Re: namei/execve: entanglement's simplification

2021-03-24 Thread Sebastien Marie
Please withdraw for now.

sys/exec.h change doesn't fit well in userland.

Thanks.

On Mon, Mar 22, 2021 at 11:05:06AM +0100, Sebastien Marie wrote:
> Hi,
> 
> The following diff tries to simplify a bit the entanglement of namei
> data and execve(2) syscall.
> 
> Currently, when called, sys_execve() might doing recursive calls of
> check_exec() with a shared `struct nameidata' (`ep_ndp' inside
> `struct exec_package').
> 
> I would like to disassociate them, and make check_exec() to "own" the
> `struct nameidata' data. execve(2) is complex enough, no needs to adds
> namei() complexity inside.
> 
> check_exec() will initialize nameidata, call namei() (as now), extract
> useful information for the caller (only the vnode and the
> command-name, returned via exec_package struct), and free other namei
> ressources.
> 
> To proceed, it only needs to know the wanted path (to properly init
> `nd' with NDINIT).
> 
> As the call of check_exec() could be recursive (when scripts are
> involved), the path could come from:
> - directly from sys_execve() via SCARG(uap, path) (from userspace)
> - from exec_script_makecmds() via the shellname (from systemspace)
> 
> In `struct exec_package', I opted to reuse `ep_name' for passing the
> path: it is what sys_execve() is already doing at first call. Later it
> is only used for construct the fake-args list in
> exec_script_makecmds(), and it could being overrided after that (and
> restored on error). I reordered them a bit to make it fit.
> 
> Eventually I could also introduce a new struct field for the wanted
> path.
> 
> `ep_segflg' is introduced to mark if ep_name comes from UIO_USERSPACE
> or UIO_SYSSPACE. it will be used by NDINIT() in check_exec().
> 
> `ep_comm' is the other information (with ep_vp) wanted in result of
> check_exec() call. it will be copied to `ps_comm'.
> 
> 
> Comments or OK ?
> -- 
> Sebastien Marie



monotonic time going back by wrong skews

2021-03-24 Thread YASUOKA Masahiko
Hi,

I hit a problem which is caused by going back of monotonic time.  It
happens on hosts on VMware ESXi.

I wrote the program which repeats the problem.

 % cc -o monotime monotime.c -lpthread
 % ./monotime
 194964 Starting
 562210 Starting
 483046 Starting
 148865 Starting
 148865 Back 991.808048665 => 991.007447931
 562210 Back 991.808048885 => 991.007448224
 483046 Back 991.808049115 => 991.007449172
 148865 Stopped
 562210 Stopped
 483046 Stopped
 194964 Stopped
 % uname -a
 OpenBSD yasuoka-ob-c.tokyo.iiji.jp 6.8 GENERIC.MP#5 amd64
 % sysctl kern.version
 kern.version=OpenBSD 6.8 (GENERIC.MP) #5: Mon Feb 22 04:36:10 MST 2021
 
r...@syspatch-68-amd64.openbsd.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
 %

monotime.c

#include 
#include 
#include 
#include 
#include 
#include 
#include 

#define NTHREAD 4
#define NTRY5

void *
start(void *dummy)
{
int i;
struct timespec ts0, ts1;

printf("%d Starting\n", (int)getthrid());
clock_gettime(CLOCK_MONOTONIC, );

for (i = 0; i < NTRY; i++) {
clock_gettime(CLOCK_MONOTONIC, );
if (timespeccmp(, , <=)) {
ts0 = ts1;
continue;
}
printf("%d Back %lld.%09lu => %lld.%09lu\n",
(int)getthrid(), ts0.tv_sec, ts0.tv_nsec, ts1.tv_sec,
ts1.tv_nsec);
break;
}
printf("%d Stopped\n", (int)getthrid());

return (NULL);
}

int
main(int argc, char *argv[])
{
int i, n = NTHREAD;
pthread_t *threads;

threads = calloc(n, sizeof(pthread_t));

for (i = 0; i < n; i++)
pthread_create([i], NULL, start, NULL);
for (i = 0; i < n; i++)
pthread_join(threads[i], NULL);

}


The machine has 4 vCPUs and showing the following message on boot.

  cpu1: disabling user TSC (skew=-5310)
  cpu2: disabling user TSC (skew=-5335)
  cpu3: disabling user TSC (skew=-7386)

This means "user TSC" is disabled because of TSC of cpu{1,2,3} is much
delayed against cpu0.

Simply ignoring the skews by the following diff seems to workaround
this problem.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..3b951a8b5a3 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -212,7 +212,8 @@ cpu_recalibrate_tsc(struct timecounter *tc)
 u_int
 tsc_get_timecount(struct timecounter *tc)
 {
-   return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   //return rdtsc_lfence() + curcpu()->ci_tsc_skew;
+   return rdtsc_lfence();
 }
 
 void

So I supposed the skews are not calculated properly.  Also I found
NetBSD changed the skew calculating so that it checks 1000 times and
take the minimum value.

  https://github.com/NetBSD/src/commit/1dec05c1ae197b4acfc7038e49dfddabcbed0dff
  https://github.com/NetBSD/src/commit/66d76b89792bac1c71cd5507ba62b08ad02129ef


I checked skews on the machine by the following debug code.

diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index 238a5a068e1..83e835e4f82 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -302,16 +302,42 @@ tsc_read_bp(struct cpu_info *ci, uint64_t *bptscp, 
uint64_t *aptscp)
*aptscp = tsc_sync_val;
 }
 
+#defineTSC_SYNC_NTIMES 1000
+
+static int tsc_difs[MAXCPUS][TSC_SYNC_NTIMES];
+
+void
+tsc_debug(void)
+{
+   int i, cpuid = curcpu()->ci_cpuid;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   if (i % 10 == 0)
+   printf("%5d", tsc_difs[cpuid][i]);
+   else
+   printf(" %5d", tsc_difs[cpuid][i]);
+   if (i % 10 == 9)
+   printf("\n");
+   }
+   printf("\n");
+}
+
 void
 tsc_sync_bp(struct cpu_info *ci)
 {
+   int i, mindif = INT_MAX, dif;
uint64_t bptsc, aptsc;
 
-   tsc_read_bp(ci, , ); /* discarded - cache effects */
-   tsc_read_bp(ci, , );
+   for (i = 0; i < TSC_SYNC_NTIMES; i++) {
+   tsc_read_bp(ci, , );
+   dif = bptsc - aptsc;
+   if (abs(dif) < abs(mindif))
+   mindif = dif;
+   tsc_difs[ci->ci_cpuid][i] = dif;
+   }
 
/* Compute final value to adjust for skew. */
-   ci->ci_tsc_skew = bptsc - aptsc;
+   ci->ci_tsc_skew = mindif;
 }
 
 /*
@@ -342,8 +368,10 @@ tsc_post_ap(struct cpu_info *ci)
 void
 tsc_sync_ap(struct cpu_info *ci)
 {
-   tsc_post_ap(ci);
-   tsc_post_ap(ci);
+   int i;
+
+   for (i = 0; i < TSC_SYNC_NTIMES; i++)
+   tsc_post_ap(ci);
 }
 
 void


Stopped at  db_enter+0x10:  popq%rbp
ddb{0}> machine ddbcpu 1
Stopped at  x86_ipi_db+0x12:leave
ddb{1}> call tsc_debug
-8445 -6643 -52183 0-3-4-7   -11-5 0
  -11-9-5-3-4-3-7 8-5-6
   -5-9-3-9-7-1-5