Re: My ELFSEC implementation (signed binaries for amd64)

2017-05-05 Thread Christian Weisgerber
On 2017-05-05, "Peter J. Philipp"  wrote:

> This is my second official contribution to what I call ELFSEC, it places a 
> signature in binaries, in the ELF header to be exact.
-snip-

How does this defend against binary code introduced as a shared
library by way of LD_LIBRARY_PATH or LD_PRELOAD?

-- 
Christian "naddy" Weisgerber  na...@mips.inka.de



routing socket panic

2017-05-05 Thread Mark Kettenis
Just got this panic on armv7; got a very similar panic on hppa
yesterday that I didn't have time to look into any further.  This is
completely reproducible.

setting tty flags
pf enabled
kern.allowkmem: 0 -> 1
starting network
panic: pool_do_get: mbufpl free list modified: page 0xc56a4000; item addr 
0xc56a4400; offset 0x0=0x0 != 0x24a4c1a
Stopped at  $d: ldrbr15, [r15, r15, ror r15]!
TIDPIDUID PRFLAGS PFLAGS  CPU  COMMAND
*364338  59716  00x13  00  route
panic+0x18
scp=0xc03cae90 rlv=0xc03c761c ($d)
rsp=0xcc574bf0 rfp=0xcc574c2c
pool_do_get+0xc
scp=0xc03c73ac rlv=0xc03c6f1c (pool_get+0x7c)
rsp=0xcc574c30 rfp=0xcc574c8c
r7=0x r6=0x0002 r5=0xc0726408 r4=0x00ac
pool_get+0x10
scp=0xc03c6eb0 rlv=0xc03e075c (m_get+0x2c)
rsp=0xcc574c90 rfp=0xcc574cbc
r8=0x0044 r7=0x r6=0xc56a4300 r5=0x00ac
r4=0x00ac
m_get+0x10
scp=0xc03e0740 rlv=0xc03e1964 (m_copyback+0x1a8)
rsp=0xcc574cc0 rfp=0xcc574cfc
r10=0xc53cb300 r8=0x0044 r7=0x r6=0xc56a4300
r5=0x00ac r4=0x00ac
m_copyback+0x10
scp=0xc03e17cc rlv=0xc044326c (route_output+0x350)
rsp=0xcc574d00 rfp=0xcc574d8c
r10=0xca435000 r9=0x r8=0xc53cb300 r7=0x
r6=0xc56a4300 r5=0x0001 r4=0x0001
route_output+0xc
scp=0xc0442f28 rlv=0xc043ce04 ($a+0x154)
rsp=0xcc574d90 rfp=0xcc574dbc
r10=0xc56a4300 r9=0xcc574ea0 r8=0x r7=0xc5866780
r6=0xca435000 r5=0x0009 r4=0x
raw_usrreq+0xc
scp=0xc043cc00 rlv=0xc03e56ec (sosend+0x290)
rsp=0xcc574dc0 rfp=0xcc574e1c
r10=0x r8=0x r7=0xffd6 r6=0x1f5c
r5=0xca435000 r4=0x
sosend+0xc 
scp=0xc03e5468 rlv=0xc03d247c (soo_write+0x2c)
rsp=0xcc574e20 rfp=0xcc574e3c
r10=0xca494140 r9=0x00a4 r8=0xcc574f0c r7=0x0003
r6=0x0001 r5=0xcc574fb4 r4=0xcc574f0c
soo_write+0xc
scp=0xc03d245c rlv=0xc03cfa1c (dofilewritev+0x1a4)
rsp=0xcc574e40 rfp=0xcc574ef4
dofilewritev+0xc
scp=0xc03cf884 rlv=0xc03cfc68 (sys_write+0x80)
rsp=0xcc574ef8 rfp=0xcc574f3c
r10=0x0028 r9=0x0004 r8=0xcc574f74 r7=0x0003
r6=0xca4a3cf4 r5=0xcc574fb4 r4=0xca494158
sys_write+0xc
scp=0xc03cfbf4 rlv=0xc054173c (swi_handler+0x174)
rsp=0xcc574f40 rfp=0xcc574fac
r8=0x0004 r7=0xca4a3cf4 r6=0xcc574fb0 r5=0x0003
r4=0xcc574fb4
swi_handler+0xc
scp=0xc05415d4 rlv=0xc0543fe8 (swi_entry+0x28)
rsp=0xcc574fb0 rfp=0xbffc9264
r10=0x0028 r9=0x04e5abc0 r8=0x04e50f24 r7=0x04e5ac60
r6=0x04e5aef8 r5=0x r4=0x4e10c000
https://www.openbsd.org/ddb.html describes the minimum info required in bug
reports.  Insufficient info makes it difficult to find and fix bugs.
ddb> ps
   PID TID   PPIDUID  S   FLAGS  WAIT  COMMAND
*59716  364338  77999  0  70x13route
 77999  518782   1669  0  30x10008b  pause sh
  1669   76642  1  0  30x10008b  pause sh
  2702  287769  0  0  3 0x14200  pgzerozerothread
 58941  172655  0  0  3 0x14200  aiodoned  aiodoned
 17148  492245  0  0  3 0x14200  syncerupdate
 86850  185874  0  0  3 0x14200  cleaner   cleaner
 74158  246265  0  0  3 0x14200  reaperreaper
 96587  269786  0  0  3 0x14200  pgdaemon  pagedaemon
 22576  337424  0  0  3 0x14200  bored crynlk
 17600  522232  0  0  3 0x14200  bored crypto
 19634  523615  0  0  3 0x14200  pftm  pfpurge
 58943  420327  0  0  3 0x14200  usbtskusbtask
 73136  234376  0  0  3 0x14200  usbatsk   usbatsk
 47223  147942  0  0  3 0x14200  mmctsksdmmc0
 56815  511243  0  0  3 0x14200  bored softnet
 95568  281092  0  0  3 0x14200  bored systqmp
 69722  514447  0  0  3 0x14200  bored systq
 83570  426789  0  0  3  0x40014200  bored softclock
 94516  155392  0  0  3  0x40014200idle0
 89150  250451  0  0  3 0x14200  kmalloc   kmthread
 1  308598  0  0  30x82  wait  init
 0   0 -1  0  3 0x10200  scheduler swapper



Update list of invalid users in install.sub

2017-05-05 Thread Callum R. Davies
Hi tech@, was looking through the tree for the provenance of the
amusing "No really..." message in the installer and saw that the list
of invalid users needed updating.  Names are in the order found in
passwd, with the exception of ftp.

Index: distrib/miniroot/install.sub
===
RCS file: /cvs/src/distrib/miniroot/install.sub,v
retrieving revision 1.1000
diff -u -p -r1.1000 install.sub
--- distrib/miniroot/install.sub1 May 2017 14:29:39 -   1.1000
+++ distrib/miniroot/install.sub5 May 2017 18:58:54 -
@@ -1945,7 +1945,7 @@ user_setup() {
y|yes)  _q="No really, what is the lower-case loginname, or 
'no'?"
continue
;;
-   root|daemon|operator|bin|sshd|www|nobody|ftp)
+   root|daemon|operator|bin|sshd|uucp|www|nobody|build|ftp)
;;
[a-z]*([a-z0-9_]))
((${#resp} <= 31)) && break



CoDel FSM fixup

2017-05-05 Thread Mike Belopuhov
Due to a poorly chosen initial state, we might jump immediately
into a CONTROL state bypassing the DROPPING one when the initial
state is DROPPING.  The fix is easy: the initial state must be
something neutral, so that we would transition INITIAL->DROPPING
right off the bat at the beginning of the loop and then perform
a DROPPING->CONTROL transition and either CONTROL->DROPPING and
restart the loop or CONTROL->RECOVERY/ACCEPTING.

The change in codel_state_change is necessary because valid
transitions are only INITIAL->DROPPING and DROPPING->CONTROL
in this case.

Does this look good?


diff --git sys/net/fq_codel.c sys/net/fq_codel.c
index 1c380f49f3d..c7fe38a7094 100644
--- sys/net/fq_codel.c
+++ sys/net/fq_codel.c
@@ -370,11 +370,11 @@ codel_next_packet(struct codel *cd, struct codel_params 
*cp, int64_t now,
*drop = 1;
}
return (m);
 }
 
-enum { ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };
+enum { INITIAL, ACCEPTING, FIRSTDROP, DROPPING, CONTROL, RECOVERY };
 
 static inline int
 codel_state_change(struct codel *cd, int64_t now, struct mbuf *m, int drop,
 int state)
 {
@@ -383,11 +383,11 @@ codel_state_change(struct codel *cd, int64_t now, struct 
mbuf *m, int drop,
 
if (cd->dropping) {
if (!drop)
return (RECOVERY);
else if (now >= cd->next)
-   return (state == CONTROL ? DROPPING : CONTROL);
+   return (state == DROPPING ? CONTROL : DROPPING);
} else if (drop)
return (FIRSTDROP);
 
if (m == NULL)
return (RECOVERY);
@@ -403,11 +403,11 @@ codel_dequeue(struct codel *cd, struct codel_params *cp, 
int64_t now,
unsigned short delta;
int drop, state, done = 0;
 
*dpkts = *dbytes = 0;
 
-   state = cd->dropping ? DROPPING : ACCEPTING;
+   state = INITIAL;
 
while (!done) {
m = codel_next_packet(cd, cp, now, );
state = codel_state_change(cd, now, m, drop, state);
 



Re: My ELFSEC implementation (signed binaries for amd64)

2017-05-05 Thread Kevin Chadwick
On Fri, 5 May 2017 17:56:11 +0200


> If CMAC's can be truncated then this entire implementation can be
> rewritten to not truncate for 64 bit machines and truncate for 32 bit
> machines.

There is also POLY1305-AES which is a little stronger. The more you
limit failed MAC requests the more you can truncate but obviously only
go as far as you must.



Re: [PATCH] vmd: write and read device state to and from fd

2017-05-05 Thread Reyk Floeter
On Thu, May 04, 2017 at 08:57:00PM -0700, Pratik Vyas wrote:
> Hello tech@,
> 
> This patch adds functions to read and write state of devices in vmd. The
> atomicio parts are copied from usr.bin/ssh.
> 
> Context: This is required for implementing vmctl send and vmctl receive.
> vmctl send / receive are two new options that will support snapshotting
> VMs and migrating VMs from one host to another. This project was
> undertaken at San Jose State University along with my three teammates,
> Ashwin, Harshada and Siri with mlarkin@ as our advisor.
> 
> We are working with reyk@ on cleaning up rest of the vmd changes.
> 

Once again and for the record: nice work!

See comments below, otherwise OK.

Reyk

> 
> 
> Index: usr.sbin/vmd/Makefile
> ===
> RCS file: /home/pdvyas/cvs/src/usr.sbin/vmd/Makefile,v
> retrieving revision 1.14
> diff -u -p -a -u -r1.14 Makefile
> --- usr.sbin/vmd/Makefile 19 Apr 2017 15:38:32 -  1.14
> +++ usr.sbin/vmd/Makefile 5 May 2017 03:43:41 -
> @@ -6,7 +6,7 @@ PROG= vmd
> SRCS= vmd.c control.c log.c priv.c proc.c config.c vmm.c
> SRCS+=vm.c loadfile_elf.c pci.c virtio.c i8259.c mc146818.c
> SRCS+=ns8250.c i8253.c vmboot.c ufs.c disklabel.c dhcp.c 
> packet.c
> -SRCS+=   parse.y
> +SRCS+=   parse.y atomicio.c
> 
> CFLAGS+=  -Wall -I${.CURDIR}
> CFLAGS+=  -Wstrict-prototypes -Wmissing-prototypes
> Index: usr.sbin/vmd/atomicio.c
> ===
> RCS file: usr.sbin/vmd/atomicio.c
> diff -N usr.sbin/vmd/atomicio.c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ usr.sbin/vmd/atomicio.c   5 May 2017 03:43:41 -
> @@ -0,0 +1,306 @@
> +/* $OpenBSD: atomicio.c,v 1.28 2016/07/27 23:18:12 djm Exp $ */
> +/*
> + * Copyright (c) 2006 Damien Miller. All rights reserved.
> + * Copyright (c) 2005 Anil Madhavapeddy. All rights reserved.
> + * Copyright (c) 1995,1999 Theo de Raadt.  All rights reserved.
> + * All rights reserved.
> + *
> + * Redistribution and use in source and binary forms, with or without
> + * modification, are permitted provided that the following conditions
> + * are met:
> + * 1. Redistributions of source code must retain the above copyright
> + *notice, this list of conditions and the following disclaimer.
> + * 2. Redistributions in binary form must reproduce the above copyright
> + *notice, this list of conditions and the following disclaimer in the
> + *documentation and/or other materials provided with the distribution.
> + *
> + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
> + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
> + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
> + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
> + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
> + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
> + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
> + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
> + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
> + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
> + */
> +
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "atomicio.h"
> +
> +/*
> + * ensure all of data on socket comes through. f==read || f==vwrite
> + */
> +size_t
> +atomicio6(ssize_t (*f) (int, void *, size_t), int fd, void *_s, size_t n,
> +int (*cb)(void *, size_t), void *cb_arg)
> +{
> + char *s = _s;
> + size_t pos = 0;
> + ssize_t res;
> + struct pollfd pfd;
> +
> + pfd.fd = fd;
> + pfd.events = f == read ? POLLIN : POLLOUT;
> + while (n > pos) {
> + res = (f) (fd, s + pos, n - pos);
> + switch (res) {
> + case -1:
> + if (errno == EINTR)
> + continue;
> + if (errno == EAGAIN) {
> + (void)poll(, 1, -1);
> + continue;
> + }
> + return 0;
> + case 0:
> + errno = EPIPE;
> + return pos;
> + default:
> + pos += (size_t)res;
> + if (cb != NULL && cb(cb_arg, (size_t)res) == -1) {
> + errno = EINTR;
> + return pos;
> + }
> + }
> + }
> + return pos;
> +}
> +
> +size_t
> +atomicio(ssize_t (*f) (int, void *, size_t), int fd, void *_s, size_t n)
> +{
> + return atomicio6(f, fd, _s, n, NULL, NULL);
> +}
> +
> +/*
> + * ensure all of data on socket comes through. f==readv || f==writev
> + */
> 

Re: My ELFSEC implementation (signed binaries for amd64)

2017-05-05 Thread Peter J. Philipp
On Fri, May 05, 2017 at 05:25:57PM +0100, Kevin Chadwick wrote:
> > There was concern about my use of MD5 HMAC's so I 
> > took them out.  The ELF header of 32 bit systems is too small to fit
> > SHA256 checksums, so I'm leaving it out.
> 
> Have you considered CMAC which can be truncated if need be and also
> could take advantage of AES acceleration.
> 
> Alternatively, signify perhaps.

I never considered that.  In discussion with a friend, I did consider
truncating a SHA256 HMAC, but that didn't feel right.

If CMAC's can be truncated then this entire implementation can be rewritten
to not truncate for 64 bit machines and truncate for 32 bit machines.

The code to this should be straight forward and I'll work on that next I
guess.  I have a 32 bit firewall here that I'd love to ELFSEC.

I know too little about signify in-kernel, I know I love it as a userland
program.

Regards,
-peter



ipv6 mapped address input

2017-05-05 Thread Alexander Bluhm
Hi,

Checking for IPv6 mapped IPv4 addresses is the task of ip6_input().
Doing it again in the protocol input functions gains nothing.

ok to remove the double check?

bluhm

Index: netinet/tcp_input.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/tcp_input.c,v
retrieving revision 1.343
diff -u -p -r1.343 tcp_input.c
--- netinet/tcp_input.c 4 May 2017 17:58:46 -   1.343
+++ netinet/tcp_input.c 5 May 2017 15:16:37 -
@@ -424,13 +424,6 @@ tcp_input(struct mbuf **mp, int *offp, i
iptos = (ntohl(ip6->ip6_flow) >> 20) & 0xff;
 #endif
 
-   /* Be proactive about malicious use of IPv4 mapped address */
-   if (IN6_IS_ADDR_V4MAPPED(>ip6_src) ||
-   IN6_IS_ADDR_V4MAPPED(>ip6_dst)) {
-   /* XXX stat */
-   goto drop;
-   }
-
/*
 * Be proactive about unspecified IPv6 address in source.
 * As we use all-zero to indicate unbounded/unconnected pcb,
Index: netinet/udp_usrreq.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet/udp_usrreq.c,v
retrieving revision 1.236
diff -u -p -r1.236 udp_usrreq.c
--- netinet/udp_usrreq.c4 May 2017 17:58:46 -   1.236
+++ netinet/udp_usrreq.c5 May 2017 15:19:59 -
@@ -225,12 +225,6 @@ udp_input(struct mbuf **mp, int *offp, i
goto bad;
}
ip6 = mtod(m, struct ip6_hdr *);
-   /* Be proactive about malicious use of IPv4 mapped address */
-   if (IN6_IS_ADDR_V4MAPPED(>ip6_src) ||
-   IN6_IS_ADDR_V4MAPPED(>ip6_dst)) {
-   /* XXX stat */
-   goto bad;
-   }
break;
 #endif /* INET6 */
default:
Index: netinet6/raw_ip6.c
===
RCS file: /data/mirror/openbsd/cvs/src/sys/netinet6/raw_ip6.c,v
retrieving revision 1.111
diff -u -p -r1.111 raw_ip6.c
--- netinet6/raw_ip6.c  19 Apr 2017 15:44:45 -  1.111
+++ netinet6/raw_ip6.c  5 May 2017 15:24:10 -
@@ -130,14 +130,6 @@ rip6_input(struct mbuf **mp, int *offp, 
if (proto != IPPROTO_ICMPV6)
rip6stat_inc(rip6s_ipackets);
 
-   /* Be proactive about malicious use of IPv4 mapped address */
-   if (IN6_IS_ADDR_V4MAPPED(>ip6_src) ||
-   IN6_IS_ADDR_V4MAPPED(>ip6_dst)) {
-   /* XXX stat */
-   m_freem(m);
-   return IPPROTO_DONE;
-   }
-
bzero(, sizeof(rip6src));
rip6src.sin6_len = sizeof(struct sockaddr_in6);
rip6src.sin6_family = AF_INET6;



Re: My ELFSEC implementation (signed binaries for amd64)

2017-05-05 Thread Kevin Chadwick
On Fri, 5 May 2017 14:16:37 +0200


> There was concern about my use of MD5 HMAC's so I 
> took them out.  The ELF header of 32 bit systems is too small to fit
> SHA256 checksums, so I'm leaving it out.

Have you considered CMAC which can be truncated if need be and also
could take advantage of AES acceleration.

Alternatively, signify perhaps.



Re: sdhc(4) quirks

2017-05-05 Thread Mark Kettenis
> Date: Fri, 5 May 2017 16:52:17 +0200
> From: Patrick Wildt 
> 
> On Fri, May 05, 2017 at 02:26:51PM +0200, Mark Kettenis wrote:
> > I'm working on support for the SDHC controller on the Rockchip RK3399
> > such that I can use the onboard eMMC on the Firefly-RK3399.  This
> > controller is based on the Arasan eMMC 5.1 "IP", which has a standard
> > SDHC 3.0 interface.  However there are some minor quirks.
> > 
> > Setting the signalling voltage to 1.8V (which is the only setting the
> > device supports) doesn't work.  For this reason, I add a hook to
> > override the sdhc_signal_voltage() function just like we did for the
> > sdhc_card_detect() function a while ago.
> > 
> > I also can't get the double-data rate mode to work.  So I added a flag
> > that disables the DDR52 mode when set.  My intention is to remove that
> > flag again if I get DDR52 to work on my board.
> > 
> > ok?
> 
> I thought you didn't want too many quirks in there. :)

Heh.

I'd like to avoid too much obfuscation/conditionals in the core
command code.  That's why I added the sdhc_voltage_override() function
instead of another flag.

NetBSD tried to support all kinds of controllers that deviate
considerably from the SDHC spec (such as our imxesdhc(4)) and it makes
the core code simply too messy.  But it is far from clear where we
should draw the line...

> > Index: dev/sdmmc/sdhc.c
> > ===
> > RCS file: /cvs/src/sys/dev/sdmmc/sdhc.c,v
> > retrieving revision 1.54
> > diff -u -p -r1.54 sdhc.c
> > --- dev/sdmmc/sdhc.c6 Apr 2017 03:15:29 -   1.54
> > +++ dev/sdmmc/sdhc.c5 May 2017 12:17:48 -
> > @@ -25,6 +25,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  
> >  #include 
> > @@ -317,6 +318,9 @@ sdhc_host_found(struct sdhc_softc *sc, b
> > saa.caps |= SMC_CAPS_MMC_DDR52;
> > }
> >  
> > +   if (ISSET(sc->sc_flags, SDHC_F_NODDR50))
> > +   saa.caps &= ~SMC_CAPS_MMC_DDR52;
> 
> Replace those 4 spaces with a tab.
> 
> > +
> > hp->sdmmc = config_found(>sc_dev, , NULL);
> > if (hp->sdmmc == NULL) {
> > error = 0;
> > @@ -683,6 +687,9 @@ int
> >  sdhc_signal_voltage(sdmmc_chipset_handle_t sch, int signal_voltage)
> >  {
> > struct sdhc_host *hp = sch;
> > +
> > +   if (hp->sc->sc_signal_voltage)
> > +   return hp->sc->sc_signal_voltage(hp->sc, signal_voltage);
> >  
> > if (SDHC_SPEC_VERSION(hp->version) < SDHC_SPEC_V3)
> > return EINVAL;
> > Index: dev/sdmmc/sdhcvar.h
> > ===
> > RCS file: /cvs/src/sys/dev/sdmmc/sdhcvar.h,v
> > retrieving revision 1.9
> > diff -u -p -r1.9 sdhcvar.h
> > --- dev/sdmmc/sdhcvar.h 30 Apr 2016 11:32:23 -  1.9
> > +++ dev/sdmmc/sdhcvar.h 5 May 2017 12:17:48 -
> > @@ -32,6 +32,7 @@ struct sdhc_softc {
> > bus_dma_tag_t sc_dmat;
> >  
> > int (*sc_card_detect)(struct sdhc_softc *);
> > +   int (*sc_signal_voltage)(struct sdhc_softc *, int);
> >  };
> >  
> >  /* Host controller functions called by the attachment driver. */
> > @@ -45,5 +46,6 @@ void  sdhc_needs_discover(struct sdhc_sof
> >  
> >  /* flag values */
> >  #define SDHC_F_NOPWR0  (1 << 0)
> > +#define SDHC_F_NODDR50 (1 << 1)
> >  
> >  #endif
> > 
> 



Re: sdhc(4) quirks

2017-05-05 Thread Patrick Wildt
On Fri, May 05, 2017 at 02:26:51PM +0200, Mark Kettenis wrote:
> I'm working on support for the SDHC controller on the Rockchip RK3399
> such that I can use the onboard eMMC on the Firefly-RK3399.  This
> controller is based on the Arasan eMMC 5.1 "IP", which has a standard
> SDHC 3.0 interface.  However there are some minor quirks.
> 
> Setting the signalling voltage to 1.8V (which is the only setting the
> device supports) doesn't work.  For this reason, I add a hook to
> override the sdhc_signal_voltage() function just like we did for the
> sdhc_card_detect() function a while ago.
> 
> I also can't get the double-data rate mode to work.  So I added a flag
> that disables the DDR52 mode when set.  My intention is to remove that
> flag again if I get DDR52 to work on my board.
> 
> ok?

I thought you didn't want too many quirks in there. :)  No objections
though, ok by me apart from a whitespace thing.

> 
> 
> Index: dev/sdmmc/sdhc.c
> ===
> RCS file: /cvs/src/sys/dev/sdmmc/sdhc.c,v
> retrieving revision 1.54
> diff -u -p -r1.54 sdhc.c
> --- dev/sdmmc/sdhc.c  6 Apr 2017 03:15:29 -   1.54
> +++ dev/sdmmc/sdhc.c  5 May 2017 12:17:48 -
> @@ -25,6 +25,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  
>  #include 
> @@ -317,6 +318,9 @@ sdhc_host_found(struct sdhc_softc *sc, b
>   saa.caps |= SMC_CAPS_MMC_DDR52;
>   }
>  
> + if (ISSET(sc->sc_flags, SDHC_F_NODDR50))
> + saa.caps &= ~SMC_CAPS_MMC_DDR52;

Replace those 4 spaces with a tab.

> +
>   hp->sdmmc = config_found(>sc_dev, , NULL);
>   if (hp->sdmmc == NULL) {
>   error = 0;
> @@ -683,6 +687,9 @@ int
>  sdhc_signal_voltage(sdmmc_chipset_handle_t sch, int signal_voltage)
>  {
>   struct sdhc_host *hp = sch;
> +
> + if (hp->sc->sc_signal_voltage)
> + return hp->sc->sc_signal_voltage(hp->sc, signal_voltage);
>  
>   if (SDHC_SPEC_VERSION(hp->version) < SDHC_SPEC_V3)
>   return EINVAL;
> Index: dev/sdmmc/sdhcvar.h
> ===
> RCS file: /cvs/src/sys/dev/sdmmc/sdhcvar.h,v
> retrieving revision 1.9
> diff -u -p -r1.9 sdhcvar.h
> --- dev/sdmmc/sdhcvar.h   30 Apr 2016 11:32:23 -  1.9
> +++ dev/sdmmc/sdhcvar.h   5 May 2017 12:17:48 -
> @@ -32,6 +32,7 @@ struct sdhc_softc {
>   bus_dma_tag_t sc_dmat;
>  
>   int (*sc_card_detect)(struct sdhc_softc *);
> + int (*sc_signal_voltage)(struct sdhc_softc *, int);
>  };
>  
>  /* Host controller functions called by the attachment driver. */
> @@ -45,5 +46,6 @@ voidsdhc_needs_discover(struct sdhc_sof
>  
>  /* flag values */
>  #define SDHC_F_NOPWR0(1 << 0)
> +#define SDHC_F_NODDR50   (1 << 1)
>  
>  #endif
> 



Re: arm64 bus_dmamap_sync(9)

2017-05-05 Thread Patrick Wildt
On Fri, May 05, 2017 at 03:19:07PM +0200, Mark Kettenis wrote:
> To support sdhc(4) on the Firefly-RK3399, I actually need a working
> bus_dmamap_sync(9) implementation that does proper cache flushes.  So
> far we've gotten away with not having one, because the USB stack tends
> to set the BUS_DMA_COHERENT flag which maps memory into uncached
> address space such that the cache flushes can be skipped.
> 
> Diff below follows the approach taken by armv7.  I've left out the
> optimization where BUS_DMASYNC_PREREAD invalidates complete cache
> lines instead of writing them back.  I'm far from certain that this
> optimization actually matters on your typical ARMv8 CPU.
> 
> ok?

ok patrick@

> 
> 
> Index: bus_dma.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/bus_dma.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 bus_dma.c
> --- bus_dma.c 22 Feb 2017 22:55:27 -  1.6
> +++ bus_dma.c 5 May 2017 13:06:50 -
> @@ -63,9 +63,9 @@
>  
>  #include 
>  
> -#include 
> -
>  #include 
> +#include 
> +#include 
>  
>  /*
>   * Common function for DMA map creation.  May be called by bus-specific
> @@ -351,6 +351,33 @@ _dmamap_unload(bus_dma_tag_t t, bus_dmam
>   map->dm_mapsize = 0;
>  }
>  
> +static void
> +_dmamap_sync_segment(vaddr_t va, vsize_t len, int ops)
> +{
> + switch (ops) {
> + case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
> + case BUS_DMASYNC_PREREAD:
> + cpu_dcache_wbinv_range(va, len);
> + break;
> +
> + case BUS_DMASYNC_PREWRITE:
> + cpu_dcache_wb_range(va, len);
> + break;
> +
> + /*
> +  * Cortex CPUs can do speculative loads so we need to clean the cache
> +  * after a DMA read to deal with any speculatively loaded cache lines.
> +  * Since these can't be dirty, we can just invalidate them and don't
> +  * have to worry about having to write back their contents.
> +  */
> + case BUS_DMASYNC_POSTREAD:
> + case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
> + membar_sync();
> + cpu_dcache_inv_range(va, len);
> + break;
> + }
> +}
> +
>  /*
>   * Common function for DMA map synchronization.  May be called
>   * by bus-specific DMA map synchronization functions.
> @@ -376,12 +403,10 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   curseg = 0;
>  
>   while (size && nsegs) {
> - paddr_t paddr;
>   vaddr_t vaddr;
>   bus_size_t ssize;
>  
>   ssize = map->dm_segs[curseg].ds_len;
> - paddr = map->dm_segs[curseg]._ds_paddr;
>   vaddr = map->dm_segs[curseg]._ds_vaddr;
>  
>   if (addr != 0) {
> @@ -390,7 +415,6 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   ssize = 0;
>   } else {
>   vaddr += addr;
> - paddr += addr;
>   ssize -= addr;
>   addr = 0;
>   }
> @@ -399,21 +423,7 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   ssize = size;
>  
>   if (ssize != 0) {
> - /*
> -  * If only PREWRITE is requested, writeback.
> -  * PREWRITE with PREREAD writebacks
> -  * and invalidates (if noncoherent) *all* cache levels.
> -  * Otherwise, just invalidate (if noncoherent).
> -  */
> - if (op & BUS_DMASYNC_PREWRITE) {
> - if (op & BUS_DMASYNC_PREREAD)
> - ; // XXX MUST ADD CACHEFLUSHING
> - else
> - ; // XXX MUST ADD CACHEFLUSHING
> - } else
> - if (op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTREAD)) {
> - ; // XXX MUST ADD CACHEFLUSHING
> - }
> + _dmamap_sync_segment(vaddr, ssize, op);
>   size -= ssize;
>   }
>   curseg++;
> 



Re: arm64 bus_dmamap_sync(9)

2017-05-05 Thread Jonathan Gray
On Fri, May 05, 2017 at 03:19:07PM +0200, Mark Kettenis wrote:
> To support sdhc(4) on the Firefly-RK3399, I actually need a working
> bus_dmamap_sync(9) implementation that does proper cache flushes.  So
> far we've gotten away with not having one, because the USB stack tends
> to set the BUS_DMA_COHERENT flag which maps memory into uncached
> address space such that the cache flushes can be skipped.
> 
> Diff below follows the approach taken by armv7.  I've left out the
> optimization where BUS_DMASYNC_PREREAD invalidates complete cache
> lines instead of writing them back.  I'm far from certain that this
> optimization actually matters on your typical ARMv8 CPU.
> 
> ok?

ok jsg@

> 
> 
> Index: bus_dma.c
> ===
> RCS file: /cvs/src/sys/arch/arm64/arm64/bus_dma.c,v
> retrieving revision 1.6
> diff -u -p -r1.6 bus_dma.c
> --- bus_dma.c 22 Feb 2017 22:55:27 -  1.6
> +++ bus_dma.c 5 May 2017 13:06:50 -
> @@ -63,9 +63,9 @@
>  
>  #include 
>  
> -#include 
> -
>  #include 
> +#include 
> +#include 
>  
>  /*
>   * Common function for DMA map creation.  May be called by bus-specific
> @@ -351,6 +351,33 @@ _dmamap_unload(bus_dma_tag_t t, bus_dmam
>   map->dm_mapsize = 0;
>  }
>  
> +static void
> +_dmamap_sync_segment(vaddr_t va, vsize_t len, int ops)
> +{
> + switch (ops) {
> + case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
> + case BUS_DMASYNC_PREREAD:
> + cpu_dcache_wbinv_range(va, len);
> + break;
> +
> + case BUS_DMASYNC_PREWRITE:
> + cpu_dcache_wb_range(va, len);
> + break;
> +
> + /*
> +  * Cortex CPUs can do speculative loads so we need to clean the cache
> +  * after a DMA read to deal with any speculatively loaded cache lines.
> +  * Since these can't be dirty, we can just invalidate them and don't
> +  * have to worry about having to write back their contents.
> +  */
> + case BUS_DMASYNC_POSTREAD:
> + case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
> + membar_sync();
> + cpu_dcache_inv_range(va, len);
> + break;
> + }
> +}
> +
>  /*
>   * Common function for DMA map synchronization.  May be called
>   * by bus-specific DMA map synchronization functions.
> @@ -376,12 +403,10 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   curseg = 0;
>  
>   while (size && nsegs) {
> - paddr_t paddr;
>   vaddr_t vaddr;
>   bus_size_t ssize;
>  
>   ssize = map->dm_segs[curseg].ds_len;
> - paddr = map->dm_segs[curseg]._ds_paddr;
>   vaddr = map->dm_segs[curseg]._ds_vaddr;
>  
>   if (addr != 0) {
> @@ -390,7 +415,6 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   ssize = 0;
>   } else {
>   vaddr += addr;
> - paddr += addr;
>   ssize -= addr;
>   addr = 0;
>   }
> @@ -399,21 +423,7 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
>   ssize = size;
>  
>   if (ssize != 0) {
> - /*
> -  * If only PREWRITE is requested, writeback.
> -  * PREWRITE with PREREAD writebacks
> -  * and invalidates (if noncoherent) *all* cache levels.
> -  * Otherwise, just invalidate (if noncoherent).
> -  */
> - if (op & BUS_DMASYNC_PREWRITE) {
> - if (op & BUS_DMASYNC_PREREAD)
> - ; // XXX MUST ADD CACHEFLUSHING
> - else
> - ; // XXX MUST ADD CACHEFLUSHING
> - } else
> - if (op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTREAD)) {
> - ; // XXX MUST ADD CACHEFLUSHING
> - }
> + _dmamap_sync_segment(vaddr, ssize, op);
>   size -= ssize;
>   }
>   curseg++;
> 



Re: OpenBSD 6.1: relayd does not start more than 3 processes

2017-05-05 Thread Maxim Bourmistrov

> 5 maj 2017 kl. 15:55 skrev Maxim Bourmistrov :
> 
> 
>> 5 maj 2017 kl. 14:41 skrev Hiltjo Posthuma :
>> 
>> On Fri, May 05, 2017 at 12:30:56PM +0200, Maxim Bourmistrov wrote:
>>> 
>>> Hey,
>>> on OpenBSD 6.0-stable I have following configuration for relayd:
>>> 
>>> snip———
>>> interval 10
>>> timeout 1200
>>> prefork 15
>>> log all
>>> ——
>>> 
>>> Respective login.conf to spawn more relayd procs:
>>> 
>>> relayd:\
>>>   :maxproc-max=31:\
>>>   :maxproc-cur=15:\
>>>   :openfiles=65536:\
>>>   :tc=daemon:
>>> 
>>> 
>>> With config options above moved to a 6.1 creates following:
>>> 
>>> relayd starts but brings up no more than 3 relay-processes.
>>> Also after start up it refuses to do any checks configured (in my simple 
>>> test I used check tcp) 
>>> 
>>> [mxb-test]-[12:21:41]# relayctl sh su
>>> Id  TypeNameAvlblty Status
>>> 1   relay   rabbitmqactive
>>> 1   table   rabbitmqpool:5672   empty
>>> 1   host10.5.96.8   unknown
>>> 2   table   rabbitmqfallback:5672   empty
>>> 2   host10.5.96.9   unknown
>>> 
>>> 
>>> Changing ’prefork’ from 15 to 3 makes it work.
>>> 
>>> Is this a bug?
>>> 
>>> Br
>>> 
>>> 
>>> 
>> 
>> Hey,
>> 
>> This is a random guess since you haven't posted the whole config, but I think
>> it has bitten me too sometime:
>> 
>> Do you have the global options such as prefork defined before your
>> relays and routes or not?
>> 
>> The order of the global options matter. If the global options are set after
>> the table they are not initialized on the tables and can actually crash 
>> relayd.
>> This is because the health checking uses a different prefork value and checks
>> the "wrong" amount.
>> 
>> I'm not sure, but I think it is not a bug: it is documented in 
>> relayd.conf(5).
>> 
>> Thinking about it: would it be acceptable if `relayd -n` shows a warning if
>> global options are defined in the wrong order? I can write the patch for it
>> if it makes sense.
>> 
>> I hope this helps you in some way,
>> 
>> -- 
>> Kind regards,
>> Hiltjo
> 
> The whole config is like this:
> 
> include "/etc/pj/nz/akl1/shared/pf/int/networks"
> include "/etc/pj/nz/akl1/shared/pf/int/common_tables"
> 
> interval 10
> timeout 1000
> prefork 3 #15
> log all
> 
> #include "/etc/pj/shared/relayd/protocols"
> 
> tcp protocol tcp_proto {
>tcp { nodelay, sack, socket buffer 65536, backlog 128 }
> }
> 
> relay rabbitmq {
>listen on 10.5.128.16 port 5674
> #protocol tcp_proto
> #   session timeout 10800
>    forward to <rabbitmqpool> port 5672 mode roundrobin check tcp
>    forward to <rabbitmqfallback> port 5672 mode roundrobin check tcp
> }
> 
> Tables are shared with PF so I don’t have to re-define those.
> ”common_tables” in config contains:
> 
> table <rabbitmqpool> { $RMQ1_VLAN302 }
> table <rabbitmqfallback> { $RMQ2_VLAN302 }
> 
> I don't seem to be able to find a place in the manual page where it states
> that global options need to go before
> table definitions.
> 
> Note, config layout exactly the same which runs already on 6.0-stable.
> 
> My original question is why I can’t fork more than 3 procs any more
> and why relayd starts when prefork > 3 and does not do a health check.
> 
> Br

Hm, I tried this out - re-ordering the layout of the config.
You are, indeed, correct here.

Strange that this runs on 6.0.

Case closed.
Sorry for the noise.

Br



iwm nic lock improvements

2017-05-05 Thread Stefan Sperling
The 7000 generation of iwm hardware requires that the driver sets a
bit in the CSR_GP_CNTRL register before accessing most other registers
or sending commands to the firmware. The device will keep paying attention
while this bit is set. In the driver, the act of setting this bit is
referred to as "locking the NIC".

With this diff, we avoid locking the NIC redundantly and perform accounting
of locking and unlocking. I am not seeing any of the warning messages
introduced in this diff. So the locking seems to be done correctly.

Also make sure that we don't unlock the NIC in iwm_notif_intr while a
command is being processed. Linux and DragonFly do this, too, but their
implementation is different (they use a flag rather than a counter).

Index: if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.178
diff -u -p -r1.178 if_iwm.c
--- if_iwm.c4 May 2017 09:03:42 -   1.178
+++ if_iwm.c5 May 2017 13:46:24 -
@@ -908,6 +908,11 @@ iwm_nic_lock(struct iwm_softc *sc)
 {
int rv = 0;
 
+   if (sc->sc_nic_locks > 0) {
+   sc->sc_nic_locks++;
+   return 1; /* already locked */
+   }
+
IWM_SETBITS(sc, IWM_CSR_GP_CNTRL,
IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
 
@@ -919,6 +924,7 @@ iwm_nic_lock(struct iwm_softc *sc)
IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY
 | IWM_CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP, 15000)) {
rv = 1;
+   sc->sc_nic_locks++;
} else {
printf("%s: acquiring device failed\n", DEVNAME(sc));
IWM_WRITE(sc, IWM_CSR_RESET, IWM_CSR_RESET_REG_FLAG_FORCE_NMI);
@@ -930,8 +936,12 @@ iwm_nic_lock(struct iwm_softc *sc)
 void
 iwm_nic_unlock(struct iwm_softc *sc)
 {
-   IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL,
-   IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+   if (sc->sc_nic_locks > 0) {
+   if (--sc->sc_nic_locks == 0)
+   IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL,
+   IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+   } else
+   printf("%s: NIC already unlocked\n", DEVNAME(sc));
 }
 
 void
@@ -1211,6 +1221,11 @@ iwm_reset_tx_ring(struct iwm_softc *sc, 
bus_dmamap_sync(sc->sc_dmat, ring->desc_dma.map, 0,
ring->desc_dma.size, BUS_DMASYNC_PREWRITE);
sc->qfullmsk &= ~(1 << ring->qid);
+   /* 7000 family NICs are locked while commands are in progress. */
+   if (ring->qid == IWM_CMD_QUEUE && ring->queued > 0) {
+   if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000)
+   iwm_nic_unlock(sc);
+   }
ring->queued = 0;
ring->cur = 0;
 }
@@ -1589,6 +1604,10 @@ iwm_stop_device(struct iwm_softc *sc)
/* Make sure (redundant) we've released our request to stay awake */
IWM_CLRBITS(sc, IWM_CSR_GP_CNTRL,
IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
+   if (sc->sc_nic_locks > 0)
+   printf("%s: %d active NIC locks forcefully cleared\n",
+   DEVNAME(sc), sc->sc_nic_locks);
+   sc->sc_nic_locks = 0;
 
/* Stop the device, and put it in low power state */
iwm_apm_stop(sc);
@@ -3724,21 +3743,23 @@ iwm_send_cmd(struct iwm_softc *sc, struc
(char *)(void *)desc - (char *)(void *)ring->desc_dma.vaddr,
sizeof (*desc), BUS_DMASYNC_PREWRITE);
 
-   IWM_SETBITS(sc, IWM_CSR_GP_CNTRL,
-   IWM_CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
-   if (!iwm_poll_bit(sc, IWM_CSR_GP_CNTRL,
-   IWM_CSR_GP_CNTRL_REG_VAL_MAC_ACCESS_EN,
-   (IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY |
-IWM_CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP), 15000)) {
-   printf("%s: acquiring device failed\n", DEVNAME(sc));
-   err = EBUSY;
-   goto out;
+   /*
+* Wake up the NIC to make sure that the firmware will see the host
+* command - we will let the NIC sleep once all the host commands
+* returned. This needs to be done only on 7000 family NICs.
+*/
+   if (sc->sc_device_family == IWM_DEVICE_FAMILY_7000) {
+   if (ring->queued == 0 && !iwm_nic_lock(sc)) {
+   err = EBUSY;
+   goto out;
+   }
}
 
 #if 0
iwm_update_sched(sc, ring->qid, ring->cur, 0, 0);
 #endif
/* Kick command ring. */
+   ring->queued++;
ring->cur = (ring->cur + 1) % IWM_TX_RING_COUNT;
IWM_WRITE(sc, IWM_HBUS_TARG_WRPTR, ring->qid << 8 | ring->cur);
 
@@ -3859,6 +3880,18 @@ iwm_cmd_done(struct iwm_softc *sc, struc
data->m = NULL;
}
wakeup(>desc[pkt->hdr.idx]);
+
+   if (ring->queued == 0) {
+   DPRINTF(("%s: unexpected firmware response to command 0x%x\n",
+   DEVNAME(sc), IWM_WIDE_ID(pkt->hdr.flags, pkt->hdr.code)));
+   } else 

Re: OpenBSD 6.1: relayd does not start more than 3 processes

2017-05-05 Thread Maxim Bourmistrov

> 5 maj 2017 kl. 14:41 skrev Hiltjo Posthuma :
> 
> On Fri, May 05, 2017 at 12:30:56PM +0200, Maxim Bourmistrov wrote:
>> 
>> Hey,
>> on OpenBSD 6.0-stable I have following configuration for relayd:
>> 
>> snip———
>> interval 10
>> timeout 1200
>> prefork 15
>> log all
>> ——
>> 
>> Respective login.conf to spawn more relayd procs:
>> 
>> relayd:\
>>:maxproc-max=31:\
>>:maxproc-cur=15:\
>>:openfiles=65536:\
>>:tc=daemon:
>> 
>> 
>> With config options above moved to a 6.1 creates following:
>> 
>> relayd starts but brings up no more than 3 relay-processes.
>> Also after start up it refuses to do any checks configured (in my simple 
>> test I used check tcp) 
>> 
>> [mxb-test]-[12:21:41]# relayctl sh su
>> Id  TypeNameAvlblty Status
>> 1   relay   rabbitmqactive
>> 1   table   rabbitmqpool:5672   empty
>> 1   host10.5.96.8   unknown
>> 2   table   rabbitmqfallback:5672   empty
>> 2   host10.5.96.9   unknown
>> 
>> 
>> Changing ’prefork’ from 15 to 3 makes it work.
>> 
>> Is this a bug?
>> 
>> Br
>> 
>> 
>> 
> 
> Hey,
> 
> This is a random guess since you haven't posted the whole config, but I think
> it has bitten me too sometime:
> 
> Do you have the global options such as prefork defined before your
> relays and routes or not?
> 
> The order of the global options matters. If the global options are set after
> the table they are not initialized on the tables and can actually crash 
> relayd.
> This is because the health checking uses a different prefork value and checks
> the "wrong" amount.
> 
> I'm not sure, but I think it is not a bug: it is documented in relayd.conf(5).
> 
> Thinking about it: would it be acceptable if `relayd -n` shows a warning if
> global options are defined in the wrong order? I can write the patch for it
> if it makes sense.
> 
> I hope this helps you in some way,
> 
> -- 
> Kind regards,
> Hiltjo

The whole config is like this:

include "/etc/pj/nz/akl1/shared/pf/int/networks"
include "/etc/pj/nz/akl1/shared/pf/int/common_tables"

interval 10
timeout 1000
prefork 3 #15
log all

#include "/etc/pj/shared/relayd/protocols"

tcp protocol tcp_proto {
tcp { nodelay, sack, socket buffer 65536, backlog 128 }
}

relay rabbitmq {
listen on 10.5.128.16 port 5674
#protocol tcp_proto
#   session timeout 10800
forward to  port 5672 mode roundrobin check tcp
forward to  port 5672 mode roundrobin check tcp
}

Tables are shared with PF so I don’t have to re-define those.
”common_tables” in config contains:

table  { $RMQ1_VLAN302 }
table  { $RMQ2_VLAN302 }

I don't seem to be able to find a place in the manpage where it states that global 
options need to go before
table definitions. 

Note, config layout exactly the same which runs already on 6.0-stable.

My original question is why I can’t fork more than 3 procs any more
and why relayd starts when prefork > 3 and does not do a health check.

Br
 


iwm firmware rate table fixes

2017-05-05 Thread Stefan Sperling
This fixes issues with iwm(4) firmware's retry rate table.

For a HT node ni_txrate is always zero and we should be using
ni_txmcs instead. Simplify the if-else logic to make sure of that.

The mimo delimiter in the link quality command was never set.
I don't know how important this is. But Linux sets it, so why not.

Hardcode the lowest rate at the tail of the retry table.
While debugging the old code I have encountered retry tables filled
with only 'MCS 8' which is obviously not ideal.

Index: if_iwm.c
===
RCS file: /cvs/src/sys/dev/pci/if_iwm.c,v
retrieving revision 1.178
diff -u -p -r1.178 if_iwm.c
--- if_iwm.c4 May 2017 09:03:42 -   1.178
+++ if_iwm.c5 May 2017 13:18:44 -
@@ -241,7 +241,7 @@ struct iwm_nvm_section {
uint8_t *data;
 };
 
-intiwm_is_mimo_plcp(uint8_t);
+intiwm_is_mimo_ht_plcp(uint8_t);
 intiwm_is_mimo_mcs(int);
 intiwm_store_cscheme(struct iwm_softc *, uint8_t *, size_t);
 intiwm_firmware_store_section(struct iwm_softc *, enum iwm_ucode_type,
@@ -5375,8 +5375,9 @@ iwm_setrates(struct iwm_node *in)
if (j >= nitems(lq->rs_table))
break;
tab = 0;
-   if ((ni->ni_flags & IEEE80211_NODE_HT) &&
-   ht_plcp != IWM_RATE_HT_SISO_MCS_INV_PLCP) {
+   if (ni->ni_flags & IEEE80211_NODE_HT) {
+   if (ht_plcp == IWM_RATE_HT_SISO_MCS_INV_PLCP)
+   continue;
/* Do not mix SISO and MIMO HT rates. */
if ((mimo && !iwm_is_mimo_ht_plcp(ht_plcp)) ||
(!mimo && iwm_is_mimo_ht_plcp(ht_plcp)))
@@ -5392,8 +5393,7 @@ iwm_setrates(struct iwm_node *in)
break;
}
}
-   }
-   if (tab == 0 && plcp != IWM_RATE_INVM_PLCP) {
+   } else if (plcp != IWM_RATE_INVM_PLCP) {
for (i = ni->ni_txrate; i >= 0; i--) {
if (iwm_rates[ridx].rate == (rs->rs_rates[i] &
IEEE80211_RATE_VAL)) {
@@ -5416,10 +5416,16 @@ iwm_setrates(struct iwm_node *in)
lq->rs_table[j++] = htole32(tab);
}
 
+   lq->mimo_delim = (mimo ? j : 0);
+
/* Fill the rest with the lowest possible rate */
-   i = j > 0 ? j - 1 : 0;
-   while (j < nitems(lq->rs_table))
-   lq->rs_table[j++] = lq->rs_table[i];
+   while (j < nitems(lq->rs_table)) {
+   tab = iwm_rates[ridx_min].plcp;
+   if (IWM_RIDX_IS_CCK(ridx_min))
+   tab |= IWM_RATE_MCS_CCK_MSK;
+   tab |= IWM_RATE_MCS_ANT_A_MSK;
+   lq->rs_table[j++] = htole32(tab);
+   }
 
lq->single_stream_ant_msk = IWM_ANT_A;
lq->dual_stream_ant_msk = IWM_ANT_AB;



arm64 bus_dmamap_sync(9)

2017-05-05 Thread Mark Kettenis
To support sdhc(4) on the Firefly-RK3399, I actually need a working
bus_dmamap_sync(9) implementation that does proper cache flushes.  So
far we've gotten away with not having one, because the USB stack tends
to set the BUS_DMA_COHERENT flag which maps memory into uncached
address space such that the cache flushes can be skipped.

Diff below follows the approach taken by armv7.  I've left out the
optimization where BUS_DMASYNC_PREREAD invalidates complete cache
lines instead of writing them back.  I'm far from certain that this
optimization actually matters on your typical ARMv8 CPU.

ok?


Index: bus_dma.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/bus_dma.c,v
retrieving revision 1.6
diff -u -p -r1.6 bus_dma.c
--- bus_dma.c   22 Feb 2017 22:55:27 -  1.6
+++ bus_dma.c   5 May 2017 13:06:50 -
@@ -63,9 +63,9 @@
 
 #include 
 
-#include 
-
 #include 
+#include 
+#include 
 
 /*
  * Common function for DMA map creation.  May be called by bus-specific
@@ -351,6 +351,33 @@ _dmamap_unload(bus_dma_tag_t t, bus_dmam
map->dm_mapsize = 0;
 }
 
+static void
+_dmamap_sync_segment(vaddr_t va, vsize_t len, int ops)
+{
+   switch (ops) {
+   case BUS_DMASYNC_PREREAD|BUS_DMASYNC_PREWRITE:
+   case BUS_DMASYNC_PREREAD:
+   cpu_dcache_wbinv_range(va, len);
+   break;
+
+   case BUS_DMASYNC_PREWRITE:
+   cpu_dcache_wb_range(va, len);
+   break;
+
+   /*
+* Cortex CPUs can do speculative loads so we need to clean the cache
+* after a DMA read to deal with any speculatively loaded cache lines.
+* Since these can't be dirty, we can just invalidate them and don't
+* have to worry about having to write back their contents.
+*/
+   case BUS_DMASYNC_POSTREAD:
+   case BUS_DMASYNC_POSTREAD|BUS_DMASYNC_POSTWRITE:
+   membar_sync();
+   cpu_dcache_inv_range(va, len);
+   break;
+   }
+}
+
 /*
  * Common function for DMA map synchronization.  May be called
  * by bus-specific DMA map synchronization functions.
@@ -376,12 +403,10 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
curseg = 0;
 
while (size && nsegs) {
-   paddr_t paddr;
vaddr_t vaddr;
bus_size_t ssize;
 
ssize = map->dm_segs[curseg].ds_len;
-   paddr = map->dm_segs[curseg]._ds_paddr;
vaddr = map->dm_segs[curseg]._ds_vaddr;
 
if (addr != 0) {
@@ -390,7 +415,6 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
ssize = 0;
} else {
vaddr += addr;
-   paddr += addr;
ssize -= addr;
addr = 0;
}
@@ -399,21 +423,7 @@ _dmamap_sync(bus_dma_tag_t t, bus_dmamap
ssize = size;
 
if (ssize != 0) {
-   /*
-* If only PREWRITE is requested, writeback.
-* PREWRITE with PREREAD writebacks
-* and invalidates (if noncoherent) *all* cache levels.
-* Otherwise, just invalidate (if noncoherent).
-*/
-   if (op & BUS_DMASYNC_PREWRITE) {
-   if (op & BUS_DMASYNC_PREREAD)
-   ; // XXX MUST ADD CACHEFLUSHING
-   else
-   ; // XXX MUST ADD CACHEFLUSHING
-   } else
-   if (op & (BUS_DMASYNC_PREREAD | BUS_DMASYNC_POSTREAD)) {
-   ; // XXX MUST ADD CACHEFLUSHING
-   }
+   _dmamap_sync_segment(vaddr, ssize, op);
size -= ssize;
}
curseg++;



Re: OpenBSD 6.1: relayd does not start more than 3 processes

2017-05-05 Thread Hiltjo Posthuma
On Fri, May 05, 2017 at 12:30:56PM +0200, Maxim Bourmistrov wrote:
> 
> Hey,
> on OpenBSD 6.0-stable I have following configuration for relayd:
> 
> snip———
> interval 10
> timeout 1200
> prefork 15
> log all
> ——
>  
> Respective login.conf to spawn more relayd procs:
> 
> relayd:\
> :maxproc-max=31:\
> :maxproc-cur=15:\
> :openfiles=65536:\
> :tc=daemon:
> 
> 
> With config options above moved to a 6.1 creates following:
> 
> relayd starts but brings up no more than 3 relay-processes.
> Also after start up it refuses to do any checks configured (in my simple test 
> I used check tcp) 
> 
> [mxb-test]-[12:21:41]# relayctl sh su
> Id  TypeNameAvlblty Status
> 1   relay   rabbitmqactive
> 1   table   rabbitmqpool:5672   empty
> 1   host10.5.96.8   unknown
> 2   table   rabbitmqfallback:5672   empty
> 2   host10.5.96.9   unknown
> 
> 
> Changing ’prefork’ from 15 to 3 makes it work.
> 
> Is this a bug?
> 
> Br
> 
> 
> 

Hey,

This is a random guess since you haven't posted the whole config, but I think
it has bitten me too sometime:

Do you have the global options such as prefork defined before your
relays and routes or not?

The order of the global options matters. If the global options are set after
the table they are not initialized on the tables and can actually crash relayd.
This is because the health checking uses a different prefork value and checks
the "wrong" amount.

I'm not sure, but I think it is not a bug: it is documented in relayd.conf(5).

Thinking about it: would it be acceptable if `relayd -n` shows a warning if
global options are defined in the wrong order? I can write the patch for it
if it makes sense.

I hope this helps you in some way,

-- 
Kind regards,
Hiltjo



sdhc(4) quirks

2017-05-05 Thread Mark Kettenis
I'm working on support for the SDHC controller on the Rockchip RK3399
such that I can use the onboard eMMC on the Firefly-RK3399.  This
controller is based on the Arasan eMMC 5.1 "IP", which has a standard
SDHC 3.0 interface.  However there are some minor quirks.

Setting the signalling voltage to 1.8V (which is the only setting the
device supports) doesn't work.  For this reason, I add a hook to
override the sdhc_signal_voltage() function just like we did for the
sdhc_card_detect() function a while ago.

I also can't get the double-data rate mode to work.  So I added a flag
that disables the DDR52 mode when set.  My intention is to remove that
flag again if I get DDR52 to work on my board.

ok?


Index: dev/sdmmc/sdhc.c
===
RCS file: /cvs/src/sys/dev/sdmmc/sdhc.c,v
retrieving revision 1.54
diff -u -p -r1.54 sdhc.c
--- dev/sdmmc/sdhc.c6 Apr 2017 03:15:29 -   1.54
+++ dev/sdmmc/sdhc.c5 May 2017 12:17:48 -
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -317,6 +318,9 @@ sdhc_host_found(struct sdhc_softc *sc, b
saa.caps |= SMC_CAPS_MMC_DDR52;
}
 
+   if (ISSET(sc->sc_flags, SDHC_F_NODDR50))
+   saa.caps &= ~SMC_CAPS_MMC_DDR52;
+
hp->sdmmc = config_found(>sc_dev, , NULL);
if (hp->sdmmc == NULL) {
error = 0;
@@ -683,6 +687,9 @@ int
 sdhc_signal_voltage(sdmmc_chipset_handle_t sch, int signal_voltage)
 {
struct sdhc_host *hp = sch;
+
+   if (hp->sc->sc_signal_voltage)
+   return hp->sc->sc_signal_voltage(hp->sc, signal_voltage);
 
if (SDHC_SPEC_VERSION(hp->version) < SDHC_SPEC_V3)
return EINVAL;
Index: dev/sdmmc/sdhcvar.h
===
RCS file: /cvs/src/sys/dev/sdmmc/sdhcvar.h,v
retrieving revision 1.9
diff -u -p -r1.9 sdhcvar.h
--- dev/sdmmc/sdhcvar.h 30 Apr 2016 11:32:23 -  1.9
+++ dev/sdmmc/sdhcvar.h 5 May 2017 12:17:48 -
@@ -32,6 +32,7 @@ struct sdhc_softc {
bus_dma_tag_t sc_dmat;
 
int (*sc_card_detect)(struct sdhc_softc *);
+   int (*sc_signal_voltage)(struct sdhc_softc *, int);
 };
 
 /* Host controller functions called by the attachment driver. */
@@ -45,5 +46,6 @@ void  sdhc_needs_discover(struct sdhc_sof
 
 /* flag values */
 #define SDHC_F_NOPWR0  (1 << 0)
+#define SDHC_F_NODDR50 (1 << 1)
 
 #endif



My ELFSEC implementation (signed binaries for amd64)

2017-05-05 Thread Peter J. Philipp
This is my second official contribution to what I call ELFSEC, it places a 
signature in binaries, in the ELF header to be exact.  This set of patches
is against 6.1 sources, not -current.  While I think it's somewhat premature 
to send this in, some people in #openbsd on efnet were very interested in 
seeing this.  The first implementation I shared was this:

http://marc.info/?l=openbsd-misc&m=149336261728884&w=2

Currently this implementation has been limited to amd64, but other 64 bit OS's 
should be able to run this.  There was concern about my use of MD5 HMAC's so I 
took them out.  The ELF header of 32 bit systems is too small to fit SHA256 
checksums, so I'm leaving it out.

A bit more work needs to be done:

1) Right now the binaries have to be compiled with 'LDFLAGS+= -Wl,-zelfsec' in 
/etc/mk.conf but this breaks make build.  I'd love it if someone took me on 
their shoulders for fixing that.  Every ELF binary produced should have an 
ELFSEC ELF
program header.  Please school me!

Two programs exist in userland for this:

1) elfsec - a general purpose utility that can activate elfsec in 
/etc/rc.securelevel, sign an ELF binary (that has an ELFSEC header), and output 
the checksum of an ELF  binary.  It can also work in conjunction with elfsecd 
to sign a user's compiled programs.  It can also write a new key to 
/etc/elfsec/key (only root
visible).

2) elfsecd - is a daemon that listens on the UNIX socket /var/run/elfsecd.  It 
is chrooted in the unprivileged stage and also pledged in all stages.  The 
unprivileged stage checks a redblack tree if a user is authenticated to sign 
their programs, a userlist exists in /etc/elfsec/users, when authenticated it 
passes the
path and the uid of the requester to a root daemon via imsg framework.  The 
root process then signs the binary after lstat'ing it.

Some thoughts:

1) This is good for routers and switches since compiling isn't done on them,
usually.
2) ELFSEC will only check against uid > 0, since there is no point in securing
against the root user when the key is on disk.  Also root could read the key
out of kernel memory, what's 0wned is 0wned.
3) ELFSEC is good for protecting daemons preventing access, and being a bit
noisy in the process.  It is good for users who don't need to compile or import
their malware from external sources to the system.
4) ELFSEC is great for hacking inside the new vmm system.  It keeps a 
developers system clean while concepts are being checked.  This is how I did 
development
in the second round.  Yes patches to ELFSEC existed at OpenBSD 6.0 time.

Anyone want to continue my work?

With high regards (patch follows),
-peter


Index: gnu/usr.bin/binutils-2.17/bfd/elf-bfd.h
===
RCS file: /var/cvsroot/src/src/gnu/usr.bin/binutils-2.17/bfd/elf-bfd.h,v
retrieving revision 1.1.1.1
diff -u -p -u -r1.1.1.1 elf-bfd.h
--- gnu/usr.bin/binutils-2.17/bfd/elf-bfd.h 4 May 2017 19:57:26 -   
1.1.1.1
+++ gnu/usr.bin/binutils-2.17/bfd/elf-bfd.h 5 May 2017 09:37:22 -
@@ -1340,6 +1340,9 @@ struct elf_obj_tdata
   /* TRUE if output program should be marked to request W^X permission */
   bfd_boolean wxneeded;
 
+  /* TRUE if output program should be marked to run ELFSEC'ed */
+  bfd_boolean elfsec;
+
   /* Symbol version definitions in external objects.  */
   Elf_Internal_Verdef *verdef;
 
Index: gnu/usr.bin/binutils-2.17/bfd/elf.c
===
RCS file: /var/cvsroot/src/src/gnu/usr.bin/binutils-2.17/bfd/elf.c,v
retrieving revision 1.1.1.1
diff -u -p -u -r1.1.1.1 elf.c
--- gnu/usr.bin/binutils-2.17/bfd/elf.c 4 May 2017 19:57:26 -   1.1.1.1
+++ gnu/usr.bin/binutils-2.17/bfd/elf.c 5 May 2017 09:37:16 -
@@ -1087,6 +1087,7 @@ get_segment_type (unsigned int p_type)
 case PT_GNU_RELRO: pt = "RELRO"; break;
 case PT_OPENBSD_RANDOMIZE: pt = "OPENBSD_RANDOMIZE"; break;
 case PT_OPENBSD_WXNEEDED: pt = "OPENBSD_WXNEEDED"; break;
+case PT_OPENBSD_ELFSEC: pt = "OPENBSD_ELFSEC"; break;
 case PT_OPENBSD_BOOTDATA: pt = "OPENBSD_BOOTDATA"; break;
 default: pt = NULL; break;
 }
@@ -2617,6 +2618,11 @@ bfd_section_from_phdr (bfd *abfd, Elf_In
   return _bfd_elf_make_section_from_phdr (abfd, hdr, index,
  "openbsd_wxneeded");
 
+case PT_OPENBSD_ELFSEC:
+  return _bfd_elf_make_section_from_phdr (abfd, hdr, index, 
+   
"openbsd_elfsec");
+
+
 default:
   /* Check for any processor-specific program segment types.  */
   bed = get_elf_backend_data (abfd);
@@ -3951,6 +3957,22 @@ map_sections_to_segments (bfd *abfd)
   pm = >next;
 }
 
+  if (elf_tdata (abfd)->elfsec)
+{
+  amt = sizeof (struct elf_segment_map);
+  m = bfd_zalloc (abfd, amt);
+  if (m == NULL)
+   goto error_return;
+  m->next = NULL;
+  m->p_type = 

Re: moxa cp-104el puc(4)

2017-05-05 Thread Mark Kettenis
> Date: Fri, 5 May 2017 21:30:13 +1000
> From: Jonathan Matthew 
> 
> I found one of these on my desk.  I tried ports 1 and 4 and then ran out
> of things to plug in, but those two work fine.  Are the entries in
> pucdata.c in some kind of order?

Not really.

I don't think you should introduce an additional blank line, but
otherwise this is ok kettenis@

> Index: pucdata.c
> ===
> RCS file: /cvs/src/sys/dev/pci/pucdata.c,v
> retrieving revision 1.104
> diff -u -p -r1.104 pucdata.c
> --- pucdata.c 24 Apr 2016 06:47:56 -  1.104
> +++ pucdata.c 5 May 2017 11:27:18 -
> @@ -1421,6 +1421,19 @@ const struct puc_device_description puc_
>   },
>   },
>  
> + /* Moxa Technologies Co., Ltd. PCI I/O Card 4S RS232 */
> + {   /* "Moxa Technologies, SmartIO CP104EL/PCI" */
> + {   PCI_VENDOR_MOXA, PCI_PRODUCT_MOXA_CP104EL,  0, 0},
> + {   0x, 0x, 0, 0},
> + {
> + { PUC_COM_POW2(3), 0x18, 0x },
> + { PUC_COM_POW2(3), 0x18, 0x0008 },
> + { PUC_COM_POW2(3), 0x18, 0x0010 },
> + { PUC_COM_POW2(3), 0x18, 0x0018 },
> + },
> + },
> +
> +
>   /* Moxa Technologies Co., Ltd. PCI I/O Card 8S RS232 */
>   {   /* "Moxa Technologies, Industio C168H" */
>   {   PCI_VENDOR_MOXA, PCI_PRODUCT_MOXA_C168H,0, 0},
> 
> 



moxa cp-104el puc(4)

2017-05-05 Thread Jonathan Matthew
I found one of these on my desk.  I tried ports 1 and 4 and then ran out
of things to plug in, but those two work fine.  Are the entries in
pucdata.c in some kind of order?


Index: pucdata.c
===
RCS file: /cvs/src/sys/dev/pci/pucdata.c,v
retrieving revision 1.104
diff -u -p -r1.104 pucdata.c
--- pucdata.c   24 Apr 2016 06:47:56 -  1.104
+++ pucdata.c   5 May 2017 11:27:18 -
@@ -1421,6 +1421,19 @@ const struct puc_device_description puc_
},
},
 
+   /* Moxa Technologies Co., Ltd. PCI I/O Card 4S RS232 */
+   {   /* "Moxa Technologies, SmartIO CP104EL/PCI" */
+   {   PCI_VENDOR_MOXA, PCI_PRODUCT_MOXA_CP104EL,  0, 0},
+   {   0x, 0x, 0, 0},
+   {
+   { PUC_COM_POW2(3), 0x18, 0x },
+   { PUC_COM_POW2(3), 0x18, 0x0008 },
+   { PUC_COM_POW2(3), 0x18, 0x0010 },
+   { PUC_COM_POW2(3), 0x18, 0x0018 },
+   },
+   },
+
+
/* Moxa Technologies Co., Ltd. PCI I/O Card 8S RS232 */
{   /* "Moxa Technologies, Industio C168H" */
{   PCI_VENDOR_MOXA, PCI_PRODUCT_MOXA_C168H,0, 0},



OpenBSD 6.1: relayd does not start more than 3 processes

2017-05-05 Thread Maxim Bourmistrov

Hey,
on OpenBSD 6.0-stable I have following configuration for relayd:

snip———
interval 10
timeout 1200
prefork 15
log all
——
 
Respective login.conf to spawn more relayd procs:

relayd:\
:maxproc-max=31:\
:maxproc-cur=15:\
:openfiles=65536:\
:tc=daemon:


With config options above moved to a 6.1 creates following:

relayd starts but brings up no more than 3 relay-processes.
Also after start up it refuses to do any checks configured (in my simple test I 
used check tcp) 

[mxb-test]-[12:21:41]# relayctl sh su
Id  TypeNameAvlblty Status
1   relay   rabbitmqactive
1   table   rabbitmqpool:5672   empty
1   host10.5.96.8   unknown
2   table   rabbitmqfallback:5672   empty
2   host10.5.96.9   unknown


Changing ’prefork’ from 15 to 3 makes it work.

Is this a bug?

Br





[PATCH] vmm: Add XCR0 to readregs / writeregs

2017-05-05 Thread Pratik Vyas


Hello tech@,


This is a patch that extends the readregs and writeregs vmm(4) ioctl to
read and write XCR0. This is required to send and receive FPU state
correctly for vmctl send and vmctl receive. vmctl send / receive are two
new options that will support snapshotting VMs and migrating VMs from
one host to another. This project was undertaken at San Jose State
University along with my three teammates, Ashwin, Harshada and Siri with
mlarkin@ as our advisor.


Thanks,
Pratik



Index: sys/arch/amd64/amd64/vmm.c
===
RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.138
diff -u -p -a -u -r1.138 vmm.c
--- sys/arch/amd64/amd64/vmm.c  2 May 2017 02:57:46 -   1.138
+++ sys/arch/amd64/amd64/vmm.c  5 May 2017 07:07:56 -
@@ -1396,6 +1396,7 @@ vcpu_readregs_vmx(struct vcpu *vcpu, uin
}
if (regmask & VM_RWREGS_CRS) {
crs[VCPU_REGS_CR2] = vcpu->vc_gueststate.vg_cr2;
+   crs[VCPU_REGS_XCR0] = vcpu->vc_gueststate.vg_xcr0;
if (vmread(VMCS_GUEST_IA32_CR0, [VCPU_REGS_CR0]))
goto errout;
if (vmread(VMCS_GUEST_IA32_CR3, [VCPU_REGS_CR3]))
@@ -1522,6 +1523,7 @@ vcpu_writeregs_vmx(struct vcpu *vcpu, ui
goto errout;
}
if (regmask & VM_RWREGS_CRS) {
+   vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR0];
if (vmwrite(VMCS_GUEST_IA32_CR0, crs[VCPU_REGS_CR0]))
goto errout;
if (vmwrite(VMCS_GUEST_IA32_CR3, crs[VCPU_REGS_CR3]))
Index: sys/arch/amd64/include/vmmvar.h
===
RCS file: /home/pdvyas/cvs/src/sys/arch/amd64/include/vmmvar.h,v
retrieving revision 1.36
diff -u -p -a -u -r1.36 vmmvar.h
--- sys/arch/amd64/include/vmmvar.h 2 May 2017 02:57:46 -   1.36
+++ sys/arch/amd64/include/vmmvar.h 5 May 2017 07:07:56 -
@@ -328,7 +328,8 @@ struct vcpu_segment_info {
#define VCPU_REGS_CR3   2
#define VCPU_REGS_CR4   3
#define VCPU_REGS_CR8   4
-#define VCPU_REGS_NCRS (VCPU_REGS_CR8 + 1)
+#define VCPU_REGS_XCR0 5
+#define VCPU_REGS_NCRS (VCPU_REGS_XCR0 + 1)

#define VCPU_REGS_CS0
#define VCPU_REGS_DS1
Index: usr.sbin/vmd/vm.c
===
RCS file: /home/pdvyas/cvs/src/usr.sbin/vmd/vm.c,v
retrieving revision 1.15
diff -u -p -a -u -r1.15 vm.c
--- usr.sbin/vmd/vm.c   2 May 2017 07:19:53 -   1.15
+++ usr.sbin/vmd/vm.c   5 May 2017 07:07:56 -
@@ -139,7 +139,8 @@ static const struct vcpu_reg_state vcpu_
.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
-   .vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL
+   .vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
+   .vrs_crs[VCPU_REGS_XCR0] = XCR0_X87
#endif
};

@@ -175,7 +176,8 @@ static const struct vcpu_reg_state vcpu_
.vrs_msrs[VCPU_REGS_LSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_CSTAR] = 0ULL,
.vrs_msrs[VCPU_REGS_SFMASK] = 0ULL,
-   .vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL
+   .vrs_msrs[VCPU_REGS_KGSBASE] = 0ULL,
+   .vrs_crs[VCPU_REGS_XCR0] = XCR0_X87
#endif
};




Re: vmm(4): respect VPID/ASID limits

2017-05-05 Thread Mike Larkin
On Thu, May 04, 2017 at 11:17:12PM -0700, Mike Larkin wrote:
> This diff limits the ASID/VPID value to 0xFFF (4095), or in the case of SVM,
> the max ASID capability of the CPU. I use a bitmap to record the VPIDs/ASIDs
> in use, and allocate the next one available when needed. Although VMX can
> support 65535 VPIDs, 4095 seems like a reasonable value for the number of
> "VCPUs currently in use" for a given machine. The bitmap is easily extended
> if needed.
> 
> Tested with VM startup, teardown, watching the VPID recycling when VMM_DEBUG
> is enabled.
> 
> ok?
> 
> -ml

Just noticed identcpu.c got omitted from the previous diff.

-ml


Index: arch/amd64/amd64/identcpu.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/identcpu.c,v
retrieving revision 1.83
diff -u -p -a -u -r1.83 identcpu.c
--- arch/amd64/amd64/identcpu.c 14 Apr 2017 01:02:28 -  1.83
+++ arch/amd64/amd64/identcpu.c 5 May 2017 06:25:57 -
@@ -949,8 +949,8 @@ cpu_check_vmm_cap(struct cpu_info *ci)
CPUID(0x800A, dummy, ci->ci_vmm_cap.vcc_svm.svm_max_asid,
dummy, dummy);
 
-   if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0x)
-   ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0x;
+   if (ci->ci_vmm_cap.vcc_svm.svm_max_asid > 0xFFF)
+   ci->ci_vmm_cap.vcc_svm.svm_max_asid = 0xFFF;
}
 
/*
Index: arch/amd64/amd64/vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.138
diff -u -p -a -u -r1.138 vmm.c
--- arch/amd64/amd64/vmm.c  2 May 2017 02:57:46 -   1.138
+++ arch/amd64/amd64/vmm.c  5 May 2017 06:12:38 -
@@ -100,6 +100,10 @@ struct vmm_softc {
struct rwlock   vm_lock;
size_t  vm_ct;  /* number of in-memory VMs */
size_t  vm_idx; /* next unique VM index */
+
+   struct rwlock   vpid_lock;
+   uint16_tmax_vpid;
+   uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */
 };
 
 int vmm_enabled(void);
@@ -165,6 +169,8 @@ int svm_get_guest_faulttype(void);
 int vmx_get_exit_qualification(uint64_t *);
 int vmx_fault_page(struct vcpu *, paddr_t);
 int vmx_handle_np_fault(struct vcpu *);
+int vmm_alloc_vpid(uint16_t *);
+void vmm_free_vpid(uint16_t);
 const char *vcpu_state_decode(u_int);
 const char *vmx_exit_reason_decode(uint32_t);
 const char *vmx_instruction_error_decode(uint32_t);
@@ -361,6 +367,15 @@ vmm_attach(struct device *parent, struct
sc->mode = VMM_MODE_UNKNOWN;
}
 
+   if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) {
+   sc->max_vpid = ci->ci_vmm_cap.vcc_svm.svm_max_asid;
+   } else {
+   sc->max_vpid = 0xFFF;
+   }
+
+   bzero(>vpids, sizeof(sc->vpids));
+   rw_init(>vpid_lock, "vpidlock");
+
pool_init(_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
"vmpool", NULL);
pool_init(_pool, sizeof(struct vcpu), 64, IPL_NONE, PR_WAITOK,
@@ -1033,10 +1048,6 @@ vm_create(struct vm_create_params *vcp, 
vmm_softc->vm_ct++;
vmm_softc->vm_idx++;
 
-   /*
-* XXX we use the vm_id for the VPID/ASID, so we need to prevent
-* wrapping around 65536/4096 entries here
-*/
vm->vm_id = vmm_softc->vm_idx;
vm->vm_vcpu_ct = 0;
vm->vm_vcpus_running = 0;
@@ -1671,6 +1682,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
 {
struct vmcb *vmcb;
int ret;
+   uint16_t asid;
 
vmcb = (struct vmcb *)vcpu->vc_control_va;
 
@@ -1726,7 +1738,14 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
svm_setmsrbr(vcpu, MSR_EFER);
 
/* Guest VCPU ASID */
-   vmcb->v_asid = vcpu->vc_parent->vm_id;
+   if (vmm_alloc_vpid()) {
+   DPRINTF("%s: could not allocate asid\n", __func__);
+   ret = EINVAL;
+   goto exit;
+   }
+
+   vmcb->v_asid = asid;
+   vcpu->vc_vpid = asid;
 
/* TLB Control */
vmcb->v_tlb_control = 2;/* Flush this guest's TLB entries */
@@ -1745,6 +1764,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
vmcb->v_efer |= (EFER_LME | EFER_LMA);
vmcb->v_cr4 |= CR4_PAE;
 
+exit:
return ret;
 }
 
@@ -1947,7 +1967,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
uint32_t pinbased, procbased, procbased2, exit, entry;
uint32_t want1, want0;
uint64_t msr, ctrlval, eptp, cr3;
-   uint16_t ctrl;
+   uint16_t ctrl, vpid;
struct vmx_msr_store *msr_store;
 
ret = 0;
@@ -2203,12 +2223,20 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
-

vmm(4): respect VPID/ASID limits

2017-05-05 Thread Mike Larkin
This diff limits the ASID/VPID value to 0xFFF (4095), or in the case of SVM,
the max ASID capability of the CPU. I use a bitmap to record the VPIDs/ASIDs
in use, and allocate the next one available when needed. Although VMX can
support 65535 VPIDs, 4095 seems like a reasonable value for the number of
"VCPUs currently in use" for a given machine. The bitmap is easily extended
if needed.

Tested with VM startup, teardown, watching the VPID recycling when VMM_DEBUG
is enabled.

ok?

-ml

Index: arch/amd64/amd64/vmm.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/vmm.c,v
retrieving revision 1.138
diff -u -p -a -u -r1.138 vmm.c
--- arch/amd64/amd64/vmm.c  2 May 2017 02:57:46 -   1.138
+++ arch/amd64/amd64/vmm.c  5 May 2017 06:12:38 -
@@ -100,6 +100,10 @@ struct vmm_softc {
struct rwlock   vm_lock;
size_t  vm_ct;  /* number of in-memory VMs */
size_t  vm_idx; /* next unique VM index */
+
+   struct rwlock   vpid_lock;
+   uint16_tmax_vpid;
+   uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */
 };
 
 int vmm_enabled(void);
@@ -165,6 +169,8 @@ int svm_get_guest_faulttype(void);
 int vmx_get_exit_qualification(uint64_t *);
 int vmx_fault_page(struct vcpu *, paddr_t);
 int vmx_handle_np_fault(struct vcpu *);
+int vmm_alloc_vpid(uint16_t *);
+void vmm_free_vpid(uint16_t);
 const char *vcpu_state_decode(u_int);
 const char *vmx_exit_reason_decode(uint32_t);
 const char *vmx_instruction_error_decode(uint32_t);
@@ -361,6 +367,15 @@ vmm_attach(struct device *parent, struct
sc->mode = VMM_MODE_UNKNOWN;
}
 
+   if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) {
+   sc->max_vpid = ci->ci_vmm_cap.vcc_svm.svm_max_asid;
+   } else {
+   sc->max_vpid = 0xFFF;
+   }
+
+   bzero(&sc->vpids, sizeof(sc->vpids));
+   rw_init(&sc->vpid_lock, "vpidlock");
+
pool_init(&vm_pool, sizeof(struct vm), 0, IPL_NONE, PR_WAITOK,
"vmpool", NULL);
pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_NONE, PR_WAITOK,
@@ -1033,10 +1048,6 @@ vm_create(struct vm_create_params *vcp, 
vmm_softc->vm_ct++;
vmm_softc->vm_idx++;
 
-   /*
-* XXX we use the vm_id for the VPID/ASID, so we need to prevent
-* wrapping around 65536/4096 entries here
-*/
vm->vm_id = vmm_softc->vm_idx;
vm->vm_vcpu_ct = 0;
vm->vm_vcpus_running = 0;
@@ -1671,6 +1682,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
 {
struct vmcb *vmcb;
int ret;
+   uint16_t asid;
 
vmcb = (struct vmcb *)vcpu->vc_control_va;
 
@@ -1726,7 +1738,14 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
svm_setmsrbr(vcpu, MSR_EFER);
 
/* Guest VCPU ASID */
-   vmcb->v_asid = vcpu->vc_parent->vm_id;
+   if (vmm_alloc_vpid(&asid)) {
+   DPRINTF("%s: could not allocate asid\n", __func__);
+   ret = EINVAL;
+   goto exit;
+   }
+
+   vmcb->v_asid = asid;
+   vcpu->vc_vpid = asid;
 
/* TLB Control */
vmcb->v_tlb_control = 2;/* Flush this guest's TLB entries */
@@ -1745,6 +1764,7 @@ vcpu_reset_regs_svm(struct vcpu *vcpu, s
vmcb->v_efer |= (EFER_LME | EFER_LMA);
vmcb->v_cr4 |= CR4_PAE;
 
+exit:
return ret;
 }
 
@@ -1947,7 +1967,7 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
uint32_t pinbased, procbased, procbased2, exit, entry;
uint32_t want1, want0;
uint64_t msr, ctrlval, eptp, cr3;
-   uint16_t ctrl;
+   uint16_t ctrl, vpid;
struct vmx_msr_store *msr_store;
 
ret = 0;
@@ -2203,12 +2223,20 @@ vcpu_reset_regs_vmx(struct vcpu *vcpu, s
if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS,
-   IA32_VMX_ENABLE_VPID, 1))
-   if (vmwrite(VMCS_GUEST_VPID,
-   (uint16_t)vcpu->vc_parent->vm_id)) {
+   IA32_VMX_ENABLE_VPID, 1)) {
+   if (vmm_alloc_vpid(&vpid)) {
+   DPRINTF("%s: could not allocate VPID\n",
+   __func__);
+   ret = EINVAL;
+   goto exit;
+   }
+   if (vmwrite(VMCS_GUEST_VPID, vpid)) {
ret = EINVAL;
goto exit;
}
+
+   vcpu->vc_vpid = vpid;
+   }
}
 
/*
@@ -2769,6 +2797,7 @@ vcpu_init(struct vcpu *vcpu)
 
vcpu->vc_virt_mode = vmm_softc->mode;
vcpu->vc_state = VCPU_STATE_STOPPED;
+   vcpu->vc_vpid = 0;
if (vmm_softc->mode ==