Re: openrsync(1): add support for IPv6-only hosts

2020-08-18 Thread Klemens Nanni
On Tue, Aug 18, 2020 at 09:58:56AM +0200, Sasha Romijn wrote:
> The current openrsync client is not able to connect to dual-stack remote 
> hosts, when the local host does not have any IPv4 connectivity. This is 
> because connect() fails with EADDRNOTAVAIL when trying to connect to the 
> remote IPv4 address on an IPv6-only local host - and IPv4 seems to be 
> attempted first. The current client does not try other remote host addresses 
> after EADDRNOTAVAIL, and therefore never tries the remote IPv6 address.
For openrsync(1) this won't happen with `family inet6 inet4' in
resolv.conf(5).
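
For reference, a minimal resolv.conf(5) along those lines might look like
this (sketch only; the nameserver address is a placeholder):

    nameserver 2001:db8::53
    family inet6 inet4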

> The included patch allows the client to continue after EADDRNOTAVAIL, making
> it try other addresses, until it reaches a working IPv6 address. If the
> local host is IPv6-only and the remote host IPv4-only, the connection still 
> fails with an error produced from rsync_connect().
Your diff reads correctly to me, thanks; regular rsync(1) connects fine
regardless of `family'.

> Perhaps a warning should be issued for the EADDRNOTAVAIL case, like it does 
> for EHOSTUNREACH - but I thought it would be a bit much, as an IPv6-only host 
> would then emit warnings on pretty much every single use of the openrsync 
> client.
Yes, I don't think a warning is warranted here - after all, it can connect
without problems.

OK to commit, anyone?



Re: slaacd(8): use correct source link-layer address

2020-08-18 Thread Klemens Nanni
On Tue, Aug 18, 2020 at 06:14:30PM +0200, Florian Obser wrote:
> When sending a router solicitation use the link-layer (mac) address of
> the outgoing interface in the source link-layer address ICMPv6 option
> instead of the address of the last configured autoconf interface.
> 
> It is not the most efficient way to first transform an if_index into
> an interface name and then iterate over all addresses, but this is
> also not in the hot path. Under normal operations slaacd will send
> one solicitation when an interface is set to autoconf and then
> never again because it will see unsolicited router advertisements
> before addresses expire.
Still sensible and works, OK kn.



[PATCH] Add helper vm_find_vcpu function for VMM

2020-08-18 Thread Jordan Hargrave
---
 sys/arch/amd64/amd64/vmm.c | 63 --
 1 file changed, 33 insertions(+), 30 deletions(-)

diff --git a/sys/arch/amd64/amd64/vmm.c b/sys/arch/amd64/amd64/vmm.c
index 84fcb23a5..f6d51737e 100644
--- a/sys/arch/amd64/amd64/vmm.c
+++ b/sys/arch/amd64/amd64/vmm.c
@@ -558,6 +558,34 @@ vmmclose(dev_t dev, int flag, int mode, struct proc *p)
return 0;
 }
 
+/*
+ * vm_find_vcpu
+ *
+ * Lookup VMM VCPU by ID number
+ *
+ * Parameters:
+ *  vm: vm structure
+ *  id: index id of vcpu
+ *
+ * Returns pointer to vcpu structure if successful, NULL otherwise
+ */
+static struct vcpu *
+vm_find_vcpu(struct vm *vm, uint32_t id)
+{
+   struct vcpu *vcpu;
+
+   if (vm == NULL)
+   return NULL;
+   rw_enter_read(&vm->vm_vcpu_lock);
+   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
+   if (vcpu->vc_id == id)
+   break;
+   }
+   rw_exit_read(&vm->vm_vcpu_lock);
+   return vcpu;
+}
+
+
 /*
  * vm_resetcpu
  *
@@ -591,12 +619,7 @@ vm_resetcpu(struct vm_resetcpu_params *vrp)
return (error);
}
 
-   rw_enter_read(&vm->vm_vcpu_lock);
-   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-   if (vcpu->vc_id == vrp->vrp_vcpu_id)
-   break;
-   }
-   rw_exit_read(&vm->vm_vcpu_lock);
+   vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
 
if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
@@ -657,12 +680,7 @@ vm_intr_pending(struct vm_intr_params *vip)
return (error);
}
 
-   rw_enter_read(&vm->vm_vcpu_lock);
-   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-   if (vcpu->vc_id == vip->vip_vcpu_id)
-   break;
-   }
-   rw_exit_read(&vm->vm_vcpu_lock);
+   vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);
 
if (vcpu == NULL)
@@ -722,12 +740,7 @@ vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
return (error);
}
 
-   rw_enter_read(&vm->vm_vcpu_lock);
-   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-   if (vcpu->vc_id == vpp->vpp_vcpu_id)
-   break;
-   }
-   rw_exit_read(&vm->vm_vcpu_lock);
+   vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);
 
if (vcpu == NULL)
@@ -786,12 +799,7 @@ vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
return (error);
}
 
-   rw_enter_read(&vm->vm_vcpu_lock);
-   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-   if (vcpu->vc_id == vrwp->vrwp_vcpu_id)
-   break;
-   }
-   rw_exit_read(&vm->vm_vcpu_lock);
+   vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
rw_exit_read(&vmm_softc->vm_lock);
 
if (vcpu == NULL)
@@ -858,12 +866,7 @@ vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
return (ret);
}
 
-   rw_enter_read(&vm->vm_vcpu_lock);
-   SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
-   if (vcpu->vc_id == vmep->vmep_vcpu_id)
-   break;
-   }
-   rw_exit_read(&vm->vm_vcpu_lock);
+   vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id);
 
if (vcpu == NULL) {
DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
-- 
2.26.2



PATCH: VMD fixes for PCI Config Space and BAR Allocation [passthrough PCI support]

2020-08-18 Thread Jordan Hargrave
This is the first patch for adding PCI passthrough support to VMD.
I am splitting up the necessary changes into smaller patches.

This code fixes the pci device union for accessing PCI config space >= 0x40;
pcidump -xxx would return garbage data due to the union overlap.

pci_add_bar now requires specifying the BAR area size to allocate.

An extra cookie argument is added to pci_add_device.
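
For illustration, a pci_add_bar() call site would now pass the size
explicitly, roughly like this (the names below are placeholders, not
taken from the patch):

    /* register a 4 KB MMIO BAR; my_bar_fn/my_cookie are hypothetical */
    pci_add_bar(id, PCI_MAPREG_TYPE_MEM, 0x1000, my_bar_fn, my_cookie);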

---
 usr.sbin/vmd/pci.c| 50 +++
 usr.sbin/vmd/pci.h| 80 ++-
 usr.sbin/vmd/virtio.c | 25 +++---
 3 files changed, 89 insertions(+), 66 deletions(-)

diff --git a/usr.sbin/vmd/pci.c b/usr.sbin/vmd/pci.c
index 954235eb6..4ce393237 100644
--- a/usr.sbin/vmd/pci.c
+++ b/usr.sbin/vmd/pci.c
@@ -39,28 +39,47 @@ extern char *__progname;
 const uint8_t pci_pic_irqs[PCI_MAX_PIC_IRQS] = {3, 5, 6, 7, 9, 10, 11, 12,
 14, 15};
 
+/*
+ * pci_mkbar
+ *
+ * Calculates BAR address given a size and alignment.
+ * Returns allocated address and updates next address
+ * Returns zero if address is out of range
+ */
+static uint64_t
+pci_mkbar(uint64_t *base, uint32_t size, uint32_t align, uint64_t maxbase)
+{
+   uint64_t mask = size - 1;
+   uint64_t cbase;
+
+   if (*base >= maxbase)
+   return (0);
+   cbase = *base;
+   *base = (*base + size + mask) & ~mask;
+   return cbase;
+}
+
 /*
  * pci_add_bar
  *
  * Adds a BAR for the PCI device 'id'. On access, 'barfn' will be
  * called, and passed 'cookie' as an identifier.
  *
- * BARs are fixed size, meaning all I/O BARs requested have the
- * same size and all MMIO BARs have the same size.
- *
  * Parameters:
  *  id: PCI device to add the BAR to (local count, eg if id == 4,
  *  this BAR is to be added to the VM's 5th PCI device)
  *  type: type of the BAR to add (PCI_MAPREG_TYPE_xxx)
+ *  size: Size of BAR area
  *  barfn: callback function invoked on BAR access
  *  cookie: cookie passed to barfn on access
  *
  * Returns 0 if the BAR was added successfully, 1 otherwise.
  */
 int
-pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
+pci_add_bar(uint8_t id, uint32_t type, uint32_t size, void *barfn, void *cookie)
 {
uint8_t bar_reg_idx, bar_ct;
+   uint64_t base = 0;
 
/* Check id */
if (id >= pci.pci_dev_ct)
@@ -74,31 +93,31 @@ pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
/* Compute BAR address and add */
bar_reg_idx = (PCI_MAPREG_START + (bar_ct * 4)) / 4;
if (type == PCI_MAPREG_TYPE_MEM) {
-   if (pci.pci_next_mmio_bar >= VMM_PCI_MMIO_BAR_END)
+   base = pci_mkbar(&pci.pci_next_mmio_bar, size, 4096, VMM_PCI_MMIO_BAR_END);
+   if (base == 0)
return (1);
 
pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
-   PCI_MAPREG_MEM_ADDR(pci.pci_next_mmio_bar);
-   pci.pci_next_mmio_bar += VMM_PCI_MMIO_BAR_SIZE;
+   PCI_MAPREG_MEM_ADDR(base);
pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
-   pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_MMIO_BAR_SIZE;
+   pci.pci_devices[id].pd_barsize[bar_ct] = size;
pci.pci_devices[id].pd_bar_ct++;
} else if (type == PCI_MAPREG_TYPE_IO) {
-   if (pci.pci_next_io_bar >= VMM_PCI_IO_BAR_END)
+   base = pci_mkbar(&pci.pci_next_io_bar, size, 4, VMM_PCI_IO_BAR_END);
+   if (base == 0)
return (1);
 
pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
-   PCI_MAPREG_IO_ADDR(pci.pci_next_io_bar) |
+   PCI_MAPREG_IO_ADDR(base) |
PCI_MAPREG_TYPE_IO;
-   pci.pci_next_io_bar += VMM_PCI_IO_BAR_SIZE;
pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
DPRINTF("%s: adding pci bar cookie for dev %d bar %d = %p",
__progname, id, bar_ct, cookie);
pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_IO;
-   pci.pci_devices[id].pd_barsize[bar_ct] = VMM_PCI_IO_BAR_SIZE;
+   pci.pci_devices[id].pd_barsize[bar_ct] = size;
pci.pci_devices[id].pd_bar_ct++;
}
 
@@ -165,7 +184,7 @@ pci_get_dev_irq(uint8_t id)
 int
 pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
 uint8_t subclass, uint16_t subsys_vid, uint16_t subsys_id,
-uint8_t irq_needed, pci_cs_fn_t csfunc)
+uint8_t irq_needed, pci_cs_fn_t csfunc, void *cookie)
 {
/* Exceeded max devices? */
if (pci.pci_dev_ct >= PCI_CONFIG_MAX_DEV)
@@ -186,6 +205,7 @@ pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
pci.pci_devices[*id].pd_subsys_id 

slaacd(8): use correct source link-layer address

2020-08-18 Thread Florian Obser
When sending a router solicitation use the link-layer (mac) address of
the outgoing interface in the source link-layer address ICMPv6 option
instead of the address of the last configured autoconf interface.

It is not the most efficient way to first transform an if_index into
an interface name and then iterate over all addresses, but this is
also not in the hot path. Under normal operations slaacd will send
one solicitation when an interface is set to autoconf and then
never again because it will see unsolicited router advertisements
before addresses expire.

Tests, OKs?

diff --git frontend.c frontend.c
index 2adbdb0f97a..4b94af5ec8c 100644
--- frontend.c
+++ frontend.c
@@ -520,9 +520,6 @@ update_iface(uint32_t if_index, char* if_name)
imsg_ifinfo.soii = !(xflags & IFXF_INET6_NOSOII);
get_lladdr(if_name, &imsg_ifinfo.hw_address, &imsg_ifinfo.ll_address);
 
-   memcpy(&nd_opt_source_link_addr, &imsg_ifinfo.hw_address,
-   sizeof(nd_opt_source_link_addr));
-
frontend_imsg_compose_main(IMSG_UPDATE_IF, 0, &imsg_ifinfo,
sizeof(imsg_ifinfo));
 }
@@ -1023,9 +1020,20 @@ send_solicitation(uint32_t if_index)
 {
struct in6_pktinfo  *pi;
struct cmsghdr  *cm;
+   struct ether_addr    hw_address;
+   struct sockaddr_in6  ll_address;
+   char if_name[IF_NAMESIZE];
 
log_debug("%s(%u)", __func__, if_index);
 
+   if (if_indextoname(if_index, if_name) == NULL)
+   return;
+
+   get_lladdr(if_name, &hw_address, &ll_address);
+
+   memcpy(&nd_opt_source_link_addr, &hw_address,
+   sizeof(nd_opt_source_link_addr));
+
dst.sin6_scope_id = if_index;
 
cm = CMSG_FIRSTHDR();


-- 
I'm not entirely sure you are real.



Re: Fewer pool_get() in kqueue_register()

2020-08-18 Thread Visa Hankala
On Tue, Aug 18, 2020 at 11:04:47AM +0200, Martin Pieuchot wrote:
> Diff below changes the order of operations in kqueue_register() to get
> rid of an unnecessary pool_get().  When an event is already present on
> the list try to acquire it first.  Note that knote_acquire() may sleep
> in which case the list might have changed so the lookup has to always
> begin from the start.
> 
> This will help with lazy removal of knote in poll/select.  In this
> scenario EV_ADD is generally always done with a knote already on the
> list.

Some of the overhead could be absorbed by using a pool cache, as shown
in the diff below. However, I am not suggesting that the cache should
be taken into use right now. The frequency of knote pool usage is
relatively low currently; there are other pools that would benefit more
from caching.

A related question is what implications the increased use of the pool
cache feature would have under memory pressure.

Index: kern/init_main.c
===
RCS file: src/sys/kern/init_main.c,v
retrieving revision 1.300
diff -u -p -r1.300 init_main.c
--- kern/init_main.c16 Jun 2020 05:09:29 -  1.300
+++ kern/init_main.c18 Aug 2020 15:09:38 -
@@ -71,6 +71,7 @@
 #include 
 #endif
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -148,7 +149,6 @@ voidcrypto_init(void);
 void   db_ctf_init(void);
 void   prof_init(void);
 void   init_exec(void);
-void   kqueue_init(void);
 void   futex_init(void);
 void   taskq_init(void);
 void   timeout_proc_init(void);
@@ -431,7 +431,9 @@ main(void *framep)
prof_init();
 #endif
 
-   mbcpuinit();/* enable per cpu mbuf data */
+   /* Enable per cpu data. */
+   mbcpuinit();
+   kqueue_init_percpu();
 
/* init exec and emul */
init_exec();
Index: kern/kern_event.c
===
RCS file: src/sys/kern/kern_event.c,v
retrieving revision 1.142
diff -u -p -r1.142 kern_event.c
--- kern/kern_event.c   12 Aug 2020 13:49:24 -  1.142
+++ kern/kern_event.c   18 Aug 2020 15:09:38 -
@@ -205,6 +205,12 @@ kqueue_init(void)
PR_WAITOK, "knotepl", NULL);
 }
 
+void
+kqueue_init_percpu(void)
+{
+   pool_cache_init(&knote_pool);
+}
+
 int
 filt_fileattach(struct knote *kn)
 {
Index: sys/event.h
===
RCS file: src/sys/sys/event.h,v
retrieving revision 1.44
diff -u -p -r1.44 event.h
--- sys/event.h 22 Jun 2020 13:14:32 -  1.44
+++ sys/event.h 18 Aug 2020 15:09:38 -
@@ -210,6 +210,8 @@ extern void knote_activate(struct knote 
 extern voidknote_remove(struct proc *p, struct knlist *list);
 extern voidknote_fdclose(struct proc *p, int fd);
 extern voidknote_processexit(struct proc *);
+extern voidkqueue_init(void);
+extern voidkqueue_init_percpu(void);
 extern int kqueue_register(struct kqueue *kq,
struct kevent *kev, struct proc *p);
 extern int filt_seltrue(struct knote *kn, long hint);



Log mutex for msgbuf concurrency control

2020-08-18 Thread Visa Hankala
This diff introduces a mutex that serializes access to the kernel
message buffers. At the moment, the buffers do not have clear
concurrency control.

The mutex controls access to all the modifiable fields of struct msgbuf.
It also protects logsoftc.sc_state for managing thread wakeups.

A tricky thing with the diff is that the code is indirectly used in many
different contexts and in varied machine-specific functions. Luckily,
the code already utilizes splhigh() in critical parts, so the transition
to using a mutex possibly is not such a big step any longer.

To avoid creating problems with lock order, the code does not take
new locks when the log mutex is held. This is visible in logwakeup()
and logread(). It looks like selrecord() does not acquire new locks,
so no extra steps are needed in logpoll().

In logread(), the code assumes that there is only one reader. This keeps
the data extraction logic simple.

An additional benefit (?) of the diff is that now uiomove(9) is called
without splhigh().

I have tested the diff on amd64 and octeon. However, additional tests
on these and other platforms would be very helpful.

OK?

Index: kern/subr_log.c
===
RCS file: src/sys/kern/subr_log.c,v
retrieving revision 1.68
diff -u -p -r1.68 subr_log.c
--- kern/subr_log.c 18 Aug 2020 13:41:49 -  1.68
+++ kern/subr_log.c 18 Aug 2020 14:10:25 -
@@ -52,6 +52,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #ifdef KTRACE
@@ -69,8 +70,12 @@
 #define LOG_ASYNC  0x04
 #define LOG_RDWAIT 0x08
 
+/*
+ * Locking:
+ * L   log_mtx
+ */
 struct logsoftc {
-   int sc_state;   /* see above for possibilities */
+   int sc_state;   /* [L] see above for possibilities */
struct  selinfo sc_selp;/* process waiting on select call */
struct  sigio_ref sc_sigio; /* async I/O registration */
int sc_need_wakeup; /* if set, wake up waiters */
@@ -83,6 +88,14 @@ struct   msgbuf *msgbufp;/* the mapped b
 struct msgbuf *consbufp;   /* console message buffer. */
 struct file *syslogf;
 
+/*
+ * Lock that serializes access to log message buffers.
+ * This should be kept as a leaf lock in order not to constrain where
+ * printf(9) can be used.
+ */
+struct mutex log_mtx =
+MUTEX_INITIALIZER_FLAGS(IPL_HIGH, "logmtx", MTX_NOWITNESS);
+
 void filt_logrdetach(struct knote *kn);
 int filt_logread(struct knote *kn, long hint);
 
@@ -144,13 +157,11 @@ initconsbuf(void)
 void
 msgbuf_putchar(struct msgbuf *mbp, const char c)
 {
-   int s;
-
if (mbp->msg_magic != MSG_MAGIC)
/* Nothing we can do */
return;
 
-   s = splhigh();
+   mtx_enter(&log_mtx);
mbp->msg_bufc[mbp->msg_bufx++] = c;
if (mbp->msg_bufx < 0 || mbp->msg_bufx >= mbp->msg_bufs)
mbp->msg_bufx = 0;
@@ -160,20 +171,19 @@ msgbuf_putchar(struct msgbuf *mbp, const
mbp->msg_bufr = 0;
mbp->msg_bufd++;
}
-   splx(s);
+   mtx_leave(&log_mtx);
 }
 
 size_t
 msgbuf_getlen(struct msgbuf *mbp)
 {
long len;
-   int s;
 
-   s = splhigh();
+   mtx_enter(&log_mtx);
len = mbp->msg_bufx - mbp->msg_bufr;
if (len < 0)
len += mbp->msg_bufs;
-   splx(s);
+   mtx_leave(&log_mtx);
return (len);
 }
 
@@ -208,34 +218,47 @@ logclose(dev_t dev, int flag, int mode, 
 int
 logread(dev_t dev, struct uio *uio, int flag)
 {
+   struct sleep_state sls;
struct msgbuf *mbp = msgbufp;
-   size_t l;
-   int s, error = 0;
+   size_t l, rpos;
+   int error = 0;
 
-   s = splhigh();
+   mtx_enter(&log_mtx);
while (mbp->msg_bufr == mbp->msg_bufx) {
if (flag & IO_NDELAY) {
error = EWOULDBLOCK;
goto out;
}
logsoftc.sc_state |= LOG_RDWAIT;
-   error = tsleep_nsec(mbp, LOG_RDPRI | PCATCH, "klog", INFSLP);
+   mtx_leave(&log_mtx);
+   /*
+* Set up and enter sleep manually instead of using msleep()
+* to keep log_mtx as a leaf lock.
+*/
+   sleep_setup(&sls, mbp, LOG_RDWAIT | PCATCH, "klog");
+   sleep_setup_signal(&sls);
+   sleep_finish(&sls, logsoftc.sc_state & LOG_RDWAIT);
+   error = sleep_finish_signal(&sls);
+   mtx_enter(&log_mtx);
if (error)
goto out;
}
-   logsoftc.sc_state &= ~LOG_RDWAIT;
 
if (mbp->msg_bufd > 0) {
char buf[64];
+   long ndropped;
 
+   ndropped = mbp->msg_bufd;
+   mtx_leave(&log_mtx);
l = snprintf(buf, sizeof(buf),
"<%d>klog: dropped %ld byte%s, message buffer full\n",
-   

Re: Push KERNEL_LOCK/UNLOCK in trapsignal()

2020-08-18 Thread Mark Kettenis
> Date: Tue, 18 Aug 2020 11:52:17 +0200
> From: Martin Pieuchot 
> 
> Taken from a larger diff from claudio@, this reduces the lock dances in
> MD code and puts it where we should focus our effort in kern/kern_sig.c.
> 
> ok?

Agreed.  ok kettenis@

> Index: kern/kern_sig.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.258
> diff -u -p -r1.258 kern_sig.c
> --- kern/kern_sig.c   15 Jun 2020 13:18:33 -  1.258
> +++ kern/kern_sig.c   18 Aug 2020 09:34:11 -
> @@ -802,6 +802,7 @@ trapsignal(struct proc *p, int signum, u
>   struct sigacts *ps = pr->ps_sigacts;
>   int mask;
>  
> + KERNEL_LOCK();
>   switch (signum) {
>   case SIGILL:
>   case SIGBUS:
> @@ -842,6 +843,7 @@ trapsignal(struct proc *p, int signum, u
>   sigexit(p, signum);
>   ptsignal(p, signum, STHREAD);
>   }
> + KERNEL_UNLOCK();
>  }
>  
>  /*
> Index: arch/alpha/alpha/trap.c
> ===
> RCS file: /cvs/src/sys/arch/alpha/alpha/trap.c,v
> retrieving revision 1.88
> diff -u -p -r1.88 trap.c
> --- arch/alpha/alpha/trap.c   6 Sep 2019 12:22:01 -   1.88
> +++ arch/alpha/alpha/trap.c   18 Aug 2020 09:18:54 -
> @@ -488,9 +488,7 @@ do_fault:
>   printtrap(a0, a1, a2, entry, framep, 1, user);
>  #endif
>   sv.sival_ptr = v;
> - KERNEL_LOCK();
>   trapsignal(p, i, ucode, typ, sv);
> - KERNEL_UNLOCK();
>  out:
>   if (user) {
>   /* Do any deferred user pmap operations. */
> Index: arch/amd64/amd64/trap.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
> retrieving revision 1.79
> diff -u -p -r1.79 trap.c
> --- arch/amd64/amd64/trap.c   21 Jan 2020 03:06:39 -  1.79
> +++ arch/amd64/amd64/trap.c   18 Aug 2020 09:18:54 -
> @@ -391,9 +391,7 @@ usertrap(struct trapframe *frame)
>   }
>  
>   sv.sival_ptr = (void *)frame->tf_rip;
> - KERNEL_LOCK();
>   trapsignal(p, sig, type, code, sv);
> - KERNEL_UNLOCK();
>  
>  out:
>   userret(p);
> Index: arch/arm/arm/fault.c
> ===
> RCS file: /cvs/src/sys/arch/arm/arm/fault.c,v
> retrieving revision 1.39
> diff -u -p -r1.39 fault.c
> --- arch/arm/arm/fault.c  6 Sep 2019 12:22:01 -   1.39
> +++ arch/arm/arm/fault.c  18 Aug 2020 09:18:54 -
> @@ -373,9 +373,7 @@ data_abort_handler(trapframe_t *tf)
>   sd.trap = fsr;
>  do_trapsignal:
>   sv.sival_int = sd.addr;
> - KERNEL_LOCK();
>   trapsignal(p, sd.signo, sd.trap, sd.code, sv);
> - KERNEL_UNLOCK();
>  out:
>   /* If returning to user mode, make sure to invoke userret() */
>   if (user)
> @@ -596,13 +594,9 @@ prefetch_abort_handler(trapframe_t *tf)
>   printf("UVM: pid %d (%s), uid %d killed: "
>   "out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
>   p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
> - KERNEL_LOCK();
>   trapsignal(p, SIGKILL, 0, SEGV_MAPERR, sv);
> - KERNEL_UNLOCK();
>   } else {
> - KERNEL_LOCK();
>   trapsignal(p, SIGSEGV, 0, SEGV_MAPERR, sv);
> - KERNEL_UNLOCK();
>   }
>  
>  out:
> Index: arch/arm/arm/undefined.c
> ===
> RCS file: /cvs/src/sys/arch/arm/arm/undefined.c,v
> retrieving revision 1.13
> diff -u -p -r1.13 undefined.c
> --- arch/arm/arm/undefined.c  13 Mar 2019 09:28:21 -  1.13
> +++ arch/arm/arm/undefined.c  18 Aug 2020 09:18:54 -
> @@ -113,9 +113,7 @@ gdb_trapper(u_int addr, u_int insn, stru
>   if (insn == GDB_BREAKPOINT || insn == GDB5_BREAKPOINT) {
>   if (code == FAULT_USER) {
>   sv.sival_int = addr;
> - KERNEL_LOCK();
>   trapsignal(p, SIGTRAP, 0, TRAP_BRKPT, sv);
> - KERNEL_UNLOCK();
>   return 0;
>   }
>   }
> @@ -174,9 +172,7 @@ undefinedinstruction(trapframe_t *frame)
>   if (__predict_false((fault_pc & 3) != 0)) {
>   /* Give the user an illegal instruction signal. */
>   sv.sival_int = (u_int32_t) fault_pc;
> - KERNEL_LOCK();
>   trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
> - KERNEL_UNLOCK();
>   userret(p);
>   return;
>   }
> @@ -260,9 +256,7 @@ undefinedinstruction(trapframe_t *frame)
>   }
>  
>   sv.sival_int = frame->tf_pc;
> - KERNEL_LOCK();
>   trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
> - KERNEL_UNLOCK();
>   }
>  
>   if ((fault_code & FAULT_USER) == 0)
> Index: arch/arm64/arm64/trap.c
> 

Re: Fewer pool_get() in kqueue_register()

2020-08-18 Thread Martin Pieuchot
On 18/08/20(Tue) 11:22, Mark Kettenis wrote:
> > Date: Tue, 18 Aug 2020 11:04:47 +0200
> > From: Martin Pieuchot 
> > 
> > Diff below changes the order of operations in kqueue_register() to get
> > rid of an unnecessary pool_get().  When an event is already present on
> > the list try to acquire it first.  Note that knote_acquire() may sleep
> > in which case the list might have changed so the lookup has to always
> > begin from the start.
> > 
> > This will help with lazy removal of knote in poll/select.  In this
> > scenario EV_ADD is generally always done with a knote already on the
> > list.
> > 
> > ok?
> 
> But pool_get() may sleep as well.  In my experience it is better to do
> the resource allocation up front and release afterwards if it turned
> out you didn't need the resource.  That's what the current code does.

There's indeed a possible race when multiple threads try to register the
same event on the same kqueue; this would lead to a double insert.  Thanks!

> > Index: kern/kern_event.c
> > ===
> > RCS file: /cvs/src/sys/kern/kern_event.c,v
> > retrieving revision 1.142
> > diff -u -p -r1.142 kern_event.c
> > --- kern/kern_event.c   12 Aug 2020 13:49:24 -  1.142
> > +++ kern/kern_event.c   18 Aug 2020 08:58:27 -
> > @@ -696,7 +696,7 @@ kqueue_register(struct kqueue *kq, struc
> > struct filedesc *fdp = kq->kq_fdp;
> > const struct filterops *fops = NULL;
> > struct file *fp = NULL;
> > -   struct knote *kn = NULL, *newkn = NULL;
> > +   struct knote *kn, *newkn = NULL;
> > struct knlist *list = NULL;
> > int s, error = 0;
> >  
> > @@ -721,22 +721,12 @@ kqueue_register(struct kqueue *kq, struc
> > return (EBADF);
> > }
> >  
> > -   if (kev->flags & EV_ADD)
> > -   newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> > -
> >  again:
> > +   kn = NULL;
> > if (fops->f_flags & FILTEROP_ISFD) {
> > -   if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> > -   error = EBADF;
> > -   goto done;
> > -   }
> > -   if (kev->flags & EV_ADD)
> > -   kqueue_expand_list(kq, kev->ident);
> > if (kev->ident < kq->kq_knlistsize)
> > list = &kq->kq_knlist[kev->ident];
> > } else {
> > -   if (kev->flags & EV_ADD)
> > -   kqueue_expand_hash(kq);
> > if (kq->kq_knhashmask != 0) {
> > list = &kq->kq_knhash[
> > KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
> > @@ -749,10 +739,6 @@ again:
> > s = splhigh();
> > if (!knote_acquire(kn)) {
> > splx(s);
> > -   if (fp != NULL) {
> > -   FRELE(fp, p);
> > -   fp = NULL;
> > -   }
> > goto again;
> > }
> > splx(s);
> > @@ -760,6 +746,21 @@ again:
> > }
> > }
> > }
> > +
> > +   if (kev->flags & EV_ADD && kn == NULL) {
> > +   newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> > +   if (fops->f_flags & FILTEROP_ISFD) {
> > +   if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> > +   error = EBADF;
> > +   goto done;
> > +   }
> > +   kqueue_expand_list(kq, kev->ident);
> > +   } else {
> > +   kqueue_expand_hash(kq);
> > +   }
> > +
> > +   }
> > +
> > KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);
> >  
> > if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
> > 
> > 



Push KERNEL_LOCK/UNLOCK in trapsignal()

2020-08-18 Thread Martin Pieuchot
Taken from a larger diff from claudio@, this reduces the lock dances in
MD code and puts it where we should focus our effort in kern/kern_sig.c.

ok?

Index: kern/kern_sig.c
===
RCS file: /cvs/src/sys/kern/kern_sig.c,v
retrieving revision 1.258
diff -u -p -r1.258 kern_sig.c
--- kern/kern_sig.c 15 Jun 2020 13:18:33 -  1.258
+++ kern/kern_sig.c 18 Aug 2020 09:34:11 -
@@ -802,6 +802,7 @@ trapsignal(struct proc *p, int signum, u
struct sigacts *ps = pr->ps_sigacts;
int mask;
 
+   KERNEL_LOCK();
switch (signum) {
case SIGILL:
case SIGBUS:
@@ -842,6 +843,7 @@ trapsignal(struct proc *p, int signum, u
sigexit(p, signum);
ptsignal(p, signum, STHREAD);
}
+   KERNEL_UNLOCK();
 }
 
 /*
Index: arch/alpha/alpha/trap.c
===
RCS file: /cvs/src/sys/arch/alpha/alpha/trap.c,v
retrieving revision 1.88
diff -u -p -r1.88 trap.c
--- arch/alpha/alpha/trap.c 6 Sep 2019 12:22:01 -   1.88
+++ arch/alpha/alpha/trap.c 18 Aug 2020 09:18:54 -
@@ -488,9 +488,7 @@ do_fault:
printtrap(a0, a1, a2, entry, framep, 1, user);
 #endif
sv.sival_ptr = v;
-   KERNEL_LOCK();
trapsignal(p, i, ucode, typ, sv);
-   KERNEL_UNLOCK();
 out:
if (user) {
/* Do any deferred user pmap operations. */
Index: arch/amd64/amd64/trap.c
===
RCS file: /cvs/src/sys/arch/amd64/amd64/trap.c,v
retrieving revision 1.79
diff -u -p -r1.79 trap.c
--- arch/amd64/amd64/trap.c 21 Jan 2020 03:06:39 -  1.79
+++ arch/amd64/amd64/trap.c 18 Aug 2020 09:18:54 -
@@ -391,9 +391,7 @@ usertrap(struct trapframe *frame)
}
 
sv.sival_ptr = (void *)frame->tf_rip;
-   KERNEL_LOCK();
trapsignal(p, sig, type, code, sv);
-   KERNEL_UNLOCK();
 
 out:
userret(p);
Index: arch/arm/arm/fault.c
===
RCS file: /cvs/src/sys/arch/arm/arm/fault.c,v
retrieving revision 1.39
diff -u -p -r1.39 fault.c
--- arch/arm/arm/fault.c6 Sep 2019 12:22:01 -   1.39
+++ arch/arm/arm/fault.c18 Aug 2020 09:18:54 -
@@ -373,9 +373,7 @@ data_abort_handler(trapframe_t *tf)
sd.trap = fsr;
 do_trapsignal:
sv.sival_int = sd.addr;
-   KERNEL_LOCK();
trapsignal(p, sd.signo, sd.trap, sd.code, sv);
-   KERNEL_UNLOCK();
 out:
/* If returning to user mode, make sure to invoke userret() */
if (user)
@@ -596,13 +594,9 @@ prefetch_abort_handler(trapframe_t *tf)
printf("UVM: pid %d (%s), uid %d killed: "
"out of swap\n", p->p_p->ps_pid, p->p_p->ps_comm,
p->p_ucred ? (int)p->p_ucred->cr_uid : -1);
-   KERNEL_LOCK();
trapsignal(p, SIGKILL, 0, SEGV_MAPERR, sv);
-   KERNEL_UNLOCK();
} else {
-   KERNEL_LOCK();
trapsignal(p, SIGSEGV, 0, SEGV_MAPERR, sv);
-   KERNEL_UNLOCK();
}
 
 out:
Index: arch/arm/arm/undefined.c
===
RCS file: /cvs/src/sys/arch/arm/arm/undefined.c,v
retrieving revision 1.13
diff -u -p -r1.13 undefined.c
--- arch/arm/arm/undefined.c13 Mar 2019 09:28:21 -  1.13
+++ arch/arm/arm/undefined.c18 Aug 2020 09:18:54 -
@@ -113,9 +113,7 @@ gdb_trapper(u_int addr, u_int insn, stru
if (insn == GDB_BREAKPOINT || insn == GDB5_BREAKPOINT) {
if (code == FAULT_USER) {
sv.sival_int = addr;
-   KERNEL_LOCK();
trapsignal(p, SIGTRAP, 0, TRAP_BRKPT, sv);
-   KERNEL_UNLOCK();
return 0;
}
}
@@ -174,9 +172,7 @@ undefinedinstruction(trapframe_t *frame)
if (__predict_false((fault_pc & 3) != 0)) {
/* Give the user an illegal instruction signal. */
sv.sival_int = (u_int32_t) fault_pc;
-   KERNEL_LOCK();
trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
-   KERNEL_UNLOCK();
userret(p);
return;
}
@@ -260,9 +256,7 @@ undefinedinstruction(trapframe_t *frame)
}
 
sv.sival_int = frame->tf_pc;
-   KERNEL_LOCK();
trapsignal(p, SIGILL, 0, ILL_ILLOPC, sv);
-   KERNEL_UNLOCK();
}
 
if ((fault_code & FAULT_USER) == 0)
Index: arch/arm64/arm64/trap.c
===
RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
retrieving revision 1.28
diff -u -p -r1.28 trap.c
--- arch/arm64/arm64/trap.c 17 Aug 2020 08:09:03 -  1.28
+++ arch/arm64/arm64/trap.c

Re: Fewer pool_get() in kqueue_register()

2020-08-18 Thread Mark Kettenis
> Date: Tue, 18 Aug 2020 11:04:47 +0200
> From: Martin Pieuchot 
> 
> Diff below changes the order of operations in kqueue_register() to get
> rid of an unnecessary pool_get().  When an event is already present on
> the list try to acquire it first.  Note that knote_acquire() may sleep
> in which case the list might have changed so the lookup has to always
> begin from the start.
> 
> This will help with lazy removal of knote in poll/select.  In this
> scenario EV_ADD is generally always done with an knote already on the
> list.
> 
> ok?

But pool_get() may sleep as well.  In my experience it is better to do
the resource allocation up front and release afterwards if it turned
out you didn't need the resource.  That's what the current code does.
I don't fully understand how the code works, but your change looks
wrong to me.
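
A rough sketch of that allocate-up-front pattern (illustrative only, not
the actual kqueue code; lookup() stands in for the list scan):

    new = pool_get(&knote_pool, PR_WAITOK | PR_ZERO); /* may sleep, before lookup */
    kn = lookup(list, kev->ident);
    if (kn != NULL) {
        pool_put(&knote_pool, new); /* existing knote found, preallocation unused */
        new = NULL;
    }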

> Index: kern/kern_event.c
> ===
> RCS file: /cvs/src/sys/kern/kern_event.c,v
> retrieving revision 1.142
> diff -u -p -r1.142 kern_event.c
> --- kern/kern_event.c 12 Aug 2020 13:49:24 -  1.142
> +++ kern/kern_event.c 18 Aug 2020 08:58:27 -
> @@ -696,7 +696,7 @@ kqueue_register(struct kqueue *kq, struc
>   struct filedesc *fdp = kq->kq_fdp;
>   const struct filterops *fops = NULL;
>   struct file *fp = NULL;
> - struct knote *kn = NULL, *newkn = NULL;
> + struct knote *kn, *newkn = NULL;
>   struct knlist *list = NULL;
>   int s, error = 0;
>  
> @@ -721,22 +721,12 @@ kqueue_register(struct kqueue *kq, struc
>   return (EBADF);
>   }
>  
> - if (kev->flags & EV_ADD)
> - newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> -
>  again:
> + kn = NULL;
>   if (fops->f_flags & FILTEROP_ISFD) {
> - if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> - error = EBADF;
> - goto done;
> - }
> - if (kev->flags & EV_ADD)
> - kqueue_expand_list(kq, kev->ident);
>   if (kev->ident < kq->kq_knlistsize)
>   list = &kq->kq_knlist[kev->ident];
>   } else {
> - if (kev->flags & EV_ADD)
> - kqueue_expand_hash(kq);
>   if (kq->kq_knhashmask != 0) {
>   list = &kq->kq_knhash[
>   KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
> @@ -749,10 +739,6 @@ again:
>   s = splhigh();
>   if (!knote_acquire(kn)) {
>   splx(s);
> - if (fp != NULL) {
> - FRELE(fp, p);
> - fp = NULL;
> - }
>   goto again;
>   }
>   splx(s);
> @@ -760,6 +746,21 @@ again:
>   }
>   }
>   }
> +
> + if (kev->flags & EV_ADD && kn == NULL) {
> + newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
> + if (fops->f_flags & FILTEROP_ISFD) {
> + if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
> + error = EBADF;
> + goto done;
> + }
> + kqueue_expand_list(kq, kev->ident);
> + } else {
> + kqueue_expand_hash(kq);
> + }
> +
> + }
> +
>   KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);
>  
>   if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {
> 
> 



Fewer pool_get() in kqueue_register()

2020-08-18 Thread Martin Pieuchot
Diff below changes the order of operations in kqueue_register() to get
rid of an unnecessary pool_get().  When an event is already present on
the list try to acquire it first.  Note that knote_acquire() may sleep
in which case the list might have changed so the lookup has to always
begin from the start.

This will help with lazy removal of knote in poll/select.  In this
scenario EV_ADD is generally always done with a knote already on the
list.

ok?

Index: kern/kern_event.c
===
RCS file: /cvs/src/sys/kern/kern_event.c,v
retrieving revision 1.142
diff -u -p -r1.142 kern_event.c
--- kern/kern_event.c   12 Aug 2020 13:49:24 -  1.142
+++ kern/kern_event.c   18 Aug 2020 08:58:27 -
@@ -696,7 +696,7 @@ kqueue_register(struct kqueue *kq, struc
struct filedesc *fdp = kq->kq_fdp;
const struct filterops *fops = NULL;
struct file *fp = NULL;
-   struct knote *kn = NULL, *newkn = NULL;
+   struct knote *kn, *newkn = NULL;
struct knlist *list = NULL;
int s, error = 0;
 
@@ -721,22 +721,12 @@ kqueue_register(struct kqueue *kq, struc
return (EBADF);
}
 
-   if (kev->flags & EV_ADD)
-   newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
-
 again:
+   kn = NULL;
if (fops->f_flags & FILTEROP_ISFD) {
-   if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
-   error = EBADF;
-   goto done;
-   }
-   if (kev->flags & EV_ADD)
-   kqueue_expand_list(kq, kev->ident);
if (kev->ident < kq->kq_knlistsize)
list = &kq->kq_knlist[kev->ident];
} else {
-   if (kev->flags & EV_ADD)
-   kqueue_expand_hash(kq);
if (kq->kq_knhashmask != 0) {
list = &kq->kq_knhash[
KN_HASH((u_long)kev->ident, kq->kq_knhashmask)];
@@ -749,10 +739,6 @@ again:
s = splhigh();
if (!knote_acquire(kn)) {
splx(s);
-   if (fp != NULL) {
-   FRELE(fp, p);
-   fp = NULL;
-   }
goto again;
}
splx(s);
@@ -760,6 +746,21 @@ again:
}
}
}
+
+   if (kev->flags & EV_ADD && kn == NULL) {
+   newkn = pool_get(&knote_pool, PR_WAITOK | PR_ZERO);
+   if (fops->f_flags & FILTEROP_ISFD) {
+   if ((fp = fd_getfile(fdp, kev->ident)) == NULL) {
+   error = EBADF;
+   goto done;
+   }
+   kqueue_expand_list(kq, kev->ident);
+   } else {
+   kqueue_expand_hash(kq);
+   }
+
+   }
+
KASSERT(kn == NULL || (kn->kn_status & KN_PROCESSING) != 0);
 
if (kn == NULL && ((kev->flags & EV_ADD) == 0)) {



Re: sdmmc(4): add UHS-I support

2020-08-18 Thread Kevin Lo
On Mon, Aug 17, 2020 at 12:57:58PM +0200, Mark Kettenis wrote:
> 
> > Date: Sun, 16 Aug 2020 19:32:03 +0200 (CEST)
> > From: Mark Kettenis 
> > 
> > The diff below adds support for higher speeds as supported by UHS-I SD
> > cards to the generic sdmmc(4) layer.  The diff in itself does not
> > enable the use of those modes.  That needs separate changes to the
> > SD/MMC controller drivers.  I have such a diff for amlmmc(4) that
> > allows me to use SDR50 mode.
> > 
> > However, to make sure this diff doesn't break existing lower speed
> > modes I'd appreciate tests on a variety of hardware.  So if sdmmc(4)
> > shows up in your dmesg, please test this by exercising your (u)SD or
> > (e)MMC cards.
> > 
> > Thanks,
> > 
> > Mark
> 
> Previous diff didn't build properly on amd64.  Here is a new diff.

Tested on the GPD Pocket, no problems seen.

OpenBSD 6.7-current (GENERIC.MP) #0: Tue Aug 18 16:08:43 CST 2020
ke...@gpdpocket.kevlo.org:/usr/src/sys/arch/amd64/compile/GENERIC.MP
real mem = 8477282304 (8084MB)
avail mem = 8213426176 (7832MB)
random: good seed from bootblocks
mpath0 at root
scsibus0 at mpath0: 256 targets
mainbus0 at root
bios0 at mainbus0: SMBIOS rev. 3.0 @ 0x7b8de000 (51 entries)
bios0: vendor American Megatrends Inc. version "5.11" date 08/07/2017
bios0: Default string Default string
acpi0 at bios0: ACPI 5.0
acpi0: sleep states S0 S4 S5
acpi0: tables DSDT FACP APIC FPDT FIDT MCFG SSDT SSDT SSDT UEFI SSDT HPET SSDT 
SSDT SSDT LPIT BCFG PRAM BGRT TPM2 CSRT WDAT SSDT SSDT SSDT
acpi0: wakeup devices
acpitimer0 at acpi0: 3579545 Hz, 24 bits
acpimadt0 at acpi0 addr 0xfee0: PC-AT compat
cpu0 at mainbus0: apid 0 (boot processor)
cpu0: Intel(R) Atom(TM) x7-Z8750 CPU @ 1.60GHz, 1600.40 MHz, 06-4c-04
cpu0: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,SSE4.2,MOVBE,POPCNT,DEADLINE,AES,RDRAND,NXE,RDTSCP,LONG,LAHF,3DNOWP,PERF,ITSC,TSC_ADJUST,SMEP,ERMS,MD_CLEAR,IBRS,IBPB,STIBP,SENSOR,ARAT,MELTDOWN
cpu0: 1MB 64b/line 16-way L2 cache
cpu0: smt 0, core 0, package 0
mtrr: Pentium Pro MTRR support, 8 var ranges, 88 fixed ranges
cpu0: apic clock running at 79MHz
cpu0: mwait min=64, max=64, C-substates=0.2.0.0.0.0.3.3, IBE
cpu1 at mainbus0: apid 2 (application processor)
cpu1: Intel(R) Atom(TM) x7-Z8750 CPU @ 1.60GHz, 1599.94 MHz, 06-4c-04
cpu1: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,SSE4.2,MOVBE,POPCNT,DEADLINE,AES,RDRAND,NXE,RDTSCP,LONG,LAHF,3DNOWP,PERF,ITSC,TSC_ADJUST,SMEP,ERMS,MD_CLEAR,IBRS,IBPB,STIBP,SENSOR,ARAT,MELTDOWN
cpu1: 1MB 64b/line 16-way L2 cache
cpu1: smt 0, core 1, package 0
cpu2 at mainbus0: apid 4 (application processor)
cpu2: Intel(R) Atom(TM) x7-Z8750 CPU @ 1.60GHz, 1599.96 MHz, 06-4c-04
cpu2: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,SSE4.2,MOVBE,POPCNT,DEADLINE,AES,RDRAND,NXE,RDTSCP,LONG,LAHF,3DNOWP,PERF,ITSC,TSC_ADJUST,SMEP,ERMS,MD_CLEAR,IBRS,IBPB,STIBP,SENSOR,ARAT,MELTDOWN
cpu2: 1MB 64b/line 16-way L2 cache
cpu2: smt 0, core 2, package 0
cpu3 at mainbus0: apid 6 (application processor)
cpu3: Intel(R) Atom(TM) x7-Z8750 CPU @ 1.60GHz, 1599.95 MHz, 06-4c-04
cpu3: 
FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,EST,TM2,SSSE3,CX16,xTPR,PDCM,SSE4.1,SSE4.2,MOVBE,POPCNT,DEADLINE,AES,RDRAND,NXE,RDTSCP,LONG,LAHF,3DNOWP,PERF,ITSC,TSC_ADJUST,SMEP,ERMS,MD_CLEAR,IBRS,IBPB,STIBP,SENSOR,ARAT,MELTDOWN
cpu3: 1MB 64b/line 16-way L2 cache
cpu3: smt 0, core 3, package 0
ioapic0 at mainbus0: apid 1 pa 0xfec0, version 20, 115 pins
acpimcfg0 at acpi0
acpimcfg0: addr 0xe000, bus 0-255
acpihpet0 at acpi0: 14318179 Hz
acpiprt0 at acpi0: bus 0 (PCI0)
acpiprt1 at acpi0: bus 1 (RP01)
acpiprt2 at acpi0: bus -1 (RP02)
acpiprt3 at acpi0: bus -1 (RP03)
acpiprt4 at acpi0: bus -1 (RP04)
acpicpu0 at acpi0: C3(10@1000 mwait.1@0x64), C2(10@500 mwait.1@0x58), C1(1000@1 
mwait.1), PSS
acpicpu1 at acpi0: C3(10@1000 mwait.1@0x64), C2(10@500 mwait.1@0x58), C1(1000@1 
mwait.1), PSS
acpicpu2 at acpi0: C3(10@1000 mwait.1@0x64), C2(10@500 mwait.1@0x58), C1(1000@1 
mwait.1), PSS
acpicpu3 at acpi0: C3(10@1000 mwait.1@0x64), C2(10@500 mwait.1@0x58), C1(1000@1 
mwait.1), PSS
acpipwrres0 at acpi0: ID3C, resource for ISP3
acpipwrres1 at acpi0: WWPR, resource for HS03, MDM1
acpipwrres2 at acpi0: WWPR, resource for HS13, MDM1
acpipwrres3 at acpi0: WWPR, resource for SSC1, MDM3
acpipwrres4 at acpi0: WWPR, resource for SSCW, MDM3
acpipwrres5 at acpi0: WWPR, resource for HSC1, MDM2
acpipwrres6 at acpi0: WWPR, resource for HSC3, MDM4
acpipwrres7 at acpi0: CLK3, resource for RTK1, RTKA

openrsync(1): add support for IPv6-only hosts

2020-08-18 Thread Sasha Romijn
Hello,

The current openrsync client is not able to connect to dual-stack remote hosts, 
when the local host does not have any IPv4 connectivity. This is because 
connect() fails with EADDRNOTAVAIL when trying to connect to the remote IPv4 
address on an IPv6-only local host - and IPv4 seems to be attempted first. The 
current client does not try other remote host addresses after EADDRNOTAVAIL, 
and therefore never tries the remote IPv6 address.

The included patch allows the client to continue after EADDRNOTAVAIL, making it
try other addresses, until it reaches a working IPv6 address. If the local
host is IPv6-only and the remote host IPv4-only, the connection still fails 
with an error produced from rsync_connect().

Perhaps a warning should be issued for the EADDRNOTAVAIL case, like it does for 
EHOSTUNREACH - but I thought it would be a bit much, as an IPv6-only host would 
then emit warnings on pretty much every single use of the openrsync client.

Sasha Romijn


Index: usr.bin/rsync/socket.c
===
RCS file: /cvs/src/usr.bin/rsync/socket.c,v
retrieving revision 1.27
diff -u -p -u -p -r1.27 socket.c
--- usr.bin/rsync/socket.c  9 Aug 2019 13:11:26 -   1.27
+++ usr.bin/rsync/socket.c  18 Aug 2020 07:55:03 -
@@ -101,6 +101,8 @@ inet_connect(int *sd, const struct sourc

c = connect(*sd, (const struct sockaddr *)&src->sa, src->salen);
if (c == -1) {
+   if (errno == EADDRNOTAVAIL)
+   return 0;
if (errno == ECONNREFUSED || errno == EHOSTUNREACH) {
WARNX("connect refused: %s, %s",
src->ip, host);



Re: Enable arm64 PAN feature

2020-08-18 Thread Mark Kettenis
> From: Dale Rahn 
> Date: Mon, 17 Aug 2020 18:33:29 -0500
> 
> could we check that there is not an ESR value that indicates PAN violation
> instead of using 'instruction recognition'?

Doesn't exist unfortunately.  You get a protection fault, but you get
the same protection fault if you try to write to a read-only page for
example.

> Seems that it would be more reliable.
> Thanks
> Dale
> 
> On Mon, Aug 17, 2020 at 1:30 AM Jonathan Gray  wrote:
> 
>  On Sat, Aug 15, 2020 at 01:54:34PM +0200, Mark Kettenis wrote:
>  > > Date: Sat, 15 Aug 2020 20:21:09 +1000
>  > > From: Jonathan Gray 
>  > > 
>  > > On Fri, Aug 14, 2020 at 11:06:59PM +0200, Mark Kettenis wrote:
>  > > > > Date: Fri, 14 Aug 2020 14:40:23 +0200 (CEST)
>  > > > > From: Mark Kettenis 
>  > > > > 
>  > > > > I suppose a way to test this properly is to pick a system call
>  and
>  > > > > replace a copyin() with a direct access?  That will succeed
>  without
>  > > > > PAN but should fail with PAN enabled right?
>  > > > 
>  > > > So that does indeed work.  However, the result is a hard hang.  So
>  > > > here as an additional diff that makes sure we panic instead.  The
>  idea
>  > > > is that all user-space access from the kernel should be done by the
>  > > > special unprivileged load/store instructions.
>  > > 
>  > > Would disabling PSTATE.PAN in copyin/copyout along the lines of how
>  > > stac/clac is done for SMAP avoid having to test the instruction type
>  > > entirely?
>  > 
>  > No.  The problem is that we need to catch permission faults caused by
>  > PAN.  But the faulting address may be valid in the sense that
>  > UVM has a mapping for it that allows the requested access.  In that
>  > case we end up looping since uvm_fault() returns 0 and we retry the
>  > faulting instruction.
>  > 
>  > > Is it faulting on valid copyin/copyout the way you have it at the
>  > > moment?  I don't quite follow what is going on.
>  > 
>  > The copyin/copyout functions use the unprivileged load/store
>  > instructions (LDTR/STTR) which bypass PAN.  But we may still fault
>  > because the memory hasn't been faulted in or because the memory has
>  > been marked read-only for CoW or for MOD/REF emulation.  And some of
>  > those faults manifest themselves as permission faults as well.
>  > 
>  > Currently (without the diff quoted below) those faults will be handled
>  > just fine.  The diff below needs to make sure this continues to be the
>  > case.  The easiest way to do that is to check the instruction.
>  > 
>  > Note that this check is in the "slow path".  In most cases the address
>  > touched by copyin/copyout will already be in the page tables since
>  > userland touched it already.
>  > 
>  > Does that clarify things?
> 
>  Yes, thanks.  I'm fine with both of these diffs going in but still think
>  you should change the mask.
> 
>  > 
>  > 
>  > > > Index: arch/arm64/arm64/trap.c
>  > > > ===
>  > > > RCS file: /cvs/src/sys/arch/arm64/arm64/trap.c,v
>  > > > retrieving revision 1.27
>  > > > diff -u -p -r1.27 trap.c
>  > > > --- arch/arm64/arm64/trap.c   6 Jan 2020 12:37:30 -  
>  1.27
>  > > > +++ arch/arm64/arm64/trap.c   14 Aug 2020 21:05:54 -
>  > > > @@ -65,6 +65,14 @@ void do_el0_error(struct trapframe *);
>  > > >  
>  > > >  void dumpregs(struct trapframe*);
>  > > >  
>  > > > +/* Check whether we're executing an unprivileged load/store
>  instruction. */
>  > > > +static inline int
>  > > > +is_unpriv_ldst(uint64_t elr)
>  > > > +{
>  > > > + uint32_t insn = *(uint32_t *)elr;
>  > > > + return ((insn & 0x3f200c00) == 0x38000800);
>  > > 
>  > > The value of op1 (bit 26) is not used according to the table in the
>  Arm
>  > > ARM.  The mask would be better as 0x3b200c00
>  > > 
>  > > 
>  > > > +}
>  > > > +
>  > > >  static void
>  > > >  data_abort(struct trapframe *frame, uint64_t esr, uint64_t far,
>  > > >  int lower, int exe)
>  > > > @@ -104,8 +112,18 @@ data_abort(struct trapframe *frame, uint
>  > > >   /* The top bit tells us which range to use */
>  > > >   if ((far >> 63) == 1)
>  > > >   map = kernel_map;
>  > > > - else
>  > > > + else {
>  > > > + /*
>  > > > +  * Only allow user-space access using
>  > > > +  * unprivileged load/store instructions.
>  > > > +  */
>  > > > + if (!is_unpriv_ldst(frame->tf_elr)) {
>  > > > + panic("attempt to access user address"
>  > > > +   " 0x%llx from EL1", far);
>  > > > + }
>  > > > +
>  > > >   map = >p_vmspace->vm_map;
>  > > > + }
>  > > >   }
>  > > >  
>  > > >   if (exe)
>  > > > 
>  > > > 
>  > > 
>  > 
>  >