Re: unp_internalize() and `unp_lock'

2021-10-15 Thread Mark Kettenis
> Date: Fri, 15 Oct 2021 14:04:08 +0300
> From: Vitaliy Makkoveev 
> 
> The next diff before introducing rwlock(9) for the UNIX sockets garbage
> collector data.
> 
> Release `unp_lock' before calling unp_internalize() and take it within
> when accessing garbage collector data such as `unp_rights',
> `unp_msgcount' and `unp_file'. The garbage collector rwlock(9) will be
> introduced with the following diff, but for now the M_WAITOK/M_WAIT
> allocations are moved outside the section where the rwlock(9) is held.
> 
> The lock order between fdplock() and `unp_lock' has changed to fdplock()
> -> `unp_lock'. unp_internalize() is the only place where these locks
> simultaneously held together so this order doesn't matter. This should be
> helpful to mpi@'s knote(9) work.
> 
> sosend() releases solock() before m_getuio() call, so this socket could
> be disconnected before uipc_usrreq() called. uipc_usrreq() checks
> connection oriented sockets state or tries to connect datagram sockets
> after successful unp_internalize() call, so this introduced solock()
> release doesn't modify existing behavior.

Is it safe to call fptounp() without holding the lock?

> Index: sys/kern/uipc_usrreq.c
> ===
> RCS file: /cvs/src/sys/kern/uipc_usrreq.c,v
> retrieving revision 1.149
> diff -u -p -r1.149 uipc_usrreq.c
> --- sys/kern/uipc_usrreq.c14 Oct 2021 23:05:10 -  1.149
> +++ sys/kern/uipc_usrreq.c15 Oct 2021 11:03:16 -
> @@ -219,8 +219,13 @@ uipc_usrreq(struct socket *so, int req, 
>   break;
>  
>   case PRU_SEND:
> - if (control && (error = unp_internalize(control, p)))
> - break;
> + if (control) {
> + sounlock(so, SL_LOCKED);
> + error = unp_internalize(control, p);
> + solock(so);
> + if (error)
> + break;
> + }
>   switch (so->so_type) {
>  
>   case SOCK_DGRAM: {
> @@ -973,8 +978,6 @@ unp_internalize(struct mbuf *control, st
>   int i, error;
>   int nfds, *ip, fd, neededspace;
>  
> - rw_assert_wrlock(&unp_lock);
> -
>   /*
>* Check for two potential msg_controllen values because
>* IETF stuck their nose in a place it does not belong.
> @@ -987,8 +990,13 @@ unp_internalize(struct mbuf *control, st
>   return (EINVAL);
>   nfds = (cm->cmsg_len - CMSG_ALIGN(sizeof(*cm))) / sizeof (int);
>  
> - if (unp_rights + nfds > maxfiles / 10)
> + rw_enter_write(&unp_lock);
> + if (unp_rights + nfds > maxfiles / 10) {
> + rw_exit_write(&unp_lock);
>   return (EMFILE);
> + }
> + unp_rights += nfds;
> + rw_exit_write(&unp_lock);
>  
>   /* Make sure we have room for the struct file pointers */
>  morespace:
> @@ -997,8 +1005,10 @@ morespace:
>   if (neededspace > m_trailingspace(control)) {
>   char *tmp;
>   /* if we already have a cluster, the message is just too big */
> - if (control->m_flags & M_EXT)
> - return (E2BIG);
> + if (control->m_flags & M_EXT) {
> + error = E2BIG;
> + goto nospace;
> + }
>  
>   /* copy cmsg data temporarily out of the mbuf */
>   tmp = malloc(control->m_len, M_TEMP, M_WAITOK);
> @@ -1008,7 +1018,8 @@ morespace:
>   MCLGET(control, M_WAIT);
>   if ((control->m_flags & M_EXT) == 0) {
>   free(tmp, M_TEMP, control->m_len);
> - return (ENOBUFS);   /* allocation failed */
> + error = ENOBUFS;   /* allocation failed */
> + goto nospace;
>   }
>  
>   /* copy the data back into the cluster */
> @@ -1049,10 +1060,11 @@ morespace:
>   rp->flags = fdp->fd_ofileflags[fd] & UF_PLEDGED;
>   rp--;
>   if ((unp = fptounp(fp)) != NULL) {
> + rw_enter_write(&unp_lock);
>   unp->unp_file = fp;
>   unp->unp_msgcount++;
> + rw_exit_write(&unp_lock);
>   }
> - unp_rights++;
>   }
>   fdpunlock(fdp);
>   return (0);
> @@ -1064,11 +1076,18 @@ fail:
>   for ( ; i > 0; i--) {
>   rp++;
>   fp = rp->fp;
> - if ((unp = fptounp(fp)) != NULL)
> + if ((unp = fptounp(fp)) != NULL) {
> + rw_enter_write(&unp_lock);
>   unp->unp_msgcount--;
> + rw_exit_write(&unp_lock);
> + }
>   FRELE(fp, p);
> - unp_rights--;
>   }
> +
> +nospace:
> + rw_enter_write(&unp_lock);
> + unp_rights -= nfds;
> + rw_exit_write(&unp_lock);
>  
>   return (error);
>  }
> 
> 



Re: timecounting: use full 96-bit product when computing high-res time

2021-10-14 Thread Mark Kettenis
> Date: Thu, 14 Oct 2021 16:13:17 -0500
> From: Scott Cheloha 
> 
> Hi,
> 
> When we compute high resolution time, both in the kernel and in libc,
> we get a 32-bit (or smaller) value from the active timecounter and
> scale it up into a 128-bit bintime.
> 
> The scaling math currently looks like this in the kernel:
> 
>   *bt = th->th_offset;
>   bintimeaddfrac(bt, th->th_scale * tc_delta(th), bt);
> 
> It looks similar in libc.
> 
> The problem with this code is that if the product
> 
>   th->th_scale * tc_delta(th)
> 
> exceeds UINT64_MAX, the result overflows and we lose time.
> 
> This is unlikely to happen under normal circumstances.  Normally, the
> clock interrupt fires about HZ times per second, so the product
> doesn't get very close to overflow.  You'd need to run with interrupts
> disabled on CPU0 for about 1 second to see an overflow.
> 
> However, I have seen this happen before in the wild.  If you try to
> build LLVM on an EdgeRouter Lite (512MB RAM), it will swap *very
> slowly* to the SD card and eventually lose time.
> 
> I think it is inevitable that we will encounter bugs of this nature at
> some point.  In general, I would like the timecounting layer to be
> more robust to nasty situations like these.  Losing time is a
> catastrophic failure.  It screws everything up.
> 
> The solution to this problem is to use the full 96-bit product when we
> scale the count up into a bintime.  We're multiplying a u_int
> (32-bit), the count, by a uint64_t, the scale, but we're not capturing
> the upper 32 bits of that product.  If we did, we would have a longer
> grace period between clock interrupts before we lost time.
> 
> The attached patch adds a TIMECOUNT_TO_BINTIME() function to sys/time.h
> and puts it to use in sys/kern/kern_tc.c and lib/libc/sys/microtime.c.
> The math is a bit boring, see the patch if you are curious.
> 
> As for the cost, there is a small but significant increase in overhead
> when reading the clock with the TSC.  Slower timecounters (HPET, ACPI
> timer) are so slow the extra overhead is noise.
> 
> I ran my usual synthetic benchmark against a patched and unpatched
> libc for a number of different sample sizes.  I ran the benchmark in
> single-user mode to avoid ntpd(8) interference (adjtime(2), adjfreq(2))
> and set hw.setperf=100 to ensure uniform results.
> 
> It looks to me that on amd64, userspace clock_gettime(2) is up to ~10%
> slower with the patch.  But there is a lot of variation between the
> comparisons, so I don't think it's a consistent 10%.  I'd say 10% is
> an upper bound.
> 
> To ensure the discussion is well-framed: we're talking about a 10%
> overhead on a 40-80 nanosecond function call.
> 
> Obviously the overhead will vary by platform.  I imagine 32-bit
> platforms will exceed 10% because we're now doing even more 64-bit
> math.  I'd be interested in results from e.g. macppc or armv7.
> 
> ministat summaries are below.  My benchmark program and script are
> below that.  The patch is attached below that.
> 
> Thoughts?

I never understood this code.  But I don't understand why, if our
timecounters are only 32 bits, we need more than 64 bits to store a
time difference on the order of seconds...

> --
> 
> All values (min, max, med, avg, stddev) are in seconds.
> 
> Call clock_gettime(2) 10,000 times.  Do that 1000 times.  ~5.88% increase.
> 
> x clock-gettime-tsc-user-head-1K-10K.dat1
> + clock-gettime-tsc-user-patch-1K-10K.dat1
> N   Min   MaxMedian   AvgStddev
> x 1000   0.000329069   0.000362506   0.000335691 0.00033582683 3.6951597e-06
> + 1000   0.000346276   0.000381319   0.000353605 0.00035557716  3.962225e-06
> Difference at 99.5% confidence
> 1.97503e-05 +/- 5.29405e-07
> 5.88111% +/- 0.157642%
> (Student's t, pooled s = 3.83102e-06)
> 
> Call clock_gettime(2) 100,000 times.  Do that 1000 times.  ~1.31% increase.
> 
> x clock-gettime-tsc-user-head-1K-100K.dat1
> + clock-gettime-tsc-user-patch-1K-100K.dat1
> N   Min   MaxMedian   AvgStddev
> x 1000   0.003432839   0.003679475   0.003472749  0.0034827782 3.0275757e-05
> + 1000   0.003426934   0.003776351   0.003524453  0.0035284519 3.8631925e-05
> Difference at 99.5% confidence
> 4.56738e-05 +/- 4.79602e-06
> 1.31142% +/- 0.137707%
> (Student's t, pooled s = 3.47062e-05)
> 
> Call clock_gettime(2) 5,000 times.  Do that 10,000 times.  ~8.53% increase.
> 
> x clock-gettime-tsc-user-head-10K-5K.dat1
> + clock-gettime-tsc-user-patch-10K-5K.dat1
> N   Min   MaxMedian   AvgStddev
> x 10000   0.000159681   0.000220758   0.000186826 0.00017805393  1.177249e-05
> + 10000   0.000171105   0.000217009   0.000200789 0.00019325052 1.1280074e-05
> Difference at 99.5% confidence
> 1.51966e-05 +/- 5.03804e-07
> 8.53482% +/- 0.28295%
> (Student's t, pooled s = 1.15289e-05)
> 
> Call clock_gettime(2) 

Re: lrint(3) and llrint(3) implementation

2021-10-14 Thread Mark Kettenis
> From: "Todd C. Miller" 
> Date: Thu, 14 Oct 2021 14:40:13 -0600
> 
> On Thu, 14 Oct 2021 01:15:56 +0200, Mark Kettenis wrote:
> 
> > Currently the lib/libm/msun/run-lrint_test regress fails on powerpc64
> > and other platforms.  Our implementation came from NetBSD, but NetBSD
> > switched to the implementation from FreeBSD some time ago.  That is
> > the same implementation that we already use for lrintl(3) and
> > llrintl(3).
> >
> > Diff below makes us use that implementation for lrint(3), lrintf(3),
> > llrint(3) and llrintf(3) as well.  This makes the regress test pass on
> > powerpc64.
> 
> Doesn't this mean we end up with three copies of what is essentially
> the same code in s_lrint.c, s_lrintf.c and s_lrintl.c?
> 
> I know it's not much but why not just move the actual code to
> s_lrint.c and include that in the others?

This doesn't work because of the hidden symbol madness.  The way
things currently are we need one copy (s_lrint.c) with:

DEF_STD(fn);
LDBL_MAYBE_CLONE(fn);

another version (s_lrintf.c) with

DEF_STD(fn);

and a final version (s_lrintl.c) without any magic.

I suppose I could have the s_lrint.c and s_lrintf.c include
s_lrintfl.c if you think that would be better?



Re: lrint(3) and llrint(3) implementation

2021-10-14 Thread Mark Kettenis
> Date: Thu, 14 Oct 2021 22:23:25 +0200
> From: Alexander Bluhm 
> 
> On Thu, Oct 14, 2021 at 01:15:56AM +0200, Mark Kettenis wrote:
> > Currently the lib/libm/msun/run-lrint_test regress fails on powerpc64
> > and other platforms.  Our implementation came from NetBSD, but NetBSD
> > switched to the implementation from FreeBSD some time ago.  That is
> > the same implementation that we already use for lrintl(3) and
> > llrintl(3).
> >
> > Diff below makes us use that implementation for lrint(3), lrintf(3),
> > llrint(3) and llrintf(3) as well.  This makes the regress test pass on
> > powerpc64.
> >
> > ok?
> 
> libm Tests on powerpc64 pass now.
> amd64 and i386 still passing.
> arm64 and sparc64 still failing.

The remaining arm64 failures are for the "long double" versions.  The
problem there is that "long double" is implemented as soft-float and
the soft-float implementation in libcompiler_rt doesn't set the
appropriate floating-point status flags.  I suspect that the sparc64
failures are similar since it is in the same boat (but uses libgcc of
course).

So don't expect this to be fixed anytime soon.

> So it is an improvement.
> 
> OK bluhm@
> 
> > Index: lib/libm/src/s_llrint.c
> > ===
> > RCS file: /cvs/src/lib/libm/src/s_llrint.c,v
> > retrieving revision 1.6
> > diff -u -p -r1.6 s_llrint.c
> > --- lib/libm/src/s_llrint.c 12 Sep 2016 19:47:02 -  1.6
> > +++ lib/libm/src/s_llrint.c 13 Oct 2021 23:12:11 -
> > @@ -1,14 +1,12 @@
> > -/* $OpenBSD: s_llrint.c,v 1.6 2016/09/12 19:47:02 guenther Exp $   */
> > -/* $NetBSD: llrint.c,v 1.2 2004/10/13 15:18:32 drochner Exp $ */
> > +/* $OpenBSD$   */
> >
> >  /*
> > - * Written by Matthias Drochner .
> > - * Public domain.
> > + * Written by Martynas Venckus.  Public domain
> >   */
> >
> > -#define LRINTNAME llrint
> > -#define RESTYPE long long int
> > -#define RESTYPE_MIN LLONG_MIN
> > -#define RESTYPE_MAX LLONG_MAX
> > +#define type   double
> > +#define roundit rint
> > +#define dtype  long long
> > +#define fn llrint
> >
> >  #include "s_lrint.c"
> > Index: lib/libm/src/s_llrintf.c
> > ===
> > RCS file: /cvs/src/lib/libm/src/s_llrintf.c,v
> > retrieving revision 1.2
> > diff -u -p -r1.2 s_llrintf.c
> > --- lib/libm/src/s_llrintf.c25 Sep 2006 22:16:48 -  1.2
> > +++ lib/libm/src/s_llrintf.c13 Oct 2021 23:12:11 -
> > @@ -1,14 +1,12 @@
> > -/* $OpenBSD: s_llrintf.c,v 1.2 2006/09/25 22:16:48 kettenis Exp $  */
> > -/* $NetBSD: llrintf.c,v 1.2 2004/10/13 15:18:32 drochner Exp $ */
> > +/* $OpenBSD$   */
> >
> >  /*
> > - * Written by Matthias Drochner .
> > - * Public domain.
> > + * Written by Martynas Venckus.  Public domain
> >   */
> >
> > -#define LRINTNAME llrintf
> > -#define RESTYPE long long int
> > -#define RESTYPE_MIN LLONG_MIN
> > -#define RESTYPE_MAX LLONG_MAX
> > +#define type   float
> > +#define roundit rintf
> > +#define dtype  long long
> > +#define fn llrintf
> >
> >  #include "s_lrintf.c"
> > Index: lib/libm/src/s_lrint.c
> > ===
> > RCS file: /cvs/src/lib/libm/src/s_lrint.c,v
> > retrieving revision 1.11
> > diff -u -p -r1.11 s_lrint.c
> > --- lib/libm/src/s_lrint.c  12 Sep 2016 19:47:02 -  1.11
> > +++ lib/libm/src/s_lrint.c  13 Oct 2021 23:12:11 -
> > @@ -1,9 +1,8 @@
> > -/* $OpenBSD: s_lrint.c,v 1.11 2016/09/12 19:47:02 guenther Exp $   */
> > -/* $NetBSD: lrint.c,v 1.3 2004/10/13 15:18:32 drochner Exp $ */
> > +/* $OpenBSD$   */
> >
> >  /*-
> > - * Copyright (c) 2004
> > - * Matthias Drochner. All rights reserved.
> > + * Copyright (c) 2005 David Schultz 
> > + * All rights reserved.
> >   *
> >   * Redistribution and use in source and binary forms, with or without
> >   * modification, are permitted provided that the following conditions
> > @@ -27,75 +26,35 @@
> >   * SUCH DAMAGE.
> >   */
> >
> > -#include 
> > -#include 
> > -#include 
> > +#include 
> >  #include 
> > -#include 
> > -#include 
> >
> > -#include "math_private.h"
> > -
> > -#ifndef LRINTNAME
> > -#define LRINTNAME lrin

lrint(3) and llrint(3) implementation

2021-10-13 Thread Mark Kettenis
Currently the lib/libm/msun/run-lrint_test regress fails on powerpc64
and other platforms.  Our implementation came from NetBSD, but NetBSD
switched to the implementation from FreeBSD some time ago.  That is
the same implementation that we already use for lrintl(3) and
llrintl(3).

Diff below makes us use that implementation for lrint(3), lrintf(3),
llrint(3) and llrintf(3) as well.  This makes the regress test pass on
powerpc64.

ok?


Index: lib/libm/src/s_llrint.c
===
RCS file: /cvs/src/lib/libm/src/s_llrint.c,v
retrieving revision 1.6
diff -u -p -r1.6 s_llrint.c
--- lib/libm/src/s_llrint.c 12 Sep 2016 19:47:02 -  1.6
+++ lib/libm/src/s_llrint.c 13 Oct 2021 23:12:11 -
@@ -1,14 +1,12 @@
-/* $OpenBSD: s_llrint.c,v 1.6 2016/09/12 19:47:02 guenther Exp $   */
-/* $NetBSD: llrint.c,v 1.2 2004/10/13 15:18:32 drochner Exp $ */
+/* $OpenBSD$   */
 
 /*
- * Written by Matthias Drochner .
- * Public domain.
+ * Written by Martynas Venckus.  Public domain
  */
 
-#define LRINTNAME llrint
-#define RESTYPE long long int
-#define RESTYPE_MIN LLONG_MIN
-#define RESTYPE_MAX LLONG_MAX
+#define type   double
+#define roundit rint
+#define dtype  long long
+#define fn llrint
 
 #include "s_lrint.c"
Index: lib/libm/src/s_llrintf.c
===
RCS file: /cvs/src/lib/libm/src/s_llrintf.c,v
retrieving revision 1.2
diff -u -p -r1.2 s_llrintf.c
--- lib/libm/src/s_llrintf.c25 Sep 2006 22:16:48 -  1.2
+++ lib/libm/src/s_llrintf.c13 Oct 2021 23:12:11 -
@@ -1,14 +1,12 @@
-/* $OpenBSD: s_llrintf.c,v 1.2 2006/09/25 22:16:48 kettenis Exp $  */
-/* $NetBSD: llrintf.c,v 1.2 2004/10/13 15:18:32 drochner Exp $ */
+/* $OpenBSD$   */
 
 /*
- * Written by Matthias Drochner .
- * Public domain.
+ * Written by Martynas Venckus.  Public domain
  */
 
-#define LRINTNAME llrintf
-#define RESTYPE long long int
-#define RESTYPE_MIN LLONG_MIN
-#define RESTYPE_MAX LLONG_MAX
+#define type   float
+#define roundit rintf
+#define dtype  long long
+#define fn llrintf
 
 #include "s_lrintf.c"
Index: lib/libm/src/s_lrint.c
===
RCS file: /cvs/src/lib/libm/src/s_lrint.c,v
retrieving revision 1.11
diff -u -p -r1.11 s_lrint.c
--- lib/libm/src/s_lrint.c  12 Sep 2016 19:47:02 -  1.11
+++ lib/libm/src/s_lrint.c  13 Oct 2021 23:12:11 -
@@ -1,9 +1,8 @@
-/* $OpenBSD: s_lrint.c,v 1.11 2016/09/12 19:47:02 guenther Exp $   */
-/* $NetBSD: lrint.c,v 1.3 2004/10/13 15:18:32 drochner Exp $ */
+/* $OpenBSD$   */
 
 /*-
- * Copyright (c) 2004
- * Matthias Drochner. All rights reserved.
+ * Copyright (c) 2005 David Schultz 
+ * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -27,75 +26,35 @@
  * SUCH DAMAGE.
  */
 
-#include 
-#include 
-#include 
+#include 
 #include 
-#include 
-#include 
 
-#include "math_private.h"
-
-#ifndef LRINTNAME
-#define LRINTNAME lrint
-#define RESTYPE long int
-#define RESTYPE_MIN LONG_MIN
-#define RESTYPE_MAX LONG_MAX
+#ifndef type
+#define type   double
+#define roundit rint
+#define dtype  long
+#define fn lrint
 #endif
 
-#define RESTYPE_BITS (sizeof(RESTYPE) * 8)
-
-static const double
-TWO52[2]={
-  4.5035996273704960e+15, /* 0x43300000, 0x00000000 */
- -4.5035996273704960e+15, /* 0xC3300000, 0x00000000 */
-};
-
-RESTYPE
-LRINTNAME(double x)
+/*
+ * C99 says we should not raise a spurious inexact exception when an
+ * invalid exception is raised.  Unfortunately, the set of inputs
+ * that overflows depends on the rounding mode when 'dtype' has more
+ * significant bits than 'type'.  Hence, we bend over backwards for the
+ * sake of correctness; an MD implementation could be more efficient.
+ */
+dtype
+fn(type x)
 {
-   u_int32_t i0, i1;
-   int e, s, shift;
-   RESTYPE res;
-
-   GET_HIGH_WORD(i0, x);
-   e = i0 >> DBL_FRACHBITS;
-   s = e >> DBL_EXPBITS;
-   e = (e & 0x7ff) - DBL_EXP_BIAS;
-
-   /* 1.0 x 2^31 (or 2^63) is already too large */
-   if (e >= (int)RESTYPE_BITS - 1)
-   return (s ? RESTYPE_MIN : RESTYPE_MAX); /* ??? unspecified */
-
-   /* >= 2^52 is already an exact integer */
-   if (e < DBL_FRACBITS) {
-   volatile double t = x;  /* clip extra precision */
-   /* round, using current direction */
-   t += TWO52[s];
-   t -= TWO52[s];
-   x = t;
-   }
-
-   EXTRACT_WORDS(i0, i1, x);
-   e = ((i0 >> DBL_FRACHBITS) & 0x7ff) - DBL_EXP_BIAS;
-   i0 &= 0xf;
-   i0 |= (1 << DBL_FRACHBITS);
-
-   if (e < 0)
-   return (0);
-
-   shift = e - 

Re: More pchgpio(4)

2021-10-10 Thread Mark Kettenis
> Date: Sat, 9 Oct 2021 22:27:52 +0200 (CEST)
> From: Mark Kettenis 
> 
> > Date: Sat, 9 Oct 2021 20:55:10 +0200 (CEST)
> > From: Mark Kettenis 
> > 
> > This time adding support for Sunrisepoint-H and Sunrisepoint-LP.
> > Because of all the failed attempts by Intel to get their 10nm process
> > under control, this may cover Intel Mobile CPUs marketed as 6th, 7th,
> > 8th, 9th and 10th generation.  So if you have a Laptop that isn't at
> > least 5 years old, give this a try if pchgpio(4) doesn't attach.  This
> > may fix all sorts of issues with keyboards, touchpads or
> > suspend/resume.
> > 
> > ok?
> 
> Updated diff that masks unhandled interrupts like we do in amdgpio(4).

And another update to fix a typo in the pin groups for Sunrisepoint-LP.


Index: dev/acpi/pchgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
retrieving revision 1.8
diff -u -p -r1.8 pchgpio.c
--- dev/acpi/pchgpio.c  29 Sep 2021 22:03:33 -  1.8
+++ dev/acpi/pchgpio.c  10 Oct 2021 21:40:45 -
@@ -107,13 +107,76 @@ struct cfdriver pchgpio_cd = {
 };
 
 const char *pchgpio_hids[] = {
+   "INT344B",
"INT3450",
+   "INT3451",
+   "INT345D",
"INT34BB",
"INT34C5",
"INT34C6",
NULL
 };
 
+/* Sunrisepoint-LP */
+
+const struct pchgpio_group spt_lp_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 119, 96 },  /* GPP_E */
+   
+   /* Community 3 */
+   { 2, 0, 120, 143, 120 },/* GPP_F */
+   { 2, 1, 144, 151, 144 },/* GPP_G */
+};
+
+const struct pchgpio_device spt_lp_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_lp_groups,
+   .ngroups = nitems(spt_lp_groups),
+   .npins = 176,
+};
+
+/* Sunrisepoint-H */
+
+const struct pchgpio_group spt_h_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 108, 96 },  /* GPP_E */
+   { 1, 3, 109, 132, 120 },/* GPP_F */
+   { 1, 4, 133, 156, 144 },/* GPP_G */
+   { 1, 5, 157, 180, 168 },/* GPP_H */
+
+   /* Community 3 */
+   { 2, 0, 181, 191, 192 },/* GPP_I */
+};
+
+const struct pchgpio_device spt_h_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_h_groups,
+   .ngroups = nitems(spt_h_groups),
+   .npins = 224,
+};
+
+/* Cannon Lake-H */
+
 const struct pchgpio_group cnl_h_groups[] =
 {
/* Community 0 */
@@ -146,6 +209,8 @@ const struct pchgpio_device cnl_h_device
.npins = 384,
 };
 
+/* Cannon Lake-LP */
+
 const struct pchgpio_group cnl_lp_groups[] =
 {
/* Community 0 */
@@ -173,6 +238,8 @@ const struct pchgpio_device cnl_lp_devic
.npins = 320,
 };
 
+/* Tiger Lake-LP */
+
 const struct pchgpio_group tgl_lp_groups[] =
 {
/* Community 0 */
@@ -205,6 +272,8 @@ const struct pchgpio_device tgl_lp_devic
.npins = 360,
 };
 
+/* Tiger Lake-H */
+
 const struct pchgpio_group tgl_h_groups[] =
 {
/* Community 0 */
@@ -242,7 +311,10 @@ const struct pchgpio_device tgl_h_device
 };
 
 struct pchgpio_match pchgpio_devices[] = {
+   { "INT344B", &spt_lp_device },
{ "INT3450", &cnl_h_device },
+   { "INT3451", &spt_h_device },
+   { "INT345D", &spt_h_device },
{ "INT34BB", &cnl_lp_device },
{ "INT34C5", &tgl_lp_device },
{ "INT34C6", &tgl_h_device },
@@ -473,11 +545,38 @@ pchgpio_intr_establish(void *cookie, int
 }
 
 int
+pchgpio_intr_handle(struct pchgpio_softc *sc, int group, int bit)
+{
+   uint32_t enable;
+   int gpiobase, pin, handled = 0;
+   uint8_t bank, bar;
+
+   bar = sc->sc_device->groups[group].bar;
+   bank = sc->sc_device->groups[group].bank;
+   gpiobase = sc->sc_device->groups[group].gpiobase;
+
+   pin = gpiobase + bit;
+   if (sc->sc_pin_ih[pin].ih_func) {
+   sc->sc_pin_ih[pin].ih_func(sc->sc_pin_ih[pin].ih_arg);
+   handled = 1;
+   } else {
+   /* Mask unhandled interrupt. */
+   enable = bus_space_read_4(sc->sc_memt[bar], sc->sc_memh[bar],
+   sc->sc_device->gpi_ie + bank * 4);
+   enable &= ~(1 << 

Re: More pchgpio(4)

2021-10-09 Thread Mark Kettenis
> Date: Sat, 9 Oct 2021 20:55:10 +0200 (CEST)
> From: Mark Kettenis 
> 
> This time adding support for Sunrisepoint-H and Sunrisepoint-LP.
> Because of all the failed attempts by Intel to get their 10nm process
> under control, this may cover Intel Mobile CPUs marketed as 6th, 7th,
> 8th, 9th and 10th generation.  So if you have a Laptop that isn't at
> least 5 years old, give this a try if pchgpio(4) doesn't attach.  This
> may fix all sorts of issues with keyboards, touchpads or
> suspend/resume.
> 
> ok?

Updated diff that masks unhandled interrupts like we do in amdgpio(4).


Index: dev/acpi/pchgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
retrieving revision 1.8
diff -u -p -r1.8 pchgpio.c
--- dev/acpi/pchgpio.c  29 Sep 2021 22:03:33 -  1.8
+++ dev/acpi/pchgpio.c  9 Oct 2021 20:27:01 -
@@ -107,13 +107,76 @@ struct cfdriver pchgpio_cd = {
 };
 
 const char *pchgpio_hids[] = {
+   "INT344B",
"INT3450",
+   "INT3451",
+   "INT345D",
"INT34BB",
"INT34C5",
"INT34C6",
NULL
 };
 
+/* Sunrisepoint-LP */
+
+const struct pchgpio_group spt_lp_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 119, 96 },  /* GPP_E */
+   
+   /* Community 3 */
+   { 2, 3, 120, 143, 120 },/* GPP_F */
+   { 2, 4, 144, 151, 144 },/* GPP_G */
+};
+
+const struct pchgpio_device spt_lp_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_lp_groups,
+   .ngroups = nitems(spt_lp_groups),
+   .npins = 176,
+};
+
+/* Sunrisepoint-H */
+
+const struct pchgpio_group spt_h_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 108, 96 },  /* GPP_E */
+   { 1, 3, 109, 132, 120 },/* GPP_F */
+   { 1, 4, 133, 156, 144 },/* GPP_G */
+   { 1, 5, 157, 180, 168 },/* GPP_H */
+
+   /* Community 3 */
+   { 2, 0, 181, 191, 192 },/* GPP_I */
+};
+
+const struct pchgpio_device spt_h_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_h_groups,
+   .ngroups = nitems(spt_h_groups),
+   .npins = 224,
+};
+
+/* Cannon Lake-H */
+
 const struct pchgpio_group cnl_h_groups[] =
 {
/* Community 0 */
@@ -146,6 +209,8 @@ const struct pchgpio_device cnl_h_device
.npins = 384,
 };
 
+/* Cannon Lake-LP */
+
 const struct pchgpio_group cnl_lp_groups[] =
 {
/* Community 0 */
@@ -173,6 +238,8 @@ const struct pchgpio_device cnl_lp_devic
.npins = 320,
 };
 
+/* Tiger Lake-LP */
+
 const struct pchgpio_group tgl_lp_groups[] =
 {
/* Community 0 */
@@ -205,6 +272,8 @@ const struct pchgpio_device tgl_lp_devic
.npins = 360,
 };
 
+/* Tiger Lake-H */
+
 const struct pchgpio_group tgl_h_groups[] =
 {
/* Community 0 */
@@ -242,7 +311,10 @@ const struct pchgpio_device tgl_h_device
 };
 
 struct pchgpio_match pchgpio_devices[] = {
+   { "INT344B", &spt_lp_device },
{ "INT3450", &cnl_h_device },
+   { "INT3451", &spt_h_device },
+   { "INT345D", &spt_h_device },
{ "INT34BB", &cnl_lp_device },
{ "INT34C5", &tgl_lp_device },
{ "INT34C6", &tgl_h_device },
@@ -473,11 +545,38 @@ pchgpio_intr_establish(void *cookie, int
 }
 
 int
+pchgpio_intr_handle(struct pchgpio_softc *sc, int group, int bit)
+{
+   uint32_t enable;
+   int gpiobase, pin, handled = 0;
+   uint8_t bank, bar;
+
+   bar = sc->sc_device->groups[group].bar;
+   bank = sc->sc_device->groups[group].bank;
+   gpiobase = sc->sc_device->groups[group].gpiobase;
+
+   pin = gpiobase + bit;
+   if (sc->sc_pin_ih[pin].ih_func) {
+   sc->sc_pin_ih[pin].ih_func(sc->sc_pin_ih[pin].ih_arg);
+   handled = 1;
+   } else {
+   /* Mask unhandled interrupt. */
+   enable = bus_space_read_4(sc->sc_memt[bar], sc->sc_memh[bar],
+   sc->sc_device->gpi_ie + bank * 4);
+   enable &= ~(1 << bit);
+   bus_space_write_4(sc->sc_memt[bar], sc->sc_memh[bar],
+   sc->sc_device->gpi_ie + bank * 4, enable);
+   }
+
+   return handled;
+}
+
+int
 pchgpio_intr(void *arg)
 {

More pchgpio(4)

2021-10-09 Thread Mark Kettenis
This time adding support for Sunrisepoint-H and Sunrisepoint-LP.
Because of all the failed attempts by Intel to get their 10nm process
under control, this may cover Intel Mobile CPUs marketed as 6th, 7th,
8th, 9th and 10th generation.  So if you have a Laptop that isn't at
least 5 years old, give this a try if pchgpio(4) doesn't attach.  This
may fix all sorts of issues with keyboards, touchpads or
suspend/resume.

ok?


Index: dev/acpi/pchgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
retrieving revision 1.8
diff -u -p -r1.8 pchgpio.c
--- dev/acpi/pchgpio.c  29 Sep 2021 22:03:33 -  1.8
+++ dev/acpi/pchgpio.c  9 Oct 2021 18:54:41 -
@@ -107,13 +107,76 @@ struct cfdriver pchgpio_cd = {
 };
 
 const char *pchgpio_hids[] = {
+   "INT344B",
"INT3450",
+   "INT3451",
+   "INT345D",
"INT34BB",
"INT34C5",
"INT34C6",
NULL
 };
 
+/* Sunrisepoint-LP */
+
+const struct pchgpio_group spt_lp_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 119, 96 },  /* GPP_E */
+   
+   /* Community 3 */
+   { 2, 3, 120, 143, 120 },/* GPP_F */
+   { 2, 4, 144, 151, 144 },/* GPP_G */
+};
+
+const struct pchgpio_device spt_lp_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_lp_groups,
+   .ngroups = nitems(spt_lp_groups),
+   .npins = 176,
+};
+
+/* Sunrisepoint-H */
+
+const struct pchgpio_group spt_h_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 23, 0 }, /* GPP_A */
+   { 0, 1, 24, 47, 24 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 48, 71, 48 },   /* GPP_C */
+   { 1, 1, 72, 95, 72 },   /* GPP_D */
+   { 1, 2, 96, 108, 96 },  /* GPP_E */
+   { 1, 3, 109, 132, 120 },/* GPP_F */
+   { 1, 4, 133, 156, 144 },/* GPP_G */
+   { 1, 5, 157, 180, 168 },/* GPP_H */
+
+   /* Community 3 */
+   { 2, 0, 181, 191, 192 },/* GPP_I */
+};
+
+const struct pchgpio_device spt_h_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = spt_h_groups,
+   .ngroups = nitems(spt_h_groups),
+   .npins = 224,
+};
+
+/* Cannon Lake-H */
+
 const struct pchgpio_group cnl_h_groups[] =
 {
/* Community 0 */
@@ -146,6 +209,8 @@ const struct pchgpio_device cnl_h_device
.npins = 384,
 };
 
+/* Cannon Lake-LP */
+
 const struct pchgpio_group cnl_lp_groups[] =
 {
/* Community 0 */
@@ -173,6 +238,8 @@ const struct pchgpio_device cnl_lp_devic
.npins = 320,
 };
 
+/* Tiger Lake-LP */
+
 const struct pchgpio_group tgl_lp_groups[] =
 {
/* Community 0 */
@@ -205,6 +272,8 @@ const struct pchgpio_device tgl_lp_devic
.npins = 360,
 };
 
+/* Tiger Lake-H */
+
 const struct pchgpio_group tgl_h_groups[] =
 {
/* Community 0 */
@@ -242,7 +311,10 @@ const struct pchgpio_device tgl_h_device
 };
 
 struct pchgpio_match pchgpio_devices[] = {
+   { "INT344B", _lp_device },
{ "INT3450", _h_device },
+   { "INT3451", _h_device },
+   { "INT345D", _h_device },
{ "INT34BB", _lp_device },
{ "INT34C5", _lp_device },
{ "INT34C6", _h_device },



Re: Handle openbsd,dma-constraint on armv7

2021-10-05 Thread Mark Kettenis
> Date: Tue, 5 Oct 2021 12:42:21 +
> From: Visa Hankala 
> 
> On Mon, Oct 04, 2021 at 05:04:11PM +0200, Mark Kettenis wrote:
> > > Date: Mon, 4 Oct 2021 13:42:48 +
> > > From: Visa Hankala 
> > > 
> > > On the Zynq-7000, the lowest 512KiB of physical address space usually
> > > contains RAM that is usable by the CPUs. However, many other bus
> > > masters, such as the Ethernet and SDIO controllers, are not able to
> > > access the 256KiB range that starts at physical address 0x40000.
> > > 
> > > So far I have used a device tree that says that RAM starts at 0x80000,
> > > to avoid the DMA hole. This is unconventional, though. Typically the
> > > memory node for Zynq-7000 specifies 0x0 as the starting address for RAM.
> > > 
> > > I think armv7 DMA constraint should be adjusted on the Zynq-7000 so that
> > > less device tree customization would be needed.
> > > 
> > > This diff makes armv7 efiboot and kernel handle the
> > > openbsd,dma-constraint device tree property, with a tweak for the Zynq.
> > > The code is similar to what is already present on arm64 and riscv64.
> > > 
> > > OK?
> > 
> > Hmm.  How does Linux know it can't do DMA to that memory range?
> > Normally that is done through a dma-ranges property in the device
> > tree, and my idea has always been to add code to parse these
> > properties to determine the valid DMA constraints.  The reason there
> > is special code for the rpi4 is because the initial device trees for
> > the rpi4 didn't have those dma-ranges properties.
> 
> Linux reserves the first 512KiB in Zynq platform init code to prevent
> the region from being used.

So basically they do what we try to do here, but in the kernel.  With
that explanation, this diff (with the suggested change) is ok kettenis@

Cheers,

Mark

> > I'm not necessarily against this diff going in, just curious if there
> > is a better way.
> > 
> > > Index: arch/armv7/armv7/armv7_machdep.c
> > > ===
> > > RCS file: src/sys/arch/armv7/armv7/armv7_machdep.c,v
> > > retrieving revision 1.63
> > > diff -u -p -r1.63 armv7_machdep.c
> > > --- arch/armv7/armv7/armv7_machdep.c  25 Mar 2021 04:12:01 -  
> > > 1.63
> > > +++ arch/armv7/armv7/armv7_machdep.c  4 Oct 2021 13:32:11 -
> > > @@ -453,6 +453,12 @@ initarm(void *arg0, void *arg1, void *ar
> > >   len = fdt_node_property(node, "openbsd,uefi-mmap-desc-ver", 
> > > );
> > >   if (len == sizeof(mmap_desc_ver))
> > >   mmap_desc_ver = bemtoh32((uint32_t *)prop);
> > > +
> > > + len = fdt_node_property(node, "openbsd,dma-constraint", );
> > > + if (len == sizeof(uint64_t[2])) {
> > > + dma_constraint.ucr_low = bemtoh64((uint64_t *)prop);
> > > + dma_constraint.ucr_high = bemtoh64((uint64_t *)prop + 
> > > 1);
> > > + }
> > >   }
> > >  
> > >   process_kernel_args();
> > > Index: arch/armv7/stand/efiboot/conf.c
> > > ===
> > > RCS file: src/sys/arch/armv7/stand/efiboot/conf.c,v
> > > retrieving revision 1.31
> > > diff -u -p -r1.31 conf.c
> > > --- arch/armv7/stand/efiboot/conf.c   10 Jun 2021 22:17:58 -  
> > > 1.31
> > > +++ arch/armv7/stand/efiboot/conf.c   4 Oct 2021 13:32:11 -
> > > @@ -42,7 +42,7 @@
> > >  #include "efidev.h"
> > >  #include "efipxe.h"
> > >  
> > > -const char version[] = "1.18";
> > > +const char version[] = "1.19";
> > >  int  debug = 0;
> > >  
> > >  struct fs_ops file_system[] = {
> > > Index: arch/armv7/stand/efiboot/efiboot.c
> > > ===
> > > RCS file: src/sys/arch/armv7/stand/efiboot/efiboot.c,v
> > > retrieving revision 1.34
> > > diff -u -p -r1.34 efiboot.c
> > > --- arch/armv7/stand/efiboot/efiboot.c7 Jun 2021 21:18:31 -   
> > > 1.34
> > > +++ arch/armv7/stand/efiboot/efiboot.c4 Oct 2021 13:32:11 -
> > > @@ -435,6 +435,28 @@ efi_framebuffer(void)
> > >   sizeof(framebuffer_path));
> > >  }
> > >  
> > > +uint64_t dma_constraint[2] = { 0, -1 };
> > > +
> > > +void
> >

Re: sigwaitinfo(2) and sigtimedwait(2)

2021-10-04 Thread Mark Kettenis
> Date: Mon, 4 Oct 2021 22:03:32 +0200
> From: Rafael Sadowski 
> 
> On Sun Sep 26, 2021 at 02:36:02PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 24 Sep 2021 19:36:21 +0200
> > > From: Rafael Sadowski 
> > > 
> > > I'm trying to port more KDE stuff, so my question is from a porter's
> > > perspective.
> > > 
> > > I need sigwaitinfo(2)/sigtimedwait(2) and I found both functions in
> > > lib/libc/gen/sigwait.c with the comment "need kernel to fill in more
> > > siginfo_t bits first". Is the comment still up to date? If no, is it
> > > possible to unlock the functions?
> > 
> > Still true.  These functions are somewhat underspecified by POSIX so
> > it isn't really obvious what additional bits need to be filled in.
> > Having examples of code that use these interfaces from ports could
> > help with that.
> > 
> 
> One use-case from kscreenlocker-5.22.5/kcheckpass/kcheckpass.c
> 
> Full code:
> https://github.com/KDE/kscreenlocker/blob/master/kcheckpass/kcheckpass.c
> 
> It tries to handle SIGUSR1 and SIGUSR2. I think this can be solved in
> another way, so this is a bad example, isn't it?

So they're using SIGUSR1 and SIGUSR2 as a primitive form of IPC and
using si_pid to check that the signal actually came from the parent
process.  I suppose this is to prevent a local DOS against the
password checker.  If you don't care about that, you could just drop
the sigwaitinfo() call and the si_pid checks.

> /* signal_info for sigwaitinfo() */
> siginfo_t signalInfo;
> 
> // now lets block on the fd
> for (;;) {
> conv_server(ConvPutReadyForAuthentication, 0);
> 
> keventData = kevent(keventQueue, NULL, 0, keventEvent, 1, NULL);
> if (keventData == -1) {
> /* Let's figure this out in the future, shall we */
> message("kevent() failed with %d\n", errno);
> return 1;
> } else if (keventData == 0) {
> /* Do we need to handle timeouts? */
> message("kevent timeout\n");
> continue;
> }
> // We know we got a SIGUSR1 or SIGUSR2, so fetch it via 
> sigwaitinfo()
> // (otherwise, we could have used sigtimedwait() )
> int signalReturn = sigwaitinfo(, );
> if (signalReturn < 0) {
> if (errno == EINTR) {
> message("sigawaitinfo() interrupted by unblocked caught 
> signal");
> continue;
> } else if (errno == EAGAIN) {
> /* This should not happen, as kevent notified us about 
> such a signal */
> message("no signal of type USR1 or USR2 pending.");
> continue;
> } else {
> message("Unhandled error in sigwaitinfo()");
> conv_server(ConvPutAuthError, 0);
> return 1;
> }
> }
> if (signalReturn == SIGUSR1) {
> if (signalInfo.si_pid != parentPid) {
> message("signal from wrong process\n");
> continue;
> }
> /* Now do the fandango */
> ret = Authenticate(method, username, conv_server);
> 
> if (ret == AuthBad) {
> message("Authentication failure\n");
> if (!nullpass) {
> openlog("kcheckpass", LOG_PID, LOG_AUTH);
> syslog(LOG_NOTICE, "Authentication failure for %s 
> (invoked by uid %d)", username, uid);
> }
> }
> switch (ret) {
> case AuthOk:
> conv_server(ConvPutAuthSucceeded, 0);
> break;
> case AuthBad:
> conv_server(ConvPutAuthFailed, 0);
> break;
> case AuthError:
> conv_server(ConvPutAuthError, 0);
> break;
> case AuthAbort:
> conv_server(ConvPutAuthAbort, 0);
> default:
> break;
> }
> if (uid != geteuid()) {
> // we don't support multiple auth for setuid kcheckpass
> break;
> }
> } else if (signalReturn == SIGUSR2) {
> if (signalInfo.si_pid != parentPid) {
> message("signal from wrong process\n");
> continue;
> }
> break;
> }
> 



Re: Handle openbsd,dma-constraint on armv7

2021-10-04 Thread Mark Kettenis
> Date: Mon, 4 Oct 2021 13:42:48 +
> From: Visa Hankala 
> 
> On the Zynq-7000, the lowest 512KiB of physical address space usually
> contains RAM that is usable by the CPUs. However, many other bus
> masters, such as the Ethernet and SDIO controllers, are not able to
> access the 256KiB range that starts at physical address 0x40000.
> 
> So far I have used a device tree that says that RAM starts at 0x80000,
> to avoid the DMA hole. This is unconventional, though. Typically the
> memory node for Zynq-7000 specifies 0x0 as the starting address for RAM.
> 
> I think armv7 DMA constraint should be adjusted on the Zynq-7000 so that
> less device tree customization would be needed.
> 
> This diff makes armv7 efiboot and kernel handle the
> openbsd,dma-constraint device tree property, with a tweak for the Zynq.
> The code is similar to what is already present on arm64 and riscv64.
> 
> OK?

Hmm.  How does Linux know it can't do DMA to that memory range?
Normally that is done through a dma-ranges property in the device
tree, and my idea has always been to add code to parse these
properties to determine the valid DMA constraints.  The reason there
is special code for the rpi4 is because the initial device trees for
the rpi4 didn't have those dma-ranges properties.

I'm not necessarily against this diff going in, just curious if there
is a better way.

> Index: arch/armv7/armv7/armv7_machdep.c
> ===
> RCS file: src/sys/arch/armv7/armv7/armv7_machdep.c,v
> retrieving revision 1.63
> diff -u -p -r1.63 armv7_machdep.c
> --- arch/armv7/armv7/armv7_machdep.c  25 Mar 2021 04:12:01 -  1.63
> +++ arch/armv7/armv7/armv7_machdep.c  4 Oct 2021 13:32:11 -
> @@ -453,6 +453,12 @@ initarm(void *arg0, void *arg1, void *ar
>   len = fdt_node_property(node, "openbsd,uefi-mmap-desc-ver", 
> );
>   if (len == sizeof(mmap_desc_ver))
>   mmap_desc_ver = bemtoh32((uint32_t *)prop);
> +
> + len = fdt_node_property(node, "openbsd,dma-constraint", );
> + if (len == sizeof(uint64_t[2])) {
> + dma_constraint.ucr_low = bemtoh64((uint64_t *)prop);
> + dma_constraint.ucr_high = bemtoh64((uint64_t *)prop + 
> 1);
> + }
>   }
>  
>   process_kernel_args();
> Index: arch/armv7/stand/efiboot/conf.c
> ===
> RCS file: src/sys/arch/armv7/stand/efiboot/conf.c,v
> retrieving revision 1.31
> diff -u -p -r1.31 conf.c
> --- arch/armv7/stand/efiboot/conf.c   10 Jun 2021 22:17:58 -  1.31
> +++ arch/armv7/stand/efiboot/conf.c   4 Oct 2021 13:32:11 -
> @@ -42,7 +42,7 @@
>  #include "efidev.h"
>  #include "efipxe.h"
>  
> -const char version[] = "1.18";
> +const char version[] = "1.19";
>  int  debug = 0;
>  
>  struct fs_ops file_system[] = {
> Index: arch/armv7/stand/efiboot/efiboot.c
> ===
> RCS file: src/sys/arch/armv7/stand/efiboot/efiboot.c,v
> retrieving revision 1.34
> diff -u -p -r1.34 efiboot.c
> --- arch/armv7/stand/efiboot/efiboot.c7 Jun 2021 21:18:31 -   
> 1.34
> +++ arch/armv7/stand/efiboot/efiboot.c4 Oct 2021 13:32:11 -
> @@ -435,6 +435,28 @@ efi_framebuffer(void)
>   sizeof(framebuffer_path));
>  }
>  
> +uint64_t dma_constraint[2] = { 0, -1 };
> +
> +void
> +efi_dma_constraint(void)
> +{
> + void *node;
> +
> + /* Raspberry Pi 4 is "special". */
> + node = fdt_find_node("/");
> + if (fdt_node_is_compatible(node, "brcm,bcm2711"))
> + dma_constraint[1] = htobe64(0x3bff);
> +
> + /* Not all bus masters can access 0x4-0x7 on Zynq-7000. */
> + if (fdt_node_is_compatible(node, "xlnx,zynq-7000"))
> + dma_constraint[0] = htobe64(0x0008);
> +
> + /* Pass DMA constraint. */
> + node = fdt_find_node("/chosen");
> + fdt_node_add_property(node, "openbsd,dma-constraint",
> + dma_constraint, sizeof(dma_constraint));
> +}
> +
>  void
>  efi_console(void)
>  {
> @@ -515,6 +537,7 @@ efi_makebootargs(char *bootargs, int how
>  
>   efi_framebuffer();
>   efi_console();
> + efi_dma_constraint();
>  
>   fdt_finalize();
>  
> 
> 



athn(4) fix

2021-10-03 Thread Mark Kettenis
Apparently some athn(4) variants are buggy and may hand us corrupted
packets.  Linux has some workarounds for this and the diff below is a
(partial) adaptation of those workarounds.  It seems the idea is that
the error bits in the status word should not be set even if the frame
is marked as ok.  So if those bits are set, ignore the frame and bump
the input error count.  This doesn't filter out all corrupted frames,
but it does seem to drop most of them and seems to prevent filling up
the node cache on my access point that uses athn(4):

athn0 at pci1 dev 0 function 0 "Atheros AR9281" rev 0x01: intx
athn0: AR9280 rev 2 (2T2R), ROM rev 16, address xx:xx:xx:xx:xx:xx

ok?


Index: dev/ic/ar5008.c
===
RCS file: /cvs/src/sys/dev/ic/ar5008.c,v
retrieving revision 1.67
diff -u -p -r1.67 ar5008.c
--- dev/ic/ar5008.c 1 Jul 2021 11:51:55 -   1.67
+++ dev/ic/ar5008.c 3 Oct 2021 19:21:11 -
@@ -921,6 +921,12 @@ ar5008_rx_process(struct athn_softc *sc,
ifp->if_ierrors++;
goto skip;
}
+   } else {
+   if (ds->ds_status8 & (AR_RXS8_CRC_ERR | AR_RXS8_PHY_ERR |
+   AR_RXS8_DECRYPT_CRC_ERR | AR_RXS8_MICHAEL_ERR)) {
+   ifp->if_ierrors++;
+   goto skip;
+   }
}
 
len = MS(ds->ds_status1, AR_RXS1_DATA_LEN);



Re: mi_switch() & setting `p_stat'

2021-10-02 Thread Mark Kettenis
> Date: Sat, 2 Oct 2021 20:35:41 +0200
> From: Martin Pieuchot 
> 
> On 02/10/21(Sat) 20:24, Mark Kettenis wrote:
> > > Date: Sat, 2 Oct 2021 19:55:49 +0200
> > > From: Martin Pieuchot 
> > > 
> > > When a thread running on a CPU schedules itself out, it does the following
> > > (pseudo_code):
> > > 
> > >   SCHED_LOCK()
> > >   curproc->p_stat = SSLEEP;
> > >   // some more operations
> > >   mi_switch()
> > > 
> > > The problem with this is that any instrumentation between setting `p_stat'
> > > and cpu_switchto() is incorrect because 'curproc' is still being executed
> > > and is not yet sleeping.  Its `p_stat' should be SONPROC and not SSLEEP.
> > 
> > Hmm, well, we're holding the scheduler lock, so nothing should really
> > look at our state at this point...
> 
> I added many TRACEPOINT() to investigate the scheduler's behaviour.  They
> look at those states.
> 
> > > It is possible to reproduce the problem with the following btrace(8) 
> > > script:
> > > 
> > >   tracepoint:sched:enqueue { printf("%d -> enqueue (%d)\n", arg0, arg1); }
> > >   tracepoint:sched:dequeue { printf("%d <- dequeue (%d)\n", arg0, arg1); }
> > >   tracepoint:sched:on__cpu { printf("%d -- on cpu (%d)\n", tid, pid); }
> > > 
> > > At which point the KASSERT() in wakeup_n() triggers if `curproc' is going 
> > > to
> > > sleep and its sleep channel collides with the running btrace(8) program:
> > > 
> > >   dt_prov_static_hook() at dt_prov_static_hook+0xe4
> > >   remrunqueue() at remrunqueue+0x1a4
> > >   sched_chooseproc() at sched_chooseproc+0x200
> > >   mi_switch() at mi_switch+0x178
> > >   sleep_finish() at sleep_finish+0x1d0
> > >   tsleep() at tsleep+0x100
> > >   biowait() at biowait+0x4c
> > >   ffs_read() at ffs_read+0x1c0
> > >   VOP_READ() at VOP_READ+0x44
> > >   vn_read() at vn_read+0x84
> > >   dofilereadv() at dofilereadv+0x8c
> > >   sys_read() at sys_read+0x5c
> > 
> > which suggests that something fishy is going on here.  Did we
> > accidentally introduce a sleeping point in the scheduler?
> 
> There's no sleeping point but a call to wakeup().  This wakeup() is
> supposed to wake a btrace(8) process.  But if the curproc, which just
> added itself to the global sleep queue, ends up in the same bucket as
> the btrace process, the KASSERT() line 565 of kern/kern_synch.c will
> trigger:
> 
> /*
>  * If the rwlock passed to rwsleep() is contended, the
>  * CPU will end up calling wakeup() between sleep_setup()
>  * and sleep_finish().
>  */
> if (p == curproc) {
> KASSERT(p->p_stat == SONPROC);
> continue;
> }

Ah, right.  But that means the comment isn't accurate.  At least there
are other cases that make us hit that codepath.

How useful is that KASSERT in catching actual bugs?

> > > To fix this we should set `p_stat' as late as possible, diff below does 
> > > that
> > > just before calling cpu_switchto().
> > > 
> > > Note that there's an exception for SRUN because setrunqueue() changes 
> > > `p_stat'
> > > to indicate the thread is on a queue.  I'll discuss that in an upcoming 
> > > diff.
> > > 
> > > ok?
> > 
> > I'm not necessarily against this diff, but it may hide bugs.  And...
> 
> Updated diff that uses a char.

That looks better.

> Index: kern/kern_sched.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 kern_sched.c
> --- kern/kern_sched.c 9 Sep 2021 18:41:39 -   1.73
> +++ kern/kern_sched.c 2 Oct 2021 17:00:52 -
> @@ -144,10 +144,9 @@ sched_idle(void *v)
>*/
>   SCHED_LOCK(s);
>   cpuset_add(_idle_cpus, ci);
> - p->p_stat = SSLEEP;
>   p->p_cpu = ci;
>   atomic_setbits_int(>p_flag, P_CPUPEG);
> - mi_switch();
> + mi_switch(SSLEEP);
>   cpuset_del(_idle_cpus, ci);
>   SCHED_UNLOCK(s);
>  
> @@ -159,8 +158,7 @@ sched_idle(void *v)
>   struct proc *dead;
>  
>   SCHED_LOCK(s);
> - p->p_stat = SSLEEP;
> - mi_switch();
> + mi_switch(SSLEEP);
>   SCHED_UNLOCK(s);
>  
>  

Re: mi_switch() & setting `p_stat'

2021-10-02 Thread Mark Kettenis
> Date: Sat, 2 Oct 2021 19:55:49 +0200
> From: Martin Pieuchot 
> 
> When a thread running on a CPU schedules itself out, it does the following
> (pseudo_code):
> 
>   SCHED_LOCK()
>   curproc->p_stat = SSLEEP;
>   // some more operations
>   mi_switch()
> 
> The problem with this is that any instrumentation between setting `p_stat'
> and cpu_switchto() is incorrect because 'curproc' is still being executed
> and is not yet sleeping.  Its `p_stat' should be SONPROC and not SSLEEP.

Hmm, well, we're holding the scheduler lock, so nothing should really
look at our state at this point...

> It is possible to reproduce the problem with the following btrace(8) script:
> 
>   tracepoint:sched:enqueue { printf("%d -> enqueue (%d)\n", arg0, arg1); }
>   tracepoint:sched:dequeue { printf("%d <- dequeue (%d)\n", arg0, arg1); }
>   tracepoint:sched:on__cpu { printf("%d -- on cpu (%d)\n", tid, pid); }
> 
> At which point the KASSERT() in wakeup_n() triggers if `curproc' is going to
> sleep and its sleep channel collides with the running btrace(8) program:
> 
>   dt_prov_static_hook() at dt_prov_static_hook+0xe4
>   remrunqueue() at remrunqueue+0x1a4
>   sched_chooseproc() at sched_chooseproc+0x200
>   mi_switch() at mi_switch+0x178
>   sleep_finish() at sleep_finish+0x1d0
>   tsleep() at tsleep+0x100
>   biowait() at biowait+0x4c
>   ffs_read() at ffs_read+0x1c0
>   VOP_READ() at VOP_READ+0x44
>   vn_read() at vn_read+0x84
>   dofilereadv() at dofilereadv+0x8c
>   sys_read() at sys_read+0x5c

which suggests that something fishy is going on here.  Did we
accidentally introduce a sleeping point in the scheduler?

> To fix this we should set `p_stat' as late as possible, diff below does that
> just before calling cpu_switchto().
> 
> Note that there's an exception for SRUN because setrunqueue() changes `p_stat'
> to indicate the thread is on a queue.  I'll discuss that in an upcoming diff.
> 
> ok?

I'm not necessarily against this diff, but it may hide bugs.  And...

> Index: kern/kern_sched.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sched.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 kern_sched.c
> --- kern/kern_sched.c 9 Sep 2021 18:41:39 -   1.73
> +++ kern/kern_sched.c 2 Oct 2021 17:00:52 -
> @@ -144,10 +144,9 @@ sched_idle(void *v)
>*/
>   SCHED_LOCK(s);
>   cpuset_add(_idle_cpus, ci);
> - p->p_stat = SSLEEP;
>   p->p_cpu = ci;
>   atomic_setbits_int(>p_flag, P_CPUPEG);
> - mi_switch();
> + mi_switch(SSLEEP);
>   cpuset_del(_idle_cpus, ci);
>   SCHED_UNLOCK(s);
>  
> @@ -159,8 +158,7 @@ sched_idle(void *v)
>   struct proc *dead;
>  
>   SCHED_LOCK(s);
> - p->p_stat = SSLEEP;
> - mi_switch();
> + mi_switch(SSLEEP);
>   SCHED_UNLOCK(s);
>  
>   while ((dead = LIST_FIRST(>spc_deadproc))) {
> @@ -625,7 +623,7 @@ sched_peg_curproc(struct cpu_info *ci)
>   atomic_setbits_int(>p_flag, P_CPUPEG);
>   setrunqueue(ci, p, p->p_usrpri);
>   p->p_ru.ru_nvcsw++;
> - mi_switch();
> + mi_switch(SRUN);
>   SCHED_UNLOCK(s);
>  }
>  
> Index: kern/kern_synch.c
> ===
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.179
> diff -u -p -r1.179 kern_synch.c
> --- kern/kern_synch.c 9 Sep 2021 18:41:39 -   1.179
> +++ kern/kern_synch.c 2 Oct 2021 17:00:52 -
> @@ -421,10 +421,9 @@ sleep_finish(struct sleep_state *sls, in
>   }
>  
>   if (do_sleep) {
> - p->p_stat = SSLEEP;
>   p->p_ru.ru_nvcsw++;
>   SCHED_ASSERT_LOCKED();
> - mi_switch();
> + mi_switch(SSLEEP);
>   } else {
>   unsleep(p);
>   }
> @@ -603,7 +602,7 @@ sys_sched_yield(struct proc *p, void *v,
>   newprio = max(newprio, q->p_runpri);
>   setrunqueue(p->p_cpu, p, newprio);
>   p->p_ru.ru_nvcsw++;
> - mi_switch();
> + mi_switch(SRUN);
>   SCHED_UNLOCK(s);
>  
>   return (0);
> Index: kern/kern_sig.c
> ===
> RCS file: /cvs/src/sys/kern/kern_sig.c,v
> retrieving revision 1.283
> diff -u -p -r1.283 kern_sig.c
> --- kern/kern_sig.c   28 Sep 2021 10:00:18 -  1.283
> +++ kern/kern_sig.c   2 Oct 2021 17:00:52 -
> @@ -1347,7 +1347,6 @@ proc_stop(struct proc *p, int sw)
>   SCHED_ASSERT_LOCKED();
>  #endif
>  
> - p->p_stat = SSTOP;
>   atomic_clearbits_int(>ps_flags, PS_WAITED);
>   atomic_setbits_int(>ps_flags, PS_STOPPED);
>   atomic_setbits_int(>p_flag, P_SUSPSIG);
> @@ -1357,7 +1356,7 @@ proc_stop(struct proc *p, int sw)
>*/
>   softintr_schedule(proc_stop_si);
>   if (sw)
> - mi_switch();
> + mi_switch(SSTOP);
>  }
>  
>  /*
> @@ 

Re: Unref/free amap w/o KERNEL_LOCK()

2021-10-01 Thread Mark Kettenis
> Date: Fri, 1 Oct 2021 20:07:20 +0200
> From: Martin Pieuchot 
> 
> amap operations are already serialized by their own lock so it is
> possible to free them w/o holding the KERNEL_LOCK().  This has been
> tested by many as part of the UVM unlocking diff.
> 
> ok?

ok kettenis@

but please wait with putting in further diffs until Theo has started
building snaps

> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.277
> diff -u -p -r1.277 uvm_map.c
> --- uvm/uvm_map.c 17 Jun 2021 16:10:39 -  1.277
> +++ uvm/uvm_map.c 1 Oct 2021 17:02:29 -
> @@ -1570,9 +1570,15 @@ uvm_unmap_detach(struct uvm_map_deadq *d
>   int waitok = flags & UVM_PLA_WAITOK;
>  
>   TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
> + /* Drop reference to amap, if we've got one. */
> + if (entry->aref.ar_amap)
> + amap_unref(entry->aref.ar_amap,
> + entry->aref.ar_pageoff,
> + atop(entry->end - entry->start),
> + flags & AMAP_REFALL);
> +
>   /* Skip entries for which we have to grab the kernel lock. */
> - if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
> - UVM_ET_ISOBJ(entry))
> + if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
>   continue;
>  
>   TAILQ_REMOVE(deadq, entry, dfree.deadq);
> @@ -1586,13 +1592,6 @@ uvm_unmap_detach(struct uvm_map_deadq *d
>   while ((entry = TAILQ_FIRST(deadq)) != NULL) {
>   if (waitok)
>   uvm_pause();
> - /* Drop reference to amap, if we've got one. */
> - if (entry->aref.ar_amap)
> - amap_unref(entry->aref.ar_amap,
> - entry->aref.ar_pageoff,
> - atop(entry->end - entry->start),
> - flags & AMAP_REFALL);
> -
>   /* Drop reference to our backing object, if we've got one. */
>   if (UVM_ET_ISSUBMAP(entry)) {
>   /* ... unlikely to happen, but play it safe */
> 
> 



Re: i386: pmap_collect()

2021-10-01 Thread Mark Kettenis
> Date: Fri, 1 Oct 2021 20:10:35 +0200
> From: Martin Pieuchot 
> 
> Diff below turns i386's pmap_collect() into a noop like it is on
> amd64/arm64/powerpc64...  This is part of the UVM unlocking diff and
> might no longer be necessary now that pmap_extract() has been fixed.

I think it is no longer necessary.

> So I'd like to know if we want to align i386's behavior with other
> archs, which should help us debug MI issues or if I drop this diff.
> 
> Since sthen@ tested it, I'd be in favor of putting it in.  ok?

There are other architectures that do something similar and get even
less testing than i386.  So I think it makes (some) sense to keep it.

> Index: arch/i386/i386/pmap.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/pmap.c,v
> retrieving revision 1.217
> diff -u -p -r1.217 pmap.c
> --- arch/i386/i386/pmap.c 11 Sep 2021 18:08:32 -  1.217
> +++ arch/i386/i386/pmap.c 1 Oct 2021 17:04:20 -
> @@ -2259,13 +2259,6 @@ pmap_unwire_86(struct pmap *pmap, vaddr_
>  void
>  pmap_collect(struct pmap *pmap)
>  {
> - /*
> -  * free all of the pt pages by removing the physical mappings
> -  * for its entire address space.
> -  */
> -
> - pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS,
> - PMAP_REMOVE_SKIPWIRED);
>  }
>  
>  /*
> 
> 



riscv64 TLB flush optimization

2021-09-29 Thread Mark Kettenis
The diff below optimizes TLB flushes a bit, attempting to flush a
whole range instead of individual pages in pmap_remove() and only do a
flush when we actually insert a mapping in pmap_enter().

This survived a make build on my machine, but some further testing
would be appreciated.


Index: arch/riscv64/riscv64/pmap.c
===
RCS file: /cvs/src/sys/arch/riscv64/riscv64/pmap.c,v
retrieving revision 1.20
diff -u -p -r1.20 pmap.c
--- arch/riscv64/riscv64/pmap.c 14 Sep 2021 16:21:21 -  1.20
+++ arch/riscv64/riscv64/pmap.c 29 Sep 2021 22:03:58 -
@@ -65,6 +65,33 @@ do_tlb_flush_page(pmap_t pm, vaddr_t va)
 }
 
 void
+do_tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+#ifdef MULTIPROCESSOR
+   CPU_INFO_ITERATOR cii;
+   struct cpu_info *ci;
+   unsigned long hart_mask = 0;
+
+   CPU_INFO_FOREACH(cii, ci) {
+   if (ci == curcpu())
+   continue;
+   if (pmap_is_active(pm, ci))
+   hart_mask |= (1UL << ci->ci_hartid);
+   }
+
+   if (hart_mask != 0)
+   sbi_remote_sfence_vma(_mask, sva, eva - sva);
+#endif
+
+   if (pmap_is_active(pm, curcpu())) {
+   while (sva < eva) {
+   sfence_vma_page(sva);
+   sva += PAGE_SIZE;
+   }
+   }
+}
+
+void
 do_tlb_flush(pmap_t pm)
 {
 #ifdef MULTIPROCESSOR
@@ -95,6 +122,15 @@ tlb_flush_page(pmap_t pm, vaddr_t va)
do_tlb_flush_page(pm, va);
 }
 
+void
+tlb_flush_range(pmap_t pm, vaddr_t sva, vaddr_t eva)
+{
+   if (cpu_errata_sifive_cip_1200)
+   do_tlb_flush(pm);
+   else
+   do_tlb_flush_range(pm, sva, eva);
+}
+
 static inline void
 icache_flush(void)
 {
@@ -480,6 +516,8 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
int cache = PMAP_CACHE_WB;
int need_sync;
 
+   KASSERT((va & PAGE_MASK) == 0);
+
if (pa & PMAP_NOCACHE)
cache = PMAP_CACHE_CI;
if (pa & PMAP_DEVICE)
@@ -490,6 +528,7 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
pted = pmap_vp_lookup(pm, va, NULL);
if (pted && PTED_VALID(pted)) {
pmap_remove_pted(pm, pted);
+   tlb_flush_page(pm, va);
/* we lost our pted if it was user */
if (pm != pmap_kernel())
pted = pmap_vp_lookup(pm, va, NULL);
@@ -540,10 +579,9 @@ pmap_enter(pmap_t pm, vaddr_t va, paddr_
 */
if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
pmap_pte_insert(pted);
+   tlb_flush_page(pm, va);
}
 
-   tlb_flush_page(pm, va & ~PAGE_MASK);
-
if (pg != NULL && (flags & PROT_EXEC)) {
need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
atomic_setbits_int(>pg_flags, PG_PMAP_EXE);
@@ -582,6 +620,7 @@ pmap_remove(pmap_t pm, vaddr_t sva, vadd
if (PTED_VALID(pted))
pmap_remove_pted(pm, pted);
}
+   tlb_flush_range(pm, sva, eva);
pmap_unlock(pm);
 }
 
@@ -600,8 +639,6 @@ pmap_remove_pted(pmap_t pm, struct pte_d
 
pmap_pte_remove(pted, pm != pmap_kernel());
 
-   tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
-
if (pted->pted_va & PTED_VA_EXEC_M) {
pted->pted_va &= ~PTED_VA_EXEC_M;
}
@@ -699,7 +736,6 @@ pmap_kremove_pg(vaddr_t va)
 * or that the mapping is not present in the hash table.
 */
pmap_pte_remove(pted, 0);
-
tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
 
if (pted->pted_va & PTED_VA_EXEC_M)
@@ -1514,6 +1550,8 @@ pmap_page_protect(struct vm_page *pg, vm
mtx_leave(>mdpage.pv_mtx);
 
pmap_remove_pted(pm, pted);
+   tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
+
pmap_unlock(pm);
pmap_destroy(pm);
 



Re: riscv makedev wd0

2021-09-28 Thread Mark Kettenis
> Date: Tue, 28 Sep 2021 17:46:47 +0200
> From: Alexander Bluhm 
> 
> Hi,
> 
> in regress/etc/MAKEDEV I see this "wd0: unknown device" error in
> riscv64 ramdisk.
> 
>  run-riscv64-ramdisk 
> rm -rf -- riscv64-ramdisk.dir
> mkdir -m 700 riscv64-ramdisk.dir
> cp /usr/src/regress/etc/MAKEDEV/../../../etc/etc.riscv64/MAKEDEV 
> riscv64-ramdisk.dir/
> chown root:wheel riscv64-ramdisk.dir
> time sh -c 'cd riscv64-ramdisk.dir && sh ./MAKEDEV ramdisk'
> wd0: unknown device
> 0m00.27s real 0m00.06s user 0m00.21s system
> ls -ln riscv64-ramdisk.dir/ |  awk '/^[bcps]/ {printf "%s %x.%x 
> %x,%x%s\n",$1,$3,$4,$5,$6,$10}  /^l/  {printf "%s 
> %s.%s%s>%s\n",$1,$3,$4,$9,$11}' |  sort +5 -n |  sed -e 
> 's/rwx/7/g;s/rw-/6/g;s/r-x/5/g;s/r--/4/g'  -e 
> 's/-wx/3/g;s/-w-/2/g;s/--x/1/g;s/---/0/g'  -e 's/^\([bcpsl]\)\([0-9][0-9]*\) 
> /\2\1/'  >riscv64-ramdisk.out
> 
> If we have no wd(4) on riscv64, we should also remove it from ramdisk.
> 
> ok?

Well, PCIe IDE controllers exist.  So can we get away with not
supporting those...

> Index: etc/etc.riscv64/MAKEDEV
> ===
> RCS file: /data/mirror/openbsd/cvs/src/etc/etc.riscv64/MAKEDEV,v
> retrieving revision 1.2
> diff -u -p -r1.2 MAKEDEV
> --- etc/etc.riscv64/MAKEDEV   26 Jun 2021 00:48:28 -  1.2
> +++ etc/etc.riscv64/MAKEDEV   28 Sep 2021 15:39:22 -
> @@ -213,7 +213,7 @@ U=`unt $i`
>  
>  case $i in
>  ramdisk)
> - R std bpf wd0 sd0 tty00 tty01 rd0 bio diskmap
> + R std bpf sd0 tty00 tty01 rd0 bio diskmap
>   R cd0 ttyC0 wskbd0 wskbd1 wskbd2 random
>   ;;
>  
> Index: etc/etc.riscv64/MAKEDEV.md
> ===
> RCS file: /data/mirror/openbsd/cvs/src/etc/etc.riscv64/MAKEDEV.md,v
> retrieving revision 1.2
> diff -u -p -r1.2 MAKEDEV.md
> --- etc/etc.riscv64/MAKEDEV.md26 Jun 2021 00:38:38 -  1.2
> +++ etc/etc.riscv64/MAKEDEV.md28 Sep 2021 15:39:16 -
> @@ -86,7 +86,7 @@ dnl
>  divert(__mddivert)dnl
>  dnl
>  ramdisk)
> - _recurse std bpf wd0 sd0 tty00 tty01 rd0 bio diskmap
> + _recurse std bpf sd0 tty00 tty01 rd0 bio diskmap
>   _recurse cd0 ttyC0 wskbd0 wskbd1 wskbd2 random
>   ;;
>  
> 
> 



Another diff for modern Intel laptops

2021-09-27 Thread Mark Kettenis
Since we've seen that having a driver for the GPIO controller on
modern Intel laptops is important, here is a diff that makes
pchgpio(4) attach in more cases.  In particular, this adds support for
the "performance" CPUs often found in larger laptop models.

If you currently have a

"INT3450" at acpi0 not configured

or

"INT34C6" at acpi0 not configured

line in your dmesg, please try this diff and report back to me.

Thanks,

Mark


Index: dev/acpi/pchgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
retrieving revision 1.7
diff -u -p -r1.7 pchgpio.c
--- dev/acpi/pchgpio.c  21 Sep 2021 14:59:13 -  1.7
+++ dev/acpi/pchgpio.c  27 Sep 2021 13:33:50 -
@@ -26,7 +26,7 @@
 #include 
 #include 
 
-#define PCHGPIO_MAXCOM 4
+#define PCHGPIO_MAXCOM 5
 
 #define PCHGPIO_CONF_TXSTATE   0x0001
 #define PCHGPIO_CONF_RXSTATE   0x0002
@@ -107,11 +107,45 @@ struct cfdriver pchgpio_cd = {
 };
 
 const char *pchgpio_hids[] = {
+   "INT3450",
"INT34BB",
"INT34C5",
+   "INT34C6",
NULL
 };
 
+const struct pchgpio_group cnl_h_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 24, 0 }, /* GPP_A */
+   { 0, 1, 25, 50, 32 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 51, 74, 64 },   /* GPP_C */
+   { 1, 1, 75, 98, 96 },   /* GPP_D */
+   { 1, 2, 99, 106, 128 }, /* GPP_G */
+
+   /* Community 3 */
+   { 2, 0, 155, 178, 192 },/* GPP_K */
+   { 2, 1, 179, 202, 224 },/* GPP_H */
+   { 2, 2, 203, 215, 256 },/* GPP_E */
+   { 2, 3, 216, 239, 288 },/* GPP_F */
+
+   /* Community 4 */
+   { 3, 2, 269, 286, 320 },/* GPP_I */
+   { 3, 3, 287, 298, 352 },/* GPP_J */
+};
+
+const struct pchgpio_device cnl_h_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = cnl_h_groups,
+   .ngroups = nitems(cnl_h_groups),
+   .npins = 384,
+};
+
 const struct pchgpio_group cnl_lp_groups[] =
 {
/* Community 0 */
@@ -171,9 +205,47 @@ const struct pchgpio_device tgl_lp_devic
.npins = 360,
 };
 
+const struct pchgpio_group tgl_h_groups[] =
+{
+   /* Community 0 */
+   { 0, 0, 0, 24, 0 }, /* GPP_A */
+   { 0, 1, 25, 44, 32 },   /* GPP_R */
+   { 0, 2, 45, 70, 64 },   /* GPP_B */
+
+   /* Community 1 */
+   { 1, 0, 79, 104, 128 }, /* GPP_D */
+   { 1, 1, 105, 128, 160 },/* GPP_C */
+   { 1, 2, 129, 136, 192 },/* GPP_S */
+   { 1, 3, 137, 153, 224 },/* GPP_G */
+
+   /* Community 3 */
+   { 2, 0, 181, 193, 288 },/* GPP_E */
+   { 2, 1, 194, 217, 320 },/* GPP_F */
+
+   /* Community 4 */
+   { 2, 0, 218, 241, 352 },/* GPP_H */
+   { 2, 1, 242, 251, 384 },/* GPP_J */
+   { 2, 2, 252, 266, 416 },/* GPP_K */
+
+   /* Community 5 */
+   { 3, 0, 267, 281, 448 },/* GPP_I */
+};
+
+const struct pchgpio_device tgl_h_device =
+{
+   .pad_size = 16,
+   .gpi_is = 0x100,
+   .gpi_ie = 0x120,
+   .groups = tgl_h_groups,
+   .ngroups = nitems(tgl_h_groups),
+   .npins = 480,
+};
+
 struct pchgpio_match pchgpio_devices[] = {
+   { "INT3450", &cnl_h_device },
{ "INT34BB", &cnl_lp_device },
{ "INT34C5", &tgl_lp_device },
+   { "INT34C6", &tgl_h_device },
 };
 
 intpchgpio_read_pin(void *, int);



Re: sigwaitinfo(2) and sigtimedwait(2)

2021-09-26 Thread Mark Kettenis
> Date: Fri, 24 Sep 2021 19:36:21 +0200
> From: Rafael Sadowski 
> 
> I'm trying to port the more KDE stuff so my question is from porter
> perspective.
> 
> I need sigwaitinfo(2)/sigtimedwait(2) and I found both functions in
> lib/libc/gen/sigwait.c with the comment "need kernel to fill in more
> siginfo_t bits first". Is the comment still up to date? If no, is it
> possible to unlock the functions?

Still true.  These functions are somewhat underspecified by POSIX so
it isn't really obvious what additional bits need to be filled in.
Having examples of code that use these interfaces from ports could
help with that.



Re: new gpiocharger driver

2021-09-25 Thread Mark Kettenis
> Date: Sat, 25 Sep 2021 15:45:15 +
> From: Klemens Nanni 
> 
> On Sat, Sep 25, 2021 at 10:53:12AM +, Klemens Nanni wrote:
> > On Fri, Sep 24, 2021 at 10:35:56PM +, Klemens Nanni wrote:
> > > On Thu, Sep 16, 2021 at 11:07:20AM +0200, Mark Kettenis wrote:
> > > > > Date: Thu, 16 Sep 2021 06:14:39 +
> > > > > From: Klemens Nanni 
> > > > > 
> > > > > On 5 September 2021 01:22:53 GMT+05:00, Klemens Nanni 
> > > > >  wrote:
> > > > > >Read a single GPIO pin indicating whether AC is plugged in or not.
> > > > > >
> > > > > >This gives me a sensor on my Pinebook Pro.
> > > > > >cwfg(4) already provides battery information but not the charger 
> > > > > >bits.
> > > > > >
> > > > > >apm(4) integration can follow separately.
> > > > > >
> > > > > >Feedback? OK?
> > > > > 
> > > > > Ping.
> > > > > The diff applies after the "new gpioleds driver" one.
> > > 
> > > New diff.  OK for after unlock modulo the gpioleds bits after being
> > > committed separately (CVS diffs juggling is annoying)?
> > > 
> > > > > >+gpios_len = OF_getproplen(node,
> > > > > >+gpios_property = "charger-status-gpios");
> > > > 
> > > > No, please don't hide assignment statements as function arguments.
> > > 
> > > Fixed, thanks.
> > 
> > Clean diff after gpioleds(4) got in, this time with the same whitespace
> > fixes.
> > 
> > Sorry for the noise.
> 
> Sigh, an eagle-eyed reader noticed my copy-paste error:
> 
> > +   gpios_property = "charger-status-gpios");
> 
> Here's the correct diff that also compiles.

ok kettenis@

> Index: share/man/man4/gpiocharger.4
> ===
> RCS file: share/man/man4/gpiocharger.4
> diff -N share/man/man4/gpiocharger.4
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ share/man/man4/gpiocharger.4  24 Sep 2021 22:33:26 -
> @@ -0,0 +1,51 @@
> +.\"  $OpenBSD: $
> +.\"
> +.\" Copyright (c) 2021 Klemens Nanni 
> +.\"
> +.\" Permission to use, copy, modify, and distribute this software for any
> +.\" purpose with or without fee is hereby granted, provided that the above
> +.\" copyright notice and this permission notice appear in all copies.
> +.\"
> +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +.\"
> +.Dd $Mdocdate: September 04 2021 $
> +.Dt GPIOCHARGER 4
> +.Os
> +.Sh NAME
> +.Nm gpiocharger
> +.Nd GPIO battery charger
> +.Sh SYNOPSIS
> +.Cd "gpiocharger* at fdt?"
> +.Sh DESCRIPTION
> +The
> +.Nm
> +driver provides support for battery chargers connected to GPIO pins.
> +Currently, only power supply status events are supported.
> +.Pp
> +The power supply status (connected or disconnected) is set up as a sensor
> +and can be monitored using
> +.Xr sysctl 8
> +or
> +.Xr sensorsd 8 .
> +.Sh SEE ALSO
> +.Xr gpio 4 ,
> +.Xr intro 4 ,
> +.Xr sensorsd 8 ,
> +.Xr sysctl 8
> +.Sh HISTORY
> +The
> +.Nm
> +driver first appeared in
> +.Ox 7.1 .
> +.Sh AUTHORS
> +.An -nosplit
> +The
> +.Nm
> +driver was written by
> +.An Klemens Nanni Aq Mt k...@openbsd.org .
> Index: share/man/man4/Makefile
> ===
> RCS file: /cvs/src/share/man/man4/Makefile,v
> retrieving revision 1.807
> diff -u -p -r1.807 Makefile
> --- share/man/man4/Makefile   25 Sep 2021 10:43:23 -  1.807
> +++ share/man/man4/Makefile   25 Sep 2021 10:45:42 -
> @@ -35,7 +35,8 @@ MAN=aac.4 abcrtc.4 abl.4 ac97.4 acphy.4
>   eso.4 ess.4 et.4 etherip.4 etphy.4 ex.4 exphy.4 exrtc.4 \
>   fanpwr.4 fd.4 fdc.4 fec.4 fido.4 fins.4 fintek.4 fms.4 fusbtc.4 \
>   fuse.4 fxp.4 \
> - gdt.4 gentbi.4 gem.4 gfrtc.4 gif.4 glenv.4 glkgpio.4 gpio.4 gpiodcf.4 \
> + gdt.4 gentbi.4 gem.4 gfrtc

Re: new gpioleds driver

2021-09-16 Thread Mark Kettenis
> Date: Thu, 16 Sep 2021 06:12:39 +
> From: Klemens Nanni 
> 
> On 3 September 2021 20:16:33 GMT+05:00, Klemens Nanni  
> wrote:
> >Here is a tiny driver enabling machines such as the Pinebook Pro to
> >indicate power, it is intentionally minimal and does not expose anything
> >via sysctl(8)/sensorsd(8) or gpioctl(8).
> >
> >This is helpful for machines where graphics, keyboard and/or serial
> >console have problems and people tend to debug things at various
> >stages, e.g. a green LED now tells me that we reached the kernel.
> >
> >Once arm64 has suspend/resume we can indicate that as well.
> >
> >Feedback? OK?
> 
> Ping.

Two small nits below.  With those fixed, ok kettenis@

> >diff 3a5fa1afe4fc417b263a9d7363eaa933acbf5f2c refs/heads/master
> >blob - b597911b8f43a730799bbe34290843f3429c6958
> >blob + eec643f20e0feae7d4a4930f7d30575cffc25913
> >--- distrib/sets/lists/man/mi
> >+++ distrib/sets/lists/man/mi
> >@@ -1415,6 +1415,7 @@
> > ./usr/share/man/man4/gpio.4
> > ./usr/share/man/man4/gpiodcf.4
> > ./usr/share/man/man4/gpioiic.4
> >+./usr/share/man/man4/gpioleds.4
> > ./usr/share/man/man4/gpioow.4
> > ./usr/share/man/man4/graphaudio.4
> > ./usr/share/man/man4/gre.4
> >blob - 1541bb05749defd67419b07460def0f4065cbfce
> >blob + bb62c44d32f152ecf64aa77d60a1a5a3454d3968
> >--- share/man/man4/Makefile
> >+++ share/man/man4/Makefile
> >@@ -36,7 +36,7 @@ MAN=   aac.4 abcrtc.4 abl.4 ac97.4 acphy.4 acrtc.4 \
> > fanpwr.4 fd.4 fdc.4 fec.4 fido.4 fins.4 fintek.4 fms.4 fusbtc.4 \
> > fuse.4 fxp.4 \
> > gdt.4 gentbi.4 gem.4 gfrtc.4 gif.4 glenv.4 glkgpio.4 gpio.4 gpiodcf.4 \
> >-gpioiic.4 gpioow.4 graphaudio.4 gre.4 gscsio.4 \
> >+gpioiic.4 gpioleds.4 gpioow.4 graphaudio.4 gre.4 gscsio.4 \
> > hds.4 hiclock.4 hidwusb.4 hifn.4 hil.4 hilid.4 hilkbd.4 hilms.4 \
> > hireset.4 hitemp.4 hme.4 hotplug.4 hsq.4 \
> > hvn.4 hvs.4 hyperv.4 \
> >blob - /dev/null
> >blob + 5338437382ce25842ac833cfb847d8fe08e90e6d (mode 644)
> >--- /dev/null
> >+++ share/man/man4/gpioleds.4
> >@@ -0,0 +1,45 @@
> >+.\" $OpenBSD: $
> >+.\"
> >+.\" Copyright (c) 2021 Klemens Nanni 
> >+.\"
> >+.\" Permission to use, copy, modify, and distribute this software for any
> >+.\" purpose with or without fee is hereby granted, provided that the above
> >+.\" copyright notice and this permission notice appear in all copies.
> >+.\"
> >+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> >+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> >+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> >+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> >+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> >+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> >+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> >+.\"
> >+.Dd $Mdocdate: September 03 2021 $
> >+.Dt GPIOLEDS 4
> >+.Os
> >+.Sh NAME
> >+.Nm gpioleds
> >+.Nd GPIO LEDs
> >+.Sh SYNOPSIS
> >+.Cd "gpioleds* at fdt?"
> >+.Sh DESCRIPTION
> >+The
> >+.Nm
> >+driver provides support for LEDs connected to GPIO pins.
> >+.Pp
> >+Currently, LEDs are only set to their default state,
> >+e.g. to indicate the power status of the system.
> >+.Sh SEE ALSO
> >+.Xr gpio 4 ,
> >+.Xr intro 4
> >+.Sh HISTORY
> >+The
> >+.Nm
> >+driver first appeared in
> >+.Ox 7.0 .
> >+.Sh AUTHORS
> >+.An -nosplit
> >+The
> >+.Nm
> >+driver was written by
> >+.An Klemens Nanni Aq Mt k...@openbsd.org .
> >blob - 3e6591124cad872771cd68599761c26981d185d8
> >blob + d3c3afb621f20013dc2475b4d87bd959e4127c9d
> >--- sys/arch/arm64/conf/GENERIC
> >+++ sys/arch/arm64/conf/GENERIC
> >@@ -131,6 +131,8 @@ amdgpu*  at pci?
> > drm*at amdgpu?
> > wsdisplay*  at amdgpu?
> > 
> >+gpioleds*   at fdt?
> >+
> > # Apple
> > apldart*at fdt?
> > apldog* at fdt? early 1
> >blob - 3b23523493a12c8b8091f9744561c6bcbb685751
> >blob + f749803b07408179ddc090d484ed4f79bfcb52a7
> >--- sys/dev/fdt/files.fdt
> >+++ sys/dev/fdt/files.fdt
> >@@ -588,3 +588,7 @@ file dev/fdt/cwfg.c  cwfg
> > device  dapmic
> > attach  dapmic at i2c
> > filedev/fdt/dapmic.cdapmic
> >+
> >+device  gpioleds
> >+attach  gpioleds at fdt
> >+filedev/fdt/gpioleds.c  gpioleds
> >blob - /dev/null
> >blob + f5ad5e7b6220f3d20b129fe26f8ada2d63eb5909 (mode 644)
> >--- /dev/null
> >+++ sys/dev/fdt/gpioleds.c
> >@@ -0,0 +1,102 @@
> >+/*  $OpenBSD: $ */
> >+/*
> >+ * Copyright (c) 2021 Klemens Nanni 
> >+ *
> >+ * Permission to use, copy, modify, and distribute this software for any
> >+ * purpose with or without fee is hereby granted, provided that the above
> >+ * copyright notice and this permission notice appear in all copies.
> >+ *
> >+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> >+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF

Re: new gpiocharger driver

2021-09-16 Thread Mark Kettenis
> Date: Thu, 16 Sep 2021 06:14:39 +
> From: Klemens Nanni 
> 
> On 5 September 2021 01:22:53 GMT+05:00, Klemens Nanni  
> wrote:
> >Read a single GPIO pin indicating whether AC is plugged in or not.
> >
> >This gives me a sensor on my Pinebook Pro.
> >cwfg(4) already provides battery information but not the charger bits.
> >
> >apm(4) integration can follow separately.
> >
> >Feedback? OK?
> 
> Ping.
> The diff applies after the "new gpioleds driver" one.
> 
> >diff 4e7699b4cf65fba4bf837b202fb68ee0f66e6d07 refs/heads/master
> >blob - b92b1f60934ba60f92341ce94570d0ae96e78c1d
> >blob + 549a8316b874e4e5a6e1ede1640eeefadd9af0d4
> >--- distrib/sets/lists/man/mi
> >+++ distrib/sets/lists/man/mi
> >@@ -1417,6 +1417,7 @@
> > ./usr/share/man/man4/glenv.4
> > ./usr/share/man/man4/glkgpio.4
> > ./usr/share/man/man4/gpio.4
> >+./usr/share/man/man4/gpiocharger.4
> > ./usr/share/man/man4/gpiodcf.4
> > ./usr/share/man/man4/gpioiic.4
> > ./usr/share/man/man4/gpioleds.4
> >blob - bb62c44d32f152ecf64aa77d60a1a5a3454d3968
> >blob + a40cf91ec22f638f9c030aa7128d10bf75748305
> >--- share/man/man4/Makefile
> >+++ share/man/man4/Makefile
> >@@ -35,7 +35,8 @@ MAN=   aac.4 abcrtc.4 abl.4 ac97.4 acphy.4 acrtc.4 \
> > eso.4 ess.4 et.4 etherip.4 etphy.4 ex.4 exphy.4 exrtc.4 \
> > fanpwr.4 fd.4 fdc.4 fec.4 fido.4 fins.4 fintek.4 fms.4 fusbtc.4 \
> > fuse.4 fxp.4 \
> >-gdt.4 gentbi.4 gem.4 gfrtc.4 gif.4 glenv.4 glkgpio.4 gpio.4 gpiodcf.4 \
> >+gdt.4 gentbi.4 gem.4 gfrtc.4 gif.4 glenv.4 glkgpio.4 gpio.4 \
> >+gpiocharger.4 gpiodcf.4 \
> > gpioiic.4 gpioleds.4 gpioow.4 graphaudio.4 gre.4 gscsio.4 \
> > hds.4 hiclock.4 hidwusb.4 hifn.4 hil.4 hilid.4 hilkbd.4 hilms.4 \
> > hireset.4 hitemp.4 hme.4 hotplug.4 hsq.4 \
> >blob - /dev/null
> >blob + 12aa353d9367f57f07a89ae30dc3966c373032f8 (mode 644)
> >--- /dev/null
> >+++ share/man/man4/gpiocharger.4
> >@@ -0,0 +1,51 @@
> >+.\" $OpenBSD: $
> >+.\"
> >+.\" Copyright (c) 2021 Klemens Nanni 
> >+.\"
> >+.\" Permission to use, copy, modify, and distribute this software for any
> >+.\" purpose with or without fee is hereby granted, provided that the above
> >+.\" copyright notice and this permission notice appear in all copies.
> >+.\"
> >+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> >+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> >+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> >+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> >+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> >+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> >+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> >+.\"
> >+.Dd $Mdocdate: September 04 2021 $
> >+.Dt GPIOCHARGER 4
> >+.Os
> >+.Sh NAME
> >+.Nm gpiocharger
> >+.Nd GPIO battery charger
> >+.Sh SYNOPSIS
> >+.Cd "gpiocharger* at fdt?"
> >+.Sh DESCRIPTION
> >+The
> >+.Nm
> >+driver provides support for battery chargers connected to GPIO pins.
> >+Currently, only power supply status events are supported.
> >+.Pp
> >+The power supply status (connected or disconnected) is set up as a sensor
> >+and can be monitored using
> >+.Xr sysctl 8
> >+or
> >+.Xr sensorsd 8 .
> >+.Sh SEE ALSO
> >+.Xr gpio 4 ,
> >+.Xr intro 4 ,
> >+.Xr sensorsd 8 ,
> >+.Xr sysctl 8
> >+.Sh HISTORY
> >+The
> >+.Nm
> >+driver first appeared in
> >+.Ox 7.0 .
> >+.Sh AUTHORS
> >+.An -nosplit
> >+The
> >+.Nm
> >+driver was written by
> >+.An Klemens Nanni Aq Mt k...@openbsd.org .
> >blob - d3c3afb621f20013dc2475b4d87bd959e4127c9d
> >blob + b0702181cd2e510ef5267460dc71b68a8abd508a
> >--- sys/arch/arm64/conf/GENERIC
> >+++ sys/arch/arm64/conf/GENERIC
> >@@ -132,6 +132,7 @@ drm* at amdgpu?
> > wsdisplay*  at amdgpu?
> > 
> > gpioleds*   at fdt?
> >+gpiocharger*at fdt?
> > 
> > # Apple
> > apldart*at fdt?
> >blob - f749803b07408179ddc090d484ed4f79bfcb52a7
> >blob + f91263405dc183dd60e8ddc1fe69c5fbaf38b651
> >--- sys/dev/fdt/files.fdt
> >+++ sys/dev/fdt/files.fdt
> >@@ -592,3 +592,7 @@ file dev/fdt/dapmic.cdapmic
> > device  gpioleds
> > attach  gpioleds at fdt
> > filedev/fdt/gpioleds.c  gpioleds
> >+
> >+device  gpiocharger
> >+attach  gpiocharger at fdt
> >+filedev/fdt/gpiocharger.c   gpiocharger
> >blob - /dev/null
> >blob + dbf6bf50a13029438b9b7e969306c34640c5747e (mode 644)
> >--- /dev/null
> >+++ sys/dev/fdt/gpiocharger.c
> >@@ -0,0 +1,116 @@
> >+/*  $OpenBSD: $ */
> >+/*
> >+ * Copyright (c) 2021 Klemens Nanni 
> >+ *
> >+ * Permission to use, copy, modify, and distribute this software for any
> >+ * purpose with or without fee is hereby granted, provided that the above
> >+ * copyright notice and this permission notice appear in all copies.
> >+ *
> >+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> >+ * WITH REGARD TO THIS SOFTWARE INCLUDING 

Re: Suspend/Resume on modern Intel laptop platforms

2021-09-15 Thread Mark Kettenis
> Date: Wed, 15 Sep 2021 17:29:39 +0200 (CEST)
> From: Mark Kettenis 
> 
> The diff below is a preliminary diff to fix a suspend/resume issue on
> recent Thinkpads.  This needs testing on a wider range of laptops to
> make sure it doesn't break things.  The diff also puts some
> information in dmesg that will help me improve things in the future.
> 
> So, if you have a laptop where pchgpio(4) attaches *and* supports S3
> suspend/resume, please apply this diff, do a suspend/resume cycle and
> send me a dmesg collected after that suspend/resume cycle.

This diff is now in snapshots, so instead of applying the diff and
build your own kernel, you can just upgrade to the latest snapshot.

Thanks,

Mark

> Index: dev/acpi/pchgpio.c
> ===
> RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
> retrieving revision 1.5
> diff -u -p -r1.5 pchgpio.c
> --- dev/acpi/pchgpio.c30 Aug 2021 18:40:19 -  1.5
> +++ dev/acpi/pchgpio.c15 Sep 2021 15:16:52 -
> @@ -28,12 +28,13 @@
>  
>  #define PCHGPIO_MAXCOM   4
>  
> -#define PCHGPIO_CONF_TXSTATE 0x0001
> -#define PCHGPIO_CONF_RXSTATE 0x0002
> -#define PCHGPIO_CONF_RXINV   0x0080
> -#define PCHGPIO_CONF_RXEV_EDGE   0x0200
> -#define PCHGPIO_CONF_RXEV_ZERO   0x0400
> -#define PCHGPIO_CONF_RXEV_MASK   0x0600
> +#define PCHGPIO_CONF_TXSTATE 0x0001
> +#define PCHGPIO_CONF_RXSTATE 0x0002
> +#define PCHGPIO_CONF_RXINV   0x0080
> +#define PCHGPIO_CONF_RXEV_EDGE   0x0200
> +#define PCHGPIO_CONF_RXEV_ZERO   0x0400
> +#define PCHGPIO_CONF_RXEV_MASK   0x0600
> +#define PCHGPIO_CONF_PADRSTCFG_MASK  0xc000
>  
>  #define PCHGPIO_PADBAR   0x00c
>  
> @@ -59,6 +60,11 @@ struct pchgpio_match {
>   const struct pchgpio_device *device;
>  };
>  
> +struct pchgpio_pincfg {
> + uint32_tpad_cfg_dw0;
> + uint32_tpad_cfg_dw1;
> +};
> +
>  struct pchgpio_intrhand {
>   int (*ih_func)(void *);
>   void *ih_arg;
> @@ -80,6 +86,7 @@ struct pchgpio_softc {
>   int sc_padsize;
>  
>   int sc_npins;
> + struct pchgpio_pincfg *sc_pin_cfg;
>   struct pchgpio_intrhand *sc_pin_ih;
>  
>   struct acpi_gpio sc_gpio;
> @@ -87,9 +94,11 @@ struct pchgpio_softc {
>  
>  int  pchgpio_match(struct device *, void *, void *);
>  void pchgpio_attach(struct device *, struct device *, void *);
> +int  pchgpio_activate(struct device *, int);
>  
>  struct cfattach pchgpio_ca = {
> - sizeof(struct pchgpio_softc), pchgpio_match, pchgpio_attach
> + sizeof(struct pchgpio_softc), pchgpio_match, pchgpio_attach,
> + NULL, pchgpio_activate
>  };
>  
>  struct cfdriver pchgpio_cd = {
> @@ -170,6 +179,8 @@ int   pchgpio_read_pin(void *, int);
>  void pchgpio_write_pin(void *, int, int);
>  void pchgpio_intr_establish(void *, int, int, int (*)(void *), void *);
>  int  pchgpio_intr(void *);
> +void pchgpio_save(struct pchgpio_softc *);
> +void pchgpio_restore(struct pchgpio_softc *);
>  
>  int
>  pchgpio_match(struct device *parent, void *match, void *aux)
> @@ -240,6 +251,8 @@ pchgpio_attach(struct device *parent, st
>  
>   sc->sc_padsize = sc->sc_device->pad_size;
>   sc->sc_npins = sc->sc_device->npins;
> + sc->sc_pin_cfg = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_cfg),
> + M_DEVBUF, M_WAITOK);
>   sc->sc_pin_ih = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_ih),
>   M_DEVBUF, M_WAITOK | M_ZERO);
>  
> @@ -263,11 +276,48 @@ pchgpio_attach(struct device *parent, st
>  
>  unmap:
>   free(sc->sc_pin_ih, M_DEVBUF, sc->sc_npins * sizeof(*sc->sc_pin_ih));
> + free(sc->sc_pin_cfg, M_DEVBUF, sc->sc_npins * sizeof(*sc->sc_pin_cfg));
>   for (i = 0; i < sc->sc_naddr; i++)
>   bus_space_unmap(sc->sc_memt[i], sc->sc_memh[i],
>   aaa->aaa_size[i]);
>  }
>  
> +int
> +pchgpio_activate(struct device *self, int act)
> +{
> + struct pchgpio_softc *sc = (struct pchgpio_softc *)self;
> +//   int i, j;
> +
> + switch (act) {
> + case DVACT_SUSPEND:
> + printf("%s: suspend\n", sc->sc_dev.dv_xname);
> +#if 0
> + for (i = 0; i < 4; i++) {
> + for (j = 0; j < 0xc00; j += 4) {
> + printf("%04x: 0x%08x\n", j,
> + bus_space_read_4(sc->sc_memt[i], 
> sc->sc_memh[i], j));
> +

Suspend/Resume on modern Intel laptop platforms

2021-09-15 Thread Mark Kettenis
The diff below is a preliminary diff to fix a suspend/resume issue on
recent Thinkpads.  This needs testing on a wider range of laptops to
make sure it doesn't break things.  The diff also puts some
information in dmesg that will help me improve things in the future.

So, if you have a laptop where pchgpio(4) attaches *and* supports S3
suspend/resume, please apply this diff, do a suspend/resume cycle and
send me a dmesg collected after that suspend/resume cycle.

Thanks,

Mark


Index: dev/acpi/pchgpio.c
===
RCS file: /cvs/src/sys/dev/acpi/pchgpio.c,v
retrieving revision 1.5
diff -u -p -r1.5 pchgpio.c
--- dev/acpi/pchgpio.c  30 Aug 2021 18:40:19 -  1.5
+++ dev/acpi/pchgpio.c  15 Sep 2021 15:16:52 -
@@ -28,12 +28,13 @@
 
 #define PCHGPIO_MAXCOM 4
 
-#define PCHGPIO_CONF_TXSTATE   0x0001
-#define PCHGPIO_CONF_RXSTATE   0x0002
-#define PCHGPIO_CONF_RXINV 0x0080
-#define PCHGPIO_CONF_RXEV_EDGE 0x0200
-#define PCHGPIO_CONF_RXEV_ZERO 0x0400
-#define PCHGPIO_CONF_RXEV_MASK 0x0600
+#define PCHGPIO_CONF_TXSTATE   0x0001
+#define PCHGPIO_CONF_RXSTATE   0x0002
+#define PCHGPIO_CONF_RXINV 0x0080
+#define PCHGPIO_CONF_RXEV_EDGE 0x0200
+#define PCHGPIO_CONF_RXEV_ZERO 0x0400
+#define PCHGPIO_CONF_RXEV_MASK 0x0600
+#define PCHGPIO_CONF_PADRSTCFG_MASK0xc000
 
 #define PCHGPIO_PADBAR 0x00c
 
@@ -59,6 +60,11 @@ struct pchgpio_match {
const struct pchgpio_device *device;
 };
 
+struct pchgpio_pincfg {
+   uint32_tpad_cfg_dw0;
+   uint32_tpad_cfg_dw1;
+};
+
 struct pchgpio_intrhand {
int (*ih_func)(void *);
void *ih_arg;
@@ -80,6 +86,7 @@ struct pchgpio_softc {
int sc_padsize;
 
int sc_npins;
+   struct pchgpio_pincfg *sc_pin_cfg;
struct pchgpio_intrhand *sc_pin_ih;
 
struct acpi_gpio sc_gpio;
@@ -87,9 +94,11 @@ struct pchgpio_softc {
 
 intpchgpio_match(struct device *, void *, void *);
 void   pchgpio_attach(struct device *, struct device *, void *);
+intpchgpio_activate(struct device *, int);
 
 struct cfattach pchgpio_ca = {
-   sizeof(struct pchgpio_softc), pchgpio_match, pchgpio_attach
+   sizeof(struct pchgpio_softc), pchgpio_match, pchgpio_attach,
+   NULL, pchgpio_activate
 };
 
 struct cfdriver pchgpio_cd = {
@@ -170,6 +179,8 @@ int pchgpio_read_pin(void *, int);
 void   pchgpio_write_pin(void *, int, int);
 void   pchgpio_intr_establish(void *, int, int, int (*)(void *), void *);
 intpchgpio_intr(void *);
+void   pchgpio_save(struct pchgpio_softc *);
+void   pchgpio_restore(struct pchgpio_softc *);
 
 int
 pchgpio_match(struct device *parent, void *match, void *aux)
@@ -240,6 +251,8 @@ pchgpio_attach(struct device *parent, st
 
sc->sc_padsize = sc->sc_device->pad_size;
sc->sc_npins = sc->sc_device->npins;
+   sc->sc_pin_cfg = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_cfg),
+   M_DEVBUF, M_WAITOK);
sc->sc_pin_ih = mallocarray(sc->sc_npins, sizeof(*sc->sc_pin_ih),
M_DEVBUF, M_WAITOK | M_ZERO);
 
@@ -263,11 +276,48 @@ pchgpio_attach(struct device *parent, st
 
 unmap:
free(sc->sc_pin_ih, M_DEVBUF, sc->sc_npins * sizeof(*sc->sc_pin_ih));
+   free(sc->sc_pin_cfg, M_DEVBUF, sc->sc_npins * sizeof(*sc->sc_pin_cfg));
for (i = 0; i < sc->sc_naddr; i++)
bus_space_unmap(sc->sc_memt[i], sc->sc_memh[i],
aaa->aaa_size[i]);
 }
 
+int
+pchgpio_activate(struct device *self, int act)
+{
+   struct pchgpio_softc *sc = (struct pchgpio_softc *)self;
+// int i, j;
+
+   switch (act) {
+   case DVACT_SUSPEND:
+   printf("%s: suspend\n", sc->sc_dev.dv_xname);
+#if 0
+   for (i = 0; i < 4; i++) {
+   for (j = 0; j < 0xc00; j += 4) {
+   printf("%04x: 0x%08x\n", j,
+   bus_space_read_4(sc->sc_memt[i], 
sc->sc_memh[i], j));
+   }
+   }
+#endif
+   pchgpio_save(sc);
+   break;
+   case DVACT_RESUME:
+   printf("%s: resume\n", sc->sc_dev.dv_xname);
+#if 0
+   for (i = 0; i < 4; i++) {
+   for (j = 0; j < 0xc00; j += 4) {
+   printf("%04x: 0x%08x\n", j,
+   bus_space_read_4(sc->sc_memt[i], 
sc->sc_memh[i], j));
+   }
+   }
+#endif
+   pchgpio_restore(sc);
+   break;
+   }
+
+   return 0;
+}
+
 const struct pchgpio_group *
 pchgpio_find_group(struct pchgpio_softc *sc, int pin)
 {
@@ -403,4 +453,74 @@ pchgpio_intr(void *arg)
}
 
return handled;
+}
+
+void
+pchgpio_save(struct pchgpio_softc *sc)
+{
+   int gpiobase, group, bit, pin, pad;
+   uint16_t base, limit;
+   

Re: riscv64: icache flush using sysarch(2)

2021-09-13 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Mon, 13 Sep 2021 21:35:33 +0200
> 
> On Thu, Sep 09 2021, Mark Kettenis  wrote:
> >> From: Jeremie Courreges-Anglas 
> >> Date: Sun, 05 Sep 2021 21:41:35 +0200
> >> 
> >> On Sat, Sep 04 2021, Jeremie Courreges-Anglas  wrote:
> >> > The first problem I was able to diagnose using egdb on riscv was
> >> > lang/python/2.7 using libffi and aborting in libcompiler-rt (the
> >> > compilerrt_abort() call below).
> >> >
> >> > --8<--
> >> > #elif defined(__riscv) && defined(__linux__)
> >> > #define __NR_riscv_flush_icache (244 + 15)
> >> >   register void *start_reg __asm("a0") = start;
> >> >   const register void *end_reg __asm("a1") = end;
> >> >   const register long flags __asm("a2") = 0;
> >> >   const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
> >> >   __asm __volatile("ecall"
> >> >: "=r"(start_reg)
> >> >: "r"(start_reg), "r"(end_reg), "r"(flags), 
> >> > "r"(syscall_nr));
> >> >   assert(start_reg == 0 && "Cache flush syscall failed.");
> >> > #else
> >> > #if __APPLE__
> >> >   // On Darwin, sys_icache_invalidate() provides this functionality
> >> >   sys_icache_invalidate(start, end - start);
> >> > #else
> >> >   compilerrt_abort();
> >> > #endif
> >> > #endif
> >> > }
> >> > -->8--
> >> >
> >> > The usual way we provide this functionality is through sysarch(2).
> >> > Since the RISC-V ISA only provides fence.i as an extension, and that
> >> > fence.i doesn't support parameters to only act on a specific range,
> >> > I figured I would reflect that in the API for the sake of clarity.
> >> >
> >> > If people expect the spec to evolve and new CPUs to ship with
> >> > support for finer-grained invalidation, a more forward-looking approach
> >> > would be to mimic ARM_SYNC_ICACHE and struct arm_sync_icache_args, and
> >> > let the kernel ignore the parameters if appropriate.
> >
> > I think people expect the spec to evolve, and that's why fence.i is
> > considered to be an extension.  The Linux syscall for this does specify
> > an address range to flush, so I think it makes sense for us to do the
> > same.
> 
> 100% fine with this.
> 
> > Modelling this on ARM_SYNC_ICACHE makes sense to me.  Don't
> > think we need to do the UVM dance that arm32_sync_icache() does
> > though.  Keep it simple for now.
> 
> At least arm32_sync_icache() does some kind of validation:
> 
> revision 1.4
> date: 2017/03/21 21:43:11;  author: kettenis;  state: Exp;  lines: +36 -2;  
> commitid: nGOn4iYognC4A9wJ;
> Avoid panic in arm_sync_icache() by only flushing the parts of the address
> space for which we have a userland mapping.
> 
> 
> >> > In the diff below I'm moving the core of the code to cpu.c since it
> >> > doesn't look pmap-specific, but I don't feel strongly about it.
> >> > I haven't even built this since I'm still on the way back from k2k21 but
> >> > I figured I'd ask for feedback early.  Input welcome.
> >
> > While you're not wrong, pmap.c is the traditional place where we deal
> > with dcache to icache incoherency.  So I think keeping the code there
> > makes sense.
> 
> okay.  That results in much less churn.
> 
> > In fact, we already have a standardized pmap interface
> > for this, which is pmap_proc_iflush().  This is the code that gets
> > called by ptrace(2) to make sure an inserted breakpoint is visible to
> > all CPUs.  My suggestion would be to make RISCV64_SYNC_ICACHE call
> > this function and keep the code in pmap.c
> 
> In sys_process.c:process_domem() the various checks probably prevent
> userland from calling pmap_proc_iflush() with a bogus address/length.

Actually, I'm not so sure about that.  The uvm_io() call should only
succeed if the address is valid, but whether that is still valid
after the uvmspace_free() call is unclear.  The kernel lock probably
saves us here.

> I fear that we could hit the same kind of panic as on arm if people
> later modify riscv64 icache_flush() and pmap_proc_iflush() to actually
> implement finer-grained icache invalidation and sysarch() isn't updated
> to perform argument validation.

Really depends on how the future riscv64 instruction

Re: Fix headphone jack on Cirrus 4206

2021-09-11 Thread Mark Kettenis
> Date: Sat, 11 Sep 2021 08:53:00 -0600
> From: Tracey Emery 
> 
> On Sat, Sep 11, 2021 at 02:14:56PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 10 Sep 2021 18:27:07 -0600
> > > From: Tracey Emery 
> > > 
> > > Hello,
> > > 
> > > After reading some Linux commits, it shows the GPIO2 on the CS4206 chips
> > > needs to be unmuted to make the headphone jack work. The following diff
> > > fixed the headphone jack problem on my iMac12,2, amd64.
> > > 
> > > ok?
> > 
> > I don't think this is right.  The Linux code either frobs GPIOs 1 & 2
> > or GPIOs 1 & 3.
> > 
> 
> The code is a quirk in alsa.
> 
> iMac 14,1 requires the same quirk as iMac 12,2, using GPIO 2 and 3 for
> headphone and speaker output amps.  Add the codec SSID quirk entry
> (106b:0600) accordingly.
> 
> SND_PCI_QUIRK(0x106b, 0x0600, "iMac 14,1", CS420X_IMAC27_122),
> SND_PCI_QUIRK(0x106b, 0x1c00, "MacBookPro 8,1", CS420X_MBP81),
> SND_PCI_QUIRK(0x106b, 0x2000, "iMac 12,2", CS420X_IMAC27_122),
> SND_PCI_QUIRK(0x106b, 0x2800, "MacBookPro 10,1", CS420X_MBP101),
> 
> An option has to be set in their modprobe.d directory to enable the
> headphone jack:
> 
> options snd-hda-intel model=imac27
> 
> Without the patch below, there is signal on the headphone jack, but its
> volume is all the way down and not controllable. With GPIO2 unmuted,
> there is plenty of volume. Perhaps, another approach needs to be taken?
> Does this break other Macs with that Cirrus audio chip?
> 
> > I also don't quite see how this would match the iMac12,2.  Can you
> > show the pcidump -vxx output for this machine?
> 
> I'm sorry, it's 12,1. That is a typo. The pcidump is attached, although
> rather large.

So your model is supposed to be handled by:

/* this conflicts with too many other models */
/*SND_PCI_QUIRK(0x8086, 0x7270, "IMac 27 Inch", CS420X_IMAC27),*/

That one is commented out and the comment above it suggests it might
indeed break other machines.  The comment in our code suggests that
the same subid is used for the MacbookAir4,1.

Looking at the Linux code it actually applies two quirks.
There is an imac27_pincfgs fixup and then it chains to the
CS420X_GPIO13 fixup.  The latter only touches gpio 1 and 3, so I am
puzzled by your statement that gpio 2 fixes things.

> > > diff 51d7e35078715d4d1d1e1bf2fb89362bad8797d8 /usr/src
> > > blob - e3b969d585a96910eca98d31bedc8ba08455f763
> > > file + sys/dev/pci/azalia_codec.c
> > > --- sys/dev/pci/azalia_codec.c
> > > +++ sys/dev/pci/azalia_codec.c
> > > @@ -71,7 +71,7 @@ azalia_codec_init_vtbl(codec_t *this)
> > >   this->subid == 0x72708086 ||/* APPLE_MBA4_1 */
> > >   this->subid == 0xcb7910de) {/* APPLE_MBP5_5 */
> > >   this->qrks |= AZ_QRK_GPIO_UNMUTE_1 |
> > > - AZ_QRK_GPIO_UNMUTE_3;
> > > + AZ_QRK_GPIO_UNMUTE_2 | AZ_QRK_GPIO_UNMUTE_3;
> > >   }
> > >   break;
> > >   case 0x10134208:
> > > 
> > > 
> 
> -- 
> 
> Tracey Emery
> 
> [2:text/plain Show Save:pcidump.txt (333kB)]
> 



Re: Fix headphone jack on Cirrus 4206

2021-09-11 Thread Mark Kettenis
> Date: Fri, 10 Sep 2021 18:27:07 -0600
> From: Tracey Emery 
> 
> Hello,
> 
> After reading some Linux commits, it shows the GPIO2 on the CS4206 chips
> needs to be unmuted to make the headphone jack work. The following diff
> fixed the headphone jack problem on my iMac12,2, amd64.
> 
> ok?

I don't think this is right.  The Linux code either frobs GPIOs 1 & 2
or GPIOs 1 & 3.

I also don't quite see how this would match the iMac12,2.  Can you
show the pcidump -vxx output for this machine?


> diff 51d7e35078715d4d1d1e1bf2fb89362bad8797d8 /usr/src
> blob - e3b969d585a96910eca98d31bedc8ba08455f763
> file + sys/dev/pci/azalia_codec.c
> --- sys/dev/pci/azalia_codec.c
> +++ sys/dev/pci/azalia_codec.c
> @@ -71,7 +71,7 @@ azalia_codec_init_vtbl(codec_t *this)
>   this->subid == 0x72708086 ||/* APPLE_MBA4_1 */
>   this->subid == 0xcb7910de) {/* APPLE_MBP5_5 */
>   this->qrks |= AZ_QRK_GPIO_UNMUTE_1 |
> - AZ_QRK_GPIO_UNMUTE_3;
> + AZ_QRK_GPIO_UNMUTE_2 | AZ_QRK_GPIO_UNMUTE_3;
>   }
>   break;
>   case 0x10134208:
> 
> 



Re: wakeup_n() w/o DIAGNOSTIC fix

2021-09-09 Thread Mark Kettenis
> Date: Thu, 9 Sep 2021 17:03:54 +0200
> From: Martin Pieuchot 
> 
> The check to avoid a panic for contended rwlock(9) should be outside of 
> #ifdef DIAGNOSTIC.
> 
> ok?

ok kettenis@

> Index: kern//kern_synch.c
> ===
> RCS file: /cvs/src/sys/kern/kern_synch.c,v
> retrieving revision 1.177
> diff -u -p -r1.177 kern_synch.c
> --- kern//kern_synch.c4 Mar 2021 09:02:37 -   1.177
> +++ kern//kern_synch.c9 Sep 2021 15:01:07 -
> @@ -556,7 +556,6 @@ wakeup_n(const volatile void *ident, int
>   qp = [LOOKUP(ident)];
>   for (p = TAILQ_FIRST(qp); p != NULL && n != 0; p = pnext) {
>   pnext = TAILQ_NEXT(p, p_runq);
> -#ifdef DIAGNOSTIC
>   /*
>* If the rwlock passed to rwsleep() is contended, the
>* CPU will end up calling wakeup() between sleep_setup()
> @@ -566,6 +565,7 @@ wakeup_n(const volatile void *ident, int
>   KASSERT(p->p_stat == SONPROC);
>   continue;
>   }
> +#ifdef DIAGNOSTIC
>   if (p->p_stat != SSLEEP && p->p_stat != SSTOP)
>   panic("wakeup: p_stat is %d", (int)p->p_stat);
>  #endif
> 
> 



Re: [please test] amd64: schedule clock interrupts against system clock

2021-09-09 Thread Mark Kettenis
> Date: Mon, 6 Sep 2021 21:43:29 +0200
> From: Patrick Wildt 
> 
> Am Fri, Jul 30, 2021 at 07:55:29PM +0200 schrieb Alexander Bluhm:
> > On Mon, Jul 26, 2021 at 08:12:39AM -0500, Scott Cheloha wrote:
> > > On Fri, Jun 25, 2021 at 06:09:27PM -0500, Scott Cheloha wrote:
> > > 1 month bump.  I really appreciate the tests I've gotten so far, thank
> > > you.
> > 
> > On my Xeon machine it works and all regress tests pass.
> > 
> > But it fails on my old Opteron machine.  It hangs after attaching
> > cpu1.
> 
> This seems to be caused by contention on the mutex in i8254's gettick().
> 
> With Scott's diff, delay_func is i8254_delay() on that old AMD machine.
> Its gettick() implementation uses a mutex to protect I/O access to the
> i8254.
> 
> When secondary CPUs come up, they will wait for CPU0 to let them boot up
> further by checking for a flag:
> 
>   /*
>* We need to wait until we can identify, otherwise dmesg
>* output will be messy.
>*/
>   while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
>   delay(10);
> 
> Now that machine has 3 secondary cores that are spinning like that.  At
> the same time CPU0 waits for the core to come up:
> 
>   /* wait for it to identify */
>   for (i = 200; (ci->ci_flags & CPUF_IDENTIFY) && i > 0; i--)
>   delay(10);
> 
> That means we have 3-4 cores spinning just to be able to delay().  Our
> mutex implementation isn't fair, which means whoever manages to claim
> the free mutex wins.  Now if CPU2 and CPU3 are spinning all the time,
> CPU1 identifies and needs delay() and CPU0 waits for CPU1, maybe the
> one that needs to make progress never gets it.
> 
> I changed those delay(10) in cpu_hatch() to CPU_BUSY_CYCLE() and it went
> ahead a bit better instead of hanging forever.
> 
> Then I remembered an idea from years ago: fair kernel mutexes,
> so basically mutexes implemented as ticket locks, like our kernel lock.
> 
> I did a quick diff, which probably contains a million bugs, but with
> this bluhm's machine boots up well.
> 
> I'm not saying this is the solution, but it might be.

So the idea really is that the kernel mutexes are cheap and simple
spin locks.  The assumption has always been that there shouldn't be a
lot of contention on them.  If you have contention, your locking
probably isn't fine-grained enough, or you're using the wrong lock
type.  Note that our mpsafe pmaps use a per-page mutex.  So increasing
the size of struct mutex is going to have a significant impact.

Maybe we need another lock type, although we already have one that
tries to be "fair": struct __mp_lock, which is what we use for the
kernel lock and the scheduler lock.  A non-recursive version of that
might make sense.

> diff --git a/sys/kern/kern_lock.c b/sys/kern/kern_lock.c
> index 5cc55bb256a..c6a284beb51 100644
> --- a/sys/kern/kern_lock.c
> +++ b/sys/kern/kern_lock.c
> @@ -248,6 +248,8 @@ __mtx_init(struct mutex *mtx, int wantipl)
>   mtx->mtx_owner = NULL;
>   mtx->mtx_wantipl = wantipl;
>   mtx->mtx_oldipl = IPL_NONE;
> + mtx->mtx_ticket = 0;
> + mtx->mtx_cur = 0;
>  }
>  
>  #ifdef MULTIPROCESSOR
> @@ -255,15 +257,26 @@ void
>  mtx_enter(struct mutex *mtx)
>  {
>   struct schedstate_percpu *spc = ()->ci_schedstate;
> + struct cpu_info *ci = curcpu();
> + unsigned int t;
>  #ifdef MP_LOCKDEBUG
>   int nticks = __mp_lock_spinout;
>  #endif
> + int s;
> +
> + /* Avoid deadlocks after panic or in DDB */
> + if (panicstr || db_active)
> + return;
>  
>   WITNESS_CHECKORDER(MUTEX_LOCK_OBJECT(mtx),
>   LOP_EXCLUSIVE | LOP_NEWORDER, NULL);
>  
> + if (mtx->mtx_wantipl != IPL_NONE)
> + s = splraise(mtx->mtx_wantipl);
> +
>   spc->spc_spinning++;
> - while (mtx_enter_try(mtx) == 0) {
> + t = atomic_inc_int_nv(>mtx_ticket) - 1;
> + while (READ_ONCE(mtx->mtx_cur) != t) {
>   CPU_BUSY_CYCLE();
>  
>  #ifdef MP_LOCKDEBUG
> @@ -275,12 +288,21 @@ mtx_enter(struct mutex *mtx)
>  #endif
>   }
>   spc->spc_spinning--;
> +
> + mtx->mtx_owner = curcpu();
> + if (mtx->mtx_wantipl != IPL_NONE)
> + mtx->mtx_oldipl = s;
> +#ifdef DIAGNOSTIC
> + ci->ci_mutex_level++;
> +#endif
> + WITNESS_LOCK(MUTEX_LOCK_OBJECT(mtx), LOP_EXCLUSIVE);
>  }
>  
>  int
>  mtx_enter_try(struct mutex *mtx)
>  {
> - struct cpu_info *owner, *ci = curcpu();
> + struct cpu_info *ci = curcpu();
> + unsigned int t;
>   int s;
>  
>   /* Avoid deadlocks after panic or in DDB */
> @@ -290,13 +312,15 @@ mtx_enter_try(struct mutex *mtx)
>   if (mtx->mtx_wantipl != IPL_NONE)
>   s = splraise(mtx->mtx_wantipl);
>  
> - owner = atomic_cas_ptr(>mtx_owner, NULL, ci);
>  #ifdef DIAGNOSTIC
> - if (__predict_false(owner == ci))
> + if (__predict_false(mtx->mtx_owner == ci))
>   panic("mtx %p: locking against myself", mtx);
>  #endif
> - if (owner == NULL) {
> +
> + t = 

Re: mutex(9): initialize some more mutexes before use?

2021-09-09 Thread Mark Kettenis
> Date: Thu, 9 Sep 2021 15:17:25 +0200
> From: Patrick Wildt 
> 
> Am Thu, Sep 09, 2021 at 12:55:13PM +0200 schrieb Mark Kettenis:
> > > Date: Wed, 8 Sep 2021 10:45:53 +0200
> > > From: Martin Pieuchot 
> > > 
> > > On 07/09/21(Tue) 14:19, Patrick Wildt wrote:
> > > > Hi,
> > > > 
> > > > I was playing around a little with the mutex code and found that on
> > > > arm64 there are some uninitialized mutexes out there.
> > > > 
> > > > I think the arm64 specific one is comparatively easy to solve.  We
> > > > either initialize the mtx when we initialize the rest of the pmap, or
> > > > we move it into the global definition of those.  I opted for the former
> > > > version.
> > > 
> > > Is the kernel pmap mutex supposed to be used?  On i386 it isn't so the
> > > mutex's IPL is set to -1 and we added a KASSERT() in splraise() to spot
> > > any mistake.
> > 
> > Indeed.  The kernel pmap is special:
> > 
> > * It can never disappear.
> > 
> > * Page table pages are pre-allocated and are never freed.
> > 
> > * Mappings are (largely) unmanaged (by uvm).
> > 
> > Therefore the per-pmap lock isn't used for the kernel map on most
> > (all?) architectures.
> 
> The one that 'crashed' was pmap_tramp.  I only changed the kernel pmap
> because it was like 5 lines above (or below) and seemed to be missing it
> as well.

Ah, interesting.  What I said above applies to pmap_tramp as well.
Doubly so, as it doesn't have any managed mappings.  I guess the crash
happened in the pmap_enter() call from pmap_postinit()?

Doing the initialization for pmap_tramp probably makes sense though,
since the mutex will only be used in that one pmap_enter() call and
adding another comparison in pmap_lock() and pmap_unlock() could slow
us down.



Re: riscv64: icache flush using sysarch(2)

2021-09-09 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Sun, 05 Sep 2021 21:41:35 +0200
> 
> On Sat, Sep 04 2021, Jeremie Courreges-Anglas  wrote:
> > The first problem I was able to diagnose using egdb on riscv was
> > lang/python/2.7 using libffi and aborting in libcompiler-rt (the
> > compilerrt_abort() call below).
> >
> > --8<--
> > #elif defined(__riscv) && defined(__linux__)
> > #define __NR_riscv_flush_icache (244 + 15)
> >   register void *start_reg __asm("a0") = start;
> >   const register void *end_reg __asm("a1") = end;
> >   const register long flags __asm("a2") = 0;
> >   const register long syscall_nr __asm("a7") = __NR_riscv_flush_icache;
> >   __asm __volatile("ecall"
> >: "=r"(start_reg)
> >: "r"(start_reg), "r"(end_reg), "r"(flags), 
> > "r"(syscall_nr));
> >   assert(start_reg == 0 && "Cache flush syscall failed.");
> > #else
> > #if __APPLE__
> >   // On Darwin, sys_icache_invalidate() provides this functionality
> >   sys_icache_invalidate(start, end - start);
> > #else
> >   compilerrt_abort();
> > #endif
> > #endif
> > }
> > -->8--
> >
> > The usual way we provide this functionality is through sysarch(2).
> > Since the RISC-V ISA only provides fence.i as an extension, and that
> > fence.i doesn't support parameters to only act on a specific range,
> > I figured I would reflect that in the API for the sake of clarity.
> >
> > If people expect the spec to evolve and new CPUs to ship with
> > support for finer-grained invalidation, a more forward-looking approach
> > would be to mimic ARM_SYNC_ICACHE and struct arm_sync_icache_args, and
> > let the kernel ignore the parameters if appropriate.

I think people expect the spec to evolve, and that's why fence.i is
considered to be an extension.  The Linux syscall for this does specify
an address range to flush, so I think it makes sense for us to do the
same.  Modelling this on ARM_SYNC_ICACHE makes sense to me.  Don't
think we need to do the UVM dance that arm32_sync_icache() does
though.  Keep it simple for now.

> > In the diff below I'm moving the core of the code to cpu.c since it
> > doesn't look pmap-specific, but I don't feel strongly about it.
> > I haven't even built this since I'm still on the way back from k2k21 but
> > I figured I'd ask for feedback early.  Input welcome.

While you're not wrong, pmap.c is the traditional place where we deal
with dcache to icache incoherency.  So I think keeping the code there
makes sense.  In fact, we already have a standardized pmap interface
for this, which is pmap_proc_iflush().  This is the code that gets
called by ptrace(2) to make sure an inserted breakpoint is visible to
all CPUs.  My suggestion would be to make RISCV64_SYNC_ICACHE call
this function and keep the code in pmap.c

> Updated diff that builds (one #include was missing) and that appears to
> do the right thing.  The diff also includes the compiler-rt change.

> Index: gnu/llvm/compiler-rt/lib/builtins/clear_cache.c
> ===
> RCS file: /cvs/src/gnu/llvm/compiler-rt/lib/builtins/clear_cache.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 clear_cache.c
> --- gnu/llvm/compiler-rt/lib/builtins/clear_cache.c   2 Jan 2021 17:14:13 
> -   1.3
> +++ gnu/llvm/compiler-rt/lib/builtins/clear_cache.c   19 Aug 2021 17:28:40 
> -
> @@ -33,7 +33,7 @@ uintptr_t GetCurrentProcess(void);
>  #include 
>  #endif
>  
> -#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__))
> +#if defined(__OpenBSD__) && (defined(__arm__) || defined(__mips__) || 
> defined(__riscv))
>  // clang-format off
>  #include 
>  #include 
> @@ -157,6 +157,8 @@ void __clear_cache(void *start, void *en
> : "=r"(start_reg)
> : "r"(start_reg), "r"(end_reg), "r"(flags), 
> "r"(syscall_nr));
>assert(start_reg == 0 && "Cache flush syscall failed.");
> +#elif defined(__riscv) && defined(__OpenBSD__)
> + sysarch(RISCV_SYNC_ICACHE_ALL, NULL);
>  #else
>  #if __APPLE__
>// On Darwin, sys_icache_invalidate() provides this functionality
> Index: sys/arch/riscv64/include/cpufunc.h
> ===
> RCS file: /cvs/src/sys/arch/riscv64/include/cpufunc.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 cpufunc.h
> --- sys/arch/riscv64/include/cpufunc.h18 May 2021 09:14:49 -  
> 1.4
> +++ sys/arch/riscv64/include/cpufunc.h18 Aug 2021 23:24:59 -
> @@ -92,6 +92,7 @@ extern int64_t icache_line_size;
>  extern void (*cpu_dcache_wbinv_range)(paddr_t, psize_t);
>  extern void (*cpu_dcache_inv_range)(paddr_t, psize_t);
>  extern void (*cpu_dcache_wb_range)(paddr_t, psize_t);
> +extern void icache_flush(void);
>  
>  static __inline void
>  load_satp(uint64_t val)
> Index: sys/arch/riscv64/include/sysarch.h
> ===
> RCS file: sys/arch/riscv64/include/sysarch.h
> diff -N 

Re: mutex(9): initialize some more mutexes before use?

2021-09-09 Thread Mark Kettenis
> Date: Wed, 8 Sep 2021 10:45:53 +0200
> From: Martin Pieuchot 
> 
> On 07/09/21(Tue) 14:19, Patrick Wildt wrote:
> > Hi,
> > 
> > I was playing around a little with the mutex code and found that on
> > arm64 there are some uninitialized mutexes out there.
> > 
> > I think the arm64 specific one is comparatively easy to solve.  We
> > either initialize the mtx when we initialize the rest of the pmap, or
> > we move it into the global definition of those.  I opted for the former
> > version.
> 
> Is the kernel pmap mutex supposed to be used?  On i386 it isn't so the
> mutex's IPL is set to -1 and we added a KASSERT() in splraise() to spot
> any mistake.

Indeed.  The kernel pmap is special:

* It can never disappear.

* Page table pages are pre-allocated and are never freed.

* Mappings are (largely) unmanaged (by uvm).

Therefore the per-pmap lock isn't used for the kernel map on most
(all?) architectures.

> > The other one prolly needs more discussion/debugging.  So uvm_init()
> > calls first pmap_init() and then uvm_km_page_init().  The latter does
> > initialize the mutex, but arm64's pmap_init() already uses pools, which
> > uses km_alloc, which then uses that mutex.  Now one easy fix would be
> > to just initialize the definition right away instead of during runtime.
> > 
> > But there might be the question if arm64's pmap is allowed to use pools
> > and km_alloc during pmap_init.
> 
> That's a common question for the family of pmaps calling pool_setlowat()
> in pmap_init().  That's where pool_prime() is called from.
> 
> > #0  0xff800073f984 in mtx_enter (mtx=0xff8000f3b048 ) 
> > at /usr/src/sys/kern/kern_lock.c:281
> > #1  0xff8000937e6c in km_alloc (sz= > dwarf expression opcode 0xa3>, kv=0xff8000da6a30 , 
> > kp=0xff8000da6a48 , kd=0xff8000e934d8)
> > at /usr/src/sys/uvm/uvm_km.c:899
> > #2  0xff800084d804 in pool_page_alloc (pp= > Unhandled dwarf expression opcode 0xa3>, flags= > Unhandled dwarf expression opcode 0xa3>,
> > slowdown= > 0xa3>) at /usr/src/sys/kern/subr_pool.c:1633
> > #3  0xff800084f8dc in pool_allocator_alloc (pp=0xff8000ea6e40 
> > , flags=65792, slowdown=0xff80026cd098) at 
> > /usr/src/sys/kern/subr_pool.c:1602
> > #4  0xff800084ef08 in pool_p_alloc (pp=0xff8000ea6e40 
> > , flags=2, slowdown=0xff8000e9359c) at 
> > /usr/src/sys/kern/subr_pool.c:926
> > #5  0xff800084f808 in pool_prime (pp=, n= > variable: Unhandled dwarf expression opcode 0xa3>) at 
> > /usr/src/sys/kern/subr_pool.c:896
> > #6  0xff800048c20c in pmap_init () at 
> > /usr/src/sys/arch/arm64/arm64/pmap.c:1682
> > #7  0xff80009384dc in uvm_init () at /usr/src/sys/uvm/uvm_init.c:118
> > #8  0xff800048e664 in main (framep= > dwarf expression opcode 0xa3>) at /usr/src/sys/kern/init_main.c:235
> > 
> > diff --git a/sys/arch/arm64/arm64/pmap.c b/sys/arch/arm64/arm64/pmap.c
> > index 79a344cc84e..f070f4540ec 100644
> > --- a/sys/arch/arm64/arm64/pmap.c
> > +++ b/sys/arch/arm64/arm64/pmap.c
> > @@ -1308,10 +1308,12 @@ pmap_bootstrap(long kvo, paddr_t lpt1, long 
> > kernelstart, long kernelend,
> > pmap_kernel()->pm_vp.l1 = (struct pmapvp1 *)va;
> > pmap_kernel()->pm_privileged = 1;
> > pmap_kernel()->pm_asid = 0;
> > +   mtx_init(_kernel()->pm_mtx, IPL_VM);
> >  
> > pmap_tramp.pm_vp.l1 = (struct pmapvp1 *)va + 1;
> > pmap_tramp.pm_privileged = 1;
> > pmap_tramp.pm_asid = 0;
> > +   mtx_init(_tramp.pm_mtx, IPL_VM);
> >  
> > /* Mark ASID 0 as in-use. */
> > pmap_asid[0] |= (3U << 0);
> > diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c
> > index 4a60377e9d7..e77afeda832 100644
> > --- a/sys/uvm/uvm_km.c
> > +++ b/sys/uvm/uvm_km.c
> > @@ -644,7 +644,7 @@ uvm_km_page_lateinit(void)
> >   * not zero filled.
> >   */
> >  
> > -struct uvm_km_pages uvm_km_pages;
> > +struct uvm_km_pages uvm_km_pages = { .mtx = MUTEX_INITIALIZER(IPL_VM) };
> >  
> >  void uvm_km_createthread(void *);
> >  void uvm_km_thread(void *);
> > @@ -664,7 +664,6 @@ uvm_km_page_init(void)
> > int len, bulk;
> > vaddr_t addr;
> >  
> > -   mtx_init(_km_pages.mtx, IPL_VM);
> > if (!uvm_km_pages.lowat) {
> > /* based on physmem, calculate a good value here */
> > uvm_km_pages.lowat = physmem / 256;
> > 
> 
> 



Re: Change vm_dsize to vsize_t

2021-09-07 Thread Mark Kettenis
> From: "Theo de Raadt" 
> Date: Tue, 07 Sep 2021 07:08:19 -0600
> 
> Claudio Jeker  wrote:
> 
> > > @@ -443,7 +443,7 @@ struct kinfo_proc {
> > >  
> > >   int32_t p_vm_rssize;/* SEGSZ_T: current resident set size 
> > > in pages */
> > >   int32_t p_vm_tsize; /* SEGSZ_T: text size (pages) */
> > > - int32_t p_vm_dsize; /* SEGSZ_T: data size (pages) */
> > > + u_int64_t   p_vm_dsize; /* VSIZE_T: data size (pages) */
> > >   int32_t p_vm_ssize; /* SEGSZ_T: stack size (pages) */
> > >  
> > >   int64_t p_uvalid;   /* CHAR: following p_u* members from 
> > > struct user are valid */
> > 
> > From my understanding this is not how struct kinfo_proc should be modified.
> > Instead the code should add the u_int64_t version at the end and leave the
> > old in place. This way old userland still works with new kernel.
> 
> If this is done as a size-change inline as Greg suggested, then it is a
> large sysctl ABI bump, with temporary breakage until binaries catch up.
> 
> If the 32-bit value is kept as-is, and a 64-bit one is appended, this is
> still a small sysctl ABI bump depending on what uses the field.  Existing
> binaries will use the 32-bit field for a time.  Such as ps(1).
> 
> Obviously we cannot add a new 64-bit field at the end, and mark the
> 32-bit field "unused", it breaks many utilities in a similar fashion.
> A truncated value must be put into the 32-bit field, or it is just as
> awkward as a large sysctl ABI bump.
> 
> Or we could coordinate the Greg approach as a sysctl ABI change near a
> libc major bump.  On the other side of such a bump, all kernel + base +
> packages are updated to use the new storage ABI.  We get orderly .h
> files without a confusing glitch, and kern_sysctl.c doesn't need to
> store the value into two fields (32bit and 64bit) for the foreseeable
> future.
> 
> Over the years I've arrived at the conclusion that maintaining binary
> compatibility at all costs collects too much confusing damage.  Instead,
> we've built a software ecosystem where ABI changes are expected and
> carry minimal consequence.

I'm not convinced the original diff is right:

* We have several places in the kernel where we store numbers of pages
  in a (32-bit) int.  Changing just one of these places is dangerous.

* Changing the type of just vm_dsize makes no sense.  We should change
  them all (but see the point above).

* Does ASAN really need to reserve that much VA space?



Re: riscv64 ptrace(2) tweaks

2021-09-03 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Fri, 03 Sep 2021 14:32:26 +0200
> 
> Two changes that would be useful:
> - enable PT_*FPREGS, Mark has already done the job.
> - hide PT_STEP since hardware support seems missing (the spec only talks
>   about single stepping support in "Debug mode", which is not "Machine" or
>   "Supervisor" mode).  Since we don't emulate it (like mips64 for
>   example) it doesn't make sense to provide the define.  I doubt that
>   hiding PT_STEP will magically fix devel/gdb runtime support though.
> 
> Thoughts?  ok?

ok kettenis@

> Index: ptrace.h
> ===
> RCS file: /d/cvs/src/sys/arch/riscv64/include/ptrace.h,v
> retrieving revision 1.2
> diff -u -p -p -u -r1.2 ptrace.h
> --- ptrace.h  12 May 2021 01:20:52 -  1.2
> +++ ptrace.h  3 Sep 2021 12:25:54 -
> @@ -16,10 +16,10 @@
>   * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
>   */
>  
> +#if 0
>  #define  PT_STEP (PT_FIRSTMACH + 0)
> +#endif
>  #define  PT_GETREGS  (PT_FIRSTMACH + 1)
>  #define  PT_SETREGS  (PT_FIRSTMACH + 2)
> -#if 0  // XXX ptrace fpreg support
>  #define  PT_GETFPREGS(PT_FIRSTMACH + 3)
>  #define  PT_SETFPREGS(PT_FIRSTMACH + 4)
> -#endif
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 
> 



Re: pmap & buffer cache dummy pagers

2021-09-03 Thread Mark Kettenis
> Date: Thu, 2 Sep 2021 22:16:52 +0200
> From: Martin Pieuchot 
> 
> Diff below introduces two dummy pagers for subsystems that manipulate UVM
> objects that are 'special'.  Those pagers will be used to enforce checks
> in functions that expect a lock to be held, like:
> 
>   KASSERT(obj == NULL || UVM_OBJ_IS_PMAP(obj) ||
> rw_write_held(obj->vmobjlock));
> 
> They are also used, in the diff below, to document which routines expect
> such objects and a serialization offered by the KERNEL_LOCK().  More
> examples can be seen in my WIP unlocking diff.
> 
> The idea is taken from NetBSD which also uses such dummy pagers for some
> of their pmaps.  I don't believe there's a need to change anything with
> these usages of the uvm_obj_* API for the moment but at the same time it
> helps me to have such implicit documentation.
> 
> ok?

And it follows a pattern that we already have.

I'm still not sure why the hell we need uvm objects in those pmaps,
but I'm also not too eager to delve into them to see if we can get rid of
those bits right now.

ok kettenis@

> Index: arch/amd64/amd64/pmap.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/pmap.c,v
> retrieving revision 1.145
> diff -u -p -r1.145 pmap.c
> --- arch/amd64/amd64/pmap.c   18 Jun 2021 06:17:28 -  1.145
> +++ arch/amd64/amd64/pmap.c   2 Sep 2021 19:55:57 -
> @@ -671,7 +671,7 @@ pmap_bootstrap(paddr_t first_avail, padd
>  
>   kpm = pmap_kernel();
>   for (i = 0; i < PTP_LEVELS - 1; i++) {
> - uvm_obj_init(>pm_obj[i], NULL, 1);
> + uvm_obj_init(>pm_obj[i], _pager, 1);
>   kpm->pm_ptphint[i] = NULL;
>   }
>   memset(>pm_list, 0, sizeof(kpm->pm_list));  /* pm_list not used */
> @@ -1307,7 +1307,7 @@ pmap_create(void)
>  
>   /* init uvm_object */
>   for (i = 0; i < PTP_LEVELS - 1; i++) {
> - uvm_obj_init(>pm_obj[i], NULL, 1);
> + uvm_obj_init(>pm_obj[i], _pager, 1);
>   pmap->pm_ptphint[i] = NULL;
>   }
>   pmap->pm_stats.wired_count = 0;
> Index: arch/hppa/hppa/pmap.c
> ===
> RCS file: /cvs/src/sys/arch/hppa/hppa/pmap.c,v
> retrieving revision 1.175
> diff -u -p -r1.175 pmap.c
> --- arch/hppa/hppa/pmap.c 16 Jun 2021 09:02:21 -  1.175
> +++ arch/hppa/hppa/pmap.c 2 Sep 2021 19:54:23 -
> @@ -496,7 +496,7 @@ pmap_bootstrap(vaddr_t vstart)
>*/
>   kpm = _pmap_store;
>   bzero(kpm, sizeof(*kpm));
> - uvm_obj_init(>pm_obj, NULL, 1);
> + uvm_obj_init(>pm_obj, _pager, 1);
>   kpm->pm_space = HPPA_SID_KERNEL;
>   kpm->pm_pid = HPPA_PID_KERNEL;
>   kpm->pm_pdir_pg = NULL;
> @@ -678,7 +678,7 @@ pmap_create(void)
>  
>   mtx_init(>pm_mtx, IPL_VM);
>  
> - uvm_obj_init(>pm_obj, NULL, 1);
> + uvm_obj_init(>pm_obj, _pager, 1);
>  
>   for (space = 1 + arc4random_uniform(hppa_sid_max);
>   pmap_sdir_get(space); space = (space + 1) % hppa_sid_max);
> Index: arch/i386/i386/pmap.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/pmap.c,v
> retrieving revision 1.214
> diff -u -p -r1.214 pmap.c
> --- arch/i386/i386/pmap.c 16 Jun 2021 09:02:21 -  1.214
> +++ arch/i386/i386/pmap.c 2 Sep 2021 19:55:57 -
> @@ -963,7 +963,7 @@ pmap_bootstrap(vaddr_t kva_start)
>   kpm = pmap_kernel();
>   mtx_init(>pm_mtx, -1); /* must not be used */
>   mtx_init(>pm_apte_mtx, IPL_VM);
> - uvm_obj_init(>pm_obj, NULL, 1);
> + uvm_obj_init(>pm_obj, _pager, 1);
>   bzero(>pm_list, sizeof(kpm->pm_list));  /* pm_list not used */
>   kpm->pm_pdir = (vaddr_t)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE);
>   kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3;
> @@ -1348,7 +1348,7 @@ pmap_create(void)
>   mtx_init(>pm_apte_mtx, IPL_VM);
>  
>   /* init uvm_object */
> - uvm_obj_init(>pm_obj, NULL, 1);
> + uvm_obj_init(>pm_obj, _pager, 1);
>   pmap->pm_stats.wired_count = 0;
>   pmap->pm_stats.resident_count = 1;  /* count the PDP allocd below */
>   pmap->pm_ptphint = NULL;
> Index: uvm/uvm_object.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_object.c,v
> retrieving revision 1.19
> diff -u -p -r1.19 uvm_object.c
> --- uvm/uvm_object.c  16 Jun 2021 09:02:21 -  1.19
> +++ uvm/uvm_object.c  2 Sep 2021 20:00:03 -
> @@ -41,6 +41,16 @@
>  
>  #include 
>  
> +/* Dummy object used by some pmaps for sanity checks. */
> +const struct uvm_pagerops pmap_pager = {
> + /* nothing */
> +};
> +
> +/* Dummy object used by the buffer cache for sanity checks. */
> +const struct uvm_pagerops bufcache_pager = {
> + /* nothing */
> +};
> +
>  /* We will fetch this page count per step */
>  #define  FETCH_PAGECOUNT 16
>  
> @@ -159,6 +169,9 @@ uvm_obj_free(struct uvm_object *uobj)
>  {
> 

Re: i386 ioapic mtx not initialized

2021-09-02 Thread Mark Kettenis
> Date: Thu, 2 Sep 2021 09:31:49 +0200
> From: Martin Pieuchot 
> 
> Seen with WITNESS, this has already been fixed in amd64, diff below
> backports the fix, ok?
> 
> ioapic0 at mainbus0: apid 2 pa 0xfec0witness: lock_object uninitialized: 
> 0xd8841440
> Starting stack trace...
> witness_checkorder(f5547000,fec01000,fec0,d1820adc,d03fb01e) at 
> witness_checkorder+0x85 [/home/os/openbsd/sys/kern/subr_witness.c:2497]
> witness_checkorder(d8841440,9,0) at witness_checkorder+0x85 
> [/home/os/openbsd/sys/kern/subr_witness.c:2497]
> mtx_enter(d8841434) at mtx_enter+0x1c 
> [/home/os/openbsd/sys/kern/kern_lock.c:262]
> ioapic_attach(d884a040,d8841400,d1820b84) at ioapic_attach+0xe0 
> [/home/os/openbsd/sys/arch/i386/i386/ioapic.c:125]
> config_attach(d884a040,d0e31314,d1820b84,d068f190) at config_attach+0x18a 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:403]
> config_found_sm(d884a040,d1820b84,d068f190,0) at config_found_sm+0x29 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:313]
> acpimadt_attach(d8840400,d88bc2c0,d1820c78) at acpimadt_attach+0x34c 
> [/home/os/openbsd/sys/dev/acpi/acpimadt.c:0]
> config_attach(d8840400,d0e32574,d1820c78,d07ddd90) at config_attach+0x18a 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:403]
> config_found_sm(d8840400,d1820c78,d07ddd90,d07e0280) at config_found_sm+0x29 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:313]
> acpi_attach_common(d8840400,f0120) at acpi_attach_common+0x585 
> [/home/os/openbsd/sys/dev/acpi/acpi.c:1207]
> acpi_attach(d884a080,d8840400,d1820dd0) at acpi_attach+0x2c 
> [/home/os/openbsd/sys/arch/i386/i386/acpi_machdep.c:112]
> config_attach(d884a080,d0e32734,d1820dd0,d09d73d0) at config_attach+0x18a 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:403]
> config_found_sm(d884a080,d1820dd0,d09d73d0,0) at config_found_sm+0x29 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:313]
> biosattach(d884a040,d884a080,d1820ec0) at biosattach+0x181 
> [/home/os/openbsd/sys/arch/i386/i386/bios.c:392]
> config_attach(d884a040,d0e31274,d1820ec0,d04d3db0) at config_attach+0x18a 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:403]
> config_found_sm(d884a040,d1820ec0,d04d3db0,0) at config_found_sm+0x29 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:313]
> mainbus_attach(0,d884a040,0) at mainbus_attach+0x54 
> [/home/os/openbsd/sys/arch/i386/i386/mainbus.c:157]
> config_attach(0,d0e2ec34,0,0) at config_attach+0x18a 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:403]
> config_rootfound(d0c28d4d,0) at config_rootfound+0xaf 
> [/home/os/openbsd/sys/kern/subr_autoconf.c:328]
> cpu_configure(3327f5e4,181e000,182d000,1821000,0) at cpu_configure+0x4c 
> [/home/os/openbsd/sys/arch/i386/i386/autoconf.c:156]
> main(0,0,0,0,0) at main+0x342 [/home/os/openbsd/sys/kern/init_main.c:377]
> End of stack trace.

ok kettenis@

> Index: i386/ioapic.c
> ===
> RCS file: /cvs/src/sys/arch/i386/i386/ioapic.c,v
> retrieving revision 1.41
> diff -u -p -r1.41 ioapic.c
> --- i386/ioapic.c 25 Aug 2018 16:09:29 -  1.41
> +++ i386/ioapic.c 2 Sep 2021 07:27:16 -
> @@ -309,6 +309,10 @@ ioapic_attach(struct device *parent, str
>   sc->sc_reg = (volatile u_int32_t *)(bh + IOAPIC_REG);
>   sc->sc_data = (volatile u_int32_t *)(bh + IOAPIC_DATA);
>  
> +#ifdef MULTIPROCESSOR
> + mtx_init(>sc_pic.pic_mutex, IPL_NONE);
> +#endif
> +
>   ver_sz = ioapic_read(sc, IOAPIC_VER);
>   sc->sc_apic_vers = (ver_sz & IOAPIC_VER_MASK) >> IOAPIC_VER_SHIFT;
>   sc->sc_apic_sz = (ver_sz & IOAPIC_MAX_MASK) >> IOAPIC_MAX_SHIFT;
> 
> 



Re: Kill SYSCALL_DEBUG

2021-08-30 Thread Mark Kettenis
> From: "Theo de Raadt" 
> Date: Mon, 30 Aug 2021 01:47:43 -0600
> 
> Hang on, SYSCALL_DEBUG is used to bring up new architectures.
> That is the only time the #define is enabled.
> 
> When you are bringing up a new architecture, bt is useless
> 
> I don't think this makes sense.

Indeed.  This needs to stay.

> Martin Pieuchot  wrote:
> 
> > Now that dt(4) and btrace(8) are enabled by default and provide a nice
> > and flexible way to debug syscalls on GENERIC kernels should we get rid
> > of the SYSCALL_DEBUG mechanism?
> > 
> > Note that the auto-generated kern/syscalls.c providing the `syscallnames'
> > array is still needed to build btrace(8).
> > 
> > ok?
> > 
> > Index: kern/exec_elf.c
> > ===
> > RCS file: /cvs/src/sys/kern/exec_elf.c,v
> > retrieving revision 1.160
> > diff -u -p -r1.160 exec_elf.c
> > --- kern/exec_elf.c 10 Mar 2021 10:21:47 -  1.160
> > +++ kern/exec_elf.c 30 Aug 2021 07:19:33 -
> > @@ -107,9 +107,6 @@ int elf_os_pt_note_name(Elf_Note *);
> >  intelf_os_pt_note(struct proc *, struct exec_package *, Elf_Ehdr 
> > *, int *);
> >  
> >  extern char sigcode[], esigcode[], sigcoderet[];
> > -#ifdef SYSCALL_DEBUG
> > -extern char *syscallnames[];
> > -#endif
> >  
> >  /* round up and down to page boundaries. */
> >  #define ELF_ROUND(a, b)(((a) + (b) - 1) & ~((b) - 1))
> > @@ -135,11 +132,7 @@ struct emul emul_elf = {
> > SYS_syscall,
> > SYS_MAXSYSCALL,
> > sysent,
> > -#ifdef SYSCALL_DEBUG
> > -   syscallnames,
> > -#else
> > NULL,
> > -#endif
> > (sizeof(AuxInfo) * ELF_AUX_ENTRIES / sizeof(char *)),
> > elf_copyargs,
> > setregs,
> > Index: kern/kern_xxx.c
> > ===
> > RCS file: /cvs/src/sys/kern/kern_xxx.c,v
> > retrieving revision 1.36
> > diff -u -p -r1.36 kern_xxx.c
> > --- kern/kern_xxx.c 2 Apr 2019 11:00:22 -   1.36
> > +++ kern/kern_xxx.c 30 Aug 2021 07:19:17 -
> > @@ -84,75 +84,3 @@ __stack_smash_handler(char func[], int d
> > panic("smashed stack in %s", func);
> >  }
> >  #endif
> > -
> > -#ifdef SYSCALL_DEBUG
> > -#include 
> > -
> > -#defineSCDEBUG_CALLS   0x0001  /* show calls */
> > -#defineSCDEBUG_RETURNS 0x0002  /* show returns */
> > -#defineSCDEBUG_ALL 0x0004  /* even syscalls that are 
> > implemented */
> > -#defineSCDEBUG_SHOWARGS0x0008  /* show arguments to calls */
> > -
> > -intscdebug = SCDEBUG_CALLS|SCDEBUG_RETURNS|SCDEBUG_SHOWARGS;
> > -
> > -void
> > -scdebug_call(struct proc *p, register_t code, const register_t args[])
> > -{
> > -   struct process *pr;
> > -   struct sysent *sy;
> > -   struct emul *em;
> > -   int i;
> > -
> > -   if (!(scdebug & SCDEBUG_CALLS))
> > -   return;
> > -
> > -   pr = p->p_p;
> > -   em = pr->ps_emul;
> > -   sy = >e_sysent[code];
> > -   if (!(scdebug & SCDEBUG_ALL || code < 0 || code >= em->e_nsysent ||
> > -sy->sy_call == sys_nosys))
> > -   return;
> > -
> > -   printf("proc %d (%s): %s num ", pr->ps_pid, pr->ps_comm, em->e_name);
> > -   if (code < 0 || code >= em->e_nsysent)
> > -   printf("OUT OF RANGE (%ld)", code);
> > -   else {
> > -   printf("%ld call: %s", code, em->e_syscallnames[code]);
> > -   if (scdebug & SCDEBUG_SHOWARGS) {
> > -   printf("(");
> > -   for (i = 0; i < sy->sy_argsize / sizeof(register_t);
> > -   i++)
> > -   printf("%s0x%lx", i == 0 ? "" : ", ", args[i]);
> > -   printf(")");
> > -   }
> > -   }
> > -   printf("\n");
> > -}
> > -
> > -void
> > -scdebug_ret(struct proc *p, register_t code, int error,
> > -const register_t retval[])
> > -{
> > -   struct process *pr;
> > -   struct sysent *sy;
> > -   struct emul *em;
> > -
> > -   if (!(scdebug & SCDEBUG_RETURNS))
> > -   return;
> > -
> > -   pr = p->p_p;
> > -   em = pr->ps_emul;
> > -   sy = &em->e_sysent[code];
> > -   if (!(scdebug & SCDEBUG_ALL || code < 0 || code >= em->e_nsysent ||
> > -   sy->sy_call == sys_nosys))
> > -   return;
> > -   
> > -   printf("proc %d (%s): %s num ", pr->ps_pid, pr->ps_comm, em->e_name);
> > -   if (code < 0 || code >= em->e_nsysent)
> > -   printf("OUT OF RANGE (%ld)", code);
> > -   else
> > -   printf("%ld ret: err = %d, rv = 0x%lx,0x%lx", code,
> > -   error, retval[0], retval[1]);
> > -   printf("\n");
> > -}
> > -#endif /* SYSCALL_DEBUG */
> > Index: kern/init_main.c
> > ===
> > RCS file: /cvs/src/sys/kern/init_main.c,v
> > retrieving revision 1.308
> > diff -u -p -r1.308 init_main.c
> > --- kern/init_main.c30 Jun 2021 12:21:02 -  1.308
> > +++ kern/init_main.c30 Aug 2021 07:17:55 -
> > @@ -155,9 +155,6 @@ voidpool_gc_pages(void *);
> > 

Re: arm64 rpi4 upgrade, "Failed to install bootblocks" at end

2021-08-29 Thread Mark Kettenis
> Date: Sun, 29 Aug 2021 11:20:06 +0100
> From: Stuart Henderson 
> 
> On 2021/08/28 22:28, Stuart Henderson wrote:
> > Spotted this at the end of a sysupgrade run. No issue with the reboot but
> > it doesn't look quite right, in particular the newfs_msdos is a bit scary.
> > 
> > [...]
> > Installing xshare70.tgz 100% |**|  4505 KB00:36 
> >
> > Installing xfont70.tgz  100% |**| 39344 KB01:47 
> >
> > Installing xserv70.tgz  100% |**| 12346 KB00:11 
> >
> > Location of sets? (disk http nfs or 'done') [done] done
> > Making all device nodes... done.
> > sh: /sbin/fsck_msdos: not found
> > newfs_msdos: /dev/rsd0i is mounted on /mnt/boot
> 
> Oh and I see why I hit it now, I have this from when I was trying various
> u-boot changes (to make it easier to update the files from the booted system).
> 
> $ grep boot /etc/fstab
> a3d7f93f60aec212.i /boot msdos rw,noatime 1 1

Yeah, that'll break things.  The missing fsck itself isn't fatal since
msdos filesystems don't have a bit to mark them clean and can always
be mounted rw even if they weren't unmounted cleanly.

Should installboot(8) handle the case where the filesystem is already
mounted?



Re: virtio(4): don't require legacy mode to have an I/O BAR

2021-08-23 Thread Mark Kettenis
> Date: Mon, 23 Aug 2021 17:53:06 +0200
> From: Patrick Wildt 
> 
> Hi,
> 
> so on the new Parallels version, when using the 'Other' OS setting,
> virtio(4) won't attach.  Apparently it's not Virtio 1.0, because even
> Fedora 34 falls back to the 'legacy' driver.
> 
> While our code expects (and requires) an I/O BAR, it seems to be that
> the PCI device only provides two memory BARs.
> 
> Linux still works, probably because they don't care about the type.
> So I figured, let's just do that as well.  With the following diff,
> virtio(4) attaches again, and I can install over vio(4).
> 
> I don't know if this violates any official virtio(4) spec, but on
> the other hand... it fixes a bug, makes it work, and just loosens
> up the requirement a little.
> 
> ok?

ok kettenis@

> diff --git a/sys/dev/pci/virtio_pci.c b/sys/dev/pci/virtio_pci.c
> index c99f50136cd..0a29293e16c 100644
> --- a/sys/dev/pci/virtio_pci.c
> +++ b/sys/dev/pci/virtio_pci.c
> @@ -508,7 +508,10 @@ int
>  virtio_pci_attach_09(struct virtio_pci_softc *sc, struct pci_attach_args *pa)
>  {
>   struct virtio_softc *vsc = &sc->sc_sc;
> - if (pci_mapreg_map(pa, PCI_MAPREG_START, PCI_MAPREG_TYPE_IO, 0,
> + pcireg_t type;
> +
> + type = pci_mapreg_type(pa->pa_pc, pa->pa_tag, PCI_MAPREG_START);
> + if (pci_mapreg_map(pa, PCI_MAPREG_START, type, 0,
>   &sc->sc_iot, &sc->sc_ioh, NULL, &sc->sc_iosize, 0)) {
>   printf("%s: can't map i/o space\n", vsc->sc_dev.dv_xname);
>   return EIO;
> 
> 



Re: [patch] traceroute timeouts

2021-08-20 Thread Mark Kettenis
> From: Florian Obser 
> Date: Fri, 20 Aug 2021 10:46:21 +0200
> 
> Makes sense to me, OK florian

Doesn't make sense to me.  The RTT for an ICMP packet can be a
significant part of a second (think Europe-Australia the wrong way
around cause that is where all the bandwidth is, or when satellites
are involved).  I think this means that a single dropped packet could
result in a failure to resolve one of the hops on such a path.

I don't necessarily object to giving folks the ammunition to shoot
themselves in the foot by dropping the minimum value to 1 second.
But the default should be larger I think.

> On 2021-08-19 23:47 -07,  wrote:
> > The default traceroute timeout of 5 seconds is excruciatingly long
> > when there are elements of the route that don't respond, and it
> > wasn't allowed to be set lower than 2 seconds.
> >
> > This changes the minimum to 1 second, matching FreeBSD, and also
> > makes that the default, which should be reasonable for the vast
> > majority of users today.
> >
> > The two awk files in this directory are two decades old, and
> > not installed anywhere they can be executed as part of a traceroute
> > pipeline; can they be removed? If the functionality is useful,
> > implementing mean/median reporting as a new option in C would be
> > straightforward.
> >
> > Index: usr.sbin/traceroute/traceroute.8
> > ===
> > RCS file: /cvs/src/usr.sbin/traceroute/traceroute.8,v
> > retrieving revision 1.69
> > diff -u -p -u -r1.69 traceroute.8
> > --- usr.sbin/traceroute/traceroute.811 Feb 2020 18:41:39 -  
> > 1.69
> > +++ usr.sbin/traceroute/traceroute.820 Aug 2021 06:33:30 -
> > @@ -201,7 +201,7 @@ and
> >  are listed.
> >  .It Fl w Ar waittime
> >  Set the time, in seconds, to wait for a response to a probe.
> > -The default is 5.
> > +The default is 1.
> >  .It Fl x
> >  Print the ICMP extended headers if available.
> >  This option is not available for IPv6.
> > Index: usr.sbin/traceroute/traceroute.c
> > ===
> > RCS file: /cvs/src/usr.sbin/traceroute/traceroute.c,v
> > retrieving revision 1.164
> > diff -u -p -u -r1.164 traceroute.c
> > --- usr.sbin/traceroute/traceroute.c12 Jul 2021 15:09:21 -  
> > 1.164
> > +++ usr.sbin/traceroute/traceroute.c20 Aug 2021 06:33:30 -
> > @@ -351,7 +351,7 @@ main(int argc, char *argv[])
> > rcvsock4 = rcvsock6 = sndsock4 = sndsock6 = -1;
> > v4sock_errno = v6sock_errno = 0;
> >  
> > -   conf->waittime = 5 * 1000;
> > +   conf->waittime = 1000;
> >  
> > if ((rcvsock6 = socket(AF_INET6, SOCK_RAW, IPPROTO_ICMPV6)) == -1)
> > v6sock_errno = errno;
> > @@ -554,9 +554,9 @@ main(int argc, char *argv[])
> > err(1, "setsockopt SO_RTABLE");
> > break;
> > case 'w':
> > -   conf->waittime = strtonum(optarg, 2, INT_MAX, &errstr);
> > +   conf->waittime = strtonum(optarg, 1, INT_MAX, &errstr);
> > if (errstr)
> > -   errx(1, "wait must be >1 sec.");
> > +   errx(1, "wait must be >=1 sec.");
> > conf->waittime *= 1000;
> > break;
> > case 'x':
> >
> >
> 
> -- 
> I'm not entirely sure you are real.
> 
> 



Re: ucc(4): consumer control keyboard device driver

2021-08-18 Thread Mark Kettenis
> Date: Tue, 17 Aug 2021 20:13:41 +0200
> From: Anton Lindqvist 
> 
> Hi,
> 
> Here's a new driver for USB HID Consumer Control keyboards. Such
> keyboard is a pseudo device which is used to expose audio and
> application launch keys. My prime motivation is to get the volume mute,
> increment and decrement keys to just work on my keyboard without the
> need to use usbhidaction(1).
> 
> ucc(4) attaches a wskbd(4) keyboard "on top" making it appear like an
> ordinary keyboard, which also makes it possible to inject key
> press/release input. It supports both translating and raw mode making it
> compatible with the ordinary console and X11.
> 
> My keyboard for instance exposes 42 keys in its input report. I only
> care about the volume and audio related ones and therefore only added
> mappings for those. Additional mappings should be trivial to add if
> desired.
> 
> Testing would be much appreciated.
> 
> Comments? OK?

So the downside of this is that you get a separate wskbd(4) device for
these.  This will be transparent for most users thanks to wsmux(4),
but it does mean that doing a multi-seat wscons setup becomes a little
bit more involved.  That's fine with me as I don't think that's an
important use case for OpenBSD.

This looks reasonable to me.


> diff --git share/man/man4/Makefile share/man/man4/Makefile
> index 6a0ecb20653..63b33660159 100644
> --- share/man/man4/Makefile
> +++ share/man/man4/Makefile
> @@ -84,7 +84,7 @@ MAN=aac.4 abcrtc.4 abl.4 ac97.4 acphy.4 acrtc.4 \
>   tlphy.4 thmc.4 tpm.4 tpmr.4 tqphy.4 trm.4 trunk.4 tsl.4 tty.4 \
>   tun.4 tap.4 twe.4 \
>   txp.4 txphy.4 uaudio.4 uark.4 uath.4 ubcmtp.4 uberry.4 ubsa.4 \
> - ubsec.4 ucom.4 uchcom.4 ucrcom.4 ucycom.4 ukspan.4 uslhcom.4 \
> + ubsec.4 ucc.4 ucom.4 uchcom.4 ucrcom.4 ucycom.4 ukspan.4 uslhcom.4 \
>   udav.4 udcf.4 udl.4 udp.4 udsbr.4 \
>   uftdi.4 ugen.4 ugl.4 ugold.4 uguru.4 uhci.4 uhid.4 uhidev.4 uhidpp.4 \
>   uipaq.4 ujoy.4 uk.4 ukbd.4 \
> diff --git share/man/man4/ucc.4 share/man/man4/ucc.4
> new file mode 100644
> index 000..413c88aa6af
> --- /dev/null
> +++ share/man/man4/ucc.4
> @@ -0,0 +1,45 @@
> +.\"  $OpenBSD$
> +.\"
> +.\" Copyright (c) 2021 Anton Lindqvist 
> +.\"
> +.\" Permission to use, copy, modify, and distribute this software for any
> +.\" purpose with or without fee is hereby granted, provided that the above
> +.\" copyright notice and this permission notice appear in all copies.
> +.\"
> +.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> +.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> +.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> +.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> +.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> +.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> +.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> +.\"
> +.Dd $Mdocdate$
> +.Dt UCC 4
> +.Os
> +.Sh NAME
> +.Nm ucc
> +.Nd Consumer Control keyboards
> +.Sh SYNOPSIS
> +.Cd "ucc* at uhidev?"
> +.Cd "wskbd* at ucc? mux 1"
> +.Sh DESCRIPTION
> +The
> +.Nm
> +driver provides support for Consumer Control pseudo keyboards, often used to
> +expose audio and application launch keys.
> +.Sh SEE ALSO
> +.Xr intro 4 ,
> +.Xr uhidev 4 ,
> +.Xr usb 4 ,
> +.Xr wskbd 4
> +.Sh HISTORY
> +The
> +.Nm
> +driver first appeared in
> +.Ox 7.0 .
> +.Sh AUTHORS
> +The
> +.Nm
> +driver was written by
> +.An Anton Lindqvist Aq Mt an...@openbsd.org .
> diff --git share/man/man4/uhidev.4 share/man/man4/uhidev.4
> index 02252789a3f..d398c564bd5 100644
> --- share/man/man4/uhidev.4
> +++ share/man/man4/uhidev.4
> @@ -37,6 +37,7 @@
>  .Sh SYNOPSIS
>  .Cd "uhidev*  at uhub?"
>  .Cd "fido*at uhidev?"
> +.Cd "ucc* at uhidev?"
>  .Cd "ucycom*  at uhidev?"
>  .Cd "ugold*   at uhidev?"
>  .Cd "uhid*at uhidev?"
> @@ -72,6 +73,7 @@ only dispatches data to them based on the report id.
>  .Sh SEE ALSO
>  .Xr fido 4 ,
>  .Xr intro 4 ,
> +.Xr ucc 4 ,
>  .Xr ucycom 4 ,
>  .Xr ugold 4 ,
>  .Xr uhid 4 ,
> diff --git share/man/man4/usb.4 share/man/man4/usb.4
> index dad3d3a97d9..d159d8b27f3 100644
> --- share/man/man4/usb.4
> +++ share/man/man4/usb.4
> @@ -249,6 +249,8 @@ D-Link DSB-R100 USB radio device
>  FIDO/U2F security keys
>  .It Xr ubcmtp 4
>  Broadcom trackpad mouse
> +.It Xr ucc 4
> +USB Consumer Control keyboards
>  .It Xr ugold 4
>  TEMPer gold HID thermometer and hygrometer
>  .It Xr uhid 4
> diff --git sys/arch/alpha/conf/GENERIC sys/arch/alpha/conf/GENERIC
> index 8af652ce301..54d4a45cd4e 100644
> --- sys/arch/alpha/conf/GENERIC
> +++ sys/arch/alpha/conf/GENERIC
> @@ -107,6 +107,8 @@ uslhcom* at uhidev?   # Silicon Labs 
> CP2110 USB HID UART
>  ucom*at uslhcom?
>  uhid*at uhidev?  # USB generic HID support
>  fido*at uhidev?  # 

Re: remove efibind.h cruft

2021-07-26 Thread Mark Kettenis
> Date: Mon, 26 Jul 2021 18:45:31 +1000
> From: Jonathan Gray 
> 
> Follow what was done with riscv64 and replace efibind.h with just the
> defines we need.
> 
> Tested on armv7 arm64 and amd64 (bootx64).

ok kettenis@
 
> Index: sys/stand/efi/include/amd64/efibind.h
> ===
> RCS file: /cvs/src/sys/stand/efi/include/amd64/efibind.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 efibind.h
> --- sys/stand/efi/include/amd64/efibind.h 4 Jun 2021 00:09:34 -   
> 1.2
> +++ sys/stand/efi/include/amd64/efibind.h 26 Jul 2021 05:39:34 -
> @@ -1,271 +1,22 @@
> -/* $FreeBSD: head/sys/boot/efi/include/amd64/efibind.h 279038 2015-02-20 
> 01:40:55Z imp $ */
> -/*++
> +/* Public Domain. */
>  
> -Copyright (c)  1999 - 2003 Intel Corporation. All rights reserved
> -This software and associated documentation (if any) is furnished
> -under a license and may only be used or copied in accordance
> -with the terms of the license. Except as permitted by such
> -license, no part of this software or documentation may be
> -reproduced, stored in a retrieval system, or transmitted in any
> -form or by any means without the express written consent of
> -Intel Corporation.
> -
> -Module Name:
> -
> -efefind.h
> -
> -Abstract:
> -
> -EFI to compile bindings
> -
> -
> -
> -
> -Revision History
> -
> ---*/
> -
> -#pragma pack()
> -
> -
> -#if defined(__FreeBSD__) || defined(__OpenBSD__)
>  #include 
> -#else
> -//
> -// Basic int types of various widths
> -//
> -
> -#if (__STDC_VERSION__ < 199901L )
> -
> -// No ANSI C 1999/2000 stdint.h integer width declarations
> -
> -#if _MSC_EXTENSIONS
> -
> -// Use Microsoft C compiler integer width declarations
> -
> -typedef unsigned __int64uint64_t;
> -typedef __int64 int64_t;
> -typedef unsigned __int32uint32_t;
> -typedef __int32 int32_t;
> -typedef unsigned short  uint16_t;
> -typedef short   int16_t;
> -typedef unsigned char   uint8_t;
> -typedef charint8_t;
> -#else
> -#ifdef UNIX_LP64
> -
> -// Use LP64 programming model from C_FLAGS for integer width 
> declarations
> -
> -typedef unsigned long   uint64_t;
> -typedef longint64_t;
> -typedef unsigned intuint32_t;
> -typedef int int32_t;
> -typedef unsigned short  uint16_t;
> -typedef short   int16_t;
> -typedef unsigned char   uint8_t;
> -typedef charint8_t;
> -#else
> -
> -// Assume P64 programming model from C_FLAGS for integer width 
> declarations
> -
> -typedef unsigned long long  uint64_t;
> -typedef long long   int64_t;
> -typedef unsigned intuint32_t;
> -typedef int int32_t;
> -typedef unsigned short  uint16_t;
> -typedef short   int16_t;
> -typedef unsigned char   uint8_t;
> -typedef charint8_t;
> -#endif
> -#endif
> -#endif
> -#endif   /* __FreeBSD__ || __OpenBSD__ */
> -
> -//
> -// Basic EFI types of various widths
> -//
> -
> -#ifndef ACPI_THREAD_ID   /* ACPI's definitions are fine */
> -#define ACPI_USE_SYSTEM_INTTYPES 1   /* Tell ACPI we've defined types */
> -
> -typedef uint64_t   UINT64;
> -typedef int64_tINT64;
> -
> -#ifndef _BASETSD_H_
> -typedef uint32_t   UINT32;
> -typedef int32_tINT32;
> -#endif
> -
> -typedef uint16_t   UINT16;
> -typedef int16_tINT16;
> -typedef uint8_tUINT8;
> -typedef int8_t INT8;
> -
> -#endif
> -
> -#undef VOID
> -#define VOIDvoid
> -
> -
> -typedef int64_tINTN;
> -typedef uint64_t   UINTN;
> -
> -#ifdef EFI_NT_EMULATOR
> -#define POST_CODE(_Data)
> -#else
> -#ifdef EFI_DEBUG
> -#define POST_CODE(_Data)__asm mov eax,(_Data) __asm out 0x80,al
> -#else
> -#define POST_CODE(_Data)
> -#endif
> -#endif
> -
> -#define EFIERR(a)   (0x8000 | a)
> -#define EFI_ERROR_MASK  0x8000
> -#define EFIERR_OEM(a)   (0xc000 | a)
> -
> -
> -#define BAD_POINTER 0xFBFBFBFBFBFBFBFB
> -#define MAX_ADDRESS 0x
> -
> -#define BREAKPOINT()__asm { int 3 }
> -
> -//
> -// Pointers must be aligned to these address to function
> -//
> -
> -#define MIN_ALIGNMENT_SIZE  4
> -
> -#define ALIGN_VARIABLE(Value ,Adjustment) \
> -(UINTN)Adjustment = 0; \
> -if((UINTN)Value % MIN_ALIGNMENT_SIZE) \
> -(UINTN)Adjustment = MIN_ALIGNMENT_SIZE - ((UINTN)Value % 
> MIN_ALIGNMENT_SIZE); \
> -Value = (UINTN)Value + (UINTN)Adjustment
> -
> -
> -//
> -// Define macros to build data structure 

Re: ahci(4): Add support for JMicron JMB585 chipset

2021-07-25 Thread Mark Kettenis
> Date: Sun, 25 Jul 2021 12:29:21 +0100
> From: Stuart Henderson 
> 
> On 2021/07/25 13:25, Mark Kettenis wrote:
> > > Date: Sun, 25 Jul 2021 12:08:09 +0100
> > > From: Stuart Henderson 
> > > 
> > > On 2021/07/25 14:55, Jonathan Matthew wrote:
> > > > On Thu, Jul 22, 2021 at 10:45:17PM -0400, Ashton Fagg wrote:
> > > > > I have two devices here based on the JMicron JMB585 chipset. This diff
> > > > > adds the required pcidev IDs and sets disables native command queuing 
> > > > > in
> > > > > the driver. FreeBSD does something similar for this device:
> > > > > 
> > > > > https://github.com/freebsd/freebsd-src/commit/16b766eed443043f4216d50e40ba283e74f992c2
> > > > 
> > > > Can you explain how you came to the conclusion that you'd need to
> > > > disable NCQ?  The FreeBSD commit you link to doesn't appear to do that
> > > > as they're not applying the AHCI_Q_NONCQ flag to these devices.
> > > > Does it not work with NCQ enabled?
> > > > 
> > > 
> > > That FreeBSD commit prevents using their "hw.ahci.force" tunable on the
> > > device, it's used for attaching as AHCI to certain known chips even if
> > > they're set in legacy IDE mode.
> > > 
> > > Does it work to just add the vid/pid to the ahci_devices[] array
> > > without a specific attach function? (like 
> > > PCI_PRODUCT_ASMEDIA_ASM1061_SATA).
> > 
> > Hmm, that suggests that the right fix might actually be to add
> > pciide(4) on riscv64.
> 
> The FreeBSD commit is "do not allow hw.ahci.force to work with this
> device" so kind-of the opposite of that.

Actually, the commit doesn't set the AHCI_Q_NOFORCE flag for this
particular JMicron device, which seems to indicate that they do allow
hw.ahci.force to work with this device.  And that means that adding
support for it in ahci(4) is fine (and given the 64-bit DMA issue, the
most desirable option).



Re: ahci(4): Add support for JMicron JMB585 chipset

2021-07-25 Thread Mark Kettenis
> Date: Sun, 25 Jul 2021 13:25:49 +0200 (CEST)
> From: Mark Kettenis 
> 
> > Date: Sun, 25 Jul 2021 12:08:09 +0100
> > From: Stuart Henderson 
> > 
> > On 2021/07/25 14:55, Jonathan Matthew wrote:
> > > On Thu, Jul 22, 2021 at 10:45:17PM -0400, Ashton Fagg wrote:
> > > > I have two devices here based on the JMicron JMB585 chipset. This diff
> > > > adds the required pcidev IDs and sets disables native command queuing in
> > > > the driver. FreeBSD does something similar for this device:
> > > > 
> > > > https://github.com/freebsd/freebsd-src/commit/16b766eed443043f4216d50e40ba283e74f992c2
> > > 
> > > Can you explain how you came to the conclusion that you'd need to
> > > disable NCQ?  The FreeBSD commit you link to doesn't appear to do that
> > > as they're not applying the AHCI_Q_NONCQ flag to these devices.
> > > Does it not work with NCQ enabled?
> > > 
> > 
> > That FreeBSD commit prevents using their "hw.ahci.force" tunable on the
> > device, it's used for attaching as AHCI to certain known chips even if
> > they're set in legacy IDE mode.
> > 
> > Does it work to just add the vid/pid to the ahci_devices[] array
> > without a specific attach function? (like PCI_PRODUCT_ASMEDIA_ASM1061_SATA).
> 
> Hmm, that suggests that the right fix might actually be to add
> pciide(4) on riscv64.

However, I'm not sure if we want to do that since legacy IDE mode
doesn't support 64-bit DMA.



Re: ahci(4): Add support for JMicron JMB585 chipset

2021-07-25 Thread Mark Kettenis
> Date: Sun, 25 Jul 2021 12:08:09 +0100
> From: Stuart Henderson 
> 
> On 2021/07/25 14:55, Jonathan Matthew wrote:
> > On Thu, Jul 22, 2021 at 10:45:17PM -0400, Ashton Fagg wrote:
> > > I have two devices here based on the JMicron JMB585 chipset. This diff
> > > adds the required pcidev IDs and sets disables native command queuing in
> > > the driver. FreeBSD does something similar for this device:
> > > 
> > > https://github.com/freebsd/freebsd-src/commit/16b766eed443043f4216d50e40ba283e74f992c2
> > 
> > Can you explain how you came to the conclusion that you'd need to
> > disable NCQ?  The FreeBSD commit you link to doesn't appear to do that
> > as they're not applying the AHCI_Q_NONCQ flag to these devices.
> > Does it not work with NCQ enabled?
> > 
> 
> That FreeBSD commit prevents using their "hw.ahci.force" tunable on the
> device, it's used for attaching as AHCI to certain known chips even if
> they're set in legacy IDE mode.
> 
> Does it work to just add the vid/pid to the ahci_devices[] array
> without a specific attach function? (like PCI_PRODUCT_ASMEDIA_ASM1061_SATA).

Hmm, that suggests that the right fix might actually be to add
pciide(4) on riscv64.



Re: usertc: small consistency tweaks (was: Re: riscv64 usertc)

2021-07-25 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Sun, 25 Jul 2021 04:31:20 +0200
> 
> On Sat, Jul 24 2021, Mark Kettenis  wrote:
> >> From: Jeremie Courreges-Anglas 
> >> Date: Sat, 24 Jul 2021 21:22:23 +0200
> >> 
> >> hifive /usr/src/regress/sys/kern/gettimeofday$ doas -u build time 
> >> obj/gettimeofday
> >> 6.64 real 6.63 user 0.02 sys
> >> hifive /usr/src/regress/sys/kern/gettimeofday$ doas -u build env 
> >> LIBC_NOUSERTC=1 time obj/gettimeofday
> >> 6.48 real 0.60 user 5.42 sys
> >> 
> >> Initially I thought that a more descriptive name than TC_TB could be
> >> helpful (TC_TIMEBASE?).  But since powerpc also uses TC_TB I think it's
> >> fine as a first step.  We can change it later easily, it's just a define
> >> name.
> >> 
> >> I haven't even built a release with this, not sure it's worth it.
> >> If you have cpu cycles to spare, please say so.
> >> 
> >> ok?
> >
> > Two small nits below.  With that fixed, ok kettenis@
> 
> [...]
> 
> >> Index: lib/libc/arch/riscv64/gen/usertc.c
> >> ===
> >> RCS file: /cvs/src/lib/libc/arch/riscv64/gen/usertc.c,v
> >> retrieving revision 1.1
> >> diff -u -p -r1.1 usertc.c
> >> --- lib/libc/arch/riscv64/gen/usertc.c 29 Apr 2021 18:33:36 -  
> >> 1.1
> >> +++ lib/libc/arch/riscv64/gen/usertc.c 24 Jul 2021 17:07:01 -
> >> @@ -1,6 +1,7 @@
> >>  /*$OpenBSD: usertc.c,v 1.1 2021/04/29 18:33:36 drahn Exp $
> >> */
> >>  /*
> >>   * Copyright (c) 2020 Paul Irofti 
> >> + * Copyright (c) 2021 Jeremie Courreges-Anglas 
> >>   *
> >>   * Permission to use, copy, modify, and distribute this software for any
> >>   * purpose with or without fee is hereby granted, provided that the above
> >> @@ -18,4 +19,24 @@
> >>  #include 
> >>  #include 
> >>  
> >> -int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
> >> +static inline u_int
> >> +rdtime(void)
> >> +{
> >> +  uint64_t ret;
> >> +  asm volatile("rdtime %0" : "=r"(ret));
> >
> > Can you make that __asm vol[a]tile?
> 
> Done.  I copied that from amd64 usertc.c.
> 
> >> +  return ret & 0xffffffff;
> >
> > The & 0xffffffff isn't really necessary here and the kernel doesn't do
> > it.  So I'd drop that bit and simply return ret.
> 
> I thought I would make it explicit to the reader that we only cared
> about the low 32 bits, rather than rely on the implicit truncation.
> Your nit was about consistency, what about trying to make other
> implementations consistent?  Or should we make it explicit on other
> archs?
> 
> Two changes, only compile-tested on amd64 and sparc64:
> - asm/__asm__ -> __asm
> - val & mask -> val
> 
> I can also drop this diff, consistency is good but so is time on our
> hands.

So the whole idea was to minimize the diffs between the kernel and
userland implementation of the tc_get_timecount() functions.  On some
of the architectures that isn't entirely feasible so there are some
differences.  But it seems you realized this ;).

Masking is in general unnecessary as the generic timecounter code
(kernel and userland) already does the masking.  However, there are
exceptions.
 
> Index: lib/libc/arch/aarch64/gen/usertc.c
> ===
> RCS file: /d/cvs/src/lib/libc/arch/aarch64/gen/usertc.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 usertc.c
> --- lib/libc/arch/aarch64/gen/usertc.c15 Jul 2020 22:58:33 -  
> 1.2
> +++ lib/libc/arch/aarch64/gen/usertc.c24 Jul 2021 23:45:52 -
> @@ -29,7 +29,7 @@ agtimer_get_timecount(struct timecounter
>*/
>   __asm volatile("isb" ::: "memory");
>   __asm volatile("mrs %x0, CNTVCT_EL0" : "=r" (val));
> - return (val & 0xffffffff);
> + return val;
>  }

The masking here is deliberate as without the masking the errata
mentioned in the comment comes into play.  So please drop this.

>  static int
> Index: lib/libc/arch/amd64/gen/usertc.c
> ===
> RCS file: /d/cvs/src/lib/libc/arch/amd64/gen/usertc.c,v
> retrieving revision 1.3
> diff -u -p -r1.3 usertc.c
> --- lib/libc/arch/amd64/gen/usertc.c  23 Aug 2020 21:38:47 -  1.3
> +++ lib/libc/arch/amd64/gen/usertc.c  24 Jul 2021 2

Re: riscv64 usertc

2021-07-24 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Sat, 24 Jul 2021 21:22:23 +0200
> 
> hifive /usr/src/regress/sys/kern/gettimeofday$ doas -u build time 
> obj/gettimeofday
> 6.64 real 6.63 user 0.02 sys
> hifive /usr/src/regress/sys/kern/gettimeofday$ doas -u build env 
> LIBC_NOUSERTC=1 time obj/gettimeofday
> 6.48 real 0.60 user 5.42 sys
> 
> Initially I thought that a more descriptive name than TC_TB could be
> helpful (TC_TIMEBASE?).  But since powerpc also uses TC_TB I think it's
> fine as a first step.  We can change it later easily, it's just a define
> name.
> 
> I haven't even built a release with this, not sure it's worth it.
> If you have cpu cycles to spare, please say so.
> 
> ok?

Two small nits below.  With that fixed, ok kettenis@

> Index: sys/arch/riscv64/include/timetc.h
> ===
> RCS file: /cvs/src/sys/arch/riscv64/include/timetc.h,v
> retrieving revision 1.2
> diff -u -p -r1.2 timetc.h
> --- sys/arch/riscv64/include/timetc.h 12 May 2021 01:20:52 -  1.2
> +++ sys/arch/riscv64/include/timetc.h 23 Jul 2021 13:30:08 -
> @@ -19,5 +19,6 @@
>  #ifndef _MACHINE_TIMETC_H_
>  #define _MACHINE_TIMETC_H_
>  
> +#define TC_TB1
>  
>  #endif   /* _MACHINE_TIMETC_H_ */
> Index: sys/arch/riscv64/riscv64/clock.c
> ===
> RCS file: /cvs/src/sys/arch/riscv64/riscv64/clock.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 clock.c
> --- sys/arch/riscv64/riscv64/clock.c  21 Jun 2021 15:19:39 -  1.2
> +++ sys/arch/riscv64/riscv64/clock.c  23 Jul 2021 13:29:42 -
> @@ -47,6 +47,7 @@ static struct timecounter tb_timecounter
>   .tc_name = "tb",
>   .tc_quality = 0,
>   .tc_priv = NULL,
> + .tc_user = TC_TB,
>  };
>  
>  void cpu_startclock(void);
> Index: lib/libc/arch/riscv64/gen/usertc.c
> ===
> RCS file: /cvs/src/lib/libc/arch/riscv64/gen/usertc.c,v
> retrieving revision 1.1
> diff -u -p -r1.1 usertc.c
> --- lib/libc/arch/riscv64/gen/usertc.c29 Apr 2021 18:33:36 -  
> 1.1
> +++ lib/libc/arch/riscv64/gen/usertc.c24 Jul 2021 17:07:01 -
> @@ -1,6 +1,7 @@
>  /*   $OpenBSD: usertc.c,v 1.1 2021/04/29 18:33:36 drahn Exp $*/
>  /*
>   * Copyright (c) 2020 Paul Irofti 
> + * Copyright (c) 2021 Jeremie Courreges-Anglas 
>   *
>   * Permission to use, copy, modify, and distribute this software for any
>   * purpose with or without fee is hereby granted, provided that the above
> @@ -18,4 +19,24 @@
>  #include 
>  #include 
>  
> -int (*const _tc_get_timecount)(struct timekeep *, u_int *) = NULL;
> +static inline u_int
> +rdtime(void)
> +{
> + uint64_t ret;
> + asm volatile("rdtime %0" : "=r"(ret));

Can you make that __asm volatile?

> + return ret & 0xffffffff;

The & 0xffffffff isn't really necessary here and the kernel doesn't do
it.  So I'd drop that bit and simply return ret.

> +}
> +
> +static int
> +tc_get_timecount(struct timekeep *tk, u_int *tc)
> +{
> + switch (tk->tk_user) {
> + case TC_TB:
> + *tc = rdtime();
> + return 0;
> + }
> +
> + return -1;
> +}
> +
> +int (*const _tc_get_timecount)(struct timekeep *, u_int *) = 
> tc_get_timecount;
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 
> 



Re: readelf(1) on riscv64: pretty-print machine name

2021-07-24 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Sat, 24 Jul 2021 18:17:18 +0200
> 
> Before:
>  Machine:   : f3
> After:
>  Machine:   RISC-V
> 
> ok?

sure

> Index: binutils/readelf.c
> ===
> RCS file: /cvs/src/gnu/usr.bin/binutils-2.17/binutils/readelf.c,v
> retrieving revision 1.22
> diff -u -p -r1.22 readelf.c
> --- binutils/readelf.c3 May 2021 08:53:25 -   1.22
> +++ binutils/readelf.c24 Jul 2021 16:06:13 -
> @@ -1733,6 +1733,7 @@ get_machine_name (unsigned e_machine)
>  case EM_ALTERA_NIOS2:return "Altera Nios II";
>  case EM_XC16X:   return "Infineon Technologies xc16x";
>  case EM_AARCH64: return "AArch64";
> +case EM_RISCV:   return "RISC-V";
>  default:
>snprintf (buff, sizeof (buff), _(": %x"), e_machine);
>return buff;
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 
> 



Re: riscv64: slightly optimized copyin/copyout/kcopy

2021-07-23 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Fri, 23 Jul 2021 13:54:31 +0200
> 
> On Fri, Jul 23 2021, Mark Kettenis  wrote:
> >> From: Jeremie Courreges-Anglas 
> >> Date: Fri, 23 Jul 2021 11:54:51 +0200
> >> Content-Type: text/plain
> >> 
> >> 
> >> I've been using a variation of this diff on my hifive unmatched since
> >> a few days.  The goal is to at least optimize the aligned cases by using
> >> 8 or 4 bytes loads/stores.  On this hifive unmatched, I found that
> >> unaligned 8 or 4 bytes loads/stores loops are utterly slow, much slower
> >> than equivalent 1 byte loads/stores (say 40x slower).
> >> 
> >> This improves eg i/o throughput and shaves off between 10 and 15s out of
> >> a total 11m30s in ''make clean; make -j4'' kernel builds.
> >> 
> >> I have another diff that tries to re-align initially unaligned addresses
> >> if possible but it's uglier and it's hard to tell whether it makes any
> >> difference in real life.
> >> 
> >> ok?
> >> 
> >> 
> >> Index: copy.S
> >> ===
> >> RCS file: /d/cvs/src/sys/arch/riscv64/riscv64/copy.S,v
> >> retrieving revision 1.6
> >> diff -u -p -p -u -r1.6 copy.S
> >> --- copy.S 28 Jun 2021 18:53:10 -  1.6
> >> +++ copy.S 23 Jul 2021 07:45:16 -
> >> @@ -49,8 +49,38 @@ ENTRY(copyin)
> >>SWAP_FAULT_HANDLER(a3, a4, a5)
> >>ENTER_USER_ACCESS(a4)
> >>  
> >> -// XXX optimize?
> >>  .Lcopyio:
> >> +.Lcopy8:
> >> +  li  a5, 8
> >> +  bltua2, a5, .Lcopy4
> >> +
> >> +  or  a7, a0, a1
> >> +  andia7, a7, 7
> >> +  bneza7, .Lcopy4
> >> +
> >> +1:ld  a4, 0(a0)
> >> +  addia0, a0, 8
> >> +  sd  a4, 0(a1)
> >> +  addia1, a1, 8
> >> +  addia2, a2, -8
> >> +  bgtua2, a5, 1b
> >
> > Shouldn't this be
> >
> > bgeua2, a5, 1b
> 
> Yes, that's better indeed, thanks!  Updated diff.

ok kettenis@

> Index: copy.S
> ===
> RCS file: /d/cvs/src/sys/arch/riscv64/riscv64/copy.S,v
> retrieving revision 1.6
> diff -u -p -p -u -r1.6 copy.S
> --- copy.S28 Jun 2021 18:53:10 -  1.6
> +++ copy.S23 Jul 2021 11:52:54 -
> @@ -49,8 +49,38 @@ ENTRY(copyin)
>   SWAP_FAULT_HANDLER(a3, a4, a5)
>   ENTER_USER_ACCESS(a4)
>  
> -// XXX optimize?
>  .Lcopyio:
> +.Lcopy8:
> + li  a5, 8
> + bltua2, a5, .Lcopy4
> +
> + or  a7, a0, a1
> + andia7, a7, 7
> + bneza7, .Lcopy4
> +
> +1:   ld  a4, 0(a0)
> + addia0, a0, 8
> + sd  a4, 0(a1)
> + addia1, a1, 8
> + addia2, a2, -8
> + bgeua2, a5, 1b
> +
> +.Lcopy4:
> + li  a5, 4
> + bltua2, a5, .Lcopy1
> +
> + andia7, a7, 3
> + bneza7, .Lcopy1
> +
> +1:   lw  a4, 0(a0)
> + addia0, a0, 4
> + sw  a4, 0(a1)
> + addia1, a1, 4
> + addia2, a2, -4
> + bgeua2, a5, 1b
> +
> +.Lcopy1:
> + beqza2, .Lcopy0
>  1:   lb  a4, 0(a0)
>   addia0, a0, 1
>   sb  a4, 0(a1)
> @@ -58,6 +88,7 @@ ENTRY(copyin)
>   addia2, a2, -1
>   bneza2, 1b
>  
> +.Lcopy0:
>   EXIT_USER_ACCESS(a4)
>   SET_FAULT_HANDLER(a3, a4)
>  .Lcopyiodone:
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 



Re: riscv64: slightly optimized copyin/copyout/kcopy

2021-07-23 Thread Mark Kettenis
> From: Jeremie Courreges-Anglas 
> Date: Fri, 23 Jul 2021 11:54:51 +0200
> Content-Type: text/plain
> 
> 
> I've been using a variation of this diff on my hifive unmatched for
> a few days.  The goal is to at least optimize the aligned cases by using
> 8 or 4 bytes loads/stores.  On this hifive unmatched, I found that
> unaligned 8 or 4 bytes loads/stores loops are utterly slow, much slower
> than equivalent 1 byte loads/stores (say 40x slower).
> 
> This improves eg i/o throughput and shaves off between 10 and 15s out of
> a total 11m30s in ''make clean; make -j4'' kernel builds.
> 
> I have another diff that tries to re-align initially unaligned addresses
> if possible but it's uglier and it's hard to tell whether it makes any
> difference in real life.
> 
> ok?
> 
> 
> Index: copy.S
> ===
> RCS file: /d/cvs/src/sys/arch/riscv64/riscv64/copy.S,v
> retrieving revision 1.6
> diff -u -p -p -u -r1.6 copy.S
> --- copy.S28 Jun 2021 18:53:10 -  1.6
> +++ copy.S23 Jul 2021 07:45:16 -
> @@ -49,8 +49,38 @@ ENTRY(copyin)
>   SWAP_FAULT_HANDLER(a3, a4, a5)
>   ENTER_USER_ACCESS(a4)
>  
> -// XXX optimize?
>  .Lcopyio:
> +.Lcopy8:
> + li  a5, 8
> + bltua2, a5, .Lcopy4
> +
> + or  a7, a0, a1
> + andia7, a7, 7
> + bneza7, .Lcopy4
> +
> +1:   ld  a4, 0(a0)
> + addia0, a0, 8
> + sd  a4, 0(a1)
> + addia1, a1, 8
> + addia2, a2, -8
> + bgtua2, a5, 1b

Shouldn't this be

bgeua2, a5, 1b

> +
> +.Lcopy4:
> + li  a5, 4
> + bltua2, a5, .Lcopy1
> +
> + andia7, a7, 3
> + bneza7, .Lcopy1
> +
> +1:   lw  a4, 0(a0)
> + addia0, a0, 4
> + sw  a4, 0(a1)
> + addia1, a1, 4
> + addia2, a2, -4
> + bgtua2, a5, 1b

Same here?

> +
> +.Lcopy1:
> + beqza2, .Lcopy0
>  1:   lb  a4, 0(a0)
>   addia0, a0, 1
>   sb  a4, 0(a1)
> @@ -58,6 +88,7 @@ ENTRY(copyin)
>   addia2, a2, -1
>   bneza2, 1b
>  
> +.Lcopy0:
>   EXIT_USER_ACCESS(a4)
>   SET_FAULT_HANDLER(a3, a4)
>  .Lcopyiodone:
> 
> 
> -- 
> jca | PGP : 0x1524E7EE / 5135 92C1 AD36 5293 2BDF  DDCC 0DFA 74AE 1524 E7EE
> 
> 



Re: ix(4)/riscv64: Make ix(4) work when MSI-X interrupts aren't available

2021-07-21 Thread Mark Kettenis
> Date: Wed, 21 Jul 2021 15:15:11 +1000
> From: Jonathan Matthew 
> 
> On Tue, Jul 20, 2021 at 02:21:39PM +0200, Mark Kettenis wrote:
> > > Date: Tue, 20 Jul 2021 21:55:56 +1000
> > > From: Jonathan Matthew 
> > > 
> > > On Mon, Jul 19, 2021 at 07:37:10PM -0400, Ashton Fagg wrote:
> > > > I have an Intel 82599 10 gigabit ethernet card I wanted to get working
> > > > on my SiFive Unmatched board.
> > > > 
> > > > I found the ix(4) driver has some weirdness around MSI-X
> > > > interrupts. While the driver supports operating both with and without
> > > > MSI-X support, it's hard-coded via a flag rather than dynamically 
> > > > checking
> > > > if it's available. If the flag is set (which it always is right now),
> > > > but MSI-X isn't available, the driver will throw an error and the device
> > > > won't work:
> > > > 
> > > > ix0 at pci7 dev 0 function 0 "Intel 82599" rev 0x01ixgbe_allocate_msix: 
> > > > pci_intr_map_msix vec 0 failed
> > > > 
> > > > The root cause is this call failing in if_ix.c:
> > > > 
> > > > if (pci_intr_map_msix(pa, i, )) {
> > > > printf("ixgbe_allocate_msix: "
> > > > "pci_intr_map_msix vec %d failed\n", i);
> > > > error = ENOMEM;
> > > > goto fail;
> > > > }
> > > > 
> > > > 
> > > > Because in _pci_intr_map_msix (in sys/arch/riscv64/dev/pci_machdep.c):
> > > > 
> > > > if ((pa->pa_flags & PCI_FLAGS_MSI_ENABLED) == 0 ||
> > > > pci_get_capability(pc, tag, PCI_CAP_MSI, NULL, NULL) == 0)
> > > > return -1;
> > > > 
> > > > The PCI attach flags would not have PCI_FLAGS_MSI_ENABLED set.
> > > > 
> > > > The following diff remedies that by checking if PCI_FLAGS_MSI_ENABLED is
> > > > actually set, rather than just trying and failing because the hard-coded
> > > > flag says so. It also enables ix(4) in the kernel config for
> > > > riscv64. Effectively, the driver will now only try to use MSI-X if the
> > > > machine is advertising it to be available.
> > > 
> > > I'd rather not have to do this in every driver.  We otherwise check that 
> > > flag
> > > inside the pci interrupt functions rather than in the driver code, so we
> > > should do so in pci_intr_msix_count() too, since that's what we call in
> > > multi-queue nic drivers to decide whether to use MSI-X.  Drivers that only
> > > want a single vector will just call pci_intr_map_msix() and fall back to 
> > > MSI
> > > or legacy interrupts if that fails.
> > > 
> > > I posted the alternate version of this diff to misc@ a few days ago,
> > > which repeats the checks used to set PCI_FLAGS_MSI_ENABLED in
> > > pci_intr_msix_count(), rather than passing in struct
> > > pci_attach_args, in case we prefer to do it that way.
> > 
> > I don't really read misc@, so don't post your patches there.
> 
> Right, it was just there for testing.
> 
> > 
> > > Mark, what do you think?
> > 
> > Yeah, pci_intr_msix_count() should return 0 if MSIs are not
> > supported.  A bit strange though to pass both pa and pa->pa_tag.  I'd
> > change the function to only take pa as an argument.
> 
> Yes, on second look that makes sense.  Here's a better diff with that change,
> and that also doesn't break arches without __HAVE_PCI_MSIX.  ok?

ok kettenis@

> Index: if_bnxt.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v
> retrieving revision 1.32
> diff -u -p -u -p -r1.32 if_bnxt.c
> --- if_bnxt.c 24 Apr 2021 09:37:46 -  1.32
> +++ if_bnxt.c 21 Jul 2021 03:24:44 -
> @@ -537,7 +537,7 @@ bnxt_attach(struct device *parent, struc
>   sc->sc_flags |= BNXT_FLAG_MSIX;
>   intrstr = pci_intr_string(sc->sc_pc, ih);
>  
> - nmsix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
> + nmsix = pci_intr_msix_count(pa);
>   if (nmsix > 1) {
>   sc->sc_ih = pci_intr_establish(sc->sc_pc, ih,
>   IPL_NET | IPL_MPSAFE, bnxt_admin_intr, sc, 
> DEVNAME(sc));
> Index: if_ix.c
> =

Re: ix(4)/riscv64: Make ix(4) work when MSI-X interrupts aren't available

2021-07-20 Thread Mark Kettenis
> Date: Tue, 20 Jul 2021 21:55:56 +1000
> From: Jonathan Matthew 
> 
> On Mon, Jul 19, 2021 at 07:37:10PM -0400, Ashton Fagg wrote:
> > I have an Intel 82599 10 gigabit ethernet card I wanted to get working
> > on my SiFive Unmatched board.
> > 
> > I found the ix(4) driver has some weirdness around MSI-X
> > interrupts. While the driver supports operating both with and without
> > MSI-X support, it's hard-coded via a flag rather than dynamically checking
> > if it's available. If the flag is set (which it always is right now),
> > but MSI-X isn't available, the driver will throw an error and the device
> > won't work:
> > 
> > ix0 at pci7 dev 0 function 0 "Intel 82599" rev 0x01ixgbe_allocate_msix: 
> > pci_intr_map_msix vec 0 failed
> > 
> > The root cause is this call failing in if_ix.c:
> > 
> > if (pci_intr_map_msix(pa, i, )) {
> > printf("ixgbe_allocate_msix: "
> > "pci_intr_map_msix vec %d failed\n", i);
> > error = ENOMEM;
> > goto fail;
> > }
> > 
> > 
> > Because in _pci_intr_map_msix (in sys/arch/riscv64/dev/pci_machdep.c):
> > 
> > if ((pa->pa_flags & PCI_FLAGS_MSI_ENABLED) == 0 ||
> > pci_get_capability(pc, tag, PCI_CAP_MSI, NULL, NULL) == 0)
> > return -1;
> > 
> > The PCI attach flags would not have PCI_FLAGS_MSI_ENABLED set.
> > 
> > The following diff remedies that by checking if PCI_FLAGS_MSI_ENABLED is
> > actually set, rather than just trying and failing because the hard-coded
> > flag says so. It also enables ix(4) in the kernel config for
> > riscv64. Effectively, the driver will now only try to use MSI-X if the
> > machine is advertising it to be available.
> 
> I'd rather not have to do this in every driver.  We otherwise check that flag
> inside the pci interrupt functions rather than in the driver code, so we
> should do so in pci_intr_msix_count() too, since that's what we call in
> multi-queue nic drivers to decide whether to use MSI-X.  Drivers that only
> want a single vector will just call pci_intr_map_msix() and fall back to MSI
> or legacy interrupts if that fails.
> 
> I posted the alternate version of this diff to misc@ a few days ago,
> which repeats the checks used to set PCI_FLAGS_MSI_ENABLED in
> pci_intr_msix_count(), rather than passing in struct
> pci_attach_args, in case we prefer to do it that way.

I don't really read misc@, so don't post your patches there.

> Mark, what do you think?

Yeah, pci_intr_msix_count() should return 0 if MSIs are not
supported.  A bit strange though to pass both pa and pa->pa_tag.  I'd
change the function to only take pa as an argument.

> Index: if_bnxt.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_bnxt.c,v
> retrieving revision 1.32
> diff -u -p -u -p -r1.32 if_bnxt.c
> --- if_bnxt.c 24 Apr 2021 09:37:46 -  1.32
> +++ if_bnxt.c 20 Jul 2021 11:23:22 -
> @@ -537,7 +537,7 @@ bnxt_attach(struct device *parent, struc
>   sc->sc_flags |= BNXT_FLAG_MSIX;
>   intrstr = pci_intr_string(sc->sc_pc, ih);
>  
> - nmsix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
> + nmsix = pci_intr_msix_count(pa, pa->pa_tag);
>   if (nmsix > 1) {
>   sc->sc_ih = pci_intr_establish(sc->sc_pc, ih,
>   IPL_NET | IPL_MPSAFE, bnxt_admin_intr, sc, 
> DEVNAME(sc));
> Index: if_ix.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_ix.c,v
> retrieving revision 1.178
> diff -u -p -u -p -r1.178 if_ix.c
> --- if_ix.c   22 Dec 2020 23:25:37 -  1.178
> +++ if_ix.c   20 Jul 2021 11:23:22 -
> @@ -1783,7 +1783,7 @@ ixgbe_setup_msix(struct ix_softc *sc)
>   if (!ixgbe_enable_msix)
>   return;
>  
> - nmsix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
> + nmsix = pci_intr_msix_count(pa, pa->pa_tag);
>   if (nmsix <= 1)
>   return;
>  
> Index: if_ixl.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_ixl.c,v
> retrieving revision 1.74
> diff -u -p -u -p -r1.74 if_ixl.c
> --- if_ixl.c  26 Mar 2021 08:02:34 -  1.74
> +++ if_ixl.c  20 Jul 2021 11:23:22 -
> @@ -1795,7 +1795,7 @@ ixl_attach(struct device *parent, struct
>   }
>  
>   if (pci_intr_map_msix(pa, 0, >sc_ih) == 0) {
> - int nmsix = pci_intr_msix_count(pa->pa_pc, pa->pa_tag);
> + int nmsix = pci_intr_msix_count(pa, pa->pa_tag);
>   if (nmsix > 1) { /* we used 1 (the 0th) for the adminq */
>   nmsix--;
>  
> Index: if_mcx.c
> ===
> RCS file: /cvs/src/sys/dev/pci/if_mcx.c,v
> retrieving revision 1.101
> diff -u -p -u -p -r1.101 if_mcx.c
> --- if_mcx.c  2 Jun 2021 19:16:11 -   1.101
> +++ if_mcx.c  20 Jul 

Re: Sync dwctwo(4) with NetBSD

2021-07-14 Thread Mark Kettenis
> Date: Wed, 14 Jul 2021 12:20:08 +0200
> From: Marcus Glocker 
> 
> On Wed, 14 Jul 2021 11:53:50 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Mon, 12 Jul 2021 11:56:28 +1000
> > > From: Jonathan Gray 
> > > 
> > > On Sun, Jul 11, 2021 at 05:55:14PM +0200, Marcus Glocker wrote:  
> > > > dwctwo(4) on the Raspberry Pi 3 has some issues today.  Basically
> > > > uhub2 doesn't work which shows in:
> > > > 
> > > > - mue(4) doesn't attach on uhub2 port 1.
> > > > - Removable devices don't attach on uhub2 port 2 and 3.  
> > > 
> > > on the 3b+ you mean, smsc(4) is fine on the 3b
> > >   
> > > > 
> > > > On NetBSD this works fine, but looking for specific fixes in the
> > > > NetBSD code base shows quickly that we have grown a big gap
> > > > between both code bases.  This makes syncing back specific fixes,
> > > > if you can identify them at all, very difficult, if not
> > > > impossible.
> > > > 
> > > > Therefore I took the path of syncing the entire NetBSD code base
> > > > with ours.  This fixes the above issues on my Raspberry Pi 3
> > > > which is kind of nice, especially since we can use the integrated
> > > > mue(4) Ethernet controller by default now.
> > > > 
> > > > I decided to keep the list_* queue macros from Linux, and use the
> > > > Linux headers which are already included in our code base for
> > > > drm(4).  This just makes future syncing easier.  
> > > 
> > > You should not do this.  They exist because there are millions of
> > > lines of rapidly changing code in drm and only have the barest of
> > > what is required for drm.  dwc2 is quite small in comparison and
> > > I'm sure there are still problems in the drm replacement linux
> > > functions.  
> > 
> > When I looked at the dwc2 code in the past I learned that replacing
> > the Linux list APIs with the interfaces from <sys/queue.h> is not
> > trivial.  IMHO the Linux list APIs are rather dangerous as they make
> > doing list validation almost impossible but they allow some operations
> > that the BSD interfaces don't implement.  So I think switching back to
> > the Linux APIs in this codebase makes sense.  But the code should
> > probably use its own copy of those interfaces to avoid any unwanted
> > interaction with future drm updates as Linux really doesn't have
> > stable APIs.
> 
> I'm glad you are highlighting this again, since looking once more at the
> porting of the list_* macros just gave me a headache.  As you
> mentioned correctly, there are certain operations of the list_* macros
> which our queue macros don't support, and in the past dwctwo(4)
> introduced things like "linked" and "in_freelist" variables to
> work around that, which isn't nice IMO, and turns the code base out of
> sync again.
> 
> In my initial version of the synced code I used local copies of the
> Linux headers located in sys/dev/usb/dwc2.  How about importing the
> NetBSD ported Linux headers which are required to sys/dev/usb/dwc2 from
> sys/external/bsd/common/include/linux?
> 
> http://cvsweb.netbsd.org/bsdweb.cgi/src/sys/external/bsd/common/include/linux/?only_with_tag=MAIN
> 
> I can shoot an adapted diff for that shortly.

Something like that might make sense since that is the implementation
that the NetBSD code was tested against.

I would prefer if you just imported the bits that you actually need,
so maybe cut down the list.h file to size and avoid the other header
files if you can.



Re: Sync dwctwo(4) with NetBSD

2021-07-14 Thread Mark Kettenis
> Date: Mon, 12 Jul 2021 11:56:28 +1000
> From: Jonathan Gray 
> 
> On Sun, Jul 11, 2021 at 05:55:14PM +0200, Marcus Glocker wrote:
> > dwctwo(4) on the Raspberry Pi 3 has some issues today.  Basically uhub2
> > doesn't work which shows in:
> > 
> > - mue(4) doesn't attach on uhub2 port 1.
> > - Removable devices don't attach on uhub2 port 2 and 3.
> 
> on the 3b+ you mean, smsc(4) is fine on the 3b
> 
> > 
> > On NetBSD this works fine, but looking for specific fixes in the NetBSD
> > code base shows quickly that we have grown a big gap between both code
> > bases.  This makes syncing back specific fixes, if you can identify them
> > at all, very difficult, if not impossible.
> > 
> > Therefore I took the path of syncing the entire NetBSD code base with
> > ours.  This fixes the above issues on my Raspberry Pi 3 which is kind
> > of nice, especially since we can use the integrated mue(4) Ethernet
> > controller by default now.
> > 
> > I decided to keep the list_* queue macros from Linux, and use the Linux
> > headers which are already included in our code base for drm(4).  This
> > just makes future syncing easier.
> 
> You should not do this.  They exist because there are millions of
> lines of rapidly changing code in drm and only have the barest of what
> is required for drm.  dwc2 is quite small in comparison and I'm sure
> there are still problems in the drm replacement linux functions.

When I looked at the dwc2 code in the past I learned that replacing
the Linux list APIs with the interfaces from <sys/queue.h> is not
trivial.  IMHO the Linux list APIs are rather dangerous as they make
doing list validation almost impossible but they allow some operations
that the BSD interfaces don't implement.  So I think switching back to
the Linux APIs in this codebase makes sense.  But the code should
probably use its own copy of those interfaces to avoid any unwanted
interaction with future drm updates as Linux really doesn't have
stable APIs.



Re: dwiic(4): wait for tx empty when hitting tx limit

2021-07-13 Thread Mark Kettenis
> Date: Tue, 13 Jul 2021 21:29:35 +0200
> From: Patrick Wildt 
> 
> Am Mon, Jul 05, 2021 at 07:52:28PM +0200 schrieb Mark Kettenis:
> > > Date: Mon, 5 Jul 2021 19:30:28 +0200
> > > From: Patrick Wildt 
> > > 
> > > Am Mon, Jul 05, 2021 at 07:07:24PM +0200 schrieb Mark Kettenis:
> > > > > Date: Mon, 5 Jul 2021 19:02:32 +0200
> > > > > From: Patrick Wildt 
> > > > > 
> > > > > Am Mon, Jul 05, 2021 at 06:34:31PM +0200 schrieb Mark Kettenis:
> > > > > > > Date: Mon, 5 Jul 2021 00:04:24 +0200
> > > > > > > From: Patrick Wildt 
> > > > > > > 
> > > > > > > Hi,
> > > > > > > 
> > > > > > > I had trouble interfacing with a machine's IPMI through dwiic(4). 
> > > > > > >  What
> > > > > > > I saw was that when sending 'bigger' commands, it would never 
> > > > > > > receive
> > > > > > > the STOP bit interrupt.
> > > > > > > 
> > > > > > > The trouble is, as can be seen in the log, that we want to send 
> > > > > > > (it
> > > > > > > says read, but it's a write OP, so it's send) 20 bytes, but the tx
> > > > > > > limit says 14.
> > > > > > > 
> > > > > > > What we should do is send 14 bytes, then wait for it to send us 
> > > > > > > the
> > > > > > > tx empty interrupt (like we do when we first enable the 
> > > > > > > controller),
> > > > > > > and then re-read the tx limit.  The last line in the log is some
> > > > > > > debug print I added for myself, but is not part of the diff.
> > > > > > > 
> > > > > > > With this, I was finally able to change the IPMI password and 
> > > > > > > regain
> > > > > > > access to the web interface after updating the BMC's firmware...
> > > > > > > 
> > > > > > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 3, flags 
> > > > > > > 0x00
> > > > > > > dwiic0: dwiic_i2c_exec: need to read 3 bytes, can send 14 read 
> > > > > > > reqs
> > > > > > > dwiic0: dwiic_i2c_exec: op 5, addr 0x10, cmdlen 1, len 33, flags 
> > > > > > > 0x00
> > > > > > > dwiic0: dwiic_i2c_exec: need to read 33 bytes, can send 15 read 
> > > > > > > reqs
> > > > > > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 20, flags 
> > > > > > > 0x00
> > > > > > > dwiic0: dwiic_i2c_exec: need to read 20 bytes, can send 14 read 
> > > > > > > reqs
> > > > > > > dwiic0: new tx limit 8
> > > > > > > 
> > > > > > > Opinions? ok?
> > > > > > 
> > > > > > I think you're on to something.  But this needs to handle 
> > > > > > I2C_F_POLL.
> > > > > 
> > > > > True that.  The previous code, which waits for the controller to 
> > > > > accept
> > > > > commands, just does DELAY(200), but I'm not sure that's good enough 
> > > > > for
> > > > > in between transfers.  One can apparently though poll through the raw
> > > > > interrupt status register, where the interrupt mask isn't applied.  So
> > > > > maybe like that?  Guess I should try setting ipmi to polling mode...
> > > > 
> > > > Polling the interrupt status register should work I suppose.  But for
> > > > read operations we actually poll the DW_IC_RXFLR register.
> > > 
> > > Yeah, that would work for TX as well.  Maybe something like this, but
> > > then the diff still needs to address what happens when we timeout and
> > > there's still no tx_limit > 0.  Maybe timeout like the read stuff:
> > > 
> > > if (rx_avail == 0) {
> > > printf("%s: timed out reading remaining %d\n",
> > > sc->sc_dev.dv_xname, (int)(len - readpos));
> > > sc->sc_i2c_xfer.error = 1;
> > > sc->sc_busy = 0;
> > > 
> > > return (1);
> > > }
> > 
> > Yes.
> 
> This works for me. ok?

ok kettenis@

> diff --git a/sys/dev/ic/dwiic.c b/sys/dev/ic/dw

Re: ddb trace: fix output for too many arguments

2021-07-12 Thread Mark Kettenis
> Date: Mon, 12 Jul 2021 20:11:30 +0200
> From: Jasper Lievisse Adriaanse 
> 
> On Sun, Jul 11, 2021 at 03:58:05PM +0200, Jasper Lievisse Adriaanse wrote:
> > Hi,
> > 
> > When printing a trace from ddb, some architectures are limited by the 
> > number of
> > registers which are used to pass arguments. If the number of arguments to a 
> > function
> > exceeded this number, the code in db_stack_trace_print() would print that 
> > many arguments
> > without any indication that one or more arguments aren't printed.
> > 
> > Here's a diff that tweaks the output to make it clear there were more 
> > arguments.
> > Do we want to print ',...' for each omitted argument (like this diff does)
> > or perhaps just a single ',...'?
> 
> I think just printing a single instance of ',...' gets the point across.
> OK?

Actually, since we use -msave-args on amd64 the arguments are saved on
the stack.  I think this means there is no limit on the number of
arguments we can print...

> Index: arch/amd64/amd64/db_trace.c
> ===
> RCS file: /cvs/src/sys/arch/amd64/amd64/db_trace.c,v
> retrieving revision 1.53
> diff -u -p -r1.53 db_trace.c
> --- arch/amd64/amd64/db_trace.c   14 May 2020 06:58:54 -  1.53
> +++ arch/amd64/amd64/db_trace.c   12 Jul 2021 18:08:21 -
> @@ -137,7 +137,7 @@ db_stack_trace_print(db_expr_t addr, int
>  
>   lastframe = 0;
>   while (count && frame != 0) {
> - int narg;
> + int narg, extra_args = 0;
>   unsigned inti;
>   char *  name;
>   db_expr_t   offset;
> @@ -165,8 +165,12 @@ db_stack_trace_print(db_expr_t addr, int
>   }
>  
>   narg = db_ctf_func_numargs(sym);
> - if (narg < 0 || narg > 6)
> + if (narg < 0)
>   narg = 6;
> + else if (narg > 6) {
> + narg = 6;
> + extra_args = 1;
> + }
>  
>   if (name == NULL)
>   (*pr)("%lx(", callpc);
> @@ -204,6 +208,10 @@ db_stack_trace_print(db_expr_t addr, int
>   if (--narg != 0)
>   (*pr)(",");
>   }
> +
> + if (extra_args)
> + (*pr)(",...");
> +
>   (*pr)(") at ");
>   db_printsym(callpc, DB_STGY_PROC, pr);
>   (*pr)("\n");
> Index: arch/powerpc/ddb/db_trace.c
> ===
> RCS file: /cvs/src/sys/arch/powerpc/ddb/db_trace.c,v
> retrieving revision 1.17
> diff -u -p -r1.17 db_trace.c
> --- arch/powerpc/ddb/db_trace.c   14 May 2020 06:58:54 -  1.17
> +++ arch/powerpc/ddb/db_trace.c   12 Jul 2021 18:08:21 -
> @@ -123,7 +123,7 @@ db_stack_trace_print(db_expr_t addr, int
>   Elf_Sym *sym;
>   char*name;
>   char c, *cp = modif;
> - int  i, narg, trace_proc = 0;
> + int  i, narg, trace_proc = 0, extra_args = 0;
>  
>   while ((c = *cp++) != 0) {
>   if (c == 't')
> @@ -158,8 +158,12 @@ db_stack_trace_print(db_expr_t addr, int
>   (*pr)("at 0x%lx", lr - 4);
>   } else {
>   narg = db_ctf_func_numargs(sym);
> - if (narg < 0 || narg > 8)
> + if (narg < 0)
>   narg = 8;
> + else if (narg > 8) {
> + narg = 8;
> + extra_args = 1;
> + }
>  
>   (*pr)("%s(", name);
>  
> @@ -172,6 +176,9 @@ db_stack_trace_print(db_expr_t addr, int
>   (*pr)(",");
>   }
>   }
> +
> + if (extra_args)
> + (*pr)(",...");
>  
>   (*pr)(") at ");
>   db_printsym(lr - 4, DB_STGY_PROC, pr);
> 
> -- 
> jasper
> 
> 



Re: dwiic(4): wait for tx empty when hitting tx limit

2021-07-05 Thread Mark Kettenis
> Date: Mon, 5 Jul 2021 19:30:28 +0200
> From: Patrick Wildt 
> 
> Am Mon, Jul 05, 2021 at 07:07:24PM +0200 schrieb Mark Kettenis:
> > > Date: Mon, 5 Jul 2021 19:02:32 +0200
> > > From: Patrick Wildt 
> > > 
> > > Am Mon, Jul 05, 2021 at 06:34:31PM +0200 schrieb Mark Kettenis:
> > > > > Date: Mon, 5 Jul 2021 00:04:24 +0200
> > > > > From: Patrick Wildt 
> > > > > 
> > > > > Hi,
> > > > > 
> > > > > I had trouble interfacing with a machine's IPMI through dwiic(4).  
> > > > > What
> > > > > I saw was that when sending 'bigger' commands, it would never receive
> > > > > the STOP bit interrupt.
> > > > > 
> > > > > The trouble is, as can be seen in the log, that we want to send (it
> > > > > says read, but it's a write OP, so it's send) 20 bytes, but the tx
> > > > > limit says 14.
> > > > > 
> > > > > What we should do is send 14 bytes, then wait for it to send us the
> > > > > tx empty interrupt (like we do when we first enable the controller),
> > > > > and then re-read the tx limit.  The last line in the log is some
> > > > > debug print I added for myself, but is not part of the diff.
> > > > > 
> > > > > With this, I was finally able to change the IPMI password and regain
> > > > > access to the web interface after updating the BMC's firmware...
> > > > > 
> > > > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 3, flags 0x00
> > > > > dwiic0: dwiic_i2c_exec: need to read 3 bytes, can send 14 read reqs
> > > > > dwiic0: dwiic_i2c_exec: op 5, addr 0x10, cmdlen 1, len 33, flags 0x00
> > > > > dwiic0: dwiic_i2c_exec: need to read 33 bytes, can send 15 read reqs
> > > > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 20, flags 0x00
> > > > > dwiic0: dwiic_i2c_exec: need to read 20 bytes, can send 14 read reqs
> > > > > dwiic0: new tx limit 8
> > > > > 
> > > > > Opinions? ok?
> > > > 
> > > > I think you're on to something.  But this needs to handle I2C_F_POLL.
> > > 
> > > True that.  The previous code, which waits for the controller to accept
> > > commands, just does DELAY(200), but I'm not sure that's good enough for
> > > in between transfers.  One can apparently though poll through the raw
> > > interrupt status register, where the interrupt mask isn't applied.  So
> > > maybe like that?  Guess I should try setting ipmi to polling mode...
> > 
> > Polling the interrupt status register should work I suppose.  But for
> > read operations we actually poll the DW_IC_RXFLR register.
> 
> Yeah, that would work for TX as well.  Maybe something like this, but
> then the diff still needs to address what happens when we timeout and
> there's still no tx_limit > 0.  Maybe timeout like the read stuff:
> 
> if (rx_avail == 0) {
> printf("%s: timed out reading remaining %d\n",
> sc->sc_dev.dv_xname, (int)(len - readpos));
> sc->sc_i2c_xfer.error = 1;
> sc->sc_busy = 0;
> 
> return (1);
> }

Yes.

> diff --git a/sys/dev/ic/dwiic.c b/sys/dev/ic/dwiic.c
> index 84d97b8645b..d5d77a52b73 100644
> --- a/sys/dev/ic/dwiic.c
> +++ b/sys/dev/ic/dwiic.c
> @@ -416,6 +416,33 @@ dwiic_i2c_exec(void *cookie, i2c_op_t op, i2c_addr_t 
> addr, const void *cmdbuf,
>   tx_limit = sc->tx_fifo_depth -
>   dwiic_read(sc, DW_IC_TXFLR);
>   }
> +
> + if (I2C_OP_WRITE_P(op) && tx_limit == 0 && x < len) {
> + if (flags & I2C_F_POLL) {
> + for (retries = 1000; retries > 0; retries--) {
> + tx_limit = sc->tx_fifo_depth -
> + dwiic_read(sc, DW_IC_TXFLR);
> + if (tx_limit > 0)
> + break;
> + DELAY(50);
> + }
> + } else {
> + s = splbio();
> + dwiic_read(sc, DW_IC_CLR_INTR);
> + dwiic_write(sc, DW_IC_INTR_MASK,
> + DW_IC_INTR_TX_EMPTY);
> +
> + if (tsleep_nsec(>sc_writewait, PRIBIO,
> + "dwiic", MSEC_TO_NSEC(500)) != 0)
> + printf("%s: timed out waiting for "
> + "tx_empty intr\n",
> + sc->sc_dev.dv_xname);
> + splx(s);
> +
> + tx_limit = sc->tx_fifo_depth -
> + dwiic_read(sc, DW_IC_TXFLR);
> + }
> + }
>   }
>  
>   if (I2C_OP_STOP_P(op) && I2C_OP_WRITE_P(op)) {
> 



Re: dwiic(4): wait for tx empty when hitting tx limit

2021-07-05 Thread Mark Kettenis
> Date: Mon, 5 Jul 2021 19:02:32 +0200
> From: Patrick Wildt 
> 
> Am Mon, Jul 05, 2021 at 06:34:31PM +0200 schrieb Mark Kettenis:
> > > Date: Mon, 5 Jul 2021 00:04:24 +0200
> > > From: Patrick Wildt 
> > > 
> > > Hi,
> > > 
> > > I had trouble interfacing with a machine's IPMI through dwiic(4).  What
> > > I saw was that when sending 'bigger' commands, it would never receive
> > > the STOP bit interrupt.
> > > 
> > > The trouble is, as can be seen in the log, that we want to send (it
> > > says read, but it's a write OP, so it's send) 20 bytes, but the tx
> > > limit says 14.
> > > 
> > > What we should do is send 14 bytes, then wait for it to send us the
> > > tx empty interrupt (like we do when we first enable the controller),
> > > and then re-read the tx limit.  The last line in the log is some
> > > debug print I added for myself, but is not part of the diff.
> > > 
> > > With this, I was finally able to change the IPMI password and regain
> > > access to the web interface after updating the BMC's firmware...
> > > 
> > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 3, flags 0x00
> > > dwiic0: dwiic_i2c_exec: need to read 3 bytes, can send 14 read reqs
> > > dwiic0: dwiic_i2c_exec: op 5, addr 0x10, cmdlen 1, len 33, flags 0x00
> > > dwiic0: dwiic_i2c_exec: need to read 33 bytes, can send 15 read reqs
> > > dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 20, flags 0x00
> > > dwiic0: dwiic_i2c_exec: need to read 20 bytes, can send 14 read reqs
> > > dwiic0: new tx limit 8
> > > 
> > > Opinions? ok?
> > 
> > I think you're on to something.  But this needs to handle I2C_F_POLL.
> 
> True that.  The previous code, which waits for the controller to accept
> commands, just does DELAY(200), but I'm not sure that's good enough for
> in between transfers.  One can apparently though poll through the raw
> interrupt status register, where the interrupt mask isn't applied.  So
> maybe like that?  Guess I should try setting ipmi to polling mode...

Polling the interrupt status register should work I suppose.  But for
read operations we actually poll the DW_IC_RXFLR register.

> > > diff --git a/sys/dev/ic/dwiic.c b/sys/dev/ic/dwiic.c
> > > index 84d97b8645b..d04a7b03979 100644
> > > --- a/sys/dev/ic/dwiic.c
> > > +++ b/sys/dev/ic/dwiic.c
> > > @@ -416,6 +416,21 @@ dwiic_i2c_exec(void *cookie, i2c_op_t op, i2c_addr_t 
> > > addr, const void *cmdbuf,
> > >   tx_limit = sc->tx_fifo_depth -
> > >   dwiic_read(sc, DW_IC_TXFLR);
> > >   }
> > > +
> > > + if (I2C_OP_WRITE_P(op) && tx_limit == 0 && x < len) {
> > > + s = splbio();
> > > + dwiic_read(sc, DW_IC_CLR_INTR);
> > > + dwiic_write(sc, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY);
> > > +
> > > + if (tsleep_nsec(>sc_writewait, PRIBIO, "dwiic",
> > > + MSEC_TO_NSEC(500)) != 0)
> > > + printf("%s: timed out waiting for tx_empty "
> > > + "intr\n", sc->sc_dev.dv_xname);
> > > + splx(s);
> > > +
> > > + tx_limit = sc->tx_fifo_depth -
> > > + dwiic_read(sc, DW_IC_TXFLR);
> > > + }
> > >   }
> > >  
> > >   if (I2C_OP_STOP_P(op) && I2C_OP_WRITE_P(op)) {
> > > 
> > > 
> > 
> 



Re: dwiic(4): wait for tx empty when hitting tx limit

2021-07-05 Thread Mark Kettenis
> Date: Mon, 5 Jul 2021 00:04:24 +0200
> From: Patrick Wildt 
> 
> Hi,
> 
> I had trouble interfacing with a machine's IPMI through dwiic(4).  What
> I saw was that when sending 'bigger' commands, it would never receive
> the STOP bit interrupt.
> 
> The trouble is, as can be seen in the log, that we want to send (it
> says read, but it's a write OP, so it's send) 20 bytes, but the tx
> limit says 14.
> 
> What we should do is send 14 bytes, then wait for it to send us the
> tx empty interrupt (like we do when we first enable the controller),
> and then re-read the tx limit.  The last line in the log is some
> debug print I added for myself, but is not part of the diff.
> 
> With this, I was finally able to change the IPMI password and regain
> access to the web interface after updating the BMC's firmware...
> 
> dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 3, flags 0x00
> dwiic0: dwiic_i2c_exec: need to read 3 bytes, can send 14 read reqs
> dwiic0: dwiic_i2c_exec: op 5, addr 0x10, cmdlen 1, len 33, flags 0x00
> dwiic0: dwiic_i2c_exec: need to read 33 bytes, can send 15 read reqs
> dwiic0: dwiic_i2c_exec: op 7, addr 0x10, cmdlen 2, len 20, flags 0x00
> dwiic0: dwiic_i2c_exec: need to read 20 bytes, can send 14 read reqs
> dwiic0: new tx limit 8
> 
> Opinions? ok?

I think you're on to something.  But this needs to handle I2C_F_POLL.

> diff --git a/sys/dev/ic/dwiic.c b/sys/dev/ic/dwiic.c
> index 84d97b8645b..d04a7b03979 100644
> --- a/sys/dev/ic/dwiic.c
> +++ b/sys/dev/ic/dwiic.c
> @@ -416,6 +416,21 @@ dwiic_i2c_exec(void *cookie, i2c_op_t op, i2c_addr_t 
> addr, const void *cmdbuf,
>   tx_limit = sc->tx_fifo_depth -
>   dwiic_read(sc, DW_IC_TXFLR);
>   }
> +
> + if (I2C_OP_WRITE_P(op) && tx_limit == 0 && x < len) {
> + s = splbio();
> + dwiic_read(sc, DW_IC_CLR_INTR);
> + dwiic_write(sc, DW_IC_INTR_MASK, DW_IC_INTR_TX_EMPTY);
> +
> + if (tsleep_nsec(>sc_writewait, PRIBIO, "dwiic",
> + MSEC_TO_NSEC(500)) != 0)
> + printf("%s: timed out waiting for tx_empty "
> + "intr\n", sc->sc_dev.dv_xname);
> + splx(s);
> +
> + tx_limit = sc->tx_fifo_depth -
> + dwiic_read(sc, DW_IC_TXFLR);
> + }
>   }
>  
>   if (I2C_OP_STOP_P(op) && I2C_OP_WRITE_P(op)) {
> 
> 



Re: recvmsg returns MSG_DONTWAIT

2021-06-27 Thread Mark Kettenis
> Date: Sun, 27 Jun 2021 13:36:03 +
> From: Klemens Nanni 
> 
> On Sat, Jun 12, 2021 at 11:54:58PM -0700, Greg Steuck wrote:
> > I started with a failing test for Haskell network package on 6.9-current 
> > amd64
> > (cabal get network-3.1.2.1 && cabal v2-test)
> > 
> > network-3.1.2.1/build/spec/spec --match 
> > "/Network.Socket.ByteString/recvMsg/works well/"
> > 
> >   tests/Network/Socket/ByteStringSpec.hs:209:21: 
> >   1) Network.Socket.ByteString.recvMsg works well
> >expected: MsgFlag {fromMsgFlag = 0}
> > but got: MsgFlag {fromMsgFlag = 128}
> > 
> > ktrace says:
> > 
> >  47649 spec CALL  sendto(14,0x8a2126e838c,0x16,0,0x8a4a3622df0,0x10)
> >  47649 spec STRU  struct sockaddr { AF_INET, 127.0.0.1:9486 }
> >  47649 spec GIO   fd 14 wrote 22 bytes
> >"This is a test message"
> >  47649 spec RET   sendto 22/0x16
> >  47649 spec CALL  
> > futex(0x8a4aef6f930,0x81,1,0,0)
> >  47649 spec STRU  struct kevent { ident=13, filter=EVFILT_READ, 
> > flags=0x11, fflags=0<>, data=38, udata=0x0 }
> >  47649 spec RET   kevent 1
> >  47649 spec CALL  recvmsg(13,0x8a4a3622c50,0)
> >  47649 spec GIO   fd 13 read 22 bytes
> >"This is a test message"
> >  47649 spec STRU  struct sockaddr { AF_INET, 127.0.0.1:12293 }
> >  47649 spec STRU  struct msghdr { name=0x8a4a3622b70, namelen=16, 
> > iov=0x8a4a3622c30, iovlen=1, control=0x8a4a3622c10, controllen=0, 
> > flags=0x80 }
> >  47649 spec STRU  struct iovec { base=0x8a4a3622666, len=1002 }
> >  47649 spec RET   recvmsg 22/0x16
> > 
> > This seems to contradict recvmsg(2) which doesn't list MSG_DONTWAIT as a
> > possible value of the flags. Would this be useful as a C regress test?
> 
> Looks like this was missed in sys/kern/uipc_syscalls.c revision 1.178
> 
>   date: 2018/07/30 12:22:14;  author: mpi;  state: Exp;  lines: +10 -18;  
> commitid: K43aQe66cQkEOSbc;
>   Use FNONBLOCK instead of SS_NBIO to check/indicate that the I/O mode
>   for sockets is non-blocking.
> 
>   This allows us to G/C SS_NBIO.  Having to keep the two flags in sync
>   in a mp-safe way is complicated.
> 
>   This change introduces a behavior change in sosplice(): it can now
>   always block.  However this should not matter much due to the socket
>   lock being taken beforehand.
> 
>   bluhm@, benno@, visa@
> 
> Wording taken from mpi's commit message which is exactly what recvit()
> does in one place, i.e.
> 
>   if (fp->f_flag & FNONBLOCK)
>   mp->msg_flags |= MSG_DONTWAIT;
> 
> 
> Here's the documentation bits for it?
> Feedback? Objections? OK?

I think this points out that diff wasn't quite right.  I mean,
changing the man page doesn't fix the Haskell test, does it?

> Index: lib/libc/sys/recv.2
> ===
> RCS file: /cvs/src/lib/libc/sys/recv.2,v
> retrieving revision 1.47
> diff -u -p -r1.47 recv.2
> --- lib/libc/sys/recv.2   11 Jan 2019 06:10:13 -  1.47
> +++ lib/libc/sys/recv.2   27 Jun 2021 13:30:33 -
> @@ -248,6 +248,8 @@ for ancillary data.
>  Indicates that the packet was received as broadcast.
>  .It Dv MSG_MCAST
>  Indicates that the packet was received as multicast.
> +.It Dv MSG_DONTWAIT
> +Indicates that the I/O mode for the socket is non-blocking.
>  .El
>  .Sh RETURN VALUES
>  These calls return the number of bytes received, or \-1 if an error occurred.
> 
> 



Re: SiFive Unmatched if_cad fix

2021-06-26 Thread Mark Kettenis
> Date: Sat, 26 Jun 2021 11:24:57 +
> From: Visa Hankala 
> 
> On Fri, Jun 25, 2021 at 04:15:43PM +0200, Mark Kettenis wrote:
> > > Date: Fri, 25 Jun 2021 13:27:28 +
> > > From: Visa Hankala 
> > > 
> > > On Thu, Jun 24, 2021 at 07:02:11PM +, Mickael Torres wrote:
> > > > Hello,
> > > > 
> > > > On the risc-v SiFive Unmatched the internal cad0 ethernet interface 
> > > > stops 
> > > > working randomly after some packets are sent/received. It looks like 
> > > > it's 
> > > > because the bus_dmamap used isn't restricted to lower than 4GB physical 
> > > > addresses, and the interface itself is.
> > > 
> > > I am surprised that this has not been raised before. I also wonder if
> > > riscv64's DMA constraints are fully sane.
> > 
> > There is no DMA constraint on riscv64 yet.  We try to avoid having
> > such a constraint on platforms that don't have a long history, hoping
> > those platforms are (and remain) 64-bit "clean".  And on some modern
> > platforms (e.g. arm64) there is no memory below 4GB, so we can't have
> > a DMA constraint on those platforms.  The jury is still out where
> > riscv64 will end up.
> > 
> > There is infrastructure to have the bootloader set the DMA constraint
> > based on the device tree.
> > 
> > > > Configuring the interface for 64 bits DMA fixes the problem, and
> > > > the machine is now useable with its internal ethernet port.
> > > > 
> > > > I didn't test very extensively, but it was very easy to run into
> > > > "cad0: hresp error, interface stopped" before the patch. After the
> > > > patch it survived a couple hours of tests and ping -f from and to
> > > > it.
> > > > 
> > > > It is now depending on being compiled for __riscv64__ or not, would it 
> > > > be 
> > > > better to do it dynamically when matching "sifive,fu740-c000-gem" ?
> > > 
> > > Hopefully all 64-bit platforms have a 64-bit capable revision of the
> > > controller.
> > 
> > So far that seems to be true.  The controller on the PolarFire SoC is
> > also 64-bit capable.
> > 
> > > However, I would avoid #ifdef'ing and make the selecting of
> > > the DMA mode happen at runtime.
> > > 
> > > Below is how I had envisioned how the driver should work.
> > > 
> > > I have not tested the 64-bit side of the patch.
> > 
> > Seems to work fine.  The diff looks good to me.  Your diff does not
> > set a 4GB boundary in the bus_dmamap_create() call for the rings.  It
> > works without that, but if there really is a hardware constraint in
> > crossing a 4GB boundary we may need to add this in.
> 
> > So far I have not spotted such a restriction in the documentation
> of Zynq UltraScale+ and PolarFire SoCs. These SoCs have 64-bit capable
> GEM controllers.
> 
> The standalone GEM driver for Zynq in Xilinx embeddedsw library does
> have comments about not crossing the 0x boundary. However, those
> comments predate and seem inconsistent with 64-bit code.

Good.

> > The hardware has a register that indicates whether 64-bit DMA is
> > supported.  Maybe we should look at that instead of checking the
> > compatible string.  But let's get this in and tweak it later.
> 
> The register is undocumented on the Zynq-7000. Reading the register
> returns a constant (?) value (0x200), but the value probably means
> something different.
> 
> However, making the register access conditional to GEM version might
> work. Xilinx Zynq UltraScale+ and SiFive HiFive Unmatched have GEM
> version 0x7, whereas MicroSemi PolarFire and Xilinx Versal appear to
> have GEM version 0x107.
> 
> In addition, as cad(4) is now able to use 64-bit DMA, the DMA maps could
> be created with the 64-bit capability turned on.
> 
> The diff is untested on 64-bit hardware.

Looks good to me and works on the hifive unmatched.

ok kettenis@

> Index: dev/fdt/if_cad.c
> ===
> RCS file: src/sys/dev/fdt/if_cad.c,v
> retrieving revision 1.4
> diff -u -p -r1.4 if_cad.c
> --- dev/fdt/if_cad.c  26 Jun 2021 10:47:59 -  1.4
> +++ dev/fdt/if_cad.c  26 Jun 2021 11:08:36 -
> @@ -126,6 +126,8 @@
>  #define GEM_LADDRH(i)(0x008c + (i) * 8)
>  #define GEM_LADDRNUM 4
>  #define GEM_MID  0x00fc
> +#define  GEM_MID_VERSION_MASK(0xfff << 16)
> +#d

Re: SiFive Unmatched radeondrm/amdgpu

2021-06-25 Thread Mark Kettenis
> Date: Fri, 25 Jun 2021 21:41:38 +0200
> From: Mickael Torres 
> 
> On Fri, 25 Jun 2021 18:26:31 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > > Date: Fri, 25 Jun 2021 16:13:27 +
> > > From: "Mickael Torres" 
> > > 
> > > Hello,
> > > 
> > > This enables radeondrm/amdgpu and wsconsole/kbd/mouse on risc-v.
> > > This was tested with a radeondrm "7:0:0: ATI Radeon HD 7450" (see
> > > dmesg at the end), I'll try to test amdgpu with an RX550 during the
> > > week end.
> > > 
> > > X doesn't start because of undefined symbols in modesetting_drv.so
> > > (see below), but I didn't take a look yet.
> > 
> > Hi Mickael,
> > 
> > Cool.  Will take a proper look at this later.  Some of your fixes
> > overlap with changes that we already have pending but aren't committed
> > yet.  But there are some good fixes in here.
> > 
> > There is a small problem though.  Your e-mail client seems to replace
> > tabs with spaces, which means your patches don't apply.  You may be
> > able to fix that in your e-mail client.  As a last resort you could
> > include your diffs *both* inline and as an attachment.
> > 
> > Cheers,
> > 
> > Mark
> > 
> 
> Hi Mark,
> 
> Here is a new version of the diff, based on an up-to-date tree, and
> (hopefully) with tabs as tabs.
> 
> Best,
> Mickael

Hi Mickael,

It has tabs now, but this version has wrapped lines :(.  Modern e-mail
clients try very hard to corrupt diffs unfortunately.

Meanwhile I did commit your dwpcie(4) fix, and matthieu@ has committed
his version of the diff.  He only added radeondrm(4) for now, so a
diff (and tests) for amdgpu(4) is still welcome.

Apparently X works now, although you need an ld.so diff that hasn't
been committed yet.  But that should be fixed soon.

Thanks,

Mark


> Index: sys/arch/riscv64/conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/GENERIC,v
> retrieving revision 1.23
> diff -u -p -u -r1.23 GENERIC
> --- sys/arch/riscv64/conf/GENERIC 23 Jun 2021 15:26:10
> - 1.23 +++ sys/arch/riscv64/conf/GENERIC  25 Jun 2021
> 18:37:25 - @@ -103,9 +103,11 @@ usb*  at xhci?
>  # USB devices
>  uhub*at usb?
>  uhub*at uhub?
> -#uhidev* at uhub?
> -#ukbd*   at uhidev?
> -#wskbd*  at ukbd? mux 1
> +uhidev*  at uhub?
> +ukbd*at uhidev?
> +wskbd*   at ukbd? mux 1
> +ums* at uhidev?  # USB mouse
> +wsmouse* at ums? mux 0
>  umass*   at uhub?
>  aue* at uhub?# ADMtek AN986 Pegasus Ethernet
>  atu* at uhub?# Atmel AT76c50x based 802.11b
> @@ -148,5 +150,17 @@ sqphy*   at mii? #
> Seeq 8x220 PHYs ukphy*at mii? #
> "unknown" PHYs urlphy*at mii? # Realtek
> RTL8150L internal PHY 
> +radeondrm*   at pci?
> +drm* at radeondrm?
> +wsdisplay*   at radeondrm?
> +amdgpu*  at pci?
> +drm* at amdgpu?
> +wsdisplay*   at amdgpu?
> +
> +option WSDISPLAY_COMPAT_USL  # VT handling
> +option WSDISPLAY_COMPAT_RAWKBD   # provide raw scancodes;
> needed for X11 +option WSDISPLAY_DEFAULTSCREENS=6 # initial
> number of text consoles +
>  # Pseudo-Devices
>  pseudo-deviceopenprom
> +pseudo-devicewsmux   2
> Index: sys/arch/riscv64/conf/Makefile.riscv64
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/Makefile.riscv64,v
> retrieving revision 1.10
> diff -u -p -u -r1.10 Makefile.riscv64
> --- sys/arch/riscv64/conf/Makefile.riscv6418 May 2021 12:24:12
> - 1.10 +++ sys/arch/riscv64/conf/Makefile.riscv64 25
> Jun 2021 18:37:25 - @@ -21,7 +21,25 @@ S!=cd ../../../..;
> pwd _machdir?=$S/arch/${_mach}
>  _archdir?=   $S/arch/${_arch}
>  
> -INCLUDES=-nostdinc -I$S -I${.OBJDIR} -I$S/arch
> +INCLUDES=-nostdinc -I$S -I${.OBJDIR} -I$S/arch \
> + -I$S/dev/pci/drm/include \
> + -I$S/dev/pci/drm/include/uapi \
> + -I$S/dev/pci/drm/amd/include/asic_reg \
> + -I$S/dev/pci/drm/amd/include \
> + -I$S/dev/pci/drm/amd/amdgpu \
> + -I$S/dev/pci/drm/amd/display \
> + -I$S/dev/pci/drm/amd/display/include \
> + -I$S/dev/pci/drm/amd/display/dc \
> + -I$S/dev/pci/drm/amd/display/amdgpu_dm \
> + -I$S/dev/pci/drm/amd/powerplay/inc \
> + -I$S

Re: SiFive Unmatched radeondrm/amdgpu

2021-06-25 Thread Mark Kettenis
> Date: Fri, 25 Jun 2021 16:13:27 +
> From: "Mickael Torres" 
> 
> Hello,
> 
> This enables radeondrm/amdgpu and wsconsole/kbd/mouse on risc-v. This was
> tested with a radeondrm "7:0:0: ATI Radeon HD 7450" (see dmesg at the end),
> I'll try to test amdgpu with an RX550 during the week end.
> 
> X doesn't start because of undefined symbols in modesetting_drv.so
> (see below), but I didn't take a look yet.

Hi Mickael,

Cool.  Will take a proper look at this later.  Some of your fixes
overlap with changes that we already have pending but aren't committed
yet.  But there are some good fixes in here.

There is a small problem though.  Your e-mail client seems to replace
tabs with spaces, which means your patches don't apply.  You may be
able to fix that in your e-mail client.  As a last resort you could
include your diffs *both* inline and as an attachment.

Cheers,

Mark


> Index: sys/arch/riscv64/conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/GENERIC,v
> retrieving revision 1.23
> diff -u -p -u -r1.23 GENERIC
> --- sys/arch/riscv64/conf/GENERIC   23 Jun 2021 15:26:10 -  1.23
> +++ sys/arch/riscv64/conf/GENERIC   25 Jun 2021 16:05:48 -
> @@ -103,9 +103,11 @@ usb*   at xhci?
>  # USB devices
>  uhub*  at usb?
>  uhub*  at uhub?
> -#uhidev*   at uhub?
> -#ukbd* at uhidev?
> -#wskbd*at ukbd? mux 1
> +uhidev*at uhub?
> +ukbd*  at uhidev?
> +wskbd* at ukbd? mux 1
> +ums*   at uhidev?  # USB mouse
> +wsmouse*   at ums? mux 0
>  umass* at uhub?
>  aue*   at uhub?# ADMtek AN986 Pegasus Ethernet
>  atu*   at uhub?# Atmel AT76c50x based 802.11b
> @@ -148,5 +150,17 @@ sqphy* at mii? # Seeq 8x220 PHYs
>  ukphy* at mii? # "unknown" PHYs
>  urlphy*at mii? # Realtek RTL8150L internal PHY
>  
> +radeondrm*  at pci?
> +drm*at radeondrm?
> +wsdisplay*  at radeondrm?
> +amdgpu* at pci?
> +drm*at amdgpu?
> +wsdisplay*  at amdgpu?
> +
> +option WSDISPLAY_COMPAT_USL# VT handling
> +option WSDISPLAY_COMPAT_RAWKBD # provide raw scancodes; needed for 
> X11
> +option WSDISPLAY_DEFAULTSCREENS=6  # initial number of text consoles
> +
>  # Pseudo-Devices
>  pseudo-device  openprom
> +pseudo-device  wsmux   2
> 
> Index: sys/arch/riscv64/conf/Makefile.riscv64
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/Makefile.riscv64,v
> retrieving revision 1.10
> diff -u -p -u -r1.10 Makefile.riscv64
> --- sys/arch/riscv64/conf/Makefile.riscv64  18 May 2021 12:24:12 -
>   1.10
> +++ sys/arch/riscv64/conf/Makefile.riscv64  25 Jun 2021 16:05:48 -
> @@ -21,7 +21,25 @@ S!=  cd ../../../..; pwd
>  _machdir?= $S/arch/${_mach}
>  _archdir?= $S/arch/${_arch}
>  
> -INCLUDES=  -nostdinc -I$S -I${.OBJDIR} -I$S/arch
> +INCLUDES=  -nostdinc -I$S -I${.OBJDIR} -I$S/arch \
> +   -I$S/dev/pci/drm/include \
> +   -I$S/dev/pci/drm/include/uapi \
> +   -I$S/dev/pci/drm/amd/include/asic_reg \
> +   -I$S/dev/pci/drm/amd/include \
> +   -I$S/dev/pci/drm/amd/amdgpu \
> +   -I$S/dev/pci/drm/amd/display \
> +   -I$S/dev/pci/drm/amd/display/include \
> +   -I$S/dev/pci/drm/amd/display/dc \
> +   -I$S/dev/pci/drm/amd/display/amdgpu_dm \
> +   -I$S/dev/pci/drm/amd/powerplay/inc \
> +   -I$S/dev/pci/drm/amd/powerplay/smumgr \
> +   -I$S/dev/pci/drm/amd/powerplay/hwmgr \
> +   -I$S/dev/pci/drm/amd/display/dc/inc \
> +   -I$S/dev/pci/drm/amd/display/dc/inc/hw \
> +   -I$S/dev/pci/drm/amd/display/dc/clk_mgr \
> +   -I$S/dev/pci/drm/amd/display/modules/inc \
> +   -I$S/dev/pci/drm/amd/display/modules/hdcp \
> +   -I$S/dev/pci/drm/amd/display/dmub/inc
>  CPPFLAGS=  ${INCLUDES} ${IDENT} ${PARAM} -D_KERNEL -D__${_mach}__ -MD -MP
>  CWARNFLAGS=-Werror -Wall -Wimplicit-function-declaration \
> -Wno-uninitialized -Wno-pointer-sign \
> 
> Index: sys/arch/riscv64/conf/files.riscv64
> ===
> RCS file: /cvs/src/sys/arch/riscv64/conf/files.riscv64,v
> retrieving revision 1.14
> diff -u -p -u -r1.14 files.riscv64
> --- sys/arch/riscv64/conf/files.riscv64 17 Jun 2021 16:10:50 -  1.14
> +++ sys/arch/riscv64/conf/files.riscv64 25 Jun 2021 16:05:48 -
> @@ -137,3 +137,7 @@ filedev/fdt/pciecam.c   
> pciecam
>  # Machine-independent USB drivers
>  include "dev/usb/files.usb"
>  
> +# Include WSCONS stuff
> +include "dev/wscons/files.wscons"
> +include "dev/rasops/files.rasops"
> +include 

Re: SiFive Unmatched if_cad fix

2021-06-25 Thread Mark Kettenis
> Date: Fri, 25 Jun 2021 13:27:28 +
> From: Visa Hankala 
> 
> On Thu, Jun 24, 2021 at 07:02:11PM +, Mickael Torres wrote:
> > Hello,
> > 
> > On the risc-v SiFive Unmatched the internal cad0 ethernet interface stops 
> > working randomly after some packets are sent/received. It looks like it's 
> > because the bus_dmamap used isn't restricted to lower than 4GB physical 
> > addresses, and the interface itself is.
> 
> I am surprised that this has not been raised before. I also wonder if
> riscv64's DMA constraints are fully sane.

There is no DMA constraint on riscv64 yet.  We try to avoid having
such a constraint on platforms that don't have a long history, hoping
those platforms are (and remain) 64-bit "clean".  And on some modern
platforms (e.g. arm64) there is no memory below 4GB, so we can't have
a DMA constraint on those platforms.  The jury is still out where
riscv64 will end up.

There is infrastructure to have the bootloader set the DMA constraint
based on the device tree.

> > Configuring the interface for 64 bits DMA fixes the problem, and
> > the machine is now useable with its internal ethernet port.
> > 
> > I didn't test very extensively, but it was very easy to run into
> > "cad0: hresp error, interface stopped" before the patch. After the
> > patch it survived a couple hours of tests and ping -f from and to
> > it.
> > 
> > It is now depending on being compiled for __riscv64__ or not, would it be 
> > better to do it dynamically when matching "sifive,fu740-c000-gem" ?
> 
> Hopefully all 64-bit platforms have a 64-bit capable revision of the
> controller.

So far that seems to be true.  The controller on the PolarFire SoC is
also 64-bit capable.

> However, I would avoid #ifdef'ing and make the selecting of
> the DMA mode happen at runtime.
> 
> Below is how I had envisioned how the driver should work.
> 
> I have not tested the 64-bit side of the patch.

Seems to work fine.  The diff looks good to me.  Your diff does not
set a 4GB boundary in the bus_dmamap_create() call for the rings.  It
works without that, but if there really is a hardware constraint in
crossing a 4GB boundary we may need to add this in.

The hardware has a register that indicates whether 64-bit DMA is
supported.  Maybe we should look at that instead of checking the
compatible string.  But let's get this in and tweak it later.

ok kettenis@


> Index: dev/fdt/if_cad.c
> ===
> RCS file: src/sys/dev/fdt/if_cad.c,v
> retrieving revision 1.2
> diff -u -p -r1.2 if_cad.c
> --- dev/fdt/if_cad.c  13 Jun 2021 02:56:48 -  1.2
> +++ dev/fdt/if_cad.c  25 Jun 2021 13:18:22 -
> @@ -81,6 +81,7 @@
>  #define GEM_NETSR0x0008
>  #define  GEM_NETSR_PHY_MGMT_IDLE (1 << 2)
>  #define GEM_DMACR0x0010
> +#define  GEM_DMACR_DMA64 (1 << 30)
>  #define  GEM_DMACR_AHBDISC   (1 << 24)
>  #define  GEM_DMACR_RXBUF_MASK(0xff << 16)
>  #define  GEM_DMACR_RXBUF_SHIFT   16
> @@ -168,6 +169,8 @@
>  #define GEM_RXIPCCNT 0x01a8
>  #define GEM_RXTCPCCNT0x01ac
>  #define GEM_RXUDPCCNT0x01b0
> +#define GEM_TXQBASEHI0x04c8
> +#define GEM_RXQBASEHI0x04d4
>  
>  #define GEM_CLK_TX   "tx_clk"
>  
> @@ -183,11 +186,18 @@ struct cad_dmamem {
>   caddr_t cdm_kva;
>  };
>  
> -struct cad_desc {
> +struct cad_desc32 {
>   uint32_td_addr;
>   uint32_td_status;
>  };
>  
> +struct cad_desc64 {
> + uint32_td_addrlo;
> + uint32_td_status;
> + uint32_td_addrhi;
> + uint32_td_unused;
> +};
> +
>  #define GEM_RXD_ADDR_WRAP(1 << 1)
>  #define GEM_RXD_ADDR_USED(1 << 0)
>  
> @@ -250,6 +260,8 @@ struct cad_softc {
>   enum cad_phy_mode   sc_phy_mode;
>   unsigned char   sc_rxhang_erratum;
>   unsigned char   sc_rxdone;
> + unsigned char   sc_dma64;
> + size_t  sc_descsize;
>  
>   struct mii_data sc_mii;
>  #define sc_media sc_mii.mii_media
> @@ -257,14 +269,14 @@ struct cad_softc {
>  
>   struct cad_dmamem   *sc_txring;
>   struct cad_buf  *sc_txbuf;
> - struct cad_desc *sc_txdesc;
> + caddr_t sc_txdesc;
>   unsigned intsc_tx_prod;
>   unsigned intsc_tx_cons;
>  
>   struct if_rxringsc_rx_ring;
>   struct cad_dmamem   *sc_rxring;
>   struct cad_buf  *sc_rxbuf;
> - struct cad_desc *sc_rxdesc;
> + caddr_t sc_rxdesc;
>   unsigned intsc_rx_prod;
>   unsigned intsc_rx_cons;
>   uint32_tsc_netctl;
> @@ -409,6 +421,12 @@ 

Re: amd64: softintr_dispatch: remove kernel lock

2021-06-23 Thread Mark Kettenis
> Date: Wed, 23 Jun 2021 15:32:03 +
> From: Visa Hankala 
> 
> On Wed, Jun 23, 2021 at 05:15:05PM +0200, Mark Kettenis wrote:
> > > Date: Wed, 23 Jun 2021 14:56:45 +
> > > From: Visa Hankala 
> > > 
> > > On Tue, Jun 22, 2021 at 09:46:22AM -0500, Scott Cheloha wrote:
> > > > On Mon, Jun 21, 2021 at 02:04:30PM +, Visa Hankala wrote:
> > > > > On Thu, May 27, 2021 at 07:40:26PM -0500, Scott Cheloha wrote:
> > > > > > On Sun, May 23, 2021 at 09:05:24AM +, Visa Hankala wrote:
> > > > > > > When a CPU starts processing a soft interrupt, it reserves the 
> > > > > > > handler
> > > > > > > to prevent concurrent execution. If the soft interrupt gets 
> > > > > > > rescheduled
> > > > > > > during processing, the handler is run again by the same CPU. This 
> > > > > > > breaks
> > > > > > > FIFO ordering, though.
> > > > > > 
> > > > > > If you want to preserve FIFO you can reinsert the handler at the 
> > > > > > queue
> > > > > > tail.  That would be more fair.
> > > > > > 
> > > > > > If FIFO is the current behavior I think we ought to keep it.
> > > > > 
> > > > > I have updated the patch to preserve the FIFO order.
> > > > > 
> > > > > > > +STAILQ_HEAD(x86_soft_intr_queue, x86_soft_intrhand);
> > > > > > > +
> > > > > > > +struct x86_soft_intr_queue softintr_queue[X86_NSOFTINTR];
> > > > > > 
> > > > > > Why did we switch to STAILQ?  I know we don't have very many
> > > > > > softintr_disestablish() calls but isn't O(1) removal worth the extra
> > > > > > pointer?
> > > > > 
> > > > > I used STAILQ because it avoids the hassle of updating the list nodes'
> > > > > back pointers. softintr_disestablish() with multiple items pending in
> > > > > the queue is very rare in comparison to the normal 
> > > > > softintr_schedule() /
> > > > > softintr_dispatch() cycle.
> > > > > 
> > > > > However, I have changed the code back to using TAILQ.
> > > > 
> > > > This looks good to me.  I mean, it looked good before, but it still
> > > > looks good.
> > > > 
> > > > I will run with it for a few days.
> > > > 
> > > > Assuming I hit no issues I'll come back with an OK.
> > > > 
> > > > Is there an easy way to exercise this code from userspace?  There
> > > > aren't many softintr users.
> > > > 
> > > > Maybe audio drivers?
> > > 
> > > audio(4) is one option with a relatively high rate of scheduling.
> > > Serial communications drivers, such as com(4), might be useful for
> > > testing too.
> > > 
> > > softintr_disestablish() can be exercised with uaudio(4) and ucom(4)
> > > for example.
> > > 
> > > I am still uncertain whether the barrier in softintr_disestablish()
> > > is fully safe. The typical detach-side users are audio_detach(),
> > > com_detach() and usb_detach(). They should be fine because the
> > > surrounding code may sleep. However, sbus(4) worries me because it
> > > invokes softintr_disestablish() from PCMCIA intr_disestablish callback,
> > > and I do not know how wild the usage contexts can be. sbus(4) is
> > > specific to sparc64, though.
> > 
> > Surprise-removal is a thing for PCI as well as PCMCIA and USB.  And in
> > the PCI case this will call com_detach() and therefore
> > softintr_disestablish() from interrupt context, where you can't sleep.
> > 
> > So I don't think that using some sort of barrier that sleeps is an
> > option.
> 
> Well, com_detach() does things that may sleep, so then the existing code
> seems wrong.

Hmm, actually, it seems I misremembered and PCI hotplug remove runs in
a task (see dev/pci/ppb.c).  So maybe it is ok.

> I will revise the diff so that it spins rather than sleeps when a handler
> is active.

That wouldn't work on non-MP kernels, would it?



Re: amd64: softintr_dispatch: remove kernel lock

2021-06-23 Thread Mark Kettenis
> Date: Wed, 23 Jun 2021 14:56:45 +
> From: Visa Hankala 
> 
> On Tue, Jun 22, 2021 at 09:46:22AM -0500, Scott Cheloha wrote:
> > On Mon, Jun 21, 2021 at 02:04:30PM +, Visa Hankala wrote:
> > > On Thu, May 27, 2021 at 07:40:26PM -0500, Scott Cheloha wrote:
> > > > On Sun, May 23, 2021 at 09:05:24AM +, Visa Hankala wrote:
> > > > > When a CPU starts processing a soft interrupt, it reserves the handler
> > > > > to prevent concurrent execution. If the soft interrupt gets 
> > > > > rescheduled
> > > > > during processing, the handler is run again by the same CPU. This 
> > > > > breaks
> > > > > FIFO ordering, though.
> > > > 
> > > > If you want to preserve FIFO you can reinsert the handler at the queue
> > > > tail.  That would be more fair.
> > > > 
> > > > If FIFO is the current behavior I think we ought to keep it.
> > > 
> > > I have updated the patch to preserve the FIFO order.
> > > 
> > > > > +STAILQ_HEAD(x86_soft_intr_queue, x86_soft_intrhand);
> > > > > +
> > > > > +struct x86_soft_intr_queue softintr_queue[X86_NSOFTINTR];
> > > > 
> > > > Why did we switch to STAILQ?  I know we don't have very many
> > > > softintr_disestablish() calls but isn't O(1) removal worth the extra
> > > > pointer?
> > > 
> > > I used STAILQ because it avoids the hassle of updating the list nodes'
> > > back pointers. softintr_disestablish() with multiple items pending in
> > > the queue is very rare in comparison to the normal softintr_schedule() /
> > > softintr_dispatch() cycle.
> > > 
> > > However, I have changed the code back to using TAILQ.
> > 
> > This looks good to me.  I mean, it looked good before, but it still
> > looks good.
> > 
> > I will run with it for a few days.
> > 
> > Assuming I hit no issues I'll come back with an OK.
> > 
> > Is there an easy way to exercise this code from userspace?  There
> > aren't many softintr users.
> > 
> > Maybe audio drivers?
> 
> audio(4) is one option with a relatively high rate of scheduling.
> Serial communications drivers, such as com(4), might be useful for
> testing too.
> 
> softintr_disestablish() can be exercised with uaudio(4) and ucom(4)
> for example.
> 
> I am still uncertain whether the barrier in softintr_disestablish()
> is fully safe. The typical detach-side users are audio_detach(),
> com_detach() and usb_detach(). They should be fine because the
> surrounding code may sleep. However, sbus(4) worries me because it
> invokes softintr_disestablish() from PCMCIA intr_disestablish callback,
> and I do not know how wild the usage contexts can be. sbus(4) is
> specific to sparc64, though.

Surprise-removal is a thing for PCI as well as PCMCIA and USB.  And in
the PCI case this will call com_detach() and therefore
softintr_disestablish() from interrupt context, where you can't sleep.

So I don't think that using some sort of barrier that sleeps is an
option.



Re: sparc64: enable dt(4) in GENERIC

2021-06-23 Thread Mark Kettenis
> Date: Wed, 23 Jun 2021 11:43:47 +0200
> From: Martin Pieuchot 
> 
> Similar to what has been done on x86 & arm64, ok?

ok kettenis@

> Index: conf/GENERIC
> ===
> RCS file: /cvs/src/sys/arch/sparc64/conf/GENERIC,v
> retrieving revision 1.316
> diff -u -p -r1.316 GENERIC
> --- conf/GENERIC  4 Feb 2021 16:25:39 -   1.316
> +++ conf/GENERIC  23 Jun 2021 07:39:53 -
> @@ -556,4 +556,5 @@ owtemp* at onewire?   # Temperature
>  owctr*   at onewire? # Counter device
>  
>  pseudo-devicehotplug 1   # devices hot plugging
> +pseudo-devicedt
>  pseudo-devicewsmux   2   # mouse & keyboard multiplexor
> 
> 



Re: mkuboot(8): reduce usage()

2021-06-22 Thread Mark Kettenis
> Date: Tue, 22 Jun 2021 13:36:47 +0100
> From: Jason McIntyre 
> 
> hi.
> 
> diff to reduce verbosity in mkuboot(8) usage. i don't have the means to
> build this one.
> 
> ok?

ok kettenis@

> Index: mkuboot.c
> ===
> RCS file: /cvs/src/usr.sbin/mkuboot/mkuboot.c,v
> retrieving revision 1.10
> diff -u -p -r1.10 mkuboot.c
> --- mkuboot.c 1 Jun 2021 02:59:01 -   1.10
> +++ mkuboot.c 22 Jun 2021 12:36:08 -
> @@ -395,16 +395,6 @@ usage(void)
>   (void)fprintf(stderr,
>   "usage: %s [-a arch] [-e entry] [-l loadaddr] [-n name] [-o os] "
>   "[-t type] infile outfile\n", __progname);
> - (void)fprintf(stderr,
> - "arch is one of:");
> - for (mapptr = archmap; mapptr->arch; mapptr++)
> - (void)fprintf(stderr, " %s", mapptr->arch);
> - (void)fprintf(stderr, "\n");
> - (void)fprintf(stderr,
> - "os is one of:");
> - for (osmapptr = osmap; osmapptr->arch; osmapptr++)
> - (void)fprintf(stderr, " %s", osmapptr->arch);
> - (void)fprintf(stderr, "\n");
>   
>   exit(1);
>  }
> 
> 



Re: scsi(8): reduce usage()

2021-06-22 Thread Mark Kettenis
> Date: Tue, 22 Jun 2021 13:30:11 +0100
> From: Jason McIntyre 
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> hi.
> 
> diff to reduce verbosity in scsi(8) usage. before:
> 
> $ /sbin/scsi -Z
> scsi: unknown option -- Z
> Usage:
> 
>   scsi -f device -d debug_level# To set debug level
>   scsi -f device -m page [-P pc]   # To read mode pages
>   scsi -f device [-v] [-s seconds] -c cmd_fmt [arg0 ... argn] # A command...
>  -o count out_fmt [arg0 ... argn]  #   EITHER (data out)
>  -i count in_fmt   #   OR (data in)
> 
> "out_fmt" can be "-" to read output data from stdin;
> "in_fmt" can be "-" to write input data to stdout;
> 
> If debugging is not compiled in the kernel, "-d" will have no effect
> 
> after:
> 
> $ /usr/obj/sbin/scsi/scsi -Z
> scsi: unknown option -- Z
> usage: scsi -f device -d debug_level
>scsi -f device -m page [-e] [-P pc]
>scsi -f device [-v] [-s seconds] -c cmd_fmt [arg ...] -o count out_fmt
> [arg ...] -i count in_fmt [arg ...]
> 
> ok?

ok kettenis@

> Index: sbin/scsi/scsi.c
> ===
> RCS file: /cvs/src/sbin/scsi/scsi.c,v
> retrieving revision 1.30
> diff -u -p -r1.30 scsi.c
> --- sbin/scsi/scsi.c  7 Jun 2016 01:29:38 -   1.30
> +++ sbin/scsi/scsi.c  22 Jun 2021 12:27:10 -
> @@ -84,20 +84,11 @@ static void
>  usage(void)
>  {
>   fprintf(stderr,
> -"Usage:\n"
> -"\n"
> -"  scsi -f device -d debug_level# To set debug level\n"
> -"  scsi -f device -m page [-P pc]   # To read mode pages\n"
> -"  scsi -f device [-v] [-s seconds] -c cmd_fmt [arg0 ... argn] # A 
> command...\n"
> -" -o count out_fmt [arg0 ... argn]  #   EITHER (data out)\n"
> -" -i count in_fmt   #   OR (data in)\n"
> -"\n"
> -"\"out_fmt\" can be \"-\" to read output data from stdin;\n"
> -"\"in_fmt\" can be \"-\" to write input data to stdout;\n"
> -"\n"
> -"If debugging is not compiled in the kernel, \"-d\" will have no effect\n"
> -
> -);
> +"usage: scsi -f device -d debug_level\n"
> +"   scsi -f device -m page [-e] [-P pc]\n"
> +"   scsi -f device [-v] [-s seconds] -c cmd_fmt [arg ...]"
> +" -o count out_fmt\n"
> +"[arg ...] -i count in_fmt [arg ...]\n");
>  
>   exit (1);
>  }
> 
> 



Re: setitimer(2): increase interval upper bound to UINT_MAX seconds

2021-06-18 Thread Mark Kettenis
> Date: Fri, 18 Jun 2021 09:29:44 +0200
> From: Claudio Jeker 
> 
> On Thu, Jun 17, 2021 at 08:41:39PM -0500, Scott Cheloha wrote:
> > On Fri, Jun 11, 2021 at 12:17:02PM -0500, Scott Cheloha wrote:
> > > Hi,
> > > 
> > > setitimer(2) has a one hundred million second upper bound for timers.
> > > Any timer interval larger than this is considered invalid and we set
> > > EINVAL.
> > > 
> > > There is no longer any reason to use this particular limit.  Kclock
> > > timeouts support the full range of a timespec, so we can trivially
> > > increase the upper bound without any practical risk of overflow.
> > > 
> > > This patch increases the upper bound to UINT_MAX seconds.
> > > 
> > > Why UINT_MAX?  UINT_MAX is the largest possible input to alarm(3).  We
> > > could then simplify the alarm(3) manpage and the libc alarm.c code in
> > > a subsequent patch.  POSIX says alarm(3) "is always successful".  Our
> > > implementation can fail.  It would be nicer/simpler if ours were free
> > > of failure modes.
> > > 
> > > ok?
> > 
> > 1 week bump.
> > 
> > Updated patch: make the maximum value ("max") static and const.
> 
> OK claudio@
> 
> I wonder if we need a max at all? I guess there is an upper limit to not
> overflow the time_t when calculating the absolute timeout but that is
> probably close to LLONG_MAX / 2.

Not really worth worrying about.  You'll be long dead once that alarm
expires ;).

> I think a simplified version of alarm(3) that never fails would be nice.
>  
> > Index: kern_time.c
> > ===
> > RCS file: /cvs/src/sys/kern/kern_time.c,v
> > retrieving revision 1.153
> > diff -u -p -r1.153 kern_time.c
> > --- kern_time.c 11 Jun 2021 16:36:34 -  1.153
> > +++ kern_time.c 18 Jun 2021 01:40:42 -
> > @@ -709,15 +709,16 @@ out:
> >  int
> >  itimerfix(struct itimerval *itv)
> >  {
> > +   static const struct timeval max = { .tv_sec = UINT_MAX, .tv_usec = 0 };
> > struct timeval min_interval = { .tv_sec = 0, .tv_usec = tick };
> >  
> > if (itv->it_value.tv_sec < 0 || !timerisvalid(>it_value))
> > return EINVAL;
> > -   if (itv->it_value.tv_sec > 1)
> > +   if (timercmp(>it_value, , >))
> > return EINVAL;
> > if (itv->it_interval.tv_sec < 0 || !timerisvalid(>it_interval))
> > return EINVAL;
> > -   if (itv->it_interval.tv_sec > 1)
> > +   if (timercmp(>it_interval, , >))
> > return EINVAL;
> >  
> > if (!timerisset(>it_value))
> > 
> 
> -- 
> :wq Claudio
> 
> 



Re: Introduce UVM_OBJ_IS_AOBJ()

2021-06-14 Thread Mark Kettenis
> Date: Mon, 14 Jun 2021 11:53:30 +0200
> From: Martin Pieuchot 
> 
> The diff below introduces a new macro to generalize the test currently
> present in uvm_km_pgremove().  It also uses it in new places to reduce
> the differences with NetBSD.
> 
> This helps me shrink upcoming vmobjlock diff.
> 
> ok?

ok kettenis@


> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.96
> diff -u -p -r1.96 uvm_aobj.c
> --- uvm/uvm_aobj.c20 May 2021 08:03:35 -  1.96
> +++ uvm/uvm_aobj.c14 Jun 2021 09:39:45 -
> @@ -143,7 +143,7 @@ struct pool uvm_aobj_pool;
>  
>  static struct uao_swhash_elt *uao_find_swhash_elt(struct uvm_aobj *, int,
>boolean_t);
> -static intuao_find_swslot(struct uvm_aobj *, int);
> +static intuao_find_swslot(struct uvm_object *, int);
>  static boolean_t  uao_flush(struct uvm_object *, voff_t,
>voff_t, int);
>  static void   uao_free(struct uvm_aobj *);
> @@ -242,8 +242,11 @@ uao_find_swhash_elt(struct uvm_aobj *aob
>   * uao_find_swslot: find the swap slot number for an aobj/pageidx
>   */
>  inline static int
> -uao_find_swslot(struct uvm_aobj *aobj, int pageidx)
> +uao_find_swslot(struct uvm_object *uobj, int pageidx)
>  {
> + struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
> +
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>  
>   /*
>* if noswap flag is set, then we never return a slot
> @@ -284,6 +287,7 @@ uao_set_swslot(struct uvm_object *uobj, 
>   int oldslot;
>  
>   KERNEL_ASSERT_LOCKED();
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>  
>   /*
>* if noswap flag is set, then we can't set a slot
> @@ -353,6 +357,7 @@ uao_free(struct uvm_aobj *aobj)
>  {
>   struct uvm_object *uobj = >u_obj;
>  
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>   uao_dropswap_range(uobj, 0, 0);
>  
>   if (UAO_USES_SWHASH(aobj)) {
> @@ -881,6 +886,7 @@ uao_flush(struct uvm_object *uobj, voff_
>   struct vm_page *pp;
>   voff_t curoff;
>  
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>   KERNEL_ASSERT_LOCKED();
>  
>   if (flags & PGO_ALLPAGES) {
> @@ -1007,6 +1013,7 @@ uao_get(struct uvm_object *uobj, voff_t 
>   int lcv, gotpages, maxpages, swslot, rv, pageidx;
>   boolean_t done;
>  
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>   KERNEL_ASSERT_LOCKED();
>  
>   /*
> @@ -1036,7 +1043,7 @@ uao_get(struct uvm_object *uobj, voff_t 
>* if page is new, attempt to allocate the page,
>* zero-fill'd.
>*/
> - if (ptmp == NULL && uao_find_swslot(aobj,
> + if (ptmp == NULL && uao_find_swslot(uobj,
>   current_offset >> PAGE_SHIFT) == 0) {
>   ptmp = uvm_pagealloc(uobj, current_offset,
>   NULL, UVM_PGA_ZERO);
> @@ -1175,7 +1182,7 @@ uao_get(struct uvm_object *uobj, voff_t 
>* we have a "fake/busy/clean" page that we just allocated.  
>* do the needed "i/o", either reading from swap or zeroing.
>*/
> - swslot = uao_find_swslot(aobj, pageidx);
> + swslot = uao_find_swslot(uobj, pageidx);
>  
>   /* just zero the page if there's nothing in swap.  */
>   if (swslot == 0) {
> @@ -1241,6 +1248,8 @@ uao_dropswap(struct uvm_object *uobj, in
>  {
>   int slot;
>  
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
> +
>   slot = uao_set_swslot(uobj, pageidx, 0);
>   if (slot) {
>   uvm_swap_free(slot, 1);
> @@ -1456,6 +1465,7 @@ uao_dropswap_range(struct uvm_object *uo
>   struct uvm_aobj *aobj = (struct uvm_aobj *)uobj;
>   int swpgonlydelta = 0;
>  
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>   /* KASSERT(mutex_owned(uobj->vmobjlock)); */
>  
>   if (end == 0) {
> Index: uvm/uvm_km.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_km.c,v
> retrieving revision 1.144
> diff -u -p -r1.144 uvm_km.c
> --- uvm/uvm_km.c  16 May 2021 15:10:20 -  1.144
> +++ uvm/uvm_km.c  14 Jun 2021 09:40:39 -
> @@ -246,7 +246,7 @@ uvm_km_pgremove(struct uvm_object *uobj,
>   int slot;
>   int swpgonlydelta = 0;
>  
> - KASSERT(uobj->pgops == _pager);
> + KASSERT(UVM_OBJ_IS_AOBJ(uobj));
>  
>   for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
>   pp = uvm_pagelookup(uobj, curoff);
> Index: uvm/uvm_object.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_object.h,v
> retrieving revision 1.24
> diff -u -p -r1.24 uvm_object.h
> --- uvm/uvm_object.h  21 Oct 2020 09:08:14 -  1.24
> +++ uvm/uvm_object.h  14 Jun 2021 09:34:34 -
> @@ -82,12 

Re: Reaper & amaps

2021-06-14 Thread Mark Kettenis
> Date: Mon, 14 Jun 2021 11:50:24 +0200
> From: Martin Pieuchot 
> 
> Now that operations on amaps are serialized using a per-map rwlock
> the KERNEL_LOCK() shouldn't be necessary to call amap_unref().  The
> diff below allows the reaper to do this operation before grabbing it.
> 
> I haven't seen any relevant contention on the reaper in my profilings,
> so I don't expect any visible change related to this change.  However
> this reflects the current state of locking in UVM and helps me shrink
> my diff.
> 
> ok?

This means we no longer call uvm_pause() for these, but I believe the
main reason for calling uvm_pause() is to prevent us from holding the
kernel lock for too long.  So I think that's fine.

ok kettenis@


> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.275
> diff -u -p -r1.275 uvm_map.c
> --- uvm/uvm_map.c 22 May 2021 08:38:29 -  1.275
> +++ uvm/uvm_map.c 14 Jun 2021 09:32:04 -
> @@ -1571,10 +1571,16 @@ uvm_unmap_detach(struct uvm_map_deadq *d
>  
>   TAILQ_FOREACH_SAFE(entry, deadq, dfree.deadq, tmp) {
>   /* Skip entries for which we have to grab the kernel lock. */
> - if (entry->aref.ar_amap || UVM_ET_ISSUBMAP(entry) ||
> - UVM_ET_ISOBJ(entry))
> + if (UVM_ET_ISSUBMAP(entry) || UVM_ET_ISOBJ(entry))
>   continue;
>  
> + /* Drop reference to amap, if we've got one. */
> + if (entry->aref.ar_amap)
> + amap_unref(entry->aref.ar_amap,
> + entry->aref.ar_pageoff,
> + atop(entry->end - entry->start),
> + flags & AMAP_REFALL);
> +
>   TAILQ_REMOVE(deadq, entry, dfree.deadq);
>   uvm_mapent_free(entry);
>   }
> @@ -1586,12 +1592,6 @@ uvm_unmap_detach(struct uvm_map_deadq *d
>   while ((entry = TAILQ_FIRST(deadq)) != NULL) {
>   if (waitok)
>   uvm_pause();
> - /* Drop reference to amap, if we've got one. */
> - if (entry->aref.ar_amap)
> - amap_unref(entry->aref.ar_amap,
> - entry->aref.ar_pageoff,
> - atop(entry->end - entry->start),
> - flags & AMAP_REFALL);
>  
>   /* Drop reference to our backing object, if we've got one. */
>   if (UVM_ET_ISSUBMAP(entry)) {
> 
> 



Re: dt(4): skip probe frames on arm64

2021-06-10 Thread Mark Kettenis
> Date: Wed, 9 Jun 2021 23:05:40 +0200
> From: Tobias Heider 
> 
> Hi,
> 
> the diff below adds DT_FA_PROFILE and DT_FA_STATIC defines for arm64
> to skip the probe context frames.
> 
> Here is how a typical arm64 stack trace looks with and without diff:
> 
> dt_pcb_ring_get+0x130
> dt_prov_profile_enter+0x90
> hardclock+0x1b0
> agtimer_intr+0xa4
> ampintc_irq_handler+0x1c0
> arm_cpu_irq+0x34
> handle_el1h_irq+0x70
> sched_idle+0x294  <-- Diff skips everything above this
> sched_idle+0x294
> proc_trampoline+0x14
> 
> ok?

There is something not quite right with the stack unwinder on arm64.
This is illustrated in the stack trace above where sched_idle+0x294
shows up twice.

Another issue is that there are multiple interrupt controllers on
arm64 and I'm not sure the number of frames to skip is the same for
all of these.

> Index: dt_dev.c
> ===
> RCS file: /mount/openbsd/cvs/src/sys/dev/dt/dt_dev.c,v
> retrieving revision 1.14
> diff -u -p -r1.14 dt_dev.c
> --- dt_dev.c  22 May 2021 21:25:38 -  1.14
> +++ dt_dev.c  9 Jun 2021 20:57:14 -
> @@ -56,6 +56,9 @@
>  #if defined(__amd64__)
>  #define DT_FA_PROFILE5
>  #define DT_FA_STATIC 2
> +#elif defined(__arm64__)
> +#define DT_FA_PROFILE7
> +#define DT_FA_STATIC 2
>  #elif defined(__powerpc64__)
>  #define DT_FA_PROFILE6
>  #define DT_FA_STATIC 2
> 
> 



10gbase-r support for mvpp(4)

2021-06-02 Thread Mark Kettenis
Linux folks changed the device tree to use 10gbase-r instead of
10gbase-kr since "it is more correct".  Then the UEFI folks synched
their device trees to Linux and the 10G ports broke.  So accept both
in the code.

ok?


Index: dev/fdt/if_mvpp.c
===
RCS file: /cvs/src/sys/dev/fdt/if_mvpp.c,v
retrieving revision 1.44
diff -u -p -r1.44 if_mvpp.c
--- dev/fdt/if_mvpp.c   12 Dec 2020 11:48:52 -  1.44
+++ dev/fdt/if_mvpp.c   2 Jun 2021 20:34:30 -
@@ -1354,7 +1354,9 @@ mvpp2_port_attach(struct device *parent,
 
phy_mode = malloc(len, M_TEMP, M_WAITOK);
OF_getprop(sc->sc_node, "phy-mode", phy_mode, len);
-   if (!strncmp(phy_mode, "10gbase-kr", strlen("10gbase-kr")))
+   if (!strncmp(phy_mode, "10gbase-r", strlen("10gbase-r")))
+   sc->sc_phy_mode = PHY_MODE_10GBASER;
+   else if (!strncmp(phy_mode, "10gbase-kr", strlen("10gbase-kr")))
sc->sc_phy_mode = PHY_MODE_10GBASER;
else if (!strncmp(phy_mode, "2500base-x", strlen("2500base-x")))
sc->sc_phy_mode = PHY_MODE_2500BASEX;



Re: Larger kernel fonts in RAMDISK_CD?

2021-05-31 Thread Mark Kettenis
> Date: Mon, 31 May 2021 12:21:39 +0200
> From: Frederic Cambus 
> 
> Hi tech@,
> 
> The size of kernel fonts in RAMDISKs has long been a problem on systems
> with large screen resolutions booting via EFI, as currently only the 8x16
> font is built into RAMDISKs. As those systems are becoming more common, I
> would like to revisit the topic.
> 
> Currently, we decide which font is built into which kernel in wsfont(9),
> which will only add the 8x16 one when SMALL_KERNEL is defined, and larger
> fonts for selected architectures for !SMALL_KERNEL. There is no way to
> distinguish between RAMDISK and RAMDISK_CD kernels using #ifdef trickery,
> so with the current way we cannot add larger fonts only on RAMDISK_CD.
> As a reminder, we cannot add them to RAMDISK because there is no space
> left on the floppies, and there is no support for EFI systems on the
> floppies anyway.
> 
> However, unless I overlooked something, this could be solved by adding
> option directives directly in the RAMDISK_CD kernel configuration file.
> 
> This is what it would look like for amd64:
> 
> Index: sys/arch/amd64/conf/RAMDISK_CD
> ===
> RCS file: /cvs/src/sys/arch/amd64/conf/RAMDISK_CD,v
> retrieving revision 1.190
> diff -u -p -r1.190 RAMDISK_CD
> --- sys/arch/amd64/conf/RAMDISK_CD27 Dec 2020 23:05:37 -  1.190
> +++ sys/arch/amd64/conf/RAMDISK_CD31 May 2021 09:39:24 -
> @@ -20,6 +20,11 @@ option MSDOSFS
>  option   INET6
>  option   CRYPTO
>  
> +option   FONT_SPLEEN8x16
> +option   FONT_SPLEEN12x24
> +option   FONT_SPLEEN16x32
> +option   FONT_SPLEEN32x64
> +
>  option   RAMDISK_HOOKS
>  option   MINIROOTSIZE=7360
>  
> Does this look reasonable?

I would skip some sizes.  8x16 is readable on any screen size where
12x24 would be picked.  And maybe 16x32 is good enough for 4K screens
as well?

> If it does and if we want to go this way, I can try to build a release
> and check if MINIROOTSIZE must be bumped on RAMDISK_CD. Then we could do
> the same for i386, armv7 and arm64.

I'm all for it, but last time this came up Theo didn't like it and
suggested adding code to scale up the fonts instead.  I really don't
think you want to upscale the 8x16 font to 32x64.  But if we add the
16x32 font, upscaling that to 32x64 for the really big screens might
be an option and a reasonable compromise?

But figuring out how much things grow by adding the 16x32 font would
be a good start.



Re: iwm: avoid 'mac clock not ready' panic

2021-05-30 Thread Mark Kettenis
> Date: Sun, 30 May 2021 22:26:09 +0200
> From: Stefan Sperling 
> 
> Steven observed a panic ("iwm0: mac clock not ready") while testing
> the iwm firmware update patch on a 9560 device.
> I've also seen this happen one time, at some point during development.
> 
> In hindsight it is a bad idea to look at hardware register state here.
> The point of iwm_nic_assert_locked() is to verify that iwm_nic_lock() has
> been called somewhere up in the call stack. Checking our own lock counter
> is sufficient for this purpose.
> 
> If locking the device worked then these registers had the expected state at
> that time and our lock counter was incremented. Apparently if the device runs
> into some issue later the state of these registers may change and trigger
> these panics. Instead we want to handle such failures gracefully and reset
> the device.
> 
> ok?

ok kettenis@

> (For the curious: iwm_nic_lock() is supposed to avoid a situation where the
> device enters some low power state while the driver expects the device to
> quickly respond to certain I/O requests. That's all I know.)
> 
> diff 385a08f3e862586df8f1803dfa09fc765a5c3610 /usr/src
> blob - 4b502468fea796f103fb84237146879e8c4df267
> file + sys/dev/pci/if_iwm.c
> --- sys/dev/pci/if_iwm.c
> +++ sys/dev/pci/if_iwm.c
> @@ -1069,11 +1069,6 @@ iwm_nic_lock(struct iwm_softc *sc)
>  void
>  iwm_nic_assert_locked(struct iwm_softc *sc)
>  {
> - uint32_t reg = IWM_READ(sc, IWM_CSR_GP_CNTRL);
> - if ((reg & IWM_CSR_GP_CNTRL_REG_FLAG_MAC_CLOCK_READY) == 0)
> - panic("%s: mac clock not ready", DEVNAME(sc));
> - if (reg & IWM_CSR_GP_CNTRL_REG_FLAG_GOING_TO_SLEEP)
> - panic("%s: mac gone to sleep", DEVNAME(sc));
>   if (sc->sc_nic_locks <= 0)
>   panic("%s: nic locks counter %d", DEVNAME(sc), 
> sc->sc_nic_locks);
>  }
> 
> 



Re: setitimer(2): don't round up it_value

2021-05-28 Thread Mark Kettenis
> Date: Thu, 27 May 2021 18:29:04 -0500
> From: Scott Cheloha 

Sorry, but this is one of those areas where I'm not very aware of how
the interfaces are used by applications.  So my default position is:
"don't change it".  Especially since these are "legacy" interfaces.

> On Wed, May 19, 2021 at 10:32:55AM -0500, Scott Cheloha wrote:
> > On Wed, May 12, 2021 at 01:15:05PM -0500, Scott Cheloha wrote:
> > > 
> > > [...]
> > > 
> > > Paul de Weerd mentioned off-list that the initial expiration for an
> > > ITIMER_REAL timer is always at least one tick.  I looked into it and
> > > yes, this is the case, because the kernel rounds it_value up to one
> > > tick if it is non-zero.
> > > 
> > > After thinking about it a bit I don't think we should do this
> > > rounding.  At least, not for the initial expiration.

The manual page explicitly says:

  "Time values smaller than the resolution of the system clock are
   rounded up to this resolution (typically 10 milliseconds)".

which has been there from revision 1.

Note that POSIX defines timer_gettime() and timer_settime(), which we
don't implement.  Nevertheless, the POSIX standard says in the
rationale:

  "Practical clocks tick at a finite rate, with rates of 100 hertz and
   1000 hertz being common.  The inverse of this tick rate is the
   clock resolution, also called the clock granularity, which in
   either case is expressed as a time duration, being 10 milliseconds
   and 1 millisecond respectively for these common rates. The
   granularity of practical clocks implies that if one reads a given
   clock twice in rapid succession, one may get the same time value
   twice; and that timers must wait for the next clock tick after the
   theoretical expiration time, to ensure that a timer never returns
   too soon.  Note also that the granularity of the clock may be
   significantly coarser than the resolution of the data format used
   to set and get time and interval values. Also note that some
   implementations may choose to adjust time and/or interval values to
   exactly match the ticks of the underlying clock."

which seems to imply that rounding up is what is desired here as well,
although I presume here the actual resolution of the clock is supposed
to be used.  But for timers associated with the
CLOCK_PROCESS_CPUTIME_ID and CLOCK_THREAD_CPUTIME_ID that would be
realstathz, which is still tick-like...

In other words, I'm not convinced...

> > > 
> > > [...]
> > > 
> > > Currently the rounding is done in itimerfix(), which takes a timeval
> > > pointer as argument.  Given that itimerfix() is used nowhere else in
> > > the kernel I think the easiest thing to do here is to rewrite
> > > itimerfix() to take an itimerval pointer as argument and have it do
> > > all input validation and normalization for setitimer(2) in one go:
> > > 
> > > - Validate it_value, return EINVAL if not.
> > > 
> > > - Validate it_interval, return EINVAL if not.
> > > 
> > > - Clear it_interval if it_value is unset.
> > > 
> > > - Round it_interval up if necessary.
> > > 
> > > The 100 million second upper bound for it_value and it_interval is
> > > arbitrary and will probably change in the future, so I have isolated
> > > that check from the others.
> > > 
> > > While we're changing the itimerfix() prototype we may as well pull it
> > > out of sys/time.h.  As I said before, it isn't used anywhere else.
> > > 
> > > OK?
> > 
> > Ping.
> 
> 2 week bump.
> 
> Index: kern/kern_time.c
> ===
> RCS file: /cvs/src/sys/kern/kern_time.c,v
> retrieving revision 1.151
> diff -u -p -r1.151 kern_time.c
> --- kern/kern_time.c  23 Dec 2020 20:45:02 -  1.151
> +++ kern/kern_time.c  27 May 2021 23:28:20 -
> @@ -52,6 +52,8 @@
>  
>  #include 
>  
> +int itimerfix(struct itimerval *);
> +
>  /* 
>   * Time of day and interval timer support.
>   *
> @@ -628,10 +630,9 @@ sys_setitimer(struct proc *p, void *v, r
>   error = copyin(SCARG(uap, itv), , sizeof(aitv));
>   if (error)
>   return error;
> - if (itimerfix(_value) || itimerfix(_interval))
> - return EINVAL;
> - if (!timerisset(_value))
> - timerclear(_interval);
> + error = itimerfix();
> + if (error)
> + return error;
>   newitvp = 
>   }
>   if (SCARG(uap, oitv) != NULL) {
> @@ -701,21 +702,34 @@ out:
>  }
>  
>  /*
> - * Check that a proposed value to load into the .it_value or
> - * .it_interval part of an interval timer is acceptable.
> + * Check if the given setitimer(2) input is valid.  Clear it_interval
> + * if it_value is unset.  Round it_interval up to the minimum interval
> + * if necessary.
>   */
>  int
> -itimerfix(struct timeval *tv)
> +itimerfix(struct itimerval *itv)
>  {
> + struct timeval min_interval = { .tv_sec = 0, .tv_usec = tick };
>  
> - if (tv->tv_sec < 

Re: Driver for Cadence GEM

2021-05-27 Thread Mark Kettenis
> Date: Thu, 27 May 2021 16:30:17 +
> From: Visa Hankala 
> 
> Here is an initial driver for Cadence GEM. Revisions of this Ethernet
> controller are found on various SoCs, including Xilinx Zynq-7000 and
> Zynq UltraScale+, and SiFive's HiFive Unleashed and Unmatched.
> 
> I have tested the driver on Zynq-7000.
> 
> Unfortunately, Zynq-7000 has a bug in its transmit UDP checksum offload
> capability. It generates wrong checksum if UDP payload size is less than
> three octets and the checksum field has not been initialized to zero.
> As tweaking packets in the driver probably is not desired, transmit
> checksum offload is disabled. There is no way to disable offload with
> just UDP.
> 
> On Zynq-7000, the hardware's Rx path is prone to getting stuck when DMA
> is not able to store incoming frames in memory quickly enough. As
> suggested by the technical reference manual, the driver toggles the
> receiver briefly if there is a pause in reception.
> 
> The driver has been written with just fdt in mind. However, Linux code
> suggests that there are PCI versions of the hardware as well (but no
> ACPI, at least yet). Should I separate the glue layer, to allow new
> attachment types?

Probably not.  I don't think we've seen hardware like that show up.
We can do the work when that happens.

The driver looks reasonable to me, so ok kettenis@


> Index: sys/dev/fdt/files.fdt
> ===
> RCS file: src/sys/dev/fdt/files.fdt,v
> retrieving revision 1.151
> diff -u -p -r1.151 files.fdt
> --- sys/dev/fdt/files.fdt 18 May 2021 11:39:37 -  1.151
> +++ sys/dev/fdt/files.fdt 27 May 2021 16:15:08 -
> @@ -276,6 +276,10 @@ device   amlusbphy
>  attach   amlusbphy at fdt
>  file dev/fdt/amlusbphy.c amlusbphy
>  
> +device   cad: ether, ifnet, mii, ifmedia
> +attach   cad at fdt
> +file dev/fdt/if_cad.ccad
> +
>  device   cduart
>  attach   cduart at fdt
>  file dev/fdt/cduart.ccduart
> Index: sys/dev/fdt/if_cad.c
> ===
> RCS file: sys/dev/fdt/if_cad.c
> diff -N sys/dev/fdt/if_cad.c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ sys/dev/fdt/if_cad.c  27 May 2021 16:15:08 -
> @@ -0,0 +1,1727 @@
> +/*   $OpenBSD$   */
> +
> +/*
> + * Copyright (c) 2021 Visa Hankala
> + *
> + * Permission to use, copy, modify, and/or distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +/*
> + * Driver for Cadence 10/100/Gigabit Ethernet device.
> + */
> +
> +#include "bpfilter.h"
> +#include "kstat.h"
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#if NBPFILTER > 0
> +#include 
> +#endif
> +
> +#include 
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +
> +#include 
> +#include 
> +#include 
> +
> +#define GEM_NETCTL   0x
> +#define  GEM_NETCTL_DPRAM(1 << 18)
> +#define  GEM_NETCTL_STARTTX  (1 << 9)
> +#define  GEM_NETCTL_STATCLR  (1 << 5)
> +#define  GEM_NETCTL_MDEN (1 << 4)
> +#define  GEM_NETCTL_TXEN (1 << 3)
> +#define  GEM_NETCTL_RXEN (1 << 2)
> +#define GEM_NETCFG   0x0004
> +#define  GEM_NETCFG_SGMIIEN  (1 << 27)
> +#define  GEM_NETCFG_RXCSUMEN (1 << 24)
> +#define  GEM_NETCFG_MDCCLKDIV_MASK   (0x7 << 18)
> +#define  GEM_NETCFG_MDCCLKDIV_SHIFT  18
> +#define  GEM_NETCFG_FCSREM   (1 << 17)
> +#define  GEM_NETCFG_RXOFFS_MASK  (0x3 << 14)
> +#define  GEM_NETCFG_RXOFFS_SHIFT 14
> +#define  GEM_NETCFG_PCSSEL   (1 << 11)
> +#define  GEM_NETCFG_1000 (1 << 10)
> +#define  GEM_NETCFG_1536RXEN (1 << 8)
> +#define  GEM_NETCFG_UCASTHASHEN  (1 << 7)
> +#define  GEM_NETCFG_MCASTHASHEN  (1 << 6)
> +#define  GEM_NETCFG_BCASTDI  (1 << 5)
> +#define  GEM_NETCFG_COPYALL  (1 << 4)
> +#define  GEM_NETCFG_FDEN (1 << 1)
> +#define  

Re: vga(4): fix vga_doswitch() declaration

2021-05-25 Thread Mark Kettenis
> Date: Tue, 25 May 2021 13:53:57 -0500
> From: Scott Cheloha 
> 
> Timeout callback functions should be void (*)(void *).  I'd rather not
> cast in order to shove the function pointer into timeout_set(9).
> 
> ok?

sure, ok kettenis@

> Index: vga.c
> ===
> RCS file: /cvs/src/sys/dev/ic/vga.c,v
> retrieving revision 1.73
> diff -u -p -r1.73 vga.c
> --- vga.c 25 May 2020 09:55:48 -  1.73
> +++ vga.c 25 May 2021 18:38:59 -
> @@ -254,7 +254,7 @@ void  vga_scrollback(void *, void *, int)
>  void vga_burner(void *v, u_int on, u_int flags);
>  int  vga_getchar(void *, int, int, struct wsdisplay_charcell *);
>  
> -void vga_doswitch(struct vga_config *);
> +void vga_doswitch(void *);
>  
>  const struct wsdisplay_accessops vga_accessops = {
>   .ioctl = vga_ioctl,
> @@ -763,8 +763,7 @@ vga_show_screen(void *v, void *cookie, i
>   vc->switchcb = cb;
>   vc->switchcbarg = cbarg;
>   if (cb) {
> - timeout_set(>vc_switch_timeout,
> - (void(*)(void *))vga_doswitch, vc);
> + timeout_set(>vc_switch_timeout, vga_doswitch, vc);
>   timeout_add(>vc_switch_timeout, 0);
>   return (EAGAIN);
>   }
> @@ -774,8 +773,9 @@ vga_show_screen(void *v, void *cookie, i
>  }
>  
>  void
> -vga_doswitch(struct vga_config *vc)
> +vga_doswitch(void *arg)
>  {
> + struct vga_config *vc = arg;
>   struct vgascreen *scr, *oldscr;
>   struct vga_handle *vh = >hdl;
>   const struct wsscreen_descr *type;
> 
> 



Re: panic(9): set panicstr atomically

2021-05-25 Thread Mark Kettenis
> From: Scott Cheloha 
> Date: Tue, 25 May 2021 08:42:05 -0500
> 
> > On May 25, 2021, at 08:20, Theo de Raadt  wrote:
> > 
> > Scott Cheloha  wrote:
> > 
> >>> On Mon, May 24, 2021 at 10:12:53PM -0500, Scott Cheloha wrote:
> >>> On Sat, May 22, 2021 at 01:35:53AM +0200, Alexander Bluhm wrote:
>  On Fri, May 21, 2021 at 02:00:54PM -0500, Scott Cheloha wrote:
> > Given all of this, would it be better if secondary CPUs spin in
> > panic(9) instead of trying to print anything?
>  
>  The panic code should be as primitive as possible.  The garbled
>  output also tells me something.  Two CPUs are failing simultaneously.
>  Please don't suppress that information.
>  
>  The crash is the problem, not the ugly printing.
> >>> 
> >>> I get where you're coming from in principle (simpler is better) but I
> >>> think you're prioritizing a minor concern over the bigger picture.
> >> 
> >> To be perfectly clear, I'm not talking about the garbled printing
> >> anymore.  I'm talking about *all* the code that can run from panic().
> >> There is a lot of code.  I think it would be better if we prevented
> >> multiple CPUs from running that code simultaneously by having
> >> secondary CPUs spin in panic() as visa@ suggested.
> > 
> > I think that is incorrect.  There are no "secondary CPUs", there are
> > only "not the first cpu to enter panic".
> 
> Right, exactly. Not sure what else to call
> them.  We are talking about the same thing.
> 
> > If the 2nd cpu to enter panic actually has a more relevant panic, now
> > it will be missed.
> 
> I said we could keep the printing.  I even
> kept the printing in my latest patch. So I
> don't know what you're arguing against
> here.

So the big risk with your diff is that you may end up spinning with
interrupts disabled at which point that CPU is dead.  If you try to
switch to it at that point, you'll lock up the machine.

> > Most of our users don't do the ddbcpu trace dance.  So I expect the
> > reports we get as a result of your change will be less enlightening.
> 
> This sounds like an argument for making the
> "ddbcpu trace dance" easier.  Or automating
> it entirely.
> 
> Personally I find it extremely annoying to flip
> from CPU to CPU to print all the traces.  It
> only gets more annoying as the core count
> grows.
> 
> How hard would it be to implement
> "show all trace" or something equivalent
> to simplify crash reporting for the end user?

Wouldn't be too hard.  But unless you're on a serial console, that
will probably be more than a screenful of information, so not terribly
useful.



Re: audio devices for armv7

2021-05-24 Thread Mark Kettenis
> Date: Mon, 24 May 2021 22:37:14 +0200
> From: Peter Hessler 
> 
> After the recent uaudio dma fixes, I tried out audio playing on my armv7
> system.  Tested on the built-in audio port on hw.product=Tinker-RK3288,
> sounds fine.
> 
> OK?
> 
> (N.B. 'twrget' is not a typo, even if it looks like one)

ok kettenis@

> Index: etc/etc.armv7/MAKEDEV.md
> ===
> RCS file: /cvs/src/etc/etc.armv7/MAKEDEV.md,v
> retrieving revision 1.19
> diff -u -p -u -p -r1.19 MAKEDEV.md
> --- etc/etc.armv7/MAKEDEV.md  23 Jan 2021 05:08:33 -  1.19
> +++ etc/etc.armv7/MAKEDEV.md  24 May 2021 20:29:34 -
> @@ -105,6 +105,7 @@ _std(1, 2, 8, 6)
>  dnl
>  dnl *** armv7 specific targets
>  dnl
> +twrget(all, au, audio, 0, 1, 2)dnl
>  target(all, ch, 0)dnl
>  target(all, vscsi, 0)dnl
>  target(all, diskmap)dnl
> 
> 
> 
> -- 
> "Amnesia used to be my favorite word, but then I forgot it."
> 
> 



Re: [PATCH] [src] sys/dev/usb/usbdevs - add "SHARKOON Technologies GmbH" vendor ID

2021-05-24 Thread Mark Kettenis
> Date: Mon, 24 May 2021 15:52:44 +0100
> From: Raf Czlonka 
> 
> Hello,
> 
> Pretty self-explanatory - add "SHARKOON Technologies GmbH" vendor ID.

Not really self-explanatory.  Why do you need this?  We typically
don't add strings for devices unless we need the vendor ID or device
ID in a driver.

> Index: sys/dev/usb/usbdevs
> ===
> RCS file: /cvs/src/sys/dev/usb/usbdevs,v
> retrieving revision 1.740
> diff -u -p -r1.740 usbdevs
> --- sys/dev/usb/usbdevs   18 May 2021 14:23:03 -  1.740
> +++ sys/dev/usb/usbdevs   24 May 2021 14:37:14 -
> @@ -618,6 +618,7 @@ vendor SELUXIT0x1d6f  Seluxit
>  vendor METAGEEK  0x1dd5  MetaGeek
>  vendor SIMCOM0x1e0e  SIMCom Wireless Solutions Co., Ltd.
>  vendor FESTO 0x1e29  Festo
> +vendor SHARKOON  0x1ea7  SHARKOON Technologies GmbH
>  vendor MODACOM   0x1eb8  Modacom
>  vendor AIRTIES   0x1eda  AirTies
>  vendor LAKESHORE 0x1fb9  Lake Shore
> 
> 



Re: amd64: softintr_dispatch: remove kernel lock

2021-05-22 Thread Mark Kettenis
> Date: Sat, 22 May 2021 11:11:38 +
> From: Visa Hankala 
> 
> On Wed, May 19, 2021 at 05:11:09PM -0500, Scott Cheloha wrote:
> > Hi,
> > 
> > visa@ says I need to unlock softintr_dispatch() before I can
> > unlock softclock(), so let's do that.
> > 
> > Additionally, when we call softintr_disestablish() we want to wait for
> > the underlying softintr handle to finish running if it is running.
> > 
> > We can start with amd64.
> > 
> > I think this approach will work:
> > 
> > - Keep a pointer to the running softintr, if any, in the queue.  NULL
> >   the pointer when we return from sih_func().
> > 
> > - Take/release the kernel lock if the SI_MPSAFE flag is present when
> >   we enter/leave sih_func().
> > 
> > - If the handle is running when you call softintr_disestablish(), spin
> >   until the handle isn't running anymore and retry.
> > 
> > There is no softintr manpage but I think it is understood that
> > softintr_disestablish() is only safe to call from a process context,
> > otherwise you may deadlock.  Maybe we should do splassert(IPL_NONE)?
> > 
> > We could probably sleep here instead of spinning.  We'd have to change
> > every softintr_disestablish() implementation to do that, though.
> > Otherwise you'd have different behavior on different platforms.
> 
> I think your diff does not pay enough attention to the fact that soft
> interrupts are handled by all CPUs. I think the diff that I posted
> a while ago [1] is better in that respect.
> 
> Two biggest things that I do not like in my original diff are
> synchronization of handler execution, and use of the SMR barrier.
> 
> [1] https://marc.info/?l=openbsd-tech&m=162092714911609
> 
> The kernel lock has guaranteed that at most one CPU is able to run
> a given soft interrupt handler at a time. My diff used a mutex to
> prevent concurrent execution. However, it is wasteful to spin. It would
> be more economical to let the current runner of the handler re-execute
> the code.
> 
> The SMR barrier in softintr_disestablish() was a trick to drain any
> pending activity. However, it made me feel uneasy because I have not
> checked every caller of softintr_disestablish(). My main worry is not
> the latency but unexpected side effects.
> 
> Below is a revised diff that improves the above two points.
> 
> When a soft interrupt handler is scheduled, it is assigned to a CPU.
> That CPU will keep running the handler as long as there are pending
> requests. Once all pending requests have been drained, the CPU
> relinquishes its hold of the handler. This provides natural
> serialization.
> 
> Now softintr_disestablish() uses spinning for draining activity.
> I still have slight qualms about this, though, because the feature
> has not been so explicit before. Integration with witness(4) might be
> in order.
> 
> softintr_disestablish() uses READ_ONCE() to enforce reloading of the
> value in the busy-wait loop. This way the variable does not need to be
> volatile. (As yet another option, CPU_BUSY_CYCLE() could always
> imply memory clobbering, which should make an optimizing compiler
> redo the load.) For consistency with this READ_ONCE(), WRITE_ONCE() is
> used whenever the variable is written, excluding the initialization.
> 
> The patch uses a single mutex for access serialization. The old code
> has used one mutex per each soft IPL level, but I am not sure how
> useful that has been. I think it would be better to have a separate
> mutex for each CPU. However, the increased code complexity might not
> be worthwhile at the moment. Even having the per-CPU queues has
> a whiff of being somewhat overkill.

A few comments:

* Looking at amd64 in isolation does not make sense.  Like a lot of MD
  code in OpenBSD the softintr code was copied from whatever
  Net/FreeBSD had at the time, with no attempt at unification (it
  works, check it in, don't go back to clean it up).  However, with
  powerpc64 and riscv64 we try to do things a little bit better in
  that area.  So arm64, powerpc64 and riscv64 share the same softintr
  implementation that already implements softintr_establish_flags()
  with SOFTINTR_ESTABLISH_MPSAFE.  Now we haven't used that flag
  anywhere in our tree yet, so the code might be completely busted.
  But it may make a lot of sense to migrate other architectures to the
  same codebase.

* The softintr_disestablish() function isn't used a lot in our tree.
  It may make sense to postpone worrying about safely disestablishing
  mpsafe soft interrupts for now and simply panic if someone tries to
  do this.

* Wouldn't it make sense for an mpsafe soft interrupt to protect
  itself from running simultaneously on multiple CPUs?  It probably
  already needs some sort of lock to protect the handler and other
  code running in process context on other CPUs.  And some handlers
  may be safe to run simultaneously anyway.

* I think we should avoid MAXCPU arrays if we can; adding stuff to
  struct cpu_info is probably a better approach here.

Cheers,

Mark


bcmintc(4) diff for raspberry pi3

2021-05-22 Thread Mark Kettenis
Can't find my raspberry pi3 right now.  But here is a diff that avoids
spinning with interrupts disabled while trying to grab the kernel lock
for it.  I'd appreciate it if somebody could give this a spin for me.
Just checking whether it works normally for a bit would be fine.

Thanks,

Mark


Index: arch/arm64/dev/bcm2836_intr.c
===
RCS file: /cvs/src/sys/arch/arm64/dev/bcm2836_intr.c,v
retrieving revision 1.11
diff -u -p -r1.11 bcm2836_intr.c
--- arch/arm64/dev/bcm2836_intr.c   15 May 2021 11:30:27 -  1.11
+++ arch/arm64/dev/bcm2836_intr.c   22 May 2021 10:03:37 -
@@ -96,8 +96,8 @@ struct intrsource {
 
 struct bcm_intc_softc {
struct devicesc_dev;
-   struct intrsourcesc_bcm_intc_handler[INTC_NIRQ];
-   uint32_t sc_bcm_intc_imask[INTC_NBANK][NIPL];
+   struct intrsourcesc_handler[INTC_NIRQ];
+   uint32_t sc_imask[INTC_NBANK][NIPL];
int32_t  sc_localcoremask[MAXCPUS];
bus_space_tag_t  sc_iot;
bus_space_handle_t   sc_ioh;
@@ -115,11 +115,11 @@ intbcm_intc_splraise(int new);
 voidbcm_intc_setipl(int new);
 voidbcm_intc_calc_mask(void);
 void   *bcm_intc_intr_establish(int, int, struct cpu_info *,
-int (*)(void *), void *, char *);
+   int (*)(void *), void *, char *);
 void   *bcm_intc_intr_establish_fdt(void *, int *, int, struct cpu_info *,
-int (*)(void *), void *, char *);
+   int (*)(void *), void *, char *);
 void   *l1_intc_intr_establish_fdt(void *, int *, int, struct cpu_info *,
-int (*)(void *), void *, char *);
+   int (*)(void *), void *, char *);
 voidbcm_intc_intr_disestablish(void *);
 voidbcm_intc_irq_handler(void *);
 voidbcm_intc_intr_route(void *, int , struct cpu_info *);
@@ -204,7 +204,7 @@ bcm_intc_attach(struct device *parent, s
ARM_LOCAL_INT_MAILBOX(i), 0);
 
for (i = 0; i < INTC_NIRQ; i++) {
-   TAILQ_INIT(>sc_bcm_intc_handler[i].is_list);
+   TAILQ_INIT(>sc_handler[i].is_list);
}
 
bcm_intc_calc_mask();
@@ -239,13 +239,13 @@ bcm_intc_intr_enable(int irq, int ipl)
struct bcm_intc_softc   *sc = bcm_intc;
 
if (IS_IRQ_BANK0(irq))
-   sc->sc_bcm_intc_imask[0][ipl] |= (1 << IRQ_BANK0(irq));
+   sc->sc_imask[0][ipl] |= (1 << IRQ_BANK0(irq));
else if (IS_IRQ_BANK1(irq))
-   sc->sc_bcm_intc_imask[1][ipl] |= (1 << IRQ_BANK1(irq));
+   sc->sc_imask[1][ipl] |= (1 << IRQ_BANK1(irq));
else if (IS_IRQ_BANK2(irq))
-   sc->sc_bcm_intc_imask[2][ipl] |= (1 << IRQ_BANK2(irq));
+   sc->sc_imask[2][ipl] |= (1 << IRQ_BANK2(irq));
else if (IS_IRQ_LOCAL(irq))
-   sc->sc_bcm_intc_imask[3][ipl] |= (1 << IRQ_LOCAL(irq));
+   sc->sc_imask[3][ipl] |= (1 << IRQ_LOCAL(irq));
else
printf("%s: invalid irq number: %d\n", __func__, irq);
 }
@@ -256,13 +256,13 @@ bcm_intc_intr_disable(int irq, int ipl)
struct bcm_intc_softc   *sc = bcm_intc;
 
if (IS_IRQ_BANK0(irq))
-   sc->sc_bcm_intc_imask[0][ipl] &= ~(1 << IRQ_BANK0(irq));
+   sc->sc_imask[0][ipl] &= ~(1 << IRQ_BANK0(irq));
else if (IS_IRQ_BANK1(irq))
-   sc->sc_bcm_intc_imask[1][ipl] &= ~(1 << IRQ_BANK1(irq));
+   sc->sc_imask[1][ipl] &= ~(1 << IRQ_BANK1(irq));
else if (IS_IRQ_BANK2(irq))
-   sc->sc_bcm_intc_imask[2][ipl] &= ~(1 << IRQ_BANK2(irq));
+   sc->sc_imask[2][ipl] &= ~(1 << IRQ_BANK2(irq));
else if (IS_IRQ_LOCAL(irq))
-   sc->sc_bcm_intc_imask[3][ipl] &= ~(1 << IRQ_LOCAL(irq));
+   sc->sc_imask[3][ipl] &= ~(1 << IRQ_LOCAL(irq));
else
printf("%s: invalid irq number: %d\n", __func__, irq);
 }
@@ -279,8 +279,7 @@ bcm_intc_calc_mask(void)
for (irq = 0; irq < INTC_NIRQ; irq++) {
int max = IPL_NONE;
int min = IPL_HIGH;
-   TAILQ_FOREACH(ih, >sc_bcm_intc_handler[irq].is_list,
-   ih_list) {
+   TAILQ_FOREACH(ih, >sc_handler[irq].is_list, ih_list) {
if (ih->ih_ipl > max)
max = ih->ih_ipl;
 
@@ -288,7 +287,7 @@ bcm_intc_calc_mask(void)
min = ih->ih_ipl;
}
 
-   sc->sc_bcm_intc_handler[irq].is_irq = max;
+   sc->sc_handler[irq].is_irq = max;
 
if (max == IPL_NONE)
min = IPL_NONE;
@@ -369,16 +368,16 @@ bcm_intc_setipl(int new)
bus_space_write_4(sc->sc_iot, sc->sc_ioh, INTC_DISABLE_BANK2,
0x);
bus_space_write_4(sc->sc_iot, sc->sc_ioh, INTC_ENABLE_BANK0,
-   sc->sc_bcm_intc_imask[0][new]);

Re: Use atomic op for UVM map refcount

2021-05-21 Thread Mark Kettenis
> From: Philip Guenther 
> Date: Thu, 20 May 2021 23:45:46 -0900
> 
> On Wed, May 19, 2021 at 11:29 PM Martin Pieuchot  wrote:
> 
>  On 19/05/21(Wed) 16:17, Mark Kettenis wrote:
> 
> ... 
> 
>  > There are the READ_ONCE() and WRITE_ONCE() macros.  I'm not a big fan
>  > of those (since they add clutter) but they do take care of dependency
>  > ordering issues that exist in the alpha memory model.  Must admit that
>  > I only vaguely understand that issue, but I think it involves ordered
>  > access to two atomic variables which doesn't seem to be the case.
> 
>  These macros are used in places where declaring the field as "volatile"
>  could also work, no?  We can look at __mp_lock and SMR implementations.
>  So could we agree one way to do things?
> 
>  Visa, David, why did you pick READ_ONCE() in SMR and veb(4)?  Anything
>  we overlooked regarding the use of "volatile"?
> 
> If _all_ references to a member/variable use READ/WRITE_ONCE, then declaring
> it volatile should be equivalent, but if there are any uses which have a
> lock for protection instead, then making the member volatile will result in
> worse object code for the protected sequences where *_ONCE are not needed.

I'd argue that mixing atomic and non-atomic (protected by a lock)
access to a member would be a bug, but I suppose one can argue that
there are scenarios where using a lock to prevent simultaneous writes
and use atomic access for reads is valid.

> Also, initialization of structs with volatile members can be pessimal: the
> compiler has to assume this could be in uncached memory mapped from a device
> where writes to the member have to be of the size of the member: no paving
> with larger writes or deferring initializations.

Right.  But if you really want to access that variable atomically,
that is really what you want.

> volatile is a *really* blunt hammer.  READ/WRITE_ONCE use it carefully to
> build a sharper tool.  Unifying on "just plain volatile" when the work has
> already been done to use a sharper tool correctly..well, if that's a good
> idea then why have SMR at all when locks would be easier for everyone to
> think about, despite being a blunter hammer?  /s

That said, in the example at hand, the checks on the reference count
should be fine as the checks should only be done by CPUs that hold a
reference.  So we don't need true atomic access.  So Martin, I think
you should commit the diff as-is.

ok kettenis@



Re: Use atomic op for UVM map refcount

2021-05-21 Thread Mark Kettenis
> From: Philip Guenther 
> Date: Thu, 20 May 2021 23:32:17 -0900
> 
> On Wed, May 19, 2021 at 5:19 AM Mark Kettenis 
> wrote:
> 
>  > Date: Tue, 18 May 2021 13:24:42 +0200
>  > From: Martin Pieuchot 
> 
> ... 
> 
>  > There's only a couple of 'volatile' usages in sys/sys.  These annotations
>  > do not explicitly indicate which piece of code requires it.  Maybe it would
>  > be clearer to use a cast or a macro where necessary.  This might help us
>  > understand why and where "volatile" is needed.
> 
>  There are the READ_ONCE() and WRITE_ONCE() macros.  I'm not a big fan
>  of those (since they add clutter) but they do take care of dependency
>  ordering issues that exist in the alpha memory model.  Must admit that
>  I only vaguely understand that issue, but I think it involves ordered
>  access to two atomic variables which doesn't seem to be the case.
> 
>  On non-alpha systems, READ_ONCE() and WRITE_ONCE() just do a volatile
>  pointer cast.
> 
> READ/WRITE_ONCE() are 99% about keeping the compiler from deciding to
> rearrange code such that the indicated variable is read/written more than
> once.  To point to Linus posts from 2008/2009, when it was still
> ACCESS_ONCE():
>  https://yarchive.net/comp/linux/ACCESS_ONCE.html

But that pretty much overlaps with the requirements for atomic access
and the current Linux kernel uses them for that purpose extensively.

> ISTR a paper by Ousterhout describing this same problem to the C standard
> committee(?) in the early 90's, which kinda opened the "memory model"
> rathole.
> 
> If the variable is actually being protected by a lock then these are indeed
> noise/pessimization; it's the lock-less accesses where the compiler can pull
> a rabbit from its hat and stab you with it.

And volatile is the stab-proof vest that might protect you from that.
But more on that in the reply to your next mail...



Re: xhci early enumeration

2021-05-21 Thread Mark Kettenis
> Date: Fri, 21 May 2021 19:01:39 +0200
> From: Patrick Wildt 
> 
> Am Fri, May 21, 2021 at 06:18:40PM +0200 schrieb Martin Pieuchot:
> > On 21/05/21(Fri) 10:48, Patrick Wildt wrote:
> > > Am Wed, May 19, 2021 at 07:15:50AM + schrieb Christian Ludwig:
> > > > The usb(4) driver allows to enumerate the bus early during boot by
> > > > setting its driver flags to 0x1 in UKC. This mechanism can enable a USB
> > > > console keyboard early during autoconf(9), which can come in handy at
> > > > times. This needs USB polling mode to work, which is a bit broken. Here
> > > > is my attempt to fix it for xhci(4) controllers.
> > > > 
> > > > According to the xHCI specification section 4.2 "Host Controller
> > > > Initialization", the host controller must be fully initialized before
> > > > descending into device enumeration. Then xhci(4) sends command TRBs to
> > > > open new pipes during enumeration. They wait for completion using
> > > > tsleep(). This is bad when in polling mode at boot. And finally, the
> > > > behavior should be the same on resume as it is at boot. Therefore also
> > > > enumerate USB devices during resume when the flag is set.
> > > > 
> > > > I am specifically looking for tests on xhci controllers with usb(4)
> > > > flags set to 1 in UKC.
> > > > 
> > > > So long,
> > > > 
> > > > 
> > > >  - Christian
> > > > 
> > > > 
> > > > diff --git a/sys/arch/armv7/marvell/mvxhci.c 
> > > > b/sys/arch/armv7/marvell/mvxhci.c
> > > > index 38a636fd123..2137f68b816 100644
> > > > --- a/sys/arch/armv7/marvell/mvxhci.c
> > > > +++ b/sys/arch/armv7/marvell/mvxhci.c
> > > > @@ -155,12 +155,12 @@ mvxhci_attach(struct device *parent, struct 
> > > > device *self, void *aux)
> > > > goto disestablish_ret;
> > > > }
> > > >  
> > > > -   /* Attach usb device. */
> > > > -   config_found(self, >sc.sc_bus, usbctlprint);
> > > > -
> > > > /* Now that the stack is ready, config' the HC and enable 
> > > > interrupts. */
> > > > xhci_config(>sc);
> > > >  
> > > > +   /* Attach usb device. */
> > > > +   config_found(self, >sc.sc_bus, usbctlprint);
> > > > +
> > > > return;
> > > >  
> > > >  disestablish_ret:
> > > > diff --git a/sys/dev/acpi/xhci_acpi.c b/sys/dev/acpi/xhci_acpi.c
> > > > index 95e69cee896..d762f69a00e 100644
> > > > --- a/sys/dev/acpi/xhci_acpi.c
> > > > +++ b/sys/dev/acpi/xhci_acpi.c
> > > > @@ -112,12 +112,12 @@ xhci_acpi_attach(struct device *parent, struct 
> > > > device *self, void *aux)
> > > > goto disestablish_ret;
> > > > }
> > > >  
> > > > -   /* Attach usb device. */
> > > > -   config_found(self, >sc.sc_bus, usbctlprint);
> > > > -
> > > > /* Now that the stack is ready, config' the HC and enable 
> > > > interrupts. */
> > > > xhci_config(>sc);
> > > >  
> > > > +   /* Attach usb device. */
> > > > +   config_found(self, >sc.sc_bus, usbctlprint);
> > > > +
> > > > return;
> > > >  
> > > >  disestablish_ret:
> > > > diff --git a/sys/dev/fdt/xhci_fdt.c b/sys/dev/fdt/xhci_fdt.c
> > > > index 38c976a6b24..84e00bdadc5 100644
> > > > --- a/sys/dev/fdt/xhci_fdt.c
> > > > +++ b/sys/dev/fdt/xhci_fdt.c
> > > > @@ -116,12 +116,12 @@ xhci_fdt_attach(struct device *parent, struct 
> > > > device *self, void *aux)
> > > > goto disestablish_ret;
> > > > }
> > > >  
> > > > -   /* Attach usb device. */
> > > > -   config_found(self, >sc.sc_bus, usbctlprint);
> > > > -
> > > > /* Now that the stack is ready, config' the HC and enable 
> > > > interrupts. */
> > > > xhci_config(>sc);
> > 
> > > >  
> > > > +   /* Attach usb device. */
> > > > +   config_found(self, >sc.sc_bus, usbctlprint);
> > > > +
> > > > return;
> > > >  
> > > >  disestablish_ret:
> > > > diff --git a/sys/dev/pci/xhci_pci.c b/sys/dev/pci/xhci_pci.c
> > > > index fa3271b0d30..0b46083b705 100644
> > > > --- a/sys/dev/pci/xhci_pci.c
> > > > +++ b/sys/dev/pci/xhci_pci.c
> > > > @@ -195,12 +195,12 @@ xhci_pci_attach(struct device *parent, struct 
> > > > device *self, void *aux)
> > > > if (PCI_VENDOR(psc->sc_id) == PCI_VENDOR_INTEL)
> > > > xhci_pci_port_route(psc);
> > > >  
> > > > -   /* Attach usb device. */
> > > > -   config_found(self, >sc.sc_bus, usbctlprint);
> > > > -
> > > > /* Now that the stack is ready, config' the HC and enable 
> > > > interrupts. */
> > > > xhci_config(>sc);
> > > >  
> > > > +   /* Attach usb device. */
> > > > +   config_found(self, >sc.sc_bus, usbctlprint);
> > > > +
> > > > return;
> > > >  
> > > >  disestablish_ret:
> > > 
> > > The interesting thing is that xhci_config() used to be part of
> > > xhci_init() and was explicitly taken out from it to fix a panic
> > > that showed up when enumeration happened afterwards.
> > > 
> > > https://github.com/openbsd/src/commit/48155c88d2b90737b892a715e56d81bc73254308
> > > 
> > > Is it possible that 

Re: Use atomic op for UVM map refcount

2021-05-20 Thread Mark Kettenis
> Date: Thu, 20 May 2021 10:28:29 +0200
> From: Martin Pieuchot 
> 
> On 19/05/21(Wed) 16:17, Mark Kettenis wrote:
> > > Date: Tue, 18 May 2021 13:24:42 +0200
> > > From: Martin Pieuchot 
> > > 
> > > On 18/05/21(Tue) 12:07, Mark Kettenis wrote:
> > > > > Date: Tue, 18 May 2021 12:02:19 +0200
> > > > > From: Martin Pieuchot 
> > > > > 
> > > > > This allows us to not rely on the KERNEL_LOCK() to check reference
> > > > > counts.
> > > > > 
> > > > > Also reduces differences with NetBSD and shrinks my upcoming
> > > > > `vmobjlock' diff.
> > > > > 
> > > > > ok?
> > > > 
> > > > Shouldn't we make ref_count volatile in that case?
> > > 
> > > I don't know,  I couldn't find any evidence about where to use "volatile"
> > > in the kernel.
> > > 
> > > My understanding is that using "volatile" tells the compiler to not
> > > "cache" the value of such field in a register because it can change at
> > > any time.  Is it so?
> > 
> > Right.  So if you want the access to be atomic, it needs to be
> > "uncached" and therefore you need to use volatile.  Now the atomic
> > APIs explicitly cast their pointer arguments to volatile, so if you
> > exclusively go through those APIs you don't strictly need the variable
> > itself to be declared volatile.  But I think it still is a good idea
> > to declare them as such.
> 
> Thanks for the explanation.  Do you suggest we use the "volatile"
> keyword as a hint and/or to avoid surprises?  If we agree on this
> I'll look at similar uses of atomic operations to unify them.

Yes, I think we should do that.  The volatile keyword shouldn't hurt
and is a clear signal that there is something special about a
variable.

> > > There's only a couple of 'volatile' usages in sys/sys.  These annotations
> > > do not explicitly indicate which piece of code requires it.  Maybe it
> > > would be clearer to use a cast or a macro where necessary.  This might
> > > help us understand why and where "volatile" is needed.
> > 
> > There are the READ_ONCE() and WRITE_ONCE() macros.  I'm not a big fan
> > of those (since they add clutter) but they do take care of dependency
> > ordering issues that exist in the alpha memory model.  Must admit that
> > I only vaguely understand that issue, but I think it involves ordered
> > access to two atomic variables which doesn't seem to be the case.
> 
> These macros are used in places where declaring the field as "volatile"
> could also work, no?  We can look at __mp_lock and SMR implementations.
> So could we agree one way to do things?

Not 100%; see the comment about the alpha memory model above.  So as
long as we support OpenBSD/alpha, READ_ONCE() and WRITE_ONCE() will be
necessary in certain cases.

> Visa, David, why did you pick READ_ONCE() in SMR and veb(4)?  Anything
> we overlooked regarding the use of "volatile"?



ACPI aml_rwgsb() fix

2021-05-19 Thread Mark Kettenis
My last change to dsdt.c broke one or two of my cheap little Intel
"Atom" laptops.  Seems my interpretation of the ACPI standard wasn't
quite right.  I went back to the original bug report and I think I
understand a bit better what the AML in that report is trying to do.
So here is a diff that fixes things.

Theo, can you try this on that Dell Precision 3640?

Tests on other hardware are welcome, especially on laptops.


Index: dev/acpi/dsdt.c
===
RCS file: /cvs/src/sys/dev/acpi/dsdt.c,v
retrieving revision 1.262
diff -u -p -r1.262 dsdt.c
--- dev/acpi/dsdt.c 30 Mar 2021 16:49:58 -  1.262
+++ dev/acpi/dsdt.c 19 May 2021 22:14:46 -
@@ -2527,7 +2527,7 @@ aml_rwgpio(struct aml_value *conn, int b
 #ifndef SMALL_KERNEL
 
 void
-aml_rwgsb(struct aml_value *conn, int alen, int bpos, int blen,
+aml_rwgsb(struct aml_value *conn, int len, int bpos, int blen,
 struct aml_value *val, int mode, int flag)
 {
union acpi_resource *crs = (union acpi_resource *)conn->v_buffer;
@@ -2535,17 +2535,17 @@ aml_rwgsb(struct aml_value *conn, int al
i2c_tag_t tag;
i2c_op_t op;
i2c_addr_t addr;
-   int cmdlen, buflen, acclen;
-   uint8_t cmd;
+   int cmdlen, buflen;
+   uint8_t cmd[2];
uint8_t *buf;
-   int pos, err;
+   int err;
 
if (conn->type != AML_OBJTYPE_BUFFER || conn->length < 5 ||
AML_CRSTYPE(crs) != LR_SERBUS || AML_CRSLEN(crs) > conn->length ||
crs->lr_i2cbus.revid != 1 || crs->lr_i2cbus.type != LR_SERBUS_I2C)
aml_die("Invalid GenericSerialBus");
if (AML_FIELD_ACCESS(flag) != AML_FIELD_BUFFERACC ||
-   bpos & 0x3 || (blen % 8) != 0)
+   bpos & 0x3 || (blen % 8) != 0 || blen > 16)
aml_die("Invalid GenericSerialBus access");
 
node = aml_searchname(conn->node,
@@ -2556,32 +2556,27 @@ aml_rwgsb(struct aml_value *conn, int al
switch (AML_FIELD_ATTR(flag)) {
case 0x02:  /* AttribQuick */
cmdlen = 0;
-   buflen = acclen = 0;
+   buflen = 0;
break;
case 0x04:  /* AttribSendReceive */
cmdlen = 0;
-   acclen = 1;
-   buflen = blen / 8;
+   buflen = 1;
break;
case 0x06:  /* AttribByte */
-   cmdlen = 1;
-   acclen = 1;
-   buflen = blen / 8;
+   cmdlen = blen / 8;
+   buflen = 1;
break;
case 0x08:  /* AttribWord */
-   cmdlen = 1;
-   acclen = 2;
-   buflen = blen / 8;
+   cmdlen = blen / 8;
+   buflen = 2;
break;
case 0x0b:  /* AttribBytes */
-   cmdlen = 1;
-   acclen = alen;
-   buflen = blen / 8;
+   cmdlen = blen / 8;
+   buflen = len;
break;
case 0x0e:  /* AttribRawBytes */
cmdlen = 0;
-   acclen = alen;
-   buflen = blen / 8;
+   buflen = len;
break;
default:
aml_die("unsupported access type 0x%x", flag);
@@ -2589,12 +2584,12 @@ aml_rwgsb(struct aml_value *conn, int al
}
break;
case 1: /* AttribBytes */
-   cmdlen = 1;
-   acclen = buflen = AML_FIELD_ATTR(flag);
+   cmdlen = blen / 8;
+   buflen = AML_FIELD_ATTR(flag);
break;
case 2: /* AttribRawBytes */
cmdlen = 0;
-   acclen = buflen = AML_FIELD_ATTR(flag);
+   buflen = AML_FIELD_ATTR(flag);
break;
default:
aml_die("unsupported access type 0x%x", flag);
@@ -2621,16 +2616,11 @@ aml_rwgsb(struct aml_value *conn, int al
 
tag = node->i2c;
addr = crs->lr_i2cbus._adr;
-   cmd = bpos >> 3;
+   cmd[0] = bpos >> 3;
+   cmd[1] = bpos >> 11;
 
iic_acquire_bus(tag, 0);
-   for (pos = 0; pos < buflen; pos += acclen) {
-   err = iic_exec(tag, op, addr, , cmdlen,
-   [pos + 2], acclen, 0);
-   if (err)
-   break;
-   cmd++;
-   }
+   err = iic_exec(tag, op, addr, , cmdlen, [2], buflen, 0);
iic_release_bus(tag, 0);
 
/*
@@ -2650,14 +2640,14 @@ aml_rwgsb(struct aml_value *conn, int al
  */
 
 void
-aml_rwgsb(struct aml_value *conn, int alen, int bpos, int 

Re: uao_dropswap_range()

2021-05-19 Thread Mark Kettenis
> Date: Mon, 17 May 2021 12:32:02 +0200
> From: Martin Pieuchot 
> 
> Diff below makes use of uao_dropswap_range() in uao_free() instead of
> duplicating it.  This function has been imported from NetBSD along with
> TMPFS.  I'd like to use it to reduce the difference with their tree and
> reduce the size of my upcoming `vmobjlock' diff.
> 
> ok?

ok kettenis@

> Index: uvm/uvm_aobj.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_aobj.c,v
> retrieving revision 1.95
> diff -u -p -r1.95 uvm_aobj.c
> --- uvm/uvm_aobj.c22 Apr 2021 11:54:32 -  1.95
> +++ uvm/uvm_aobj.c11 May 2021 11:26:15 -
> @@ -351,58 +351,16 @@ uao_set_swslot(struct uvm_object *uobj, 
>  static void
>  uao_free(struct uvm_aobj *aobj)
>  {
> + struct uvm_object *uobj = >u_obj;
>  
> - if (UAO_USES_SWHASH(aobj)) {
> - int i, hashbuckets = aobj->u_swhashmask + 1;
> + uao_dropswap_range(uobj, 0, 0);
>  
> + if (UAO_USES_SWHASH(aobj)) {
>   /*
> -  * free the swslots from each hash bucket,
> -  * then the hash bucket, and finally the hash table itself.
> +  * free the hash table itself.
>*/
> - for (i = 0; i < hashbuckets; i++) {
> - struct uao_swhash_elt *elt, *next;
> -
> - for (elt = LIST_FIRST(>u_swhash[i]);
> -  elt != NULL;
> -  elt = next) {
> - int j;
> -
> - for (j = 0; j < UAO_SWHASH_CLUSTER_SIZE; j++) {
> - int slot = elt->slots[j];
> -
> - if (slot == 0) {
> - continue;
> - }
> - uvm_swap_free(slot, 1);
> - /*
> -  * this page is no longer
> -  * only in swap.
> -  */
> - atomic_dec_int();
> - }
> -
> - next = LIST_NEXT(elt, list);
> - pool_put(_swhash_elt_pool, elt);
> - }
> - }
> -
>   hashfree(aobj->u_swhash, UAO_SWHASH_BUCKETS(aobj->u_pages), 
> M_UVMAOBJ);
>   } else {
> - int i;
> -
> - /*
> -  * free the array
> -  */
> - for (i = 0; i < aobj->u_pages; i++) {
> - int slot = aobj->u_swslots[i];
> -
> - if (slot) {
> - uvm_swap_free(slot, 1);
> -
> - /* this page is no longer only in swap. */
> - atomic_dec_int();
> - }
> - }
>   free(aobj->u_swslots, M_UVMAOBJ, aobj->u_pages * sizeof(int));
>   }
>  
> @@ -1487,9 +1445,6 @@ uao_pagein_page(struct uvm_aobj *aobj, i
>  }
>  
>  /*
> - * XXX pedro: Once we are comfortable enough with this function, we can adapt
> - * uao_free() to use it.
> - *
>   * uao_dropswap_range: drop swapslots in the range.
>   *
>   * => aobj must be locked and is returned locked.
> 
> 



Re: Use atomic op for UVM map refcount

2021-05-19 Thread Mark Kettenis
> Date: Tue, 18 May 2021 13:24:42 +0200
> From: Martin Pieuchot 
> 
> On 18/05/21(Tue) 12:07, Mark Kettenis wrote:
> > > Date: Tue, 18 May 2021 12:02:19 +0200
> > > From: Martin Pieuchot 
> > > 
> > > This allows us to not rely on the KERNEL_LOCK() to check reference
> > > counts.
> > > 
> > > Also reduces differences with NetBSD and shrinks my upcoming `vmobjlock'
> > > diff.
> > > 
> > > ok?
> > 
> > Shouldn't we make ref_count volatile in that case?
> 
> I don't know,  I couldn't find any evidence about where to use "volatile"
> in the kernel.
> 
> My understanding is that using "volatile" tells the compiler to not
> "cache" the value of such field in a register because it can change at
> any time.  Is it so?

Right.  So if you want the access to be atomic, it needs to be
"uncached" and therefore you need to use volatile.  Now the atomic
APIs explicitly cast their pointer arguments to volatile, so if you
exclusively go through those APIs you don't strictly need the variable
itself to be declared volatile.  But I think it still is a good idea
to declare them as such.

> If that's correct, we should look at any piece of code reading such field
> multiple times without using atomic operation, right?

Right.

> In this case `ref_count' is used once for sanity checks in
> UVM_MAP_REQ_WRITE() and after calling atomic_dec_int_nv() in
> uvm_map_deallocate().  So, I don't see "volatile" necessary here.
> Did I miss anything?

If ref_count isn't marked as volatile, some crazy compiler
optimization might interfere with those sanity checks.  The refcount
manipulation itself is safe for the reason given above.

> There's only a couple of 'volatile' usages in sys/sys.  These annotations
> do not explicitly indicate which piece of code requires it.  Maybe it would
> be clearer to use a cast or a macro where necessary.  This might help us
> understand why and where "volatile" is needed.

There are the READ_ONCE() and WRITE_ONCE() macros.  I'm not a big fan
of those (since they add clutter) but they do take care of dependency
ordering issues that exist in the alpha memory model.  Must admit that
I only vaguely understand that issue, but I think it involves ordered
access to two atomic variables which doesn't seem to be the case.

On non-alpha systems, READ_ONCE() and WRITE_ONCE() just do a volatile
pointer cast.


> > > Index: uvm/uvm_map.c
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> > > retrieving revision 1.274
> > > diff -u -p -r1.274 uvm_map.c
> > > --- uvm/uvm_map.c 26 Mar 2021 13:40:05 -  1.274
> > > +++ uvm/uvm_map.c 18 May 2021 09:36:55 -
> > > @@ -491,12 +491,13 @@ uvm_mapent_addr_remove(struct vm_map *ma
> > >  /*
> > >   * uvm_map_reference: add reference to a map
> > >   *
> > > - * XXX check map reference counter lock
> > > + * => map need not be locked
> > >   */
> > > -#define uvm_map_reference(_map)  
> > > \
> > > - do {\
> > > - map->ref_count++;   \
> > > - } while (0)
> > > +void
> > > +uvm_map_reference(struct vm_map *map)
> > > +{
> > > + atomic_inc_int(>ref_count);
> > > +}
> > >  
> > >  /*
> > >   * Calculate the dused delta.
> > > @@ -4292,7 +4293,7 @@ uvm_map_deallocate(vm_map_t map)
> > >   int c;
> > >   struct uvm_map_deadq dead;
> > >  
> > > - c = --map->ref_count;
> > > + c = atomic_dec_int_nv(>ref_count);
> > >   if (c > 0) {
> > >   return;
> > >   }
> > > Index: uvm/uvm_map.h
> > > ===
> > > RCS file: /cvs/src/sys/uvm/uvm_map.h,v
> > > retrieving revision 1.69
> > > diff -u -p -r1.69 uvm_map.h
> > > --- uvm/uvm_map.h 12 Mar 2021 14:15:49 -  1.69
> > > +++ uvm/uvm_map.h 18 May 2021 09:36:36 -
> > > @@ -259,6 +259,7 @@ RBT_PROTOTYPE(uvm_map_addr, vm_map_entry
> > >   * read_locks and write_locks are used in lock debugging code.
> > >   *
> > >   *  Locks used to protect struct members in this file:
> > > + *   a   atomic operations
> > >   *   I   immutable after creation or exec(2)
> > >   *   v   `vm_map_lock' (this map `lock' or `mtx')
> > >   */
> > > @@ -272,7 +273,7 @@ struct vm_map {
> > >   struct uvm_map_addr addr;   /* [v] Entry tree, by addr */
> > >  
> > >   vsize_t size;   /* virtual size */
> > > - int ref_count;  /* Reference count */
> > > + int ref_count;  /* [a] Reference count */
> > >   int flags;  /* flags */
> > >   struct mutexflags_lock; /* flags lock */
> > >   unsigned inttimestamp;  /* Version number */
> > > 
> > > 
> 



Re: Use atomic op for UVM map refcount

2021-05-18 Thread Mark Kettenis
> Date: Tue, 18 May 2021 12:02:19 +0200
> From: Martin Pieuchot 
> 
> This allows us to not rely on the KERNEL_LOCK() to check reference
> counts.
> 
> Also reduces differences with NetBSD and shrinks my upcoming `vmobjlock'
> diff.
> 
> ok?

Shouldn't we make ref_count volatile in that case?

> Index: uvm/uvm_map.c
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.c,v
> retrieving revision 1.274
> diff -u -p -r1.274 uvm_map.c
> --- uvm/uvm_map.c 26 Mar 2021 13:40:05 -  1.274
> +++ uvm/uvm_map.c 18 May 2021 09:36:55 -
> @@ -491,12 +491,13 @@ uvm_mapent_addr_remove(struct vm_map *ma
>  /*
>   * uvm_map_reference: add reference to a map
>   *
> - * XXX check map reference counter lock
> + * => map need not be locked
>   */
> -#define uvm_map_reference(_map)  
> \
> - do {\
> - map->ref_count++;   \
> - } while (0)
> +void
> +uvm_map_reference(struct vm_map *map)
> +{
> + atomic_inc_int(>ref_count);
> +}
>  
>  /*
>   * Calculate the dused delta.
> @@ -4292,7 +4293,7 @@ uvm_map_deallocate(vm_map_t map)
>   int c;
>   struct uvm_map_deadq dead;
>  
> - c = --map->ref_count;
> + c = atomic_dec_int_nv(>ref_count);
>   if (c > 0) {
>   return;
>   }
> Index: uvm/uvm_map.h
> ===
> RCS file: /cvs/src/sys/uvm/uvm_map.h,v
> retrieving revision 1.69
> diff -u -p -r1.69 uvm_map.h
> --- uvm/uvm_map.h 12 Mar 2021 14:15:49 -  1.69
> +++ uvm/uvm_map.h 18 May 2021 09:36:36 -
> @@ -259,6 +259,7 @@ RBT_PROTOTYPE(uvm_map_addr, vm_map_entry
>   * read_locks and write_locks are used in lock debugging code.
>   *
>   *  Locks used to protect struct members in this file:
> + *   a   atomic operations
>   *   I   immutable after creation or exec(2)
>   *   v   `vm_map_lock' (this map `lock' or `mtx')
>   */
> @@ -272,7 +273,7 @@ struct vm_map {
>   struct uvm_map_addr addr;   /* [v] Entry tree, by addr */
>  
>   vsize_t size;   /* virtual size */
> - int ref_count;  /* Reference count */
> + int ref_count;  /* [a] Reference count */
>   int flags;  /* flags */
>   struct mutexflags_lock; /* flags lock */
>   unsigned inttimestamp;  /* Version number */
> 
> 



Re: panic(9): set panicstr atomically

2021-05-15 Thread Mark Kettenis
> Date: Sat, 15 May 2021 11:06:39 +
> From: Visa Hankala 
> 
> On Wed, May 12, 2021 at 07:08:39PM -0500, Scott Cheloha wrote:
> > In a separate mail thread, bluhm@ mentioned that panic(9) does not
> > cleanly handle multiple CPUs entering it simultaneously:
> > 
> > https://marc.info/?l=openbsd-tech=161908805925325=2
> > 
> > I'm unsure which part of panic(9) is causing the problem he mentions,
> > but one obvious issue I see is that panicstr is not set atomically,
> > so two CPUs entering panic(9) simultaneously may clobber panicbuf.
> > 
> > If we set panicstr atomically only one CPU will write panicbuf.
> 
> I think most of the clobbering is explained by more than one CPU writing
> to the console at the same time. The vsnprintf() and setting of panicstr
> usually happen quickly, so the kind of garbling occasionally seen with
> nearly simultaneous panicking is not likely to arise there. Console I/O,
> on the other hand, can be orders of magnitude slower. That, and the fact
> that mutexes become no-ops once panicstr is set, create a slow phase
> where multiple CPUs can easily be running concurrently even if the initial
> timings were not so close after all.
> 
> I feel that panic() should let only the first panicker run the panic
> code and stop any other CPUs, like NetBSD does. Another option is to
> serialize panic() in a more proper way. Or maybe secondary panickers
> should just delay a little at the start of panic()...

The problem with serializing panics is that you increase the risk that
you deadlock and don't see any messages at all...



Re: macppc: add ld.script for kernel, ofwboot

2021-05-10 Thread Mark Kettenis
> Date: Mon, 10 May 2021 14:22:33 -0400
> From: George Koehler 
> 
> On Fri, 7 May 2021 10:31:55 +0200 (CEST)
> Mark Kettenis  wrote:
> 
> > Makes sense to me.  It seems lld always requires a little bit
> > more coercion to produce non-standard binaries.  We use linker scripts
> > for the various EFI bootloaders as well.
> > 
> > ok kettenis@
> 
> My diff had an extra "pwd" in arch/macppc/stand/ofwboot/Makefile;
> I deleted the "pwd" before committing it.
> 
> > > -${PROG}: ${OBJS} ${LIBSA} ${LIBZ}
> > > - ${LD} -nopie -znorelro -N -X -Ttext ${RELOC} -e ${ENTRY} -o ${PROG} \
> > > +${PROG}: ${OBJS} ${LIBSA} ${LIBZ} ld.script
> > > + pwd
> > > + ${LD} -nopie -znorelro -N -X -T ${.CURDIR}/ld.script -o ${PROG} \
> > >   ${OBJS} ${LIBS}
> 
> From my experiments with lld 10, I believe that macppc is almost ready
> to switch from ld.bfd to ld.lld.  I know of 2 other problems:
> 
>   1.  ports/lang/gcc/8 needs USE_LLD = No, because lld 10 can't link
>   C++ code from gcc.  (I have not yet checked lld 11.)  lld had no
>   problem with Fortran ports built by gcc.
> 
>   2.  All instances of -Wl,-relax or -Wl,--relax in src or ports must
>   be deleted, because it is an unknown option to lld, but lld can
>   link large binaries without the option.

Maybe just coordinate with Theo and the ports folks and move ahead.



Re: patch: add support for RTLD_NODELETE

2021-05-10 Thread Mark Kettenis
> Date: Mon, 10 May 2021 09:00:37 +0200
> From: Sebastien Marie 
> Content-Type: text/plain; charset=us-ascii
> Content-Disposition: inline
> 
> Hi,
> 
> The following diff adds support for RTLD_NODELETE in ld.so(1).
> 
> It helps Qt programs, which use RTLD_NODELETE by default for
> loading plugins.
> 
> Without this patch, qgis (for example) is crashing systematically on
> exit. With it, it is fine.
> 
> Although RTLD_NODELETE isn't POSIX, it is widely deployed: at least linux,
> freebsd, dragonfly, netbsd, solaris, illumos, apple, and fuchsia have
> it.
> 
> I built a full release on i386 with it and built several packages
> (most of the dependencies of qgis, including qt5).
> 
> One drawback will be for ports: a build with the diff might change
> built code as RTLD_NODELETE will be present in headers. So it might
> deserve a libc bump to correctly update installed ports.
> 
> Comments or OK ?

The code is ok kettenis@

However, I have a comment on the man page change...

> diff 393e7b397988bb6abe46729de1794883d2b9d5cf /home/semarie/repos/openbsd/src
> blob - 431065f3eab32299ad39766592e72a1765c8e8dc
> file + include/dlfcn.h
> --- include/dlfcn.h
> +++ include/dlfcn.h
> @@ -42,6 +42,7 @@
>  #define RTLD_GLOBAL  0x100
>  #define RTLD_LOCAL   0x000
>  #define RTLD_TRACE   0x200
> +#define RTLD_NODELETE 0x400
>  
>  /*
>   * Special handle arguments for dlsym().
> blob - b8d5512e32bf50351b432a539106b1695a51f10f
> file + libexec/ld.so/dlfcn.c
> --- libexec/ld.so/dlfcn.c
> +++ libexec/ld.so/dlfcn.c
> @@ -54,7 +54,7 @@ dlopen(const char *libname, int flags)
>   int failed = 0;
>   int obj_flags;
>  
> - if (flags & ~(RTLD_TRACE|RTLD_LAZY|RTLD_NOW|RTLD_GLOBAL)) {
> + if (flags & ~(RTLD_TRACE|RTLD_LAZY|RTLD_NOW|RTLD_GLOBAL|RTLD_NODELETE)) {
>   _dl_errno = DL_INVALID_MODE;
>   return NULL;
>   }
> @@ -89,6 +89,9 @@ dlopen(const char *libname, int flags)
>  
>   _dl_link_dlopen(object);
>  
> + if (flags & RTLD_NODELETE)
> + object->obj_flags |= DF_1_NODELETE;
> + 
>   if (OBJECT_REF_CNT(object) > 1) {
>   /* if opened but grpsym_vec has not been filled in */
>   if (object->grpsym_vec.len == 0)
> blob - afdf60ff428680eabc76f667442934511a8576fb
> file + share/man/man3/dlfcn.3
> --- share/man/man3/dlfcn.3
> +++ share/man/man3/dlfcn.3
> @@ -124,6 +124,19 @@ each of the above values together.
>  If an object was opened with RTLD_LOCAL and later opened with RTLD_GLOBAL,
>  then it is promoted to RTLD_GLOBAL.
>  .Pp
> +Additionally, the following flag may be ORed into the mode argument:
> +.Pp
> +.Bl -tag -width "RTLD_NODELETE" -compact -offset indent
> +.It Sy RTLD_NODELETE
> +Prevents unload of the loaded object on
> +.Fn dlclose .
> +The same behaviour may be requested by
> +.Fl z
> +.Cm nodelete
> +option of the static linker
> +.Xr ld 1 .
> +.El
> +.Pp

Should -z nodelete be documented here?  It is related but doesn't do
the same thing.  RTLD_NODELETE lets the process that loads a module
make the decision, whereas -z nodelete puts a marker in the module
itself.

A similar relation exists between RTLD_NOW and -z now, but we don't
document that.

I'm leaning towards leaving out the sentence about -z nodelete.

Cheers,

Mark



Re: patch: add support for RTLD_NODELETE

2021-05-10 Thread Mark Kettenis
> From: Stuart Henderson 
> Date: Mon, 10 May 2021 09:16:01 +0100
> 
> We are due a _SYSTEM_VERSION bump for the clang update, it can ride 
> alongside that

We should probably still do a libc minor bump for this since this adds
an interface.

> 
> -- 
>   Sent from a phone, apologies for poor formatting.
> On 10 May 2021 08:01:18 Sebastien Marie  wrote:
> 
> > Hi,
> >
> > The following diff adds support for RTLD_NODELETE in ld.so(1).
> >
> > It helps Qt programs which is using RTLD_NODELETE per default for
> > loading plugins.
> >
> > Without this patch, qgis (for example) is crashing systematically on
> > exit. With it, it is fine.
> >
> > If RTLD_NODELETE isn't POSIX, it is widely deployed: at least linux,
> > freebsd, dragonfly, netbsd, solaris, illumos, apple, and fuchsia have
> > it.
> >
> > I built a full release on i386 with it and built several packages
> > (most of the dependencies of qgis, which include qt5).
> >
> > One drawback will be for ports: a build with the diff might change
> > built code as RTLD_NODELETE will be present in headers. So it might
> > deserve a libc bump to correctly update installed ports.
> >
> > Comments or OK ?
> > --
> > Sebastien Marie
> >
> >
> > diff 393e7b397988bb6abe46729de1794883d2b9d5cf 
> > /home/semarie/repos/openbsd/src
> > blob - 431065f3eab32299ad39766592e72a1765c8e8dc
> > file + include/dlfcn.h
> > --- include/dlfcn.h
> > +++ include/dlfcn.h
> > @@ -42,6 +42,7 @@
> > #define RTLD_GLOBAL 0x100
> > #define RTLD_LOCAL  0x000
> > #define RTLD_TRACE  0x200
> > +#define RTLD_NODELETE  0x400
> >
> > /*
> >  * Special handle arguments for dlsym().
> > blob - b8d5512e32bf50351b432a539106b1695a51f10f
> > file + libexec/ld.so/dlfcn.c
> > --- libexec/ld.so/dlfcn.c
> > +++ libexec/ld.so/dlfcn.c
> > @@ -54,7 +54,7 @@ dlopen(const char *libname, int flags)
> > int failed = 0;
> > int obj_flags;
> >
> > -   if (flags & ~(RTLD_TRACE|RTLD_LAZY|RTLD_NOW|RTLD_GLOBAL)) {
> > +   if (flags & ~(RTLD_TRACE|RTLD_LAZY|RTLD_NOW|RTLD_GLOBAL|RTLD_NODELETE)) 
> > {
> > _dl_errno = DL_INVALID_MODE;
> > return NULL;
> > }
> > @@ -89,6 +89,9 @@ dlopen(const char *libname, int flags)
> >
> > _dl_link_dlopen(object);
> >
> > +   if (flags & RTLD_NODELETE)
> > +   object->obj_flags |= DF_1_NODELETE;
> > +   
> > if (OBJECT_REF_CNT(object) > 1) {
> > /* if opened but grpsym_vec has not been filled in */
> > if (object->grpsym_vec.len == 0)
> > blob - afdf60ff428680eabc76f667442934511a8576fb
> > file + share/man/man3/dlfcn.3
> > --- share/man/man3/dlfcn.3
> > +++ share/man/man3/dlfcn.3
> > @@ -124,6 +124,19 @@ each of the above values together.
> > If an object was opened with RTLD_LOCAL and later opened with RTLD_GLOBAL,
> > then it is promoted to RTLD_GLOBAL.
> > .Pp
> > +Additionally, the following flag may be ORed into the mode argument:
> > +.Pp
> > +.Bl -tag -width "RTLD_NODELETE" -compact -offset indent
> > +.It Sy RTLD_NODELETE
> > +Prevents unload of the loaded object on
> > +.Fn dlclose .
> > +The same behaviour may be requested by
> > +.Fl z
> > +.Cm nodelete
> > +option of the static linker
> > +.Xr ld 1 .
> > +.El
> > +.Pp
> > The main executable's symbols are normally invisible to
> > .Fn dlopen
> > symbol resolution.
> 
> 



Re: macppc bsd.mp pmap's hash lock

2021-05-08 Thread Mark Kettenis
> Date: Thu, 6 May 2021 23:11:30 -0400
> From: George Koehler 
> 
> Hello tech list,
> 
> If you have a macppc with more than one cpu, I would like you to try
> this diff in the GENERIC.MP kernel.  I am running it on a dual G5
> (without radeondrm and not running X).  I don't know whether I want to
> commit this diff.
> 
> In late April, my G5's kernel froze very early during boot, while
> trying to map the framebuffer.  The problem went away after reordering
> and relinking the kernel.  I kept a copy of the bad kernel.  I found
> the problem on Tuesday: __ppc_lock() crossed a page boundary.
> 
> $ nm -n /bsd.crash | grep __ppc_lock
> $ objdump -dlr --start-ad=0x27bf8c /bsd.crash|less
> 
> The disassembly had 0x27bf8c <= __ppc_lock < 0x27c058, so it crossed
> pages at 0x27c000; page size = 0x1000 = 4096.  On a G5, the kernel
> lazily faults in its own pages.  The page fault at 0x27c000 inside
> __ppc_lock caused a recursive call to __ppc_lock.  I believe that the
> fault happened here in __ppc_lock:
> 
>   s = ppc_intr_disable();
>   if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) {
>   membar_enter();
>   mpl->mpl_cpu = curcpu();
>   }
> 
>   if (mpl->mpl_cpu == curcpu()) {
> //--> // page fault!  recursive call to __ppc_lock
>   mpl->mpl_count++;
>   ppc_intr_enable(s);
> 
> This is bad, because the lock is not in a valid state when the page
> fault happens.  The code tries ppc_intr_disable to protect the lock,
> but this doesn't disable page faults.
> 
> The lock is a recursive spinlock that protects the pmap's hash table.
> My bad kernel tried to grab the lock to insert the 1st page of the
> framebuffer into the hash, and then tried to recursively grab the lock
> to insert page 0x27c000 into the hash.  I debugged the problem by
> copying the bad kernel and overwriting some asm to insert some extra
> printf()s.  The problem went away when my asm caused an earlier access
> to page 0x27c000.
> 
> When I reorder the kernel, __ppc_lock gets a different address, and
> probably doesn't cross pages with a not-valid lock; but I can force
> a page fault this way:
> 
>   __asm volatile("b 1f; . = . + 4096; 1:");
> 
> If I insert this asm at my above "//-->" and compile GENERIC.MP, then
> it reproduces the freezing problem on my G5.
> 
> The problem doesn't happen on a G3 or G4, where the kernel uses block
> address translation (!ppc_nobat); I copied my bad kernel to a G4 and
> it didn't freeze there.  The problem doesn't happen in bsd.sp, where
> the lock doesn't exist.  Our powerpc64 kernel doesn't fault in its own
> pages this way.  I have observed the problem only with macppc on G5.
> 
> In this diff, I try to fix the problem by shrinking the lock to 32
> bits, and using 32-bit atomic ops to keep the lock in a valid state.
> This __ppc_lock is no longer the __mp_lock, but is only the pmap's
> hash lock, so I also delete some unused functions.

Good find!  On powerpc64 I avoid the issue because I guarantee that
the kernel mappings are never evicted from the hash.  But doing so on
powerpc would require more serious development.  I'm not sure we
really need a ticket lock for this, but since you already did the
work, let's stick with it for now.

ok kettenis@

> Index: arch/powerpc/include/mplock.h
> ===
> RCS file: /cvs/src/sys/arch/powerpc/include/mplock.h,v
> retrieving revision 1.4
> diff -u -p -r1.4 mplock.h
> --- arch/powerpc/include/mplock.h 15 Apr 2020 08:09:00 -  1.4
> +++ arch/powerpc/include/mplock.h 6 May 2021 20:01:08 -
> @@ -30,13 +30,13 @@
>  #define __USE_MI_MPLOCK
>  
>  /*
> + * __ppc_lock exists because pte_spill_r() can't use __mp_lock.
>   * Really simple spinlock implementation with recursive capabilities.
>   * Correctness is paramount, no fancyness allowed.
>   */
>  
>  struct __ppc_lock {
> - volatile struct cpu_info *mpl_cpu;
> - volatile long   mpl_count;
> + volatile unsigned int   mpl_bolt;
>  };
>  
>  #ifndef _LOCORE
> @@ -44,10 +44,6 @@ struct __ppc_lock {
>  void __ppc_lock_init(struct __ppc_lock *);
>  void __ppc_lock(struct __ppc_lock *);
>  void __ppc_unlock(struct __ppc_lock *);
> -int __ppc_release_all(struct __ppc_lock *);
> -int __ppc_release_all_but_one(struct __ppc_lock *);
> -void __ppc_acquire_count(struct __ppc_lock *, int);
> -int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *);
>  
>  #endif
>  
> Index: arch/powerpc/powerpc/lock_machdep.c
> ===
> RCS file: /cvs/src/sys/arch/powerpc/powerpc/lock_machdep.c,v
> retrieving revision 1.9
> diff -u -p -r1.9 lock_machdep.c
> --- arch/powerpc/powerpc/lock_machdep.c   15 Apr 2020 08:09:00 -  
> 1.9
> +++ arch/powerpc/powerpc/lock_machdep.c   6 May 2021 20:01:08 -
> @@ -1,6 

Re: emutls and dlopen(3) problem - Re: patch: make ld.so aware of pthread_key_create destructor - Re: multimedia/mpv debug vo=gpu crash on exit

2021-05-08 Thread Mark Kettenis
> Date: Sat, 8 May 2021 13:42:40 +0200
> From: Sebastien Marie 
> 
> On Thu, May 06, 2021 at 09:32:28AM +0200, Sebastien Marie wrote:
> > Hi,
> > 
> > Anindya, did a good analysis of the problem with mpv using gpu video
> > output backend (it is using EGL and mesa if I correctly followed).
> > 
> > 
> > For people not reading ports@, here is a summary: the destructor function
> > used in pthread_key_create() needs to be present in memory until
> > _rthread_tls_destructors() is called.
> > 
> > in the case of mesa, eglInitialize() function could load, via
> > dlopen(), code which will use pthread_key_create() with destructor.
> > 
> > once dlclose() is called, the object is unloaded from memory, but a
> > reference to destructor is kept, leading to segfault when
> > _rthread_tls_destructors() run and use the destructor (because
> > pointing to unloaded code).
> >
> 
> I was going deeper in the analysis.
> 
> At first, I thought that the pthread_key_create() call was going from
> mesa driver (radeonsi_dri.so on my machine) as pinning the DSO in
> memory (using LD_PRELOAD) permitted to avoid the segfault.
> 
> In fact, it isn't directly radeonsi_dri.so but another dependent
> library: libLLVM.so.5.0 in this case (by using
> LD_PRELOAD=.../libLLVM.so.5.0, the crash disappears).
> 
> Searching where is located the pthread_key_create() call, I found that
> it was coming from emutls implementation (which is using
> pthread_key_create + destructor) and which is statically linked with
> compiler-rt.a
> 
> By instrumenting pthread_key_create, I have the following backtrace
> (the abort(3) is mine):
> 
> (gdb) bt
> #0  thrkill () at /tmp/-:3
> #1  0x05188f550abe in _libc_abort () at 
> /usr/src/lib/libc/stdlib/abort.c:51
> #2  0x05191c7e8c2b in pthread_key_create () from 
> /home/semarie/Documents/devel/libhijacking/libthread.so
> #3  0x0519399e6a87 in emutls_init () at 
> /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:118
> #4  0x05188f55b4f7 in pthread_once (once_control=0x51939d00b30 
> , init_routine=0x27240efb23d627ef) at 
> /usr/src/lib/libc/thread/rthread_once.c:26
> #5  0x0519399e68dd in emutls_init_once () at 
> /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:125
> #6  emutls_get_index (control=0x51939cae5c8 
> <__emutls_v._ZL25TimeTraceProfilerInstance>) at 
> /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:316
> #7  __emutls_get_address (control=0x51939cae5c8 
> <__emutls_v._ZL25TimeTraceProfilerInstance>) at 
> /usr/src/gnu/lib/libcompiler_rt/../../llvm/compiler-rt/lib/builtins/emutls.c:379
> #8  0x0519387f296e in llvm::getTimeTraceProfilerInstance() () from 
> /usr/lib/libLLVM.so.5.0
> #9  0x051938ec2bf2 in llvm::legacy::PassManagerImpl::run(llvm::Module&) 
> () from /usr/lib/libLLVM.so.5.0
> #10 0x05193974eb67 in LLVMRunPassManager () from /usr/lib/libLLVM.so.5.0
> #11 0x0518d11276d8 in ?? () from 
> /usr/X11R6/lib/modules/dri/radeonsi_dri.so
> #12 0x0518d1082761 in ?? () from 
> /usr/X11R6/lib/modules/dri/radeonsi_dri.so
> #13 0x0518d110b1ea in ?? () from 
> /usr/X11R6/lib/modules/dri/radeonsi_dri.so
> #14 0x0518d0c7939c in ?? () from 
> /usr/X11R6/lib/modules/dri/radeonsi_dri.so
> #15 0x0518d0c794ed in ?? () from 
> /usr/X11R6/lib/modules/dri/radeonsi_dri.so
> #16 0x0518d1cfbec1 in _rthread_start (v= Unhandled dwarf expression opcode 0xa3>) at 
> /usr/src/lib/librthread/rthread.c:96
> #17 0x05188f52bd2a in __tfork_thread () at 
> /usr/src/lib/libc/arch/amd64/sys/tfork_thread.S:84
> 
> It means that emutls implementation we are using couldn't be safely
> used if the code is using dlopen(3).
> 
> 
> I made the following PoC using __thread :
> 
> $ cat lib.c
> #include <stdio.h>
> 
> __thread int value = 0;
> 
> void
> fn()
> {
>   printf("entering:  %s\n", __func__);
>   value = 1;
>   printf("returning: %s\n", __func__);
> }
> 
> $ cat main.c
> #include <dlfcn.h>
> #include <err.h>
> #include <pthread.h>
> #include <stdio.h>
> #include <stdlib.h>
> 
> void *
> loadcode(void *arg)
> {
>   void *lib;
>   void (*fn)();
> 
>   printf("thread: entering\n");
>   
>   printf("dlopen(3)\n");
>   if ((lib = dlopen("./lib.so", 0)) == NULL)
>   errx(EXIT_FAILURE, "dlopen: %s", dlerror());
>   
>   if ((fn = dlsym(lib, "fn")) == NULL)
>   errx(EXIT_FAILURE, "dlsym: %s", dlerror());
> 
>   fn();
> 
>   printf("dlclose(3)\n");
>   if (dlclose(lib) != 0)
>   errx(EXIT_FAILURE, "dlclose: %s", dlerror());
> 
>   printf("thread: returning\n");
>   return arg;
> }
> 
> int
> main(int argc, char *argv[])
> {
>   int error;
>   pthread_t th;
> 
>   if ((error = pthread_create(&th, NULL, &loadcode, NULL)) != 0)
>   errc(error, EXIT_FAILURE, "pthread_create");
>   
>   if ((error = pthread_join(th, NULL)) != 0)
>   errc(error, EXIT_FAILURE, "pthread_join");
>   
>   return EXIT_SUCCESS;
> }
> 

Re: macppc: add ld.script for kernel, ofwboot

2021-05-07 Thread Mark Kettenis
{COPTIMIZE} 
> ${COPTS} ${PIPE}
>  AFLAGS=  -D_LOCORE ${CMACHFLAGS}
> -LINKFLAGS=   -N -Ttext 100114 -e start --warn-common -nopie
> +LINKFLAGS=   -N -T ld.script --warn-common -nopie
>  
>  .if ${MACHINE} == "powerpc64"
>  CFLAGS+= -m32
> Index: arch/macppc/conf/ld.script
> ===
> RCS file: /cvs/src/sys/arch/macppc/conf/ld.script,v
> retrieving revision 1.1
> diff -u -p -r1.1 ld.script
> --- arch/macppc/conf/ld.script13 Jun 2017 01:42:52 -  1.1
> +++ arch/macppc/conf/ld.script6 May 2021 20:01:08 -
> @@ -0,0 +1,68 @@
> +/*   $OpenBSD: ld.script,v 1.4 2020/07/18 13:16:32 kettenis Exp $*/
> +
> +/*
> + * Copyright (c) 2013 Mark Kettenis 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +ENTRY(start)
> +
> +PHDRS
> +{
> + text PT_LOAD;
> + openbsd_randomize PT_OPENBSD_RANDOMIZE;
> +}
> +
> +SECTIONS
> +{
> + . = 0x00100114;
> + .text :
> + {
> + *(.text)
> + } :text
> + PROVIDE (etext = .);
> + PROVIDE (_etext = .);
> +
> + .rodata :
> + {
> + *(.rodata .rodata.*)
> + } :text
> +
> + .data.rel.ro :
> + {
> + *(.data.rel.ro)
> + } :text
> +
> + .openbsd.randomdata :
> + {
> + *(.openbsd.randomdata .openbsd.randomdata.*)
> + } :openbsd_randomize :text
> +
> + .data :
> + {
> + *(.data)
> + } :text
> +
> + .sbss :
> + {
> + *(.sbss)
> + }
> +
> + .bss :
> + {
> + *(.bss)
> + }
> + PROVIDE (end = .);
> + PROVIDE (_end = .);
> +}
> Index: arch/macppc/stand/ofwboot/Makefile
> ===
> RCS file: /cvs/src/sys/arch/macppc/stand/ofwboot/Makefile,v
> retrieving revision 1.20
> diff -u -p -r1.20 Makefile
> --- arch/macppc/stand/ofwboot/Makefile16 Mar 2020 07:02:10 -  
> 1.20
> +++ arch/macppc/stand/ofwboot/Makefile6 May 2021 20:01:08 -
> @@ -31,18 +31,17 @@ SRCS+=moddi3.c
>  
>  NEWVERSWHAT= "OpenFirmware Boot"
>  
> -# For now...
> +# Must match . in ld.script
> RELOC=   20000
>  
> -ENTRY=   _start
> -
>  CPPFLAGS+=   -I. -I${.CURDIR}/../../.. -I${.CURDIR}/../../../..
>  CPPFLAGS+=   -DRELOC=0x${RELOC} -DCONSPEED=57600
>  
>  LIBS!=  cd $(.CURDIR)/$(R); $(MAKE) libdep
>  
> -${PROG}: ${OBJS} ${LIBSA} ${LIBZ}
> - ${LD} -nopie -znorelro -N -X -Ttext ${RELOC} -e ${ENTRY} -o ${PROG} \
> +${PROG}: ${OBJS} ${LIBSA} ${LIBZ} ld.script
> + pwd
> + ${LD} -nopie -znorelro -N -X -T ${.CURDIR}/ld.script -o ${PROG} \
>   ${OBJS} ${LIBS}
>  
>  .include 
> Index: arch/macppc/stand/ofwboot/ld.script
> ===
> RCS file: arch/macppc/stand/ofwboot/ld.script
> diff -N arch/macppc/stand/ofwboot/ld.script
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ arch/macppc/stand/ofwboot/ld.script   6 May 2021 20:01:08 -
> @@ -0,0 +1,63 @@
> +/*   $OpenBSD: ld.script,v 1.4 2020/07/18 13:16:32 kettenis Exp $*/
> +
> +/*
> + * Copyright (c) 2013 Mark Kettenis 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +ENTRY(_start)
> +
> +PHDRS
> +{
> + text PT_LOAD;
> +}
> +
> +SECTIONS
> +{
> + /* Must match RELOC in Makefile */
> + . = 0x00020000;
> + .text :
> + {
> + *(.text)
> + } :text
> + PROVIDE (etext = .);
> + PROVIDE (_etext = .);
> +
> + .rodata :
> + {
> + *(.rodata .rodata.*)
> + } :text
> +
> + .data.rel.ro :
> + {
> + *(.data.rel.ro)
> + } :text
> +
> + .data :
> + {
> + *(.data)
> + } :text
> +
> + .sbss :
> + {
> + *(.sbss)
> + }
> +
> + .bss :
> + {
> + *(.bss)
> + }
> + PROVIDE (end = .);
> + PROVIDE (_end = .);
> +}
> 



  1   2   3   4   5   6   7   8   9   10   >